work_queue.h

Go to the documentation of this file.
00001 /*
00002 Copyright (C) 2008- The University of Notre Dame
00003 This software is distributed under the GNU General Public License.
00004 See the file COPYING for details.
00005 */
00006 
00007 #ifndef WORK_QUEUE_H
00008 #define WORK_QUEUE_H
00009 
00020 #include <sys/types.h>
00021 #include "timestamp.h"
00022 #include "rmsummary.h"
00023 
/* Port and timeout sentinel values.
   NOTE(review): the per-line notes are inferred from the macro names and the
   prototypes in this header; confirm against the implementation. */
00024 #define WORK_QUEUE_DEFAULT_PORT 9123  /* Default port number. */
00025 #define WORK_QUEUE_RANDOM_PORT  0    /* Presumably asks work_queue_create() to pick a port itself - confirm. */
00026 #define WORK_QUEUE_WAITFORTASK  -1    /* Presumably a work_queue_wait() timeout meaning "block until a task returns" - confirm. */
/* Scheduling-algorithm identifiers: distinct integers 0..4.
   NOTE(review): presumably the values accepted by the `algo` argument of
   work_queue_specify_algorithm() and work_queue_task_specify_algorithm();
   the meaning of each name below is inferred - confirm. */
00028 #define WORK_QUEUE_SCHEDULE_UNSET 0 /* No algorithm selected. */
00029 #define WORK_QUEUE_SCHEDULE_FCFS         1 /* Presumably first-come, first-served. */
00030 #define WORK_QUEUE_SCHEDULE_FILES        2 /* Presumably selection based on files. */
00031 #define WORK_QUEUE_SCHEDULE_TIME         3 /* Presumably selection based on time. */
00032 #define WORK_QUEUE_SCHEDULE_RAND         4 /* Presumably random selection. */
/* File direction values and file-handling flags.
   NOTE(review): presumably INPUT/OUTPUT are the `type` argument and the
   remaining macros the `flags` argument of work_queue_task_specify_file()
   and its siblings - confirm. The non-zero flag values are single bits
   (1, 2, 4, 8, 16); THIRDGET and THIRDPUT deliberately share the value 8. */
00034 #define WORK_QUEUE_INPUT  0     /* File is an input. */
00035 #define WORK_QUEUE_OUTPUT 1     /* File is an output. */
00037 #define WORK_QUEUE_NOCACHE 0    /* No caching flag set (value 0). */
00038 #define WORK_QUEUE_CACHE 1      /* Presumably requests that the file be cached - confirm. */
00039 #define WORK_QUEUE_SYMLINK 2    /* Create a symlink to the file rather than copying it, if possible. */
00040 #define WORK_QUEUE_PREEXIST 4   /* If the filename already exists on the host, use it in place. */
00041 #define WORK_QUEUE_THIRDGET 8   /* Access the file on the client from a shared filesystem */
00042 #define WORK_QUEUE_THIRDPUT 8   /* Access the file on the client from a shared filesystem (included for readability) */
00043 #define WORK_QUEUE_WATCH 16     /* NOTE(review): meaning not evident from this header - confirm. */
/* Reset modes and keepalive defaults.
   NOTE(review): no function in this header takes a reset mode, so the
   consumer of WORK_QUEUE_RESET_* is not visible here. The keepalive values
   are presumably the defaults behind work_queue_specify_keepalive_interval()
   and work_queue_specify_keepalive_timeout(); units are not stated here
   (presumably seconds) - confirm. */
00045 #define WORK_QUEUE_RESET_ALL        0  /* Presumably reset everything. */
00046 #define WORK_QUEUE_RESET_KEEP_TASKS 1  /* Presumably reset but keep tasks. */
00048 #define WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL 300  /* Default keepalive interval. */
00049 #define WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT 30    /* Default keepalive timeout. */
/* Task result codes. Zero is success; every non-zero value is a distinct
   power of two (1, 2, 4, 8, 16, 32).
   NOTE(review): presumably stored in work_queue_task.result, possibly
   OR-combined given the bit layout - confirm. Per-line meanings are inferred
   from the names. */
00051 #define WORK_QUEUE_RESULT_SUCCESS 0                /* No failure recorded. */
00052 #define WORK_QUEUE_RESULT_INPUT_MISSING 1  /* Presumably an input file was missing. */
00053 #define WORK_QUEUE_RESULT_OUTPUT_MISSING 2 /* Presumably an output file was missing. */
00054 #define WORK_QUEUE_RESULT_STDOUT_MISSING 4 /* Presumably the task's standard output was missing. */
00055 #define WORK_QUEUE_RESULT_SIGNAL         8 /* Presumably the task ended on a signal. */
00056 #define WORK_QUEUE_RESULT_RESOURCE_EXHAUSTION 16 /* Presumably the task exceeded a resource limit. */
00057 #define WORK_QUEUE_RESULT_TASK_TIMEOUT 32 /* Presumably the task ran past its end time. */
/* Task state identifiers: consecutive integers 0..6.
   NOTE(review): presumably the values returned by work_queue_task_state();
   per-line meanings are inferred from the names - confirm. */
00060 #define WORK_QUEUE_TASK_UNKNOWN           0  /* State not known. */
00061 #define WORK_QUEUE_TASK_READY             1  /* Presumably waiting to be dispatched. */
00062 #define WORK_QUEUE_TASK_RUNNING           2  /* Presumably running. */
00063 #define WORK_QUEUE_TASK_WAITING_RETRIEVAL 3  /* Presumably finished, results not yet retrieved. */
00064 #define WORK_QUEUE_TASK_RETRIEVED         4  /* Presumably results retrieved. */
00065 #define WORK_QUEUE_TASK_DONE              5  /* Presumably complete. */
00066 #define WORK_QUEUE_TASK_CANCELED          6  /* Presumably canceled. */
/* Global option variables. They are only declared here (extern); the
   definitions live in another translation unit. */
00068 extern double wq_option_fast_abort_multiplier; /* NOTE(review): presumably related to work_queue_activate_fast_abort() - confirm. */
00070 extern int wq_option_scheduler; /* NOTE(review): presumably a WORK_QUEUE_SCHEDULE_* value - confirm. */
/* Description of a single task, including timing and transfer accounting.
   NOTE(review): this header shows only the field declarations. The per-field
   notes are inferred from field names and from the prototypes declared in
   this header, and the units of the time, byte and resource fields are not
   stated here - confirm against the implementation before relying on them. */
00074 struct work_queue_task {
00075         char *tag;                      /* Label string; cf. work_queue_task_specify_tag(). */
00076         char *command_line;             /* Command string; cf. work_queue_task_create() and work_queue_task_specify_command(). */
00077         int worker_selection_algorithm;           /* Cf. work_queue_task_specify_algorithm(); presumably a WORK_QUEUE_SCHEDULE_* value. */
00078         char *output;                   /* Presumably the task's captured output. */
00079         struct list *input_files;       /* List of input file entries (element type not visible here). */
00080         struct list *output_files;      /* List of output file entries (element type not visible here). */
00081         int taskid;                     /* Integer task id; cf. work_queue_cancel_by_taskid() and work_queue_task_state(). */
00082         int return_status;              /* Presumably the exit status of the command. */
00083         int result;                     /* Presumably WORK_QUEUE_RESULT_* value(s). */
00084         char *host;                     /* Presumably the address of the worker that ran the task. */
00085         char *hostname;                 /* Presumably the name of the worker that ran the task. */
00087         timestamp_t time_committed;     /* NOTE(review): meaning not evident from this header. */
00089         timestamp_t time_task_submit;   /* Presumably when the task was submitted. */
00090         timestamp_t time_task_finish;   /* Presumably when the task finished. */
00091         timestamp_t time_send_input_start;      /* Presumably start of sending input. */
00092         timestamp_t time_send_input_finish;     /* Presumably end of sending input. */
00093         timestamp_t time_execute_cmd_start;                 /* Presumably start of command execution. */
00094         timestamp_t time_execute_cmd_finish;            /* Presumably end of command execution. */
00095         timestamp_t time_receive_result_start;  /* Presumably start of receiving the result. */
00096         timestamp_t time_receive_result_finish; /* Presumably end of receiving the result. */
00097         timestamp_t time_receive_output_start;  /* Presumably start of receiving output. */
00098         timestamp_t time_receive_output_finish; /* Presumably end of receiving output. */
00100         int64_t total_bytes_received; /* Presumably bytes received for this task. */
00101         int64_t total_bytes_sent; /* Presumably bytes sent for this task. */
00102         int64_t total_bytes_transferred; /* Presumably bytes sent plus received. */
00103         timestamp_t total_transfer_time;    /* Presumably time spent transferring data. */
00104         timestamp_t cmd_execution_time;    /* Presumably execution time of the command. */
00105         int total_submissions;                     /* Presumably how many times the task was submitted. */
00106         timestamp_t total_cmd_execution_time;   /* Presumably execution time summed over submissions. */
00108         int64_t maximum_end_time;                       /* Cf. work_queue_task_specify_end_time(). */
00109         int64_t memory;                       /* Cf. work_queue_task_specify_memory(). */
00110         int64_t disk; /* Cf. work_queue_task_specify_disk(). */
00111         int cores; /* Cf. work_queue_task_specify_cores(). */
00112         int gpus; /* Cf. work_queue_task_specify_gpus(). */
00113         int unlabeled; /* NOTE(review): meaning not evident from this header. */
00114         struct rmsummary *resources_measured;   /* Resource summary; struct rmsummary comes from rmsummary.h. */
00116         double priority;                        /* Cf. work_queue_task_specify_priority(). */
00118         timestamp_t time_app_delay;      /* NOTE(review): meaning not evident from this header. */
00120 };
00121 
/* Queue-wide statistics; filled in through the pointer passed to
   work_queue_get_stats() / work_queue_get_stats_hierarchy().
   NOTE(review): this header shows only the field declarations. The per-field
   notes are inferred from the field names, and the units of the time, byte,
   bandwidth and resource fields are not stated here - confirm against the
   implementation before relying on them. */
00124 struct work_queue_stats {
00125         int total_workers_connected;    /* Presumably workers currently connected. */
00126         int workers_init;               /* Presumably workers still initializing. */
00127         int workers_idle;               /* Presumably workers with no task. */
00128         int workers_busy;               /* Presumably workers running tasks. */
00129         int total_workers_joined;       /* Presumably cumulative workers joined. */
00130         int total_workers_removed;      /* Presumably cumulative workers removed. */
00132         int tasks_waiting;              /* Presumably tasks waiting to be dispatched. */
00133         int tasks_running;              /* Presumably tasks currently running. */
00134         int tasks_complete;             /* Presumably tasks finished and awaiting return. */
00135         int total_tasks_dispatched;     /* Presumably cumulative tasks dispatched. */
00136         int total_tasks_complete;       /* Presumably cumulative tasks completed. */
00137         int total_tasks_failed;         /* Presumably cumulative tasks failed. */
00138         int total_tasks_cancelled;      /* Presumably cumulative tasks cancelled. */
00140         timestamp_t start_time;         /* Presumably when the queue started. */
00141         timestamp_t total_send_time;    /* Presumably total time spent sending. */
00142         timestamp_t total_receive_time; /* Presumably total time spent receiving. */
00143         timestamp_t total_good_transfer_time;    /* NOTE(review): what counts as "good" is not evident here. */
00145         timestamp_t total_execute_time; /* Presumably total task execution time. */
00146         timestamp_t total_good_execute_time; /* NOTE(review): what counts as "good" is not evident here. */
00149         int64_t total_bytes_sent;       /* Presumably total bytes sent. */
00150         int64_t total_bytes_received;   /* Presumably total bytes received. */
00151         double efficiency;              /* NOTE(review): definition not evident from this header. */
00152         double idle_percentage;         /* NOTE(review): definition not evident from this header. */
00153         int capacity;                   /* NOTE(review): definition not evident from this header. */
00155         double  bandwidth;              /* Cf. work_queue_get_effective_bandwidth(). */
00156         int64_t total_cores;            /* Presumably cores summed over workers. */
00157         int64_t total_memory;           /* Presumably memory summed over workers. */
00158         int64_t total_disk;                 /* Presumably disk summed over workers. */
00159         int64_t total_gpus;             /* Presumably GPUs summed over workers. */
00160         int64_t committed_cores;        /* Presumably cores committed to tasks. */
00161         int64_t committed_memory;       /* Presumably memory committed to tasks. */
00162         int64_t committed_disk;         /* Presumably disk committed to tasks. */
00163         int64_t committed_gpus;         /* Presumably GPUs committed to tasks. */
00164         int64_t min_cores;              /* Presumably smallest per-worker core count. */
00165         int64_t max_cores;              /* Presumably largest per-worker core count. */
00166         int64_t min_memory;             /* Presumably smallest per-worker memory. */
00167         int64_t max_memory;             /* Presumably largest per-worker memory. */
00168         int64_t min_disk;               /* Presumably smallest per-worker disk. */
00169         int64_t max_disk;               /* Presumably largest per-worker disk. */
00170         int64_t min_gpus;               /* Presumably smallest per-worker GPU count. */
00171         int64_t max_gpus;               /* Presumably largest per-worker GPU count. */
00172         int port;                                               /* Cf. work_queue_port(). */
00173         int priority;                                   /* Cf. work_queue_specify_priority(). */
00174         int workers_ready;              /* NOTE(review): how this differs from workers_idle is not evident here. */
00175         int workers_full;               /* NOTE(review): how this differs from workers_busy is not evident here. */
00176         int total_worker_slots;         /* NOTE(review): definition not evident from this header. */
00177         int avg_capacity;               /* NOTE(review): definition not evident from this header. */
00178 };
00179 
00180 
00184 
/* Task construction and configuration API.
   NOTE(review): only prototypes are visible in this header. The one-line
   notes are inferred from names and signatures; ownership, error return
   values and units are not stated here - confirm against the implementation. */
/* Create a task from the command string full_command. */
00192 struct work_queue_task *work_queue_task_create(const char *full_command);
00193 
/* Presumably return a copy of task; the argument is const-qualified. */
00199 struct work_queue_task *work_queue_task_clone(const struct work_queue_task *task);
00200 
/* Set the command string of task t. */
00205 void work_queue_task_specify_command( struct work_queue_task *t, const char *cmd );
00206 
/* Attach a file to task t. type is presumably WORK_QUEUE_INPUT or
   WORK_QUEUE_OUTPUT and flags a combination of the WORK_QUEUE_* file flags. */
00223 int work_queue_task_specify_file(struct work_queue_task *t, const char *local_name, const char *remote_name, int type, int flags);
00224 
/* As work_queue_task_specify_file(), restricted to the byte range
   start_byte..end_byte (whether end_byte is inclusive is not stated here). */
00239 int work_queue_task_specify_file_piece(struct work_queue_task *t, const char *local_name, const char *remote_name, off_t start_byte, off_t end_byte, int type, int flags);
00240 
/* Attach length bytes at data to task t under remote_name. */
00251 int work_queue_task_specify_buffer(struct work_queue_task *t, const char *data, int length, const char *remote_name, int flags);
00252 
/* Attach a directory to task t; recursive presumably selects whether its
   contents are included. */
00266 int work_queue_task_specify_directory(struct work_queue_task *t, const char *local_name, const char *remote_name, int type, int flags, int recursive);
00267 
/* Set the memory requirement of task t. */
00273 void work_queue_task_specify_memory( struct work_queue_task *t, int64_t memory );
00274 
/* Set the disk requirement of task t. */
00280 void work_queue_task_specify_disk( struct work_queue_task *t, int64_t disk );
00281 
/* Set the number of cores for task t. */
00287 void work_queue_task_specify_cores( struct work_queue_task *t, int cores );
00288 
/* Set the number of GPUs for task t. */
00294 void work_queue_task_specify_gpus( struct work_queue_task *t, int gpus );
00295 
/* Set the end time of task t; the parameter is named seconds. */
00301 void work_queue_task_specify_end_time( struct work_queue_task *t, int64_t seconds );
00302 
/* Set the tag string of task t. */
00309 void work_queue_task_specify_tag(struct work_queue_task *t, const char *tag);
00310 
/* Set the priority of task t. */
00317 void work_queue_task_specify_priority(struct work_queue_task *t, double priority );
00318 
/* Set the scheduling algorithm for task t; algo is presumably a
   WORK_QUEUE_SCHEDULE_* value. */
00328 void work_queue_task_specify_algorithm(struct work_queue_task *t, int algo );
00329 
/* Destroy task t. */
00334 void work_queue_task_delete(struct work_queue_task *t);
00335 
00337 
00341 
/* Queue creation, submission, monitoring and tuning API.
   struct work_queue is used only through pointers here; its definition is
   not in this header.
   NOTE(review): only prototypes are visible. The one-line notes are inferred
   from names and signatures; error return values, ownership of returned
   pointers and units are not stated here - confirm against the
   implementation. */
/* Create a queue on port; presumably accepts WORK_QUEUE_DEFAULT_PORT or
   WORK_QUEUE_RANDOM_PORT as well as an explicit port. */
00358 struct work_queue *work_queue_create(int port);
00359 
/* Enable monitoring for q, using monitor_summary_file. */
00367 int work_queue_enable_monitoring(struct work_queue *q, char *monitor_summary_file);
00368 
/* Submit task t to queue q; the int result is presumably the task id. */
00377 int work_queue_submit(struct work_queue *q, struct work_queue_task *t);
00378 
/* Add hostname to the blacklist of q. */
00383 void work_queue_blacklist_add(struct work_queue *q, const char *hostname);
00384 
00385 
/* Remove hostname from the blacklist of q. */
00390 void work_queue_blacklist_remove(struct work_queue *q, const char *hostname);
00391 
00392 
/* Clear the blacklist of q. */
00396 void work_queue_blacklist_clear(struct work_queue *q);
00397 
/* Wait on q and return a task; timeout presumably accepts
   WORK_QUEUE_WAITFORTASK. */
00412 struct work_queue_task *work_queue_wait(struct work_queue *q, int timeout);
00413 
/* Presumably reports whether q can accept more tasks. */
00425 int work_queue_hungry(struct work_queue *q);
00426 
/* Presumably reports whether q has no tasks left. */
00434 int work_queue_empty(struct work_queue *q);
00435 
/* Return the port of q. */
00442 int work_queue_port(struct work_queue *q);
00443 
/* Fill *s with statistics for q. */
00448 void work_queue_get_stats(struct work_queue *q, struct work_queue_stats *s);
00449 
/* Fill *s with statistics for q; the "hierarchy" variant is not explained
   in this header. */
00454 void work_queue_get_stats_hierarchy(struct work_queue *q, struct work_queue_stats *s);
00455 
00456 
/* Return the state of the task with id taskid; presumably a
   WORK_QUEUE_TASK_* value. */
00462 int work_queue_task_state(struct work_queue *q, int taskid);
00463 
/* Set a bandwidth limit on q; the limit is passed as a string. */
00468 void work_queue_set_bandwidth_limit(struct work_queue *q, const char *bandwidth);
00469 
/* Return the effective bandwidth of q. */
00474 double work_queue_get_effective_bandwidth(struct work_queue *q);
00475 
/* Return a worker summary string for q; who frees it is not stated here. */
00482 char * work_queue_get_worker_summary( struct work_queue *q );
00483 
/* Enable fast abort on q with the given multiplier. */
00489 int work_queue_activate_fast_abort(struct work_queue *q, double multiplier);
00490 
00491 
/* Set the send/receive ratio of q. */
00495 int work_queue_send_receive_ratio(struct work_queue *q, double ratio);
00496 
/* Set the scheduling algorithm of q; algo is presumably a
   WORK_QUEUE_SCHEDULE_* value. */
00506 void work_queue_specify_algorithm(struct work_queue *q, int algo);
00507 
/* Return the name of q. */
00512 const char *work_queue_name(struct work_queue *q);
00513 
/* Set the name of q. */
00518 void work_queue_specify_name(struct work_queue *q, const char *name);
00519 
/* Set the priority of q. */
00524 void work_queue_specify_priority(struct work_queue *q, int priority);
00525 
/* Set the catalog server (hostname and port) used by q. */
00531 void work_queue_specify_catalog_server(struct work_queue *q, const char *hostname, int port);
00532 
/* Cancel the task with the given id and return it. */
00538 struct work_queue_task *work_queue_cancel_by_taskid(struct work_queue *q, int id);
00539 
/* Cancel a task with the given tag and return it. */
00545 struct work_queue_task *work_queue_cancel_by_tasktag(struct work_queue *q, const char *tag);
00546 
/* Cancel all tasks in q and return them as a list. */
00551 struct list * work_queue_cancel_all_tasks(struct work_queue *q);
00552 
/* Shut down workers of q; n is presumably the number to shut down. */
00557 int work_queue_shut_down_workers(struct work_queue *q, int n);
00558 
/* Destroy queue q. */
00563 void work_queue_delete(struct work_queue *q);
00564 
/* Set the log file of q. */
00570 int work_queue_specify_log(struct work_queue *q, const char *logfile);
00571 
/* Set the password of q from a string. */
00577 void work_queue_specify_password( struct work_queue *q, const char *password );
00578 
/* Set the password of q from the named file. */
00585 int work_queue_specify_password_file( struct work_queue *q, const char *file );
00586 
/* Set the keepalive interval of q; cf.
   WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL. */
00591 void work_queue_specify_keepalive_interval(struct work_queue *q, int interval);
00592 
/* Set the keepalive timeout of q; cf.
   WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT. */
00597 void work_queue_specify_keepalive_timeout(struct work_queue *q, int timeout);
00598 
00599 
/* Set the tuning parameter called name to value; the accepted names are not
   listed in this header. */
00615 int work_queue_tune(struct work_queue *q, const char *name, double value);
00616 
00618 
00622 
/* Task-order and master-mode selectors with their setter functions.
   NOTE(review): only macros and prototypes are visible; the notes are
   inferred from the names - confirm against the implementation. */
00623 #define WORK_QUEUE_TASK_ORDER_FIFO 0  /* Presumably first-in, first-out. */
00624 #define WORK_QUEUE_TASK_ORDER_LIFO 1  /* Presumably last-in, first-out. */
/* Set the task order of q; order is presumably a WORK_QUEUE_TASK_ORDER_*
   value. */
00632 void work_queue_specify_task_order(struct work_queue *q, int order);
00633 
00634 
00635 #define WORK_QUEUE_MASTER_MODE_STANDALONE 0 /* Presumably no catalog server is used. */
00636 #define WORK_QUEUE_MASTER_MODE_CATALOG 1    /* Presumably the master uses a catalog server. */
/* Set the master mode of q; mode is presumably a WORK_QUEUE_MASTER_MODE_*
   value. */
00645 void work_queue_specify_master_mode(struct work_queue *q, int mode);
00646 
/* Turn capacity estimation on or off for q; presumably non-zero enables it. */
00652 void work_queue_specify_estimate_capacity_on(struct work_queue *q, int estimate_capacity_on);
00653 
/* Convenience variants for attaching inputs and outputs to a task.
   NOTE(review): only prototypes are visible. These presumably overlap with
   work_queue_task_specify_buffer() / work_queue_task_specify_file(); whether
   they are deprecated is not stated here - confirm. Note the argument order:
   the input variants take (fname, rname), the output variants (rname, fname). */
/* Attach length bytes at buf as an input named rname. */
00662 int work_queue_task_specify_input_buf(struct work_queue_task *t, const char *buf, int length, const char *rname);
00663 
/* Attach file fname as an input named rname. */
00671 int work_queue_task_specify_input_file(struct work_queue_task *t, const char *fname, const char *rname);
00672 
/* As work_queue_task_specify_input_file(), presumably without caching. */
00680 int work_queue_task_specify_input_file_do_not_cache(struct work_queue_task *t, const char *fname, const char *rname);
00681 
/* Declare output rname, to be stored as fname. */
00689 int work_queue_task_specify_output_file(struct work_queue_task *t, const char *rname, const char *fname);
00690 
/* As work_queue_task_specify_output_file(), presumably without caching. */
00698 int work_queue_task_specify_output_file_do_not_cache(struct work_queue_task *t, const char *rname, const char *fname);
00699 
00701 
00702 /* Experimental feature - intentionally left undocumented.
00703 This feature exists to simplify performance evaluation and is not recommended
00704 for production use since it delays execution of the workload. 
00705 Force the master to wait for the given number of workers to connect before
00706 starting to dispatch tasks.  
00707 @param q A work queue object.
00708 @param resources The number of workers to wait for before tasks are dispatched.*/
00709 void work_queue_activate_worker_waiting(struct work_queue *q, int resources);
00710 
00711 #endif

Generated on 14 May 2015 for cctools by  doxygen 1.4.7