work_queue.h

Go to the documentation of this file.
00001 /*
00002 Copyright (C) 2008- The University of Notre Dame
00003 This software is distributed under the GNU General Public License.
00004 See the file COPYING for details.
00005 */
00006 
00007 #ifndef WORK_QUEUE_H
00008 #define WORK_QUEUE_H
00009 
00020 #include <sys/types.h>
00021 #include "timestamp.h"
00022 #include "rmsummary.h"
00023 
00024 #define WORK_QUEUE_DEFAULT_PORT 9123               /* Default port number. */
00025 #define WORK_QUEUE_RANDOM_PORT  0                  /* Port value 0; presumably tells work_queue_create() to pick any available port -- confirm. */
00027 #define WORK_QUEUE_WAITFORTASK  -1                 /* Negative sentinel; presumably a timeout for work_queue_wait() meaning "block until a task returns" -- confirm. */
00029 #define WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL 120  /* Default keepalive interval; units presumably seconds -- confirm. */
00030 #define WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT  30   /* Default keepalive timeout; units presumably seconds -- confirm. */
00032 typedef enum { /* Direction of a file attached to a task: the `type` argument of work_queue_task_specify_file(), _file_piece() and _directory(). */
00033         WORK_QUEUE_INPUT  = 0,                         /* Input file (value 0). */
00034         WORK_QUEUE_OUTPUT = 1                          /* Output file (value 1). */
00035 } work_queue_file_type_t;
00036 
00037 typedef enum { /* Flags accepted by the `flags` argument of the work_queue_task_specify_file-style functions. Non-zero values are single bits, so they can presumably be OR-ed together -- confirm. */
00038         WORK_QUEUE_NOCACHE  = 0, /* No flag bits set. */
00039         WORK_QUEUE_CACHE    = 1, /* Bit 0; presumably keeps the file cached at the worker -- confirm. */
00040         WORK_QUEUE_SYMLINK  = 2, /* Bit 1. */
00041         WORK_QUEUE_PREEXIST = 4, /* Bit 2. */
00042         WORK_QUEUE_THIRDGET = 8, /* Bit 3. */
00043         WORK_QUEUE_THIRDPUT = 8, /* Same value as WORK_QUEUE_THIRDGET: the two names are aliases and cannot be told apart in a flags word. */
00044         WORK_QUEUE_WATCH    = 16 /* Bit 4. */
00045 } work_queue_file_flags_t;
00046 
00047 typedef enum { /* Worker-selection algorithm: used by work_queue_specify_algorithm(), work_queue_task_specify_algorithm() and the worker_selection_algorithm task field. */
00048         WORK_QUEUE_SCHEDULE_UNSET = 0, /* Value 0: no algorithm chosen. The remaining members take the implicit values 1..5. */
00049         WORK_QUEUE_SCHEDULE_FCFS,      /* Presumably first-come-first-served -- confirm. */
00050         WORK_QUEUE_SCHEDULE_FILES,     /* Presumably prefers workers already holding the task's files -- confirm. */
00051         WORK_QUEUE_SCHEDULE_TIME,      /* Presumably prefers workers with the best past turnaround time -- confirm. */
00052         WORK_QUEUE_SCHEDULE_RAND,      /* Presumably picks a worker at random -- confirm. */
00053         WORK_QUEUE_SCHEDULE_WORST      /* NOTE(review): selection criterion is not visible in this header. */
00054 } work_queue_schedule_t;
00055 
00056 
00057 typedef enum { /* Outcome of a task, stored in the `result` field of struct work_queue_task. Non-zero values are distinct powers of two; whether several may be combined in one result is not visible here. */
00058         WORK_QUEUE_RESULT_SUCCESS        = 0,       /* No failure bits set. */
00059         WORK_QUEUE_RESULT_INPUT_MISSING  = 1,       /* Presumably an input file could not be provided -- confirm. */
00060         WORK_QUEUE_RESULT_OUTPUT_MISSING = 2,       /* Presumably an expected output file was not produced -- confirm. */
00061         WORK_QUEUE_RESULT_STDOUT_MISSING = 4,       /* Presumably the task's standard output could not be retrieved -- confirm. */
00062         WORK_QUEUE_RESULT_SIGNAL         = 8,       /* Presumably the command was terminated by a signal -- confirm. */
00063         WORK_QUEUE_RESULT_RESOURCE_EXHAUSTION = 16, /* Presumably the task exceeded its resource allocation -- confirm. */
00064         WORK_QUEUE_RESULT_TASK_TIMEOUT   = 32,      /* Presumably related to the limit set by work_queue_task_specify_end_time() -- confirm. */
00065         WORK_QUEUE_RESULT_UNKNOWN        = 64,      /* Result could not be classified. */
00066         WORK_QUEUE_RESULT_FORSAKEN       = 128,     /* NOTE(review): exact condition is not visible in this header. */
00067         WORK_QUEUE_RESULT_MAX_RETRIES    = 256,     /* Presumably the limit set by work_queue_task_specify_max_retries() was reached -- confirm. */
00068         WORK_QUEUE_RESULT_TASK_MAX_RUN_TIME = 512   /* Presumably the limit set by work_queue_task_specify_running_time() was exceeded -- confirm. */
00069 } work_queue_result_t;
00070 
00071 typedef enum { /* State of a task as reported by work_queue_task_state(). Members take consecutive values starting at 0. */
00072         WORK_QUEUE_TASK_UNKNOWN = 0,       /* Value 0; presumably returned for a taskid the queue does not know -- confirm. */
00073         WORK_QUEUE_TASK_READY,             /* Presumably submitted and waiting to be dispatched -- confirm. */
00074         WORK_QUEUE_TASK_RUNNING,           /* Presumably assigned to a worker -- confirm. */
00075         WORK_QUEUE_TASK_WAITING_RETRIEVAL, /* Presumably finished at the worker, results not yet fetched -- confirm. */
00076         WORK_QUEUE_TASK_RETRIEVED,         /* Presumably results fetched, task not yet returned to the caller -- confirm. */
00077         WORK_QUEUE_TASK_DONE,              /* Presumably returned to the caller -- confirm. */
00078         WORK_QUEUE_TASK_CANCELED,           /* Presumably removed by one of the work_queue_cancel_* functions -- confirm. */
00079         WORK_QUEUE_TASK_WAITING_RESUBMISSION /* NOTE(review): exact condition is not visible in this header. */
00080 } work_queue_task_state_t;
00081 
00082 typedef enum { /* Kind of object attached to a task; the `type` argument of work_queue_invalidate_cached_file(). Distinct from work_queue_file_type_t (input/output direction). Values start at 1. */
00083         WORK_QUEUE_FILE = 1,              /* Presumably what work_queue_task_specify_file() attaches -- confirm. */
00084         WORK_QUEUE_BUFFER,                /* Presumably what work_queue_task_specify_buffer() attaches -- confirm. */
00085         WORK_QUEUE_REMOTECMD,             /* NOTE(review): no matching specify function is visible in this header. */
00086         WORK_QUEUE_FILE_PIECE,            /* Presumably what work_queue_task_specify_file_piece() attaches -- confirm. */
00087         WORK_QUEUE_DIRECTORY,             /* Presumably what work_queue_task_specify_directory() attaches -- confirm. */
00088         WORK_QUEUE_URL                    /* NOTE(review): no matching specify function is visible in this header. */
00089 } work_queue_file_t;
00090 
00091 
00092 extern double wq_option_fast_abort_multiplier; /* Global option, defined outside this header; presumably the default multiplier for work_queue_activate_fast_abort() -- confirm. */
00098 extern int wq_option_scheduler;                /* Global option, defined outside this header; presumably holds a work_queue_schedule_t value used as the default algorithm -- confirm. */
00108 struct work_queue_task { /* Description of one task. NOTE(review): the original doc comments are missing from this listing; the field notes below are inferred from names and from the prototypes in this header -- confirm against the implementation. */
00109         char *tag;                                             /* Label; see work_queue_task_specify_tag(). */
00110         char *command_line;                                    /* Command string; see work_queue_task_create() and work_queue_task_specify_command(). */
00111         work_queue_schedule_t worker_selection_algorithm; /* See work_queue_task_specify_algorithm(). */
00112         char *output;                                          /* Presumably the captured standard output of the command -- confirm. */
00113         struct list *input_files;                              /* struct list is used only through pointers; its element type is not visible in this header. */
00114         struct list *output_files;                             /* See the note on input_files. */
00115         struct list *env_list;                                 /* Presumably filled by work_queue_task_specify_enviroment_variable() -- confirm. */
00116         int taskid;                                            /* Integer id; same type as the taskid taken by work_queue_task_state(). */
00117         int return_status;                                     /* Presumably the exit status of the command -- confirm. */
00118         work_queue_result_t result;                       /* A WORK_QUEUE_RESULT_* value. */
00119         char *host;                                            /* Two separate strings are kept (host and hostname); presumably address vs. name -- confirm. */
00120         char *hostname;                                        /* See the note on host. */
00122         timestamp_t time_committed;                            /* All time_* fields are timestamp_t (from timestamp.h); units are not visible in this header. */
00124         timestamp_t time_task_submit;                          
00125         timestamp_t time_task_finish;                          
00126         timestamp_t time_send_input_start;                     
00127         timestamp_t time_send_input_finish;                    
00128         timestamp_t time_execute_cmd_start;                    
00129         timestamp_t time_execute_cmd_finish;                   
00130         timestamp_t time_receive_result_start;                 
00131         timestamp_t time_receive_result_finish;                
00132         timestamp_t time_receive_output_start;                 
00133         timestamp_t time_receive_output_finish;                
00135         int64_t total_bytes_received;                          /* 64-bit byte counters. */
00136         int64_t total_bytes_sent;                              
00137         int64_t total_bytes_transferred;                       /* Presumably received + sent -- confirm. */
00138         timestamp_t total_transfer_time;                       
00139         timestamp_t cmd_execution_time;                        /* Presumably the most recent attempt only; compare total_cmd_execution_time -- confirm. */
00140         int total_submissions;                                 
00141         timestamp_t total_cmd_execution_time;                  
00143         timestamp_t maximum_end_time;                               /* See work_queue_task_specify_end_time(), whose argument is named `useconds`. */
00144         int64_t memory;                                        /* See work_queue_task_specify_memory(); units are not visible in this header. */
00145         int64_t disk;                                          /* See work_queue_task_specify_disk(); units are not visible in this header. */
00146         int cores;                                             /* See work_queue_task_specify_cores(). */
00147         int gpus;                                              /* See work_queue_task_specify_gpus(). */
00148         int unlabeled;                                         /* NOTE(review): meaning is not visible in this header; presumably set when no resources were specified -- confirm. */
00150         double priority;                                       /* See work_queue_task_specify_priority(). */
00152         int max_retries;                                       /* NOTE(review): stored as int, but work_queue_task_specify_max_retries() takes int64_t. */
00154         struct rmsummary *resources_measured;                  /* struct rmsummary presumably comes from rmsummary.h -- confirm. */
00156         timestamp_t time_app_delay;                            
00158         timestamp_t maximum_running_time;                      /* See work_queue_task_specify_running_time(), whose argument is named `useconds`. */
00160 };
00161 
00164 struct work_queue_stats { /* Statistics record passed by pointer to work_queue_get_stats() and work_queue_get_stats_hierarchy(). NOTE(review): field notes are inferred from names -- confirm against the implementation. */
00165         int total_workers_connected;    /* Worker counts. */
00166         int workers_init;               
00167         int workers_idle;               
00168         int workers_busy;               
00170         int total_workers_joined;       /* total_* fields are presumably cumulative over the life of the queue -- confirm. */
00171         int total_workers_removed;      
00172         int total_workers_lost;         
00173         int total_workers_idled_out;    
00174         int total_workers_fast_aborted; /* Presumably related to work_queue_activate_fast_abort() -- confirm. */
00176         int tasks_waiting;              /* Task counts. */
00177         int tasks_running;              
00178         int tasks_complete;             
00179         int total_tasks_dispatched;     
00180         int total_tasks_complete;       
00181         int total_tasks_failed;         
00182         int total_tasks_cancelled;      
00184         timestamp_t start_time;         /* timestamp_t values (from timestamp.h); units are not visible in this header. */
00185         timestamp_t total_send_time;    
00186         timestamp_t total_receive_time; 
00187         timestamp_t total_good_transfer_time;    
00189         timestamp_t total_execute_time;      
00190         timestamp_t total_good_execute_time; 
00193         int64_t total_bytes_sent;       /* 64-bit byte counters. */
00194         int64_t total_bytes_received;   
00195         double efficiency;              
00196         double idle_percentage;         
00197         int capacity;                   
00199         double  bandwidth;              /* double, like the return value of work_queue_get_effective_bandwidth(). */
00200         int64_t total_cores;            /* total_/committed_/min_/max_ resource figures, one set each for cores, memory, disk and gpus. */
00201         int64_t total_memory;           
00202         int64_t total_disk;                 
00203         int64_t total_gpus;             
00204         int64_t committed_cores;        
00205         int64_t committed_memory;       
00206         int64_t committed_disk;         
00207         int64_t committed_gpus;         
00208         int64_t min_cores;              
00209         int64_t max_cores;              
00210         int64_t min_memory;             
00211         int64_t max_memory;             
00212         int64_t min_disk;               
00213         int64_t max_disk;               
00214         int64_t min_gpus;               
00215         int64_t max_gpus;               
00216         int port; /* Presumably the value work_queue_port() reports -- confirm. */
00217         int priority; /* int, like the argument of work_queue_specify_priority(). */
00218         int workers_ready;              
00219         int workers_full;               
00220         int total_worker_slots;         
00221         int avg_capacity;               
00222 };
00223 
00224 
00228 
00236 struct work_queue_task *work_queue_task_create(const char *full_command); /* Returns a task for full_command; presumably heap-allocated and released with work_queue_task_delete() -- confirm. */
00237 
00243 struct work_queue_task *work_queue_task_clone(const struct work_queue_task *task); /* Returns a task derived from *task; the source is taken as const. Presumably a deep copy -- confirm. */
00244 
00249 void work_queue_task_specify_command( struct work_queue_task *t, const char *cmd ); /* Sets the command of t to cmd. */
00250 
00267 int work_queue_task_specify_file(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags); /* Attaches local_name to t under remote_name; type is WORK_QUEUE_INPUT or WORK_QUEUE_OUTPUT. Meaning of the int result is not visible in this header. */
00268 
00283 int work_queue_task_specify_file_piece(struct work_queue_task *t, const char *local_name, const char *remote_name, off_t start_byte, off_t end_byte, work_queue_file_type_t type, work_queue_file_flags_t flags); /* As work_queue_task_specify_file(), restricted to the range start_byte..end_byte; whether end_byte is inclusive is not visible in this header. */
00284 
00295 int work_queue_task_specify_buffer(struct work_queue_task *t, const char *data, int length, const char *remote_name, work_queue_file_flags_t flags); /* Attaches `length` bytes starting at `data` to t under remote_name. There is no type argument, so the buffer is presumably always an input -- confirm. `flags` takes WORK_QUEUE_* file flags, as in work_queue_task_specify_file(). Meaning of the int result is not visible in this header. */
00296 
00310 int work_queue_task_specify_directory(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags, int recursive); /* Attaches directory local_name to t under remote_name; type is WORK_QUEUE_INPUT or WORK_QUEUE_OUTPUT and `flags` takes WORK_QUEUE_* file flags, as in work_queue_task_specify_file(). `recursive` is presumably a boolean selecting whether the contents are transferred -- confirm. Meaning of the int result is not visible in this header. */
00311 
00317 void work_queue_task_specify_max_retries( struct work_queue_task *t, int64_t max_retries ); /* NOTE(review): takes int64_t, but the max_retries field of struct work_queue_task is int. */
00318 
00324 void work_queue_task_specify_memory( struct work_queue_task *t, int64_t memory ); /* Units of memory are not visible in this header. */
00325 
00331 void work_queue_task_specify_disk( struct work_queue_task *t, int64_t disk ); /* Units of disk are not visible in this header. */
00332 
00338 void work_queue_task_specify_cores( struct work_queue_task *t, int cores ); /* Number of cores for t. */
00339 
00345 void work_queue_task_specify_gpus( struct work_queue_task *t, int gpus ); /* Number of GPUs for t. */
00346 
00354 void work_queue_task_specify_end_time( struct work_queue_task *t, timestamp_t useconds ); /* Argument is named useconds, i.e. presumably microseconds; presumably an absolute time stored in maximum_end_time -- confirm. */
00355 
00363 void work_queue_task_specify_running_time( struct work_queue_task *t, timestamp_t useconds ); /* Argument is named useconds, i.e. presumably microseconds; presumably a duration stored in maximum_running_time -- confirm. */
00364 
00371 void work_queue_task_specify_tag(struct work_queue_task *t, const char *tag); /* Sets the label of t; see also work_queue_cancel_by_tasktag(). */
00372 
00379 void work_queue_task_specify_priority(struct work_queue_task *t, double priority ); /* Task priority is a double (the queue priority is an int). */
00380 
00387 void work_queue_task_specify_enviroment_variable( struct work_queue_task *t, const char *name, const char *value ); /* Adds name=value to the environment of t. NOTE: "enviroment" is misspelled in the public name; callers must use this spelling. */
00388 
00394 void work_queue_task_specify_algorithm(struct work_queue_task *t, work_queue_schedule_t algorithm); /* Per-task counterpart of work_queue_specify_algorithm(). */
00395 
00400 void work_queue_task_delete(struct work_queue_task *t); /* Disposes of t; presumably frees everything the task owns -- confirm. */
00401 
00403 
00407 
00424 struct work_queue *work_queue_create(int port); /* Returns a queue handle; struct work_queue is not defined in this header, so callers only hold the pointer. port: presumably WORK_QUEUE_DEFAULT_PORT, WORK_QUEUE_RANDOM_PORT or an explicit port -- confirm. */
00425 
00434 int work_queue_enable_monitoring(struct work_queue *q, char *monitor_summary_file); /* NOTE(review): the file name is taken as non-const char *. */
00435 
00436 
00443 int work_queue_enable_monitoring_full(struct work_queue *q, char *monitor_output_directory); /* NOTE(review): the directory name is taken as non-const char *. */
00444 
00453 int work_queue_submit(struct work_queue *q, struct work_queue_task *t); /* Hands t to q; the int result is presumably the assigned taskid -- confirm. */
00454 
00459 void work_queue_blacklist_add(struct work_queue *q, const char *hostname); /* Adds hostname to the blacklist of q. */
00460 
00468 void work_queue_blacklist_add_with_timeout(struct work_queue *q, const char *hostname, time_t seconds); /* As work_queue_blacklist_add(), with a duration in seconds (per the parameter name). */
00469 
00470 
00475 void work_queue_blacklist_remove(struct work_queue *q, const char *hostname); /* Removes hostname from the blacklist of q. */
00476 
00477 
00481 void work_queue_blacklist_clear(struct work_queue *q); /* Empties the blacklist of q. */
00482 
00496 void work_queue_invalidate_cached_file(struct work_queue *q, const char *local_name, work_queue_file_t type); /* type is work_queue_file_t (object kind), not work_queue_file_type_t (direction). */
00497 
00498 
00513 struct work_queue_task *work_queue_wait(struct work_queue *q, int timeout); /* Returns a task pointer; presumably a finished task, or NULL when the timeout expires -- confirm. timeout is presumably in seconds, or WORK_QUEUE_WAITFORTASK -- confirm. */
00514 
00526 int work_queue_hungry(struct work_queue *q); /* Presumably non-zero when q could use more tasks -- confirm. */
00527 
00535 int work_queue_empty(struct work_queue *q); /* Presumably non-zero when q holds no tasks -- confirm. */
00536 
00543 int work_queue_port(struct work_queue *q); /* Port of q as an int. */
00544 
00549 void work_queue_get_stats(struct work_queue *q, struct work_queue_stats *s); /* s is a non-const pointer; presumably an out-parameter filled in by the call -- confirm. */
00550 
00555 void work_queue_get_stats_hierarchy(struct work_queue *q, struct work_queue_stats *s); /* Same signature as work_queue_get_stats(); how the figures differ is not visible in this header. */
00556 
00557 
00563 work_queue_task_state_t work_queue_task_state(struct work_queue *q, int taskid); /* Returns the WORK_QUEUE_TASK_* state of the task with the given id. */
00564 
00569 void work_queue_set_bandwidth_limit(struct work_queue *q, const char *bandwidth); /* The limit is passed as a string; its accepted format is not visible in this header. */
00570 
00575 double work_queue_get_effective_bandwidth(struct work_queue *q); /* Units of the result are not visible in this header. */
00576 
00583 char * work_queue_get_worker_summary( struct work_queue *q ); /* Returns non-const char *; presumably the caller must free it -- confirm. */
00584 
00590 int work_queue_activate_fast_abort(struct work_queue *q, double multiplier); /* See also the global wq_option_fast_abort_multiplier. */
00591 
00592 
00596 int work_queue_send_receive_ratio(struct work_queue *q, double ratio); /* NOTE(review): meaning of ratio and of the int result is not visible in this header. */
00597 
00603 void work_queue_specify_algorithm(struct work_queue *q, work_queue_schedule_t algorithm); /* Queue-wide counterpart of work_queue_task_specify_algorithm(). */
00604 
00609 const char *work_queue_name(struct work_queue *q); /* Returns const char *, so the caller must not modify the string. */
00610 
00615 void work_queue_specify_name(struct work_queue *q, const char *name); /* Sets the name reported by work_queue_name() (presumably) -- confirm. */
00616 
00621 void work_queue_specify_priority(struct work_queue *q, int priority); /* Queue priority is an int (the task priority is a double). */
00622 
00631 void work_queue_specify_num_tasks_left(struct work_queue *q, int ntasks); /* NOTE(review): how ntasks is used is not visible in this header. */
00632 
00638 void work_queue_specify_catalog_server(struct work_queue *q, const char *hostname, int port); /* Host and port of the catalog server q should use. */
00639 
00645 struct work_queue_task *work_queue_cancel_by_taskid(struct work_queue *q, int id); /* Returns a task pointer; presumably the cancelled task, or NULL if none matched -- confirm. */
00646 
00652 struct work_queue_task *work_queue_cancel_by_tasktag(struct work_queue *q, const char *tag); /* As work_queue_cancel_by_taskid(), selecting by tag. */
00653 
00658 struct list * work_queue_cancel_all_tasks(struct work_queue *q); /* Returns a struct list pointer; presumably the cancelled tasks -- confirm. */
00659 
00664 int work_queue_shut_down_workers(struct work_queue *q, int n); /* n is presumably the number of workers to shut down and the result the number actually shut down -- confirm. */
00665 
00670 void work_queue_delete(struct work_queue *q); /* Disposes of q. */
00671 
00677 int work_queue_specify_log(struct work_queue *q, const char *logfile); /* Meaning of the int result is not visible in this header. */
00678 
00684 void work_queue_specify_password( struct work_queue *q, const char *password ); /* Password is supplied directly as a string. */
00685 
00692 int work_queue_specify_password_file( struct work_queue *q, const char *file ); /* Password is supplied via a file name; meaning of the int result is not visible in this header. */
00693 
00698 void work_queue_specify_keepalive_interval(struct work_queue *q, int interval); /* See WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL. */
00699 
00704 void work_queue_specify_keepalive_timeout(struct work_queue *q, int timeout); /* See WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT. */
00705 
00711 void work_queue_master_preferred_connection(struct work_queue *q, const char *preferred_connection); /* The accepted strings are not visible in this header. */
00712 
00728 int work_queue_tune(struct work_queue *q, const char *name, double value); /* Sets the tunable called `name` to `value`; the valid names are not visible in this header. */
00729 
00731 
00735 
00736 #define WORK_QUEUE_TASK_ORDER_FIFO 0  /* Presumably first-in-first-out task order -- confirm. */
00737 #define WORK_QUEUE_TASK_ORDER_LIFO 1  /* Presumably last-in-first-out task order -- confirm. */
00745 void work_queue_specify_task_order(struct work_queue *q, int order); /* order is presumably one of the WORK_QUEUE_TASK_ORDER_ constants -- confirm. */
00746 
00747 
00748 #define WORK_QUEUE_MASTER_MODE_STANDALONE 0 /* Presumably: do not report to a catalog server -- confirm. */
00749 #define WORK_QUEUE_MASTER_MODE_CATALOG 1    /* Presumably: report to a catalog server -- confirm. */
00758 void work_queue_specify_master_mode(struct work_queue *q, int mode); /* mode is presumably one of the WORK_QUEUE_MASTER_MODE_ constants -- confirm. */
00759 
00760 
00766 void work_queue_specify_estimate_capacity_on(struct work_queue *q, int estimate_capacity_on); /* estimate_capacity_on is presumably a boolean switch -- confirm. */
00767 
00776 int work_queue_task_specify_input_buf(struct work_queue_task *t, const char *buf, int length, const char *rname); /* Narrower form of work_queue_task_specify_buffer(): no flags argument. */
00777 
00785 int work_queue_task_specify_input_file(struct work_queue_task *t, const char *fname, const char *rname); /* Narrower form of work_queue_task_specify_file(): no type or flags. Argument order is (fname, rname). */
00786 
00794 int work_queue_task_specify_input_file_do_not_cache(struct work_queue_task *t, const char *fname, const char *rname); /* As work_queue_task_specify_input_file(); presumably without caching -- confirm. */
00795 
00803 int work_queue_task_specify_output_file(struct work_queue_task *t, const char *rname, const char *fname); /* NOTE: argument order is (rname, fname), the reverse of the input-file variants. */
00804 
00812 int work_queue_task_specify_output_file_do_not_cache(struct work_queue_task *t, const char *rname, const char *fname); /* NOTE: argument order is (rname, fname); presumably without caching -- confirm. */
00813 
00815 
00816 /* Experimental feature - intentionally left undocumented.
00817 This feature exists to simplify performance evaluation and is not recommended
00818 for production use since it delays execution of the workload.
00819 Force the master to wait for the given number of workers to connect before
00820 starting to dispatch tasks.
00821 @param q A work queue object.
00822 @param resources The number of workers to wait for before tasks are dispatched.*/
00823 void work_queue_activate_worker_waiting(struct work_queue *q, int resources);
00824 
00825 #endif

Generated on 23 Nov 2015 for cctools by  doxygen 1.6.1