work_queue.h

Go to the documentation of this file.
00001 /*
00002 Copyright (C) 2008- The University of Notre Dame
00003 This software is distributed under the GNU General Public License.
00004 See the file COPYING for details.
00005 */
00006 
00007 #ifndef WORK_QUEUE_H
00008 #define WORK_QUEUE_H
00009 
00020 #include <sys/types.h>
00021 #include "timestamp.h"
00022 #include "category.h"
00023 #include "rmsummary.h"
00024 
/* Port and timing constants. Only names and values are visible in this listing; intended uses are hedged. */
00025 #define WORK_QUEUE_DEFAULT_PORT 9123               /**< Fixed port number 9123; presumably the default to pass to work_queue_create() -- confirm. */
00026 #define WORK_QUEUE_RANDOM_PORT  0                  /**< Port value 0; presumably asks work_queue_create() to choose a port itself -- confirm. */
00028 #define WORK_QUEUE_WAITFORTASK  -1                 /**< Negative sentinel; presumably a timeout for work_queue_wait() meaning "wait until a task returns" -- confirm. */
00030 #define WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL 120  /**< Presumably the default for work_queue_specify_keepalive_interval(); units not shown here (seconds?) -- confirm. */
00031 #define WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT  30   /**< Presumably the default for work_queue_specify_keepalive_timeout(); units not shown here (seconds?) -- confirm. */
/* Direction of a file attached to a task. Used as the `type` parameter of
 * work_queue_task_specify_file(), _file_piece() and _directory(). */
00033 typedef enum {
00034         WORK_QUEUE_INPUT  = 0,                         /**< The file is an input of the task. */
00035         WORK_QUEUE_OUTPUT = 1                          /**< The file is an output of the task. */
00036 } work_queue_file_type_t;
00037 
/* Flags for files attached to a task (the `flags` parameter of the
 * work_queue_task_specify_* file functions). The non-zero values are powers
 * of two, which suggests they are meant to be OR-ed together.
 * NOTE(review): WORK_QUEUE_THIRDGET and WORK_QUEUE_THIRDPUT are both 8, so the
 * two names are aliases and cannot be distinguished at run time -- confirm
 * this is intentional. */
00038 typedef enum {
00039         WORK_QUEUE_NOCACHE  = 0, /**< Zero value: no flag bits set. */
00040         WORK_QUEUE_CACHE    = 1, /**< Bit 0. Presumably requests caching of the file at the worker -- confirm. */
00041         WORK_QUEUE_SYMLINK  = 2, /**< Bit 1. Exact semantics not visible in this listing. */
00042         WORK_QUEUE_PREEXIST = 4, /**< Bit 2. Exact semantics not visible in this listing. */
00043         WORK_QUEUE_THIRDGET = 8, /**< Bit 3. Shares its value with WORK_QUEUE_THIRDPUT. */
00044         WORK_QUEUE_THIRDPUT = 8, /**< Bit 3. Shares its value with WORK_QUEUE_THIRDGET. */
00045         WORK_QUEUE_WATCH    = 16 /**< Bit 4. Exact semantics not visible in this listing. */
00046 } work_queue_file_flags_t;
00047 
/* Worker-selection (scheduling) algorithm identifiers. Accepted by
 * work_queue_specify_algorithm() for a queue and by
 * work_queue_task_specify_algorithm() for a single task. Only UNSET has an
 * explicit value; the rest take the implicit values 1..5 in order. */
00048 typedef enum {
00049         WORK_QUEUE_SCHEDULE_UNSET = 0, /**< Zero value; presumably "no algorithm chosen" -- confirm. */
00050         WORK_QUEUE_SCHEDULE_FCFS,      /**< Implicit value 1. Presumably first-come-first-served -- confirm. */
00051         WORK_QUEUE_SCHEDULE_FILES,     /**< Implicit value 2. Selection criterion not visible in this listing. */
00052         WORK_QUEUE_SCHEDULE_TIME,      /**< Implicit value 3. Selection criterion not visible in this listing. */
00053         WORK_QUEUE_SCHEDULE_RAND,      /**< Implicit value 4. Presumably random selection -- confirm. */
00054         WORK_QUEUE_SCHEDULE_WORST      /**< Implicit value 5. Selection criterion not visible in this listing. */
00055 } work_queue_schedule_t;
00056 
00057 
/* Result code of a task (type of work_queue_task::result).
 * Encoding as written: 1, 2 and 4 occupy the low three bits; every other
 * non-zero code is a small integer shifted left by 3 (8, 16, 24, ... 64).
 * The shifted codes are therefore NOT independent bit flags: for example
 * 3 << 3 equals (1 << 3) | (2 << 3). Compare them for equality rather than
 * testing single bits. */
00058 typedef enum {
00059         WORK_QUEUE_RESULT_SUCCESS             = 0,      /**< Zero value. */
00060         WORK_QUEUE_RESULT_INPUT_MISSING       = 1,      /**< Low bit 0. */
00061         WORK_QUEUE_RESULT_OUTPUT_MISSING      = 2,      /**< Low bit 1. */
00062         WORK_QUEUE_RESULT_STDOUT_MISSING      = 4,      /**< Low bit 2. */
00063         WORK_QUEUE_RESULT_SIGNAL              = 1 << 3, /**< Value 8. */
00064         WORK_QUEUE_RESULT_RESOURCE_EXHAUSTION = 2 << 3, /**< Value 16. */
00065         WORK_QUEUE_RESULT_TASK_TIMEOUT        = 3 << 3, /**< Value 24. */
00066         WORK_QUEUE_RESULT_UNKNOWN             = 4 << 3, /**< Value 32. */
00067         WORK_QUEUE_RESULT_FORSAKEN            = 5 << 3, /**< Value 40. */
00068         WORK_QUEUE_RESULT_MAX_RETRIES         = 6 << 3, /**< Value 48. */
00069         WORK_QUEUE_RESULT_TASK_MAX_RUN_TIME   = 7 << 3, /**< Value 56. */
00070         WORK_QUEUE_RESULT_DISK_ALLOC_FULL     = 8 << 3  /**< Value 64. */
00071 } work_queue_result_t;
00072 
/* State of a task as reported by work_queue_task_state(). Values are the
 * implicit sequence 0..6. The trailing comma after the last enumerator is
 * valid from C99 onward. */
00073 typedef enum {
00074         WORK_QUEUE_TASK_UNKNOWN = 0,       /**< Value 0. */
00075         WORK_QUEUE_TASK_READY,             /**< Value 1. */
00076         WORK_QUEUE_TASK_RUNNING,           /**< Value 2. */
00077         WORK_QUEUE_TASK_WAITING_RETRIEVAL, /**< Value 3. */
00078         WORK_QUEUE_TASK_RETRIEVED,         /**< Value 4. */
00079         WORK_QUEUE_TASK_DONE,              /**< Value 5. */
00080         WORK_QUEUE_TASK_CANCELED,           /**< Value 6. */
00081 } work_queue_task_state_t;
00082 
/* Kind of object a task file entry refers to. Used as the `type` parameter of
 * work_queue_invalidate_cached_file(). Numbering starts at 1, so 0 is not a
 * valid member. */
00083 typedef enum {
00084         WORK_QUEUE_FILE = 1,              /**< Value 1. */
00085         WORK_QUEUE_BUFFER,                /**< Value 2. */
00086         WORK_QUEUE_REMOTECMD,             /**< Value 3. */
00087         WORK_QUEUE_FILE_PIECE,            /**< Value 4. */
00088         WORK_QUEUE_DIRECTORY,             /**< Value 5. */
00089         WORK_QUEUE_URL                    /**< Value 6. */
00090 } work_queue_file_t;
00091 
00092 
00093 extern int wq_option_scheduler;                /**< Declared here, defined elsewhere. Presumably holds a work_queue_schedule_t value used as a default -- confirm. */
/* Description of a single task. This listing carries no per-field
 * documentation, so the notes below state only what the declarations show;
 * anything marked "presumably" or NOTE(review) must be confirmed against the
 * implementation. Field order is part of the binary layout. */
00103 struct work_queue_task {
00104         char *tag;                                        
00105         char *command_line;                               
00106         work_queue_schedule_t worker_selection_algorithm; /**< A work_queue_schedule_t value. */
00107         char *output;                                     
00108         struct list *input_files;                         
00109         struct list *output_files;                        
00110         struct list *env_list;                            
00111         int taskid;                                       
00112         int return_status;                                
00113         work_queue_result_t result;                       /**< A work_queue_result_t code. */
00114         char *host;                                       /**< NOTE(review): distinct from hostname below; the difference is not visible here. */
00115         char *hostname;                                   
00117         char *category;                         
00118         category_allocation_t resource_request; 
00120         double priority;        
00121         int max_retries;        /**< Declared int, whereas work_queue_task_specify_max_retries() takes an int64_t. */
00123         int try_count;          /**< Named as the replacement for the old total_submissions field (see below). */
00124         int exhausted_attempts; 
00126         /* All times in microseconds */
00127         /* A time_when_* refers to an instant in time, otherwise it refers to a length of time. */
00128         timestamp_t time_when_submitted;    
00129         timestamp_t time_when_done;         
00131         int disk_allocation_exhausted;                        
00133         timestamp_t time_when_commit_start; 
00134         timestamp_t time_when_commit_end;   
00136         timestamp_t time_when_retrieval;    
00138         timestamp_t time_workers_execute_last;                 
00139         timestamp_t time_workers_execute_all;                  
00140         timestamp_t time_workers_execute_exhaustion;           
00141         timestamp_t time_workers_execute_failure;              
00143         int64_t bytes_received;                                
00144         int64_t bytes_sent;                                    
00145         int64_t bytes_transferred;                             
00147         struct rmsummary *resources_allocated;                 
00148         struct rmsummary *resources_measured;                  
00149         struct rmsummary *resources_requested;                 
00150         char *monitor_output_directory;                        
00152         char *monitor_snapshot_file;                          
00153         struct list *features;                                
00155         /* deprecated fields */
              /* Everything from here to the end of the struct follows the
               * "deprecated fields" marker. Presumably these are older names
               * for the time_when_*, time_workers_* and bytes_* fields above
               * -- confirm the mapping before relying on either set. */
00156         //int total_submissions;                                 /**< @deprecated Use try_count. */
00157 
00158         timestamp_t time_task_submit;                          
00159         timestamp_t time_task_finish;                          
00160         timestamp_t time_committed;                            
00162         timestamp_t time_send_input_start;                     
00163         timestamp_t time_send_input_finish;                    
00164         timestamp_t time_receive_result_start;                 
00165         timestamp_t time_receive_result_finish;                
00166         timestamp_t time_receive_output_start;                 
00167         timestamp_t time_receive_output_finish;                
00169         timestamp_t time_execute_cmd_start;                    
00170         timestamp_t time_execute_cmd_finish;                   
00172         timestamp_t total_transfer_time;                       
00174         timestamp_t cmd_execution_time;                        
00175         timestamp_t total_cmd_execution_time;                  
00176         timestamp_t total_cmd_exhausted_execute_time;          
00177         timestamp_t total_time_until_worker_failure;           
00179         int64_t total_bytes_received;                          
00180         int64_t total_bytes_sent;                              
00181         int64_t total_bytes_transferred;                       
00183         timestamp_t time_app_delay;                            
00184 };
00185 
/* Statistics snapshot filled in by work_queue_get_stats(),
 * work_queue_get_stats_hierarchy() and work_queue_get_stats_category().
 * The in-line section comments group the fields; per-field documentation is
 * not present in this listing. Field order is part of the binary layout. */
00188 struct work_queue_stats {
00189         /* Stats for the current state of workers: */
00190         int workers_connected;    
00191         int workers_init;         
00192         int workers_idle;         
00193         int workers_busy;         
00194         int workers_able;         
00196         /* Cumulative stats for workers: */
00197         int workers_joined;       
00198         int workers_removed;      
00199         int workers_released;     
00200         int workers_idled_out;    
00201         int workers_fast_aborted; 
00202         int workers_blacklisted ; 
00203         int workers_lost;         
00205         /* Stats for the current state of tasks: */
00206         int tasks_waiting;        
00207         int tasks_on_workers;     
00208         int tasks_running;        
00209         int tasks_with_results;   
00211         /* Cumulative stats for tasks: */
00212         int tasks_submitted;           
00213         int tasks_dispatched;          
00214         int tasks_done;                
00215         int tasks_failed;              
00216         int tasks_cancelled;           
00217         int tasks_exhausted_attempts;  
00219         /* All times in microseconds */
00220         /* A time_when_* refers to an instant in time, otherwise it refers to a length of time. */
00221 
00222         /* Master time statistics: */
00223         timestamp_t time_when_started; 
00224         timestamp_t time_send;         
00225         timestamp_t time_receive;      
00226         timestamp_t time_send_good;    
00227         timestamp_t time_receive_good; 
00228         timestamp_t time_status_msgs;  
00229         timestamp_t time_internal;     
00230         timestamp_t time_polling;      
00231         timestamp_t time_application;  
00233         /* Workers time statistics: */
00234         timestamp_t time_workers_execute;            
00235         timestamp_t time_workers_execute_good;       
00236         timestamp_t time_workers_execute_exhaustion; 
00238         /* BW statistics */
00239         int64_t bytes_sent;     
00240         int64_t bytes_received; 
00241         double  bandwidth;      /**< Units are not visible in this listing. */
00243         /* resources statistics */
00244         int capacity_tasks;     
00245         int capacity_cores;     
00246         int capacity_memory;    
00247         int capacity_disk;      
00248         int capacity_instantaneous;      
00249         int capacity_weighted;  
00251         int64_t total_cores;      
00252         int64_t total_memory;     
00253         int64_t total_disk;           
00255         int64_t committed_cores;  
00256         int64_t committed_memory; 
00257         int64_t committed_disk;   
00259         int64_t max_cores;        
00260         int64_t max_memory;       
00261         int64_t max_disk;         
00263         int64_t min_cores;        
00264         int64_t min_memory;       
00265         int64_t min_disk;         
              /* NOTE(review): the remaining fields largely repeat earlier ones
               * under different names (e.g. total_workers_joined vs.
               * workers_joined, total_send_time vs. time_send, start_time vs.
               * time_when_started). Presumably they are kept for backward
               * compatibility -- confirm which set the implementation fills. */
00268         int total_workers_connected;    
00269         int total_workers_joined;       
00270         int total_workers_removed;      
00271         int total_workers_lost;         
00272         int total_workers_idled_out;    
00273         int total_workers_fast_aborted; 
00275         int tasks_complete;             
00277         int total_tasks_dispatched;     
00278         int total_tasks_complete;       
00279         int total_tasks_failed;         
00280         int total_tasks_cancelled;      
00281         int total_exhausted_attempts;   
00282         timestamp_t start_time;               
00283         timestamp_t total_send_time;          
00284         timestamp_t total_receive_time;       
00285         timestamp_t total_good_transfer_time; 
00287         timestamp_t total_execute_time;           
00288         timestamp_t total_good_execute_time;      
00289         timestamp_t total_exhausted_execute_time; 
00291         int64_t total_bytes_sent;     
00292         int64_t total_bytes_received; 
00294         double capacity; 
00296         double efficiency;      
00297         double idle_percentage; 
00299         int64_t total_gpus;       
00300         int64_t committed_gpus;   
00301         int64_t max_gpus;         
00302         int64_t min_gpus;         
00304         int port;                       
00305         int priority;                   
00306         int workers_ready;              
00307         int workers_full;               
00308         int total_worker_slots;         
00309         int avg_capacity;               
00310 };
00311 
00312 
00316 
/* Task construction and file attachment. Only prototypes are visible here:
 * the meaning of the int return values and the ownership of the returned
 * task pointers are not shown in this listing. */

/* Returns a task pointer for the given command string. */
00324 struct work_queue_task *work_queue_task_create(const char *full_command);
00325 
/* Returns a task pointer derived from `task`, which is taken as const.
 * Presumably an independent copy -- confirm. */
00331 struct work_queue_task *work_queue_task_clone(const struct work_queue_task *task);
00332 
/* Takes a task and a command string; returns nothing. */
00337 void work_queue_task_specify_command( struct work_queue_task *t, const char *cmd );
00338 
/* Attaches a file to task `t`: local_name / remote_name name the file on each
 * side, `type` is its direction and `flags` a work_queue_file_flags_t value. */
00355 int work_queue_task_specify_file(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags);
00356 
/* As work_queue_task_specify_file(), plus a byte range [start_byte, end_byte]
 * given as off_t. Whether end_byte is inclusive is not visible here. */
00371 int work_queue_task_specify_file_piece(struct work_queue_task *t, const char *local_name, const char *remote_name, off_t start_byte, off_t end_byte, work_queue_file_type_t type, work_queue_file_flags_t flags);
00372 
/* Attaches `length` bytes starting at `data` to task `t` under remote_name.
 * `flags` is a work_queue_file_flags_t value. The parameter is named here to
 * match work_queue_task_specify_file(); naming a prototype parameter does not
 * affect callers. Meaning of the int return value is not visible here. */
00383 int work_queue_task_specify_buffer(struct work_queue_task *t, const char *data, int length, const char *remote_name, work_queue_file_flags_t flags);
00384 
/* Attaches a directory to task `t`: local_name / remote_name name it on each
 * side, `type` is its direction, `flags` a work_queue_file_flags_t value and
 * `recursive` an int switch (presumably non-zero to include sub-directories
 * -- confirm). Meaning of the int return value is not visible here. */
00398 int work_queue_task_specify_directory(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags, int recursive);
00399 
/* Per-task attribute setters and task destruction. Only prototypes are
 * visible here; units and valid ranges are not shown unless a parameter name
 * states them. */

/* NOTE(review): max_retries is int64_t here but work_queue_task::max_retries
 * is declared int in this header; how out-of-range values are handled is not
 * visible -- confirm. */
00405 void work_queue_task_specify_max_retries( struct work_queue_task *t, int64_t max_retries );
00406 
/* Memory and disk amounts are int64_t; units are not visible here. */
00412 void work_queue_task_specify_memory( struct work_queue_task *t, int64_t memory );
00413 
00419 void work_queue_task_specify_disk( struct work_queue_task *t, int64_t disk );
00420 
/* Core and GPU counts are plain int, unlike memory/disk above. */
00426 void work_queue_task_specify_cores( struct work_queue_task *t, int cores );
00427 
00433 void work_queue_task_specify_gpus( struct work_queue_task *t, int gpus );
00434 
/* Both time setters name their argument `useconds`, i.e. presumably
 * microseconds, consistent with the "All times in microseconds" note on
 * struct work_queue_task. */
00442 void work_queue_task_specify_end_time( struct work_queue_task *t, int64_t useconds );
00443 
00451 void work_queue_task_specify_running_time( struct work_queue_task *t, int64_t useconds );
00452 
/* String attributes. Whether the strings are copied is not visible here. */
00459 void work_queue_task_specify_tag(struct work_queue_task *t, const char *tag);
00460 
00467 void work_queue_task_specify_category(struct work_queue_task *t, const char *category);
00468 
00474 void work_queue_task_specify_feature(struct work_queue_task *t, const char *name);
00475 
00482 void work_queue_task_specify_priority(struct work_queue_task *t, double priority );
00483 
/* NOTE(review): "enviroment" is misspelled in this public name; it is left
 * as-is because renaming would break existing callers. */
00490 void work_queue_task_specify_enviroment_variable( struct work_queue_task *t, const char *name, const char *value );
00491 
/* Takes a work_queue_schedule_t for this task only; the queue-wide
 * counterpart is work_queue_specify_algorithm(). */
00497 void work_queue_task_specify_algorithm(struct work_queue_task *t, work_queue_schedule_t algorithm);
00498 
00504 void work_queue_task_specify_monitor_output(struct work_queue_task *t, const char *monitor_output);
00505 
/* Takes a task pointer and returns nothing; presumably releases the task --
 * confirm what happens to attached lists and strings. */
00510 void work_queue_task_delete(struct work_queue_task *t);
00511 
00512 
/* NOTE(review): despite the queue-style name (no "_task_"), this function
 * takes a struct work_queue_task *, not a struct work_queue *. */
00563 int work_queue_specify_snapshot_file(struct work_queue_task *t, const char *monitor_snapshot_file);
00564 
00565 
00567 
00571 
/* Queue-level API. struct work_queue is used only through pointers in this
 * header (opaque type). Only prototypes are visible: return-value
 * conventions, ownership of returned pointers and units are not shown unless
 * stated by a parameter name. */

/* Takes a port number (see WORK_QUEUE_DEFAULT_PORT / WORK_QUEUE_RANDOM_PORT)
 * and returns a queue pointer. Failure signalling is not visible here. */
00588 struct work_queue *work_queue_create(int port);
00589 
/* NOTE(review): the two monitoring functions have identical signatures, and
 * monitor_output_directory is a non-const char * unlike the const char *
 * strings elsewhere in this header; whether it is modified is not visible. */
00603 int work_queue_enable_monitoring(struct work_queue *q, char *monitor_output_directory, int watchdog);
00604 
00614 int work_queue_enable_monitoring_full(struct work_queue *q, char *monitor_output_directory, int watchdog);
00615 
/* Hands task `t` to queue `q`; returns int (presumably the task id --
 * confirm). */
00624 int work_queue_submit(struct work_queue *q, struct work_queue_task *t);
00625 
00626 
00637 int work_queue_specify_min_taskid(struct work_queue *q, int minid);
00638 
/* Blacklist management, keyed by hostname string. The timeout variant takes
 * a time_t named `seconds`. */
00643 void work_queue_blacklist_add(struct work_queue *q, const char *hostname);
00644 
00652 void work_queue_blacklist_add_with_timeout(struct work_queue *q, const char *hostname, time_t seconds);
00653 
00654 
00659 void work_queue_blacklist_remove(struct work_queue *q, const char *hostname);
00660 
00661 
00665 void work_queue_blacklist_clear(struct work_queue *q);
00666 
/* `type` is a work_queue_file_t (kind of object), not a
 * work_queue_file_type_t (direction). */
00680 void work_queue_invalidate_cached_file(struct work_queue *q, const char *local_name, work_queue_file_t type);
00681 
00682 
/* Returns a task pointer; `timeout` is an int (units not visible here;
 * WORK_QUEUE_WAITFORTASK is presumably accepted -- confirm). */
00697 struct work_queue_task *work_queue_wait(struct work_queue *q, int timeout);
00698 
/* Queue state queries, all returning int. */
00710 int work_queue_hungry(struct work_queue *q);
00711 
00719 int work_queue_empty(struct work_queue *q);
00720 
00727 int work_queue_port(struct work_queue *q);
00728 
/* Statistics: each fills the caller-supplied struct work_queue_stats `s`. */
00733 void work_queue_get_stats(struct work_queue *q, struct work_queue_stats *s);
00734 
00739 void work_queue_get_stats_hierarchy(struct work_queue *q, struct work_queue_stats *s);
00740 
00746 void work_queue_get_stats_category(struct work_queue *q, const char *c, struct work_queue_stats *s);
00747 
00748 
/* Looks up a task by integer id and returns a work_queue_task_state_t. */
00754 work_queue_task_state_t work_queue_task_state(struct work_queue *q, int taskid);
00755 
/* The limit is passed as a string; its accepted format is not visible here. */
00760 void work_queue_set_bandwidth_limit(struct work_queue *q, const char *bandwidth);
00761 
00766 double work_queue_get_effective_bandwidth(struct work_queue *q);
00767 
/* Returns a non-const char *; who frees it is not visible here -- confirm. */
00774 char * work_queue_get_worker_summary( struct work_queue *q );
00775 
/* Fast-abort configuration for the whole queue or for one category, with a
 * double `multiplier`. */
00785 int work_queue_activate_fast_abort(struct work_queue *q, double multiplier);
00786 
00787 
00797 int work_queue_activate_fast_abort_category(struct work_queue *q, const char *category, double multiplier);
00798 
00799 
00807 int work_queue_specify_draining_by_hostname(struct work_queue *q, const char *hostname, int drain_flag);
00808 
/* Category configuration; category_mode_t comes from category.h. */
00815 int work_queue_specify_category_mode(struct work_queue *q, const char *category, category_mode_t mode);
00816 
00824 int work_queue_enable_category_resource(struct work_queue *q, const char *category, const char *resource, int autolabel);
00825 
/* Queue-wide counterpart of work_queue_task_specify_algorithm(). */
00831 void work_queue_specify_algorithm(struct work_queue *q, work_queue_schedule_t algorithm);
00832 
/* Queue name getter/setter. The getter returns a const char *. */
00837 const char *work_queue_name(struct work_queue *q);
00838 
00843 void work_queue_specify_name(struct work_queue *q, const char *name);
00844 
/* Queue priority is an int here; task priority (work_queue_task_specify_priority)
 * is a double. */
00849 void work_queue_specify_priority(struct work_queue *q, int priority);
00850 
00859 void work_queue_specify_num_tasks_left(struct work_queue *q, int ntasks);
00860 
/* Catalog server selection: one host/port pair, or a `hosts` string whose
 * format is not visible here. */
00866 void work_queue_specify_catalog_server(struct work_queue *q, const char *hostname, int port);
00867 
00872 void work_queue_specify_catalog_servers(struct work_queue *q, const char *hosts);
00873 
/* Cancellation: by integer id or by tag string (each returns a task
 * pointer), or all tasks at once (returns a struct list *). Ownership of the
 * returned objects is not visible here. */
00879 struct work_queue_task *work_queue_cancel_by_taskid(struct work_queue *q, int id);
00880 
00886 struct work_queue_task *work_queue_cancel_by_tasktag(struct work_queue *q, const char *tag);
00887 
00892 struct list * work_queue_cancel_all_tasks(struct work_queue *q);
00893 
00898 int work_queue_shut_down_workers(struct work_queue *q, int n);
00899 
/* Takes a queue pointer and returns nothing; presumably releases the queue
 * -- confirm. */
00904 void work_queue_delete(struct work_queue *q);
00905 
/* Log file selection, each taking a path string and returning int. */
00911 int work_queue_specify_log(struct work_queue *q, const char *logfile);
00912 
00918 int work_queue_specify_transactions_log(struct work_queue *q, const char *logfile);
00919 
/* Password supplied directly (void) or via a file path (returns int). */
00925 void work_queue_specify_password( struct work_queue *q, const char *password );
00926 
00933 int work_queue_specify_password_file( struct work_queue *q, const char *file );
00934 
/* Keepalive tuning; see WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL and
 * WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT for the presumably matching defaults. */
00939 void work_queue_specify_keepalive_interval(struct work_queue *q, int interval);
00940 
00945 void work_queue_specify_keepalive_timeout(struct work_queue *q, int timeout);
00946 
00952 void work_queue_master_preferred_connection(struct work_queue *q, const char *preferred_connection);
00953 
/* Generic tuning knob: parameter selected by `name`, value given as double.
 * The set of accepted names is not visible here. */
00969 int work_queue_tune(struct work_queue *q, const char *name, double value);
00970 
/* Resource limits expressed as struct rmsummary (from rmsummary.h), taken as
 * const in the three setters below. */
00976 void work_queue_specify_max_resources(struct work_queue *q,  const struct rmsummary *rm);
00977 
00983 void work_queue_specify_category_max_resources(struct work_queue *q,  const char *category, const struct rmsummary *rm);
00984 
00990 void work_queue_specify_category_first_allocation_guess(struct work_queue *q,  const char *category, const struct rmsummary *rm);
00991 
/* NOTE(review): `max` is a non-const struct rmsummary * here, unlike the
 * const pointers above; whether it is modified is not visible. */
00997 void work_queue_initialize_categories(struct work_queue *q, struct rmsummary *max, const char *summaries_file);
00998 
00999 
01001 
01005 
/* Remaining constants and prototypes. Only declarations are visible here;
 * return-value conventions are not shown. */

/* Values for the `order` argument of work_queue_specify_task_order(). */
01006 #define WORK_QUEUE_TASK_ORDER_FIFO 0  /**< Value 0. */
01007 #define WORK_QUEUE_TASK_ORDER_LIFO 1  /**< Value 1. */
01015 void work_queue_specify_task_order(struct work_queue *q, int order);
01016 
01017 
/* Values for the `mode` argument of work_queue_specify_master_mode(). */
01018 #define WORK_QUEUE_MASTER_MODE_STANDALONE 0 /**< Value 0. */
01019 #define WORK_QUEUE_MASTER_MODE_CATALOG 1    /**< Value 1. */
01028 void work_queue_specify_master_mode(struct work_queue *q, int mode);
01029 
01030 
01036 void work_queue_specify_estimate_capacity_on(struct work_queue *q, int estimate_capacity_on);
01037 
/* Shorter file-attachment helpers. Presumably convenience forms of
 * work_queue_task_specify_buffer() / work_queue_task_specify_file() --
 * confirm.
 * NOTE(review): argument order differs between the two families: the input
 * helpers take (fname, rname) while the output helpers take (rname, fname).
 * Both are const char *, so a swapped call compiles silently. */
01046 int work_queue_task_specify_input_buf(struct work_queue_task *t, const char *buf, int length, const char *rname);
01047 
01055 int work_queue_task_specify_input_file(struct work_queue_task *t, const char *fname, const char *rname);
01056 
01064 int work_queue_task_specify_input_file_do_not_cache(struct work_queue_task *t, const char *fname, const char *rname);
01065 
01073 int work_queue_task_specify_output_file(struct work_queue_task *t, const char *rname, const char *fname);
01074 
01082 int work_queue_task_specify_output_file_do_not_cache(struct work_queue_task *t, const char *rname, const char *fname);
01083 
/* Returns a char * built from a directory string and a task id. `pwd` is a
 * non-const char *; ownership of the result is not visible here -- confirm. */
01088 char *work_queue_generate_disk_alloc_full_filename(char *pwd, int taskid);
01089 
01091 
01092 #endif

Generated on 17 Sep 2019 for cctools by  doxygen 1.6.1