work_queue.h

Go to the documentation of this file.
00001 /*
00002 Copyright (C) 2008- The University of Notre Dame
00003 This software is distributed under the GNU General Public License.
00004 See the file COPYING for details.
00005 */
00006 
00007 #ifndef WORK_QUEUE_H
00008 #define WORK_QUEUE_H
00009 
00020 #include <sys/types.h>
00021 #include "timestamp.h"
00022 #include "category.h"
00023 #include "rmsummary.h"
00024 
/* Numeric constants of the Work Queue API. NOTE(review): meanings below are inferred from the macro names; confirm against the implementation. */
00025 #define WORK_QUEUE_DEFAULT_PORT 9123               /* By name, the default port number (9123). */
00026 #define WORK_QUEUE_RANDOM_PORT  0                  /* Value 0; by name, asks for an arbitrarily chosen port -- presumably passed to work_queue_create(); confirm. */
00028 #define WORK_QUEUE_WAITFORTASK  -1                 /* Value -1; presumably a timeout for work_queue_wait() meaning "block until a task returns" -- confirm. Expansion is not parenthesized. */
00030 #define WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL 120  /* Default keepalive interval; units not shown here (presumably seconds -- TODO confirm). */
00031 #define WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT  30   /* Default keepalive timeout; units not shown here (presumably seconds -- TODO confirm). */
/* Direction of a file attached to a task; this is the type of the `type` argument of work_queue_task_specify_file(), work_queue_task_specify_file_piece() and work_queue_task_specify_directory(). */
00033 typedef enum {
00034         WORK_QUEUE_INPUT  = 0,                         /* By name, an input file of the task. */
00035         WORK_QUEUE_OUTPUT = 1                          /* By name, an output file of the task. */
00036 } work_queue_file_type_t;
00037 
/*
 * Flags describing how a task file is handled; this is the type of the flags argument of
 * work_queue_task_specify_file() and related functions. Every non-zero value is a power of
 * two, so the values look OR-combinable (NOTE(review): confirm against the implementation).
 * Per-member meanings below are inferred from the names only.
 */
00038 typedef enum {
00039         WORK_QUEUE_NOCACHE  = 0, /* Zero value: no flag bits set; by name, do not cache the file. */
00040         WORK_QUEUE_CACHE    = 1, /* By name, cache the file. */
00041         WORK_QUEUE_SYMLINK  = 2, /* By name, use a symbolic link -- confirm exact behavior. */
00042         WORK_QUEUE_PREEXIST = 4, /* By name, the file already exists at its destination -- confirm. */
00043         WORK_QUEUE_THIRDGET = 8, /* By name, third-party "get" transfer -- confirm. */
00044         WORK_QUEUE_THIRDPUT = 8, /* Same numeric value as WORK_QUEUE_THIRDGET, so the two cannot be told apart at run time; presumably an intentional alias -- confirm. */
00045         WORK_QUEUE_WATCH    = 16 /* By name, watch the file -- confirm exact behavior. */
00046 } work_queue_file_flags_t;
00047 
/*
 * Scheduling algorithm selector. It is the type of work_queue_task.worker_selection_algorithm and of the
 * `algorithm` argument of work_queue_specify_algorithm() and work_queue_task_specify_algorithm().
 * Only the first enumerator has an explicit value; the rest take consecutive values 1..5.
 * NOTE(review): per-member meanings are inferred from the names only.
 */
00048 typedef enum {
00049         WORK_QUEUE_SCHEDULE_UNSET = 0, /* No algorithm chosen. */
00050         WORK_QUEUE_SCHEDULE_FCFS,      /* = 1; presumably first-come-first-served. */
00051         WORK_QUEUE_SCHEDULE_FILES,     /* = 2; presumably based on files -- confirm. */
00052         WORK_QUEUE_SCHEDULE_TIME,      /* = 3; presumably based on time -- confirm. */
00053         WORK_QUEUE_SCHEDULE_RAND,      /* = 4; presumably random selection. */
00054         WORK_QUEUE_SCHEDULE_WORST      /* = 5; presumably worst-fit -- confirm. */
00055 } work_queue_schedule_t;
00056 
00057 
/*
 * Task outcome codes; this is the type of work_queue_task.result.
 * The first three non-zero codes are single bits (1, 2, 4). The remaining codes are the integers
 * 1..8 shifted left by three bits (8, 16, 24, ... 64); several of them share bits (e.g. 3 << 3 == 8 | 16),
 * so they are enumerated values, not independent bit flags.
 * NOTE(review): per-member meanings are inferred from the names only.
 */
00058 typedef enum {
00059         WORK_QUEUE_RESULT_SUCCESS             = 0,      /* Zero: no failure recorded. */
00060         WORK_QUEUE_RESULT_INPUT_MISSING       = 1,      /* By name, an input file was missing. */
00061         WORK_QUEUE_RESULT_OUTPUT_MISSING      = 2,      /* By name, an output file was missing. */
00062         WORK_QUEUE_RESULT_STDOUT_MISSING      = 4,      /* By name, the task's standard output was missing. */
00063         WORK_QUEUE_RESULT_SIGNAL              = 1 << 3, /* = 8; by name, the task ended on a signal. */
00064         WORK_QUEUE_RESULT_RESOURCE_EXHAUSTION = 2 << 3, /* = 16; by name, a resource limit was exceeded. */
00065         WORK_QUEUE_RESULT_TASK_TIMEOUT        = 3 << 3, /* = 24; by name, the task timed out -- see work_queue_task_specify_end_time(); confirm. */
00066         WORK_QUEUE_RESULT_UNKNOWN             = 4 << 3, /* = 32; by name, result could not be classified. */
00067         WORK_QUEUE_RESULT_FORSAKEN            = 5 << 3, /* = 40; meaning not evident from this header -- confirm. */
00068         WORK_QUEUE_RESULT_MAX_RETRIES         = 6 << 3, /* = 48; by name, retry limit reached (cf. work_queue_task.max_retries). */
00069         WORK_QUEUE_RESULT_TASK_MAX_RUN_TIME   = 7 << 3, /* = 56; by name, run-time limit reached -- see work_queue_task_specify_running_time(); confirm. */
00070         WORK_QUEUE_RESULT_DISK_ALLOC_FULL     = 8 << 3  /* = 64; by name, the disk allocation filled up (cf. work_queue_task.disk_allocation_exhausted). */
00071 } work_queue_result_t;
00072 
/*
 * Task state as returned by work_queue_task_state(). Only the first enumerator has an explicit
 * value; the others take consecutive values 1..7.
 * NOTE(review): per-member meanings are inferred from the names only.
 */
00073 typedef enum {
00074         WORK_QUEUE_TASK_UNKNOWN = 0,       /* State not known. */
00075         WORK_QUEUE_TASK_READY,             /* = 1 */
00076         WORK_QUEUE_TASK_RUNNING,           /* = 2 */
00077         WORK_QUEUE_TASK_WAITING_RETRIEVAL, /* = 3 */
00078         WORK_QUEUE_TASK_RETRIEVED,         /* = 4 */
00079         WORK_QUEUE_TASK_DONE,              /* = 5 */
00080         WORK_QUEUE_TASK_CANCELED,           /* = 6; spelled with one L here, whereas work_queue_stats uses "cancelled". */
00081         WORK_QUEUE_TASK_WAITING_RESUBMISSION /* = 7 */
00082 } work_queue_task_state_t;
00083 
/*
 * Kind of object attached to a task; this is the type of the `type` argument of
 * work_queue_invalidate_cached_file(). Numbering starts at 1, so 0 is not a valid member.
 * NOTE(review): per-member meanings are inferred from the names only.
 */
00084 typedef enum {
00085         WORK_QUEUE_FILE = 1,              /* = 1; a regular file. */
00086         WORK_QUEUE_BUFFER,                /* = 2; cf. work_queue_task_specify_buffer(). */
00087         WORK_QUEUE_REMOTECMD,             /* = 3; presumably a remote command -- confirm. */
00088         WORK_QUEUE_FILE_PIECE,            /* = 4; cf. work_queue_task_specify_file_piece(). */
00089         WORK_QUEUE_DIRECTORY,             /* = 5; cf. work_queue_task_specify_directory(). */
00090         WORK_QUEUE_URL                    /* = 6; presumably a URL -- confirm. */
00091 } work_queue_file_t;
00092 
00093 
/* Global int declared here and defined in another translation unit. NOTE(review): by name it holds a scheduler option, presumably a work_queue_schedule_t value -- confirm. */
00094 extern int wq_option_scheduler;                
/*
 * Describes a single task. Pointers to this struct are returned by work_queue_task_create(),
 * work_queue_task_clone() and work_queue_wait(), and are taken by the work_queue_task_specify_*()
 * functions and by work_queue_submit().
 * NOTE(review): the per-field notes are inferred from field names and types, because the original
 * doc comments are not part of this listing -- confirm before relying on them.
 */
00104 struct work_queue_task {
00105         char *tag;                                        /* Caller-chosen label; cf. work_queue_task_specify_tag(). */
00106         char *command_line;                               /* Command to run; cf. work_queue_task_specify_command(). */
00107         work_queue_schedule_t worker_selection_algorithm; /* Per-task scheduling choice; cf. work_queue_task_specify_algorithm(). */
00108         char *output;                                     /* Presumably the captured output of the command -- confirm. */
00109         struct list *input_files;                         /* List of input files; element type not shown here. */
00110         struct list *output_files;                        /* List of output files; element type not shown here. */
00111         struct list *env_list;                            /* Presumably the environment variables set for the task. */
00112         int taskid;                                       /* Integer task identifier; cf. work_queue_task_state(q, taskid). */
00113         int return_status;                                /* Presumably the exit status of the command. */
00114         work_queue_result_t result;                       /* Outcome code; see work_queue_result_t. */
00115         char *host;                                       /* Presumably the address of the worker that ran the task. */
00116         char *hostname;                                   /* Presumably the name of the worker that ran the task. */
00118         char *category;                         /* Category name; cf. work_queue_task_specify_category(). */
00119         category_allocation_t resource_request; /* Type comes from category.h; semantics not shown here. */
00121         double priority;        /* cf. work_queue_task_specify_priority(). */
00122         int max_retries;        /* Note: an int, while work_queue_task_specify_max_retries() takes int64_t. */
00124         int try_count;          /* Replaces the commented-out total_submissions below. */
00125         int exhausted_attempts; /* Presumably attempts that ended in resource exhaustion. */
00127         /* All times in microseconds */
00128         /* A time_when_* refers to an instant in time, otherwise it refers to a length of time. */
00129         timestamp_t time_when_submitted;    /* Instant. */
00130         timestamp_t time_when_done;         /* Instant. */
00132         int disk_allocation_exhausted;                        /* Presumably non-zero when the disk allocation filled up. */
00134         timestamp_t time_when_commit_start; /* Instant. */
00135         timestamp_t time_when_commit_end;   /* Instant. */
00137         timestamp_t time_when_retrieval;    /* Instant. */
00139         timestamp_t time_workers_execute_last;                 /* Duration. */
00140         timestamp_t time_workers_execute_all;                  /* Duration. */
00141         timestamp_t time_workers_execute_exhaustion;           /* Duration. */
00142         timestamp_t time_workers_execute_failure;              /* Duration. */
00144         int64_t bytes_received;                                /* Byte counter. */
00145         int64_t bytes_sent;                                    /* Byte counter. */
00146         int64_t bytes_transferred;                             /* Byte counter; presumably sent + received -- confirm. */
00148         struct rmsummary *resources_allocated;                 /* struct rmsummary comes from rmsummary.h. */
00149         struct rmsummary *resources_measured;                  
00150         struct rmsummary *resources_requested;                 
00151         char *monitor_output_directory;                        /* cf. work_queue_task_specify_monitor_output(). */
00153         /* deprecated fields */
00154         //int total_submissions;                                 /**< @deprecated Use try_count. */
00155 
              /* Everything from here to the end of the struct sits under the "deprecated fields" marker above. */
00156         timestamp_t time_task_submit;                          
00157         timestamp_t time_task_finish;                          
00158         timestamp_t time_committed;                            
00160         timestamp_t time_send_input_start;                     
00161         timestamp_t time_send_input_finish;                    
00162         timestamp_t time_receive_result_start;                 
00163         timestamp_t time_receive_result_finish;                
00164         timestamp_t time_receive_output_start;                 
00165         timestamp_t time_receive_output_finish;                
00167         timestamp_t time_execute_cmd_start;                    
00168         timestamp_t time_execute_cmd_finish;                   
00170         timestamp_t total_transfer_time;                       
00172         timestamp_t cmd_execution_time;                        
00173         timestamp_t total_cmd_execution_time;                  
00174         timestamp_t total_cmd_exhausted_execute_time;          
00175         timestamp_t total_time_until_worker_failure;           
00177         int64_t total_bytes_received;                          
00178         int64_t total_bytes_sent;                              
00179         int64_t total_bytes_transferred;                       
00181         timestamp_t time_app_delay;                            
00182 };
00183 
/*
 * Statistics snapshot filled in through the `s` argument of work_queue_get_stats(),
 * work_queue_get_stats_hierarchy() and work_queue_get_stats_category().
 * The section comments inside the struct come from the original source; the trailing notes are
 * NOTE(review) inferences from field names and types -- confirm before relying on them.
 */
00186 struct work_queue_stats {
00187         /* Stats for the current state of workers: */
00188         int workers_connected;    
00189         int workers_init;         
00190         int workers_idle;         
00191         int workers_busy;         
00192         int workers_able;         
00194         /* Cumulative stats for workers: */
00195         int workers_joined;       
00196         int workers_removed;      
00197         int workers_released;     
00198         int workers_idled_out;    
00199         int workers_fast_aborted; /* cf. work_queue_activate_fast_abort(). */
00200         int workers_blacklisted ; /* cf. work_queue_blacklist_add(). */
00201         int workers_lost;         
00203         /* Stats for the current state of tasks: */
00204         int tasks_waiting;        
00205         int tasks_on_workers;     
00206         int tasks_running;        
00207         int tasks_with_results;   
00209         /* Cumulative stats for tasks: */
00210         int tasks_submitted;           
00211         int tasks_dispatched;          
00212         int tasks_done;                
00213         int tasks_failed;              
00214         int tasks_cancelled;           
00215         int tasks_exhausted_attempts;  
00217         /* All times in microseconds */
00218         /* A time_when_* refers to an instant in time, otherwise it refers to a length of time. */
00219 
00220         /* Master time statistics: */
00221         timestamp_t time_when_started; /* Instant. */
00222         timestamp_t time_send;         /* Duration. */
00223         timestamp_t time_receive;      /* Duration. */
00224         timestamp_t time_send_good;    /* Duration. */
00225         timestamp_t time_receive_good; /* Duration. */
00226         timestamp_t time_status_msgs;  /* Duration. */
00227         timestamp_t time_internal;     /* Duration. */
00228         timestamp_t time_polling;      /* Duration. */
00229         timestamp_t time_application;  /* Duration. */
00231         /* Workers time statistics: */
00232         timestamp_t time_workers_execute;            /* Duration. */
00233         timestamp_t time_workers_execute_good;       /* Duration. */
00234         timestamp_t time_workers_execute_exhaustion; /* Duration. */
00236         /* BW statistics */
00237         int64_t bytes_sent;     
00238         int64_t bytes_received; 
00239         double  bandwidth;      /* Units not shown in this header -- confirm. */
00241         /* resources statistics */
00242         int capacity_tasks;     
00243         int capacity_cores;     
00244         int capacity_memory;    /* Units not shown in this header -- confirm. */
00245         int capacity_disk;      /* Units not shown in this header -- confirm. */
00247         int64_t total_cores;      
00248         int64_t total_memory;     
00249         int64_t total_disk;           
00251         int64_t committed_cores;  
00252         int64_t committed_memory; 
00253         int64_t committed_disk;   
00255         int64_t max_cores;        
00256         int64_t max_memory;       
00257         int64_t max_disk;         
00259         int64_t min_cores;        
00260         int64_t min_memory;       
00261         int64_t min_disk;         
              /* NOTE(review): many fields from here on repeat earlier ones under different names (e.g. total_bytes_sent vs bytes_sent, start_time vs time_when_started); possibly kept for backward compatibility -- confirm. */
00264         int total_workers_connected;    
00265         int total_workers_joined;       
00266         int total_workers_removed;      
00267         int total_workers_lost;         
00268         int total_workers_idled_out;    
00269         int total_workers_fast_aborted; 
00271         int tasks_complete;             
00273         int total_tasks_dispatched;     
00274         int total_tasks_complete;       
00275         int total_tasks_failed;         
00276         int total_tasks_cancelled;      
00277         int total_exhausted_attempts;   
00278         timestamp_t start_time;               
00279         timestamp_t total_send_time;          
00280         timestamp_t total_receive_time;       
00281         timestamp_t total_good_transfer_time; 
00283         timestamp_t total_execute_time;           
00284         timestamp_t total_good_execute_time;      
00285         timestamp_t total_exhausted_execute_time; 
00287         int64_t total_bytes_sent;     
00288         int64_t total_bytes_received; 
00290         double capacity; 
00292         double efficiency;      
00293         double idle_percentage; 
00295         int64_t total_gpus;       
00296         int64_t committed_gpus;   
00297         int64_t max_gpus;         
00298         int64_t min_gpus;         
00300         int port;                       /* cf. work_queue_port(). */
00301         int priority;                   /* cf. work_queue_specify_priority(). */
00302         int workers_ready;              
00303         int workers_full;               
00304         int total_worker_slots;         
00305         int avg_capacity;               
00306 };
00307 
00308 
00312 
/*
 * Task construction and file attachment.
 * NOTE(review): the one-line summaries in this group are derived from the function names and
 * signatures only; the implementations are not visible in this header.
 */
/* Returns a task pointer for the command string full_command; failure value and ownership are not shown here (cf. work_queue_task_delete()). */
00320 struct work_queue_task *work_queue_task_create(const char *full_command);
00321 
/* Takes a const task and returns a task pointer; presumably a copy -- confirm how deep the copy is. */
00327 struct work_queue_task *work_queue_task_clone(const struct work_queue_task *task);
00328 
/* Sets the command of task t to cmd; no return value. */
00333 void work_queue_task_specify_command( struct work_queue_task *t, const char *cmd );
00334 
/* Attaches a file to t: local_name / remote_name name the two ends, `type` selects input vs output, `flags` is a work_queue_file_flags_t. Returns int; meaning of the value not shown here. */
00351 int work_queue_task_specify_file(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags);
00352 
/* Like work_queue_task_specify_file() with an added byte range [start_byte, end_byte] given as off_t; whether end_byte is inclusive is not shown here -- confirm. */
00367 int work_queue_task_specify_file_piece(struct work_queue_task *t, const char *local_name, const char *remote_name, off_t start_byte, off_t end_byte, work_queue_file_type_t type, work_queue_file_flags_t flags);
00368 
/*
 * Attaches `length` bytes starting at `data` to task t under the name remote_name.
 * `flags` has the same type as the flags argument of work_queue_task_specify_file().
 * Returns int; the meaning of the value is not visible in this header.
 * NOTE(review): summary derived from the signature only; whether `data` is copied is not shown here.
 */
00379 int work_queue_task_specify_buffer(struct work_queue_task *t, const char *data, int length, const char *remote_name, work_queue_file_flags_t flags);
00380 
/*
 * Attaches a directory to task t: local_name / remote_name name the two ends, `type` selects
 * input vs output, `flags` has the same type as the flags argument of work_queue_task_specify_file(),
 * and `recursive` is an int switch (presumably non-zero to include sub-directories -- confirm).
 * Returns int; the meaning of the value is not visible in this header.
 */
00394 int work_queue_task_specify_directory(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags, int recursive);
00395 
/*
 * Per-task setters and the task destructor. All setters take the task as `t` and return nothing.
 * NOTE(review): summaries are derived from names and signatures only; units are stated only where
 * a parameter name gives them.
 */
/* max_retries is int64_t here, but the work_queue_task.max_retries field is an int -- large values may be narrowed; confirm in the implementation. */
00401 void work_queue_task_specify_max_retries( struct work_queue_task *t, int64_t max_retries );
00402 
/* Memory request for the task; units not shown here -- confirm. */
00408 void work_queue_task_specify_memory( struct work_queue_task *t, int64_t memory );
00409 
/* Disk request for the task; units not shown here -- confirm. */
00415 void work_queue_task_specify_disk( struct work_queue_task *t, int64_t disk );
00416 
/* Core count for the task (plain int, unlike memory/disk). */
00422 void work_queue_task_specify_cores( struct work_queue_task *t, int cores );
00423 
/* GPU count for the task (plain int, unlike memory/disk). */
00429 void work_queue_task_specify_gpus( struct work_queue_task *t, int gpus );
00430 
/* End time for the task; the parameter is named useconds, i.e. presumably microseconds -- epoch/reference point not shown here. */
00438 void work_queue_task_specify_end_time( struct work_queue_task *t, int64_t useconds );
00439 
/* Running-time limit for the task; the parameter is named useconds, i.e. presumably microseconds. */
00447 void work_queue_task_specify_running_time( struct work_queue_task *t, int64_t useconds );
00448 
/* Sets the task's tag string (cf. work_queue_task.tag and work_queue_cancel_by_tasktag()). */
00455 void work_queue_task_specify_tag(struct work_queue_task *t, const char *tag);
00456 
/* Sets the task's category name (cf. work_queue_task.category). */
00463 void work_queue_task_specify_category(struct work_queue_task *t, const char *category);
00464 
/* Sets the task's priority (double, matching work_queue_task.priority). */
00471 void work_queue_task_specify_priority(struct work_queue_task *t, double priority );
00472 
/* Sets environment variable `name` to `value` for the task. The public name is spelled "enviroment"; keep it as-is for existing callers. */
00479 void work_queue_task_specify_enviroment_variable( struct work_queue_task *t, const char *name, const char *value );
00480 
/* Sets the per-task scheduling algorithm (cf. work_queue_task.worker_selection_algorithm). */
00486 void work_queue_task_specify_algorithm(struct work_queue_task *t, work_queue_schedule_t algorithm);
00487 
/* Sets the task's monitor output location (cf. work_queue_task.monitor_output_directory). */
00493 void work_queue_task_specify_monitor_output(struct work_queue_task *t, const char *monitor_output);
00494 
/* Disposes of task t; which members it frees is not shown here. */
00499 void work_queue_task_delete(struct work_queue_task *t);
00500 
00502 
00506 
/*
 * Queue creation, submission, waiting and status queries. struct work_queue is used only through
 * pointers in this header (its definition is not shown here).
 * NOTE(review): summaries are derived from names and signatures only.
 */
/* Returns a queue pointer for the given port; WORK_QUEUE_DEFAULT_PORT and WORK_QUEUE_RANDOM_PORT are presumably valid arguments -- confirm. Failure value not shown here. */
00523 struct work_queue *work_queue_create(int port);
00524 
/* Turns on monitoring with output under monitor_output_directory (note: non-const char *). Returns int; meaning not shown here. */
00537 int work_queue_enable_monitoring(struct work_queue *q, char *monitor_output_directory);
00538 
/* Same signature as work_queue_enable_monitoring(); by name, a fuller monitoring mode -- confirm the difference. */
00547 int work_queue_enable_monitoring_full(struct work_queue *q, char *monitor_output_directory);
00548 
/* Submits task t to queue q. Returns int; presumably the task id (cf. work_queue_task.taskid) -- confirm. */
00557 int work_queue_submit(struct work_queue *q, struct work_queue_task *t);
00558 
00559 
/* Sets a lower bound minid for task ids of q. Returns int; meaning not shown here. */
00570 int work_queue_specify_min_taskid(struct work_queue *q, int minid);
00571 
/* Adds hostname to the queue's blacklist. */
00576 void work_queue_blacklist_add(struct work_queue *q, const char *hostname);
00577 
/* As work_queue_blacklist_add(), with a duration given as time_t seconds. */
00585 void work_queue_blacklist_add_with_timeout(struct work_queue *q, const char *hostname, time_t seconds);
00586 
00587 
/* Removes hostname from the queue's blacklist. */
00592 void work_queue_blacklist_remove(struct work_queue *q, const char *hostname);
00593 
00594 
/* Empties the queue's blacklist. */
00598 void work_queue_blacklist_clear(struct work_queue *q);
00599 
/* Invalidates the cached object local_name of kind `type` (a work_queue_file_t, not a work_queue_file_type_t). */
00613 void work_queue_invalidate_cached_file(struct work_queue *q, const char *local_name, work_queue_file_t type);
00614 
00615 
/* Waits on q and returns a task pointer. `timeout` is an int; units not shown here, and WORK_QUEUE_WAITFORTASK (-1) is presumably accepted -- confirm. Return value when nothing completes is not shown here. */
00630 struct work_queue_task *work_queue_wait(struct work_queue *q, int timeout);
00631 
/* Returns int; by name, tells whether q wants more tasks -- confirm exact semantics. */
00643 int work_queue_hungry(struct work_queue *q);
00644 
/* Returns int; by name, tells whether q has no tasks left -- confirm exact semantics. */
00652 int work_queue_empty(struct work_queue *q);
00653 
/* Returns the port of q as an int. */
00660 int work_queue_port(struct work_queue *q);
00661 
/* Fills the caller-provided *s with statistics for q. */
00666 void work_queue_get_stats(struct work_queue *q, struct work_queue_stats *s);
00667 
/* Same signature as work_queue_get_stats(); by name, covers a hierarchy of queues -- confirm. */
00672 void work_queue_get_stats_hierarchy(struct work_queue *q, struct work_queue_stats *s);
00673 
/* Fills *s with statistics for the category named c. */
00679 void work_queue_get_stats_category(struct work_queue *q, const char *c, struct work_queue_stats *s);
00680 
00681 
/* Returns the state of the task identified by taskid as a work_queue_task_state_t. */
00687 work_queue_task_state_t work_queue_task_state(struct work_queue *q, int taskid);
00688 
/* Sets a bandwidth limit from a string; accepted format and units not shown here. */
00693 void work_queue_set_bandwidth_limit(struct work_queue *q, const char *bandwidth);
00694 
/* Returns a bandwidth figure as double; units not shown here. */
00699 double work_queue_get_effective_bandwidth(struct work_queue *q);
00700 
/* Returns a non-const char *; presumably a heap string the caller must free -- confirm ownership. */
00707 char * work_queue_get_worker_summary( struct work_queue *q );
00708 
/*
 * Queue tuning, naming, catalog, cancellation and teardown.
 * NOTE(review): summaries are derived from names and signatures only.
 */
/* Enables fast abort on q with a double multiplier; what the multiplier scales is not shown here. Returns int. */
00718 int work_queue_activate_fast_abort(struct work_queue *q, double multiplier);
00719 
00720 
/* As work_queue_activate_fast_abort(), restricted to the named category. */
00730 int work_queue_activate_fast_abort_category(struct work_queue *q, const char *category, double multiplier);
00731 
/* Sets the mode of a category; category_mode_t comes from category.h. Returns int. */
00738 int work_queue_specify_category_mode(struct work_queue *q, const char *category, category_mode_t mode);
00739 
/* Per-category, per-resource int switch named autolabel; resource is given by name as a string. Returns int. */
00747 int work_queue_enable_category_resource(struct work_queue *q, const char *category, const char *resource, int autolabel);
00748 
/* Sets the queue-wide scheduling algorithm (cf. work_queue_task_specify_algorithm() for the per-task one). */
00754 void work_queue_specify_algorithm(struct work_queue *q, work_queue_schedule_t algorithm);
00755 
/* Returns the queue's name as a const string; lifetime of the pointer not shown here. */
00760 const char *work_queue_name(struct work_queue *q);
00761 
/* Sets the queue's name. */
00766 void work_queue_specify_name(struct work_queue *q, const char *name);
00767 
/* Sets the queue's priority (int; cf. work_queue_stats.priority). */
00772 void work_queue_specify_priority(struct work_queue *q, int priority);
00773 
/* Records ntasks as the number of tasks left; how the value is used is not shown here. */
00782 void work_queue_specify_num_tasks_left(struct work_queue *q, int ntasks);
00783 
/* Sets a single catalog server by hostname and port. */
00789 void work_queue_specify_catalog_server(struct work_queue *q, const char *hostname, int port);
00790 
/* Sets catalog servers from one string `hosts`; the list format is not shown here. */
00795 void work_queue_specify_catalog_servers(struct work_queue *q, const char *hosts);
00796 
/* Cancels the task with the given id and returns a task pointer; value when no task matches is not shown here. */
00802 struct work_queue_task *work_queue_cancel_by_taskid(struct work_queue *q, int id);
00803 
/* Cancels a task by tag and returns a task pointer; behavior when several tasks share the tag is not shown here. */
00809 struct work_queue_task *work_queue_cancel_by_tasktag(struct work_queue *q, const char *tag);
00810 
/* Cancels all tasks and returns a struct list pointer; presumably the cancelled tasks -- confirm ownership of the list. */
00815 struct list * work_queue_cancel_all_tasks(struct work_queue *q);
00816 
/* Shuts down workers; n is a count. Returns int; presumably the number affected -- confirm. */
00821 int work_queue_shut_down_workers(struct work_queue *q, int n);
00822 
/* Disposes of queue q. */
00827 void work_queue_delete(struct work_queue *q);
00828 
/*
 * Logging, authentication, keepalive, tuning and resource limits.
 * NOTE(review): summaries are derived from names and signatures only.
 */
/* Sets the log file path for q. Returns int; meaning not shown here. */
00834 int work_queue_specify_log(struct work_queue *q, const char *logfile);
00835 
/* Sets the transactions log file path for q. Returns int; meaning not shown here. */
00841 int work_queue_specify_transactions_log(struct work_queue *q, const char *logfile);
00842 
/* Sets the password directly from a string; returns nothing. */
00848 void work_queue_specify_password( struct work_queue *q, const char *password );
00849 
/* Sets the password from a file path; unlike work_queue_specify_password() this returns int. */
00856 int work_queue_specify_password_file( struct work_queue *q, const char *file );
00857 
/* Sets the keepalive interval; units not shown here (cf. WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL). */
00862 void work_queue_specify_keepalive_interval(struct work_queue *q, int interval);
00863 
/* Sets the keepalive timeout; units not shown here (cf. WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT). */
00868 void work_queue_specify_keepalive_timeout(struct work_queue *q, int timeout);
00869 
/* Sets a preferred connection from a string; accepted values not shown here. */
00875 void work_queue_master_preferred_connection(struct work_queue *q, const char *preferred_connection);
00876 
/* Sets the tuning parameter `name` to `value`; valid names not shown here. Returns int. */
00892 int work_queue_tune(struct work_queue *q, const char *name, double value);
00893 
/* Sets maximum resources for q from a const struct rmsummary. */
00899 void work_queue_specify_max_resources(struct work_queue *q,  const struct rmsummary *rm);
00900 
/* Sets maximum resources for one category. */
00906 void work_queue_specify_category_max_resources(struct work_queue *q,  const char *category, const struct rmsummary *rm);
00907 
/* Sets the first allocation guess for one category. */
00913 void work_queue_specify_category_first_allocation_guess(struct work_queue *q,  const char *category, const struct rmsummary *rm);
00914 
/* Initializes categories from summaries_file with `max` (note: non-const, unlike the setters above). */
00920 void work_queue_initialize_categories(struct work_queue *q, struct rmsummary *max, const char *summaries_file);
00921 
00922 
00924 
00928 
/* Task-order and master-mode selectors with their setters. NOTE(review): meanings are inferred from names; the macros are presumably the valid values of the int arguments below -- confirm. */
00929 #define WORK_QUEUE_TASK_ORDER_FIFO 0  /* By name, first-in-first-out order. */
00930 #define WORK_QUEUE_TASK_ORDER_LIFO 1  /* By name, last-in-first-out order. */
/* Sets the task order of q; `order` is a plain int. */
00938 void work_queue_specify_task_order(struct work_queue *q, int order);
00939 
00940 
00941 #define WORK_QUEUE_MASTER_MODE_STANDALONE 0 /* By name, master runs without a catalog. */
00942 #define WORK_QUEUE_MASTER_MODE_CATALOG 1    /* By name, master uses a catalog (cf. work_queue_specify_catalog_server()). */
/* Sets the master mode of q; `mode` is a plain int. */
00951 void work_queue_specify_master_mode(struct work_queue *q, int mode);
00952 
00953 
/* Int switch named estimate_capacity_on; presumably non-zero enables capacity estimation -- confirm. */
00959 void work_queue_specify_estimate_capacity_on(struct work_queue *q, int estimate_capacity_on);
00960 
/*
 * Narrower file-attachment helpers. NOTE(review): by their names they look like shorthand for
 * work_queue_task_specify_buffer() / work_queue_task_specify_file() with fixed type and flags -- confirm.
 * Parameter order differs between the input and output variants; see the notes below.
 */
/* Attaches `length` bytes at buf as an input named rname. Returns int. */
00969 int work_queue_task_specify_input_buf(struct work_queue_task *t, const char *buf, int length, const char *rname);
00970 
/* Input file: argument order is (fname, rname). */
00978 int work_queue_task_specify_input_file(struct work_queue_task *t, const char *fname, const char *rname);
00979 
/* Input file, by name without caching: argument order is (fname, rname). */
00987 int work_queue_task_specify_input_file_do_not_cache(struct work_queue_task *t, const char *fname, const char *rname);
00988 
/* Output file: argument order is (rname, fname) -- the reverse of the input variants. */
00996 int work_queue_task_specify_output_file(struct work_queue_task *t, const char *rname, const char *fname);
00997 
/* Output file, by name without caching: argument order is (rname, fname) -- the reverse of the input variants. */
01005 int work_queue_task_specify_output_file_do_not_cache(struct work_queue_task *t, const char *rname, const char *fname);
01006 
/* Builds a file name from pwd and taskid and returns it as char *; presumably heap-allocated -- confirm ownership. pwd is non-const. */
01011 char *work_queue_generate_disk_alloc_full_filename(char *pwd, int taskid);
01012 
01014 
01015 #endif

Generated on 19 Sep 2016 for cctools by doxygen 1.6.1