work_queue.h

Go to the documentation of this file.
00001 /*
00002 Copyright (C) 2008- The University of Notre Dame
00003 This software is distributed under the GNU General Public License.
00004 See the file COPYING for details.
00005 */
00006 
00007 #ifndef WORK_QUEUE_H
00008 #define WORK_QUEUE_H
00009 
00020 #include <sys/types.h>
00021 #include "timestamp.h"
00022 #include "category.h"
00023 #include "rmsummary.h"
00024 
/* Default values and sentinel constants for queue configuration. */
00025 #define WORK_QUEUE_DEFAULT_PORT 9123               /**< Default port number (9123). */
00026 #define WORK_QUEUE_RANDOM_PORT  0                  /**< Port value 0; by its name, asks for an arbitrary port -- confirm against work_queue_create(). */
00028 #define WORK_QUEUE_WAITFORTASK  -1                 /**< Sentinel -1; presumably a timeout for work_queue_wait() meaning "block until a task returns" -- TODO confirm. */
00030 #define WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL 120  /**< Default keepalive interval (120); presumably seconds -- confirm. */
00031 #define WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT  30   /**< Default keepalive timeout (30); presumably seconds -- confirm. */
/** Direction of a file attached to a task; this is the type of the `type` argument of work_queue_task_specify_file() and its siblings. */
00033 typedef enum {
00034         WORK_QUEUE_INPUT  = 0,                         /**< The file is an input of the task. */
00035         WORK_QUEUE_OUTPUT = 1                          /**< The file is an output of the task. */
00036 } work_queue_file_type_t;
00037 
/**
 * Per-file option flags; this is the type of the `flags` argument of
 * work_queue_task_specify_file() and its siblings. The non-zero values are
 * powers of two, so they are presumably meant to be OR-ed together -- confirm.
 */
00038 typedef enum {
00039         WORK_QUEUE_NOCACHE  = 0, /**< No flag bits set; by its name, the file is not cached. */
00040         WORK_QUEUE_CACHE    = 1, /**< Bit 0; by its name, the file may be cached -- where and for how long is not visible here. */
00041         WORK_QUEUE_SYMLINK  = 2, /**< Bit 1; presumably link to the file instead of copying it -- confirm. */
00042         WORK_QUEUE_PREEXIST = 4, /**< Bit 2; presumably the file is expected to exist already -- confirm. */
00043         WORK_QUEUE_THIRDGET = 8, /**< Bit 3. */
00044         WORK_QUEUE_THIRDPUT = 8, /**< Same value (8) as WORK_QUEUE_THIRDGET: the two names cannot be told apart at run time. */
00045         WORK_QUEUE_WATCH    = 16 /**< Bit 4; presumably watch the file while the task runs -- confirm. */
00046 } work_queue_file_flags_t;
00047 
/**
 * Scheduling (worker selection) algorithm identifiers. Accepted by
 * work_queue_specify_algorithm() and work_queue_task_specify_algorithm().
 * Only the first enumerator has an explicit value; the rest count up from it.
 */
00048 typedef enum {
00049         WORK_QUEUE_SCHEDULE_UNSET = 0, /**< 0: no algorithm chosen. */
00050         WORK_QUEUE_SCHEDULE_FCFS,      /**< 1: presumably first-come-first-served -- confirm. */
00051         WORK_QUEUE_SCHEDULE_FILES,     /**< 2: presumably prefers workers by files already present -- confirm. */
00052         WORK_QUEUE_SCHEDULE_TIME,      /**< 3: presumably prefers workers by past execution time -- confirm. */
00053         WORK_QUEUE_SCHEDULE_RAND,      /**< 4: presumably random selection -- confirm. */
00054         WORK_QUEUE_SCHEDULE_WORST      /**< 5: NOTE(review): selection criterion not visible here. */
00055 } work_queue_schedule_t;
00056 
00057 
/**
 * Result code of a task (type of work_queue_task::result).
 *
 * The encoding is mixed: values 1, 2 and 4 are single bits in the low three
 * bits, while the remaining codes are the ordinals 1..8 shifted left by 3.
 * The shifted codes are NOT independent bit flags (e.g. 3 << 3 == 24 has the
 * bits of both 1 << 3 and 2 << 3 set), so compare them for equality after
 * masking off the low three bits rather than testing them with `&`.
 */
00058 typedef enum {
00059         WORK_QUEUE_RESULT_SUCCESS             = 0,      /**< 0: no error recorded. */
00060         WORK_QUEUE_RESULT_INPUT_MISSING       = 1,      /**< Bit 0: an input was missing. */
00061         WORK_QUEUE_RESULT_OUTPUT_MISSING      = 2,      /**< Bit 1: an output was missing. */
00062         WORK_QUEUE_RESULT_STDOUT_MISSING      = 4,      /**< Bit 2: the standard output was missing. */
00063         WORK_QUEUE_RESULT_SIGNAL              = 1 << 3, /**< 8: presumably the task was terminated by a signal -- confirm. */
00064         WORK_QUEUE_RESULT_RESOURCE_EXHAUSTION = 2 << 3, /**< 16: presumably the task exceeded its resource allocation -- confirm. */
00065         WORK_QUEUE_RESULT_TASK_TIMEOUT        = 3 << 3, /**< 24: presumably related to work_queue_task_specify_end_time() -- confirm. */
00066         WORK_QUEUE_RESULT_UNKNOWN             = 4 << 3, /**< 32: result could not be classified. */
00067         WORK_QUEUE_RESULT_FORSAKEN            = 5 << 3, /**< 40: NOTE(review): exact meaning not visible here. */
00068         WORK_QUEUE_RESULT_MAX_RETRIES         = 6 << 3, /**< 48: presumably the max_retries limit of the task was reached -- confirm. */
00069         WORK_QUEUE_RESULT_TASK_MAX_RUN_TIME   = 7 << 3, /**< 56: presumably related to work_queue_task_specify_running_time() -- confirm. */
00070         WORK_QUEUE_RESULT_DISK_ALLOC_FULL     = 8 << 3  /**< 64: presumably the disk allocation of the task filled up -- confirm. */
00071 } work_queue_result_t;
00072 
/**
 * State of a task as reported by work_queue_task_state().
 * Only the first enumerator has an explicit value; the rest count up from it.
 * Note the trailing comma after the last enumerator (accepted since C99).
 */
00073 typedef enum {
00074         WORK_QUEUE_TASK_UNKNOWN = 0,       /**< 0: state not known. */
00075         WORK_QUEUE_TASK_READY,             /**< 1: presumably submitted and waiting to be dispatched -- confirm. */
00076         WORK_QUEUE_TASK_RUNNING,           /**< 2: presumably assigned to a worker -- confirm. */
00077         WORK_QUEUE_TASK_WAITING_RETRIEVAL, /**< 3: presumably finished, results not yet fetched -- confirm. */
00078         WORK_QUEUE_TASK_RETRIEVED,         /**< 4: presumably results fetched, not yet returned to the caller -- confirm. */
00079         WORK_QUEUE_TASK_DONE,              /**< 5: presumably returned to the caller -- confirm. */
00080         WORK_QUEUE_TASK_CANCELED,           /**< 6: the task was cancelled. */
00081 } work_queue_task_state_t;
00082 
/**
 * Kind of object attached to a task; this is the type of the `type` argument
 * of work_queue_invalidate_cached_file(). Numbering starts at 1, so 0 is not
 * a valid kind.
 */
00083 typedef enum {
00084         WORK_QUEUE_FILE = 1,              /**< 1: a file (cf. work_queue_task_specify_file()). */
00085         WORK_QUEUE_BUFFER,                /**< 2: an in-memory buffer (cf. work_queue_task_specify_buffer()). */
00086         WORK_QUEUE_REMOTECMD,             /**< 3: NOTE(review): presumably a command run remotely to produce or consume the file -- confirm. */
00087         WORK_QUEUE_FILE_PIECE,            /**< 4: a byte range of a file (cf. work_queue_task_specify_file_piece()). */
00088         WORK_QUEUE_DIRECTORY,             /**< 5: a directory (cf. work_queue_task_specify_directory()). */
00089         WORK_QUEUE_URL                    /**< 6: NOTE(review): presumably a file identified by URL -- confirm. */
00090 } work_queue_file_t;
00091 
00092 
/** Global option declared here and defined in another translation unit. NOTE(review): presumably holds a work_queue_schedule_t value used as the default scheduling algorithm -- confirm. */
00093 extern int wq_option_scheduler;                
/**
 * Description of a single task. Pointers to this structure are returned by
 * work_queue_task_create(), work_queue_task_clone() and work_queue_wait(),
 * and consumed by the work_queue_task_specify_*() functions.
 * The field order is part of the public binary layout; do not reorder.
 */
00103 struct work_queue_task {
00104         char *tag;                                        /**< Caller-chosen label; cf. work_queue_task_specify_tag(). */
00105         char *command_line;                               /**< Command of the task; cf. work_queue_task_specify_command(). */
00106         work_queue_schedule_t worker_selection_algorithm; /**< Per-task scheduling algorithm; cf. work_queue_task_specify_algorithm(). */
00107         char *output;                                     /**< NOTE(review): presumably the captured standard output of the command -- confirm. */
00108         struct list *input_files;                         /**< List of inputs; element type is not visible in this header. */
00109         struct list *output_files;                        /**< List of outputs; element type is not visible in this header. */
00110         struct list *env_list;                            /**< List of environment settings; cf. work_queue_task_specify_enviroment_variable(). */
00111         int taskid;                                       /**< Integer identifier of the task; cf. the `taskid` argument of work_queue_task_state(). */
00112         int return_status;                                /**< NOTE(review): presumably the exit status of the command -- confirm. */
00113         work_queue_result_t result;                       /**< Result code of the task; see work_queue_result_t. */
00114         char *host;                                       /**< NOTE(review): presumably the address of the worker that ran the task -- confirm. */
00115         char *hostname;                                   /**< NOTE(review): presumably the name of the worker that ran the task -- confirm. */
00117         char *category;                         /**< Category name; cf. work_queue_task_specify_category(). */
00118         category_allocation_t resource_request; /**< Allocation kind requested; type presumably comes from category.h. */
00120         double priority;        /**< Task priority; cf. work_queue_task_specify_priority(). */
00121         int max_retries;        /**< Retry limit; note work_queue_task_specify_max_retries() takes an int64_t while this field is an int. */
00123         int try_count;          /**< Number of attempts (named as the replacement for the removed total_submissions below). */
00124         int exhausted_attempts; /**< NOTE(review): presumably attempts that ended in resource exhaustion -- confirm. */
00126         /* All times in microseconds */
00127         /* A time_when_* refers to an instant in time, otherwise it refers to a length of time. */
00128         timestamp_t time_when_submitted;    /**< Instant the task was submitted. */
00129         timestamp_t time_when_done;         /**< Instant the task was done. */
00131         int disk_allocation_exhausted;                        /**< NOTE(review): presumably non-zero when the disk allocation filled up -- confirm. */
00133         timestamp_t time_when_commit_start; /**< Instant the commit of the task started. */
00134         timestamp_t time_when_commit_end;   /**< Instant the commit of the task ended. */
00136         timestamp_t time_when_retrieval;    /**< Instant of retrieval. */
00138         timestamp_t time_workers_execute_last;                 /**< Duration; presumably of the most recent execution attempt -- confirm. */
00139         timestamp_t time_workers_execute_all;                  /**< Duration; presumably summed over all execution attempts -- confirm. */
00140         timestamp_t time_workers_execute_exhaustion;           /**< Duration; presumably of attempts ending in resource exhaustion -- confirm. */
00141         timestamp_t time_workers_execute_failure;              /**< Duration; presumably of attempts ending in worker failure -- confirm. */
00143         int64_t bytes_received;                                /**< Byte count received for this task. */
00144         int64_t bytes_sent;                                    /**< Byte count sent for this task. */
00145         int64_t bytes_transferred;                             /**< Byte count transferred; presumably sent plus received -- confirm. */
00147         struct rmsummary *resources_allocated;                 /**< Resource summary: allocated. */
00148         struct rmsummary *resources_measured;                  /**< Resource summary: measured. */
00149         struct rmsummary *resources_requested;                 /**< Resource summary: requested. */
00150         char *monitor_output_directory;                        /**< Cf. work_queue_task_specify_monitor_output(). */
00152         /* deprecated fields */
00153         //int total_submissions;                                 /**< @deprecated Use try_count. */
00154 
00155         timestamp_t time_task_submit;                          /**< Deprecated per the marker above; presumably superseded by time_when_submitted. */
00156         timestamp_t time_task_finish;                          /**< Deprecated per the marker above; presumably superseded by time_when_done. */
00157         timestamp_t time_committed;                            /**< Deprecated per the marker above. */
00159         timestamp_t time_send_input_start;                     /**< Deprecated per the marker above; presumably superseded by time_when_commit_start. */
00160         timestamp_t time_send_input_finish;                    /**< Deprecated per the marker above; presumably superseded by time_when_commit_end. */
00161         timestamp_t time_receive_result_start;                 /**< Deprecated per the marker above. */
00162         timestamp_t time_receive_result_finish;                /**< Deprecated per the marker above. */
00163         timestamp_t time_receive_output_start;                 /**< Deprecated per the marker above. */
00164         timestamp_t time_receive_output_finish;                /**< Deprecated per the marker above. */
00166         timestamp_t time_execute_cmd_start;                    /**< Deprecated per the marker above. */
00167         timestamp_t time_execute_cmd_finish;                   /**< Deprecated per the marker above. */
00169         timestamp_t total_transfer_time;                       /**< Deprecated per the marker above. */
00171         timestamp_t cmd_execution_time;                        /**< Deprecated per the marker above; presumably superseded by time_workers_execute_last. */
00172         timestamp_t total_cmd_execution_time;                  /**< Deprecated per the marker above; presumably superseded by time_workers_execute_all. */
00173         timestamp_t total_cmd_exhausted_execute_time;          /**< Deprecated per the marker above; presumably superseded by time_workers_execute_exhaustion. */
00174         timestamp_t total_time_until_worker_failure;           /**< Deprecated per the marker above; presumably superseded by time_workers_execute_failure. */
00176         int64_t total_bytes_received;                          /**< Deprecated per the marker above; presumably superseded by bytes_received. */
00177         int64_t total_bytes_sent;                              /**< Deprecated per the marker above; presumably superseded by bytes_sent. */
00178         int64_t total_bytes_transferred;                       /**< Deprecated per the marker above; presumably superseded by bytes_transferred. */
00180         timestamp_t time_app_delay;                            /**< Deprecated per the marker above. */
00181 };
00182 
/**
 * Statistics snapshot filled in by work_queue_get_stats(),
 * work_queue_get_stats_hierarchy() and work_queue_get_stats_category().
 * The caller supplies the storage. The field order is part of the public
 * binary layout; do not reorder.
 */
00185 struct work_queue_stats {
00186         /* Stats for the current state of workers: */
00187         int workers_connected;    /**< Workers currently connected. */
00188         int workers_init;         /**< Workers currently initializing; NOTE(review): exact criterion not visible here. */
00189         int workers_idle;         /**< Workers currently idle. */
00190         int workers_busy;         /**< Workers currently busy. */
00191         int workers_able;         /**< NOTE(review): presumably workers able to run the next task -- confirm. */
00193         /* Cumulative stats for workers: */
00194         int workers_joined;       /**< Cumulative count of workers that joined. */
00195         int workers_removed;      /**< Cumulative count of workers removed. */
00196         int workers_released;     /**< Cumulative count of workers released. */
00197         int workers_idled_out;    /**< Cumulative count of workers that idled out. */
00198         int workers_fast_aborted; /**< Cumulative count of workers fast-aborted; cf. work_queue_activate_fast_abort(). */
00199         int workers_blacklisted ; /**< Cumulative count of workers blacklisted; cf. work_queue_blacklist_add(). */
00200         int workers_lost;         /**< Cumulative count of workers lost. */
00202         /* Stats for the current state of tasks: */
00203         int tasks_waiting;        /**< Tasks currently waiting. */
00204         int tasks_on_workers;     /**< Tasks currently on workers. */
00205         int tasks_running;        /**< Tasks currently running. */
00206         int tasks_with_results;   /**< Tasks currently holding results; presumably awaiting retrieval -- confirm. */
00208         /* Cumulative stats for tasks: */
00209         int tasks_submitted;           /**< Cumulative count of tasks submitted. */
00210         int tasks_dispatched;          /**< Cumulative count of tasks dispatched. */
00211         int tasks_done;                /**< Cumulative count of tasks done. */
00212         int tasks_failed;              /**< Cumulative count of tasks failed. */
00213         int tasks_cancelled;           /**< Cumulative count of tasks cancelled. */
00214         int tasks_exhausted_attempts;  /**< Cumulative count of attempts that exhausted resources -- confirm. */
00216         /* All times in microseconds */
00217         /* A time_when_* refers to an instant in time, otherwise it refers to a length of time. */
00218 
00219         /* Master time statistics: */
00220         timestamp_t time_when_started; /**< Instant the master started. */
00221         timestamp_t time_send;         /**< Duration spent sending. */
00222         timestamp_t time_receive;      /**< Duration spent receiving. */
00223         timestamp_t time_send_good;    /**< Duration spent sending; NOTE(review): "good" presumably means for tasks that succeeded -- confirm. */
00224         timestamp_t time_receive_good; /**< Duration spent receiving; same caveat on "good". */
00225         timestamp_t time_status_msgs;  /**< Duration spent on status messages. */
00226         timestamp_t time_internal;     /**< Duration spent on internal processing. */
00227         timestamp_t time_polling;      /**< Duration spent polling. */
00228         timestamp_t time_application;  /**< Duration spent in the application; presumably outside work_queue_wait() -- confirm. */
00230         /* Workers time statistics: */
00231         timestamp_t time_workers_execute;            /**< Duration workers spent executing. */
00232         timestamp_t time_workers_execute_good;       /**< Duration workers spent executing; same caveat on "good". */
00233         timestamp_t time_workers_execute_exhaustion; /**< Duration workers spent on attempts that exhausted resources -- confirm. */
00235         /* BW statistics */
00236         int64_t bytes_sent;     /**< Bytes sent. */
00237         int64_t bytes_received; /**< Bytes received. */
00238         double  bandwidth;      /**< Bandwidth; units are not visible here. */
00240         /* resources statistics */
00241         int capacity_tasks;     /**< Capacity estimate in tasks; cf. work_queue_specify_estimate_capacity_on(). */
00242         int capacity_cores;     /**< Capacity estimate in cores. */
00243         int capacity_memory;    /**< Capacity estimate in memory; units not visible here. */
00244         int capacity_disk;      /**< Capacity estimate in disk; units not visible here. */
00246         int64_t total_cores;      /**< Total cores. */
00247         int64_t total_memory;     /**< Total memory; units not visible here. */
00248         int64_t total_disk;           /**< Total disk; units not visible here. */
00250         int64_t committed_cores;  /**< Committed cores. */
00251         int64_t committed_memory; /**< Committed memory. */
00252         int64_t committed_disk;   /**< Committed disk. */
00254         int64_t max_cores;        /**< Maximum cores; presumably the largest among workers -- confirm. */
00255         int64_t max_memory;       /**< Maximum memory; same caveat. */
00256         int64_t max_disk;         /**< Maximum disk; same caveat. */
00258         int64_t min_cores;        /**< Minimum cores; presumably the smallest among workers -- confirm. */
00259         int64_t min_memory;       /**< Minimum memory; same caveat. */
00260         int64_t min_disk;         /**< Minimum disk; same caveat. */
        /* NOTE(review): the fields from here on largely duplicate the counters
           above under older names (total_* variants); they look like
           backward-compatibility fields -- confirm before relying on them. */
00263         int total_workers_connected;    /**< Cf. workers_connected. */
00264         int total_workers_joined;       /**< Cf. workers_joined. */
00265         int total_workers_removed;      /**< Cf. workers_removed. */
00266         int total_workers_lost;         /**< Cf. workers_lost. */
00267         int total_workers_idled_out;    /**< Cf. workers_idled_out. */
00268         int total_workers_fast_aborted; /**< Cf. workers_fast_aborted. */
00270         int tasks_complete;             /**< Presumably cf. tasks_with_results -- confirm. */
00272         int total_tasks_dispatched;     /**< Cf. tasks_dispatched. */
00273         int total_tasks_complete;       /**< Presumably cf. tasks_done -- confirm. */
00274         int total_tasks_failed;         /**< Cf. tasks_failed. */
00275         int total_tasks_cancelled;      /**< Cf. tasks_cancelled. */
00276         int total_exhausted_attempts;   /**< Cf. tasks_exhausted_attempts. */
00277         timestamp_t start_time;               /**< Cf. time_when_started. */
00278         timestamp_t total_send_time;          /**< Cf. time_send. */
00279         timestamp_t total_receive_time;       /**< Cf. time_receive. */
00280         timestamp_t total_good_transfer_time; /**< Presumably time_send_good plus time_receive_good -- confirm. */
00282         timestamp_t total_execute_time;           /**< Cf. time_workers_execute. */
00283         timestamp_t total_good_execute_time;      /**< Cf. time_workers_execute_good. */
00284         timestamp_t total_exhausted_execute_time; /**< Cf. time_workers_execute_exhaustion. */
00286         int64_t total_bytes_sent;     /**< Cf. bytes_sent. */
00287         int64_t total_bytes_received; /**< Cf. bytes_received. */
00289         double capacity; /**< NOTE(review): presumably cf. capacity_cores -- confirm. */
00291         double efficiency;      /**< NOTE(review): definition not visible here. */
00292         double idle_percentage; /**< NOTE(review): definition not visible here. */
00294         int64_t total_gpus;       /**< Total GPUs. */
00295         int64_t committed_gpus;   /**< Committed GPUs. */
00296         int64_t max_gpus;         /**< Maximum GPUs. */
00297         int64_t min_gpus;         /**< Minimum GPUs. */
00299         int port;                       /**< Port of the queue; cf. work_queue_port(). */
00300         int priority;                   /**< Priority of the queue; cf. work_queue_specify_priority(). */
00301         int workers_ready;              /**< Presumably cf. workers_idle -- confirm. */
00302         int workers_full;               /**< Presumably cf. workers_busy -- confirm. */
00303         int total_worker_slots;         /**< Presumably cf. tasks_running -- confirm. */
00304         int avg_capacity;               /**< Presumably cf. capacity_cores -- confirm. */
00305 };
00306 
00307 
00311 
/**
 * Create a task from a command string.
 * @param full_command The command for the task.
 * @return Pointer to the new task. NOTE(review): failure behaviour is not
 *         visible here; presumably released with work_queue_task_delete().
 */
00319 struct work_queue_task *work_queue_task_create(const char *full_command);
00320 
/**
 * Produce a copy of a task.
 * @param task Task to copy; taken through a pointer-to-const.
 * @return Pointer to the copy.
 */
00326 struct work_queue_task *work_queue_task_clone(const struct work_queue_task *task);
00327 
/**
 * Set the command of a task.
 * @param t   The task.
 * @param cmd The command string.
 */
00332 void work_queue_task_specify_command( struct work_queue_task *t, const char *cmd );
00333 
/**
 * Attach a file to a task.
 * @param t           The task.
 * @param local_name  Name of the file on the submitting side.
 * @param remote_name Name of the file on the executing side.
 * @param type        WORK_QUEUE_INPUT or WORK_QUEUE_OUTPUT.
 * @param flags       Options from work_queue_file_flags_t.
 * @return int status; NOTE(review): success/failure encoding is not visible here.
 */
00350 int work_queue_task_specify_file(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags);
00351 
/**
 * Attach a byte range of a file to a task. Same parameters as
 * work_queue_task_specify_file() plus the range.
 * @param start_byte First offset of the range.
 * @param end_byte   Last offset of the range; NOTE(review): whether it is
 *                   inclusive is not visible here.
 * @return int status; encoding not visible here.
 */
00366 int work_queue_task_specify_file_piece(struct work_queue_task *t, const char *local_name, const char *remote_name, off_t start_byte, off_t end_byte, work_queue_file_type_t type, work_queue_file_flags_t flags);
00367 
/**
 * Attach an in-memory buffer to a task.
 * @param t           The task.
 * @param data        Start of the buffer.
 * @param length      Number of bytes in the buffer.
 * @param remote_name Name associated with the buffer on the executing side.
 * @param flags       Options from work_queue_file_flags_t (parameter now
 *                    named, matching work_queue_task_specify_file()).
 * @return int status; NOTE(review): success/failure encoding is not visible here.
 */
00378 int work_queue_task_specify_buffer(struct work_queue_task *t, const char *data, int length, const char *remote_name, work_queue_file_flags_t flags);
00379 
/**
 * Attach a directory to a task.
 * @param t           The task.
 * @param local_name  Name of the directory on the submitting side.
 * @param remote_name Name of the directory on the executing side.
 * @param type        WORK_QUEUE_INPUT or WORK_QUEUE_OUTPUT.
 * @param flags       Options from work_queue_file_flags_t (parameter now
 *                    named, matching work_queue_task_specify_file()).
 * @param recursive   Integer switch; presumably non-zero includes the
 *                    directory contents recursively -- confirm.
 * @return int status; NOTE(review): success/failure encoding is not visible here.
 */
00393 int work_queue_task_specify_directory(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags, int recursive);
00394 
/** Set the retry limit of task @a t. Note the argument is int64_t while work_queue_task::max_retries is an int. */
00400 void work_queue_task_specify_max_retries( struct work_queue_task *t, int64_t max_retries );
00401 
/** Set the memory requested by task @a t. NOTE(review): units are not visible here. */
00407 void work_queue_task_specify_memory( struct work_queue_task *t, int64_t memory );
00408 
/** Set the disk requested by task @a t. NOTE(review): units are not visible here. */
00414 void work_queue_task_specify_disk( struct work_queue_task *t, int64_t disk );
00415 
/** Set the number of cores requested by task @a t. */
00421 void work_queue_task_specify_cores( struct work_queue_task *t, int cores );
00422 
/** Set the number of GPUs requested by task @a t. */
00428 void work_queue_task_specify_gpus( struct work_queue_task *t, int gpus );
00429 
/** Set an end time for task @a t, in microseconds per the parameter name. NOTE(review): the reference epoch is not visible here. */
00437 void work_queue_task_specify_end_time( struct work_queue_task *t, int64_t useconds );
00438 
/** Set a running-time value for task @a t, in microseconds per the parameter name; presumably a maximum -- confirm. */
00446 void work_queue_task_specify_running_time( struct work_queue_task *t, int64_t useconds );
00447 
/** Set the tag (caller-chosen label) of task @a t. */
00454 void work_queue_task_specify_tag(struct work_queue_task *t, const char *tag);
00455 
/** Set the category name of task @a t. */
00462 void work_queue_task_specify_category(struct work_queue_task *t, const char *category);
00463 
/** Set the priority of task @a t. NOTE(review): ordering (higher first or lower first) is not visible here. */
00470 void work_queue_task_specify_priority(struct work_queue_task *t, double priority );
00471 
/** Set an environment variable @a name to @a value for task @a t. The spelling "enviroment" is part of the public symbol name. */
00478 void work_queue_task_specify_enviroment_variable( struct work_queue_task *t, const char *name, const char *value );
00479 
/** Set the scheduling algorithm used for task @a t; see work_queue_schedule_t. */
00485 void work_queue_task_specify_algorithm(struct work_queue_task *t, work_queue_schedule_t algorithm);
00486 
/** Set the monitor output location for task @a t; cf. work_queue_task::monitor_output_directory. */
00492 void work_queue_task_specify_monitor_output(struct work_queue_task *t, const char *monitor_output);
00493 
/** Delete task @a t. NOTE(review): presumably frees the task and the data it owns -- confirm. */
00498 void work_queue_task_delete(struct work_queue_task *t);
00499 
00501 
00505 
/**
 * Create a queue.
 * @param port Port number; see WORK_QUEUE_DEFAULT_PORT and WORK_QUEUE_RANDOM_PORT.
 * @return Pointer to the new queue. NOTE(review): failure behaviour is not
 *         visible here; presumably released with work_queue_delete().
 */
00522 struct work_queue *work_queue_create(int port);
00523 
/** Enable monitoring for queue @a q, writing under @a monitor_output_directory. Returns int; encoding not visible here. Note the directory is taken as non-const char *. */
00536 int work_queue_enable_monitoring(struct work_queue *q, char *monitor_output_directory);
00537 
/** Variant of work_queue_enable_monitoring(); NOTE(review): what "full" adds is not visible here. */
00546 int work_queue_enable_monitoring_full(struct work_queue *q, char *monitor_output_directory);
00547 
/** Submit task @a t to queue @a q. Returns int; presumably the task id assigned -- confirm. */
00556 int work_queue_submit(struct work_queue *q, struct work_queue_task *t);
00557 
00558 
/** Set a minimum task id for queue @a q. Returns int; presumably the minimum actually in effect -- confirm. */
00569 int work_queue_specify_min_taskid(struct work_queue *q, int minid);
00570 
/** Add @a hostname to the blacklist of queue @a q. */
00575 void work_queue_blacklist_add(struct work_queue *q, const char *hostname);
00576 
/** Add @a hostname to the blacklist of queue @a q for @a seconds (a time_t). */
00584 void work_queue_blacklist_add_with_timeout(struct work_queue *q, const char *hostname, time_t seconds);
00585 
00586 
/** Remove @a hostname from the blacklist of queue @a q. */
00591 void work_queue_blacklist_remove(struct work_queue *q, const char *hostname);
00592 
00593 
/** Clear the blacklist of queue @a q. */
00597 void work_queue_blacklist_clear(struct work_queue *q);
00598 
/** Invalidate the cached copy of @a local_name, of kind @a type (see work_queue_file_t), for queue @a q. */
00612 void work_queue_invalidate_cached_file(struct work_queue *q, const char *local_name, work_queue_file_t type);
00613 
00614 
/**
 * Wait on queue @a q.
 * @param timeout Integer timeout; see WORK_QUEUE_WAITFORTASK. NOTE(review):
 *                presumably seconds -- confirm.
 * @return A task pointer; presumably NULL when no task completed in time -- confirm.
 */
00629 struct work_queue_task *work_queue_wait(struct work_queue *q, int timeout);
00630 
/** Query whether queue @a q is "hungry". Returns int; NOTE(review): presumably non-zero when more tasks should be submitted -- confirm. */
00642 int work_queue_hungry(struct work_queue *q);
00643 
/** Query whether queue @a q is empty. Returns int; presumably non-zero when empty -- confirm. */
00651 int work_queue_empty(struct work_queue *q);
00652 
/** Return the port of queue @a q as an int. */
00659 int work_queue_port(struct work_queue *q);
00660 
/** Fill the caller-provided @a s with statistics of queue @a q. */
00665 void work_queue_get_stats(struct work_queue *q, struct work_queue_stats *s);
00666 
/** Fill @a s with statistics of queue @a q; NOTE(review): the "hierarchy" covered is not visible here. */
00671 void work_queue_get_stats_hierarchy(struct work_queue *q, struct work_queue_stats *s);
00672 
/** Fill @a s with statistics of category @a c of queue @a q. */
00678 void work_queue_get_stats_category(struct work_queue *q, const char *c, struct work_queue_stats *s);
00679 
00680 
/** Return the state of the task identified by @a taskid in queue @a q; see work_queue_task_state_t. */
00686 work_queue_task_state_t work_queue_task_state(struct work_queue *q, int taskid);
00687 
/** Set a bandwidth limit for queue @a q. The limit is passed as a string; its format is not visible here. */
00692 void work_queue_set_bandwidth_limit(struct work_queue *q, const char *bandwidth);
00693 
/** Return the effective bandwidth of queue @a q as a double; units not visible here. */
00698 double work_queue_get_effective_bandwidth(struct work_queue *q);
00699 
/** Return a worker summary string for queue @a q. The result is non-const char *; NOTE(review): presumably heap-allocated and owned by the caller -- confirm. */
00706 char * work_queue_get_worker_summary( struct work_queue *q );
00707 
/** Turn on fast abort for queue @a q with the given @a multiplier. Returns int; encoding not visible here. */
00717 int work_queue_activate_fast_abort(struct work_queue *q, double multiplier);
00718 
00719 
/** As work_queue_activate_fast_abort(), restricted to @a category. */
00729 int work_queue_activate_fast_abort_category(struct work_queue *q, const char *category, double multiplier);
00730 
/** Set the mode of @a category in queue @a q; category_mode_t presumably comes from category.h. Returns int; encoding not visible here. */
00737 int work_queue_specify_category_mode(struct work_queue *q, const char *category, category_mode_t mode);
00738 
/** Set automatic labelling of @a resource for @a category according to the integer switch @a autolabel. Returns int; encoding not visible here. */
00746 int work_queue_enable_category_resource(struct work_queue *q, const char *category, const char *resource, int autolabel);
00747 
/** Set the scheduling algorithm of queue @a q; see work_queue_schedule_t. */
00753 void work_queue_specify_algorithm(struct work_queue *q, work_queue_schedule_t algorithm);
00754 
/** Return the name of queue @a q; the result is const and must not be modified by the caller. */
00759 const char *work_queue_name(struct work_queue *q);
00760 
/** Set the name of queue @a q. */
00765 void work_queue_specify_name(struct work_queue *q, const char *name);
00766 
/** Set the priority of queue @a q. */
00771 void work_queue_specify_priority(struct work_queue *q, int priority);
00772 
/** Tell queue @a q how many tasks are left (@a ntasks). NOTE(review): presumably informational -- confirm. */
00781 void work_queue_specify_num_tasks_left(struct work_queue *q, int ntasks);
00782 
/** Set the catalog server of queue @a q by @a hostname and @a port. */
00788 void work_queue_specify_catalog_server(struct work_queue *q, const char *hostname, int port);
00789 
/** Set the catalog servers of queue @a q from a single string @a hosts; its format is not visible here. */
00794 void work_queue_specify_catalog_servers(struct work_queue *q, const char *hosts);
00795 
/** Cancel the task with id @a id in queue @a q. Returns a task pointer; presumably NULL when no such task exists -- confirm. */
00801 struct work_queue_task *work_queue_cancel_by_taskid(struct work_queue *q, int id);
00802 
/** Cancel a task with tag @a tag in queue @a q. Returns a task pointer; presumably NULL when no such task exists -- confirm. */
00808 struct work_queue_task *work_queue_cancel_by_tasktag(struct work_queue *q, const char *tag);
00809 
/** Cancel all tasks in queue @a q. Returns a list; presumably of the cancelled tasks -- confirm. */
00814 struct list * work_queue_cancel_all_tasks(struct work_queue *q);
00815 
/** Shut down workers of queue @a q. @a n is presumably the number of workers to shut down; the return is presumably the number shut down -- confirm. */
00820 int work_queue_shut_down_workers(struct work_queue *q, int n);
00821 
/** Delete queue @a q. */
00826 void work_queue_delete(struct work_queue *q);
00827 
/** Set the log file of queue @a q. Returns int; encoding not visible here. */
00833 int work_queue_specify_log(struct work_queue *q, const char *logfile);
00834 
/** Set the transactions log file of queue @a q. Returns int; encoding not visible here. */
00840 int work_queue_specify_transactions_log(struct work_queue *q, const char *logfile);
00841 
/** Set the password of queue @a q from a string. */
00847 void work_queue_specify_password( struct work_queue *q, const char *password );
00848 
/** Set the password of queue @a q from the file named @a file. Returns int; encoding not visible here. */
00855 int work_queue_specify_password_file( struct work_queue *q, const char *file );
00856 
/** Set the keepalive interval of queue @a q; see WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL. */
00861 void work_queue_specify_keepalive_interval(struct work_queue *q, int interval);
00862 
/** Set the keepalive timeout of queue @a q; see WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT. */
00867 void work_queue_specify_keepalive_timeout(struct work_queue *q, int timeout);
00868 
/** Set the preferred connection of the master for queue @a q. The accepted strings are not visible here. */
00874 void work_queue_master_preferred_connection(struct work_queue *q, const char *preferred_connection);
00875 
/** Set the tuning parameter @a name of queue @a q to @a value. Returns int; NOTE(review): presumably signals an unknown name -- confirm. */
00891 int work_queue_tune(struct work_queue *q, const char *name, double value);
00892 
/** Set maximum resources for queue @a q from @a rm (not modified through this pointer). */
00898 void work_queue_specify_max_resources(struct work_queue *q,  const struct rmsummary *rm);
00899 
/** Set maximum resources for @a category of queue @a q from @a rm. */
00905 void work_queue_specify_category_max_resources(struct work_queue *q,  const char *category, const struct rmsummary *rm);
00906 
/** Set the first-allocation guess for @a category of queue @a q from @a rm. */
00912 void work_queue_specify_category_first_allocation_guess(struct work_queue *q,  const char *category, const struct rmsummary *rm);
00913 
/** Initialize the categories of queue @a q from @a max and the file named @a summaries_file. Note @a max is taken as non-const. */
00919 void work_queue_initialize_categories(struct work_queue *q, struct rmsummary *max, const char *summaries_file);
00920 
00921 
00923 
00927 
/* Values for the `order` argument of work_queue_specify_task_order(). */
00928 #define WORK_QUEUE_TASK_ORDER_FIFO 0  /**< 0: first in, first out. */
00929 #define WORK_QUEUE_TASK_ORDER_LIFO 1  /**< 1: last in, first out. */
/** Set the task order of queue @a q to WORK_QUEUE_TASK_ORDER_FIFO or WORK_QUEUE_TASK_ORDER_LIFO. */
00937 void work_queue_specify_task_order(struct work_queue *q, int order);
00938 
00939 
/* Values for the `mode` argument of work_queue_specify_master_mode(). */
00940 #define WORK_QUEUE_MASTER_MODE_STANDALONE 0 /**< 0: standalone. */
00941 #define WORK_QUEUE_MASTER_MODE_CATALOG 1    /**< 1: catalog; cf. work_queue_specify_catalog_server(). */
/** Set the master mode of queue @a q to WORK_QUEUE_MASTER_MODE_STANDALONE or WORK_QUEUE_MASTER_MODE_CATALOG. */
00950 void work_queue_specify_master_mode(struct work_queue *q, int mode);
00951 
00952 
/** Switch capacity estimation for queue @a q according to the integer @a estimate_capacity_on; presumably non-zero enables it -- confirm. */
00958 void work_queue_specify_estimate_capacity_on(struct work_queue *q, int estimate_capacity_on);
00959 
/** Attach the buffer @a buf of @a length bytes to task @a t as input named @a rname. NOTE(review): looks like an older form of work_queue_task_specify_buffer() -- confirm. */
00968 int work_queue_task_specify_input_buf(struct work_queue_task *t, const char *buf, int length, const char *rname);
00969 
/** Attach @a fname to task @a t as an input named @a rname. NOTE(review): looks like an older form of work_queue_task_specify_file() -- confirm. */
00977 int work_queue_task_specify_input_file(struct work_queue_task *t, const char *fname, const char *rname);
00978 
/** As work_queue_task_specify_input_file(), without caching per the function name. */
00986 int work_queue_task_specify_input_file_do_not_cache(struct work_queue_task *t, const char *fname, const char *rname);
00987 
/** Attach an output to task @a t. Note the argument order is (rname, fname), the reverse of the input-file variants. */
00995 int work_queue_task_specify_output_file(struct work_queue_task *t, const char *rname, const char *fname);
00996 
/** As work_queue_task_specify_output_file(), without caching per the function name; the argument order is again (rname, fname). */
01004 int work_queue_task_specify_output_file_do_not_cache(struct work_queue_task *t, const char *rname, const char *fname);
01005 
/** Build a file name from @a pwd and @a taskid. Returns non-const char *; NOTE(review): presumably heap-allocated and owned by the caller -- confirm. Related to WORK_QUEUE_RESULT_DISK_ALLOC_FULL by name. */
01010 char *work_queue_generate_disk_alloc_full_filename(char *pwd, int taskid);
01011 
01013 
01014 #endif

Generated on 17 Oct 2016 for cctools by doxygen 1.6.1