00001
00002
00003
00004
00005
00006
00007 #ifndef WORK_QUEUE_H
00008 #define WORK_QUEUE_H
00009
00020 #include <sys/types.h>
00021 #include "timestamp.h"
00022 #include "category.h"
00023 #include "rmsummary.h"
00024
/* Port number 9123. NOTE(review): presumably the port to hand to work_queue_create() when the caller has no preference -- confirm. */
00025 #define WORK_QUEUE_DEFAULT_PORT 9123
/* Port number 0. NOTE(review): presumably tells work_queue_create() to choose any available port -- confirm against the implementation. */
00026 #define WORK_QUEUE_RANDOM_PORT 0
/* The value -1. NOTE(review): presumably a timeout argument for work_queue_wait() meaning "wait until a task is available" -- confirm. */
00028 #define WORK_QUEUE_WAITFORTASK -1
/* Default keepalive interval (120). NOTE(review): units are not stated in this header; presumably seconds -- confirm. */
00030 #define WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL 120
/* Default keepalive timeout (30). NOTE(review): units are not stated in this header; presumably seconds -- confirm. */
00031 #define WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT 30
/**
 * Direction of a file attached to a task. This is the type of the `type`
 * parameter of work_queue_task_specify_file(), work_queue_task_specify_file_piece()
 * and work_queue_task_specify_directory() declared in this header.
 */
00033 typedef enum {
/* The file is an input of the task. */
00034 WORK_QUEUE_INPUT = 0,
/* The file is an output of the task. */
00035 WORK_QUEUE_OUTPUT = 1
00036 } work_queue_file_type_t;
00037
/**
 * Flags describing how a file attached to a task is handled. This is the type of
 * the `flags` parameter of the work_queue_task_specify_*() file functions.
 * Apart from WORK_QUEUE_NOCACHE (0), every value is a single bit (1, 2, 4, 8, 16).
 * NOTE(review): the bit layout suggests the flags are meant to be OR-ed together,
 * but the code that interprets them is not in this header -- confirm.
 */
00038 typedef enum {
/* No bits set. NOTE(review): presumably "do not cache the file on the worker" -- confirm. */
00039 WORK_QUEUE_NOCACHE = 0,
/* NOTE(review): presumably "keep the file cached on the worker for later tasks" -- confirm. */
00040 WORK_QUEUE_CACHE = 1,
/* NOTE(review): presumably "create a symlink instead of copying the file" -- confirm. */
00041 WORK_QUEUE_SYMLINK = 2,
/* NOTE(review): presumably "the file already exists at the remote side" -- confirm. */
00042 WORK_QUEUE_PREEXIST = 4,
/* THIRDGET and THIRDPUT deliberately or accidentally share the value 8, so they
 * cannot be told apart once stored. NOTE(review): confirm that they are intended
 * to be aliases of each other. */
00043 WORK_QUEUE_THIRDGET = 8,
00044 WORK_QUEUE_THIRDPUT = 8,
/* NOTE(review): presumably "watch the file for changes while the task runs" -- confirm. */
00045 WORK_QUEUE_WATCH = 16
00046 } work_queue_file_flags_t;
00047
/**
 * Worker-selection algorithm identifiers. Used as the type of
 * work_queue_task::worker_selection_algorithm and as the `algorithm` argument of
 * work_queue_specify_algorithm() and work_queue_task_specify_algorithm().
 * Only UNSET has an explicit value (0); the rest follow sequentially (1..5).
 * NOTE(review): the selection criteria themselves are implemented elsewhere. The
 * names suggest first-come-first-served, cached files, task time, random choice
 * and worst fit -- confirm against the scheduler implementation.
 */
00048 typedef enum {
/* Zero value: no algorithm chosen. */
00049 WORK_QUEUE_SCHEDULE_UNSET = 0,
00050 WORK_QUEUE_SCHEDULE_FCFS,
00051 WORK_QUEUE_SCHEDULE_FILES,
00052 WORK_QUEUE_SCHEDULE_TIME,
00053 WORK_QUEUE_SCHEDULE_RAND,
00054 WORK_QUEUE_SCHEDULE_WORST
00055 } work_queue_schedule_t;
00056
00057
/**
 * Result code of a task; this is the type of work_queue_task::result.
 *
 * Encoding as written below: the three *_MISSING values occupy the low three
 * bits (1, 2, 4). All later values are a small integer shifted left by three
 * (k << 3, i.e. 8, 16, 24, ... 64), so they sit above those low bits. The
 * shifted values are sequential codes, not independent bits: for example
 * TASK_TIMEOUT (3 << 3 == 24) equals SIGNAL | RESOURCE_EXHAUSTION numerically,
 * so they must be compared for equality rather than tested with a bit mask.
 * NOTE(review): whether the low bits are ever combined with a shifted code is
 * decided by code outside this header -- confirm before relying on it.
 */
00058 typedef enum {
/* Zero: no failure recorded. */
00059 WORK_QUEUE_RESULT_SUCCESS = 0,
/* Low-bit values (bits 0..2). */
00060 WORK_QUEUE_RESULT_INPUT_MISSING = 1,
00061 WORK_QUEUE_RESULT_OUTPUT_MISSING = 2,
00062 WORK_QUEUE_RESULT_STDOUT_MISSING = 4,
/* Shifted codes: value is (k << 3) for k = 1..8. */
00063 WORK_QUEUE_RESULT_SIGNAL = 1 << 3,
00064 WORK_QUEUE_RESULT_RESOURCE_EXHAUSTION = 2 << 3,
00065 WORK_QUEUE_RESULT_TASK_TIMEOUT = 3 << 3,
00066 WORK_QUEUE_RESULT_UNKNOWN = 4 << 3,
00067 WORK_QUEUE_RESULT_FORSAKEN = 5 << 3,
00068 WORK_QUEUE_RESULT_MAX_RETRIES = 6 << 3,
00069 WORK_QUEUE_RESULT_TASK_MAX_RUN_TIME = 7 << 3,
00070 WORK_QUEUE_RESULT_DISK_ALLOC_FULL = 8 << 3
00071 } work_queue_result_t;
00072
/**
 * State of a task as reported by work_queue_task_state(). UNKNOWN is 0 and the
 * remaining states are numbered sequentially (READY == 1 ... CANCELED == 6).
 * NOTE(review): the transitions between states are implemented elsewhere; the
 * declaration order presumably follows a task's normal progression -- confirm.
 * The trailing comma after the last enumerator is valid since C99.
 */
00073 typedef enum {
00074 WORK_QUEUE_TASK_UNKNOWN = 0,
00075 WORK_QUEUE_TASK_READY,
00076 WORK_QUEUE_TASK_RUNNING,
00077 WORK_QUEUE_TASK_WAITING_RETRIEVAL,
00078 WORK_QUEUE_TASK_RETRIEVED,
00079 WORK_QUEUE_TASK_DONE,
00080 WORK_QUEUE_TASK_CANCELED,
00081 } work_queue_task_state_t;
00082
/**
 * Kind of object attached to a task; this is the type of the `type` argument of
 * work_queue_invalidate_cached_file(). Numbering starts at 1 (FILE == 1 ...
 * URL == 6), so 0 is not a valid member.
 * NOTE(review): the kinds presumably correspond to the work_queue_task_specify_*()
 * functions of similar name (file, buffer, file piece, directory) -- confirm.
 */
00083 typedef enum {
00084 WORK_QUEUE_FILE = 1,
00085 WORK_QUEUE_BUFFER,
00086 WORK_QUEUE_REMOTECMD,
00087 WORK_QUEUE_FILE_PIECE,
00088 WORK_QUEUE_DIRECTORY,
00089 WORK_QUEUE_URL
00090 } work_queue_file_t;
00091
00092
/* Global integer declared here and defined in another translation unit.
 * NOTE(review): presumably holds a work_queue_schedule_t value used as the
 * default scheduling algorithm -- confirm where it is defined and read. */
00093 extern int wq_option_scheduler;
/**
 * Description of a single task. The structure is fully defined in this public
 * header, so its fields are directly visible to every includer; field order and
 * types are therefore part of the binary interface.
 * `struct list` is used only through pointers and is not defined in this header.
 * timestamp_t and category_allocation_t are not defined here either; presumably
 * they come from "timestamp.h" and "category.h" -- confirm.
 * NOTE(review): units of the timestamp_t fields are not stated here -- confirm.
 */
00103 struct work_queue_task {
/* Identification and command. NOTE(review): presumably set through
 * work_queue_task_specify_tag()/work_queue_task_specify_command() -- confirm. */
00104 char *tag;
00105 char *command_line;
00106 work_queue_schedule_t worker_selection_algorithm;
/* NOTE(review): presumably the captured standard output of the command -- confirm. */
00107 char *output;
/* Lists of attached files and environment entries; element types are not visible here. */
00108 struct list *input_files;
00109 struct list *output_files;
00110 struct list *env_list;
/* Task identifier and outcome. */
00111 int taskid;
00112 int return_status;
00113 work_queue_result_t result;
/* NOTE(review): two separate strings for the worker; presumably address vs. name -- confirm. */
00114 char *host;
00115 char *hostname;
/* Category and resource request mode. */
00117 char *category;
00118 category_allocation_t resource_request;
/* Scheduling priority and retry accounting. */
00120 double priority;
00121 int max_retries;
00123 int try_count;
00124 int exhausted_attempts;
00126
00127
/* Timing and transfer measurements for this task. */
00128 timestamp_t time_when_submitted;
00129 timestamp_t time_when_done;
00131 int disk_allocation_exhausted;
00133 timestamp_t time_when_commit_start;
00134 timestamp_t time_when_commit_end;
00136 timestamp_t time_when_retrieval;
00138 timestamp_t time_workers_execute_last;
00139 timestamp_t time_workers_execute_all;
00140 timestamp_t time_workers_execute_exhaustion;
00141 timestamp_t time_workers_execute_failure;
00143 int64_t bytes_received;
00144 int64_t bytes_sent;
00145 int64_t bytes_transferred;
/* Resource summaries (struct rmsummary, presumably from "rmsummary.h") and monitoring outputs. */
00147 struct rmsummary *resources_allocated;
00148 struct rmsummary *resources_measured;
00149 struct rmsummary *resources_requested;
00150 char *monitor_output_directory;
00152 char *monitor_snapshot_file;
00153 struct list *features;
00155
00156
00157
/* NOTE(review): the fields from here to the end of the struct appear to repeat
 * the timing and byte counters above under different names (e.g.
 * total_bytes_sent vs. bytes_sent); presumably legacy names kept for backward
 * compatibility -- confirm before using them in new code. */
00158 timestamp_t time_task_submit;
00159 timestamp_t time_task_finish;
00160 timestamp_t time_committed;
00162 timestamp_t time_send_input_start;
00163 timestamp_t time_send_input_finish;
00164 timestamp_t time_receive_result_start;
00165 timestamp_t time_receive_result_finish;
00166 timestamp_t time_receive_output_start;
00167 timestamp_t time_receive_output_finish;
00169 timestamp_t time_execute_cmd_start;
00170 timestamp_t time_execute_cmd_finish;
00172 timestamp_t total_transfer_time;
00174 timestamp_t cmd_execution_time;
00175 timestamp_t total_cmd_execution_time;
00176 timestamp_t total_cmd_exhausted_execute_time;
00177 timestamp_t total_time_until_worker_failure;
00179 int64_t total_bytes_received;
00180 int64_t total_bytes_sent;
00181 int64_t total_bytes_transferred;
00183 timestamp_t time_app_delay;
00184 };
00185
/**
 * Statistics record for a queue. The caller passes a pointer to one of these as
 * the `s` argument of work_queue_get_stats(), work_queue_get_stats_hierarchy()
 * and work_queue_get_stats_category(). The structure is fully defined here, so
 * field order and types are part of the binary interface.
 * NOTE(review): units (time, memory, disk, bandwidth) are not stated in this
 * header, and which fields are snapshots versus running totals is inferred only
 * from their names -- confirm against the code that fills the structure.
 */
00188 struct work_queue_stats {
00189
/* Worker counts. NOTE(review): presumably current values -- confirm. */
00190 int workers_connected;
00191 int workers_init;
00192 int workers_idle;
00193 int workers_busy;
00194 int workers_able;
00196
/* Worker event counts. NOTE(review): presumably cumulative since the queue started -- confirm. */
00197 int workers_joined;
00198 int workers_removed;
00199 int workers_released;
00200 int workers_idled_out;
00201 int workers_fast_aborted;
00202 int workers_blacklisted ;
00203 int workers_lost;
00205
/* Task counts. NOTE(review): presumably current values -- confirm. */
00206 int tasks_waiting;
00207 int tasks_on_workers;
00208 int tasks_running;
00209 int tasks_with_results;
00211
/* Task event counts. NOTE(review): presumably cumulative -- confirm. */
00212 int tasks_submitted;
00213 int tasks_dispatched;
00214 int tasks_done;
00215 int tasks_failed;
00216 int tasks_cancelled;
00217 int tasks_exhausted_attempts;
00219
00220
00221
00222
/* Time accounting for the queue process itself. */
00223 timestamp_t time_when_started;
00224 timestamp_t time_send;
00225 timestamp_t time_receive;
00226 timestamp_t time_send_good;
00227 timestamp_t time_receive_good;
00228 timestamp_t time_status_msgs;
00229 timestamp_t time_internal;
00230 timestamp_t time_polling;
00231 timestamp_t time_application;
00233
/* Time accounting for execution on workers. */
00234 timestamp_t time_workers_execute;
00235 timestamp_t time_workers_execute_good;
00236 timestamp_t time_workers_execute_exhaustion;
00238
/* Data transfer counters and bandwidth. */
00239 int64_t bytes_sent;
00240 int64_t bytes_received;
00241 double bandwidth;
00243
/* Capacity estimates. */
00244 int capacity_tasks;
00245 int capacity_cores;
00246 int capacity_memory;
00247 int capacity_disk;
00248 int capacity_instantaneous;
00249 int capacity_weighted;
/* Resource figures: total, committed, and per-worker max/min (by name). */
00251 int64_t total_cores;
00252 int64_t total_memory;
00253 int64_t total_disk;
00255 int64_t committed_cores;
00256 int64_t committed_memory;
00257 int64_t committed_disk;
00259 int64_t max_cores;
00260 int64_t max_memory;
00261 int64_t max_disk;
00263 int64_t min_cores;
00264 int64_t min_memory;
00265 int64_t min_disk;
/* NOTE(review): many of the fields from here to the end of the struct overlap
 * by name with fields above (e.g. total_workers_joined vs. workers_joined,
 * total_bytes_sent vs. bytes_sent, start_time vs. time_when_started);
 * presumably older names kept for backward compatibility -- confirm before
 * using them in new code. */
00268 int total_workers_connected;
00269 int total_workers_joined;
00270 int total_workers_removed;
00271 int total_workers_lost;
00272 int total_workers_idled_out;
00273 int total_workers_fast_aborted;
00275 int tasks_complete;
00277 int total_tasks_dispatched;
00278 int total_tasks_complete;
00279 int total_tasks_failed;
00280 int total_tasks_cancelled;
00281 int total_exhausted_attempts;
00282 timestamp_t start_time;
00283 timestamp_t total_send_time;
00284 timestamp_t total_receive_time;
00285 timestamp_t total_good_transfer_time;
00287 timestamp_t total_execute_time;
00288 timestamp_t total_good_execute_time;
00289 timestamp_t total_exhausted_execute_time;
00291 int64_t total_bytes_sent;
00292 int64_t total_bytes_received;
00294 double capacity;
00296 double efficiency;
00297 double idle_percentage;
00299 int64_t total_gpus;
00300 int64_t committed_gpus;
00301 int64_t max_gpus;
00302 int64_t min_gpus;
00304 int port;
00305 int priority;
00306 int workers_ready;
00307 int workers_full;
00308 int total_worker_slots;
00309 int avg_capacity;
00310 };
00311
00312
00316
/*
 * Task construction and configuration API.
 * Only prototypes are visible in this header. Statements marked "presumably" or
 * NOTE(review) are inferred from names and signatures and must be confirmed
 * against the implementation. Return-value conventions of the int-returning
 * functions are not visible here.
 */
/** Create a task from the command string @p full_command and return a pointer to it.
 *  NOTE(review): presumably released with work_queue_task_delete() -- confirm. */
00324 struct work_queue_task *work_queue_task_create(const char *full_command);
00325
/** Return a new task derived from @p task; the source is const-qualified.
 *  NOTE(review): presumably a deep copy -- confirm. */
00331 struct work_queue_task *work_queue_task_clone(const struct work_queue_task *task);
00332
/** Set the command of task @p t to @p cmd. */
00337 void work_queue_task_specify_command( struct work_queue_task *t, const char *cmd );
00338
/** Attach a file to task @p t, naming it @p local_name locally and @p remote_name
 *  remotely; @p type selects input/output and @p flags carries work_queue_file_flags_t values. */
00355 int work_queue_task_specify_file(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags);
00356
/** As work_queue_task_specify_file(), restricted to the byte range @p start_byte..@p end_byte.
 *  NOTE(review): whether @p end_byte is inclusive is not visible here -- confirm. */
00371 int work_queue_task_specify_file_piece(struct work_queue_task *t, const char *local_name, const char *remote_name, off_t start_byte, off_t end_byte, work_queue_file_type_t type, work_queue_file_flags_t flags);
00372
/** Attach @p length bytes at @p data to task @p t under @p remote_name; the last
 *  (unnamed) parameter is a work_queue_file_flags_t value. */
00383 int work_queue_task_specify_buffer(struct work_queue_task *t, const char *data, int length, const char *remote_name, work_queue_file_flags_t);
00384
/** Attach a directory to task @p t. The unnamed parameter is a work_queue_file_flags_t
 *  value; @p recursive is an int, presumably a boolean selecting recursion -- confirm. */
00398 int work_queue_task_specify_directory(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t, int recursive);
00399
/** Set the retry limit of task @p t.
 *  NOTE(review): the parameter is int64_t while work_queue_task::max_retries is int;
 *  if the value is stored in that field, large values would not fit -- confirm. */
00405 void work_queue_task_specify_max_retries( struct work_queue_task *t, int64_t max_retries );
00406
/** Set the memory requirement of task @p t. NOTE(review): units not stated here -- confirm. */
00412 void work_queue_task_specify_memory( struct work_queue_task *t, int64_t memory );
00413
/** Set the disk requirement of task @p t. NOTE(review): units not stated here -- confirm. */
00419 void work_queue_task_specify_disk( struct work_queue_task *t, int64_t disk );
00420
/** Set the number of cores requested by task @p t. */
00426 void work_queue_task_specify_cores( struct work_queue_task *t, int cores );
00427
/** Set the number of GPUs requested by task @p t. */
00433 void work_queue_task_specify_gpus( struct work_queue_task *t, int gpus );
00434
/** Set an end time for task @p t. NOTE(review): the parameter name suggests
 *  microseconds; the reference point (epoch or relative) is not visible -- confirm. */
00442 void work_queue_task_specify_end_time( struct work_queue_task *t, int64_t useconds );
00443
/** Set a running-time value for task @p t. NOTE(review): the parameter name suggests
 *  microseconds; whether it is a limit or an estimate is not visible -- confirm. */
00451 void work_queue_task_specify_running_time( struct work_queue_task *t, int64_t useconds );
00452
/** Set the tag string of task @p t. */
00459 void work_queue_task_specify_tag(struct work_queue_task *t, const char *tag);
00460
/** Set the category name of task @p t. */
00467 void work_queue_task_specify_category(struct work_queue_task *t, const char *category);
00468
/** Add the named feature to task @p t. NOTE(review): presumably a feature a worker
 *  must offer to run the task -- confirm. */
00474 void work_queue_task_specify_feature(struct work_queue_task *t, const char *name);
00475
/** Set the priority of task @p t. NOTE(review): ordering direction not visible here -- confirm. */
00482 void work_queue_task_specify_priority(struct work_queue_task *t, double priority );
00483
/** Set environment variable @p name to @p value for task @p t. The spelling
 *  "enviroment" is part of the public symbol name and must be used as-is by callers. */
00490 void work_queue_task_specify_enviroment_variable( struct work_queue_task *t, const char *name, const char *value );
00491
/** Set the worker-selection algorithm of task @p t. */
00497 void work_queue_task_specify_algorithm(struct work_queue_task *t, work_queue_schedule_t algorithm);
00498
/** Set the monitor output location of task @p t. */
00504 void work_queue_task_specify_monitor_output(struct work_queue_task *t, const char *monitor_output);
00505
/** Delete task @p t. NOTE(review): presumably frees the task and what it owns -- confirm. */
00510 void work_queue_task_delete(struct work_queue_task *t);
00511
00512
/** Set the monitor snapshot file of task @p t. Note that, despite the
 *  work_queue_ prefix without "task", the first parameter is a task, not a queue. */
00563 int work_queue_specify_snapshot_file(struct work_queue_task *t, const char *monitor_snapshot_file);
00564
00565
00567
00571
/*
 * Queue creation, monitoring and submission.
 * `struct work_queue` is an incomplete type in this header and is handled only
 * through pointers. Behavioural notes marked NOTE(review) are inferred from
 * names and signatures -- confirm against the implementation.
 */
/** Create a queue for the given @p port and return a pointer to it.
 *  NOTE(review): presumably accepts WORK_QUEUE_DEFAULT_PORT / WORK_QUEUE_RANDOM_PORT
 *  and returns NULL on failure -- confirm. */
00588 struct work_queue *work_queue_create(int port);
00589
/** Enable monitoring on queue @p q. @p monitor_output_directory is a non-const
 *  char pointer; @p watchdog is an int, presumably a boolean -- confirm. */
00603 int work_queue_enable_monitoring(struct work_queue *q, char *monitor_output_directory, int watchdog);
00604
/** Same signature as work_queue_enable_monitoring(). NOTE(review): how the
 *  "full" variant differs is not visible in this header -- confirm. */
00614 int work_queue_enable_monitoring_full(struct work_queue *q, char *monitor_output_directory, int watchdog);
00615
/** Submit task @p t to queue @p q. NOTE(review): presumably returns the assigned
 *  task id -- confirm. */
00624 int work_queue_submit(struct work_queue *q, struct work_queue_task *t);
00625
00626
/** Set the minimum task id for queue @p q. NOTE(review): meaning of the return
 *  value is not visible here -- confirm. */
00637 int work_queue_specify_min_taskid(struct work_queue *q, int minid);
00638
/*
 * Host blacklist management for a queue. Behaviour is implemented elsewhere;
 * notes marked NOTE(review) are inferred from names -- confirm.
 */
/** Add @p hostname to the blacklist of queue @p q. */
00643 void work_queue_blacklist_add(struct work_queue *q, const char *hostname);
00644
/** Add @p hostname to the blacklist of queue @p q with a time value @p seconds (time_t).
 *  NOTE(review): presumably the entry expires after that many seconds -- confirm. */
00652 void work_queue_blacklist_add_with_timeout(struct work_queue *q, const char *hostname, time_t seconds);
00653
00654
/** Remove @p hostname from the blacklist of queue @p q. */
00659 void work_queue_blacklist_remove(struct work_queue *q, const char *hostname);
00660
00661
/** Clear the blacklist of queue @p q. */
00665 void work_queue_blacklist_clear(struct work_queue *q);
00666
/*
 * Waiting for results and querying queue status. Only prototypes are visible
 * here; notes marked NOTE(review) are inferred from names and signatures.
 */
/** Invalidate the cached object identified by @p local_name and kind @p type in queue @p q. */
00680 void work_queue_invalidate_cached_file(struct work_queue *q, const char *local_name, work_queue_file_t type);
00681
00682
/** Wait on queue @p q and return a task pointer.
 *  NOTE(review): @p timeout is presumably in seconds, WORK_QUEUE_WAITFORTASK presumably
 *  means no time limit, and the result is presumably NULL when nothing completed -- confirm. */
00697 struct work_queue_task *work_queue_wait(struct work_queue *q, int timeout);
00698
/** Query whether queue @p q "is hungry". NOTE(review): presumably non-zero when
 *  more tasks should be submitted -- confirm. */
00710 int work_queue_hungry(struct work_queue *q);
00711
/** Query whether queue @p q is empty. NOTE(review): presumably non-zero when no
 *  tasks remain -- confirm. */
00719 int work_queue_empty(struct work_queue *q);
00720
/** Return the port associated with queue @p q. */
00727 int work_queue_port(struct work_queue *q);
00728
/** Report statistics of queue @p q through the caller-provided structure @p s. */
00733 void work_queue_get_stats(struct work_queue *q, struct work_queue_stats *s);
00734
/** As work_queue_get_stats(), in a "hierarchy" variant. NOTE(review): what the
 *  hierarchy covers is not visible here -- confirm. */
00739 void work_queue_get_stats_hierarchy(struct work_queue *q, struct work_queue_stats *s);
00740
/** As work_queue_get_stats(), for the category named @p c. */
00746 void work_queue_get_stats_category(struct work_queue *q, const char *c, struct work_queue_stats *s);
00747
00748
/** Return the state of the task with id @p taskid in queue @p q. */
00754 work_queue_task_state_t work_queue_task_state(struct work_queue *q, int taskid);
00755
/** Set a bandwidth limit on queue @p q; the limit is passed as a string.
 *  NOTE(review): accepted string format and units are not visible here -- confirm. */
00760 void work_queue_set_bandwidth_limit(struct work_queue *q, const char *bandwidth);
00761
/** Return the effective bandwidth of queue @p q as a double. NOTE(review): units not stated -- confirm. */
00766 double work_queue_get_effective_bandwidth(struct work_queue *q);
00767
/** Return a worker summary string for queue @p q.
 *  NOTE(review): ownership of the returned buffer is not visible here; presumably
 *  the caller must free it -- confirm. */
00774 char * work_queue_get_worker_summary( struct work_queue *q );
00775
/*
 * Queue tuning: fast abort, draining, categories, scheduling, naming and
 * catalog settings. Only prototypes are visible here; notes marked
 * NOTE(review) are inferred from names and signatures -- confirm.
 */
/** Activate fast abort on queue @p q with the given @p multiplier.
 *  NOTE(review): how the multiplier is applied, and the return convention, are not visible here. */
00785 int work_queue_activate_fast_abort(struct work_queue *q, double multiplier);
00786
00787
/** As work_queue_activate_fast_abort(), for the named @p category only. */
00797 int work_queue_activate_fast_abort_category(struct work_queue *q, const char *category, double multiplier);
00798
00799
/** Set the draining flag for workers on @p hostname. NOTE(review): @p drain_flag is
 *  presumably a boolean, and the return value presumably a count of affected workers -- confirm. */
00807 int work_queue_specify_draining_by_hostname(struct work_queue *q, const char *hostname, int drain_flag);
00808
/** Set the mode of @p category; category_mode_t is not defined in this header
 *  (presumably in "category.h"). */
00815 int work_queue_specify_category_mode(struct work_queue *q, const char *category, category_mode_t mode);
00816
/** Configure the named @p resource of @p category; @p autolabel is an int,
 *  presumably a boolean -- confirm. */
00824 int work_queue_enable_category_resource(struct work_queue *q, const char *category, const char *resource, int autolabel);
00825
/** Set the worker-selection algorithm of queue @p q. */
00831 void work_queue_specify_algorithm(struct work_queue *q, work_queue_schedule_t algorithm);
00832
/** Return the name of queue @p q as a const string (the caller must not modify it). */
00837 const char *work_queue_name(struct work_queue *q);
00838
/** Set the name of queue @p q. */
00843 void work_queue_specify_name(struct work_queue *q, const char *name);
00844
/** Set the priority of queue @p q (an int, unlike the double task priority). */
00849 void work_queue_specify_priority(struct work_queue *q, int priority);
00850
/** Tell queue @p q how many tasks are left. NOTE(review): presumably informational,
 *  for reporting to a catalog -- confirm. */
00859 void work_queue_specify_num_tasks_left(struct work_queue *q, int ntasks);
00860
/** Set a single catalog server for queue @p q by @p hostname and @p port. */
00866 void work_queue_specify_catalog_server(struct work_queue *q, const char *hostname, int port);
00867
/** Set catalog servers for queue @p q from the string @p hosts.
 *  NOTE(review): the list format of @p hosts is not visible here -- confirm. */
00872 void work_queue_specify_catalog_servers(struct work_queue *q, const char *hosts);
00873
/*
 * Cancelling tasks, shutting down workers and destroying a queue. Notes marked
 * NOTE(review) are inferred from names and signatures -- confirm.
 */
/** Cancel the task with id @p id in queue @p q and return a task pointer.
 *  NOTE(review): presumably the cancelled task, or NULL if no task matched -- confirm. */
00879 struct work_queue_task *work_queue_cancel_by_taskid(struct work_queue *q, int id);
00880
/** Cancel a task selected by @p tag in queue @p q and return a task pointer.
 *  NOTE(review): behaviour when several tasks share the tag is not visible here -- confirm. */
00886 struct work_queue_task *work_queue_cancel_by_tasktag(struct work_queue *q, const char *tag);
00887
/** Cancel all tasks in queue @p q and return a list pointer.
 *  NOTE(review): presumably a list of the cancelled tasks; ownership not visible -- confirm. */
00892 struct list * work_queue_cancel_all_tasks(struct work_queue *q);
00893
/** Shut down workers of queue @p q; @p n is a count.
 *  NOTE(review): the meaning of special values of @p n and of the return value is not visible here. */
00898 int work_queue_shut_down_workers(struct work_queue *q, int n);
00899
/** Delete queue @p q. NOTE(review): presumably frees the queue; @p q must not be used afterwards -- confirm. */
00904 void work_queue_delete(struct work_queue *q);
00905
/*
 * Logging, authentication, keepalive and generic tuning. Notes marked
 * NOTE(review) are inferred from names and signatures -- confirm.
 */
/** Set the log file of queue @p q to @p logfile. NOTE(review): return convention not visible here. */
00911 int work_queue_specify_log(struct work_queue *q, const char *logfile);
00912
/** Set the transactions log file of queue @p q to @p logfile. */
00918 int work_queue_specify_transactions_log(struct work_queue *q, const char *logfile);
00919
/** Set the password of queue @p q directly from the string @p password. */
00925 void work_queue_specify_password( struct work_queue *q, const char *password );
00926
/** Set the password of queue @p q from the file named @p file.
 *  NOTE(review): the int result presumably reports whether the file could be read -- confirm. */
00933 int work_queue_specify_password_file( struct work_queue *q, const char *file );
00934
/** Set the keepalive interval of queue @p q. NOTE(review): presumably seconds; see
 *  WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL -- confirm. */
00939 void work_queue_specify_keepalive_interval(struct work_queue *q, int interval);
00940
/** Set the keepalive timeout of queue @p q. NOTE(review): presumably seconds; see
 *  WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT -- confirm. */
00945 void work_queue_specify_keepalive_timeout(struct work_queue *q, int timeout);
00946
/** Set the preferred connection of queue @p q from a string.
 *  NOTE(review): the accepted strings are not visible here -- confirm. */
00952 void work_queue_master_preferred_connection(struct work_queue *q, const char *preferred_connection);
00953
/** Set the tunable parameter @p name of queue @p q to @p value.
 *  NOTE(review): the set of valid names and the return convention are not visible here. */
00969 int work_queue_tune(struct work_queue *q, const char *name, double value);
00970
/*
 * Resource limits and category initialisation. struct rmsummary is not defined
 * in this header (presumably in "rmsummary.h"). Notes marked NOTE(review) are
 * inferred from names and signatures -- confirm.
 */
/** Set maximum resources for queue @p q from @p rm (const-qualified, so not modified through this pointer). */
00976 void work_queue_specify_max_resources(struct work_queue *q, const struct rmsummary *rm);
00977
/** Set maximum resources for the named @p category from @p rm. */
00983 void work_queue_specify_category_max_resources(struct work_queue *q, const char *category, const struct rmsummary *rm);
00984
/** Set the first allocation guess for the named @p category from @p rm. */
00990 void work_queue_specify_category_first_allocation_guess(struct work_queue *q, const char *category, const struct rmsummary *rm);
00991
/** Initialise categories of queue @p q from @p max and @p summaries_file.
 *  Note that @p max is not const-qualified here, unlike the rmsummary arguments above. */
00997 void work_queue_initialize_categories(struct work_queue *q, struct rmsummary *max, const char *summaries_file);
00998
00999
01001
01005
/*
 * Additional settings and convenience wrappers. Notes marked NOTE(review) are
 * inferred from names and signatures -- confirm against the implementation.
 * NOTE(review): these presumably form a legacy/compatibility section -- confirm.
 */
/* Values for the `order` argument of work_queue_specify_task_order() (by name). */
01006 #define WORK_QUEUE_TASK_ORDER_FIFO 0
01007 #define WORK_QUEUE_TASK_ORDER_LIFO 1
/** Set the task order of queue @p q; @p order is presumably one of the WORK_QUEUE_TASK_ORDER_* values. */
01015 void work_queue_specify_task_order(struct work_queue *q, int order);
01016
01017
/* Values for the `mode` argument of work_queue_specify_master_mode() (by name). */
01018 #define WORK_QUEUE_MASTER_MODE_STANDALONE 0
01019 #define WORK_QUEUE_MASTER_MODE_CATALOG 1
/** Set the master mode of queue @p q; @p mode is presumably one of the WORK_QUEUE_MASTER_MODE_* values. */
01028 void work_queue_specify_master_mode(struct work_queue *q, int mode);
01029
01030
/** Set the "estimate capacity" switch of queue @p q; the int argument is presumably a boolean. */
01036 void work_queue_specify_estimate_capacity_on(struct work_queue *q, int estimate_capacity_on);
01037
/** Attach @p length bytes at @p buf to task @p t as an input named @p rname. */
01046 int work_queue_task_specify_input_buf(struct work_queue_task *t, const char *buf, int length, const char *rname);
01047
/** Attach an input file to task @p t. Argument order is (fname, rname);
 *  NOTE(review): presumably local name then remote name -- confirm. */
01055 int work_queue_task_specify_input_file(struct work_queue_task *t, const char *fname, const char *rname);
01056
/** As work_queue_task_specify_input_file(); the name indicates the file is not cached. */
01064 int work_queue_task_specify_input_file_do_not_cache(struct work_queue_task *t, const char *fname, const char *rname);
01065
/** Attach an output file to task @p t. Note the argument order is (rname, fname),
 *  the reverse of the input-file functions above. */
01073 int work_queue_task_specify_output_file(struct work_queue_task *t, const char *rname, const char *fname);
01074
/** As work_queue_task_specify_output_file(), argument order (rname, fname); the
 *  name indicates the file is not cached. */
01082 int work_queue_task_specify_output_file_do_not_cache(struct work_queue_task *t, const char *rname, const char *fname);
01083
/** Build a file name from the directory string @p pwd and @p taskid and return it.
 *  NOTE(review): presumably related to WORK_QUEUE_RESULT_DISK_ALLOC_FULL; ownership of
 *  the returned buffer is not visible here (presumably caller frees) -- confirm. */
01088 char *work_queue_generate_disk_alloc_full_filename(char *pwd, int taskid);
01089
01091
01092 #endif