00001
00002
00003
00004
00005
00006
00007 #ifndef WORK_QUEUE_H
00008 #define WORK_QUEUE_H
00009
00020 #include <sys/types.h>
00021 #include "timestamp.h"
00022 #include "category.h"
00023 #include "rmsummary.h"
00024
/* Port constants. NOTE(review): presumably intended as the `port` argument
 * of work_queue_create() -- confirm against the implementation. */
#define WORK_QUEUE_DEFAULT_PORT 9123
#define WORK_QUEUE_RANDOM_PORT 0

/* Negative sentinel. The replacement list is parenthesized so the unary
 * minus cannot combine with operators next to the expansion site.
 * NOTE(review): presumably a timeout value for work_queue_wait() -- confirm. */
#define WORK_QUEUE_WAITFORTASK (-1)

/* Keepalive defaults. NOTE(review): units are not stated in this header
 * (presumably seconds) -- confirm. */
#define WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL 120
#define WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT 30
/**
 * Input/output selector for a file attached to a task; this is the type of
 * the `type` argument taken by the work_queue_task_specify_file* functions.
 */
typedef enum {
	WORK_QUEUE_INPUT  = 0,	/* file is an input */
	WORK_QUEUE_OUTPUT = 1	/* file is an output */
} work_queue_file_type_t;
00037
/**
 * Per-file option flags. Apart from WORK_QUEUE_NOCACHE (zero) every value is
 * a single bit, so the values can be OR-ed together.
 * WORK_QUEUE_THIRDGET and WORK_QUEUE_THIRDPUT share one value.
 */
typedef enum {
	WORK_QUEUE_NOCACHE  = 0,
	WORK_QUEUE_CACHE    = 1 << 0,
	WORK_QUEUE_SYMLINK  = 1 << 1,
	WORK_QUEUE_PREEXIST = 1 << 2,
	WORK_QUEUE_THIRDGET = 1 << 3,
	WORK_QUEUE_THIRDPUT = WORK_QUEUE_THIRDGET,	/* same value as THIRDGET (8) */
	WORK_QUEUE_WATCH    = 1 << 4
} work_queue_file_flags_t;
00047
/**
 * Identifier of a scheduling algorithm. Used by
 * work_queue_specify_algorithm(), work_queue_task_specify_algorithm() and the
 * `worker_selection_algorithm` member of struct work_queue_task.
 * Values are consecutive starting at zero.
 */
typedef enum {
	WORK_QUEUE_SCHEDULE_UNSET = 0,
	WORK_QUEUE_SCHEDULE_FCFS  = 1,
	WORK_QUEUE_SCHEDULE_FILES = 2,
	WORK_QUEUE_SCHEDULE_TIME  = 3,
	WORK_QUEUE_SCHEDULE_RAND  = 4,
	WORK_QUEUE_SCHEDULE_WORST = 5
} work_queue_schedule_t;
00056
00057
/**
 * Result code of a task (type of the `result` member of
 * struct work_queue_task).
 *
 * The three *_MISSING values occupy the low three bits (1, 2, 4). Every
 * later value is a small integer shifted left by three, so it never
 * overlaps those bits.
 */
typedef enum {
	WORK_QUEUE_RESULT_SUCCESS             = 0,
	WORK_QUEUE_RESULT_INPUT_MISSING       = 1,
	WORK_QUEUE_RESULT_OUTPUT_MISSING      = 2,
	WORK_QUEUE_RESULT_STDOUT_MISSING      = 4,
	WORK_QUEUE_RESULT_SIGNAL              = 8,	/* 1 << 3 */
	WORK_QUEUE_RESULT_RESOURCE_EXHAUSTION = 16,	/* 2 << 3 */
	WORK_QUEUE_RESULT_TASK_TIMEOUT        = 24,	/* 3 << 3 */
	WORK_QUEUE_RESULT_UNKNOWN             = 32,	/* 4 << 3 */
	WORK_QUEUE_RESULT_FORSAKEN            = 40,	/* 5 << 3 */
	WORK_QUEUE_RESULT_MAX_RETRIES         = 48,	/* 6 << 3 */
	WORK_QUEUE_RESULT_TASK_MAX_RUN_TIME   = 56,	/* 7 << 3 */
	WORK_QUEUE_RESULT_DISK_ALLOC_FULL     = 64	/* 8 << 3 */
} work_queue_result_t;
00072
/**
 * State of a task, as returned by work_queue_task_state().
 * Values are consecutive starting at zero.
 */
typedef enum {
	WORK_QUEUE_TASK_UNKNOWN              = 0,
	WORK_QUEUE_TASK_READY                = 1,
	WORK_QUEUE_TASK_RUNNING              = 2,
	WORK_QUEUE_TASK_WAITING_RETRIEVAL    = 3,
	WORK_QUEUE_TASK_RETRIEVED            = 4,
	WORK_QUEUE_TASK_DONE                 = 5,
	WORK_QUEUE_TASK_CANCELED             = 6,
	WORK_QUEUE_TASK_WAITING_RESUBMISSION = 7
} work_queue_task_state_t;
00083
/**
 * Kind of object attached to a task; also the `type` argument of
 * work_queue_invalidate_cached_file(). Numbering starts at 1, not 0.
 */
typedef enum {
	WORK_QUEUE_FILE       = 1,
	WORK_QUEUE_BUFFER     = 2,
	WORK_QUEUE_REMOTECMD  = 3,
	WORK_QUEUE_FILE_PIECE = 4,
	WORK_QUEUE_DIRECTORY  = 5,
	WORK_QUEUE_URL        = 6
} work_queue_file_t;
00092
00093
/*
 * Global scheduler option; declared here, defined in another translation unit.
 * NOTE(review): presumably holds a work_queue_schedule_t value used as the
 * default scheduling algorithm -- confirm against the definition.
 */
extern int wq_option_scheduler;
00104 struct work_queue_task {
00105 char *tag;
00106 char *command_line;
00107 work_queue_schedule_t worker_selection_algorithm;
00108 char *output;
00109 struct list *input_files;
00110 struct list *output_files;
00111 struct list *env_list;
00112 int taskid;
00113 int return_status;
00114 work_queue_result_t result;
00115 char *host;
00116 char *hostname;
00118 char *category;
00119 category_allocation_t resource_request;
00121 double priority;
00122 int max_retries;
00124 int try_count;
00125 int exhausted_attempts;
00127
00128
00129 timestamp_t time_when_submitted;
00130 timestamp_t time_when_done;
00132 int disk_allocation_exhausted;
00134 timestamp_t time_when_commit_start;
00135 timestamp_t time_when_commit_end;
00137 timestamp_t time_when_retrieval;
00139 timestamp_t time_workers_execute_last;
00140 timestamp_t time_workers_execute_all;
00141 timestamp_t time_workers_execute_exhaustion;
00142 timestamp_t time_workers_execute_failure;
00144 int64_t bytes_received;
00145 int64_t bytes_sent;
00146 int64_t bytes_transferred;
00148 struct rmsummary *resources_allocated;
00149 struct rmsummary *resources_measured;
00150 struct rmsummary *resources_requested;
00151 char *monitor_output_directory;
00153
00154
00155
00156 timestamp_t time_task_submit;
00157 timestamp_t time_task_finish;
00158 timestamp_t time_committed;
00160 timestamp_t time_send_input_start;
00161 timestamp_t time_send_input_finish;
00162 timestamp_t time_receive_result_start;
00163 timestamp_t time_receive_result_finish;
00164 timestamp_t time_receive_output_start;
00165 timestamp_t time_receive_output_finish;
00167 timestamp_t time_execute_cmd_start;
00168 timestamp_t time_execute_cmd_finish;
00170 timestamp_t total_transfer_time;
00172 timestamp_t cmd_execution_time;
00173 timestamp_t total_cmd_execution_time;
00174 timestamp_t total_cmd_exhausted_execute_time;
00175 timestamp_t total_time_until_worker_failure;
00177 int64_t total_bytes_received;
00178 int64_t total_bytes_sent;
00179 int64_t total_bytes_transferred;
00181 timestamp_t time_app_delay;
00182 };
00183
00186 struct work_queue_stats {
00187
00188 int workers_connected;
00189 int workers_init;
00190 int workers_idle;
00191 int workers_busy;
00192 int workers_able;
00194
00195 int workers_joined;
00196 int workers_removed;
00197 int workers_released;
00198 int workers_idled_out;
00199 int workers_fast_aborted;
00200 int workers_blacklisted ;
00201 int workers_lost;
00203
00204 int tasks_waiting;
00205 int tasks_on_workers;
00206 int tasks_running;
00207 int tasks_with_results;
00209
00210 int tasks_submitted;
00211 int tasks_dispatched;
00212 int tasks_done;
00213 int tasks_failed;
00214 int tasks_cancelled;
00215 int tasks_exhausted_attempts;
00217
00218
00219
00220
00221 timestamp_t time_when_started;
00222 timestamp_t time_send;
00223 timestamp_t time_receive;
00224 timestamp_t time_send_good;
00225 timestamp_t time_receive_good;
00226 timestamp_t time_status_msgs;
00227 timestamp_t time_internal;
00228 timestamp_t time_polling;
00229 timestamp_t time_application;
00231
00232 timestamp_t time_workers_execute;
00233 timestamp_t time_workers_execute_good;
00234 timestamp_t time_workers_execute_exhaustion;
00236
00237 int64_t bytes_sent;
00238 int64_t bytes_received;
00239 double bandwidth;
00241
00242 int capacity_tasks;
00243 int capacity_cores;
00244 int capacity_memory;
00245 int capacity_disk;
00247 int64_t total_cores;
00248 int64_t total_memory;
00249 int64_t total_disk;
00251 int64_t committed_cores;
00252 int64_t committed_memory;
00253 int64_t committed_disk;
00255 int64_t max_cores;
00256 int64_t max_memory;
00257 int64_t max_disk;
00259 int64_t min_cores;
00260 int64_t min_memory;
00261 int64_t min_disk;
00264 int total_workers_connected;
00265 int total_workers_joined;
00266 int total_workers_removed;
00267 int total_workers_lost;
00268 int total_workers_idled_out;
00269 int total_workers_fast_aborted;
00271 int tasks_complete;
00273 int total_tasks_dispatched;
00274 int total_tasks_complete;
00275 int total_tasks_failed;
00276 int total_tasks_cancelled;
00277 int total_exhausted_attempts;
00278 timestamp_t start_time;
00279 timestamp_t total_send_time;
00280 timestamp_t total_receive_time;
00281 timestamp_t total_good_transfer_time;
00283 timestamp_t total_execute_time;
00284 timestamp_t total_good_execute_time;
00285 timestamp_t total_exhausted_execute_time;
00287 int64_t total_bytes_sent;
00288 int64_t total_bytes_received;
00290 double capacity;
00292 double efficiency;
00293 double idle_percentage;
00295 int64_t total_gpus;
00296 int64_t committed_gpus;
00297 int64_t max_gpus;
00298 int64_t min_gpus;
00300 int port;
00301 int priority;
00302 int workers_ready;
00303 int workers_full;
00304 int total_worker_slots;
00305 int avg_capacity;
00306 };
00307
00308
00312
/*
 * Task construction.
 * NOTE(review): descriptions are derived from names and signatures only;
 * confirm ownership and failure behaviour against the implementation.
 */

/**
 * Create a task from a command string.
 * @param full_command Command text for the new task.
 * @return Pointer to the new task.
 */
struct work_queue_task *
work_queue_task_create(const char *full_command);

/**
 * Create a copy of a task.
 * @param task Task to copy; declared const, so it is not modified through
 *             this pointer.
 * @return Pointer to the copy.
 */
struct work_queue_task *
work_queue_task_clone(const struct work_queue_task *task);

/**
 * Set the command of an existing task.
 * @param t   Task to update.
 * @param cmd Command text.
 */
void
work_queue_task_specify_command(struct work_queue_task *t, const char *cmd);
00334
00351 int work_queue_task_specify_file(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags);
00352
00367 int work_queue_task_specify_file_piece(struct work_queue_task *t, const char *local_name, const char *remote_name, off_t start_byte, off_t end_byte, work_queue_file_type_t type, work_queue_file_flags_t flags);
00368
00379 int work_queue_task_specify_buffer(struct work_queue_task *t, const char *data, int length, const char *remote_name, work_queue_file_flags_t);
00380
00394 int work_queue_task_specify_directory(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t, int recursive);
00395
/*
 * Per-task limits and resource requests.
 * NOTE(review): units of memory and disk are not stated in this header; the
 * `useconds` parameter name suggests microseconds -- confirm both against
 * the implementation.
 */

/**
 * Set the retry limit of a task.
 * @param t           Task to update.
 * @param max_retries Retry limit (int64_t here, although the matching
 *                    struct member is declared int).
 */
void work_queue_task_specify_max_retries(struct work_queue_task *t,
					 int64_t max_retries);

/** Set the memory request of task `t`. */
void work_queue_task_specify_memory(struct work_queue_task *t, int64_t memory);

/** Set the disk request of task `t`. */
void work_queue_task_specify_disk(struct work_queue_task *t, int64_t disk);

/** Set the number of cores requested by task `t`. */
void work_queue_task_specify_cores(struct work_queue_task *t, int cores);

/** Set the number of GPUs requested by task `t`. */
void work_queue_task_specify_gpus(struct work_queue_task *t, int gpus);

/** Set the end time of task `t`, passed as `useconds`. */
void work_queue_task_specify_end_time(struct work_queue_task *t,
				      int64_t useconds);

/** Set the running time of task `t`, passed as `useconds`. */
void work_queue_task_specify_running_time(struct work_queue_task *t,
					  int64_t useconds);
00448
00455 void work_queue_task_specify_tag(struct work_queue_task *t, const char *tag);
00456
00463 void work_queue_task_specify_category(struct work_queue_task *t, const char *category);
00464
00471 void work_queue_task_specify_priority(struct work_queue_task *t, double priority );
00472
00479 void work_queue_task_specify_enviroment_variable( struct work_queue_task *t, const char *name, const char *value );
00480
00486 void work_queue_task_specify_algorithm(struct work_queue_task *t, work_queue_schedule_t algorithm);
00487
00493 void work_queue_task_specify_monitor_output(struct work_queue_task *t, const char *monitor_output);
00494
00499 void work_queue_task_delete(struct work_queue_task *t);
00500
00502
00506
/*
 * Queue creation, monitoring and task submission.
 * NOTE(review): descriptions are derived from names and signatures only; the
 * meaning of the int return values is not visible in this header.
 */

/**
 * Create a queue.
 * @param port Port number. NOTE(review): WORK_QUEUE_DEFAULT_PORT and
 *             WORK_QUEUE_RANDOM_PORT are presumably meant for this
 *             argument -- confirm.
 * @return Pointer to the new queue (opaque struct work_queue).
 */
struct work_queue *
work_queue_create(int port);

/**
 * Enable monitoring on queue `q`.
 * @param monitor_output_directory Output directory (non-const char *).
 */
int work_queue_enable_monitoring(struct work_queue *q,
				 char *monitor_output_directory);

/**
 * "Full" variant of work_queue_enable_monitoring(); same parameters.
 */
int work_queue_enable_monitoring_full(struct work_queue *q,
				      char *monitor_output_directory);

/**
 * Submit task `t` to queue `q`.
 * NOTE(review): the return value is presumably the assigned task id --
 * confirm.
 */
int work_queue_submit(struct work_queue *q, struct work_queue_task *t);

/** Set the minimum task id used by queue `q`. */
int work_queue_specify_min_taskid(struct work_queue *q, int minid);
00571
00576 void work_queue_blacklist_add(struct work_queue *q, const char *hostname);
00577
00585 void work_queue_blacklist_add_with_timeout(struct work_queue *q, const char *hostname, time_t seconds);
00586
00587
00592 void work_queue_blacklist_remove(struct work_queue *q, const char *hostname);
00593
00594
00598 void work_queue_blacklist_clear(struct work_queue *q);
00599
00613 void work_queue_invalidate_cached_file(struct work_queue *q, const char *local_name, work_queue_file_t type);
00614
00615
00630 struct work_queue_task *work_queue_wait(struct work_queue *q, int timeout);
00631
00643 int work_queue_hungry(struct work_queue *q);
00644
00652 int work_queue_empty(struct work_queue *q);
00653
00660 int work_queue_port(struct work_queue *q);
00661
00666 void work_queue_get_stats(struct work_queue *q, struct work_queue_stats *s);
00667
00672 void work_queue_get_stats_hierarchy(struct work_queue *q, struct work_queue_stats *s);
00673
00679 void work_queue_get_stats_category(struct work_queue *q, const char *c, struct work_queue_stats *s);
00680
00681
00687 work_queue_task_state_t work_queue_task_state(struct work_queue *q, int taskid);
00688
00693 void work_queue_set_bandwidth_limit(struct work_queue *q, const char *bandwidth);
00694
00699 double work_queue_get_effective_bandwidth(struct work_queue *q);
00700
00707 char * work_queue_get_worker_summary( struct work_queue *q );
00708
00718 int work_queue_activate_fast_abort(struct work_queue *q, double multiplier);
00719
00720
00730 int work_queue_activate_fast_abort_category(struct work_queue *q, const char *category, double multiplier);
00731
00738 int work_queue_specify_category_mode(struct work_queue *q, const char *category, category_mode_t mode);
00739
00747 int work_queue_enable_category_resource(struct work_queue *q, const char *category, const char *resource, int autolabel);
00748
00754 void work_queue_specify_algorithm(struct work_queue *q, work_queue_schedule_t algorithm);
00755
00760 const char *work_queue_name(struct work_queue *q);
00761
00766 void work_queue_specify_name(struct work_queue *q, const char *name);
00767
00772 void work_queue_specify_priority(struct work_queue *q, int priority);
00773
00782 void work_queue_specify_num_tasks_left(struct work_queue *q, int ntasks);
00783
00789 void work_queue_specify_catalog_server(struct work_queue *q, const char *hostname, int port);
00790
00795 void work_queue_specify_catalog_servers(struct work_queue *q, const char *hosts);
00796
/*
 * Cancelling tasks, shutting down workers, destroying the queue.
 * NOTE(review): descriptions are derived from names and signatures only;
 * ownership of returned tasks and lists is not visible in this header.
 */

/** Cancel the task with id `id` in queue `q`; returns a task pointer. */
struct work_queue_task *
work_queue_cancel_by_taskid(struct work_queue *q, int id);

/** Cancel a task of queue `q` selected by `tag`; returns a task pointer. */
struct work_queue_task *
work_queue_cancel_by_tasktag(struct work_queue *q, const char *tag);

/** Cancel all tasks of queue `q`; returns a `struct list` pointer. */
struct list *
work_queue_cancel_all_tasks(struct work_queue *q);

/**
 * Shut down workers of queue `q`.
 * @param n Worker count argument.
 * @return int result (presumably the number of workers affected -- confirm).
 */
int work_queue_shut_down_workers(struct work_queue *q, int n);

/** Delete queue `q`. */
void work_queue_delete(struct work_queue *q);
00828
/*
 * Logging, authentication, keepalive, connection preference and tuning.
 * NOTE(review): descriptions are derived from names and signatures only; the
 * meaning of the int return values is not visible in this header.
 */

/** Set the log file of queue `q`. */
int work_queue_specify_log(struct work_queue *q, const char *logfile);

/** Set the transactions log file of queue `q`. */
int work_queue_specify_transactions_log(struct work_queue *q,
					const char *logfile);

/** Set the password of queue `q` from a string. */
void work_queue_specify_password(struct work_queue *q, const char *password);

/** Set the password of queue `q` from the file named `file`. */
int work_queue_specify_password_file(struct work_queue *q, const char *file);

/**
 * Set the keepalive interval of queue `q`.
 * See WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL for the default constant.
 */
void work_queue_specify_keepalive_interval(struct work_queue *q, int interval);

/**
 * Set the keepalive timeout of queue `q`.
 * See WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT for the default constant.
 */
void work_queue_specify_keepalive_timeout(struct work_queue *q, int timeout);

/**
 * Set the preferred connection of the master.
 * @param preferred_connection Preference given as a string; accepted values
 *                             are not visible in this header.
 */
void work_queue_master_preferred_connection(struct work_queue *q,
					    const char *preferred_connection);

/**
 * Tune a named parameter of queue `q`.
 * @param name  Parameter name; the accepted names are not visible here.
 * @param value New value.
 */
int work_queue_tune(struct work_queue *q, const char *name, double value);
00893
/*
 * Resource limits and category initialisation (struct rmsummary comes from
 * rmsummary.h).
 * NOTE(review): descriptions are derived from names and signatures only.
 */

/** Set maximum resources for queue `q` from `rm` (read-only). */
void work_queue_specify_max_resources(struct work_queue *q,
				      const struct rmsummary *rm);

/** Set maximum resources for one category of queue `q` from `rm`. */
void work_queue_specify_category_max_resources(struct work_queue *q,
					       const char *category,
					       const struct rmsummary *rm);

/** Set the first allocation guess for one category of queue `q`. */
void work_queue_specify_category_first_allocation_guess(struct work_queue *q,
							const char *category,
							const struct rmsummary *rm);

/**
 * Initialise categories of queue `q`.
 * @param max            Resource summary (non-const pointer).
 * @param summaries_file Name of a summaries file.
 */
void work_queue_initialize_categories(struct work_queue *q,
				      struct rmsummary *max,
				      const char *summaries_file);
00921
00922
00924
00928
/* Task ordering constants. */
#define WORK_QUEUE_TASK_ORDER_FIFO 0
#define WORK_QUEUE_TASK_ORDER_LIFO 1

/**
 * Set the task order of queue `q`.
 * @param order NOTE(review): presumably one of the WORK_QUEUE_TASK_ORDER_*
 *              constants above -- confirm against the implementation.
 */
void work_queue_specify_task_order(struct work_queue *q, int order);
00939
00940
/* Master mode constants. */
#define WORK_QUEUE_MASTER_MODE_STANDALONE 0
#define WORK_QUEUE_MASTER_MODE_CATALOG 1

/**
 * Set the master mode of queue `q`.
 * @param mode NOTE(review): presumably one of the WORK_QUEUE_MASTER_MODE_*
 *             constants above -- confirm against the implementation.
 */
void work_queue_specify_master_mode(struct work_queue *q, int mode);
00952
00953
/*
 * Capacity estimation switch, input/output convenience variants, and a
 * filename helper.
 * NOTE(review): descriptions are derived from names and signatures only; the
 * meaning of the int return values is not visible in this header.
 */

/**
 * Set the capacity estimation switch of queue `q`.
 * @param estimate_capacity_on Integer switch (presumably boolean).
 */
void work_queue_specify_estimate_capacity_on(struct work_queue *q,
					     int estimate_capacity_on);

/**
 * Attach an input buffer to task `t`.
 * @param buf    Buffer contents.
 * @param length Length of `buf`.
 * @param rname  Remote name.
 */
int work_queue_task_specify_input_buf(struct work_queue_task *t,
				      const char *buf,
				      int length,
				      const char *rname);

/**
 * Attach an input file to task `t`.
 * @param fname Local file name.
 * @param rname Remote name.
 */
int work_queue_task_specify_input_file(struct work_queue_task *t,
				       const char *fname,
				       const char *rname);

/** Variant of work_queue_task_specify_input_file() named "do_not_cache". */
int work_queue_task_specify_input_file_do_not_cache(struct work_queue_task *t,
						    const char *fname,
						    const char *rname);

/**
 * Attach an output file to task `t`.
 * Note the argument order: `rname` comes before `fname` here, the reverse
 * of the input variants above.
 */
int work_queue_task_specify_output_file(struct work_queue_task *t,
					const char *rname,
					const char *fname);

/** Variant of work_queue_task_specify_output_file() named "do_not_cache". */
int work_queue_task_specify_output_file_do_not_cache(struct work_queue_task *t,
						     const char *rname,
						     const char *fname);

/**
 * Build the "disk alloc full" filename for a task.
 * @param pwd    Directory string (non-const char *).
 * @param taskid Task id.
 * @return Filename string. NOTE(review): the non-const char * return
 *         suggests the caller owns the buffer -- confirm before freeing.
 */
char *work_queue_generate_disk_alloc_full_filename(char *pwd, int taskid);
01012
01014
01015 #endif