00001
00002
00003
00004
00005
00006
00007 #ifndef WORK_QUEUE_H
00008 #define WORK_QUEUE_H
00009
#include <stdint.h>
#include <sys/types.h>

#include "timestamp.h"
#include "category.h"
#include "rmsummary.h"
00024
/* Port selection constants.
 * NOTE(review): how these are consumed is not visible in this header;
 * the names suggest they are passed as the port argument when creating a queue. */
#define WORK_QUEUE_DEFAULT_PORT 9123
#define WORK_QUEUE_RANDOM_PORT 0

/* Sentinel value; parenthesized so it expands safely inside larger expressions.
 * NOTE(review): presumably a "block until a task is available" timeout — confirm. */
#define WORK_QUEUE_WAITFORTASK (-1)

/* Keepalive defaults. NOTE(review): units are presumably seconds — confirm. */
#define WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL 120
#define WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT 30
/** Direction of a file attached to a task: input (0) or output (1). */
typedef enum {
	WORK_QUEUE_INPUT = 0,
	WORK_QUEUE_OUTPUT = 1
} work_queue_file_type_t;
00037
/**
 * Per-file option flags.
 *
 * Apart from WORK_QUEUE_NOCACHE (0), every value is a single bit, so the
 * values can be distinguished when OR-ed together.
 * WORK_QUEUE_THIRDGET and WORK_QUEUE_THIRDPUT deliberately share the value 8:
 * they are two names for the same bit and cannot be told apart at run time.
 */
typedef enum {
	WORK_QUEUE_NOCACHE = 0,
	WORK_QUEUE_CACHE = 1,
	WORK_QUEUE_SYMLINK = 2,
	WORK_QUEUE_PREEXIST = 4,
	WORK_QUEUE_THIRDGET = 8,
	WORK_QUEUE_THIRDPUT = 8, /* alias of WORK_QUEUE_THIRDGET */
	WORK_QUEUE_WATCH = 16
} work_queue_file_flags_t;
00047
/**
 * Identifiers for the worker-selection (scheduling) algorithm.
 * Values are consecutive starting at 0; they are written out explicitly so
 * that inserting a new enumerator cannot silently renumber the others.
 */
typedef enum {
	WORK_QUEUE_SCHEDULE_UNSET = 0,
	WORK_QUEUE_SCHEDULE_FCFS = 1,
	WORK_QUEUE_SCHEDULE_FILES = 2,
	WORK_QUEUE_SCHEDULE_TIME = 3,
	WORK_QUEUE_SCHEDULE_RAND = 4,
	WORK_QUEUE_SCHEDULE_WORST = 5
} work_queue_schedule_t;
00056
00057
/**
 * Result codes recorded for a task.
 *
 * Layout of the values: the first three non-zero codes are the single bits
 * 1, 2 and 4; every later code is a small integer shifted left by three,
 * i.e. a multiple of 8 that leaves the low three bits clear.
 */
typedef enum {
	WORK_QUEUE_RESULT_SUCCESS = 0,
	WORK_QUEUE_RESULT_INPUT_MISSING = 1,
	WORK_QUEUE_RESULT_OUTPUT_MISSING = 2,
	WORK_QUEUE_RESULT_STDOUT_MISSING = 4,
	WORK_QUEUE_RESULT_SIGNAL = 1 << 3,              /* 8  */
	WORK_QUEUE_RESULT_RESOURCE_EXHAUSTION = 2 << 3, /* 16 */
	WORK_QUEUE_RESULT_TASK_TIMEOUT = 3 << 3,        /* 24 */
	WORK_QUEUE_RESULT_UNKNOWN = 4 << 3,             /* 32 */
	WORK_QUEUE_RESULT_FORSAKEN = 5 << 3,            /* 40 */
	WORK_QUEUE_RESULT_MAX_RETRIES = 6 << 3,         /* 48 */
	WORK_QUEUE_RESULT_TASK_MAX_RUN_TIME = 7 << 3,   /* 56 */
	WORK_QUEUE_RESULT_DISK_ALLOC_FULL = 8 << 3      /* 64 */
} work_queue_result_t;
00072
/**
 * Task state identifiers, numbered consecutively from 0.
 * The trailing comma after the last enumerator was removed: it is rejected
 * by C89 compilers and no other enum in this header uses one.
 */
typedef enum {
	WORK_QUEUE_TASK_UNKNOWN = 0,
	WORK_QUEUE_TASK_READY,
	WORK_QUEUE_TASK_RUNNING,
	WORK_QUEUE_TASK_WAITING_RETRIEVAL,
	WORK_QUEUE_TASK_RETRIEVED,
	WORK_QUEUE_TASK_DONE,
	WORK_QUEUE_TASK_CANCELED
} work_queue_task_state_t;
00082
/**
 * Kind of object attached to a task.
 * Numbering starts at 1, so 0 is not a valid kind; values are spelled out
 * to keep them stable if the list is ever extended.
 */
typedef enum {
	WORK_QUEUE_FILE = 1,
	WORK_QUEUE_BUFFER = 2,
	WORK_QUEUE_REMOTECMD = 3,
	WORK_QUEUE_FILE_PIECE = 4,
	WORK_QUEUE_DIRECTORY = 5,
	WORK_QUEUE_URL = 6
} work_queue_file_t;
00091
00092
/* Global option, declared here and defined in another translation unit.
 * NOTE(review): presumably holds a work_queue_schedule_t value used as the
 * default scheduling algorithm — confirm against the definition. */
extern int wq_option_scheduler;
00103 struct work_queue_task {
00104 char *tag;
00105 char *command_line;
00106 work_queue_schedule_t worker_selection_algorithm;
00107 char *output;
00108 struct list *input_files;
00109 struct list *output_files;
00110 struct list *env_list;
00111 int taskid;
00112 int return_status;
00113 work_queue_result_t result;
00114 char *host;
00115 char *hostname;
00117 char *category;
00118 category_allocation_t resource_request;
00120 double priority;
00121 int max_retries;
00123 int try_count;
00124 int exhausted_attempts;
00126
00127
00128 timestamp_t time_when_submitted;
00129 timestamp_t time_when_done;
00131 int disk_allocation_exhausted;
00133 timestamp_t time_when_commit_start;
00134 timestamp_t time_when_commit_end;
00136 timestamp_t time_when_retrieval;
00138 timestamp_t time_workers_execute_last;
00139 timestamp_t time_workers_execute_all;
00140 timestamp_t time_workers_execute_exhaustion;
00141 timestamp_t time_workers_execute_failure;
00143 int64_t bytes_received;
00144 int64_t bytes_sent;
00145 int64_t bytes_transferred;
00147 struct rmsummary *resources_allocated;
00148 struct rmsummary *resources_measured;
00149 struct rmsummary *resources_requested;
00150 char *monitor_output_directory;
00152
00153
00154
00155 timestamp_t time_task_submit;
00156 timestamp_t time_task_finish;
00157 timestamp_t time_committed;
00159 timestamp_t time_send_input_start;
00160 timestamp_t time_send_input_finish;
00161 timestamp_t time_receive_result_start;
00162 timestamp_t time_receive_result_finish;
00163 timestamp_t time_receive_output_start;
00164 timestamp_t time_receive_output_finish;
00166 timestamp_t time_execute_cmd_start;
00167 timestamp_t time_execute_cmd_finish;
00169 timestamp_t total_transfer_time;
00171 timestamp_t cmd_execution_time;
00172 timestamp_t total_cmd_execution_time;
00173 timestamp_t total_cmd_exhausted_execute_time;
00174 timestamp_t total_time_until_worker_failure;
00176 int64_t total_bytes_received;
00177 int64_t total_bytes_sent;
00178 int64_t total_bytes_transferred;
00180 timestamp_t time_app_delay;
00181 };
00182
00185 struct work_queue_stats {
00186
00187 int workers_connected;
00188 int workers_init;
00189 int workers_idle;
00190 int workers_busy;
00191 int workers_able;
00193
00194 int workers_joined;
00195 int workers_removed;
00196 int workers_released;
00197 int workers_idled_out;
00198 int workers_fast_aborted;
00199 int workers_blacklisted ;
00200 int workers_lost;
00202
00203 int tasks_waiting;
00204 int tasks_on_workers;
00205 int tasks_running;
00206 int tasks_with_results;
00208
00209 int tasks_submitted;
00210 int tasks_dispatched;
00211 int tasks_done;
00212 int tasks_failed;
00213 int tasks_cancelled;
00214 int tasks_exhausted_attempts;
00216
00217
00218
00219
00220 timestamp_t time_when_started;
00221 timestamp_t time_send;
00222 timestamp_t time_receive;
00223 timestamp_t time_send_good;
00224 timestamp_t time_receive_good;
00225 timestamp_t time_status_msgs;
00226 timestamp_t time_internal;
00227 timestamp_t time_polling;
00228 timestamp_t time_application;
00230
00231 timestamp_t time_workers_execute;
00232 timestamp_t time_workers_execute_good;
00233 timestamp_t time_workers_execute_exhaustion;
00235
00236 int64_t bytes_sent;
00237 int64_t bytes_received;
00238 double bandwidth;
00240
00241 int capacity_tasks;
00242 int capacity_cores;
00243 int capacity_memory;
00244 int capacity_disk;
00246 int64_t total_cores;
00247 int64_t total_memory;
00248 int64_t total_disk;
00250 int64_t committed_cores;
00251 int64_t committed_memory;
00252 int64_t committed_disk;
00254 int64_t max_cores;
00255 int64_t max_memory;
00256 int64_t max_disk;
00258 int64_t min_cores;
00259 int64_t min_memory;
00260 int64_t min_disk;
00263 int total_workers_connected;
00264 int total_workers_joined;
00265 int total_workers_removed;
00266 int total_workers_lost;
00267 int total_workers_idled_out;
00268 int total_workers_fast_aborted;
00270 int tasks_complete;
00272 int total_tasks_dispatched;
00273 int total_tasks_complete;
00274 int total_tasks_failed;
00275 int total_tasks_cancelled;
00276 int total_exhausted_attempts;
00277 timestamp_t start_time;
00278 timestamp_t total_send_time;
00279 timestamp_t total_receive_time;
00280 timestamp_t total_good_transfer_time;
00282 timestamp_t total_execute_time;
00283 timestamp_t total_good_execute_time;
00284 timestamp_t total_exhausted_execute_time;
00286 int64_t total_bytes_sent;
00287 int64_t total_bytes_received;
00289 double capacity;
00291 double efficiency;
00292 double idle_percentage;
00294 int64_t total_gpus;
00295 int64_t committed_gpus;
00296 int64_t max_gpus;
00297 int64_t min_gpus;
00299 int port;
00300 int priority;
00301 int workers_ready;
00302 int workers_full;
00303 int total_worker_slots;
00304 int avg_capacity;
00305 };
00306
00307
00311
00319 struct work_queue_task *work_queue_task_create(const char *full_command);
00320
00326 struct work_queue_task *work_queue_task_clone(const struct work_queue_task *task);
00327
00332 void work_queue_task_specify_command( struct work_queue_task *t, const char *cmd );
00333
00350 int work_queue_task_specify_file(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags);
00351
00366 int work_queue_task_specify_file_piece(struct work_queue_task *t, const char *local_name, const char *remote_name, off_t start_byte, off_t end_byte, work_queue_file_type_t type, work_queue_file_flags_t flags);
00367
00378 int work_queue_task_specify_buffer(struct work_queue_task *t, const char *data, int length, const char *remote_name, work_queue_file_flags_t);
00379
00393 int work_queue_task_specify_directory(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t, int recursive);
00394
00400 void work_queue_task_specify_max_retries( struct work_queue_task *t, int64_t max_retries );
00401
00407 void work_queue_task_specify_memory( struct work_queue_task *t, int64_t memory );
00408
00414 void work_queue_task_specify_disk( struct work_queue_task *t, int64_t disk );
00415
00421 void work_queue_task_specify_cores( struct work_queue_task *t, int cores );
00422
00428 void work_queue_task_specify_gpus( struct work_queue_task *t, int gpus );
00429
00437 void work_queue_task_specify_end_time( struct work_queue_task *t, int64_t useconds );
00438
00446 void work_queue_task_specify_running_time( struct work_queue_task *t, int64_t useconds );
00447
00454 void work_queue_task_specify_tag(struct work_queue_task *t, const char *tag);
00455
00462 void work_queue_task_specify_category(struct work_queue_task *t, const char *category);
00463
00470 void work_queue_task_specify_priority(struct work_queue_task *t, double priority );
00471
00478 void work_queue_task_specify_enviroment_variable( struct work_queue_task *t, const char *name, const char *value );
00479
00485 void work_queue_task_specify_algorithm(struct work_queue_task *t, work_queue_schedule_t algorithm);
00486
00492 void work_queue_task_specify_monitor_output(struct work_queue_task *t, const char *monitor_output);
00493
00498 void work_queue_task_delete(struct work_queue_task *t);
00499
00501
00505
/*
 * Queue creation, monitoring and submission.
 *
 * NOTE(review): only declarations are visible in this header. The summaries
 * below are inferred from names and signatures; return-value conventions
 * must be confirmed against the implementation.
 */

/** Create a queue for the given port. */
struct work_queue *work_queue_create(int port);

/** Enable monitoring, writing under the given directory. The path parameter is a non-const char *. */
int work_queue_enable_monitoring(struct work_queue *q, char *monitor_output_directory);

/** Enable the "full" variant of monitoring. The path parameter is a non-const char *. */
int work_queue_enable_monitoring_full(struct work_queue *q, char *monitor_output_directory);

/** Submit a task to a queue. NOTE(review): the int result is presumably the task id — confirm. */
int work_queue_submit(struct work_queue *q, struct work_queue_task *t);
00557
00558
00569 int work_queue_specify_min_taskid(struct work_queue *q, int minid);
00570
00575 void work_queue_blacklist_add(struct work_queue *q, const char *hostname);
00576
00584 void work_queue_blacklist_add_with_timeout(struct work_queue *q, const char *hostname, time_t seconds);
00585
00586
00591 void work_queue_blacklist_remove(struct work_queue *q, const char *hostname);
00592
00593
00597 void work_queue_blacklist_clear(struct work_queue *q);
00598
00612 void work_queue_invalidate_cached_file(struct work_queue *q, const char *local_name, work_queue_file_t type);
00613
00614
00629 struct work_queue_task *work_queue_wait(struct work_queue *q, int timeout);
00630
00642 int work_queue_hungry(struct work_queue *q);
00643
00651 int work_queue_empty(struct work_queue *q);
00652
00659 int work_queue_port(struct work_queue *q);
00660
00665 void work_queue_get_stats(struct work_queue *q, struct work_queue_stats *s);
00666
00671 void work_queue_get_stats_hierarchy(struct work_queue *q, struct work_queue_stats *s);
00672
00678 void work_queue_get_stats_category(struct work_queue *q, const char *c, struct work_queue_stats *s);
00679
00680
00686 work_queue_task_state_t work_queue_task_state(struct work_queue *q, int taskid);
00687
00692 void work_queue_set_bandwidth_limit(struct work_queue *q, const char *bandwidth);
00693
00698 double work_queue_get_effective_bandwidth(struct work_queue *q);
00699
00706 char * work_queue_get_worker_summary( struct work_queue *q );
00707
00717 int work_queue_activate_fast_abort(struct work_queue *q, double multiplier);
00718
00719
00729 int work_queue_activate_fast_abort_category(struct work_queue *q, const char *category, double multiplier);
00730
00737 int work_queue_specify_category_mode(struct work_queue *q, const char *category, category_mode_t mode);
00738
00746 int work_queue_enable_category_resource(struct work_queue *q, const char *category, const char *resource, int autolabel);
00747
00753 void work_queue_specify_algorithm(struct work_queue *q, work_queue_schedule_t algorithm);
00754
00759 const char *work_queue_name(struct work_queue *q);
00760
00765 void work_queue_specify_name(struct work_queue *q, const char *name);
00766
00771 void work_queue_specify_priority(struct work_queue *q, int priority);
00772
00781 void work_queue_specify_num_tasks_left(struct work_queue *q, int ntasks);
00782
00788 void work_queue_specify_catalog_server(struct work_queue *q, const char *hostname, int port);
00789
00794 void work_queue_specify_catalog_servers(struct work_queue *q, const char *hosts);
00795
/*
 * Cancellation and shutdown.
 *
 * NOTE(review): only declarations are visible in this header. The summaries
 * below are inferred from names and signatures; ownership of returned
 * objects must be confirmed against the implementation.
 */

/** Cancel the task with the given id and return a task pointer. */
struct work_queue_task *work_queue_cancel_by_taskid(struct work_queue *q, int id);

/** Cancel a task selected by tag and return a task pointer. */
struct work_queue_task *work_queue_cancel_by_tasktag(struct work_queue *q, const char *tag);

/** Cancel all tasks and return them as a list. */
struct list *work_queue_cancel_all_tasks(struct work_queue *q);

/** Shut down workers; `n` is a count. NOTE(review): meaning of special values of n and of the result not shown here. */
int work_queue_shut_down_workers(struct work_queue *q, int n);

/** Destroy a queue. */
void work_queue_delete(struct work_queue *q);
00827
/*
 * Logging, authentication, keepalive, tuning and resource limits.
 *
 * NOTE(review): only declarations are visible in this header. The summaries
 * below are inferred from names and signatures; return-value conventions
 * must be confirmed against the implementation.
 */

/** Set the log file path. */
int work_queue_specify_log(struct work_queue *q, const char *logfile);

/** Set the transactions log file path. */
int work_queue_specify_transactions_log(struct work_queue *q, const char *logfile);

/** Set the password directly from a string. */
void work_queue_specify_password(struct work_queue *q, const char *password);

/** Set the password from a file path. */
int work_queue_specify_password_file(struct work_queue *q, const char *file);

/** Set the keepalive interval. NOTE(review): units presumably seconds — confirm. */
void work_queue_specify_keepalive_interval(struct work_queue *q, int interval);

/** Set the keepalive timeout. NOTE(review): units presumably seconds — confirm. */
void work_queue_specify_keepalive_timeout(struct work_queue *q, int timeout);

/** Set the preferred connection mode, given as a string. NOTE(review): accepted values not shown here. */
void work_queue_master_preferred_connection(struct work_queue *q, const char *preferred_connection);

/** Set a named tuning parameter to a numeric value. NOTE(review): valid names not shown here. */
int work_queue_tune(struct work_queue *q, const char *name, double value);

/** Set queue-wide maximum resources; *rm is taken as const. */
void work_queue_specify_max_resources(struct work_queue *q, const struct rmsummary *rm);

/** Set maximum resources for one category; *rm is taken as const. */
void work_queue_specify_category_max_resources(struct work_queue *q, const char *category, const struct rmsummary *rm);

/** Set the first allocation guess for one category; *rm is taken as const. */
void work_queue_specify_category_first_allocation_guess(struct work_queue *q, const char *category, const struct rmsummary *rm);

/** Initialize categories from a summaries file; `max` is taken as non-const. */
void work_queue_initialize_categories(struct work_queue *q, struct rmsummary *max, const char *summaries_file);
00920
00921
00923
00927
/*
 * Task-order and master-mode constants, plus the remaining convenience
 * declarations.
 *
 * NOTE(review): only declarations are visible in this header. The summaries
 * below are inferred from names and signatures; in particular the input/output
 * helpers look like thin wrappers over work_queue_task_specify_file and
 * work_queue_task_specify_buffer, but that is not shown here — confirm.
 */

/* Values for the `order` argument of work_queue_specify_task_order. */
#define WORK_QUEUE_TASK_ORDER_FIFO 0
#define WORK_QUEUE_TASK_ORDER_LIFO 1

/** Set the task ordering; see WORK_QUEUE_TASK_ORDER_*. */
void work_queue_specify_task_order(struct work_queue *q, int order);

/* Values for the `mode` argument of work_queue_specify_master_mode. */
#define WORK_QUEUE_MASTER_MODE_STANDALONE 0
#define WORK_QUEUE_MASTER_MODE_CATALOG 1

/** Set the master mode; see WORK_QUEUE_MASTER_MODE_*. */
void work_queue_specify_master_mode(struct work_queue *q, int mode);

/** Switch capacity estimation on or off; `estimate_capacity_on` is an int flag. */
void work_queue_specify_estimate_capacity_on(struct work_queue *q, int estimate_capacity_on);

/** Attach a buffer of `length` bytes as input under remote name `rname`. */
int work_queue_task_specify_input_buf(struct work_queue_task *t, const char *buf, int length, const char *rname);

/** Attach local file `fname` as input under remote name `rname`. */
int work_queue_task_specify_input_file(struct work_queue_task *t, const char *fname, const char *rname);

/** Same arguments as work_queue_task_specify_input_file, "do not cache" variant. */
int work_queue_task_specify_input_file_do_not_cache(struct work_queue_task *t, const char *fname, const char *rname);

/** Attach remote file `rname` as output stored locally as `fname`. Note the (rname, fname) order is the reverse of the input helpers. */
int work_queue_task_specify_output_file(struct work_queue_task *t, const char *rname, const char *fname);

/** Same arguments as work_queue_task_specify_output_file, "do not cache" variant. */
int work_queue_task_specify_output_file_do_not_cache(struct work_queue_task *t, const char *rname, const char *fname);

/** Build a file name from a directory and a task id. NOTE(review): the caller presumably owns the returned string — confirm. */
char *work_queue_generate_disk_alloc_full_filename(char *pwd, int taskid);
01011
01013
01014 #endif