20 #include <sys/types.h>
22 #include "rmsummary.h"
24 #define WORK_QUEUE_DEFAULT_PORT 9123
25 #define WORK_QUEUE_RANDOM_PORT 0
26 #define WORK_QUEUE_WAITFORTASK -1
28 #define WORK_QUEUE_SCHEDULE_UNSET 0
29 #define WORK_QUEUE_SCHEDULE_FCFS 1
30 #define WORK_QUEUE_SCHEDULE_FILES 2
31 #define WORK_QUEUE_SCHEDULE_TIME 3
32 #define WORK_QUEUE_SCHEDULE_RAND 4
33 #define WORK_QUEUE_SCHEDULE_WORST 5
35 #define WORK_QUEUE_INPUT 0
36 #define WORK_QUEUE_OUTPUT 1
38 #define WORK_QUEUE_NOCACHE 0
39 #define WORK_QUEUE_CACHE 1
40 #define WORK_QUEUE_SYMLINK 2
41 #define WORK_QUEUE_PREEXIST 4
42 #define WORK_QUEUE_THIRDGET 8
43 #define WORK_QUEUE_THIRDPUT 8
44 #define WORK_QUEUE_WATCH 16
46 #define WORK_QUEUE_RESET_ALL 0
47 #define WORK_QUEUE_RESET_KEEP_TASKS 1
49 #define WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL 300
50 #define WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT 30
52 #define WORK_QUEUE_RESULT_SUCCESS 0
53 #define WORK_QUEUE_RESULT_INPUT_MISSING 1
54 #define WORK_QUEUE_RESULT_OUTPUT_MISSING 2
55 #define WORK_QUEUE_RESULT_STDOUT_MISSING 4
56 #define WORK_QUEUE_RESULT_SIGNAL 8
57 #define WORK_QUEUE_RESULT_RESOURCE_EXHAUSTION 16
58 #define WORK_QUEUE_RESULT_TASK_TIMEOUT 32
61 #define WORK_QUEUE_TASK_UNKNOWN 0
62 #define WORK_QUEUE_TASK_READY 1
63 #define WORK_QUEUE_TASK_RUNNING 2
64 #define WORK_QUEUE_TASK_WAITING_RETRIEVAL 3
65 #define WORK_QUEUE_TASK_RETRIEVED 4
66 #define WORK_QUEUE_TASK_DONE 5
67 #define WORK_QUEUE_TASK_CANCELED 6
69 extern double wq_option_fast_abort_multiplier;
71 extern int wq_option_scheduler;
75 struct work_queue_task {
110 int64_t maximum_end_time;
626 int work_queue_tune(
struct work_queue *q,
const char *name,
double value);
634 #define WORK_QUEUE_TASK_ORDER_FIFO 0
635 #define WORK_QUEUE_TASK_ORDER_LIFO 1
643 void work_queue_specify_task_order(struct work_queue *q, int order);
646 #define WORK_QUEUE_MASTER_MODE_STANDALONE 0
647 #define WORK_QUEUE_MASTER_MODE_CATALOG 1
656 void work_queue_specify_master_mode(struct work_queue *q, int mode);
720 void work_queue_activate_worker_waiting(
struct work_queue *q,
int resources);
int64_t total_memory
Total memory in MB aggregated across the connected workers.
Definition: work_queue.h:160
int64_t committed_gpus
Committed number of GPUs aggregated across the connected workers.
Definition: work_queue.h:166
int work_queue_task_specify_buffer(struct work_queue_task *t, const char *data, int length, const char *remote_name, int flags)
Add an input buffer to a task.
void work_queue_task_specify_env(struct work_queue_task *t, const char *name, const char *value)
Specify an environment variable to be added to the task.
void work_queue_specify_estimate_capacity_on(struct work_queue *q, int estimate_capacity_on)
Change whether to estimate master capacity for a given queue.
int work_queue_send_receive_ratio(struct work_queue *q, double ratio)
Change the preference to send or receive tasks.
int64_t committed_disk
Committed disk space in MB aggregated across the connected workers.
Definition: work_queue.h:165
int64_t min_disk
The smallest disk space in MB observed among the connected workers.
Definition: work_queue.h:171
A task description.
Definition: work_queue.h:75
void work_queue_task_specify_cores(struct work_queue_task *t, int cores)
Specify the number of cores required by a task.
timestamp_t total_good_execute_time
Total time in microseconds workers spent executing successful tasks.
Definition: work_queue.h:149
timestamp_t time_send_input_start
The time at which it started to transfer input files.
Definition: work_queue.h:93
struct list * work_queue_cancel_all_tasks(struct work_queue *q)
Cancel all submitted tasks and remove them from the queue.
timestamp_t start_time
Absolute time at which the master started.
Definition: work_queue.h:143
timestamp_t cmd_execution_time
Time spent in microseconds for executing the command on the worker.
Definition: work_queue.h:106
int tasks_running
Number of tasks currently running.
Definition: work_queue.h:136
int work_queue_task_specify_input_file_do_not_cache(struct work_queue_task *t, const char *fname, const char *rname)
Add an input file to a task, without caching.
int total_workers_joined
Total number of worker connections that were established to the master.
Definition: work_queue.h:132
int work_queue_task_specify_output_file(struct work_queue_task *t, const char *rname, const char *fname)
Add an output file to a task.
timestamp_t time_receive_output_start
The time at which it started to transfer output files.
Definition: work_queue.h:99
timestamp_t total_cmd_execution_time
Time spent in microseconds for executing the command on any worker, including resubmissions of the ta...
Definition: work_queue.h:108
void work_queue_get_stats(struct work_queue *q, struct work_queue_stats *s)
Get queue statistics (only from master).
struct work_queue_task * work_queue_cancel_by_taskid(struct work_queue *q, int id)
Cancel a submitted task using its task id and remove it from queue.
int workers_full
Definition: work_queue.h:178
int total_tasks_dispatched
Total number of tasks dispatched to workers.
Definition: work_queue.h:138
struct work_queue * work_queue_create(int port)
Create a new work queue.
void work_queue_specify_algorithm(struct work_queue *q, int algo)
Change the worker selection algorithm.
timestamp_t total_good_transfer_time
Total time in microseconds spent in sending and receiving data to workers for tasks with result WQ_RE...
Definition: work_queue.h:146
int workers_init
Number of workers initializing.
Definition: work_queue.h:129
int work_queue_task_specify_output_file_do_not_cache(struct work_queue_task *t, const char *rname, const char *fname)
Add an output file to a task without caching.
timestamp_t time_send_input_finish
The time at which it finished transferring input files.
Definition: work_queue.h:94
void work_queue_get_stats_hierarchy(struct work_queue *q, struct work_queue_stats *s)
Get statistics of the master queue together with foremen information.
timestamp_t time_app_delay
Definition: work_queue.h:121
void work_queue_task_specify_command(struct work_queue_task *t, const char *cmd)
Indicate the command to be executed.
int tasks_waiting
Number of tasks waiting to be run.
Definition: work_queue.h:135
int total_submissions
The number of times the task has been submitted.
Definition: work_queue.h:107
char * hostname
The name of the host on which it ran.
Definition: work_queue.h:87
Portable routines for high resolution timing.
double work_queue_get_effective_bandwidth(struct work_queue *q)
Get current queue bandwidth.
int64_t total_gpus
Total number of GPUs aggregated across the connected workers.
Definition: work_queue.h:162
int64_t total_bytes_sent
Total number of file bytes (not including protocol control msg bytes) sent out to the workers by the ...
Definition: work_queue.h:152
struct list * output_files
The output files (other than the standard output stream) created by the program expected to be retrie...
Definition: work_queue.h:81
int64_t total_bytes_transferred
Number of bytes transferred since task has last started transferring input data.
Definition: work_queue.h:104
void work_queue_delete(struct work_queue *q)
Delete a work queue.
UINT64_T timestamp_t
A type to hold the current time, in microseconds since January 1st, 1970.
Definition: timestamp.h:20
struct list * env_list
Environment variables applied to the task.
Definition: work_queue.h:82
int tasks_complete
Number of tasks waiting to be returned to user.
Definition: work_queue.h:137
int64_t min_gpus
The lowest number of GPUs observed among the connected workers.
Definition: work_queue.h:173
struct list * input_files
The files to transfer to the worker and place in the executing directory.
Definition: work_queue.h:80
double bandwidth
Average network bandwidth in MB/s observed by the master when transferring to workers.
Definition: work_queue.h:158
void work_queue_task_specify_algorithm(struct work_queue_task *t, int algo)
Select the scheduling algorithm for a single task.
struct work_queue_task * work_queue_cancel_by_tasktag(struct work_queue *q, const char *tag)
Cancel a submitted task using its tag and remove it from queue.
int total_tasks_complete
Total number of tasks completed and returned to user.
Definition: work_queue.h:139
struct work_queue_task * work_queue_wait(struct work_queue *q, int timeout)
Wait for a task to complete.
void work_queue_task_specify_memory(struct work_queue_task *t, int64_t memory)
Specify the amount of memory required by a task.
char * command_line
The program(s) to execute, as a shell command line.
Definition: work_queue.h:77
int capacity
The estimated number of workers that this master can effectively support.
Definition: work_queue.h:156
int64_t committed_memory
Committed memory in MB aggregated across the connected workers.
Definition: work_queue.h:164
int total_workers_removed
Total number of worker connections that were terminated by the master.
Definition: work_queue.h:133
int work_queue_specify_log(struct work_queue *q, const char *logfile)
Add a log file that records the states of the connected workers and submitted tasks.
timestamp_t time_receive_output_finish
The time at which it finished transferring output files.
Definition: work_queue.h:100
timestamp_t time_task_submit
The time at which this task was submitted.
Definition: work_queue.h:91
timestamp_t time_receive_result_finish
The time at which it finished transferring the results.
Definition: work_queue.h:98
int work_queue_task_specify_input_buf(struct work_queue_task *t, const char *buf, int length, const char *rname)
Add an input buffer to a task.
int64_t committed_cores
Committed number of cores aggregated across the connected workers.
Definition: work_queue.h:163
void work_queue_specify_catalog_server(struct work_queue *q, const char *hostname, int port)
Specify the catalog server the master should report to.
char * host
The address and port of the host on which it ran.
Definition: work_queue.h:86
int work_queue_port(struct work_queue *q)
Get the listening port of the queue.
int work_queue_specify_password_file(struct work_queue *q, const char *file)
Add a mandatory password file that each worker must present.
int taskid
A unique task id number.
Definition: work_queue.h:83
int avg_capacity
Definition: work_queue.h:180
int workers_busy
Number of workers that are running at least one task.
Definition: work_queue.h:131
struct work_queue_task * work_queue_task_create(const char *full_command)
Create a new task object.
void work_queue_specify_password(struct work_queue *q, const char *password)
Add a mandatory password that each worker must present.
struct rmsummary * resources_measured
When monitoring is enabled, it points to the measured resources used by the task. ...
Definition: work_queue.h:119
timestamp_t time_committed
The time at which a task was committed to a worker.
Definition: work_queue.h:89
int work_queue_task_specify_file(struct work_queue_task *t, const char *local_name, const char *remote_name, int type, int flags)
Add a file to a task.
int64_t total_bytes_received
Total number of file bytes (not including protocol control msg bytes) received from the workers by th...
Definition: work_queue.h:153
void work_queue_task_specify_tag(struct work_queue_task *t, const char *tag)
Attach a user defined string tag to the task.
int total_tasks_cancelled
Total number of tasks cancelled.
Definition: work_queue.h:141
void work_queue_task_specify_priority(struct work_queue_task *t, double priority)
Specify the priority of this task relative to others in the queue.
void work_queue_task_specify_end_time(struct work_queue_task *t, int64_t seconds)
Specify the maximum end time allowed for the task (in seconds since the Epoch).
void work_queue_blacklist_add(struct work_queue *q, const char *hostname)
Blacklist host from a queue.
struct work_queue_task * work_queue_task_clone(const struct work_queue_task *task)
Create a copy of a task. Create a functionally identical copy of a work_queue_task that can be re-sub...
int work_queue_empty(struct work_queue *q)
Determine whether the queue is empty.
void work_queue_task_specify_disk(struct work_queue_task *t, int64_t disk)
Specify the amount of disk space required by a task.
int work_queue_shut_down_workers(struct work_queue *q, int n)
Shut down workers connected to the work_queue system.
int64_t min_memory
The smallest memory size in MB observed among the connected workers.
Definition: work_queue.h:169
void work_queue_set_bandwidth_limit(struct work_queue *q, const char *bandwidth)
Limit the queue bandwidth when transferring files to and from workers.
const char * work_queue_name(struct work_queue *q)
Get the project name of the queue.
double priority
The priority of this task relative to others in the queue: higher numbers run earlier.
Definition: work_queue.h:117
void work_queue_specify_keepalive_timeout(struct work_queue *q, int timeout)
Change the keepalive timeout for identifying dead workers for a given queue.
void work_queue_blacklist_clear(struct work_queue *q)
Clear blacklist of a queue.
Statistics describing a work queue.
Definition: work_queue.h:127
Definition: rmsummary.h:24
int work_queue_task_specify_file_piece(struct work_queue_task *t, const char *local_name, const char *remote_name, off_t start_byte, off_t end_byte, int type, int flags)
Add a file piece to a task.
timestamp_t time_receive_result_start
The time at which it started to transfer the results.
Definition: work_queue.h:97
int return_status
The exit code of the command line.
Definition: work_queue.h:84
int64_t min_cores
The lowest number of cores observed among the connected workers.
Definition: work_queue.h:167
int64_t max_gpus
The highest number of GPUs observed among the connected workers.
Definition: work_queue.h:174
void work_queue_task_specify_gpus(struct work_queue_task *t, int gpus)
Specify the number of gpus required by a task.
int total_workers_connected
Total number of workers currently connected to the master.
Definition: work_queue.h:128
timestamp_t total_receive_time
Total time in microseconds spent in receiving data from workers.
Definition: work_queue.h:145
int work_queue_hungry(struct work_queue *q)
Determine whether the queue is 'hungry' for more tasks.
int work_queue_tune(struct work_queue *q, const char *name, double value)
Tune advanced parameters for work queue.
int64_t total_cores
Total number of cores aggregated across the connected workers.
Definition: work_queue.h:159
int work_queue_task_state(struct work_queue *q, int taskid)
Get the current state of the task.
int result
The result of the task (successful, failed return_status, missing input file, missing output file)...
Definition: work_queue.h:85
int total_worker_slots
Definition: work_queue.h:179
int64_t max_disk
The largest disk space in MB observed among the connected workers.
Definition: work_queue.h:172
timestamp_t total_transfer_time
Time consumed in microseconds for transferring total_bytes_transferred.
Definition: work_queue.h:105
double idle_percentage
The fraction of time that the master is idle waiting for workers to respond.
Definition: work_queue.h:155
int work_queue_activate_fast_abort(struct work_queue *q, double multiplier)
Turn on or off fast abort functionality for a given queue.
int64_t total_disk
Total disk space in MB aggregated across the connected workers.
Definition: work_queue.h:161
char * tag
An optional user-defined logical name for the task.
Definition: work_queue.h:76
timestamp_t total_execute_time
Total time in microseconds workers spent executing completed tasks.
Definition: work_queue.h:148
int work_queue_submit(struct work_queue *q, struct work_queue_task *t)
Submit a task to a queue.
int worker_selection_algorithm
How to choose worker to run the task.
Definition: work_queue.h:78
timestamp_t time_execute_cmd_finish
The time at which the task finished (discovered by the master).
Definition: work_queue.h:96
double efficiency
Parallel efficiency of the system, sum(task execution times) / sum(worker lifetimes) ...
Definition: work_queue.h:154
void work_queue_specify_keepalive_interval(struct work_queue *q, int interval)
Change the keepalive interval for a given queue.
timestamp_t time_task_finish
The time at which this task was finished.
Definition: work_queue.h:92
int work_queue_task_specify_input_file(struct work_queue_task *t, const char *fname, const char *rname)
Add an input file to a task.
timestamp_t time_execute_cmd_start
The time at which the task began.
Definition: work_queue.h:95
int workers_idle
Number of workers that are not running a task.
Definition: work_queue.h:130
int64_t total_bytes_sent
Number of bytes sent since task has last started sending input data.
Definition: work_queue.h:103
timestamp_t total_send_time
Total time in microseconds spent in sending data to workers.
Definition: work_queue.h:144
char * work_queue_get_worker_summary(struct work_queue *q)
Summarize workers.
void work_queue_specify_priority(struct work_queue *q, int priority)
Change the priority for a given queue.
int64_t total_bytes_received
Number of bytes received since task has last started receiving input data.
Definition: work_queue.h:102
int total_tasks_failed
Total number of tasks completed and returned to user with result other than WQ_RESULT_SUCCESS.
Definition: work_queue.h:140
int work_queue_enable_monitoring(struct work_queue *q, char *monitor_summary_file)
Enables resource monitoring on the given work queue.
int64_t max_memory
The largest memory size in MB observed among the connected workers.
Definition: work_queue.h:170
void work_queue_task_delete(struct work_queue_task *t)
Delete a task.
char * output
The standard output of the task.
Definition: work_queue.h:79
void work_queue_blacklist_remove(struct work_queue *q, const char *hostname)
Unblacklist host from a queue.
void work_queue_specify_name(struct work_queue *q, const char *name)
Change the project name for a given queue.
int workers_ready
Definition: work_queue.h:177
int work_queue_task_specify_directory(struct work_queue_task *t, const char *local_name, const char *remote_name, int type, int flags, int recursive)
Add a directory to a task.
int64_t max_cores
The highest number of cores observed among the connected workers.
Definition: work_queue.h:168