20 #include <sys/types.h>
23 #include "rmsummary.h"
25 #define WORK_QUEUE_DEFAULT_PORT 9123
26 #define WORK_QUEUE_RANDOM_PORT 0
28 #define WORK_QUEUE_WAITFORTASK -1
30 #define WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL 120
31 #define WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT 30
49 WORK_QUEUE_SCHEDULE_UNSET = 0,
751 int work_queue_tune(
struct work_queue *q,
const char *name,
double value);
784 #define WORK_QUEUE_TASK_ORDER_FIFO 0
785 #define WORK_QUEUE_TASK_ORDER_LIFO 1
793 void work_queue_specify_task_order(struct work_queue *q, int order);
796 #define WORK_QUEUE_MASTER_MODE_STANDALONE 0
797 #define WORK_QUEUE_MASTER_MODE_CATALOG 1
806 void work_queue_specify_master_mode(struct work_queue *q, int mode);
871 void work_queue_activate_worker_waiting(
struct work_queue *q,
int resources);
int64_t total_memory
Total memory in MB aggregated across the connected workers.
Definition: work_queue.h:193
Task is ready to be run, waiting in queue.
Definition: work_queue.h:74
int64_t committed_gpus
Committed number of GPUs aggregated across the connected workers.
Definition: work_queue.h:199
If the filename already exists on the host, use it in place.
Definition: work_queue.h:42
void work_queue_specify_estimate_capacity_on(struct work_queue *q, int estimate_capacity_on)
Change whether to estimate master capacity for a given queue.
int work_queue_send_receive_ratio(struct work_queue *q, double ratio)
Change the preference to send or receive tasks.
category_allocation_t resource_request
See category_allocation_t.
Definition: work_queue.h:144
int64_t committed_disk
Committed disk space in MB aggregated across the connected workers.
Definition: work_queue.h:198
Task results are available at the worker.
Definition: work_queue.h:76
int64_t min_disk
The smallest disk space in MB observed among the connected workers.
Definition: work_queue.h:204
int work_queue_enable_monitoring_full(struct work_queue *q, char *monitor_output_directory)
Enables resource monitoring on the given work queue.
int work_queue_task_specify_file_piece(struct work_queue_task *t, const char *local_name, const char *remote_name, off_t start_byte, off_t end_byte, work_queue_file_type_t type, work_queue_file_flags_t flags)
Add a file piece to a task.
A task description.
Definition: work_queue.h:102
void work_queue_task_specify_cores(struct work_queue_task *t, int cores)
Specify the number of cores required by a task.
timestamp_t total_good_execute_time
Total time in microseconds workers spent executing successful tasks.
Definition: work_queue.h:182
timestamp_t time_send_input_start
The time at which it started to transfer input files.
Definition: work_queue.h:120
struct list * work_queue_cancel_all_tasks(struct work_queue *q)
Cancel all submitted tasks and remove them from the queue.
void work_queue_task_specify_enviroment_variable(struct work_queue_task *t, const char *name, const char *value)
Specify an environment variable to be added to the task.
timestamp_t start_time
Absolute time at which the master started.
Definition: work_queue.h:176
timestamp_t cmd_execution_time
Time spent in microseconds for executing the command until completion on a single worker...
Definition: work_queue.h:133
int tasks_running
Number of tasks currently running.
Definition: work_queue.h:167
int work_queue_task_specify_input_file_do_not_cache(struct work_queue_task *t, const char *fname, const char *rname)
Add an input file to a task, without caching.
int total_workers_joined
Total number of worker connections that were established to the master.
Definition: work_queue.h:159
Task results are available at the master.
Definition: work_queue.h:77
int work_queue_task_specify_output_file(struct work_queue_task *t, const char *rname, const char *fname)
Add an output file to a task.
timestamp_t time_receive_output_start
The time at which it started to transfer output files.
Definition: work_queue.h:126
timestamp_t total_cmd_execution_time
Accumulated time spent in microseconds for executing the command on any worker, regardless of whether...
Definition: work_queue.h:135
void work_queue_get_stats(struct work_queue *q, struct work_queue_stats *s)
Get queue statistics (only from master).
struct work_queue_task * work_queue_cancel_by_taskid(struct work_queue *q, int id)
Cancel a submitted task using its task id and remove it from queue.
int workers_full
Definition: work_queue.h:211
int total_tasks_dispatched
Total number of tasks dispatched to workers.
Definition: work_queue.h:171
struct work_queue * work_queue_create(int port)
Create a new work queue.
timestamp_t total_good_transfer_time
Total time in microseconds spent in sending and receiving data to workers for tasks with result WQ_RE...
Definition: work_queue.h:179
int workers_init
Number of workers initializing.
Definition: work_queue.h:155
int work_queue_task_specify_output_file_do_not_cache(struct work_queue_task *t, const char *rname, const char *fname)
Add an output file to a task without caching.
timestamp_t time_send_input_finish
The time at which it finished transferring input files.
Definition: work_queue.h:121
void work_queue_get_stats_hierarchy(struct work_queue *q, struct work_queue_stats *s)
Get statistics of the master queue together with foremen information.
timestamp_t time_app_delay
Definition: work_queue.h:148
void work_queue_task_specify_command(struct work_queue_task *t, const char *cmd)
Indicate the command to be executed.
int tasks_waiting
Number of tasks waiting to be run.
Definition: work_queue.h:166
work_queue_task_state_t
Definition: work_queue.h:72
int total_submissions
The number of times the task has been submitted.
Definition: work_queue.h:134
char * hostname
The name of the host on which it ran.
Definition: work_queue.h:114
void work_queue_invalidate_cached_file(struct work_queue *q, const char *local_name, work_queue_file_t type)
Invalidate cached file.
The task ran for more than the specified time (relative since running in a worker).
Definition: work_queue.h:69
Portable routines for high resolution timing.
double work_queue_get_effective_bandwidth(struct work_queue *q)
Get current queue bandwidth.
int64_t total_gpus
Total number of GPUs aggregated across the connected workers.
Definition: work_queue.h:195
The task ran but failed to generate a specified output file.
Definition: work_queue.h:61
int64_t total_bytes_sent
Total number of file bytes (not including protocol control msg bytes) sent out to the workers by the ...
Definition: work_queue.h:185
struct list * output_files
The output files (other than the standard output stream) created by the program to be retrieved from ...
Definition: work_queue.h:108
int64_t total_bytes_transferred
Number of bytes transferred since task has last started transferring input data.
Definition: work_queue.h:131
void work_queue_delete(struct work_queue *q)
Delete a work queue.
UINT64_T timestamp_t
A type to hold the current time, in microseconds since January 1st, 1970.
Definition: timestamp.h:20
struct list * env_list
Environment variables applied to the task.
Definition: work_queue.h:109
int tasks_complete
Number of tasks waiting to be returned to user.
Definition: work_queue.h:168
int64_t min_gpus
The lowest number of GPUs observed among the connected workers.
Definition: work_queue.h:206
struct list * input_files
The files to transfer to the worker and place in the executing directory.
Definition: work_queue.h:107
double bandwidth
Average network bandwidth in MB/S observed by the master when transferring to workers.
Definition: work_queue.h:191
struct work_queue_task * work_queue_cancel_by_tasktag(struct work_queue *q, const char *tag)
Cancel a submitted task using its tag and remove it from queue.
work_queue_task_state_t work_queue_task_state(struct work_queue *q, int taskid)
Get the current state of the task.
void work_queue_task_specify_running_time(struct work_queue_task *t, int64_t useconds)
Specify the maximum time (in microseconds) the task is allowed to run in a worker.
char * category
User-provided label for the task.
Definition: work_queue.h:146
int total_tasks_complete
Total number of tasks completed and returned to user.
Definition: work_queue.h:172
struct work_queue_task * work_queue_wait(struct work_queue *q, int timeout)
Wait for a task to complete.
void work_queue_task_specify_memory(struct work_queue_task *t, int64_t memory)
Specify the amount of memory required by a task.
char * command_line
The program(s) to execute, as a shell command line.
Definition: work_queue.h:104
int capacity
The estimated number of workers that this master can effectively support.
Definition: work_queue.h:189
There is no such task.
Definition: work_queue.h:73
int64_t committed_memory
Committed memory in MB aggregated across the connected workers.
Definition: work_queue.h:197
int total_workers_removed
Total number of worker connections that were lost or terminated by the master.
Definition: work_queue.h:160
File-spec is a regular file.
Definition: work_queue.h:84
Task is done, and returned through work_queue_wait.
Definition: work_queue.h:78
The result could not be classified.
Definition: work_queue.h:66
Specify an output object.
Definition: work_queue.h:35
The task failed, but it was neither a task or worker error.
Definition: work_queue.h:67
int work_queue_specify_log(struct work_queue *q, const char *logfile)
Add a log file that records the states of the connected workers and submitted tasks.
timestamp_t time_receive_output_finish
The time at which it finished transferring output files.
Definition: work_queue.h:127
Select worker that has the most data required by the task.
Definition: work_queue.h:51
int total_workers_fast_aborted
Total number of worker connections terminated for being too slow.
Definition: work_queue.h:163
timestamp_t time_task_submit
The time at which this task was submitted.
Definition: work_queue.h:118
timestamp_t time_receive_result_finish
The time at which it finished transferring the results.
Definition: work_queue.h:125
int work_queue_task_specify_input_buf(struct work_queue_task *t, const char *buf, int length, const char *rname)
Add an input buffer to a task.
int64_t committed_cores
Committed number of cores aggregated across the connected workers.
Definition: work_queue.h:196
int work_queue_task_specify_buffer(struct work_queue_task *t, const char *data, int length, const char *remote_name, work_queue_file_flags_t)
Add an input buffer to a task.
void work_queue_specify_catalog_server(struct work_queue *q, const char *hostname, int port)
Specify the catalog server the master should report to.
void work_queue_blacklist_add_with_timeout(struct work_queue *q, const char *hostname, time_t seconds)
Blacklist hostname from a queue.
char * host
The address and port of the host on which it ran.
Definition: work_queue.h:113
int work_queue_port(struct work_queue *q)
Get the listening port of the queue.
work_queue_schedule_t
Definition: work_queue.h:48
int work_queue_specify_password_file(struct work_queue *q, const char *file)
Add a mandatory password file that each worker must present.
Access the file on the client from a shared filesystem (same as WORK_QUEUE_THIRDGET, included for readability)
Definition: work_queue.h:44
int taskid
A unique task id number.
Definition: work_queue.h:110
int avg_capacity
Definition: work_queue.h:213
Worker gave up on the task, and task will be resubmitted.
Definition: work_queue.h:80
int workers_busy
Number of workers that are running at least one task.
Definition: work_queue.h:157
struct work_queue_task * work_queue_task_create(const char *full_command)
Create a new task object.
File-spec refers to a URL.
Definition: work_queue.h:89
void work_queue_specify_password(struct work_queue *q, const char *password)
Add a mandatory password that each worker must present.
struct rmsummary * resources_measured
When monitoring is enabled, it points to the measured resources used by the task. ...
Definition: work_queue.h:141
timestamp_t time_committed
The time at which a task was committed to a worker.
Definition: work_queue.h:116
work_queue_schedule_t worker_selection_algorithm
How to choose worker to run the task.
Definition: work_queue.h:105
The task ran successfully.
Definition: work_queue.h:59
int64_t total_bytes_received
Total number of file bytes (not including protocol control msg bytes) received from the workers by th...
Definition: work_queue.h:186
void work_queue_task_specify_tag(struct work_queue_task *t, const char *tag)
Attach a user defined string tag to the task.
int total_tasks_cancelled
Total number of tasks cancelled.
Definition: work_queue.h:174
Create a symlink to the file rather than copying it, if possible.
Definition: work_queue.h:41
void work_queue_task_specify_priority(struct work_queue_task *t, double priority)
Specify the priority of this task relative to others in the queue.
int max_retries
Number of times the task is retried on worker errors until success.
Definition: work_queue.h:139
struct rmsummary * resources_requested
Number of cores, disk, memory, time, etc.
Definition: work_queue.h:142
void work_queue_master_preferred_connection(struct work_queue *q, const char *preferred_connection)
Set the preference for using hostname over IP address to connect.
File-spec is a regular file.
Definition: work_queue.h:86
work_queue_result_t result
The result of the task (see work_queue_result_t).
Definition: work_queue.h:112
void work_queue_task_specify_max_retries(struct work_queue_task *t, int64_t max_retries)
Specify the number of times this task is retried on worker errors.
void work_queue_blacklist_add(struct work_queue *q, const char *hostname)
Blacklist hostname from a queue.
struct work_queue_task * work_queue_task_clone(const struct work_queue_task *task)
Create a copy of a task. Create a functionally identical copy of a work_queue_task that can be re-sub...
int work_queue_empty(struct work_queue *q)
Determine whether the queue is empty.
void work_queue_task_specify_disk(struct work_queue_task *t, int64_t disk)
Specify the amount of disk space required by a task.
int work_queue_shut_down_workers(struct work_queue *q, int n)
Shut down workers connected to the work_queue system.
void work_queue_task_specify_algorithm(struct work_queue_task *t, work_queue_schedule_t algorithm)
Select the scheduling algorithm for a single task.
work_queue_file_type_t
Definition: work_queue.h:33
int64_t min_memory
The smallest memory size in MB observed among the connected workers.
Definition: work_queue.h:202
void work_queue_set_bandwidth_limit(struct work_queue *q, const char *bandwidth)
Limit the queue bandwidth when transferring files to and from workers.
void work_queue_task_specify_category(struct work_queue_task *t, const char *category)
Label the task with the given category.
work_queue_result_t
Definition: work_queue.h:58
Cache file at execution site for later use.
Definition: work_queue.h:40
int total_workers_idled_out
Total number of workers that disconnected for being idle.
Definition: work_queue.h:162
File-spec is a directory.
Definition: work_queue.h:88
The task ran after the specified (absolute since epoch) end time.
Definition: work_queue.h:65
const char * work_queue_name(struct work_queue *q)
Get the project name of the queue.
int total_workers_lost
Total number of worker connections that were unexpectedly lost.
Definition: work_queue.h:161
double priority
The priority of this task relative to others in the queue: higher numbers run earlier.
Definition: work_queue.h:137
void work_queue_get_stats_category(struct work_queue *q, const char *c, struct work_queue_stats *s)
Get the task statistics for the given category.
void work_queue_specify_keepalive_timeout(struct work_queue *q, int timeout)
Change the keepalive timeout for identifying dead workers for a given queue.
void work_queue_specify_num_tasks_left(struct work_queue *q, int ntasks)
Specify the number of tasks not yet submitted to the queue.
Watch the output file and send back changes as the task runs.
Definition: work_queue.h:45
void work_queue_blacklist_clear(struct work_queue *q)
Clear blacklist of a queue.
Statistics describing a work queue.
Definition: work_queue.h:153
Definition: rmsummary.h:25
void work_queue_specify_max_resources(struct work_queue *q, const struct rmsummary *rm)
Enables resource autolabeling for tasks without an explicit category ("default" category).
timestamp_t time_receive_result_start
The time at which it started to transfer the results.
Definition: work_queue.h:124
int return_status
The exit code of the command line.
Definition: work_queue.h:111
Select worker on a first-come-first-serve basis.
Definition: work_queue.h:50
int64_t min_cores
The lowest number of cores observed among the connected workers.
Definition: work_queue.h:200
int64_t max_gpus
The highest number of GPUs observed among the connected workers.
Definition: work_queue.h:207
Access the file on the client from a shared filesystem.
Definition: work_queue.h:43
void work_queue_task_specify_gpus(struct work_queue_task *t, int gpus)
Specify the number of gpus required by a task.
int total_workers_connected
Total number of workers currently connected to the master.
Definition: work_queue.h:154
timestamp_t total_receive_time
Total time in microseconds spent in receiving data from workers.
Definition: work_queue.h:178
int work_queue_hungry(struct work_queue *q)
Determine whether the queue is 'hungry' for more tasks.
The task could not be completed successfully in the given number of retries.
Definition: work_queue.h:68
int work_queue_activate_fast_abort_category(struct work_queue *q, const char *category, double multiplier)
Turn on or off fast abort functionality for a given category.
Task has been dispatched to some worker.
Definition: work_queue.h:75
int work_queue_tune(struct work_queue *q, const char *name, double value)
Tune advanced parameters for work queue.
int64_t total_cores
Total number of cores aggregated across the connected workers.
Definition: work_queue.h:192
Select a random worker.
Definition: work_queue.h:53
The task cannot be run due to a missing input file.
Definition: work_queue.h:60
int total_worker_slots
Definition: work_queue.h:212
int64_t max_disk
The largest disk space in MB observed among the connected workers.
Definition: work_queue.h:205
Select worker that has the fastest execution time on previous tasks.
Definition: work_queue.h:52
timestamp_t total_transfer_time
Time consumed in microseconds for transferring total_bytes_transferred.
Definition: work_queue.h:132
double idle_percentage
The fraction of time that the master is idle waiting for workers to respond.
Definition: work_queue.h:188
Task was canceled before completion.
Definition: work_queue.h:79
int work_queue_activate_fast_abort(struct work_queue *q, double multiplier)
Turn on or off fast abort functionality for a given queue for tasks without an explicit category...
void work_queue_specify_max_category_resources(struct work_queue *q, const char *category, const struct rmsummary *rm)
Enables resource autolabeling for tasks in the given category.
int64_t total_disk
Total disk space in MB aggregated across the connected workers.
Definition: work_queue.h:194
char * tag
An optional user-defined logical name for the task.
Definition: work_queue.h:103
timestamp_t total_execute_time
Total time in microseconds workers spent executing completed tasks.
Definition: work_queue.h:181
int work_queue_submit(struct work_queue *q, struct work_queue_task *t)
Submit a task to a queue.
timestamp_t time_execute_cmd_finish
The time at which the task finished (discovered by the master).
Definition: work_queue.h:123
Select the worst fit worker (the worker with more unused resources).
Definition: work_queue.h:54
double efficiency
Parallel efficiency of the system, sum(task execution times) / sum(worker lifetimes) ...
Definition: work_queue.h:187
void work_queue_specify_keepalive_interval(struct work_queue *q, int interval)
Change the keepalive interval for a given queue.
The task was terminated with a signal.
Definition: work_queue.h:63
timestamp_t time_task_finish
The time at which this task was finished.
Definition: work_queue.h:119
int work_queue_task_specify_input_file(struct work_queue_task *t, const char *fname, const char *rname)
Add an input file to a task.
timestamp_t time_execute_cmd_start
The time at which the task began.
Definition: work_queue.h:122
int workers_idle
Number of workers that are not running a task.
Definition: work_queue.h:156
int wq_option_scheduler
Initial setting for algorithm to assign tasks to workers upon creating queue.
void work_queue_task_specify_end_time(struct work_queue_task *t, int64_t useconds)
Specify the maximum end time allowed for the task (in microseconds since the Epoch).
int work_queue_task_specify_directory(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t, int recursive)
Add a directory to a task.
int64_t total_bytes_sent
Number of bytes sent since task has last started sending input data.
Definition: work_queue.h:130
timestamp_t total_send_time
Total time in microseconds spent in sending data to workers.
Definition: work_queue.h:177
work_queue_file_t
Definition: work_queue.h:83
char * work_queue_get_worker_summary(struct work_queue *q)
Summarize workers.
void work_queue_initialize_categories(struct work_queue *q, struct rmsummary *max, const char *summaries_file)
Initialize first value of categories.
void work_queue_specify_priority(struct work_queue *q, int priority)
Change the priority for a given queue.
int64_t total_bytes_received
Number of bytes received since task has last started receiving input data.
Definition: work_queue.h:129
int total_tasks_failed
Total number of tasks completed and returned to user with result other than WQ_RESULT_SUCCESS.
Definition: work_queue.h:173
int work_queue_enable_monitoring(struct work_queue *q, char *monitor_summary_file)
Enables resource monitoring on the given work queue.
Data comes from buffer memory.
Definition: work_queue.h:85
int64_t max_memory
The largest memory size in MB observed among the connected workers.
Definition: work_queue.h:203
void work_queue_task_delete(struct work_queue_task *t)
Delete a task.
int work_queue_task_specify_file(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags)
Add a file to a task.
char * output
The standard output of the task.
Definition: work_queue.h:106
work_queue_file_flags_t
Definition: work_queue.h:38
void work_queue_blacklist_remove(struct work_queue *q, const char *hostname)
Unblacklist host from a queue.
The task used more resources than requested.
Definition: work_queue.h:64
void work_queue_specify_name(struct work_queue *q, const char *name)
Change the project name for a given queue.
File-spec refers to only a part of a file.
Definition: work_queue.h:87
int workers_ready
Definition: work_queue.h:210
void work_queue_specify_algorithm(struct work_queue *q, work_queue_schedule_t algorithm)
Change the worker selection algorithm.
Do not cache file at execution site.
Definition: work_queue.h:39
Specify an input object.
Definition: work_queue.h:34
Definition: category.h:28
int64_t max_cores
The highest number of cores observed among the connected workers.
Definition: work_queue.h:201
The task ran but its stdout has been truncated.
Definition: work_queue.h:62