20 #include <sys/types.h>
22 #include "rmsummary.h"
24 #define WORK_QUEUE_DEFAULT_PORT 9123
25 #define WORK_QUEUE_RANDOM_PORT 0
27 #define WORK_QUEUE_WAITFORTASK -1
29 #define WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL 300
30 #define WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT 30
48 WORK_QUEUE_SCHEDULE_UNSET = 0,
643 int work_queue_tune(
struct work_queue *q,
const char *name,
double value);
651 #define WORK_QUEUE_TASK_ORDER_FIFO 0
652 #define WORK_QUEUE_TASK_ORDER_LIFO 1
660 void work_queue_specify_task_order(struct work_queue *q, int order);
663 #define WORK_QUEUE_MASTER_MODE_STANDALONE 0
664 #define WORK_QUEUE_MASTER_MODE_CATALOG 1
673 void work_queue_specify_master_mode(struct work_queue *q, int mode);
737 void work_queue_activate_worker_waiting(
struct work_queue *q,
int resources);
int64_t total_memory
Total memory in MB aggregated across the connected workers.
Definition: work_queue.h:179
Task is ready to be run, waiting in queue.
Definition: work_queue.h:69
int64_t committed_gpus
Committed number of GPUs aggregated across the connected workers.
Definition: work_queue.h:185
If the filename already exists on the host, use it in place.
Definition: work_queue.h:41
void work_queue_specify_estimate_capacity_on(struct work_queue *q, int estimate_capacity_on)
Change whether to estimate master capacity for a given queue.
int work_queue_send_receive_ratio(struct work_queue *q, double ratio)
Change the preference to send or receive tasks.
int64_t committed_disk
Committed disk space in MB aggregated across the connected workers.
Definition: work_queue.h:184
double wq_option_fast_abort_multiplier
Initial setting for fast abort multiplier upon creating queue.
Task results are available at the worker.
Definition: work_queue.h:71
int64_t min_disk
The smallest disk space in MB observed among the connected workers.
Definition: work_queue.h:190
int64_t disk
Disk space required by the task.
Definition: work_queue.h:131
int work_queue_task_specify_file_piece(struct work_queue_task *t, const char *local_name, const char *remote_name, off_t start_byte, off_t end_byte, work_queue_file_type_t type, work_queue_file_flags_t flags)
Add a file piece to a task.
A task description.
Definition: work_queue.h:94
void work_queue_task_specify_cores(struct work_queue_task *t, int cores)
Specify the number of cores required by a task.
timestamp_t total_good_execute_time
Total time in microseconds workers spent executing successful tasks.
Definition: work_queue.h:168
timestamp_t time_send_input_start
The time at which it started to transfer input files.
Definition: work_queue.h:112
struct list * work_queue_cancel_all_tasks(struct work_queue *q)
Cancel all submitted tasks and remove them from the queue.
void work_queue_task_specify_enviroment_variable(struct work_queue_task *t, const char *name, const char *value)
Specify an environment variable to be added to the task.
timestamp_t start_time
Absolute time at which the master started.
Definition: work_queue.h:162
timestamp_t cmd_execution_time
Time spent in microseconds for executing the command on the worker.
Definition: work_queue.h:125
int tasks_running
Number of tasks currently running.
Definition: work_queue.h:155
int work_queue_task_specify_input_file_do_not_cache(struct work_queue_task *t, const char *fname, const char *rname)
Add an input file to a task, without caching.
int total_workers_joined
Total number of worker connections that were established to the master.
Definition: work_queue.h:151
Task results are available at the master.
Definition: work_queue.h:72
int work_queue_task_specify_output_file(struct work_queue_task *t, const char *rname, const char *fname)
Add an output file to a task.
timestamp_t time_receive_output_start
The time at which it started to transfer output files.
Definition: work_queue.h:118
timestamp_t total_cmd_execution_time
Time spent in microseconds for executing the command on any worker, including resubmissions of the ta...
Definition: work_queue.h:127
void work_queue_get_stats(struct work_queue *q, struct work_queue_stats *s)
Get queue statistics (only from master).
int64_t memory
Memory required by the task.
Definition: work_queue.h:130
struct work_queue_task * work_queue_cancel_by_taskid(struct work_queue *q, int id)
Cancel a submitted task using its task id and remove it from queue.
int workers_full
Definition: work_queue.h:197
int total_tasks_dispatched
Total number of tasks dispatched to workers.
Definition: work_queue.h:157
struct work_queue * work_queue_create(int port)
Create a new work queue.
timestamp_t total_good_transfer_time
Total time in microseconds spent in sending and receiving data to workers for tasks with result WQ_RE...
Definition: work_queue.h:165
int workers_init
Number of workers initializing.
Definition: work_queue.h:148
int work_queue_task_specify_output_file_do_not_cache(struct work_queue_task *t, const char *rname, const char *fname)
Add an output file to a task without caching.
timestamp_t time_send_input_finish
The time at which it finished transferring input files.
Definition: work_queue.h:113
void work_queue_get_stats_hierarchy(struct work_queue *q, struct work_queue_stats *s)
Get statistics of the master queue together with foremen information.
timestamp_t time_app_delay
Definition: work_queue.h:140
void work_queue_task_specify_command(struct work_queue_task *t, const char *cmd)
Indicate the command to be executed.
int unlabeled
1 if the task did not specify any required resource.
Definition: work_queue.h:134
int tasks_waiting
Number of tasks waiting to be run.
Definition: work_queue.h:154
work_queue_task_state_t
Definition: work_queue.h:67
int total_submissions
The number of times the task has been submitted.
Definition: work_queue.h:126
char * hostname
The name of the host on which it ran.
Definition: work_queue.h:106
Portable routines for high resolution timing.
double work_queue_get_effective_bandwidth(struct work_queue *q)
Get current queue bandwidth.
int64_t total_gpus
Total number of GPUs aggregated across the connected workers.
Definition: work_queue.h:181
The task ran but failed to generate a specified output file.
Definition: work_queue.h:60
int64_t total_bytes_sent
Total number of file bytes (not including protocol control msg bytes) sent out to the workers by the ...
Definition: work_queue.h:171
struct list * output_files
The output files (other than the standard output stream) created by the program to be retrieved from ...
Definition: work_queue.h:100
int64_t total_bytes_transferred
Number of bytes transferred since task has last started transferring input data.
Definition: work_queue.h:123
void work_queue_delete(struct work_queue *q)
Delete a work queue.
UINT64_T timestamp_t
A type to hold the current time, in microseconds since January 1st, 1970.
Definition: timestamp.h:20
struct list * env_list
Environment variables applied to the task.
Definition: work_queue.h:101
int tasks_complete
Number of tasks waiting to be returned to user.
Definition: work_queue.h:156
int64_t min_gpus
The lowest number of GPUs observed among the connected workers.
Definition: work_queue.h:192
struct list * input_files
The files to transfer to the worker and place in the executing directory.
Definition: work_queue.h:99
double bandwidth
Average network bandwidth in MB/S observed by the master when transferring to workers.
Definition: work_queue.h:177
struct work_queue_task * work_queue_cancel_by_tasktag(struct work_queue *q, const char *tag)
Cancel a submitted task using its tag and remove it from queue.
work_queue_task_state_t work_queue_task_state(struct work_queue *q, int taskid)
Get the current state of the task.
int total_tasks_complete
Total number of tasks completed and returned to user.
Definition: work_queue.h:158
struct work_queue_task * work_queue_wait(struct work_queue *q, int timeout)
Wait for a task to complete.
void work_queue_task_specify_memory(struct work_queue_task *t, int64_t memory)
Specify the amount of memory required by a task.
char * command_line
The program(s) to execute, as a shell command line.
Definition: work_queue.h:96
int capacity
The estimated number of workers that this master can effectively support.
Definition: work_queue.h:175
There is no such task.
Definition: work_queue.h:68
int64_t committed_memory
Committed memory in MB aggregated across the connected workers.
Definition: work_queue.h:183
int total_workers_removed
Total number of worker connections that were terminated by the master.
Definition: work_queue.h:152
Task is done, and returned through work_queue_wait.
Definition: work_queue.h:73
Specify an output object.
Definition: work_queue.h:34
int work_queue_specify_log(struct work_queue *q, const char *logfile)
Add a log file that records the states of the connected workers and submitted tasks.
timestamp_t time_receive_output_finish
The time at which it finished transferring output files.
Definition: work_queue.h:119
Select worker that has the most data required by the task.
Definition: work_queue.h:50
timestamp_t time_task_submit
The time at which this task was submitted.
Definition: work_queue.h:110
timestamp_t time_receive_result_finish
The time at which it finished transferring the results.
Definition: work_queue.h:117
int work_queue_task_specify_input_buf(struct work_queue_task *t, const char *buf, int length, const char *rname)
Add an input buffer to a task.
int64_t committed_cores
Committed number of cores aggregated across the connected workers.
Definition: work_queue.h:182
int work_queue_task_specify_buffer(struct work_queue_task *t, const char *data, int length, const char *remote_name, work_queue_file_flags_t)
Add an input buffer to a task.
void work_queue_specify_catalog_server(struct work_queue *q, const char *hostname, int port)
Specify the catalog server the master should report to.
char * host
The address and port of the host on which it ran.
Definition: work_queue.h:105
int work_queue_port(struct work_queue *q)
Get the listening port of the queue.
work_queue_schedule_t
Definition: work_queue.h:47
int work_queue_specify_password_file(struct work_queue *q, const char *file)
Add a mandatory password file that each worker must present.
Access the file on the client from a shared filesystem (same as WORK_QUEUE_THIRDGET, included for readability)
Definition: work_queue.h:43
int taskid
A unique task id number.
Definition: work_queue.h:102
int avg_capacity
Definition: work_queue.h:199
Worker gave up on the task, and the task will be resubmitted.
Definition: work_queue.h:75
int workers_busy
Number of workers that are running at least one task.
Definition: work_queue.h:150
struct work_queue_task * work_queue_task_create(const char *full_command)
Create a new task object.
void work_queue_specify_password(struct work_queue *q, const char *password)
Add a mandatory password that each worker must present.
struct rmsummary * resources_measured
When monitoring is enabled, it points to the measured resources used by the task. ...
Definition: work_queue.h:138
timestamp_t time_committed
The time at which a task was committed to a worker.
Definition: work_queue.h:108
work_queue_schedule_t worker_selection_algorithm
How to choose worker to run the task.
Definition: work_queue.h:97
The task ran successfully.
Definition: work_queue.h:58
int64_t total_bytes_received
Total number of file bytes (not including protocol control msg bytes) received from the workers by th...
Definition: work_queue.h:172
int cores
Number of cores required by the task.
Definition: work_queue.h:132
void work_queue_task_specify_tag(struct work_queue_task *t, const char *tag)
Attach a user defined string tag to the task.
int total_tasks_cancelled
Total number of tasks cancelled.
Definition: work_queue.h:160
Create a symlink to the file rather than copying it, if possible.
Definition: work_queue.h:40
int64_t maximum_end_time
Maximum time (from epoch) this task may run.
Definition: work_queue.h:129
void work_queue_task_specify_priority(struct work_queue_task *t, double priority)
Specify the priority of this task relative to others in the queue.
void work_queue_task_specify_end_time(struct work_queue_task *t, int64_t seconds)
Specify the maximum end time allowed for the task (in seconds since the Epoch).
void work_queue_master_preferred_connection(struct work_queue *q, const char *preferred_connection)
Set the preference for using hostname over IP address to connect.
work_queue_result_t result
The result of the task (successful, failed return_status, missing input file, missing output file)...
Definition: work_queue.h:104
void work_queue_blacklist_add(struct work_queue *q, const char *hostname)
Blacklist host from a queue.
struct work_queue_task * work_queue_task_clone(const struct work_queue_task *task)
Create a copy of a task. Create a functionally identical copy of a work_queue_task that can be re-subm...
int work_queue_empty(struct work_queue *q)
Determine whether the queue is empty.
void work_queue_task_specify_disk(struct work_queue_task *t, int64_t disk)
Specify the amount of disk space required by a task.
int work_queue_shut_down_workers(struct work_queue *q, int n)
Shut down workers connected to the work_queue system.
void work_queue_task_specify_algorithm(struct work_queue_task *t, work_queue_schedule_t algorithm)
Select the scheduling algorithm for a single task.
work_queue_file_type_t
Definition: work_queue.h:32
int64_t min_memory
The smallest memory size in MB observed among the connected workers.
Definition: work_queue.h:188
void work_queue_set_bandwidth_limit(struct work_queue *q, const char *bandwidth)
Limit the queue bandwidth when transferring files to and from workers.
work_queue_result_t
Definition: work_queue.h:57
Cache file at execution site for later use.
Definition: work_queue.h:39
The task ran after specified end time.
Definition: work_queue.h:64
const char * work_queue_name(struct work_queue *q)
Get the project name of the queue.
double priority
The priority of this task relative to others in the queue: higher numbers run earlier.
Definition: work_queue.h:136
void work_queue_specify_keepalive_timeout(struct work_queue *q, int timeout)
Change the keepalive timeout for identifying dead workers for a given queue.
Watch the output file and send back changes as the task runs.
Definition: work_queue.h:44
void work_queue_blacklist_clear(struct work_queue *q)
Clear blacklist of a queue.
Statistics describing a work queue.
Definition: work_queue.h:146
Definition: rmsummary.h:24
timestamp_t time_receive_result_start
The time at which it started to transfer the results.
Definition: work_queue.h:116
int return_status
The exit code of the command line.
Definition: work_queue.h:103
Select worker on a first-come-first-serve basis.
Definition: work_queue.h:49
int64_t min_cores
The lowest number of cores observed among the connected workers.
Definition: work_queue.h:186
int64_t max_gpus
The highest number of GPUs observed among the connected workers.
Definition: work_queue.h:193
Access the file on the client from a shared filesystem.
Definition: work_queue.h:42
void work_queue_task_specify_gpus(struct work_queue_task *t, int gpus)
Specify the number of gpus required by a task.
int total_workers_connected
Total number of workers currently connected to the master.
Definition: work_queue.h:147
timestamp_t total_receive_time
Total time in microseconds spent in receiving data from workers.
Definition: work_queue.h:164
int work_queue_hungry(struct work_queue *q)
Determine whether the queue is 'hungry' for more tasks.
Task has been dispatched to some worker.
Definition: work_queue.h:70
int work_queue_tune(struct work_queue *q, const char *name, double value)
Tune advanced parameters for work queue.
int64_t total_cores
Total number of cores aggregated across the connected workers.
Definition: work_queue.h:178
Select a random worker.
Definition: work_queue.h:52
The task cannot be run due to a missing input file.
Definition: work_queue.h:59
int total_worker_slots
Definition: work_queue.h:198
int64_t max_disk
The largest disk space in MB observed among the connected workers.
Definition: work_queue.h:191
Select worker that has the fastest execution time on previous tasks.
Definition: work_queue.h:51
timestamp_t total_transfer_time
Time consumed in microseconds for transferring total_bytes_transferred.
Definition: work_queue.h:124
double idle_percentage
The fraction of time that the master is idle waiting for workers to respond.
Definition: work_queue.h:174
Task was canceled before completion.
Definition: work_queue.h:74
int work_queue_activate_fast_abort(struct work_queue *q, double multiplier)
Turn on or off fast abort functionality for a given queue.
int gpus
Number of gpus required by the task.
Definition: work_queue.h:133
int64_t total_disk
Total disk space in MB aggregated across the connected workers.
Definition: work_queue.h:180
char * tag
An optional user-defined logical name for the task.
Definition: work_queue.h:95
timestamp_t total_execute_time
Total time in microseconds workers spent executing completed tasks.
Definition: work_queue.h:167
int work_queue_submit(struct work_queue *q, struct work_queue_task *t)
Submit a task to a queue.
timestamp_t time_execute_cmd_finish
The time at which the task finished (discovered by the master).
Definition: work_queue.h:115
Select the worst fit worker (the worker with more unused resources).
Definition: work_queue.h:53
double efficiency
Parallel efficiency of the system, sum(task execution times) / sum(worker lifetimes) ...
Definition: work_queue.h:173
void work_queue_specify_keepalive_interval(struct work_queue *q, int interval)
Change the keepalive interval for a given queue.
The task was terminated with a signal.
Definition: work_queue.h:62
timestamp_t time_task_finish
The time at which this task was finished.
Definition: work_queue.h:111
int work_queue_task_specify_input_file(struct work_queue_task *t, const char *fname, const char *rname)
Add an input file to a task.
timestamp_t time_execute_cmd_start
The time at which the task began.
Definition: work_queue.h:114
int workers_idle
Number of workers that are not running a task.
Definition: work_queue.h:149
int wq_option_scheduler
Initial setting for algorithm to assign tasks to workers upon creating queue.
int work_queue_task_specify_directory(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t, int recursive)
Add a directory to a task.
int64_t total_bytes_sent
Number of bytes sent since task has last started sending input data.
Definition: work_queue.h:122
timestamp_t total_send_time
Total time in microseconds spent in sending data to workers.
Definition: work_queue.h:163
char * work_queue_get_worker_summary(struct work_queue *q)
Summarize workers.
void work_queue_specify_priority(struct work_queue *q, int priority)
Change the priority for a given queue.
int64_t total_bytes_received
Number of bytes received since task has last started receiving input data.
Definition: work_queue.h:121
int total_tasks_failed
Total number of tasks completed and returned to user with result other than WQ_RESULT_SUCCESS.
Definition: work_queue.h:159
int work_queue_enable_monitoring(struct work_queue *q, char *monitor_summary_file)
Enables resource monitoring on the given work queue.
int64_t max_memory
The largest memory size in MB observed among the connected workers.
Definition: work_queue.h:189
void work_queue_task_delete(struct work_queue_task *t)
Delete a task.
int work_queue_task_specify_file(struct work_queue_task *t, const char *local_name, const char *remote_name, work_queue_file_type_t type, work_queue_file_flags_t flags)
Add a file to a task.
char * output
The standard output of the task.
Definition: work_queue.h:98
work_queue_file_flags_t
Definition: work_queue.h:37
void work_queue_blacklist_remove(struct work_queue *q, const char *hostname)
Unblacklist host from a queue.
The task used more resources than requested.
Definition: work_queue.h:63
void work_queue_specify_name(struct work_queue *q, const char *name)
Change the project name for a given queue.
int workers_ready
Definition: work_queue.h:196
void work_queue_specify_algorithm(struct work_queue *q, work_queue_schedule_t algorithm)
Change the worker selection algorithm.
Do not cache file at execution site.
Definition: work_queue.h:38
Specify an input object.
Definition: work_queue.h:33
int64_t max_cores
The highest number of cores observed among the connected workers.
Definition: work_queue.h:187
The task ran but its stdout has been truncated.
Definition: work_queue.h:61