20 #include <sys/types.h>
22 #include "rmsummary.h"
24 #define WORK_QUEUE_DEFAULT_PORT 9123
25 #define WORK_QUEUE_RANDOM_PORT 0
26 #define WORK_QUEUE_WAITFORTASK -1
28 #define WORK_QUEUE_SCHEDULE_UNSET 0
29 #define WORK_QUEUE_SCHEDULE_FCFS 1
30 #define WORK_QUEUE_SCHEDULE_FILES 2
31 #define WORK_QUEUE_SCHEDULE_TIME 3
32 #define WORK_QUEUE_SCHEDULE_RAND 4
34 #define WORK_QUEUE_INPUT 0
35 #define WORK_QUEUE_OUTPUT 1
37 #define WORK_QUEUE_NOCACHE 0
38 #define WORK_QUEUE_CACHE 1
39 #define WORK_QUEUE_SYMLINK 2
40 #define WORK_QUEUE_PREEXIST 4
41 #define WORK_QUEUE_THIRDGET 8
42 #define WORK_QUEUE_THIRDPUT 8
43 #define WORK_QUEUE_WATCH 16
45 #define WORK_QUEUE_RESET_ALL 0
46 #define WORK_QUEUE_RESET_KEEP_TASKS 1
48 #define WORK_QUEUE_DEFAULT_KEEPALIVE_INTERVAL 300
49 #define WORK_QUEUE_DEFAULT_KEEPALIVE_TIMEOUT 30
51 #define WORK_QUEUE_RESULT_SUCCESS 0
52 #define WORK_QUEUE_RESULT_INPUT_MISSING 1
53 #define WORK_QUEUE_RESULT_OUTPUT_MISSING 2
54 #define WORK_QUEUE_RESULT_STDOUT_MISSING 4
55 #define WORK_QUEUE_RESULT_SIGNAL 8
56 #define WORK_QUEUE_RESULT_RESOURCE_EXHAUSTION 16
57 #define WORK_QUEUE_RESULT_TASK_TIMEOUT 32
59 extern double wq_option_fast_abort_multiplier;
61 extern int wq_option_scheduler;
65 struct work_queue_task {
99 int64_t maximum_end_time;
598 int work_queue_tune(
struct work_queue *q,
const char *name,
double value);
606 #define WORK_QUEUE_TASK_ORDER_FIFO 0
607 #define WORK_QUEUE_TASK_ORDER_LIFO 1
615 void work_queue_specify_task_order(struct work_queue *q, int order);
618 #define WORK_QUEUE_MASTER_MODE_STANDALONE 0
619 #define WORK_QUEUE_MASTER_MODE_CATALOG 1
628 void work_queue_specify_master_mode(struct work_queue *q, int mode);
692 void work_queue_activate_worker_waiting(
struct work_queue *q,
int resources);
int64_t total_memory
Total memory in MB aggregated across the connected workers.
Definition: work_queue.h:148
int64_t committed_gpus
Committed number of GPUs aggregated across the connected workers.
Definition: work_queue.h:154
int work_queue_task_specify_buffer(struct work_queue_task *t, const char *data, int length, const char *remote_name, int flags)
Add an input buffer to a task.
void work_queue_specify_estimate_capacity_on(struct work_queue *q, int estimate_capacity_on)
Change whether to estimate master capacity for a given queue.
int work_queue_send_receive_ratio(struct work_queue *q, double ratio)
Change the preference to send or receive tasks.
int64_t committed_disk
Committed disk space in MB aggregated across the connected workers.
Definition: work_queue.h:153
int64_t min_disk
The smallest disk space in MB observed among the connected workers.
Definition: work_queue.h:159
A task description.
Definition: work_queue.h:65
void work_queue_task_specify_cores(struct work_queue_task *t, int cores)
Specify the number of cores required by a task.
timestamp_t total_good_execute_time
Total time in microseconds workers spent executing successful tasks.
Definition: work_queue.h:137
timestamp_t time_send_input_start
The time at which it started to transfer input files.
Definition: work_queue.h:82
struct list * work_queue_cancel_all_tasks(struct work_queue *q)
Cancel all submitted tasks and remove them from the queue.
timestamp_t start_time
Absolute time at which the master started.
Definition: work_queue.h:131
timestamp_t cmd_execution_time
Time spent in microseconds for executing the command on the worker.
Definition: work_queue.h:95
int tasks_running
Number of tasks currently running.
Definition: work_queue.h:124
int work_queue_task_specify_input_file_do_not_cache(struct work_queue_task *t, const char *fname, const char *rname)
Add an input file to a task, without caching.
int total_workers_joined
Total number of worker connections that were established to the master.
Definition: work_queue.h:120
int work_queue_task_specify_output_file(struct work_queue_task *t, const char *rname, const char *fname)
Add an output file to a task.
timestamp_t time_receive_output_start
The time at which it started to transfer output files.
Definition: work_queue.h:88
timestamp_t total_cmd_execution_time
Time spent in microseconds for executing the command on any worker, including resubmissions of the ta...
Definition: work_queue.h:97
void work_queue_get_stats(struct work_queue *q, struct work_queue_stats *s)
Get queue statistics (only from master).
struct work_queue_task * work_queue_cancel_by_taskid(struct work_queue *q, int id)
Cancel a submitted task using its task id and remove it from queue.
int workers_full
Definition: work_queue.h:166
int total_tasks_dispatched
Total number of tasks dispatched to workers.
Definition: work_queue.h:126
struct work_queue * work_queue_create(int port)
Create a new work queue.
void work_queue_specify_algorithm(struct work_queue *q, int algo)
Change the worker selection algorithm.
timestamp_t total_good_transfer_time
Total time in microseconds spent in sending and receiving data to workers for tasks with result WQ_RE...
Definition: work_queue.h:134
int workers_init
Number of workers initializing.
Definition: work_queue.h:117
int work_queue_task_specify_output_file_do_not_cache(struct work_queue_task *t, const char *rname, const char *fname)
Add an output file to a task without caching.
timestamp_t time_send_input_finish
The time at which it finished transferring input files.
Definition: work_queue.h:83
void work_queue_get_stats_hierarchy(struct work_queue *q, struct work_queue_stats *s)
Get statistics of the master queue together with foremen information.
timestamp_t time_app_delay
Definition: work_queue.h:109
void work_queue_task_specify_command(struct work_queue_task *t, const char *cmd)
Indicate the command to be executed.
int tasks_waiting
Number of tasks waiting to be run.
Definition: work_queue.h:123
int total_submissions
The number of times the task has been submitted.
Definition: work_queue.h:96
char * hostname
The name of the host on which it ran.
Definition: work_queue.h:76
Portable routines for high resolution timing.
double work_queue_get_effective_bandwidth(struct work_queue *q)
Get current queue bandwidth.
int64_t total_gpus
Total number of GPUs aggregated across the connected workers.
Definition: work_queue.h:150
int64_t total_bytes_sent
Total number of file bytes (not including protocol control msg bytes) sent out to the workers by the ...
Definition: work_queue.h:140
struct list * output_files
The output files (other than the standard output stream) created by the program expected to be retrie...
Definition: work_queue.h:71
int64_t total_bytes_transferred
Number of bytes transferred since task has last started transferring input data.
Definition: work_queue.h:93
void work_queue_delete(struct work_queue *q)
Delete a work queue.
UINT64_T timestamp_t
A type to hold the current time, in microseconds since January 1st, 1970.
Definition: timestamp.h:20
int tasks_complete
Number of tasks waiting to be returned to user.
Definition: work_queue.h:125
int64_t min_gpus
The lowest number of GPUs observed among the connected workers.
Definition: work_queue.h:161
struct list * input_files
The files to transfer to the worker and place in the executing directory.
Definition: work_queue.h:70
double bandwidth
Average network bandwidth in MB/s observed by the master when transferring to workers.
Definition: work_queue.h:146
void work_queue_task_specify_algorithm(struct work_queue_task *t, int algo)
Select the scheduling algorithm for a single task.
struct work_queue_task * work_queue_cancel_by_tasktag(struct work_queue *q, const char *tag)
Cancel a submitted task using its tag and remove it from queue.
int total_tasks_complete
Total number of tasks completed and returned to user.
Definition: work_queue.h:127
struct work_queue_task * work_queue_wait(struct work_queue *q, int timeout)
Wait for a task to complete.
void work_queue_task_specify_memory(struct work_queue_task *t, int64_t memory)
Specify the amount of memory required by a task.
char * command_line
The program(s) to execute, as a shell command line.
Definition: work_queue.h:67
int capacity
The estimated number of workers that this master can effectively support.
Definition: work_queue.h:144
int64_t committed_memory
Committed memory in MB aggregated across the connected workers.
Definition: work_queue.h:152
int total_workers_removed
Total number of worker connections that were terminated by the master.
Definition: work_queue.h:121
int work_queue_specify_log(struct work_queue *q, const char *logfile)
Add a log file that records the states of the connected workers and submitted tasks.
timestamp_t time_receive_output_finish
The time at which it finished transferring output files.
Definition: work_queue.h:89
timestamp_t time_task_submit
The time at which this task was submitted.
Definition: work_queue.h:80
timestamp_t time_receive_result_finish
The time at which it finished transferring the results.
Definition: work_queue.h:87
int work_queue_task_specify_input_buf(struct work_queue_task *t, const char *buf, int length, const char *rname)
Add an input buffer to a task.
int64_t committed_cores
Committed number of cores aggregated across the connected workers.
Definition: work_queue.h:151
void work_queue_specify_catalog_server(struct work_queue *q, const char *hostname, int port)
Specify the catalog server the master should report to.
char * host
The address and port of the host on which it ran.
Definition: work_queue.h:75
int work_queue_port(struct work_queue *q)
Get the listening port of the queue.
int work_queue_specify_password_file(struct work_queue *q, const char *file)
Add a mandatory password file that each worker must present.
int taskid
A unique task id number.
Definition: work_queue.h:72
int avg_capacity
Definition: work_queue.h:168
int workers_busy
Number of workers that are running at least one task.
Definition: work_queue.h:119
struct work_queue_task * work_queue_task_create(const char *full_command)
Create a new task object.
void work_queue_specify_password(struct work_queue *q, const char *password)
Add a mandatory password that each worker must present.
struct rmsummary * resources_measured
When monitoring is enabled, it points to the measured resources used by the task. ...
Definition: work_queue.h:105
timestamp_t time_committed
The time at which a task was committed to a worker.
Definition: work_queue.h:78
int work_queue_task_specify_file(struct work_queue_task *t, const char *local_name, const char *remote_name, int type, int flags)
Add a file to a task.
int64_t total_bytes_received
Total number of file bytes (not including protocol control msg bytes) received from the workers by th...
Definition: work_queue.h:141
void work_queue_task_specify_tag(struct work_queue_task *t, const char *tag)
Attach a user defined string tag to the task.
int total_tasks_cancelled
Total number of tasks cancelled.
Definition: work_queue.h:129
void work_queue_task_specify_priority(struct work_queue_task *t, double priority)
Specify the priority of this task relative to others in the queue.
void work_queue_task_specify_end_time(struct work_queue_task *t, int64_t seconds)
Specify the maximum end time allowed for the task (in seconds since the Epoch).
void work_queue_blacklist_add(struct work_queue *q, const char *hostname)
Blacklist host from a queue.
struct work_queue_task * work_queue_task_clone(const struct work_queue_task *task)
Create a copy of a task. Create a functionally identical copy of a work_queue_task that can be re-sub...
int work_queue_empty(struct work_queue *q)
Determine whether the queue is empty.
void work_queue_task_specify_disk(struct work_queue_task *t, int64_t disk)
Specify the amount of disk space required by a task.
int work_queue_shut_down_workers(struct work_queue *q, int n)
Shut down workers connected to the work_queue system.
int64_t min_memory
The smallest memory size in MB observed among the connected workers.
Definition: work_queue.h:157
void work_queue_set_bandwidth_limit(struct work_queue *q, const char *bandwidth)
Limit the queue bandwidth when transferring files to and from workers.
const char * work_queue_name(struct work_queue *q)
Get the project name of the queue.
double priority
The priority of this task relative to others in the queue: higher numbers run earlier.
Definition: work_queue.h:107
void work_queue_specify_keepalive_timeout(struct work_queue *q, int timeout)
Change the keepalive timeout for identifying dead workers for a given queue.
void work_queue_blacklist_clear(struct work_queue *q)
Clear blacklist of a queue.
Statistics describing a work queue.
Definition: work_queue.h:115
Definition: rmsummary.h:24
int work_queue_task_specify_file_piece(struct work_queue_task *t, const char *local_name, const char *remote_name, off_t start_byte, off_t end_byte, int type, int flags)
Add a file piece to a task.
timestamp_t time_receive_result_start
The time at which it started to transfer the results.
Definition: work_queue.h:86
int return_status
The exit code of the command line.
Definition: work_queue.h:73
int64_t min_cores
The lowest number of cores observed among the connected workers.
Definition: work_queue.h:155
int64_t max_gpus
The highest number of GPUs observed among the connected workers.
Definition: work_queue.h:162
void work_queue_task_specify_gpus(struct work_queue_task *t, int gpus)
Specify the number of gpus required by a task.
int total_workers_connected
Total number of workers currently connected to the master.
Definition: work_queue.h:116
timestamp_t total_receive_time
Total time in microseconds spent in receiving data from workers.
Definition: work_queue.h:133
int work_queue_hungry(struct work_queue *q)
Determine whether the queue is 'hungry' for more tasks.
int work_queue_tune(struct work_queue *q, const char *name, double value)
Tune advanced parameters for work queue.
int64_t total_cores
Total number of cores aggregated across the connected workers.
Definition: work_queue.h:147
int result
The result of the task (successful, failed return_status, missing input file, missing output file)...
Definition: work_queue.h:74
int total_worker_slots
Definition: work_queue.h:167
int64_t max_disk
The largest disk space in MB observed among the connected workers.
Definition: work_queue.h:160
timestamp_t total_transfer_time
Time consumed in microseconds for transferring total_bytes_transferred.
Definition: work_queue.h:94
double idle_percentage
The fraction of time that the master is idle waiting for workers to respond.
Definition: work_queue.h:143
int work_queue_activate_fast_abort(struct work_queue *q, double multiplier)
Turn on or off fast abort functionality for a given queue.
int64_t total_disk
Total disk space in MB aggregated across the connected workers.
Definition: work_queue.h:149
char * tag
An optional user-defined logical name for the task.
Definition: work_queue.h:66
timestamp_t total_execute_time
Total time in microseconds workers spent executing completed tasks.
Definition: work_queue.h:136
int work_queue_submit(struct work_queue *q, struct work_queue_task *t)
Submit a task to a queue.
int worker_selection_algorithm
How to choose worker to run the task.
Definition: work_queue.h:68
timestamp_t time_execute_cmd_finish
The time at which the task finished (discovered by the master).
Definition: work_queue.h:85
double efficiency
Parallel efficiency of the system, sum(task execution times) / sum(worker lifetimes) ...
Definition: work_queue.h:142
void work_queue_specify_keepalive_interval(struct work_queue *q, int interval)
Change the keepalive interval for a given queue.
timestamp_t time_task_finish
The time at which this task was finished.
Definition: work_queue.h:81
int work_queue_task_specify_input_file(struct work_queue_task *t, const char *fname, const char *rname)
Add an input file to a task.
timestamp_t time_execute_cmd_start
The time at which the task began.
Definition: work_queue.h:84
int workers_idle
Number of workers that are not running a task.
Definition: work_queue.h:118
int64_t total_bytes_sent
Number of bytes sent since task has last started sending input data.
Definition: work_queue.h:92
timestamp_t total_send_time
Total time in microseconds spent in sending data to workers.
Definition: work_queue.h:132
char * work_queue_get_worker_summary(struct work_queue *q)
Summarize workers.
void work_queue_specify_priority(struct work_queue *q, int priority)
Change the priority for a given queue.
int64_t total_bytes_received
Number of bytes received since task has last started receiving input data.
Definition: work_queue.h:91
int total_tasks_failed
Total number of tasks completed and returned to user with result other than WQ_RESULT_SUCCESS.
Definition: work_queue.h:128
int work_queue_enable_monitoring(struct work_queue *q, char *monitor_summary_file)
Enables resource monitoring on the given work queue.
int64_t max_memory
The largest memory size in MB observed among the connected workers.
Definition: work_queue.h:158
void work_queue_task_delete(struct work_queue_task *t)
Delete a task.
char * output
The standard output of the task.
Definition: work_queue.h:69
void work_queue_blacklist_remove(struct work_queue *q, const char *hostname)
Unblacklist host from a queue.
void work_queue_specify_name(struct work_queue *q, const char *name)
Change the project name for a given queue.
int workers_ready
Definition: work_queue.h:165
int work_queue_task_specify_directory(struct work_queue_task *t, const char *local_name, const char *remote_name, int type, int flags, int recursive)
Add a directory to a task.
int64_t max_cores
The highest number of cores observed among the connected workers.
Definition: work_queue.h:156