diff --git a/README.md b/README.md index 8268c64f..e3a7587c 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ -An implementation of a task execution engine based on the [TES standard](https://github.com/ga4gh/task-execution-schemas) running on Kubernetes. +An implementation of a task execution engine based on the [TES standard](https://github.com/ga4gh/task-execution-schemas) running on Kubernetes. For more details on TES, see the [(very) brief introduction to TES](tesintro.md). For organisational reasons, this project is split into 2 repositories: One containing the API and associated docker images ([here](https://github.com/EMBL-EBI-TSI/tesk-api)) and one containing the actual task execution service and associated Docker images (this one). If the API is running on your cluster it will pull the images from our gcr.io repository automatically. In that vein, see below under 'How to install' to get TESK up and running on your Kubernetes cluster. @@ -10,7 +10,8 @@ For organisational reasons, this project is split into 2 repositories: One conta ## How to install - Clone the repo to your kube-master and cd into the folder - - Edit the following line in `specs/ingress/nginx-ingress-lb.yaml`: + - Find out what is the external IP for the cluster. E.g. with the command `minikube ip` + - Edit the following line in `specs/ingress/nginx-ingress-lb.yaml` with that IP: ```yaml spec: @@ -20,7 +21,7 @@ For organisational reasons, this project is split into 2 repositories: One conta ``` - - Then create the services necessary to run the API: + - Create the services necessary to run the API: ``` kubectl create -f specs/ingress/ diff --git a/examples/cancel/counter.json b/examples/cancel/counter.json new file mode 100644 index 00000000..aa368290 --- /dev/null +++ b/examples/cancel/counter.json @@ -0,0 +1,23 @@ +{ + "description": "An example task to test job cancellation. 
Cancel it please, because it runs forever", + "volumes": [ + "/test" + ], + "executors": [ + { + "image": "alpine", + "command": [ + "echo", + "Nothing" + ] + }, + { + "image": "alpine", + "command": [ + "sh", + "-c", + "i=0; while true; do echo \"$i: $(date)\"; i=$((i+1)); sleep 1; done" + ] + } + ] +} diff --git a/examples/error/error_stops_execution_taskmaster.json b/examples/error/error_stops_execution_taskmaster.json new file mode 100644 index 00000000..55f01c5d --- /dev/null +++ b/examples/error/error_stops_execution_taskmaster.json @@ -0,0 +1,113 @@ +{ + "outputs": [], + "inputs": [], + "volumes": [], + "executors": [ + { + "apiVersion": "batch/v1", + "kind": "Job", + "metadata": { + "annotations": {}, + "labels": { + "job-type": "executor", + "taskmaster-name": "task-ec104c85", + "executor-no": "0" + }, + "name": "task-ec104c85-ex-00" + }, + "spec": { + "template": { + "metadata": { + "name": "task-ec104c85-ex-00" + }, + "spec": { + "containers": [ + { + "command": [ + "echo", + "You will see this in the logs (stdout)." 
+ ], + "image": "alpine", + "name": "task-ec104c85-ex-00", + "resources": {} + } + ], + "restartPolicy": "Never" + } + } + } + }, + { + "apiVersion": "batch/v1", + "kind": "Job", + "metadata": { + "annotations": {}, + "labels": { + "job-type": "executor", + "taskmaster-name": "task-ec104c85", + "executor-no": "1" + }, + "name": "task-ec104c85-ex-01" + }, + "spec": { + "template": { + "metadata": { + "name": "task-ec104c85-ex-01" + }, + "spec": { + "containers": [ + { + "command": [ + "sh", + "-c", + "exit 1" + ], + "image": "alpine", + "name": "task-ec104c85-ex-01", + "resources": {} + } + ], + "restartPolicy": "Never" + } + } + } + }, + { + "apiVersion": "batch/v1", + "kind": "Job", + "metadata": { + "annotations": {}, + "labels": { + "job-type": "executor", + "taskmaster-name": "task-ec104c85", + "executor-no": "2" + }, + "name": "task-ec104c85-ex-02" + }, + "spec": { + "template": { + "metadata": { + "name": "task-ec104c85-ex-02" + }, + "spec": { + "containers": [ + { + "command": [ + "echo", + "This shouldn't appear in the logs (stdout)." 
+ ], + "image": "alpine", + "name": "task-ec104c85-ex-02", + "resources": {} + } + ], + "restartPolicy": "Never" + } + } + } + } + ], + "resources": { + "disk_gb": 0.1 + } +} \ No newline at end of file diff --git a/examples/error/executor_taskmaster.json b/examples/error/executor_taskmaster.json new file mode 100644 index 00000000..8e5f4bf5 --- /dev/null +++ b/examples/error/executor_taskmaster.json @@ -0,0 +1,45 @@ +{ + "outputs": [], + "inputs": [], + "volumes": [], + "executors": [ + { + "apiVersion": "batch/v1", + "kind": "Job", + "metadata": { + "annotations": {}, + "labels": { + "job-type": "executor", + "taskmaster-name": "task-23bf5e64", + "executor-no": "0" + }, + "name": "task-23bf5e64-ex-00" + }, + "spec": { + "template": { + "metadata": { + "name": "task-23bf5e64-ex-00" + }, + "spec": { + "containers": [ + { + "command": [ + "sh", + "-c", + "exit 1" + ], + "image": "alpine", + "name": "task-23bf5e64-ex-00", + "resources": {} + } + ], + "restartPolicy": "Never" + } + } + } + } + ], + "resources": { + "disk_gb": 0.1 + } +} \ No newline at end of file diff --git a/examples/error/input_taskmaster.json b/examples/error/input_taskmaster.json new file mode 100644 index 00000000..6df439cf --- /dev/null +++ b/examples/error/input_taskmaster.json @@ -0,0 +1,84 @@ +{ + "outputs": [], + "inputs": [ + { + "url": "http://nonexistent.ebi.ac.uk", + "path": "/somewhere/file", + "type": "FILE" + } + ], + "volumes": [], + "executors": [ + { + "apiVersion": "batch/v1", + "kind": "Job", + "metadata": { + "annotations": {}, + "labels": { + "job-type": "executor", + "taskmaster-name": "task-7ff304bd", + "executor-no": "0" + }, + "name": "task-7ff304bd-ex-00" + }, + "spec": { + "template": { + "metadata": { + "name": "task-7ff304bd-ex-00" + }, + "spec": { + "containers": [ + { + "command": [ + "echo", + "This shouldn't appear in the logs." 
+ ], + "image": "alpine", + "name": "task-7ff304bd-ex-00", + "resources": {} + } + ], + "restartPolicy": "Never" + } + } + } + }, + { + "apiVersion": "batch/v1", + "kind": "Job", + "metadata": { + "annotations": {}, + "labels": { + "job-type": "executor", + "taskmaster-name": "task-7ff304bd", + "executor-no": "1" + }, + "name": "task-7ff304bd-ex-01" + }, + "spec": { + "template": { + "metadata": { + "name": "task-7ff304bd-ex-01" + }, + "spec": { + "containers": [ + { + "command": [ + "cat", + "/somewhere/file" + ], + "image": "alpine", + "name": "task-7ff304bd-ex-01", + "resources": {} + } + ], + "restartPolicy": "Never" + } + } + } + } + ], + "resources": { + "disk_gb": 0.1 + } +} \ No newline at end of file diff --git a/examples/success/input_content.json b/examples/success/input_content.json new file mode 100644 index 00000000..fbc209b7 --- /dev/null +++ b/examples/success/input_content.json @@ -0,0 +1,19 @@ +{ + "description": "Demonstrates inlined input", + "inputs": [ + { + "content": "ABC TESK and some more text.", + "path": "/tes/volumes/input", + "type": "FILE" + } + ], + "executors": [ + { + "image": "alpine", + "command": [ + "cat", + "/tes/volumes/input" + ] + } + ] +} diff --git a/examples/success/input_dir_duplicate_file_names.json b/examples/success/input_dir_duplicate_file_names.json new file mode 100644 index 00000000..07bcbaaa --- /dev/null +++ b/examples/success/input_dir_duplicate_file_names.json @@ -0,0 +1,35 @@ +{ + "description": "2 different folders with filename collision, mounted to the same place. 
The latest one wins (das in our case)", + "inputs": [ + { + "url": "ftp://ftp.ebi.ac.uk/pub/databases/16S_RNA/README", + "path": "/tes/files/rna_README", + "type": "FILE" + }, + { + "url": "ftp://ftp.ebi.ac.uk/pub/software/das/README", + "path": "/tes/files/das_README", + "type": "FILE" + }, + { + "url": "ftp://ftp.ebi.ac.uk/pub/databases/16S_RNA", + "path": "/tes", + "type": "DIRECTORY" + }, + { + "url": "ftp://ftp.ebi.ac.uk/pub/software/das", + "path": "/tes", + "type": "DIRECTORY" + } + ], + "executors": [ + { + "image": "ubuntu", + "command": [ + "sh", + "-c", + "cd /tes; find * -name \"*README\" -print0 | du --files0-from=- -b" + ] + } + ] +} diff --git a/examples/success/input_dir_file_duplicate_file_names.json b/examples/success/input_dir_file_duplicate_file_names.json new file mode 100644 index 00000000..f55e6737 --- /dev/null +++ b/examples/success/input_dir_file_duplicate_file_names.json @@ -0,0 +1,35 @@ +{ + "description": "Placing a file at a location (/tes/README), that is colliding with a file from directory input. 
The latest one wins (das in our case)", + "inputs": [ + { + "url": "ftp://ftp.ebi.ac.uk/pub/databases/16S_RNA/README", + "path": "/tes/files/rna_README", + "type": "FILE" + }, + { + "url": "ftp://ftp.ebi.ac.uk/pub/software/das/README", + "path": "/tes/files/das_README", + "type": "FILE" + }, + { + "url": "ftp://ftp.ebi.ac.uk/pub/databases/16S_RNA/README", + "path": "/tes/README", + "type": "FILE" + }, + { + "url": "ftp://ftp.ebi.ac.uk/pub/software/das", + "path": "/tes", + "type": "DIRECTORY" + } + ], + "executors": [ + { + "image": "ubuntu", + "command": [ + "sh", + "-c", + "cd /tes; find * -name \"*README\" -print0 | du --files0-from=- -b" + ] + } + ] +} diff --git a/examples/success/input_dir_ftp01_tree.json b/examples/success/input_dir_ftp01_tree.json new file mode 100644 index 00000000..4b23f038 --- /dev/null +++ b/examples/success/input_dir_ftp01_tree.json @@ -0,0 +1,53 @@ +{ + "description": "Demonstrates retrieving 2 independent FTP directory trees (with subfolders). If the FTP is stable, 1. executor should output dos (!!) and mac subfolders with number of files in them (mac 7, dos 3). We will also list the files in both dirs, to have sth to compare with for next examples", + "inputs": [ + { + "url": "ftp://ftp.ebi.ac.uk/pub/software/tools", + "path": "/tes/old-tools", + "type": "DIRECTORY" + }, + { + "url": "ftp://ftp.ebi.ac.uk/pub/software/das", + "path": "/elsewhere", + "type": "DIRECTORY" + } + ], + "executors": [ + { + "image": "alpine", + "command": [ + "sh", + "-c", + "find . -type d -exec sh -c 'echo \"$(find \"{}\" -type f | wc -l)\" {}' \\; | sort -nr" + ], + "workdir": "/tes/old-tools" + }, + { + "image": "alpine", + "command": [ + "sh", + "-c", + "find . -type d -exec sh -c 'echo \"$(find \"{}\" -type f | wc -l)\" {}' \\;" + ], + "workdir": "/elsewhere" + }, + { + "image": "alpine", + "command": [ + "sh", + "-c", + "find . -type f" + ], + "workdir": "/tes/old-tools" + }, + { + "image": "alpine", + "command": [ + "sh", + "-c", + "find . 
-type f" + ], + "workdir": "/elsewhere" + } + ] +} diff --git a/examples/success/input_dir_ftp02_sub.json b/examples/success/input_dir_ftp02_sub.json new file mode 100644 index 00000000..326b0aa7 --- /dev/null +++ b/examples/success/input_dir_ftp02_sub.json @@ -0,0 +1,35 @@ +{ + "description": "Uses FTP directories from previous example (remember dos and mac?). This time 2. directory is mounted below mountpoint for the 1. directory (nested). If the FTP is stable, should output dos (3), mac (7) and windows (3) subfolders", + "inputs": [ + { + "url": "ftp://ftp.ebi.ac.uk/pub/software/tools", + "path": "/tes/old-tools", + "type": "DIRECTORY" + }, + { + "url": "ftp://ftp.ebi.ac.uk/pub/software/das", + "path": "/tes/old-tools/windows", + "type": "DIRECTORY" + } + ], + "executors": [ + { + "image": "alpine", + "command": [ + "sh", + "-c", + "find . -type d -exec sh -c 'echo \"$(find \"{}\" -type f | wc -l)\" {}' \\; | sort -nr" + ], + "workdir": "/tes/old-tools" + }, + { + "image": "alpine", + "command": [ + "sh", + "-c", + "find . -type f" + ], + "workdir": "/tes/old-tools" + } + ] +} diff --git a/examples/success/input_dir_ftp03_tree_shadowing.json b/examples/success/input_dir_ftp03_tree_shadowing.json new file mode 100644 index 00000000..a69c057b --- /dev/null +++ b/examples/success/input_dir_ftp03_tree_shadowing.json @@ -0,0 +1,35 @@ +{ + "description": "Uses FTP directories from previous examples (dos and mac). This time, we mount second source directory in place of existing directory (dos, that is). The content coming from 'ftp://ftp.ebi.ac.uk/pub/software/tools/dos' and 'ftp://ftp.ebi.ac.uk/pub/software/das' will be merged. 
Expect 6 files in dos directory.", + "inputs": [ + { + "url": "ftp://ftp.ebi.ac.uk/pub/software/tools", + "path": "/tes/old-tools", + "type": "DIRECTORY" + }, + { + "url": "ftp://ftp.ebi.ac.uk/pub/software/das", + "path": "/tes/old-tools/dos", + "type": "DIRECTORY" + } + ], + "executors": [ + { + "image": "alpine", + "command": [ + "sh", + "-c", + "find . -type d -exec sh -c 'echo \"$(find \"{}\" -type f | wc -l)\" {}' \\; | sort -nr" + ], + "workdir": "/tes/old-tools" + }, + { + "image": "alpine", + "command": [ + "sh", + "-c", + "find . -type f" + ], + "workdir": "/tes/old-tools" + } + ] +} diff --git a/examples/success/input_dir_ftp04_tree_shadowing.json b/examples/success/input_dir_ftp04_tree_shadowing.json new file mode 100644 index 00000000..13812dc0 --- /dev/null +++ b/examples/success/input_dir_ftp04_tree_shadowing.json @@ -0,0 +1,35 @@ +{ + "description": "The same as previous (03), but proves order does not matter.", + "inputs": [ + { + "url": "ftp://ftp.ebi.ac.uk/pub/software/das", + "path": "/tes/old-tools/dos", + "type": "DIRECTORY" + }, + { + "url": "ftp://ftp.ebi.ac.uk/pub/software/tools", + "path": "/tes/old-tools", + "type": "DIRECTORY" + } + ], + "executors": [ + { + "image": "alpine", + "command": [ + "sh", + "-c", + "find . -type d -exec sh -c 'echo \"$(find \"{}\" -type f | wc -l)\" {}' \\; | sort -nr" + ], + "workdir": "/tes/old-tools" + }, + { + "image": "alpine", + "command": [ + "sh", + "-c", + "find . -type f" + ], + "workdir": "/tes/old-tools" + } + ] +} diff --git a/examples/success/input_dir_ftp05_merge.json b/examples/success/input_dir_ftp05_merge.json new file mode 100644 index 00000000..06fa6109 --- /dev/null +++ b/examples/success/input_dir_ftp05_merge.json @@ -0,0 +1,35 @@ +{ + "description": "And now two source directories merged to a common place. 
There should be 6 files altogether in /tes/old-tools", + "inputs": [ + { + "url": "ftp://ftp.ebi.ac.uk/pub/software/das", + "path": "/tes/old-tools", + "type": "DIRECTORY" + }, + { + "url": "ftp://ftp.ebi.ac.uk/pub/software/tools/dos", + "path": "/tes/old-tools", + "type": "DIRECTORY" + } + ], + "executors": [ + { + "image": "alpine", + "command": [ + "sh", + "-c", + "find . -type d -exec sh -c 'echo \"$(find \"{}\" -type f | wc -l)\" {}' \\; | sort -nr" + ], + "workdir": "/tes/old-tools" + }, + { + "image": "alpine", + "command": [ + "sh", + "-c", + "find . -type f" + ], + "workdir": "/tes/old-tools" + } + ] +} diff --git a/examples/success/input_dir_ftp06_file_merge.json b/examples/success/input_dir_ftp06_file_merge.json new file mode 100644 index 00000000..d8684c0f --- /dev/null +++ b/examples/success/input_dir_ftp06_file_merge.json @@ -0,0 +1,40 @@ +{ + "description": "And now add files from different locations to directory tree retrieved from FTP. There should be 4 files in dos and 8 in mac.", + "inputs": [ + { + "url": "https://raw.githubusercontent.com/EMBL-EBI-TSI/TESK/master/README.md", + "path": "/tes/old-tools/mac/readme.md", + "type": "FILE" + }, + { + "url": "ftp://ftp.ebi.ac.uk/pub/software/tools", + "path": "/tes/old-tools", + "type": "DIRECTORY" + }, + { + "url": "ftp://ftp.ebi.ac.uk/pub/software/das/README", + "path": "/tes/old-tools/dos/readme.txt", + "type": "FILE" + } + ], + "executors": [ + { + "image": "alpine", + "command": [ + "sh", + "-c", + "find . -type d -exec sh -c 'echo \"$(find \"{}\" -type f | wc -l)\" {}' \\; | sort -nr" + ], + "workdir": "/tes/old-tools" + }, + { + "image": "alpine", + "command": [ + "sh", + "-c", + "find . 
-type f" + ], + "workdir": "/tes/old-tools" + } + ] +} diff --git a/examples/success/input_file_dir_merge.json b/examples/success/input_file_dir_merge.json new file mode 100644 index 00000000..1b5c135d --- /dev/null +++ b/examples/success/input_file_dir_merge.json @@ -0,0 +1,25 @@ +{ + "description": "Demonstrates handling 2 http file inputs, that need to go to single directory.", + "inputs": [ + { + "url": "https://raw.githubusercontent.com/EMBL-EBI-TSI/TESK/master/examples/success/hello.json", + "path": "/tes/json", + "type": "FILE" + }, + { + "url": "https://raw.githubusercontent.com/EMBL-EBI-TSI/TESK/master/scripts/taskmaster.py", + "path": "/tes/python", + "type": "FILE" + } + ], + "executors": [ + { + "image": "alpine", + "command": [ + "sh", + "-c", + "find /tes -type f" + ] + } + ] +} diff --git a/examples/success/input_file_duplicate_names.json b/examples/success/input_file_duplicate_names.json new file mode 100644 index 00000000..fb34e53d --- /dev/null +++ b/examples/success/input_file_duplicate_names.json @@ -0,0 +1,24 @@ +{ + "description": "Demonstrates an attempt of placing 2 different files in the same place. At the moment, the last one wins (will overwrite previous occurrences)", + "inputs": [ + { + "url": "https://raw.githubusercontent.com/EMBL-EBI-TSI/TESK/master/examples/success/hello.json", + "path": "/tes/volumes/input", + "type": "FILE" + }, + { + "url": "https://raw.githubusercontent.com/EMBL-EBI-TSI/TESK/master/examples/success/stdin.json", + "path": "/tes/volumes/input", + "type": "FILE" + } + ], + "executors": [ + { + "image": "alpine", + "command": [ + "cat", + "/tes/volumes/input" + ] + } + ] +} diff --git a/examples/success/input_file_http.json b/examples/success/input_file_http.json new file mode 100644 index 00000000..53cb270c --- /dev/null +++ b/examples/success/input_file_http.json @@ -0,0 +1,19 @@ +{ + "description": "Demonstrates handling single http file input. 
Will output some nice looking JSON to stdout.", + "inputs": [ + { + "url": "https://raw.githubusercontent.com/EMBL-EBI-TSI/TESK/master/examples/success/hello.json", + "path": "/tes/volumes/input", + "type": "FILE" + } + ], + "executors": [ + { + "image": "alpine", + "command": [ + "cat", + "/tes/volumes/input" + ] + } + ] +} diff --git a/examples/success/input_file_http_nested_mounts.json b/examples/success/input_file_http_nested_mounts.json new file mode 100644 index 00000000..7ed5834e --- /dev/null +++ b/examples/success/input_file_http_nested_mounts.json @@ -0,0 +1,25 @@ +{ + "description": "Demonstrates handling 2 http file inputs with nested mountpoints. Should find both files in their respective locations.", + "inputs": [ + { + "url": "https://raw.githubusercontent.com/EMBL-EBI-TSI/TESK/master/examples/success/hello.json", + "path": "/tes/volumes/input.json", + "type": "FILE" + }, + { + "url": "https://raw.githubusercontent.com/EMBL-EBI-TSI/TESK/master/examples/success/env.json", + "path": "/tes/file.json", + "type": "FILE" + } + ], + "executors": [ + { + "image": "alpine", + "command": [ + "sh", + "-c", + "find . 
-name '*.json'" + ] + } + ] +} diff --git a/examples/success/output.json b/examples/success/output.json new file mode 100644 index 00000000..105214c5 --- /dev/null +++ b/examples/success/output.json @@ -0,0 +1,47 @@ +{ + "description": "FTP output", + "outputs": [ + { + "description": "ftp://ftp-private.ebi.ac.uk/upload/examples/existing1 exists and is initially empty - expect file1.txt created", + "path": "/tes/file1.txt", + "type": "FILE", + "url": "ftp://ftp-private.ebi.ac.uk/upload/examples/existing1/file1.txt" + }, + { + "description": "ftp://ftp-private.ebi.ac.uk/upload/examples/new1 does not exist - expect directory will be created and will have file_new.txt inside with contents of file1.txt", + "path": "/tes/file1.txt", + "type": "FILE", + "url": "ftp://ftp-private.ebi.ac.uk/upload/examples/new1/file_new.txt" + }, + { + "description": "ftp://ftp-private.ebi.ac.uk/upload/examples/new2 does not exist - expect will be created and will contain file1.txt and file2.txt", + "path": "/tes", + "type": "DIRECTORY", + "url": "ftp://ftp-private.ebi.ac.uk/upload/examples/new2" + }, + { + "description": "ftp://ftp-private.ebi.ac.uk/upload/examples/existing2 exists and has file1.txt and remaining.txt inside - expect file1.txt will be replaced, remaining.txt untouched and file2.txt added", + "path": "/tes", + "type": "DIRECTORY", + "url": "ftp://ftp-private.ebi.ac.uk/upload/examples/existing2" + } + ], + "executors": [ + { + "image": "alpine", + "command": [ + "echo", + "This goes to file1" + ], + "stdout": "/tes/file1.txt" + }, + { + "image": "alpine", + "command": [ + "sh", + "-c", + "echo 'This goes to file2' > /tes/file2.txt" + ] + } + ] +} diff --git a/examples/success/output_volume.json b/examples/success/output_volume.json new file mode 100644 index 00000000..f2895e98 --- /dev/null +++ b/examples/success/output_volume.json @@ -0,0 +1,38 @@ +{ + "description": "FTP outputting file and directory from volume location - TODO fix expects in descriptions", + "volumes": [ 
+ "/tes" + ], + "outputs": [ + { + "description": "ftp://ftp-private.ebi.ac.uk/upload/examples/existing1 exists and is initially empty - expect file1.txt created", + "path": "/tes/file1.txt", + "type": "FILE", + "url": "ftp://ftp-private.ebi.ac.uk/upload/examples/existing1/file1.txt" + }, + { + "description": "ftp://ftp-private.ebi.ac.uk/upload/examples/new2 does not exist - expect will be created and will contain file1.txt and file2.txt", + "path": "/tes", + "type": "DIRECTORY", + "url": "ftp://ftp-private.ebi.ac.uk/upload/examples/new2" + } + ], + "executors": [ + { + "image": "alpine", + "command": [ + "echo", + "This goes to file1" + ], + "stdout": "/tes/file1.txt" + }, + { + "image": "alpine", + "command": [ + "sh", + "-c", + "echo 'This goes to file2' > /tes/file2.txt" + ] + } + ] +} diff --git a/examples/success/output_volume_taskmaster.json b/examples/success/output_volume_taskmaster.json new file mode 100644 index 00000000..024f1633 --- /dev/null +++ b/examples/success/output_volume_taskmaster.json @@ -0,0 +1,95 @@ +{ + "outputs": [ + { + "description": "ftp://ftp-private.ebi.ac.uk/upload/examples/existing1 exists and is initially empty - expect file1.txt created", + "url": "ftp://ftp-private.ebi.ac.uk/upload/examples/existing1/file1.txt", + "path": "/tes/file1.txt", + "type": "FILE" + }, + { + "description": "ftp://ftp-private.ebi.ac.uk/upload/examples/new2 does not exist - expect will be created and will contain file1.txt and file2.txt", + "url": "ftp://ftp-private.ebi.ac.uk/upload/examples/new2", + "path": "/tes", + "type": "DIRECTORY" + } + ], + "inputs": [], + "volumes": [ + "/tes" + ], + "executors": [ + { + "apiVersion": "batch/v1", + "kind": "Job", + "metadata": { + "annotations": {}, + "labels": { + "job-type": "executor", + "taskmaster-name": "task-af78a6a3", + "executor-no": "0" + }, + "name": "task-af78a6a3-ex-00" + }, + "spec": { + "template": { + "metadata": { + "name": "task-af78a6a3-ex-00" + }, + "spec": { + "containers": [ + { + "command": 
[ + "/bin/sh", + "-c", + "echo \u0027This goes to file1\u0027 \u003e /tes/file1.txt" + ], + "image": "alpine", + "name": "task-af78a6a3-ex-00", + "resources": {} + } + ], + "restartPolicy": "Never" + } + } + } + }, + { + "apiVersion": "batch/v1", + "kind": "Job", + "metadata": { + "annotations": {}, + "labels": { + "job-type": "executor", + "taskmaster-name": "task-af78a6a3", + "executor-no": "1" + }, + "name": "task-af78a6a3-ex-01" + }, + "spec": { + "template": { + "metadata": { + "name": "task-af78a6a3-ex-01" + }, + "spec": { + "containers": [ + { + "command": [ + "sh", + "-c", + "echo \u0027This goes to file2\u0027 \u003e /tes/file2.txt" + ], + "image": "alpine", + "name": "task-af78a6a3-ex-01", + "resources": {} + } + ], + "restartPolicy": "Never" + } + } + } + } + ], + "resources": { + "disk_gb": 0.1 + } +} \ No newline at end of file diff --git a/scripts/exportsecrets.py b/scripts/exportsecrets.py index dcb93fe0..5a7de89c 100755 --- a/scripts/exportsecrets.py +++ b/scripts/exportsecrets.py @@ -1,7 +1,7 @@ -#!/usr/bin/python +#!/usr/bin/env python2 -import yaml import os +import yaml config = yaml.safe_load(open(os.path.join(os.environ["HOME"], '.kube/config'))) diff --git a/scripts/filer.py b/scripts/filer.py index 83c3e33c..82d5543a 100755 --- a/scripts/filer.py +++ b/scripts/filer.py @@ -1,10 +1,9 @@ -#!/usr/bin/python +#!/usr/bin/env python2 from __future__ import print_function from ftplib import FTP import ftplib import argparse -import requests import sys import json import re @@ -14,6 +13,7 @@ import logging import traceback + debug = True def download_ftp_file(source, target, ftp): diff --git a/scripts/genjob.py b/scripts/genjob.py index 5b4228ba..95d75a0c 100755 --- a/scripts/genjob.py +++ b/scripts/genjob.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python2 import argparse import json diff --git a/scripts/state.py b/scripts/state.py index 128cab41..9006e87d 100755 --- a/scripts/state.py +++ b/scripts/state.py @@ -1,4 +1,4 @@ 
-#!/usr/bin/python +#!/usr/bin/env python2 from __future__ import print_function @@ -17,11 +17,11 @@ def exec_state(exe_f, namespace, state): job = bv1.read_namespaced_job(exe, namespace=namespace) state['logs'][exe_i]['start_time'] = job.metadata.creation_timestamp state['logs'][exe_i]['end_time'] = job.status.completion_time - + job_label_s = 'controller-uid='+job.spec.selector.match_labels['controller-uid'] pods = cv1.list_namespaced_pod(label_selector=label_s, namespace=namespace) - try: + try: state['logs'][exe_i]['stdout'] = read_namespaced_pod_log(pods[0].metadata.name, namespace=namespace) except IndexError: print("No pod matching job "+job.metadata.name+" could be found", file=sys.stderr) @@ -46,7 +46,7 @@ def control_state(taskm_f, namespace, state): job_label_s = 'controller-uid='+job.spec.selector.match_labels['controller-uid'] pods = cv1.list_namespaced_pod(label_selector=label_s, namespace=namespace) - try: + try: state['system_logs'] = read_namespaced_pod_log(pods[0].metadata.name, namespace=namespace) except IndexError: print("No pod matching job "+job.metadata.name+" could be found", file=sys.stderr) diff --git a/scripts/taskmaster.py b/scripts/taskmaster.py index 54a7d773..c0b935cf 100755 --- a/scripts/taskmaster.py +++ b/scripts/taskmaster.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/env python2 from __future__ import print_function import argparse @@ -40,6 +40,7 @@ def run_executor(executor, namespace, pvc=None): # TODO move this code to PVC class def append_mount(volume_mounts, name, path, pvc): + # Checks all mount paths in volume_mounts if the path given is already in there duplicate = next((mount for mount in volume_mounts if mount['mountPath'] == path), None) # If not, add mount path @@ -115,6 +116,7 @@ def run_task(data, filer_version): if os.environ.get('TESK_FTP_USERNAME') is not None: filer.set_ftp(os.environ['TESK_FTP_USERNAME'], os.environ['TESK_FTP_PASSWORD']) + pvc = init_pvc(data, filer) for executor in data['executors']: diff 
--git a/tesintro.md b/tesintro.md new file mode 100644 index 00000000..5e896b28 --- /dev/null +++ b/tesintro.md @@ -0,0 +1,33 @@ +# A very brief introduction to TES + +## Task Execution Service standard +The Global Alliance for Genomics and Health (GA4GH) is an international consortium of academic and industry partners that try to establish standards to promote and facilitate collaboration and data exchange in the life sciences. As part of the 'Cloud Workstream' of this effort 4 standards have been proposed to facilitate running scientific workflows in a cloud environment: the Data Object Service (DOS), Tool Registration Service (TRS), Workflow Execution Service (WES) and the Task Execution Service (TES). These four standards are meant to be independent but complementary to each other in running workflows and handling the associated data needs. TES is a standard that represents the smallest unit of computational work in a workflow that can be independently run in a cloud. TESK is an implementation of this standard by EMBL-EBI running on Google's Kubernetes container orchestration platform. + +## Technical overview +A minimal TES task is represented as follows: + +```json +{ + "inputs": [ + { + "url": "http://address/to/input_file", + "path": "/container/input" + } + ], + "outputs" : [ + { + "url" : "file://path/to/output_file", + "path" : "/container/output" + } + ], + "executors" : [ + { + "image" : "ubuntu", + "command" : ["md5sum", "/container/input"], + "stdout" : "/container/output" + } + ] +} +``` + +Inputs and outputs are expected to have a URI that can be resolved by the relevant implementation. The executor 'image' entry is any image that can be reached by the relevant docker instance of the implementation, and would usually refer to a public image on Dockerhub or Quay. TES tasks are submitted through a RESTful API using JSON. Also see the [full spec](https://github.com/ga4gh/task-execution-schemas) for a complete list of possible fields and their description.