Skip to content

Commit e20fda0

Browse files
Merge pull request #54 from bencvdb/master
Add function to determine a workflow's version.
2 parents e621c6a + 721d660 commit e20fda0

File tree

3 files changed

+120
-21
lines changed

3 files changed

+120
-21
lines changed

test/test_client_util.py

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,12 @@
44
import os
55
import logging
66
import subprocess
7+
import sys
78

8-
from wes_client.util import expand_globs
9+
pkg_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) # noqa
10+
sys.path.insert(0, pkg_root) # noqa
11+
12+
from wes_client.util import expand_globs, wf_info
913

1014
logging.basicConfig(level=logging.INFO)
1115

@@ -14,6 +18,22 @@ class IntegrationTest(unittest.TestCase):
1418
def setUp(self):
1519
dirname, filename = os.path.split(os.path.abspath(__file__))
1620
self.testdata_dir = dirname + 'data'
21+
self.local = {'cwl': 'file://' + os.path.join(os.getcwd() + '/testdata/md5sum.cwl'),
22+
'wdl': 'file://' + os.path.join(os.getcwd() + '/testdata/md5sum.wdl'),
23+
'py': 'file://' + os.path.join(os.getcwd() + '/test/test_integration.py'),
24+
'unsupported': 'fake.txt'}
25+
26+
self.remote = {
27+
'cwl': 'https://raw.githubusercontent.com/common-workflow-language/workflow-service/master/testdata/md5sum.cwl',
28+
'wdl': 'https://raw.githubusercontent.com/common-workflow-language/workflow-service/master/testdata/md5sum.wdl',
29+
'py': 'https://raw.githubusercontent.com/common-workflow-language/workflow-service/master/test/test_integration.py',
30+
'unsupported': 'gs://topmed_workflow_testing/topmed_aligner/small_test_files_sbg/example_human_known_snp.py',
31+
'unreachable': 'https://fake.py'}
32+
33+
self.expected = {'cwl': ('v1.0', 'CWL'),
34+
'wdl': ('draft-2', 'WDL'),
35+
'py': ('2.7', 'PY'),
36+
'pyWithPrefix': ('2.7', 'PY')}
1737

1838
def tearDown(self):
1939
unittest.TestCase.tearDown(self)
@@ -34,6 +54,46 @@ def test_expand_globs(self):
3454
glob_files = expand_globs('*')
3555
assert set(files) == glob_files, '\n' + str(set(files)) + '\n' + str(glob_files)
3656

57+
def testSupportedFormatChecking(self):
58+
"""
59+
Check that non-wdl, -python, -cwl files are rejected.
60+
61+
This test is run only on local files to avoid downloading and removing a new file.
62+
"""
63+
64+
for file_format, location in self.local.items():
65+
if file_format != 'unsupported':
66+
# Tests the behavior after receiving supported file types with and without the 'file://' prefix
67+
self.assertEquals(wf_info(location), self.expected[file_format])
68+
self.assertEquals(wf_info(location[7:]), self.expected[file_format])
69+
70+
else:
71+
# Tests behavior after receiving a non supported file type.
72+
with self.assertRaises(TypeError):
73+
wf_info(location)
74+
75+
def testFileLocationChecking(self):
76+
"""
77+
Check that the function rejects unsupported file locations.
78+
79+
This test needs to be run on remote files to test the location checking functionality of wf_info().
80+
"""
81+
82+
for file_format, location in self.remote.items():
83+
if file_format == 'unsupported':
84+
# Tests behavior after receiving a file hosted at an unsupported location.
85+
with self.assertRaises(NotImplementedError):
86+
wf_info(location)
87+
88+
elif file_format == 'unreachable':
89+
# Tests behavior after receiving a non-existent file.
90+
with self.assertRaises(IOError):
91+
wf_info(location)
92+
93+
else:
94+
self.assertEquals(wf_info(location), self.expected[file_format])
95+
self.assertFalse(os.path.isfile(os.path.join(os.getcwd(), 'fetchedFromRemote.' + file_format)))
96+
3797

3898
if __name__ == '__main__':
3999
unittest.main() # run all tests

test/test_integration.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
import signal
88
import shutil
99
import logging
10+
import sys
11+
12+
pkg_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) # noqa
13+
sys.path.insert(0, pkg_root) # noqa
1014

1115
from wes_client.util import WESClient
1216

wes_client/util.py

Lines changed: 55 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,69 @@
11
import os
22
import json
3+
import schema_salad.ref_resolver
4+
from subprocess32 import check_call, DEVNULL, CalledProcessError
5+
import yaml
36
import glob
47
import requests
58
import urllib
69
import logging
7-
import schema_salad.ref_resolver
810

911
from wes_service.util import visit
12+
from urllib import urlopen
1013

1114

12-
def wf_type(workflow_file):
13-
if workflow_file.lower().endswith('wdl'):
14-
return 'WDL'
15-
elif workflow_file.lower().endswith('cwl'):
16-
return 'CWL'
17-
elif workflow_file.lower().endswith('py'):
18-
return 'PY'
19-
else:
20-
raise ValueError('Unrecognized/unsupported workflow file extension: %s' % workflow_file.lower().split('.')[-1])
15+
def two_seven_compatible(filePath):
16+
"""Determines if a python file is 2.7 compatible by seeing if it compiles in a subprocess"""
17+
try:
18+
check_call(['python2', '-m', 'py_compile', filePath], stderr=DEVNULL)
19+
except CalledProcessError:
20+
raise RuntimeError('Python files must be 2.7 compatible')
21+
return True
2122

2223

23-
def wf_version(workflow_file):
24-
# TODO: Check inside of the file, handling local/http/etc.
25-
if wf_type(workflow_file) == 'PY':
24+
def get_version(extension, workflow_file):
25+
'''Determines the version of a .py, .wdl, or .cwl file.'''
26+
if extension == 'py' and two_seven_compatible(workflow_file):
2627
return '2.7'
27-
# elif wf_type(workflow_file) == 'CWL':
28-
# # only works locally
29-
# return yaml.load(open(workflow_file))['cwlVersion']
28+
elif extension == 'cwl':
29+
return yaml.load(open(workflow_file))['cwlVersion']
30+
else: # Must be a wdl file.
31+
# Borrowed from https://github.com/Sage-Bionetworks/synapse-orchestrator/blob/develop/synorchestrator/util.py#L142
32+
try:
33+
return [l.lstrip('version') for l in workflow_file.splitlines() if 'version' in l.split(' ')][0]
34+
except IndexError:
35+
return 'draft-2'
36+
37+
38+
def wf_info(workflow_path):
39+
"""
40+
Returns the version of the file and the file extension.
41+
42+
Assumes that the file path is to the file directly ie, ends with a valid file extension.Supports checking local
43+
files as well as files at http:// and https:// locations. Files at these remote locations are recreated locally to
44+
enable our approach to version checking, then removed after version is extracted.
45+
"""
46+
47+
supported_formats = ['py', 'wdl', 'cwl']
48+
file_type = workflow_path.lower().split('.')[-1] # Grab the file extension
49+
workflow_path = workflow_path if ':' in workflow_path else 'file://' + workflow_path
50+
51+
if file_type in supported_formats:
52+
if workflow_path.startswith('file://'):
53+
version = get_version(file_type, workflow_path[7:])
54+
elif workflow_path.startswith('https://') or workflow_path.startswith('http://'):
55+
# If file not local go fetch it.
56+
html = urlopen(workflow_path).read()
57+
local_loc = os.path.join(os.getcwd(), 'fetchedFromRemote.' + file_type)
58+
with open(local_loc, 'w') as f:
59+
f.write(html)
60+
version = wf_info('file://' + local_loc)[0] # Don't take the file_type here, found it above.
61+
os.remove(local_loc) # TODO: Find a way to avoid recreating file before version determination.
62+
else:
63+
raise NotImplementedError('Unsupported workflow file location: {}. Must be local or HTTP(S).'.format(workflow_path))
3064
else:
31-
# TODO: actually check the wdl file
32-
return "v1.0"
65+
raise TypeError('Unsupported workflow type: .{}. Must be {}.'.format(file_type, '.py, .cwl, or .wdl'))
66+
return version, file_type.upper()
3367

3468

3569
def build_wes_request(workflow_file, json_path, attachments=None):
@@ -42,10 +76,11 @@ def build_wes_request(workflow_file, json_path, attachments=None):
4276
"""
4377
workflow_file = "file://" + workflow_file if ":" not in workflow_file else workflow_file
4478
json_path = json_path[7:] if json_path.startswith("file://") else json_path
79+
wf_version, wf_type = wf_info(workflow_file)
4580

4681
parts = [("workflow_params", json.dumps(json.load(open(json_path)))),
47-
("workflow_type", wf_type(workflow_file)),
48-
("workflow_type_version", wf_version(workflow_file))]
82+
("workflow_type", wf_type),
83+
("workflow_type_version", wf_version)]
4984

5085
if workflow_file.startswith("file://"):
5186
parts.append(("workflow_attachment", (os.path.basename(workflow_file[7:]), open(workflow_file[7:], "rb"))))

0 commit comments

Comments
 (0)