Skip to content

Commit 1c6b75d

Browse files
authored
Merge pull request #507 from kapilkd13/httpinput
Allowing Http/Https files as input
2 parents 5729aa6 + e6c3548 commit 1c6b75d

File tree

3 files changed

+66
-6
lines changed

3 files changed

+66
-6
lines changed

cwltool/pathmapper.py

Lines changed: 38 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
import stat
66
import uuid
77
from functools import partial
8+
from tempfile import NamedTemporaryFile
9+
10+
import requests
11+
from cachecontrol import CacheControl
12+
from cachecontrol.caches import FileCache
813
from typing import Any, Callable, Dict, Iterable, List, Set, Text, Tuple, Union
914

1015
import schema_salad.validate as validate
@@ -139,6 +144,29 @@ def trim_listing(obj):
139144
if obj.get("location", "").startswith("file://") and "listing" in obj:
140145
del obj["listing"]
141146

147+
# Download http Files
148+
def downloadHttpFile(httpurl):
    # type: (Text) -> Text
    """Download ``httpurl`` into a local temporary file and return its path.

    The request goes through a ``CacheControl`` session backed by an on-disk
    ``FileCache`` so repeated downloads of the same URL can be served from
    the HTTP cache.

    :param httpurl: the http:// or https:// URL to fetch.
    :return: path of the temporary file holding the response body. The file
        is created with ``delete=False``; the caller owns it and is
        responsible for removing it.
    :raises requests.exceptions.HTTPError: if the server answers with a
        4xx/5xx status (previously the error page was silently saved and
        used as the input file).
    """
    # NOTE(review): when XDG_CACHE_HOME is set the cache ends up in
    # "$XDG_CACHE_HOME/.cache/cwltool" (an extra ".cache" level). Kept as-is
    # so the existing cache location does not move; confirm whether that
    # is intended.
    if "XDG_CACHE_HOME" in os.environ:
        directory = os.environ["XDG_CACHE_HOME"]
    elif "HOME" in os.environ:
        directory = os.environ["HOME"]
    else:
        directory = os.path.expanduser('~')

    cache_session = CacheControl(
        requests.Session(),
        cache=FileCache(
            os.path.join(directory, ".cache", "cwltool")))

    r = cache_session.get(httpurl, stream=True)
    try:
        # Fail loudly on HTTP errors instead of storing the error body.
        r.raise_for_status()
        f = NamedTemporaryFile(mode='wb', delete=False)
        try:
            with f:
                for chunk in r.iter_content(chunk_size=16384):
                    if chunk:  # filter out keep-alive new chunks
                        f.write(chunk)
        except Exception:
            # Do not leave a truncated download behind: the file was
            # created with delete=False, so nobody else would remove it.
            os.unlink(f.name)
            raise
    finally:
        # Release the streamed connection on both success and failure.
        r.close()
    return f.name
142170

143171
class PathMapper(object):
144172
"""Mapping of files from relative path provided in the file to a tuple of
@@ -208,14 +236,18 @@ def visit(self, obj, stagedir, basedir, copy=False, staged=False):
208236
self._pathmap[obj["location"]] = MapperEnt(obj["contents"], tgt, "CreateFile", staged)
209237
else:
210238
with SourceLine(obj, "location", validate.ValidationException):
211-
# Dereference symbolic links
212239
deref = ab
213-
st = os.lstat(deref)
214-
while stat.S_ISLNK(st.st_mode):
215-
rl = os.readlink(deref)
216-
deref = rl if os.path.isabs(rl) else os.path.join(
217-
os.path.dirname(deref), rl)
240+
if urllib.parse.urlsplit(deref).scheme in ['http','https']:
241+
deref = downloadHttpFile(path)
242+
else:
243+
# Dereference symbolic links
218244
st = os.lstat(deref)
245+
while stat.S_ISLNK(st.st_mode):
246+
rl = os.readlink(deref)
247+
deref = rl if os.path.isabs(rl) else os.path.join(
248+
os.path.dirname(deref), rl)
249+
st = os.lstat(deref)
250+
219251
self._pathmap[path] = MapperEnt(deref, tgt, "WritableFile" if copy else "File", staged)
220252
self.visitlisting(obj.get("secondaryFiles", []), stagedir, basedir, copy=copy, staged=staged)
221253

cwltool/stdfsaccess.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
def abspath(src, basedir): # type: (Text, Text) -> Text
1414
if src.startswith(u"file://"):
1515
ab = six.text_type(uri_file_path(str(src)))
16+
elif urllib.parse.urlsplit(src).scheme in ['http','https']:
17+
return src
1618
else:
1719
if basedir.startswith(u"file://"):
1820
ab = src if os.path.isabs(src) else basedir+ '/'+ src

tests/test_http_input.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from __future__ import absolute_import
2+
import unittest
3+
import os
4+
import tempfile
5+
from cwltool.pathmapper import PathMapper
6+
7+
8+
class TestHttpInput(unittest.TestCase):
    """Check that PathMapper accepts File objects located at http(s) URLs."""

    def test_http_path_mapping(self):
        """An https ``location`` is downloaded and mapped to a local file.

        NOTE: this test needs network access to raw.githubusercontent.com.
        """
        # Local import: only this test needs it, for temp-dir cleanup.
        import shutil

        input_file_path = "https://raw.githubusercontent.com/common-workflow-language/cwltool/master/tests/2.fasta"
        tempdir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, tempdir, ignore_errors=True)
        base_file = [{
            "class": "File",
            "location": input_file_path,
            "basename": "chr20.fa"
        }]
        path_map_obj = PathMapper(base_file, os.getcwd(), tempdir)

        # The URL itself is the key in the path map ...
        self.assertIn(input_file_path, path_map_obj._pathmap)
        # ... and it resolves to a downloaded local file.
        resolved = path_map_obj._pathmap[input_file_path].resolved
        self.assertTrue(os.path.exists(resolved))
        self.addCleanup(os.remove, resolved)
        with open(resolved) as f:
            self.assertIn(">Sequence 561 BP; 135 A; 106 C; 98 G; 222 T; 0 other;", f.read())

0 commit comments

Comments
 (0)