Skip to content

Commit fcd9c80

Browse files
committed
added csv reader
1 parent 4db2ab6 commit fcd9c80

File tree

2 files changed

+120
-0
lines changed

2 files changed

+120
-0
lines changed

flexio.yml

+1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ templates:
3636
in_staging: false
3737

3838
functions:
39+
- path: web-csv.py
3940
- path: web-extract-link.py
4041
- path: web-newspaper.py
4142
- path: web-rss.py

web-csv.py

+119
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
2+
# ---
3+
# name: web-csv
4+
# deployed: true
5+
# title: CSV Reader
6+
# description: Returns the data for the CSVs given by the URLs
7+
# params:
8+
# - name: url
9+
# type: array
10+
# description: URLs of the CSV files to read
11+
# required: true
12+
# examples:
13+
# - '"https://raw.githubusercontent.com/flexiodata/data/master/sample/sample-contacts.csv"'
14+
# notes:
15+
# ---
16+
17+
import csv
18+
import json
19+
import tempfile
20+
import io
21+
import aiohttp
22+
import asyncio
23+
import itertools
24+
from cerberus import Validator
25+
from contextlib import closing
26+
from collections import OrderedDict
27+
28+
def flexio_handler(flex):
    """Fetch the CSVs at the given URLs and emit them as one JSON table.

    The input is read from ``flex.input`` as a JSON array of positional
    arguments; the first argument is mapped to ``urls`` (a comma-delimited
    string or a list, normalized by ``to_list``).

    The output written to ``flex.output`` is a JSON array: the first element
    is the list of column names, followed by one list of cell values per data
    row, in the order the URLs were given. Missing or empty cells are written
    as ``''``.

    Raises:
        ValueError: if the input is not a JSON array or fails validation.
    """

    # get the input
    args = json.loads(flex.input.read())
    if not isinstance(args, list):
        raise ValueError('Input must be a JSON array of parameter values')

    # define the expected parameters and map the values to the parameter names
    # based on the positions of the keys/values
    params = OrderedDict()
    params['urls'] = {'required': True, 'validator': validator_list, 'coerce': to_list}
    args = dict(zip(params.keys(), args))

    # validate the mapped input against the validator
    v = Validator(params, allow_unknown=True)
    args = v.validated(args)
    if args is None:
        raise ValueError('Invalid input: {}'.format(v.errors))

    urls = args['urls']
    loop = asyncio.get_event_loop()
    temp_fp_all = loop.run_until_complete(fetch_all(urls))

    try:
        flex.output.content_type = 'application/json'
        flex.output.write('[')

        # Collect the column names from the header row of every file, in
        # first-seen order. Using the reader's fieldnames (rather than the
        # keys of the first data row) keeps header-only files and avoids the
        # ``None`` overflow key that DictReader adds for over-long rows.
        columns = OrderedDict()
        for temp_fp in temp_fp_all:
            # newline='' lets the csv module do its own line-ending handling
            fp = io.TextIOWrapper(temp_fp, encoding='utf-8-sig', newline='')
            try:
                header = csv.DictReader(fp, delimiter=',', quotechar='"').fieldnames
            finally:
                # release the wrapper without closing the temp file, then
                # rewind so the file can be read again for the data rows
                fp.detach()
                temp_fp.seek(0)
            for name in header or []:
                columns.setdefault(name, None)
        properties = list(columns)

        flex.output.write(json.dumps(properties))

        # write the data rows of every file, aligned to the merged columns
        for temp_fp in temp_fp_all:
            fp = io.TextIOWrapper(temp_fp, encoding='utf-8-sig', newline='')
            reader = csv.DictReader(fp, delimiter=',', quotechar='"')
            for row in reader:
                cells = [(row.get(p) or '') for p in properties]
                flex.output.write(',' + json.dumps(cells))

        flex.output.write(']')
    finally:
        # always release the temporary files, even if parsing or writing fails
        for temp_fp in temp_fp_all:
            temp_fp.close()
80+
81+
async def fetch_all(urls):
    """Download every URL concurrently and return the resulting temp files.

    Args:
        urls: iterable of URL strings.

    Returns:
        A list of temporary files, one per URL and in the same order, as
        returned by ``fetch``.

    Raises:
        The first exception raised by any download. Temporary files from the
        downloads that did succeed are closed before it is re-raised.
    """
    async with aiohttp.ClientSession() as session:
        # return_exceptions=True makes gather wait for every download, so no
        # task is left running against a session that is already closing
        results = await asyncio.gather(
            *(fetch(session, url) for url in urls),
            return_exceptions=True
        )

    failures = [item for item in results if isinstance(item, BaseException)]
    if failures:
        # don't leak the files that were fetched successfully
        for item in results:
            if not isinstance(item, BaseException):
                item.close()
        raise failures[0]

    return results
88+
89+
async def fetch(session, url):
    """Stream the body at ``url`` into a temporary file and return the file.

    Args:
        session: object whose ``get(url)`` returns an async context manager
            yielding a response with an awaitable ``content.read(n)``
            (an ``aiohttp.ClientSession`` in this file).
        url: address to download.

    Returns:
        A binary temporary file rewound to the beginning. The caller is
        responsible for closing it, which also deletes it.

    Raises:
        Whatever the request or a chunk read raises; the temporary file is
        closed before the exception propagates.
    """
    # NOTE(review): the HTTP status is not checked, so an error page body is
    # stored like any other response -- confirm whether that is intended.
    temp_fp = tempfile.TemporaryFile()
    try:
        async with session.get(url) as response:
            while True:
                data = await response.content.read(1024)
                if not data:
                    break
                temp_fp.write(data)
    except BaseException:
        # don't leak the temp file if the download fails or is cancelled
        temp_fp.close()
        raise
    temp_fp.seek(0)  # rewind to the beginning
    return temp_fp
101+
102+
def validator_list(field, value, error):
    """Validator callback: accept a string or a list containing only strings.

    Args:
        field: name of the field being validated; passed through to ``error``.
        value: the value to check.
        error: callable invoked as ``error(field, message)`` to report a
            problem. It is called at most once per invocation.
    """
    if isinstance(value, str):
        return
    if not isinstance(value, list):
        error(field, 'Must be a string or a list of strings')
        return
    # report a single error no matter how many items are not strings
    if not all(isinstance(item, str) for item in value):
        error(field, 'Must be a list with only string values')
111+
112+
def to_list(value):
    """Normalize a parameter value into a flat list.

    - A string is split on commas.
    - A list has one level of nesting removed: items that are themselves
      lists or tuples are expanded in place, every other item is kept as-is.
      Strings inside the list are therefore kept whole rather than being
      broken up into single characters.
    - Anything else yields ``None``.
    """
    if isinstance(value, str):
        return value.split(",")
    if isinstance(value, list):
        flat = []
        for item in value:
            if isinstance(item, (list, tuple)):
                flat.extend(item)
            else:
                flat.append(item)
        return flat
    return None

0 commit comments

Comments
 (0)