Skip to content

Commit

Permalink
Stream 4CAT files when importing 4CAT datasets
Browse files (browse the repository at this point in the history)
  • Loading branch information
dale-wahl committed Dec 5, 2023
1 parent 5fc27ee commit 7a1c4b9
Showing 1 changed file with 19 additions and 8 deletions.
27 changes: 19 additions & 8 deletions datasources/fourcat_import/import_4cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,10 +208,8 @@ def process(self):
self.halt_and_catch_fire()
try:
self.dataset.update_status(f"Transferring data file for dataset {new_dataset.key}")
data = SearchImportFromFourcat.fetch_from_4cat(self.base, dataset_key, api_key, "data")
datapath = new_dataset.get_results_path()
with datapath.open("wb") as outfile:
outfile.write(data.content)
data = SearchImportFromFourcat.fetch_from_4cat(self.base, dataset_key, api_key, "data", datapath)

if not imported:
# first dataset - use num rows as 'overall'
Expand Down Expand Up @@ -293,7 +291,7 @@ def halt_and_catch_fire(self):
raise ProcessorInterruptedException()

@staticmethod
def fetch_from_4cat(base, dataset_key, api_key, component):
def fetch_from_4cat(base, dataset_key, api_key, component, datapath=None):
"""
Get dataset component from 4CAT export API
Expand All @@ -304,10 +302,23 @@ def fetch_from_4cat(base, dataset_key, api_key, component):
:return: HTTP response object
"""
try:
response = requests.get(f"{base}/api/export-packed-dataset/{dataset_key}/{component}/", timeout=5, headers={
"User-Agent": "4cat/import",
"Authentication": api_key
})
if component == "data" and datapath:
# Stream data
with requests.get(f"{base}/api/export-packed-dataset/{dataset_key}/{component}/", timeout=5, stream=True,
headers={
"User-Agent": "4cat/import",
"Authentication": api_key
}) as r:
r.raise_for_status()
with datapath.open("wb") as outfile:
for chunk in r.iter_content(chunk_size=8192):
outfile.write(chunk)
return r
else:
response = requests.get(f"{base}/api/export-packed-dataset/{dataset_key}/{component}/", timeout=5, headers={
"User-Agent": "4cat/import",
"Authentication": api_key
})
except requests.Timeout:
raise FourcatImportException(f"The 4CAT server at {base} took too long to respond. Make sure it is "
f"accessible to external connections and try again.")
Expand Down

0 comments on commit 7a1c4b9

Please sign in to comment.