-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcombine_eng_esp.py
90 lines (67 loc) · 2.56 KB
/
combine_eng_esp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
'''combine_eng_esp -- this file combines english csv and spanish csv export.
Usage::
$ python capture_query.py QUERY_NAME 'query description'
$ python combine_eng_esp.py export/
export/temp/English will have English ADC export
export/temp/Spanish will have Spanish ADC export
:copyright: Copyright 2010-2019 University of Kansas Medical Center
__ https://informatics.kumc.edu/work/wiki/REDCap
'''
import pandas as pd
def get_files_to_export(export_dir):
files = []
for f in export_dir.iterdir():
files.append(f)
return files
def handle_files(eng_files, esp_files, export_dir):
from shutil import copy
from os.path import isfile
comb_files = {}
print "Combining Files"
for f in eng_files:
for efile in esp_files:
if f.name == efile.name:
combine_files(f, efile)
comb_files[str(f)] = str(f)
print "Moving English Files"
for f in eng_files:
if not isfile("{}/{}".format(str(export_dir.parent), f.name)):
copy(str(f), str(export_dir.parent))
print "Moving Spanish Files"
for f in esp_files:
if not isfile("{}/{}".format(str(export_dir.parent), f.name)):
copy(str(f), str(export_dir.parent))
def combine_files(eng_file, esp_file):
'''
>>> from pathlib import Path
>>> from hashlib import md5
>>> eng_path = Path("testcases/eng/test.csv")
>>> esp_path = Path("testcases/esp/test.csv")
>>> combine_files(eng_path, esp_path)
>>> output_path = Path ("test.csv")
>>> md5(output_path.open().read()).hexdigest()
'bf4eebe4f4f1bae86e39a987f99f15fa'
'''
export_location = r'{}'.format(eng_file.parent.parent.parent)
eng = pd.read_csv(eng_file, low_memory=False,dtype=str)
esp = pd.read_csv(esp_file, low_memory=False,dtype=str)
merged = eng.append(esp, sort=True)
merged_filename = '{}/{}'.format(export_location, eng_file.name)
merged = merged[merged.columns]
spanish_cols = set(merged.columns.tolist()) - set(eng.columns.tolist())
merged = merged[eng.columns.tolist() + list(spanish_cols)]
merged.to_csv(merged_filename, index=False)
def main(argv, cwd):
export_dir = cwd / argv[1] / 'temp'
eng_dir = export_dir / 'English'
esp_dir = export_dir / 'Spanish'
eng_files = get_files_to_export(eng_dir)
esp_files = get_files_to_export(esp_dir)
handle_files(eng_files, esp_files, export_dir)
if __name__ == "__main__":
def _script():
from sys import argv
from pathlib import Path
cwd = Path(".")
main(argv, cwd)
_script()