-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdf_convert_json_to_csv.py
More file actions
executable file
·133 lines (101 loc) · 4.65 KB
/
df_convert_json_to_csv.py
File metadata and controls
executable file
·133 lines (101 loc) · 4.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Script to convert jsons output by TractometryFlow into CSV files.
To run this script, individual jsons must be merged with scil_merge_json.py
(without option). It does not work with jsons provided in the Statistics
folder.
> scil_merge_json.py results_tractometry/sub*/Bundle_**/*json your_output.json
By default, when several jsons are given as input, this script converts
each json into an individual CSV file in long format (for wide format
use --wide). To convert all jsons into a single CSV file,
use the --save_merge_df option.
>> df_convert_json_to_csv.py *json --save_merge_df
"""
import argparse
import copy
import json
import os
import pandas as pd
import numpy as np
from dataframe.parameters import column_dict_name
from dataframe.func import (split_col, reshape_to_wide_format,
convert_lesion_data)
from scilpy.io.utils import (add_overwrite_arg,
assert_inputs_exist, assert_outputs_exist)
def _build_arg_parser():
    """Create the command-line parser for this script.

    The module docstring is used as the program description and is shown
    verbatim thanks to the raw-text help formatter.

    Returns
    -------
    argparse.ArgumentParser
        Parser exposing the positional ``in_json`` inputs, the ``--out_csv``,
        ``--out_dir``, ``--wide`` and ``--save_merge_df`` options, plus
        whatever ``add_overwrite_arg`` registers.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter)

    # Positional input: one or more json files.
    parser.add_argument(
        'in_json', nargs='+',
        help='File(s) containing the json stats (.json).')

    # Optional flags, registered in the order they appear in --help.
    optional_args = (
        ('--out_csv', {
            'help': 'Output CSV filename for the stats (.csv).',
        }),
        ('--out_dir', {
            'help': 'Output directory to save CSV. \n'
                    'By default is current folder.',
        }),
        ('--wide', {
            'action': 'store_true',
            'help': 'Option to save in wide format the statistic '
                    'measurements. By default is long format.',
        }),
        ('--save_merge_df', {
            'action': 'store_true',
            'help': 'Save all jsons into a single dataframe in long \n'
                    'format. By default, each json is saved in an '
                    'independent csv. ',
        }),
    )
    for flag, options in optional_args:
        parser.add_argument(flag, **options)

    add_overwrite_arg(parser)

    return parser
def main():
    """Convert the input json file(s) into CSV file(s).

    For every json given on the command line:

    * the file is rejected with a ``ValueError`` when its path contains
      ``lesion_stats`` or ``lesion_streamlines_stats``;
    * the json is flattened with ``pandas.json_normalize`` and turned into a
      long-format dataframe, using ``convert_lesion_data`` when the path
      contains ``lesion`` and ``split_col`` otherwise. Column names are
      looked up in ``column_dict_name`` with the json basename (without
      extension) as key;
    * without ``--save_merge_df`` the long dataframe is written to
      ``<out_dir>/<prefix>_long.csv``; with it, the dataframe is kept in
      memory and all of them are concatenated at the end into
      ``<out_dir>/merged_csv_long.csv``;
    * with ``--wide`` a ``<out_dir>/<prefix>_wide.csv`` is written as well.

    ``<prefix>`` is ``--out_csv`` when provided, otherwise the basename of
    the json currently being processed, so that several inputs do not
    overwrite each other's output.

    Raises
    ------
    ValueError
        If one of the inputs is a ``lesion_stats`` or
        ``lesion_streamlines_stats`` json.
    """
    parser = _build_arg_parser()
    args = parser.parse_args()

    assert_inputs_exist(parser, args.in_json)

    out_dir = args.out_dir if args.out_dir is not None else './'

    # Files whose path contains one of these tags are not supported.
    unsupported_tags = ('lesion_stats', 'lesion_streamlines_stats')

    # Load, reshape and save multi json data
    long_dfs = []
    for curr_json in args.in_json:
        # Each tag must be tested on its own: ('a' or 'b') in s only ever
        # tests 'a'.
        if any(tag in curr_json for tag in unsupported_tags):
            raise ValueError("The lesion_stats and lesion_streamlines_stats\n"
                             " jsons cannot be processed with this script. \n"
                             "Remove these jsons from the input.\n")

        key_columns = os.path.splitext(os.path.basename(curr_json))[0]

        # Resolve the output prefix per file instead of storing it on args,
        # otherwise the first json's name would be reused for every input.
        out_prefix = args.out_csv if args.out_csv is not None else key_columns

        # Load json data (context manager so the file handle is closed).
        with open(curr_json) as json_file:
            df = pd.json_normalize(json.load(json_file)).T
        df = df.reset_index(drop=False)

        if 'lesion' in curr_json:
            long_columns_list = column_dict_name[key_columns][0]
            long_columns_nolist = column_dict_name[key_columns + '_nolist'][0]
            long_df = convert_lesion_data(df, long_columns_list,
                                          long_columns_nolist)
            # No wide column layout is defined in this branch; make that
            # explicit so a value from a previous iteration is never reused.
            wide_columns = None
        else:
            # Define the column names based on number of columns
            # This assumes that columns always have the same organization
            long_columns, wide_columns = column_dict_name[key_columns]

            # Store json data in dataframe
            values = [split_col(x) for x in df[["index", 0]].values]
            long_df = pd.DataFrame(columns=long_columns, data=values)

        if args.save_merge_df:
            long_dfs.append(long_df)
        else:
            long_df.to_csv(os.path.join(out_dir, out_prefix + '_long.csv'),
                           index=False)

        # Reshape long to wide dataframe
        if args.wide:
            # NOTE(review): 'sats' looks like a typo for 'stats'; kept as-is
            # because the real column names live in column_dict_name, which
            # is not visible here - confirm before changing.
            if (wide_columns is not None
                    and 'sats' in long_df.columns.tolist()):
                long_df = reshape_to_wide_format(long_df, wide_columns)

            # Save dataframe
            long_df.to_csv(os.path.join(out_dir, out_prefix + '_wide.csv'),
                           index=False)

    if args.save_merge_df:
        # ignore_index=True already yields a fresh 0..n-1 index.
        merged_long_df = pd.concat(long_dfs, ignore_index=True)
        merged_long_df.to_csv(os.path.join(out_dir, 'merged_csv_long.csv'),
                              index=False)
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()