MRI_RTDoc_flow/df_convert_json_to_csv.py at main · Manonedde/MRI_RTDoc_flow · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
Script to convert jsons output by TractometryFlow into CSV files.

To run this script, individual jsons must be merged with scil_merge_json.py
(without option). It does not work with jsons provided in the Statistics
folder.

> scil_merge_json.py results_tractometry/sub*/Bundle_**/*json your_output.json

By default, when several jsons are given as input, this script converts
each json into an individual CSV file in long format (for wide format
use --wide). To convert all jsons into a single CSV file,
use the --save_merge_df option.

> df_convert_json_to_csv.py *json --save_merge_df

"""


import argparse
import copy
import json
import os

import pandas as pd
import numpy as np

from dataframe.parameters import column_dict_name
from dataframe.func import (split_col, reshape_to_wide_format,
                            convert_lesion_data)
from scilpy.io.utils import (add_overwrite_arg,
                             assert_inputs_exist, assert_outputs_exist)


def _build_arg_parser():
    """Create the command-line parser for this script.

    The parser takes one or more input json files, optional output naming
    options (``--out_csv``, ``--out_dir``), two boolean switches (``--wide``,
    ``--save_merge_df``) and whatever ``add_overwrite_arg`` registers.

    Returns
    -------
    argparse.ArgumentParser
        The configured parser, using the module docstring as description.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument(
        'in_json', nargs='+',
        help='File(s) containing the json stats (.json).')

    # Options taking a value (registered in the same order as before so the
    # generated --help output is unchanged).
    valued_options = (
        ('--out_csv', 'Output CSV filename for the stats (.csv).'),
        ('--out_dir', 'Output directory to save CSV. \n'
                      'By default is current folder.'),
    )
    for flag, help_text in valued_options:
        parser.add_argument(flag, help=help_text)

    # Boolean switches.
    boolean_options = (
        ('--wide', 'Option to save in wide format the statistic '
                   'measurements. By default is long format.'),
        ('--save_merge_df', 'Save all jsons into a single dataframe in long \n'
                            'format. By default, each json is saved in an '
                            'independent csv. '),
    )
    for flag, help_text in boolean_options:
        parser.add_argument(flag, action='store_true', help=help_text)

    add_overwrite_arg(parser)

    return parser


def main():
    """Convert merged TractometryFlow json file(s) into CSV file(s).

    Each input json is flattened with ``pd.json_normalize`` and turned into a
    long-format dataframe, using the column layout registered in
    ``column_dict_name`` under the json's basename (without extension).

    Outputs, all written in ``--out_dir`` (current folder by default):

    - ``<prefix>_long.csv`` for each json, unless ``--save_merge_df`` is set;
    - ``<prefix>_wide.csv`` for each json when ``--wide`` is set;
    - ``merged_csv_long.csv`` with all long dataframes concatenated when
      ``--save_merge_df`` is set.

    ``<prefix>`` is ``--out_csv`` when given, otherwise the basename of the
    json being processed.

    Raises
    ------
    ValueError
        If an input path contains ``lesion_stats`` or
        ``lesion_streamlines_stats``.
    """
    parser = _build_arg_parser()
    args = parser.parse_args()

    assert_inputs_exist(parser, args.in_json)

    if args.out_dir is None:
        args.out_dir = './'
    # Make sure the destination exists before any to_csv call.
    os.makedirs(args.out_dir, exist_ok=True)

    unsupported_jsons = ('lesion_stats', 'lesion_streamlines_stats')

    # Load, reshape and save multi json data
    merged_frames = []
    for curr_json in args.in_json:
        # Test each name explicitly: `('a' or 'b') in s` only ever tests 'a'.
        if any(name in curr_json for name in unsupported_jsons):
            raise ValueError("The lesion_stats and lesion_streamlines_stats\n"
                             " jsons cannot be processed with this script. \n"
                             "Remove these jsons from the input.\n")

        key_columns = os.path.splitext(os.path.basename(curr_json))[0]
        # Resolve the output prefix per json; storing it back into
        # args.out_csv would make every later json reuse the first name.
        out_prefix = key_columns if args.out_csv is None else args.out_csv

        # Load json data
        with open(curr_json) as json_file:
            df = pd.json_normalize(json.load(json_file)).T
        df = df.reset_index(drop=False)

        # Only the non-lesion layout provides wide-format columns; reset it
        # for each json so a value from a previous iteration is never reused.
        wide_columns = None
        if 'lesion' in curr_json:
            long_columns_list = column_dict_name[key_columns][0]
            long_columns_nolist = column_dict_name[key_columns + '_nolist'][0]

            long_df = convert_lesion_data(df, long_columns_list,
                                          long_columns_nolist)
        else:
            # Define the column names based on number of columns
            # This assumes that columns always have the same organization
            long_columns, wide_columns = column_dict_name[key_columns]
            # Store json data in dataframe
            values = [split_col(x) for x in df[["index", 0]].values]
            long_df = pd.DataFrame(columns=long_columns, data=values)

        if args.save_merge_df:
            merged_frames.append(long_df)
        else:
            long_df.to_csv(os.path.join(args.out_dir,
                                        out_prefix + '_long.csv'),
                           index=False)

        # Reshape long to wide dataframe
        if args.wide:
            wide_df = long_df
            # NOTE(review): 'sats' looks like a typo for 'stats' -- confirm
            # against the column names in column_dict_name before changing.
            if (wide_columns is not None
                    and 'sats' in long_df.columns.tolist()):
                wide_df = reshape_to_wide_format(long_df, wide_columns)
            # Save dataframe (left in long layout when it was not reshaped)
            wide_df.to_csv(os.path.join(args.out_dir,
                                        out_prefix + '_wide.csv'),
                           index=False)

    if args.save_merge_df:
        # ignore_index=True already yields a fresh 0..n-1 index.
        merged_long_df = pd.concat(merged_frames, ignore_index=True)
        merged_long_df.to_csv(os.path.join(args.out_dir,
                                           'merged_csv_long.csv'), index=False)


# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()