Skip to content

Commit 57eefc4

Browse files
committed
data_import.py:
- In session_dataframe, the 'value' field of 'variable' rows is now a dict {v_name: v_value} rather than a JSON string representation of that dict. - Started work on print and variable processing in the Session class; the .txt and .tsv code paths currently use different implementations.
1 parent 4d324cd commit 57eefc4

File tree

1 file changed

+45
-9
lines changed

1 file changed

+45
-9
lines changed

tools/data_import.py

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# sessions and experiments. Dependencies: Python 3.5+, Numpy, Pandas.
33

44
import os
5+
import json
56
import pickle
67
import pandas as pd
78
import numpy as np
@@ -10,12 +11,17 @@
1011

1112
Event = namedtuple('Event', ['time','name'])
1213

14+
Variable = namedtuple('Variable', ['time', 'type', 'value'])
15+
16+
Print = namedtuple('Print', ['time','value'])
17+
1318
#----------------------------------------------------------------------------------
1419
# Session class
1520
#----------------------------------------------------------------------------------
1621

1722
class Session():
18-
'''Import data from a pyControl file and represent it as an object with attributes:
23+
'''Import data from a pyControl file (.txt or .tsv) and represent it as an
24+
object with attributes:
1925
- file_name
2026
- experiment_name
2127
- task_name
@@ -75,8 +81,18 @@ def __init__(self, file_path, int_subject_IDs=False):
7581
self.times = {event_name: np.array([ev.time for ev in self.events if ev.name == event_name])
7682
for event_name in ID2name.values()}
7783

78-
self.print_lines = [line[2:] for line in all_lines if line[0]=='P']
79-
84+
self.print_lines = [line[2:].split(' ', 1) for line in all_lines if line[0]=='P']
85+
86+
self.prints = []
87+
self.variables = []
88+
89+
for print_line in self.print_lines:
90+
try:
91+
value = json.loads(print_line[1])
92+
self.variables.append(Variable(time=int(print_line[0]),type='print',value=value))
93+
except json.JSONDecodeError:
94+
self.prints.append(Print(time=int(print_line[0]),value=print_line[1]))
95+
8096
elif os.path.splitext(file_path)[1] == '.tsv':
8197

8298
# Load tsv file to pandas dataframe.
@@ -99,6 +115,16 @@ def __init__(self, file_path, int_subject_IDs=False):
99115

100116
self.times = {event_name: np.array([ev.time for ev in self.events if ev.name == event_name])
101117
for event_name in df.loc[df['type'].isin(['state', 'event']), 'name'].unique()}
118+
119+
# Create variables dataframe.
120+
121+
df.loc[df['type']=='variable', 'value'] = df.loc[df['type']=='variable', 'value'].apply(json.loads) # Convert variables row value fields to dicts.
122+
self.variables_df = pd.DataFrame(df.loc[df['type']=='variable','value'].tolist())
123+
columns = self.variables_df.columns
124+
self.variables_df.columns = pd.MultiIndex.from_arrays([['values']*len(columns),columns])
125+
self.variables_df.insert(0,'operation', df.loc[df['type']=='variable', 'name'].tolist())
126+
self.variables_df.insert(0,'time', df.loc[df['type']=='variable', 'time'].tolist())
127+
self.variables_df.reset_index()
102128

103129
# Common to both filetypes.
104130

@@ -250,8 +276,8 @@ def _toDate(d): # Convert input to datetime.date object.
250276
#----------------------------------------------------------------------------------
251277

252278
def session_dataframe(file_path, paired_events={}, pair_end_suffix=None):
253-
'''Generate a pandas dataframe from a pyControl data file containing the
254-
sessions data. The data frame has columns:
279+
'''Generate a pandas dataframe from a pyControl data file (.txt or .tsv)
280+
containing the sessions data. The data frame has columns:
255281
type : Whether the row contains session 'info', a 'state' entry,
256282
'event' or 'print' line.
257283
name : The name of the state, event or session information in the row.
@@ -314,15 +340,25 @@ def session_dataframe(file_path, paired_events={}, pair_end_suffix=None):
314340
'name' : ID2name[ID]})
315341
elif line[0] == 'P': # Print line.
316342
time_str, print_str = line[2:].split(' ',1)
317-
line_dicts.append({'time' : int(time_str)/1000,
318-
'type' : 'print',
319-
'value' : print_str})
320-
343+
try:
344+
value_dict = json.loads(print_str)
345+
line_dicts.append({'time' : int(time_str)/1000,
346+
'type' : 'variable',
347+
'name' : 'print',
348+
'value' : value_dict})
349+
350+
except json.JSONDecodeError:
351+
line_dicts.append({'time' : int(time_str)/1000,
352+
'type' : 'print',
353+
'value' : print_str})
354+
321355
df = pd.DataFrame(line_dicts)
322356

323357
elif os.path.splitext(file_path)[1] == '.tsv': # Load data from .tsv file.
324358

325359
df = pd.read_csv(file_path, delimiter='\t')
360+
# Convert variables row value fields to dicts from json strings.
361+
df.loc[df['type']=='variable', 'value'] = df.loc[df['type']=='variable', 'value'].apply(json.loads)
326362

327363
# Add state durations.
328364
df.loc[df['type'] == 'state','duration'] = -df.loc[df['type'] == 'state','time'].diff(-1)

0 commit comments

Comments
 (0)