
Commit 4d324cd

- data_import.py: Modified the session_dataframe function to accept either .txt or .tsv data files. Made some small changes to the generated dataframe for consistency with the .tsv file format:
  - Time is in units of seconds, not milliseconds.
  - Info line names use '_' instead of ' ' and are all lower case, e.g. 'task_name' not 'Task name'.
  - The session start date-time info line is now named 'start_time' rather than 'start_date', and its value is an ISO-format string.
1 parent 08dcc11 commit 4d324cd
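
As a quick orientation, here is a minimal usage sketch of the updated function. It assumes tools/data_import.py is importable as tools.data_import, and the file paths are placeholders, not files from this commit; either format should now yield a dataframe with 'time' in seconds and the column layout set by the reindex change below.

from tools.data_import import session_dataframe  # assumed import path

# A .txt log and a .tsv file should now produce equivalently structured dataframes.
df_txt = session_dataframe('example_session.txt')  # placeholder path
df_tsv = session_dataframe('example_session.tsv')  # placeholder path

# Expected column order after this commit (see the reindex change in the diff).
print(df_txt.columns.tolist())  # ['time', 'type', 'name', 'value', 'duration']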

File tree

1 file changed: +42 -28 lines changed


tools/data_import.py

Lines changed: 42 additions & 28 deletions
@@ -282,33 +282,47 @@ def session_dataframe(file_path, paired_events={}, pair_end_suffix=None):
     '''
 
     # Load data from file.
-    with open(file_path, 'r') as f:
-        print('Importing data file: '+os.path.split(file_path)[1])
-        all_lines = [line.strip() for line in f.readlines() if line.strip()]
-
-    # Make dataframe.
-    state_IDs = eval(next(line for line in all_lines if line[0]=='S')[2:])
-    event_IDs = eval(next(line for line in all_lines if line[0]=='E')[2:])
-    ID2name = {v: k for k, v in {**state_IDs, **event_IDs}.items()}
-
-    line_dicts = []
-    for line in all_lines:
-        if line[0] == 'I': # Info line.
-            name, value = line[2:].split(' : ')
-            line_dicts.append({'type'  : 'info',
-                               'name'  : name,
-                               'value' : value})
-        elif line[0] == 'D': # Data line.
-            timestamp, ID = [int(i) for i in line.split(' ')[1:]]
-            line_dicts.append({'type' : 'state' if ID in state_IDs.values() else 'event',
-                               'name' : ID2name[ID],
-                               'time' : int(timestamp)})
-        elif line[0] == 'P': # Print line.
-            line_dicts.append({'type'  : 'print',
-                               'time'  : int(line[2:].split(' ',1)[0]),
-                               'value' : line[2:].split(' ',1)[1]})
-
-    df = pd.DataFrame(line_dicts)
+
+    if os.path.splitext(file_path)[1] == '.txt': # Load data from .txt. file.
+
+        with open(file_path, 'r') as f:
+            print('Importing data file: '+os.path.split(file_path)[1])
+            all_lines = [line.strip() for line in f.readlines() if line.strip()]
+
+        # Make dataframe.
+        state_IDs = eval(next(line for line in all_lines if line[0]=='S')[2:])
+        event_IDs = eval(next(line for line in all_lines if line[0]=='E')[2:])
+        ID2name = {v: k for k, v in {**state_IDs, **event_IDs}.items()}
+
+        line_dicts = []
+        for line in all_lines:
+            if line[0] == 'I': # Info line.
+                name, value = line[2:].split(' : ')
+                # Make info lines consistent with .tsv files.
+                name = name.lower().replace(' ', '_')
+                if name == 'start_date':
+                    name = 'start_time'
+                    value = datetime.strptime(value, '%Y/%m/%d %H:%M:%S').isoformat()
+                line_dicts.append({'time'  : 0,
+                                   'type'  : 'info',
+                                   'name'  : name,
+                                   'value' : value})
+            elif line[0] == 'D': # Data line.
+                timestamp, ID = [int(i) for i in line.split(' ')[1:]]
+                line_dicts.append({'time' : timestamp/1000,
+                                   'type' : 'state' if ID in state_IDs.values() else 'event',
+                                   'name' : ID2name[ID]})
+            elif line[0] == 'P': # Print line.
+                time_str, print_str = line[2:].split(' ',1)
+                line_dicts.append({'time'  : int(time_str)/1000,
+                                   'type'  : 'print',
+                                   'value' : print_str})
+
+        df = pd.DataFrame(line_dicts)
+
+    elif os.path.splitext(file_path)[1] == '.tsv': # Load data from .tsv file.
+
+        df = pd.read_csv(file_path, delimiter='\t')
 
     # Add state durations.
     df.loc[df['type'] == 'state','duration'] = -df.loc[df['type'] == 'state','time'].diff(-1)
@@ -344,7 +358,7 @@ def session_dataframe(file_path, paired_events={}, pair_end_suffix=None):
 
     # Reset index and set column order.
     df.reset_index(drop=True)
-    df = df.reindex(columns=['type','name','time','duration','value'])
+    df = df.reindex(columns=['time','type','name','value','duration'])
     return df
 
 #----------------------------------------------------------------------------------
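
To make the behavioural change concrete, here is a small standalone sketch of the transformations the new .txt branch applies; the example names and values are illustrative, not taken from the commit.

from datetime import datetime

# Info line names are lower-cased and use underscores, matching the .tsv format.
name = 'Task name'.lower().replace(' ', '_')   # 'task_name'

# The session start info line is renamed and its value converted to ISO format.
name, value = 'Start date', '2024/01/31 12:30:05'   # example value, not from the commit
name = name.lower().replace(' ', '_')                # 'start_date'
if name == 'start_date':
    name = 'start_time'
    value = datetime.strptime(value, '%Y/%m/%d %H:%M:%S').isoformat()
print(name, value)   # start_time 2024-01-31T12:30:05

# Data line timestamps are converted from milliseconds to seconds.
time_s = 12500 / 1000   # 12.5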
