10
10
from collections import namedtuple
11
11
12
12
Event = namedtuple ('Event' , ['time' ,'name' ])
13
-
14
- Variable = namedtuple ('Variable' , ['time' , 'type' , 'value' ])
15
-
16
- Print = namedtuple ('Print' , ['time' ,'value' ])
13
+ Print = namedtuple ('Print' , ['time' ,'string' ])
17
14
18
15
#----------------------------------------------------------------------------------
19
16
# Session class
@@ -26,8 +23,6 @@ class Session():
26
23
- experiment_name
27
24
- task_name
28
25
- subject_ID
29
- If argument int_subject_IDs is True, suject_ID is stored as an integer,
30
- otherwise subject_ID is stored as a string.
31
26
- datetime
32
27
The date and time that the session started stored as a datetime object.
33
28
- datetime_string
@@ -40,12 +35,16 @@ class Session():
40
35
A dictionary with keys that are the names of the framework events and states and
41
36
corresponding values which are Numpy arrays of all the times (in milliseconds since the
42
37
start of the framework run) at which each event/state entry occurred.
43
- - print_lines
44
- A list of all the lines output by print statements during the framework run, each line starts
45
- with the time in milliseconds at which it was printed.
38
+ - prints
39
+ A list of named tuples for every user print statement output with fields 'time' and 'string'.
40
+ - variables_df
41
+ A Pandas dataframe containing the values of variables output by the task. For .txt files
42
+ only variables output by the print_variables function are included.
46
43
'''
47
44
48
- def __init__ (self , file_path , int_subject_IDs = False ):
45
+ def __init__ (self , file_path , time_unit = 'second' ):
46
+
47
+ assert time_unit in ('second' ,'ms' ), 'time_unit must be "second" or "ms"'
49
48
50
49
print ('Importing data file: ' + os .path .split (file_path )[1 ])
51
50
self .file_name = os .path .split (file_path )[1 ]
@@ -62,8 +61,9 @@ def __init__(self, file_path, int_subject_IDs=False):
62
61
63
62
self .experiment_name = next (line for line in info_lines if 'Experiment name' in line ).split (' : ' )[1 ]
64
63
self .task_name = next (line for line in info_lines if 'Task name' in line ).split (' : ' )[1 ]
65
- subject_ID_string = next (line for line in info_lines if 'Subject ID' in line ).split (' : ' )[1 ]
64
+ self . subject_ID = next (line for line in info_lines if 'Subject ID' in line ).split (' : ' )[1 ]
66
65
datetime_string = next (line for line in info_lines if 'Start date' in line ).split (' : ' )[1 ]
66
+ self .time_unit = time_unit
67
67
68
68
self .datetime = datetime .strptime (datetime_string , '%Y/%m/%d %H:%M:%S' )
69
69
@@ -76,46 +76,63 @@ def __init__(self, file_path, int_subject_IDs=False):
76
76
77
77
data_lines = [line [2 :].split (' ' ) for line in all_lines if line [0 ]== 'D' ]
78
78
79
- self .events = [Event (int (dl [0 ]), ID2name [int (dl [1 ])]) for dl in data_lines ]
79
+ self .events = [Event (int (dl [0 ]) if time_unit == 'ms' else int (dl [0 ])/ 1000 ,
80
+ ID2name [int (dl [1 ])]) for dl in data_lines ]
80
81
81
82
self .times = {event_name : np .array ([ev .time for ev in self .events if ev .name == event_name ])
82
83
for event_name in ID2name .values ()}
83
84
84
- self .print_lines = [line [2 :].split (' ' , 1 ) for line in all_lines if line [0 ]== 'P' ]
85
-
85
+ print_lines = [line [2 :].split (' ' , 1 ) for line in all_lines if line [0 ]== 'P' ]
86
+ var_dicts = []
87
+ var_times = []
86
88
self .prints = []
87
- self .variables = []
88
89
89
- for print_line in self .print_lines :
90
- try :
91
- value = json .loads (print_line [1 ])
92
- self .variables .append (Variable (time = int (print_line [0 ]),type = 'print' ,value = value ))
93
- except json .JSONDecodeError :
94
- self .prints .append (Print (time = int (print_line [0 ]),value = print_line [1 ]))
90
+ for print_line in print_lines :
91
+ print_time = int (print_line [0 ]) if time_unit == 'ms' else int (print_line [0 ])/ 1000
92
+ try : # Output of print_variables function.
93
+ var_dicts .append (json .loads (print_line [1 ]))
94
+ var_times .append (print_time )
95
+ except json .JSONDecodeError : # Output of user print function.
96
+ self .prints .append (Print (print_time , print_line [1 ]))
95
97
98
+ # Create variables dataframe.
99
+
100
+ self .variables_df = pd .DataFrame (var_dicts )
101
+ columns = self .variables_df .columns
102
+ self .variables_df .columns = pd .MultiIndex .from_arrays ([['values' ]* len (columns ),columns ])
103
+ self .variables_df .insert (0 ,'operation' , ['print' ]* len (self .variables_df ))
104
+ self .variables_df .insert (0 ,'time' , var_times )
105
+ self .variables_df .reset_index ()
106
+
96
107
elif os .path .splitext (file_path )[1 ] == '.tsv' :
97
108
98
109
# Load tsv file to pandas dataframe.
99
110
100
111
df = pd .read_csv (file_path , delimiter = '\t ' )
112
+
113
+ if time_unit == 'ms' :
114
+ df = df .loc [df ['type' ] != 'warning' , :] # Warning rows have nan time so can't convert to int.
115
+ df ['time' ] = (df ['time' ]* 1000 ).astype (int )
101
116
102
117
# Extract and store session information.
103
118
104
119
self .experiment_name = df .loc [(df ["type" ]== "info" ) & (df ["name" ]== "experiment_name" ), "value" ].item ()
105
120
self .task_name = df .loc [(df ["type" ]== "info" ) & (df ["name" ]== "task_name" ), "value" ].item ()
106
- subject_ID_string = df .loc [(df ["type" ]== "info" ) & (df ["name" ]== "subject_id" ), "value" ].item ()
121
+ self . subject_ID = df .loc [(df ["type" ]== "info" ) & (df ["name" ]== "subject_id" ), "value" ].item ()
107
122
datetime_string = df .loc [(df ["type" ]== "info" ) & (df ["name" ]== "start_time" ), "value" ].item ()
108
123
109
124
self .datetime = datetime .fromisoformat (datetime_string )
110
125
111
126
# Extract and store session data.
112
127
113
- self .events = [Event (int ( row ['time' ]* 1000 ) , row ['name' ]) for i ,row
128
+ self .events = [Event (row ['time' ], row ['name' ]) for i ,row
114
129
in df [df ['type' ].isin (['state' , 'event' ])].iterrows ()]
115
130
116
131
self .times = {event_name : np .array ([ev .time for ev in self .events if ev .name == event_name ])
117
132
for event_name in df .loc [df ['type' ].isin (['state' , 'event' ]), 'name' ].unique ()}
118
133
134
+ self .prints = [Print (row .time , row .value ) for row in df .loc [df .type == 'print' ,:].itertuples ()]
135
+
119
136
# Create variables dataframe.
120
137
121
138
df .loc [df ['type' ]== 'variable' , 'value' ] = df .loc [df ['type' ]== 'variable' , 'value' ].apply (json .loads ) # Convert variables row value fields to dicts.
@@ -126,13 +143,6 @@ def __init__(self, file_path, int_subject_IDs=False):
126
143
self .variables_df .insert (0 ,'time' , df .loc [df ['type' ]== 'variable' , 'time' ].tolist ())
127
144
self .variables_df .reset_index ()
128
145
129
- # Common to both filetypes.
130
-
131
- if int_subject_IDs : # Convert subject ID string to integer.
132
- self .subject_ID = int ('' .join ([i for i in subject_ID_string if i .isdigit ()]))
133
- else :
134
- self .subject_ID = subject_ID_string
135
-
136
146
self .datetime_string = self .datetime .strftime ('%Y-%m-%d %H:%M:%S' )
137
147
138
148
@@ -141,14 +151,14 @@ def __init__(self, file_path, int_subject_IDs=False):
141
151
#----------------------------------------------------------------------------------
142
152
143
153
class Experiment ():
144
- def __init__ (self , folder_path , int_subject_IDs = True ):
154
+ def __init__ (self , folder_path , time_unit = 'second' ):
145
155
'''
146
156
Import all sessions from specified folder to create experiment object. Only sessions in the
147
157
specified folder (not in subfolders) will be imported.
148
158
Arguments:
149
159
folder_path: Path of data folder.
150
- int_subject_IDs: If True subject IDs are converted to integers, e.g. m012 is converted to 12.
151
160
'''
161
+ assert time_unit in ('second' ,'ms' ), 'time_unit must be "second" or "ms"'
152
162
153
163
self .folder_name = os .path .split (folder_path )[1 ]
154
164
self .path = folder_path
@@ -160,18 +170,19 @@ def __init__(self, folder_path, int_subject_IDs=True):
160
170
with open (os .path .join (self .path , 'sessions.pkl' ),'rb' ) as sessions_file :
161
171
self .sessions = pickle .load (sessions_file )
162
172
print ('Saved sessions loaded from: sessions.pkl' )
173
+ assert self .sessions [0 ].time_unit == time_unit , 'time_unit of saved sessions does not match time_unit argument.'
163
174
except IOError :
164
175
pass
165
176
166
177
old_files = [session .file_name for session in self .sessions ]
167
178
files = os .listdir (self .path )
168
- new_files = [f for f in files if f [- 4 :] == '.txt' and f not in old_files ]
179
+ new_files = [f for f in files if f [- 4 :] in ( '.txt' , '.tsv' ) and f not in old_files ]
169
180
170
181
if len (new_files ) > 0 :
171
182
print ('Loading new data files..' )
172
183
for file_name in new_files :
173
184
try :
174
- self .sessions .append (Session (os .path .join (self .path , file_name ), int_subject_IDs ))
185
+ self .sessions .append (Session (os .path .join (self .path , file_name ), time_unit ))
175
186
except Exception as error_message :
176
187
print ('Unable to import file: ' + file_name )
177
188
print (error_message )
@@ -275,7 +286,7 @@ def _toDate(d): # Convert input to datetime.date object.
275
286
# Session Dataframe
276
287
#----------------------------------------------------------------------------------
277
288
278
- def session_dataframe (file_path , paired_events = {}, pair_end_suffix = None ):
289
+ def session_dataframe (file_path , paired_events = {}, pair_end_suffix = None , time_unit = 'second' ):
279
290
'''Generate a pandas dataframe from a pyControl data file (.txt or .tsv)
280
291
containing the sessions data. The data frame has columns:
281
292
type : Whether the row contains session 'info', a 'state' entry,
@@ -306,6 +317,7 @@ def session_dataframe(file_path, paired_events={}, pair_end_suffix=None):
306
317
-------
307
318
df : session dataframe
308
319
'''
320
+ assert time_unit in ('second' ,'ms' ), 'time_unit must be "second" or "ms"'
309
321
310
322
# Load data from file.
311
323
@@ -335,20 +347,20 @@ def session_dataframe(file_path, paired_events={}, pair_end_suffix=None):
335
347
'value' : value })
336
348
elif line [0 ] == 'D' : # Data line.
337
349
timestamp , ID = [int (i ) for i in line .split (' ' )[1 :]]
338
- line_dicts .append ({'time' : timestamp / 1000 ,
350
+ line_dicts .append ({'time' : timestamp if time_unit == 'ms' else timestamp / 1000 ,
339
351
'type' : 'state' if ID in state_IDs .values () else 'event' ,
340
352
'name' : ID2name [ID ]})
341
353
elif line [0 ] == 'P' : # Print line.
342
354
time_str , print_str = line [2 :].split (' ' ,1 )
343
- try :
355
+ timestamp = int (time_str )
356
+ try : # print_variables output.
344
357
value_dict = json .loads (print_str )
345
- line_dicts .append ({'time' : int ( time_str ) / 1000 ,
358
+ line_dicts .append ({'time' : timestamp if time_unit == 'ms' else timestamp / 1000 ,
346
359
'type' : 'variable' ,
347
360
'name' : 'print' ,
348
361
'value' : value_dict })
349
-
350
- except json .JSONDecodeError :
351
- line_dicts .append ({'time' : int (time_str )/ 1000 ,
362
+ except json .JSONDecodeError : # User print string.
363
+ line_dicts .append ({'time' : timestamp if time_unit == 'ms' else timestamp / 1000 ,
352
364
'type' : 'print' ,
353
365
'value' : print_str })
354
366
@@ -357,6 +369,11 @@ def session_dataframe(file_path, paired_events={}, pair_end_suffix=None):
357
369
elif os .path .splitext (file_path )[1 ] == '.tsv' : # Load data from .tsv file.
358
370
359
371
df = pd .read_csv (file_path , delimiter = '\t ' )
372
+
373
+ if time_unit == 'ms' :
374
+ df = df .loc [df ['type' ] != 'warning' , :] # Warning rows have nan time so can't convert to int.
375
+ df ['time' ] = (df ['time' ]* 1000 ).astype (int )
376
+
360
377
# Convert variables row value fields to dicts from json strings.
361
378
df .loc [df ['type' ]== 'variable' , 'value' ] = df .loc [df ['type' ]== 'variable' , 'value' ].apply (json .loads )
362
379
@@ -401,7 +418,8 @@ def session_dataframe(file_path, paired_events={}, pair_end_suffix=None):
401
418
# Experiment dataframe
402
419
#----------------------------------------------------------------------------------
403
420
404
- def experiment_dataframe (folder_path , paired_events = {}, pair_end_suffix = None ):
421
+ def experiment_dataframe (folder_path , paired_events = {}, pair_end_suffix = None ,
422
+ time_unit = 'second' ):
405
423
'''Generate a pandas dataframe from a pyControl experiment comprising
406
424
many session data files in a folder. The experiment dataframe has the
407
425
same columns as the session dataframe ('type', 'name', 'time', 'duration',
@@ -432,19 +450,21 @@ def experiment_dataframe(folder_path, paired_events={}, pair_end_suffix=None):
432
450
-------
433
451
df : session dataframe
434
452
'''
435
- session_filenames = [f for f in os .listdir (folder_path ) if f [- 4 :] == '.txt' ]
453
+ assert time_unit in ('second' ,'ms' ), 'time_unit must be "second" or "ms"'
454
+ session_filenames = [f for f in os .listdir (folder_path ) if f [- 4 :] in ('.txt' , '.tsv' )]
436
455
session_dataframes = []
437
456
for session_filename in session_filenames :
438
457
# Make session dataframe.
439
458
session_df = session_dataframe (os .path .join (folder_path ,session_filename ),
440
- paired_events = paired_events , pair_end_suffix = pair_end_suffix )
459
+ paired_events = paired_events , pair_end_suffix = pair_end_suffix , time_unit = time_unit )
441
460
# Convert info rows to columns.
442
461
info_rows = session_df [session_df ['type' ]== 'info' ]
443
462
session_df = session_df [session_df ['type' ]!= 'info' ]
444
463
for name ,value in zip (info_rows ['name' ], info_rows ['value' ]):
445
464
session_df [name ] = value
446
465
session_dataframes .append (session_df )
447
466
experiment_df = pd .concat (session_dataframes , axis = 0 )
467
+ experiment_df .reset_index (drop = True , inplace = True )
448
468
return experiment_df
449
469
450
470
#----------------------------------------------------------------------------------
0 commit comments