@@ -82,110 +82,6 @@ def __init__(self, file_path, int_subject_IDs=True):
82
82
83
83
self .print_lines = [line [2 :] for line in all_lines if line [0 ]== 'P' ]
84
84
85
- #----------------------------------------------------------------------------------
86
- # Session Dataframe
87
- #----------------------------------------------------------------------------------
88
-
89
- def session_dataframe (file_path , paired_events = {}, pair_end_suffix = None ):
90
- '''Generate a pandas dataframe from a pyControl data file containing the
91
- sessions data. The data frame has columns:
92
- type : Whether the row contains session 'info', a 'state' entry,
93
- 'event' or 'print' line.
94
- name : The name of the state, event or session information in the row.
95
- time : The time the row occured in ms since the session start.
96
- duration : The duration in ms of states and paired events (see below).
97
- value : The contents of 'info' and 'print' rows.
98
-
99
- Optionally events can be specified as coming in pairs corresponding to the
100
- start and end of an action, e.g. entering and exiting a nosepoke. When a
101
- start-event end-event pair occurs in the data, only the start_event generates
102
- a row in the dataframe, with the end event used to compute the duration.
103
-
104
- Parameters
105
- ----------
106
- file_path : path to pyControl data file.
107
-
108
- paired_events : Optional dict specifying paired events e.g.
109
- {'poke_1_in':poke_1_out', 'poke_1_in':poke_1_out'}.
110
-
111
- pair_end_suffix : Optional string specifying a suffix used to indicate the
112
- end event of paired events that share a common stem e.g.
113
- the pair {'poke_1_in':poke_1_out'} would be found
114
- automatically using pair_end_suffix='_out'
115
-
116
- DESCRIPTION. The default is None.
117
-
118
- Returns
119
- -------
120
- df : session dataframe
121
- '''
122
-
123
- # Load data from file.
124
- with open (file_path , 'r' ) as f :
125
- print ('Importing data file: ' + os .path .split (file_path )[1 ])
126
- all_lines = [line .strip () for line in f .readlines () if line .strip ()]
127
-
128
- # Make dataframe.
129
- state_IDs = eval (next (line for line in all_lines if line [0 ]== 'S' )[2 :])
130
- event_IDs = eval (next (line for line in all_lines if line [0 ]== 'E' )[2 :])
131
- ID2name = {v : k for k , v in {** state_IDs , ** event_IDs }.items ()}
132
-
133
- line_dicts = []
134
- for line in all_lines :
135
- if line [0 ] == 'I' : # Info line.
136
- name , value = line [2 :].split (' : ' )
137
- line_dicts .append ({'type' : 'info' ,
138
- 'name' : name ,
139
- 'value' : value })
140
- elif line [0 ] == 'D' : # Data line.
141
- timestamp , ID = [int (i ) for i in line .split (' ' )[1 :]]
142
- line_dicts .append ({'type' : 'state' if ID in state_IDs .values () else 'event' ,
143
- 'name' : ID2name [ID ],
144
- 'time' : int (timestamp )})
145
- elif line [0 ] == 'P' : # Print line.
146
- line_dicts .append ({'type' : 'print' ,
147
- 'time' : int (line [2 :].split (' ' ,1 )[0 ]),
148
- 'value' : line [2 :].split (' ' ,1 )[1 ]})
149
-
150
- df = pd .DataFrame (line_dicts )
151
-
152
- # Add state durations.
153
- df .loc [df ['type' ] == 'state' ,'duration' ] = - df .loc [df ['type' ] == 'state' ,'time' ].diff (- 1 )
154
-
155
- # Find paired events with specified pair end suffix.
156
- if pair_end_suffix :
157
- end_events = [ev for ev in event_IDs .keys () if ev .endswith (pair_end_suffix )]
158
- for end_event in end_events :
159
- stem = end_event [:- len (pair_end_suffix )]
160
- try :
161
- start_event = next (ev for ev in event_IDs .keys () if ev .startswith (stem ) and ev != end_event )
162
- except StopIteration :
163
- continue # No matching start event found.
164
- paired_events [start_event ] = end_event
165
-
166
- # Compute paired event durations and remove end events.
167
- if paired_events :
168
- end2start = {v :k for k ,v in paired_events .items ()}
169
- start_times = {se :None for se in paired_events .keys ()}
170
- start_inds = {se :None for se in paired_events .keys ()}
171
- end_inds = []
172
- for i in df .index :
173
- if df .loc [i ,'name' ] in paired_events .keys (): # Pair start event.
174
- start_times [df .loc [i ,'name' ]] = df .loc [i ,'time' ]
175
- start_inds [ df .loc [i ,'name' ]] = i
176
- elif df .loc [i ,'name' ] in paired_events .values (): # Pair end event.
177
- start_event = end2start [df .loc [i ,'name' ]]
178
- if start_times [start_event ] is not None :
179
- df .loc [start_inds [start_event ],'duration' ] = df .loc [i ,'time' ] - start_times [start_event ]
180
- start_times [start_event ] = None
181
- end_inds .append (i )
182
- df .drop (index = end_inds , inplace = True )
183
-
184
- # Reset index and set column order.
185
- df .reset_index (drop = True )
186
- df = df .reindex (columns = ['type' ,'name' ,'time' ,'duration' ,'value' ])
187
- return df
188
-
189
85
#----------------------------------------------------------------------------------
190
86
# Experiment class
191
87
#----------------------------------------------------------------------------------
@@ -321,6 +217,157 @@ def _toDate(d): # Convert input to datetime.date object.
321
217
else :
322
218
raise ValueError ('Unable to convert input to date.' )
323
219
220
+ #----------------------------------------------------------------------------------
221
+ # Session Dataframe
222
+ #----------------------------------------------------------------------------------
223
+
224
def session_dataframe(file_path, paired_events=None, pair_end_suffix=None):
    '''Generate a pandas dataframe from a pyControl data file containing the
    sessions data.  The data frame has columns:

    type     : Whether the row contains session 'info', a 'state' entry,
               'event' or 'print' line.
    name     : The name of the state, event or session information in the row.
    time     : The time the row occurred in ms since the session start.
    duration : The duration in ms of states and paired events (see below).
    value    : The contents of 'info' and 'print' rows.

    Optionally events can be specified as coming in pairs corresponding to the
    start and end of an action, e.g. entering and exiting a nosepoke.  When a
    start-event end-event pair occurs in the data, only the start event
    generates a row in the dataframe, with the end event used to compute the
    duration.  End events that have no preceding unmatched start event are
    kept as ordinary rows.

    Parameters
    ----------
    file_path : path to pyControl data file.

    paired_events : Optional dict mapping start event names to end event
                    names, e.g.
                    {'poke_1_in': 'poke_1_out', 'poke_2_in': 'poke_2_out'}.
                    The dict passed in is never modified.

    pair_end_suffix : Optional string specifying a suffix used to indicate the
                      end event of paired events that share a common stem,
                      e.g. the pair {'poke_1_in': 'poke_1_out'} would be found
                      automatically using pair_end_suffix='_out'.

    Returns
    -------
    df : session dataframe, indexed 0..n-1.

    Raises
    ------
    StopIteration : if the file contains no 'S' (states) or 'E' (events) line.
    '''
    # Local import so this function does not depend on the module's import
    # block, which is outside the code shown here.
    import ast

    # Work on a private copy: pairs discovered through pair_end_suffix are
    # added below, and must not leak into the caller's dict or (via a shared
    # default argument) into subsequent calls.
    paired_events = dict(paired_events) if paired_events else {}

    # Load data from file.
    with open(file_path, 'r') as f:
        print('Importing data file: ' + os.path.split(file_path)[1])
        all_lines = [line.strip() for line in f if line.strip()]

    # State and event name -> ID dictionaries.  These are parsed with
    # ast.literal_eval rather than eval so that the contents of a data file
    # can never be executed as code.
    state_IDs = ast.literal_eval(next(line for line in all_lines if line[0] == 'S')[2:])
    event_IDs = ast.literal_eval(next(line for line in all_lines if line[0] == 'E')[2:])
    ID2name = {v: k for k, v in {**state_IDs, **event_IDs}.items()}
    state_ID_set = set(state_IDs.values())

    # Make dataframe.
    line_dicts = []
    for line in all_lines:
        if line[0] == 'I':  # Info line.
            # Split on the first separator only, so values that themselves
            # contain ' : ' are kept intact.
            name, _, value = line[2:].partition(' : ')
            line_dicts.append({'type': 'info',
                               'name': name,
                               'value': value})
        elif line[0] == 'D':  # Data line.
            timestamp, ID = [int(i) for i in line.split(' ')[1:]]
            line_dicts.append({'type': 'state' if ID in state_ID_set else 'event',
                               'name': ID2name[ID],
                               'time': timestamp})
        elif line[0] == 'P':  # Print line.
            # partition() tolerates print lines that carry a timestamp only.
            time_str, _, value = line[2:].partition(' ')
            line_dicts.append({'type': 'print',
                               'time': int(time_str),
                               'value': value})

    # Naming the columns guarantees they exist even when the file holds no
    # rows of a given kind (e.g. a session with no data lines).
    df = pd.DataFrame(line_dicts, columns=['type', 'name', 'time', 'value'])

    # Add state durations: time until the next state entry (NaN for the last).
    df.loc[df['type'] == 'state', 'duration'] = -df.loc[df['type'] == 'state', 'time'].diff(-1)

    # Find paired events with specified pair end suffix.
    if pair_end_suffix:
        end_events = [ev for ev in event_IDs if ev.endswith(pair_end_suffix)]
        for end_event in end_events:
            stem = end_event[:-len(pair_end_suffix)]
            try:
                start_event = next(ev for ev in event_IDs
                                   if ev.startswith(stem) and ev != end_event)
            except StopIteration:
                continue  # No matching start event found.
            paired_events[start_event] = end_event

    # Compute paired event durations and remove end events.
    if paired_events:
        end2start = {end: start for start, end in paired_events.items()}
        open_starts = {}  # start event name -> (row index, time) awaiting its end event.
        end_inds = []
        # The 'name' and 'time' columns are not modified in the loop, so they
        # can be scanned in a single pass instead of per-row .loc lookups.
        for i, name, time in zip(df.index, df['name'], df['time']):
            if name in paired_events:  # Pair start event.
                open_starts[name] = (i, time)
            elif name in end2start:  # Pair end event.
                start = open_starts.pop(end2start[name], None)
                if start is not None:
                    start_ind, start_time = start
                    df.loc[start_ind, 'duration'] = time - start_time
                    end_inds.append(i)
        df = df.drop(index=end_inds)

    # Reset index (reset_index returns a new frame) and set column order.
    df = df.reset_index(drop=True)
    return df.reindex(columns=['type', 'name', 'time', 'duration', 'value'])
321
+
322
+ #----------------------------------------------------------------------------------
323
+ # Experiment dataframe
324
+ #----------------------------------------------------------------------------------
325
+
326
def experiment_dataframe(folder_path, paired_events=None, pair_end_suffix=None):
    '''Generate a pandas dataframe from a pyControl experiment comprising
    many session data files in a folder.  The experiment dataframe has the
    same columns as the session dataframe ('type', 'name', 'time', 'duration',
    'value'), with additional columns, one per info line in the pyControl
    data file (e.g. subject ID, start date and time).  Each row of the
    dataframe corresponds to a single state entry, event or print line from a
    single session.

    As with the session_dataframe function, events can optionally be specified
    as coming in pairs corresponding to the start and end of an action, e.g.
    entering and exiting a nosepoke.  When a start-event end-event pair occurs
    in the data, only the start event generates a row in the dataframe, with
    the end event used to compute the duration.

    Parameters
    ----------
    folder_path : path to experiment data folder.  Every file in the folder
                  whose name ends in '.txt' is loaded as a session.

    paired_events : Optional dict mapping start event names to end event
                    names, e.g.
                    {'poke_1_in': 'poke_1_out', 'poke_2_in': 'poke_2_out'}.
                    The dict passed in is never modified.

    pair_end_suffix : Optional string specifying a suffix used to indicate the
                      end event of paired events that share a common stem,
                      e.g. the pair {'poke_1_in': 'poke_1_out'} would be found
                      automatically using pair_end_suffix='_out'.

    Returns
    -------
    df : experiment dataframe.  Sessions are concatenated in filename order
         and each keeps its own row labels, so index values repeat across
         sessions.

    Raises
    ------
    ValueError : if the folder contains no '.txt' files.
    '''
    # Never share or mutate the caller's dict (or a default argument).
    paired_events = dict(paired_events) if paired_events else {}

    # Sort so that row order does not depend on the file system's listing order.
    session_filenames = sorted(f for f in os.listdir(folder_path) if f.endswith('.txt'))
    if not session_filenames:
        raise ValueError('No pyControl data files (.txt) found in folder: '
                         + str(folder_path))

    session_dataframes = []
    for session_filename in session_filenames:
        # Make session dataframe.  Each session gets its own copy of the pairs
        # so nothing one session adds can affect the next.
        session_df = session_dataframe(os.path.join(folder_path, session_filename),
                                       paired_events=dict(paired_events),
                                       pair_end_suffix=pair_end_suffix)
        # Convert info rows to columns.
        is_info = session_df['type'] == 'info'
        info_rows = session_df[is_info]
        # Copy so the column assignments below act on an independent frame
        # rather than a slice of the session dataframe.
        session_df = session_df[~is_info].copy()
        for name, value in zip(info_rows['name'], info_rows['value']):
            session_df[name] = value
        session_dataframes.append(session_df)

    return pd.concat(session_dataframes, axis=0)
324
371
325
372
#----------------------------------------------------------------------------------
326
373
# Load analog data
0 commit comments