8
8
from typing import Dict , List
9
9
10
10
from smif .data_layer .data_array import DataArray
11
+ from smif .exception import SmifDataMismatchError , SmifDataNotFoundError
12
+ from smif .metadata import Spec
11
13
12
14
13
15
class DataStore (metaclass = ABCMeta ):
14
16
"""A DataStore must implement each of the abstract methods defined in this interface
15
17
"""
16
18
# region DataArray
17
19
@abstractmethod
def read_scenario_variant_data(
        self, key, spec, timestep=None, timesteps=None) -> DataArray:
    """Read a scenario variant data array.

    How the 'timestep' dimension is handled depends on which of ``timestep``
    and ``timesteps`` is supplied:

    - Single ``timestep`` given: the spec MAY declare 'timestep' as a dimension; if it
      does, that dimension should match the requested timestep.
    - Multiple ``timesteps`` given: the spec MUST declare 'timestep' as a dimension,
      and it should match the requested timesteps.
    - Neither given: all available timesteps are read. The returned DataArray carries
      a 'timestep' dimension covering every available timestep, whether or not the
      spec declares one.

    Parameters
    ----------
    key : str
    spec : ~smif.metadata.spec.Spec
    timestep : int (optional)
        If set, read data for this single timestep
    timesteps : list[int] (optional)
        If set, read data for these timesteps

    Returns
    -------
    data_array : ~smif.data_layer.data_array.DataArray
    """
32
47
33
48
@abstractmethod
def write_scenario_variant_data(self, key, data_array):
    """Write a scenario variant data array.

    Parameters
    ----------
    key : str
    data_array : ~smif.data_layer.data_array.DataArray
        The data array to write
    """
44
57
45
58
@abstractmethod
@@ -59,15 +72,13 @@ def read_narrative_variant_data(self, key, spec, timestep=None):
59
72
"""
60
73
61
74
@abstractmethod
def write_narrative_variant_data(self, key, data_array):
    """Write a narrative variant data array.

    Parameters
    ----------
    key : str
    data_array : ~smif.data_layer.data_array.DataArray
        The data array to write
    """
72
83
73
84
@abstractmethod
@@ -275,3 +286,84 @@ def available_results(self, modelrun_name):
275
286
Each tuple is (timestep, decision_iteration, model_name, output_name)
276
287
"""
277
288
# endregion
289
+
290
@classmethod
def filter_on_timesteps(cls, dataframe, spec, path, timestep=None, timesteps=None):
    """Filter dataframe by timestep and align the spec's 'timestep' dimension.

    The 'timestep' dimension is treated as follows:

    If a single timestep is specified, the spec MAY include 'timestep' as a dimension. If
    so, the returned spec's 'timestep' coordinates are set to the timestep requested.
    Otherwise, the 'timestep' column is dropped from the returned dataframe.

    If multiple timesteps are specified, the returned spec will include a 'timestep'
    dimension whose coordinates are the timesteps requested.

    If timestep and timesteps are None, and the stored data has a timestep column (or
    index level), keep all rows; the returned spec's 'timestep' coordinates are the sorted
    unique timesteps found in the data. If the stored data does not have a timestep
    column, pass the dataframe and spec through unchanged.

    If both ``timestep`` and ``timesteps`` are given, ``timestep`` takes precedence.

    Parameters
    ----------
    dataframe : pandas.DataFrame
    spec : ~smif.metadata.spec.Spec
    path : str
        Where the data was read from; only used in error messages
    timestep : int (optional)
        If set, keep only rows for this timestep
    timesteps : list[int] (optional)
        If set, keep only rows for these timesteps

    Returns
    -------
    dataframe, spec : tuple
        The filtered dataframe and the (possibly updated) spec

    Raises
    ------
    SmifDataMismatchError
        If a timestep or timesteps are requested but the data has no 'timestep' column
        or index level
    SmifDataNotFoundError
        If the resulting dataframe is empty
    """
    # Describes the request in the not-found message. The wording for the single-timestep
    # and read-all cases is kept as it was; only the multiple-timesteps case changes.
    requested = "timestep {}".format(timestep)

    if timestep is not None:
        dataframe = cls._check_timestep_column_exists(dataframe, spec, path)
        dataframe = dataframe[dataframe.timestep == timestep]
        if 'timestep' in spec.dims:
            spec = cls._set_spec_timesteps(spec, [timestep])
        else:
            # spec has no timestep dimension, so the column must not reach the caller
            dataframe = dataframe.drop('timestep', axis=1)
    elif timesteps is not None:
        dataframe = cls._check_timestep_column_exists(dataframe, spec, path)
        dataframe = dataframe[dataframe.timestep.isin(timesteps)]
        spec = cls._set_spec_timesteps(spec, timesteps)
        # Report what was actually asked for, rather than "timestep None"
        requested = "timesteps {}".format(timesteps)
    else:
        # Only a missing timestep column is tolerated here; an error raised while
        # rebuilding the spec must not be silently swallowed.
        try:
            dataframe = cls._check_timestep_column_exists(dataframe, spec, path)
        except SmifDataMismatchError:
            pass
        else:
            spec = cls._set_spec_timesteps(spec, sorted(dataframe.timestep.unique()))

    if dataframe.empty:
        raise SmifDataNotFoundError(
            "Data for '{}' not found for {}".format(spec.name, requested))

    return dataframe, spec
331
+
332
@staticmethod
def dataframe_to_data_array(dataframe, spec, path):
    """Convert a dataframe to a DataArray described by spec.

    When the spec has dimensions the conversion is delegated to ``DataArray.from_df``.
    When the spec has no dimensions (scalar case) the column named ``spec.name`` must
    hold exactly one value, which becomes the DataArray's data.

    Parameters
    ----------
    dataframe : pandas.DataFrame
    spec : ~smif.metadata.spec.Spec
    path : str
        Where the data was read from; only used in error messages

    Returns
    -------
    data_array : ~smif.data_layer.data_array.DataArray

    Raises
    ------
    SmifDataMismatchError
        If the spec is zero-dimensional and the data column does not hold exactly one
        value
    """
    if not spec.dims:
        # zero-dimensional case (scalar)
        column = dataframe[spec.name]
        if column.shape != (1,):
            template = (
                "Data for '{}' should contain a single value, instead got {} while "
                "reading from {}"
            )
            raise SmifDataMismatchError(template.format(spec.name, len(column), path))
        return DataArray(spec, column.iloc[0])

    return DataArray.from_df(spec, dataframe)
346
+
347
@staticmethod
def _check_timestep_column_exists(dataframe, spec, path):
    """Return the dataframe with 'timestep' available as a column.

    If 'timestep' is already a column the dataframe is returned as-is. If it is only an
    index level, the index is reset so that it becomes a column. Otherwise a
    SmifDataMismatchError is raised describing the columns and index names found.

    Parameters
    ----------
    dataframe : pandas.DataFrame
    spec : ~smif.metadata.spec.Spec
        Only ``spec.name`` is used, in the error message
    path : str
        Where the data was read from; only used in the error message

    Returns
    -------
    dataframe : pandas.DataFrame
    """
    if 'timestep' in dataframe.columns:
        return dataframe

    if 'timestep' in dataframe.index.names:
        return dataframe.reset_index()

    template = (
        "Data for '{name}' expected a column called 'timestep', instead "
        "got data columns {data_columns} and index names {index_names} "
        "while reading from {path}"
    )
    details = template.format(
        data_columns=dataframe.columns.values.tolist(),
        index_names=dataframe.index.names,
        name=spec.name,
        path=path)
    raise SmifDataMismatchError(details)
362
+
363
@staticmethod
def _set_spec_timesteps(spec, timesteps):
    """Build a Spec from ``spec`` whose 'timestep' coordinates are ``timesteps``.

    The spec is converted with ``as_dict``; 'timestep' is prepended to its dims if not
    already present, its 'timestep' coords are set to ``timesteps``, and a Spec is
    rebuilt with ``Spec.from_dict``.

    Parameters
    ----------
    spec : ~smif.metadata.spec.Spec
    timesteps : list[int]

    Returns
    -------
    spec : ~smif.metadata.spec.Spec
    """
    config = spec.as_dict()
    dims = config['dims']
    if 'timestep' not in dims:
        # timestep becomes the leading dimension
        config['dims'] = ['timestep'] + dims
    config['coords']['timestep'] = timesteps
    return Spec.from_dict(config)
0 commit comments