@@ -124,6 +124,62 @@ def inspect_database(root_path):



+def inspect_compressed_database(glob_str):
+    """
+    Build a filename database from a directory of compressed NSRDB files.
+
+    Examples
+    --------
+    glob_str = '/Users/toddkarin/Documents/NSRDB_compressed/*'
+    filedata = nsrdbtools.inspect_compressed_database(glob_str)
+
+    Returns
+    -------
+    filedata : pandas DataFrame
+        One row per compressed file, with columns 'location_id', 'lat',
+        'lon' and 'filename'.
+    """
+
+    location_id = []
+    lat = []
+    lon = []
+
+    # filename = get_s3_files()
+    # base_dir = '/Users/toddkarin/Documents/NSRDB_compressed/*'
+    # Keep only the .npz files so every list below stays the same length
+    # as the filename list.
+    filename = [f for f in glob.glob(glob_str) if f.endswith('.npz')]
+
+    # Extract location id, lat and lon from names of the form
+    # '<location_id>_<lat>_<lon>.npz'.
+    for key in filename:
+        path_parts = os.path.split(key)
+        filename_parts = path_parts[-1].split('_')
+
+        location_id.append(int(filename_parts[0]))
+        lat.append(float(filename_parts[1]))
+        lon.append(float(filename_parts[2][0:-4]))
+
+    # Create a DataFrame.
+    filedata = pd.DataFrame.from_dict({
+        'location_id': location_id,
+        'lat': lat,
+        'lon': lon,
+        'filename': filename,
+    })
+
+    # Redefine the index.
+    filedata.index = range(len(filedata))
+
+    return filedata
+
+
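A quick sketch of what the helper added above returns. The directory comes from the docstring example; the file name '145809_38.29_-122.14.npz' is made up here to match the '<location_id>_<lat>_<lon>.npz' pattern the parser expects:

    import nsrdbtools

    # Hypothetical directory containing files like '145809_38.29_-122.14.npz'.
    filedata = nsrdbtools.inspect_compressed_database(
        '/Users/toddkarin/Documents/NSRDB_compressed/*')

    # filedata is a DataFrame with columns 'location_id', 'lat', 'lon'
    # and 'filename', one row per compressed file.
    print(filedata.head())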


def inspect_pickle_database(root_path):
    """Build database for NSRDB files
@@ -199,18 +255,6 @@ def inspect_pickle_database(root_path):
    filedata.index = range(filedata.__len__())
    return filedata

-def import_weather_pickle(fullpath):
-    df = pd.read_pickle(fullpath, compression='xz')
-
-    # print(data_filename)
-    weather = pd.DataFrame.from_dict({
-        'dni': df['dni'].astype(np.float64),
-        'dhi': df['dhi'].astype(np.float64),
-        'ghi': df['ghi'].astype(np.float64),
-        'temp_air': df['temp_air'].astype(np.float64),
-        'wind_speed': df['wind_speed'].astype(np.float64)})
-
-    return weather


def import_csv(filename):
@@ -233,7 +277,11 @@ def import_csv(filename):

    # filename = '1ad06643cad4eeb947f3de02e9a0d6d7/128364_38.29_-122.14_1998.csv'

-    info = pd.read_csv(filename, nrows=1)
+    info_df = pd.read_csv(filename, nrows=1)
+    info = {}
+    for p in info_df:
+        info[p] = info_df[p].iloc[0]
+
    # See metadata for specified properties, e.g., timezone and elevation
    # timezone, elevation = info['Local Time Zone'], info['Elevation']

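The change above turns the one-row metadata DataFrame into a plain dict, so each metadata field is a scalar rather than a length-1 Series. A rough sketch of the difference, using the column names from the commented-out line above (the values are placeholders, not real data):

    # Before: info was a one-row DataFrame, so values needed an extra index,
    # e.g. info['Elevation'][0].
    # After: info is a dict of scalars.
    info_df = pd.read_csv(filename, nrows=1)
    info = {p: info_df[p].iloc[0] for p in info_df}
    elevation = info['Elevation']          # plain scalar
    timezone = info['Local Time Zone']     # plain scalar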
@@ -373,6 +421,8 @@ def build_nsrdb_link_list(filename):

    return url_list

+
+

def download_nsrdb_link_list(url_list, sleep=0.2):
    """
    This simple script opens a list of urls for downloading files.
@@ -392,6 +442,111 @@ def download_nsrdb_link_list(url_list, sleep=0.2):
        time.sleep(sleep)


+
+
+def load_npz(filename):
+    """
+    Load an npz file from a local file.
+
+    Parameters
+    ----------
+    filename
+        Path to the local .npz file.
+
+    Returns
+    -------
+    data : dict
+        Dictionary mapping each variable name stored in the file to its
+        array.
+    """
+    # Copy every stored array out of the archive into a plain dict.
+    data = {}
+    with np.load(filename) as arr:
+        for var in list(arr.keys()):
+            data[var] = arr[var]
+    return data
+
+
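A minimal usage sketch for load_npz. The file name is hypothetical, and the keys shown are the ones build_weather_info expects below:

    data = load_npz('145809_38.29_-122.14.npz')   # hypothetical file

    # 'data' is a plain dict of numpy arrays keyed by the stored names,
    # e.g. data['dni'], data['temp_air'], data['year'], ...
    print(sorted(data.keys()))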
+def get_local_weather_data(filename):
+    """
+    Load a local compressed weather datafile.
+
+    Parameters
+    ----------
+    filename
+        Path to the local compressed (.npz) weather file.
+
+    Returns
+    -------
+    weather, info
+        See build_weather_info.
+    """
+    data = load_npz(filename)
+    return build_weather_info(data)
+
+
+def build_weather_info(info):
+    """
+    Build a weather DataFrame and metadata dict from stored NSRDB arrays.
+
+    Parameters
+    ----------
+    info
+        Dictionary of arrays as returned by load_npz.
+
+    Returns
+    -------
+    weather : pandas DataFrame
+        Time series of dni, ghi, dhi, temp_air and wind_speed with a
+        timezone-aware datetime index.
+    info : dict
+        The input dictionary with the long time-series arrays removed,
+        leaving only scalar metadata.
+    """
+
+    # Convert zero-dimensional stored arrays back to python scalars.
+    for f in info:
+        try:
+            if info[f].dtype.kind == 'U':
+                info[f] = str(info[f])
+            elif info[f].dtype == np.dtype('int64'):
+                info[f] = int(info[f])
+            elif info[f].dtype == np.dtype('float64'):
+                info[f] = float(info[f])
+        except Exception:
+            print(f)
+
+    weather = pd.DataFrame.from_dict({
+        'year': info['year'],
+        'month': info['month'],
+        'day': info['day'],
+        'hour': info['hour'],
+        'minute': info['minute'],
+        'dni': info['dni'],
+        'ghi': info['ghi'],
+        'dhi': info['dhi'],
+        'temp_air': info['temp_air'],
+        'wind_speed': info['wind_speed'],
+    })
+
+    weather.index = pd.to_datetime(
+        pd.DataFrame.from_dict({
+            'year': info['year'],
+            'month': info['month'],
+            'day': info['day'],
+            'hour': info['hour'],
+            'minute': info['minute'],
+        })
+    )
+
+    # Localize to the station's fixed UTC offset (hours -> minutes).
+    weather.index = weather.index.tz_localize(
+        pytz.FixedOffset(float(info['local_time_zone'] * 60)))
+
+    # Remove long vectors from info, keeping only scalar metadata.
+    for f in list(info.keys()):
+        if isinstance(info[f], np.ndarray):
+            del info[f]
+
+    return weather, info
+
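A sketch of how the two new helpers are meant to be used together. The file name is hypothetical; the column and key names match the code above:

    weather, info = get_local_weather_data('145809_38.29_-122.14.npz')

    # 'weather' holds dni/ghi/dhi/temp_air/wind_speed with a timezone-aware
    # DatetimeIndex localized to the station's fixed offset.
    print(weather[['dni', 'temp_air']].head())

    # 'info' keeps only the scalar metadata; the long arrays were removed.
    print(info['local_time_zone'])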
def haversine_distance(lat1, lon1, lat2, lon2):
    p = 0.017453292519943295
    a = 0.5 - np.cos((lat2-lat1)*p)/2 + np.cos(lat1*p)*np.cos(lat2*p) * (1-np.cos((lon2-lon1)*p)) / 2
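For reference, the quantity a in the context lines above is the haversine term: p = pi/180 converts degrees to radians, and the identity (1 - cos x)/2 = sin^2(x/2) makes a equal to sin^2(p*(lat2-lat1)/2) + cos(p*lat1)*cos(p*lat2)*sin^2(p*(lon2-lon1)/2). The great-circle distance then follows as 2*R*arcsin(sqrt(a)); the rest of the function body is not shown in this diff, so the final step below is an assumption, not the author's code:

    # Assumed completion (not part of the diff above); R = 6371.0 km is the
    # mean Earth radius, giving the distance in kilometers.
    distance_km = 2 * 6371.0 * np.arcsin(np.sqrt(a))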