-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfinal_tests.py
1110 lines (850 loc) · 58.3 KB
/
final_tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright (C) 2024 Antonio Rodriguez
#
# This file is part of Personalized-AI-Based-Do-It-Yourself-Glucose-Prediction-tool.
#
# Personalized-AI-Based-Do-It-Yourself-Glucose-Prediction-tool is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Personalized-AI-Based-Do-It-Yourself-Glucose-Prediction-tool is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Personalized-AI-Based-Do-It-Yourself-Glucose-Prediction-tool. If not, see <http://www.gnu.org/licenses/>.
# final_tests.py
# This module contains the functions needed to run the final tests of the DL models
# on the additionally collected data: reading the raw data, preparing the data
# and analysing the results.
# See each function's documentation for more details.
from typing import Dict, List
import os
import pickle
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import tensorflow as tf
import json
# Custom libraries
from models.training import ISO_adapted_loss
from sensor_params import *
from evaluation.multi_step.evaluation import model_evaluation as multi_step_model_evaluation
from utils import get_LibreView_CGM_X_Y_multistep
# Keys of the recordings used in the final test. Each key is the list
# [subject ID, sensor, recording, download date] used to index the data dictionaries
# of this module. Every subject shares the same sensor ("001"), recording ("001")
# and download date ("2024") keys. Subject "013" is deliberately left out.
set_of_libreview_keys = [
    [subject_id, "001", "001", "2024"]
    for subject_id in (
        "001", "003", "004", "007", "008", "011", "014",
        "015", "025", "026", "029", "039", "043", "045",
        "046", "048", "049", "051", "055", "057", "058",
        "060", "061", "062", "063", "065", "067", "068",
    )
]
def read_test_csv(dataset_path : str, save_dict : bool = True) -> Dict :
    """
    This function reads the raw .csv files (as downloaded by the endocrinologist/user)
    from the dataset path and stores them in a nested dictionary. It takes a while.

    Only files whose name contains "ID" are considered. The dictionary keys are taken
    from the "_"-separated fields of the file name: subject ID (first field without its
    two leading characters), sensor and recording (second and third fields without their
    leading character) and download date (fifth field without its last four characters).
    The resulting structure is:

        data_dict[ID][sensor][recording][download date][MAC][sensor name][entry]

    where entry is one of "CGM", "Escanned CGM", "Insulin no num", "Fast insulin" or
    "Food no num", each holding a "reading" array and its "timestamp" array.

    Side effects: the working directory is changed to dataset_path and, if save_dict
    is True, 'libreview_test_data.pickle' is written there.

    Args:
    ----
        dataset_path: Path where the .csv files are stored.
        save_dict: Flag to save the dictionary with pickle.

    Returns:
    -------
        data_dict: Dictionary with the CGM readings and timestamps for each subject, sensor and recording.
    """

    # Register type ('Tipo de registro') -> (dictionary entry, .csv column holding its value).
    # Register types 5 and 6 (carbohydrates in grams / portions) are not stored.
    register_targets = {
        0: ("CGM", 'Historial de glucosa mg/dL'),
        1: ("Escanned CGM", 'Escaneo de glucosa mg/dL'),
        2: ("Insulin no num", 'Insulina de acción rápida no numérica'),
        3: ("Fast insulin", 'Insulina de acción rápida (unidades)'),
        4: ("Food no num", 'Alimento no numérico'),
    }

    # Go to the dataset directory
    os.chdir(dataset_path)

    # Create empty dictionary
    data_dict = {}

    # Read all the available .csv files and store them in a dictionary.
    # The current directory is listed (instead of dataset_path again) so that
    # relative dataset paths keep working after the chdir above.
    for file in os.listdir() :

        # Only iterate on the files that contain patient's data
        if "ID" not in file :
            continue

        print("Reading ", file, "...")

        # Extract the useful information from the file name to use them as dictionary keys
        name_fields = file.split("_")
        subject_id = name_fields[0][2:]
        s = name_fields[1][1:]
        r = name_fields[2][1:]
        download_date = name_fields[4][:-4]

        # Create (or reuse) the dictionary for every patient, sensor and recording.
        # setdefault keeps what was already read from other files of the same subject.
        download_entry = (data_dict.setdefault(subject_id, {})
                                   .setdefault(s, {})
                                   .setdefault(r, {})
                                   .setdefault(download_date, {}))

        # Only read_csv is called if the file is .csv
        if not file.endswith(".csv") :
            continue

        # Read the .csv and store it in a DataFrame.
        current_recordings = pd.read_csv(file, low_memory=False)

        # Clean NaN values
        current_recordings = current_recordings.dropna(axis=0, subset=['Tipo de registro'])

        # Recording #14-01-01 has an error in the timestamps from sample 71870 to sample 74580. These are removed
        if subject_id == "014" and s == "001" and r == "001" :
            current_recordings.drop(current_recordings.index[71870:74581], inplace=True)

        # Convert timestamps to datetime64
        current_recordings['Sello de tiempo del dispositivo'] = pd.to_datetime(current_recordings['Sello de tiempo del dispositivo'],
                                                                                dayfirst=True,
                                                                                format="%Y-%m-%d %H:%M",
                                                                                exact=True)

        serials = current_recordings['Número de serial']
        devices = current_recordings['Dispositivo']
        timestamps = current_recordings['Sello de tiempo del dispositivo']
        register_types = current_recordings['Tipo de registro']

        # Iterate over the MACs (serial numbers), since they contain fewer errors than the 'Dispositivo' column
        for mac in serials.unique() :

            # Some instances (e.g., 014) bring NaN serial numbers. These are discarded
            if pd.isna(mac) :
                continue

            # The first row of this MAC provides the sensor name
            sensor_name = devices.iloc[np.where(serials == mac)[0][0]]

            # Empty reading/timestamp arrays for every stored entry of this sensor
            download_entry[mac] = {sensor_name : {entry : {"reading" : np.empty((0), dtype=np.float64),
                                                            "timestamp" : np.empty((0))}
                                                  for entry, _ in register_targets.values()}}

        # Iterate over all the recordings and place them and their timestamp in the corresponding dictionary entry
        for row in range(current_recordings.shape[0]) :

            # Depending on the register type, some columns are useful and some are not
            target = register_targets.get(round(register_types.iloc[row]))
            if target is None :
                continue

            # Rows without serial number belong to no registered sensor: skip them
            curr_MAC = serials.iloc[row]
            if pd.isna(curr_MAC) :
                continue

            entry, column = target

            # Use the sensor name registered for this MAC rather than the row's own
            # 'Dispositivo' value, which may contain typos
            store = next(iter(download_entry[curr_MAC].values()))[entry]
            store["reading"] = np.append(store["reading"], current_recordings[column].iloc[row])
            store["timestamp"] = np.append(store["timestamp"], timestamps.iloc[row])

    # Save dictionary using pickle
    if save_dict :
        filename = 'libreview_test_data.pickle'
        with open(filename, 'wb') as handle:
            pickle.dump(data_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return data_dict
def get_end_training_dates(parent_dir : str, target_dir : str, experiment_subdir : str = None) -> Dict:
    """ This function is for the final test of this work. After performing the 4-folds
    trimester wise cross-validation, more data were collected. These data were used for a
    final validation of the models. Notice that this data are more recent than those used
    in the previous step. Hence, to avoid using data that were used in the training process,
    the last timestamp of the training data of each subject is extracted. It returns a
    dictionary mapping each subject ID (folder name) to that timestamp as a string, and
    saves it as 'subjects_end_training_dates.pickle' in target_dir.

    Every entry of parent_dir whose name does not contain 'npy', 'txt', 'ISO', 'test',
    'svg' or 'pickle' is treated as a subject folder, and the file
    <parent_dir>/<ID>/<experiment_subdir>/Y_times.npy is read from it.

    Side effect: on return the working directory is target_dir. A relative target_dir
    is resolved against parent_dir.

    Args:
    ----
        parent_dir : Path that contains one folder per subject with the training timestamps.
        target_dir : Path where the dictionary will be saved.
        experiment_subdir : Path, relative to each subject folder, of the directory that
            contains 'Y_times.npy'. If None, the directory generated by the training step
            is used (N96/step1/PH60/multi/month-wise-4-folds/norm_min-max/None_sampling/DIL-1D-UNET/ISO_loss).

    Returns:
    -------
        subjects_end_training_dates: Dictionary with the last timestamps of the data for each subject

    """

    # Default location of the timestamps inside every subject folder
    if experiment_subdir is None:
        experiment_subdir = os.path.join('N96', 'step1', 'PH60', 'multi', 'month-wise-4-folds',
                                         'norm_min-max', 'None_sampling', 'DIL-1D-UNET', 'ISO_loss')

    # Empty dict to be filled with subject - end dates pairs
    subjects_end_training_dates = {}

    # Parent directory where all timestamps are placed (the argument is honoured, no hard-coded path)
    os.chdir(parent_dir)

    # Entries whose name contains any of these tokens are result files, not subject folders
    non_subject_tokens = ('npy', 'txt', 'ISO', 'test', 'svg', 'pickle')

    # Iterate over the ID folders to fetch the end date for each subject
    for subject_id in os.listdir():

        # Consider only folders, not .npy or .txt files
        if any(token in subject_id for token in non_subject_tokens):
            continue

        # Read the .npy file with the timestamps.
        # NOTE: allow_pickle=True executes pickled content; only use it on trusted files.
        timestamps = np.load(os.path.join(subject_id, experiment_subdir, 'Y_times.npy'), allow_pickle=True)

        # Extract the most recent timestamp to compare to the new data to make sure that test is done with newer data
        subjects_end_training_dates[subject_id] = str(timestamps[-1][-1])

    # Go to the test set directory
    os.chdir(target_dir)

    # Save dictionary using pickle
    filename = 'subjects_end_training_dates.pickle'
    with open(filename, 'wb') as handle:
        pickle.dump(subjects_end_training_dates, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return subjects_end_training_dates
def extract_test_data_recordings (subjects_end_training_dates: Dict, data_dict : Dict) -> Dict:
    """
    Build, save and return the dictionary with the test data recordings of the subjects
    listed in set_of_libreview_keys. The data are sorted by ID and by sensor (MAC and
    sensor name) in case a subject has more than one. Only the "CGM" readings and
    timestamps are kept, and only for sensors whose most recent sample is newer than the
    end of the training data of the subject. Sensors (and then subjects) left without
    data are removed, since they do not provide a proper test set.
    The result is also written to 'libreview_test_data_recordings.pickle' in the
    current working directory.

    Args:
    ----
        subjects_end_training_dates: Dictionary with the last timestamps of the data for each subject (n=29 at this stage)
        data_dict: Dictionary with the CGM data after read the .csv files.

    Returns:
    -------
        test_data_recordings: Dictionary with the test data recordings sorted by ID and sensor.

    """

    test_data_recordings = {}

    # First pass: copy the CGM data of every sensor that has samples newer than the training data
    for subject, sensor, recording, download_date in set_of_libreview_keys:

        kept_by_mac = {}
        test_data_recordings[subject] = {sensor : {recording : {download_date : kept_by_mac}}}

        for mac, by_sensor_name in data_dict[subject][sensor][recording][download_date].items():

            # Entry regarding the MAC; it stays empty if nothing is usable
            kept_by_mac[mac] = {}

            for sensor_name, entries in by_sensor_name.items():

                cgm = entries["CGM"]

                # There could be sensors without data
                if not cgm["reading"].any():
                    continue

                # Discard data that are on the .csv but are previous to the end of the training period
                if cgm["timestamp"][-1] > np.datetime64(subjects_end_training_dates[subject]):
                    kept_by_mac[mac][sensor_name] = {"CGM" : {"reading" : cgm["reading"],
                                                              "timestamp" : cgm["timestamp"]}}

    # Second pass: drop the MAC entries that were left empty
    for subject, sensor, recording, download_date in set_of_libreview_keys:
        by_mac = test_data_recordings[subject][sensor][recording][download_date]
        for empty_mac in [mac for mac, content in by_mac.items() if not content]:
            del by_mac[empty_mac]

    # Third pass: drop the IDs that were left without any sensor
    for subject, sensor, recording, download_date in set_of_libreview_keys:
        if not test_data_recordings[subject][sensor][recording][download_date]:
            del test_data_recordings[subject]

    # Save dictionary as pickle
    with open('libreview_test_data_recordings.pickle', 'wb') as handle:
        pickle.dump(test_data_recordings, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return test_data_recordings
def discard_data_from_sensor_black_list(test_data_recordings : Dict, sensor_black_list : List) -> Dict:
    """
    Since there are plenty of glucose sensors in the market and not all of them
    have the same sampling period, dataformat, etc., this function is introduced to
    discard a given sensor model. Now, the criterion is to have a 15-min sampling period sensor.
    But this might change in the future.

    The dictionary is filtered in place (and also returned): every MAC entry whose
    sensor model (the first sensor name stored under the MAC) is in the black list is
    removed, as well as MAC entries that hold no data at all. Subjects are never
    removed here; a subject may end up with no MAC entries.

    The dictionary itself is traversed, so subjects already removed by previous
    steps do not cause errors.

    Args:
    ----
        test_data_recordings: Dictionary with the test data recordings sorted by ID and sensor.
        sensor_black_list: List of sensors to be discarded from the dictionary.

    Returns:
    -------
        test_data_recordings: Filtered dictionary with the sensors that are not in the black list.

    """

    # Levels: ID -> sensor -> recording -> download date -> MAC -> sensor name
    for by_sensor in test_data_recordings.values():
        for by_recording in by_sensor.values():
            for by_download_date in by_recording.values():
                for by_mac in by_download_date.values():

                    # Iterate over a copy of the keys, since entries are deleted on the way
                    for mac in list(by_mac):
                        sensor_entries = by_mac[mac]

                        # The first sensor name identifies the sensor model (e.g., FreeStyle Libre 3
                        # is discarded because it has a sampling period of 5 minutes)
                        if not sensor_entries or next(iter(sensor_entries)) in sensor_black_list:
                            del by_mac[mac]

    return test_data_recordings
def get_ID_sensor_MAC_pairs(yr_data : Dict) -> Dict:
    """
    Match every subject ID with the MAC of the sensor used in the training and
    validation data. The final test must be done with data from the same sensor, so
    the returned dictionary is used to filter the already generated test data.
    The first four levels of yr_data are walked and the keys of the fifth level (the
    MACs) are collected; if a subject has several MACs, the last one visited is kept.
    Subjects without any MAC do not appear in the result.

    Args:
    ----
        yr_data: Dictionary with the training and validation data recordings sorted by ID and sensor.

    Returns:
    -------
        data_with_1yr_sensor_MAC: Dictionary with the IDs matched to the MACs of the sensors they used within the selected periods.
    """

    # MACs of the sensors used to train the personalized models, indexed by subject ID
    mac_by_subject = {}

    for subject, by_sensor in yr_data.items():
        for by_recording in by_sensor.values():
            for by_download_date in by_recording.values():
                for by_mac in by_download_date.values():
                    for mac in by_mac:
                        mac_by_subject[subject] = mac

    return mac_by_subject
def filter_subjects_that_change_sensor(test_data_recordings : Dict, data_with_1_yr_sensor_MAC : Dict) -> Dict:
    """
    This function filters the dictionary that contains the test data recordings using the dictionary that contains
    the MACs of the sensors used in the training and validation step. Only the MAC entry that matches the
    training MAC of the subject is kept; the rest of the MAC entries (and MAC entries without data) are deleted.
    The dictionary is filtered in place and also returned. Subjects are never removed here; a subject
    that changed sensor ends up with no MAC entries.

    The dictionary itself is traversed, so subjects already removed by previous steps do not cause errors.

    Args:
    ----
        test_data_recordings: Dictionary with the test data recordings sorted by ID and sensor.
        data_with_1_yr_sensor_MAC: Dictionary with the IDs matched to the MACs of the sensors they used within the selected periods.

    Returns:
    -------
        test_data_recordings: Filtered dictionary with subjects that have data from the same sensor used in the training and validation step.

    Raises:
    ------
        KeyError: If a subject with MAC entries has no MAC in data_with_1_yr_sensor_MAC.
    """

    # Levels: ID -> sensor -> recording -> download date -> MAC
    for subject, by_sensor in test_data_recordings.items():
        for by_recording in by_sensor.values():
            for by_download_date in by_recording.values():
                for by_mac in by_download_date.values():

                    # Iterate over a copy of the keys, since entries are deleted on the way
                    for mac in list(by_mac):

                        # Only if the MACs match (and there is data), the test data will be used
                        if mac != data_with_1_yr_sensor_MAC[subject] or not by_mac[mac]:
                            del by_mac[mac]

    return test_data_recordings
def remove_training_data_from_test_set(test_data_recordings : Dict, subjects_end_training_dates : Dict, verbose: int) -> Dict:
    """
    To test solely with data from the same sensor (as previously explained), and
    that were not used to train the models, the timestamps from the training steps are
    used to remove the test data that are older than or equal to those timestamps (i.e., same data).
    For every sensor entry, the CGM timestamps and readings are stacked as two columns
    (timestamp, reading), the rows newer than the end of the training data of the subject
    are kept, and the result is stored under the 'CGM' -> "concat_data" key. The original
    "timestamp" and "reading" entries are left untouched.
    If desired, informative messages are printed.

    subjects_end_training_dates is not modified: the conversion to datetime64 is done on
    a local copy. An entry with no sample newer than the training data gets an empty
    "concat_data" instead of raising an error.

    Args:
    ----
        test_data_recordings: Dictionary with the test data recordings sorted by ID and sensor.
        subjects_end_training_dates: Dictionary with the last timestamps of the data for each subject
        verbose: Flag to print informative messages (1 to print).

    Returns:
    -------
        test_data_recordings: The same dictionary, with the filtered data added as "concat_data".

    Raises:
    ------
        KeyError: If a subject of test_data_recordings is not in subjects_end_training_dates.
    """

    # Convert subject's end training dates to datetime64 to compare (local copy, the argument is kept as is)
    end_dates = {subject : np.datetime64(end_date) for subject, end_date in subjects_end_training_dates.items()}

    # Levels: ID -> sensor -> recording -> download date -> MAC -> sensor name
    for subject, by_sensor in test_data_recordings.items():
        for by_recording in by_sensor.values():
            for by_download_date in by_recording.values():
                for by_mac in by_download_date.values():
                    for by_sensor_name in by_mac.values():
                        for entries in by_sensor_name.values():

                            cgm = entries['CGM']

                            # Concatenate the timestamps and the readings to filter both at once
                            data = np.column_stack((cgm["timestamp"], cgm["reading"]))

                            # Keep only the samples newer than the training data
                            # (nothing to compare if there are no samples at all)
                            if data.shape[0] > 0:
                                data = data[data[:,0] > end_dates[subject]]

                            # Add the filtered data to the dictionary
                            cgm["concat_data"] = data

                            if verbose == 1:
                                print("~~~~")
                                print(subject)
                                print("Lower limit", end_dates[subject])
                                if len(cgm["timestamp"]) > 0:
                                    print("Before filtering", cgm["timestamp"][0])
                                if data.shape[0] > 0:
                                    print("After filtering", data[0][0])
                                    # Difference of last and first timestamps
                                    print(subject, data[-1][0] - data[0][0])
                                else:
                                    print("After filtering", "no samples left")

    return test_data_recordings
def extract_test_set_from_test_data_dictionary(test_data_recordings : Dict, NUM_OF_DAYS_TEST_SET : int, verbose : int) -> Dict:
    """
    This function takes the test data dictionary after cleaning the old data belonging to the training process and the sensor model
    filtering. It outputs the final CGM and timestamps sequences to generate the test vectors. The main parameter is the number of days
    that will form the test set. Subjects that do not have at least this amount of days in their data, will be discarded. The more number
    of days, the more subjects will be discarded.

    For every sensor entry, the time span of its "concat_data" (columns: timestamp, reading) is compared to the test window.
    Entries with a shorter span (or without samples) are emptied; the others are cut to the samples strictly before
    first timestamp + window, written back to "reading" and "timestamp", and their "concat_data" is removed.
    Finally, every subject of set_of_libreview_keys that has at least one emptied sensor entry is deleted.
    The dictionary is modified in place and also returned.

    Args
    ----
        test_data_recordings: Dictionary with the test data recordings sorted by ID and sensor.
        NUM_OF_DAYS_TEST_SET: Length of the test set. An integer is interpreted as a number of days;
            a timedelta-like value (e.g., pd.Timedelta, np.timedelta64) is used as is.
        verbose: Flag to print informative messages (1 to print).

    Returns:
    -------
        test_data_recordings: Dictionary with the final sequences of CGM readings and timestamps for the test set of containing NUM_OF_DAYS_TEST_SET

    """

    # A plain number of days cannot be compared/added to timestamps, so it is converted.
    # NOTE(review): assumes the timestamps are pandas Timestamps, as produced by read_test_csv.
    if isinstance(NUM_OF_DAYS_TEST_SET, (int, np.integer)) and not isinstance(NUM_OF_DAYS_TEST_SET, bool):
        test_window = pd.Timedelta(days=int(NUM_OF_DAYS_TEST_SET))
    else:
        test_window = NUM_OF_DAYS_TEST_SET

    # Levels: ID -> sensor -> recording -> download date -> MAC -> sensor name
    for by_sensor in test_data_recordings.values():
        for by_recording in by_sensor.values():
            for by_download_date in by_recording.values():
                for by_mac in by_download_date.values():
                    for by_sensor_name in by_mac.values():
                        for sensor_name in by_sensor_name:

                            # Entries already emptied have nothing to extract
                            if not by_sensor_name[sensor_name]:
                                continue

                            cgm = by_sensor_name[sensor_name]['CGM']
                            data = cgm["concat_data"]

                            # Extract the time data
                            time_data = data[:,0]

                            # Not enough data to form the test set: empty the entry to delete the subject later
                            if len(time_data) == 0 or time_data[-1] - time_data[0] < test_window:
                                by_sensor_name[sensor_name] = {}
                                continue

                            # From the first timestamp, sum the time threshold and only keep that data
                            upper_limit = time_data[0] + test_window
                            data = data[data[:,0] < upper_limit]

                            # Place the data in CGM readings and timestamps dictionary entries
                            cgm["reading"] = data[:,1]
                            cgm["timestamp"] = data[:,0]

                            # Delete the concatenated data entry
                            del cgm["concat_data"]

    # Check if there are IDs with emptied entries to delete them
    for subject, sensor, recording, download_date in set_of_libreview_keys:

        # Avoid errors with IDs already deleted
        if subject not in test_data_recordings:
            if verbose == 1:
                print(subject, "already deleted")
            continue

        by_mac = test_data_recordings[subject][sensor][recording][download_date]

        # The subject is deleted once, after the check, so that the dictionaries are
        # never modified while they are being traversed
        has_empty_entry = any(len(entry) == 0
                              for by_sensor_name in by_mac.values()
                              for entry in by_sensor_name.values())

        if has_empty_entry:
            del test_data_recordings[subject]
            if verbose == 1:
                print(subject, "deleted")

    return test_data_recordings
def final_model_test(test_data_recordings_final : Dict, PH : int, DL_models : List, NUM_OF_DAYS_TEST_SET : int, N : int = 96, step : int = 1,
                     parent_dir : str = r"C:\Users\aralmeida\Downloads\LibreViewRawData-final_sims\1yr_npy_files") -> None:
    """
    Evaluate the stored 1-year models of every subject on the final test set.

    For each subject (and each sensor entry below the hard-coded
    ['001']['001']['2024'] keys) the function generates the X/Y test instances,
    applies min-max normalization, appends the first derivative of X as a second
    feature, loads every requested model from disk, evaluates it and stores the
    results in 'test_results_dictionary.json' inside the subject directory.
    According to the original author, the evaluation is the same as in
    main_libreview.py. Note that "libreview_sensors" is read from module scope.

    Side effects: the working directory is changed with os.chdir (it is left in
    the directory of the last processed subject), the key 'test_CGM_samples' is
    added to every processed 'CGM' entry of the input dictionary, and the
    results JSON of every subject is (over)written.

    Args:
    ----
        test_data_recordings_final : Dictionary containing the filtered test data recordings subject per subject.
        PH : Prediction horizon (in minutes). Currently 30 and 60 have been tested.
        DL_models : DL models to be tested. (Currently, should be ['LSTM', 'StackedLSTM', 'DIL-1D-UNET'])
        NUM_OF_DAYS_TEST_SET : Number of days of the test set. Its str() is used as key of the results dictionary.
        N : Number of input steps. Default is 96. (According to previous steps)
        step : Step between input steps. Default is 1. (According to previous steps)
        parent_dir : Directory that contains one folder per subject ID. Defaults to the path used by the original author.

    Returns:
    -------
        None
    """
    results_filename = 'test_results_dictionary.json'

    # JSON object keys are always strings. Using string keys from the start avoids
    # mixing the int key PH with the "PH" key of a dictionary reloaded from disk
    # (which would be written as a duplicated key by json.dump).
    test_key = str(NUM_OF_DAYS_TEST_SET)
    ph_key = str(PH)

    # Iterate over the subjects to generate the X and Y test vectors
    for key, subject_data in test_data_recordings_final.items():
        subject_dir = os.path.join(parent_dir, "{}".format(key))
        for sensor_entries in subject_data['001']['001']['2024'].values():
            for entry in sensor_entries.values():
                cgm = entry['CGM']

                # CGM readings and their timestamps
                recordings = cgm['reading']
                timestamps = cgm['timestamp']

                # Save number of available CGM test samples in dictionary
                cgm['test_CGM_samples'] = recordings.shape

                # Generate X and Y test (the timestamps of the instances are not used here)
                X_test, Y_test, _, _ = get_LibreView_CGM_X_Y_multistep(recordings, timestamps, libreview_sensors,
                                                                       N, step, PH, plot = True, verbose = 0)

                # Min-max normalization. Y is scaled with the minimum and maximum of X_test.
                x_min = np.min(X_test)
                x_range = np.max(X_test) - x_min
                X_norm = (X_test - x_min)/x_range
                Y_norm = (Y_test - x_min)/x_range

                # First derivative of X_norm along axis 1
                X_norm_der = np.diff(X_norm, axis = 1)
                # np.diff drops one column. A copy of the last column is inserted just
                # before the last one, which is equivalent to repeating it on the right,
                # so X_norm_der recovers the size of X_norm along axis 1.
                X_norm_der = np.insert(X_norm_der, -1, X_norm_der[:,-1], axis = 1)
                # Stack X_norm and X_norm_der along a new last axis
                X_norm = np.dstack((X_norm, X_norm_der))

                # Go to the ID directory, where the results dictionary of the subject lives
                os.chdir(subject_dir)

                # Load the results dictionary of the subject, or start a new one if it
                # does not exist or cannot be parsed. Any other error is propagated.
                try:
                    with open(results_filename, 'rb') as handle:
                        test_results_dictionary = json.load(handle)
                    print("Dictionary loaded.\n")
                except (FileNotFoundError, ValueError):
                    test_results_dictionary = {}
                    print("Non-existing dictionary. A new one was created.\n")

                # One entry per test duration, so results obtained with other PHs for the
                # same duration are kept. The entry of the current PH is reset.
                test_results_dictionary.setdefault(test_key, {})[ph_key] = {}

                # Evaluate every requested model
                for DL_model in DL_models:
                    # Directory where the trained model is stored. N and step are part of
                    # the path so the loaded model matches the generated instances.
                    model_dir = os.path.join(subject_dir, "N{}".format(N), "step{}".format(step), "PH{}".format(PH),
                                             "multi", "month-wise-4-folds", "norm_min-max", "None_sampling",
                                             "{}".format(DL_model), "ISO_loss", "1-yr_model", "training")
                    os.chdir(model_dir)

                    # Load the model
                    name = "1yr-"+DL_model
                    model = tf.keras.models.load_model(name+'.h5', custom_objects={'ISO_adapted_loss': ISO_adapted_loss})

                    # Model evaluation
                    # NOTE(review): 'model' is not passed to the evaluation function; presumably it
                    # locates the model through 'name' and the working directory - TODO confirm.
                    results_normal_eval = multi_step_model_evaluation(N, PH, name, "min-max", 2, X_norm, Y_norm, round(PH/15), X_test, "ISO_loss", plot_results=True)

                    # Store the results of the current model
                    test_results_dictionary[test_key][ph_key][DL_model] = results_normal_eval

                # Back to the ID directory
                os.chdir(subject_dir)

                # Save updated dictionary
                with open(results_filename, 'w') as fp:
                    json.dump(test_results_dictionary, fp)
def subject_per_subject_bar_diagram(test_included_subjects : Dict, metric : str, metric_LSTM : List, metric_StackedLSTM : List, metric_DIL_1D_UNET, PH : int, NUM_OF_DAYS_TEST_SET : int) -> None:
    """
    Draw, save and show a grouped bar diagram with one group of three bars
    (LSTM, Stacked LSTM and DIL-1D-UNET) per subject for the given metric.
    This function has been designed ad hoc for the models included in this work.

    The figure is saved in the current working directory as
    metric + 'PH-' + PH + 'min_' + NUM_OF_DAYS_TEST_SET + '_test.svg'.
    Matplotlib rcParams (font family and weight) are modified globally.

    Args:
    -----
        test_included_subjects : Collection with the IDs of the included subjects. Used as x tick labels.
        metric : Name of the metric evaluated. 'RMSE', 'PARKES' and 'ISO' get a specific y label;
                 any other value leaves the y axis unlabelled.
        metric_LSTM : List with the metric values for LSTM
        metric_StackedLSTM : List with the metric values for Stacked LSTM
        metric_DIL_1D_UNET : List with the metric values for DIL-1D-UNET
        PH : Prediction horizon (in minutes). Currently 30 and 60 have been tested.
        NUM_OF_DAYS_TEST_SET : Number of days included in the test set

    Returns:
    --------
        None
    """
    plt.figure(figsize=(17, 8.5))

    # Set font to bold Arial
    plt.rcParams['font.family'] = 'sans-serif'
    plt.rcParams['font.sans-serif'] = 'Arial'
    plt.rcParams['font.weight'] = 'bold'
    plt.rcParams['axes.labelweight'] = 'bold'

    # Grouped bar diagram: each model is shifted one bar width to the right of the previous one
    bar_width = 0.25
    lstm_positions = np.arange(len(metric_LSTM))
    stacked_positions = [x + bar_width for x in lstm_positions]
    unet_positions = [x + bar_width for x in stacked_positions]

    plt.bar(lstm_positions, metric_LSTM, color='b', width=bar_width, edgecolor='grey', label='LSTM')
    plt.bar(stacked_positions, metric_StackedLSTM, color='r', width=bar_width, edgecolor='grey', label='Stacked LSTM')
    plt.bar(unet_positions, metric_DIL_1D_UNET, color='g', width=bar_width, edgecolor='grey', label='DIL-1D-UNET')

    # X labels are the included subjects.
    # NOTE(review): the ticks are placed at the position of the first (LSTM) bar of each
    # group, not at the centre of the group - confirm whether centring was intended.
    plt.xticks(range(len(test_included_subjects)), test_included_subjects)
    plt.xlabel('Subject', fontweight='bold')

    if metric == 'RMSE':
        plt.ylabel('RMSE (mg/dL)', fontweight='bold')
    elif metric == 'PARKES':
        plt.ylabel('PARKES (%)', fontweight='bold')
        # Dashed reference line at 99 %
        plt.axhline(y=99, color='black', linestyle='--', label='99% PARKES error')
    elif metric == 'ISO':
        plt.ylabel('ISO (%)', fontweight='bold')
        # Dashed reference line at 95 % (it was previously labelled as the 99% PARKES line)
        plt.axhline(y=95, color='black', linestyle='--', label='95% ISO threshold')

    # The legend is built after the reference lines so their labels are included
    plt.legend()

    # Save the figure
    plt.savefig(metric+'PH-'+str(PH)+'min_'+str(NUM_OF_DAYS_TEST_SET)+ '_test.svg', dpi=1200)
    plt.show()
def final_DIY_models_test(data_dict : Dict, sensor_black_list : List, PH : int, NUM_OF_DAYS_TEST_SET : int,
                          N : int = 96, step : int = 1,
                          DL_models : List = ['LSTM', 'StackedLSTM', 'DIL-1D-UNET'],
                          parent_dir : str = r"C:\Users\aralmeida\Downloads\LibreViewRawData-final_sims\1yr_npy_files",
                          TEST_DATASET_PATH : str = r"C:\Users\aralmeida\Downloads\Datos sensor FSL 2024\Datos crudos excel 2024_adapted") -> Dict:
    """
    Run the final tests with the DIY models trained within 1 year.

    Data from the previous cross-validation step is used to filter the test data by taking the sensor MACs.
    Thus, if a subject has changed his/her sensor and no new data is provided, it will be discarded.
    The test data is also filtered by the sensor model (sensor black list). Besides, if interruptions or
    lack of data samples imply that no test instances are generated, the subject is discarded as well.
    Notice that N, PH and NUM_OF_DAYS_TEST_SET influence this. Once the final test set is extracted from
    the test data dictionary, the models are evaluated with final_model_test().
    For more details about the specific steps, please refer to the specific functions called here.

    Side effects: the working directory is changed with os.chdir, progress messages are printed and
    'test_final_data_recordings.pickle' is written in the working directory left by final_model_test().
    "set_of_libreview_keys" and "libreview_sensors" are read from module scope.

    Args:
    ----
        data_dict : Dictionary with the CGM data after reading the .csv files.
        sensor_black_list : List of sensors to be discarded from the dictionary.
        PH : Prediction horizon (in minutes). Currently 30 and 60 have been tested.
        NUM_OF_DAYS_TEST_SET : Number of days of the test set.
        N : Number of input steps. Default is 96. (According to previous steps)
        step : Step between input steps. Default is 1. (According to previous steps)
        DL_models : DL models to be tested. Default: ['LSTM', 'StackedLSTM', 'DIL-1D-UNET']. The list is only read.
        parent_dir : Parent directory where the data is stored. Set by default but should be changed if someone wants to use it.
        TEST_DATASET_PATH : Path to the test dataset. Set by default but should be changed if someone wants to use it.

    Returns:
    -------
        test_data_recordings : Dictionary with the final sequences of CGM readings and timestamps of the
                               subjects included in the test set of NUM_OF_DAYS_TEST_SET.
    """
    # Get the end dates of the training set
    subjects_end_training_dates = get_end_training_dates(parent_dir, TEST_DATASET_PATH)

    # Extract the test data recordings
    test_data_recordings = extract_test_data_recordings(subjects_end_training_dates, data_dict)

    # Filter subjects using sensors from the black list
    test_data_recordings = discard_data_from_sensor_black_list(test_data_recordings, sensor_black_list)

    # The dictionary with the training and validation data is stored in the parent folder of parent_dir
    os.chdir(parent_dir)
    os.chdir("..")
    # NOTE: pickle must only be used with trusted files; this one is generated by a previous step of this project.
    with open('libreview_data_1yr_recordings.pickle', 'rb') as handle:
        yr_data = pickle.load(handle)

    # Back to the test set directory
    os.chdir(TEST_DATASET_PATH)

    # Extract the MAC of the sensors from the subjects used in the previous step (4-folds cross validation)
    data_with_1_yr_sensor_MAC = get_ID_sensor_MAC_pairs(yr_data)

    # Inclusion criteria #1: Subjects that have the same sensor during the 1-year period to train the models
    # Filter subjects that change their sensors to exclude them from the final test
    test_data_recordings = filter_subjects_that_change_sensor(test_data_recordings, data_with_1_yr_sensor_MAC)

    # Delete the IDs that are completely empty after the filtering
    for libreview_key in set_of_libreview_keys:
        subject_id = libreview_key[0]
        # Avoid errors with IDs that are not present or were already deleted
        if subject_id not in test_data_recordings:
            continue
        if len(test_data_recordings[subject_id][libreview_key[1]][libreview_key[2]][libreview_key[3]]) == 0:
            del test_data_recordings[subject_id]

    print("After filtering sensor changes: ", len(test_data_recordings.keys()))

    # Get the ID-MAC pairs for the test data.
    # NOTE(review): the result is not used afterwards; the call is kept in case the helper has side effects - TODO confirm.
    get_ID_sensor_MAC_pairs(test_data_recordings)

    # Remove, if exists, the overlapping data to test only with data not used in the training and validation sets
    test_data_recordings = remove_training_data_from_test_set(test_data_recordings, subjects_end_training_dates, 1)

    # Establishing the period of the test set (30, 90, 180, and 365 days in this work), extract the final test set
    test_data_recordings = extract_test_set_from_test_data_dictionary(test_data_recordings, NUM_OF_DAYS_TEST_SET, 1)

    # Keys of the subjects with no test instances (each ID is stored only once)
    no_test_subjects = []

    # Generate X and Y to see if there are subjects that do not provide any instances
    for key, subject_data in test_data_recordings.items():
        for sensor_entries in subject_data['001']['001']['2024'].values():
            for entry in sensor_entries.values():
                cgm = entry['CGM']

                # Generate X and Y test; only the number of generated instances is needed here
                X_test, _, _, _ = get_LibreView_CGM_X_Y_multistep(cgm['reading'], cgm['timestamp'], libreview_sensors,
                                                                  N, step, PH, plot = True, verbose = 0)

                # Save number of available CGM test instances in dictionary
                cgm['test_CGM_instances'] = X_test.shape[0]

                if cgm['test_CGM_instances'] == 0 and key not in no_test_subjects:
                    no_test_subjects.append(key)

    # Delete the subjects with no test instances (done after the loop to not modify the dictionary while iterating it)
    for subject_id in no_test_subjects:
        del test_data_recordings[subject_id]

    print("After filtering because subject does not have test instances: ", len(test_data_recordings.keys()))

    # Evaluate the models with the same N and step used to check the available instances
    final_model_test(test_data_recordings, PH, DL_models, NUM_OF_DAYS_TEST_SET, N, step)

    # Save the final test data recordings
    with open('test_final_data_recordings.pickle', 'wb') as handle:
        pickle.dump(test_data_recordings, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return test_data_recordings
def group_and_save_metrics(included_subjects : List, DL_models : List, PH : int, NUM_OF_DAYS_TEST_SET : int) -> List:
"""
Group RMSE, Parkes and ISO metrics for LSTM, Stacked LSTM, DIL-1D-UNET and Naive models.
Once this function is called, the results are saved in an Excel file an ready to be
plotted through the use of Lists.
Args:
----
test_data_recordings_final : List containing the subjects included in the current experiment.
DL_models : List with the DL models to be tested.
PH : Prediction horizon (in minutes). Currently 30 and 60 have been tested.
NUM_OF_DAYS_TEST_SET : Number of days included in the test set.
Returns:
-------
RMSE_LSTM : List with the RMSE values for LSTM
RMSE_StackedLSTM : List with the RMSE values for Stacked LSTM
RMSE_DIL_1D_UNET : List with the RMSE values for DIL-1D-UNET
RMSE_naive : List with the RMSE values for Naive model
PARKES_LSTM : List with the PARKES values for LSTM
PARKES_StackedLSTM : List with the PARKES values for Stacked LSTM
PARKES_DIL_1D_UNET : List with the PARKES values for DIL-1D-UNET
PARKES_naive : List with the PARKES values for Naive model
ISO_LSTM : List with the ISO values for LSTM
ISO_StackedLSTM : List with the ISO values for Stacked LSTM
ISO_DIL_1D_UNET : List with the ISO values for DIL-1D-UNET
ISO_naive : List with the ISO values for Naive model
"""
# Group the RMSE of all patients
RMSE_LSTM = []
RMSE_StackedLSTM = []
RMSE_DIL_1D_UNET = []
RMSE_naive = []
PARKES_LSTM = []
PARKES_StackedLSTM = []
PARKES_DIL_1D_UNET = []
PARKES_naive = []
ISO_LSTM = []
ISO_StackedLSTM = []
ISO_DIL_1D_UNET = []
ISO_naive = []
# Convert to timedelta datatype for consistency with dictionary keys
NUM_OF_DAYS_TEST_SET = np.timedelta64(NUM_OF_DAYS_TEST_SET, 'D')
for id in included_subjects:
# Go to the correspondant directory
dir = r"C:\Users\aralmeida\Downloads\LibreViewRawData-final_sims\1yr_npy_files\{}".format(id)
os.chdir(dir)
# Load dictionary
with open('test_results_dictionary.json', 'r') as fp:
curr_results = json.load(fp)
# Index depending on the PH
if PH == 30:
idx = 1
if PH == 60:
idx = 3
# Append RMSE, Parkes, ISO
RMSE_LSTM.append(curr_results[str(NUM_OF_DAYS_TEST_SET)][str(PH)][DL_models[0]]['RMSE'][idx])
PARKES_LSTM.append(curr_results[str(NUM_OF_DAYS_TEST_SET)][str(PH)][DL_models[0]]['PARKES'][idx])
ISO_LSTM.append(curr_results[str(NUM_OF_DAYS_TEST_SET)][str(PH)][DL_models[0]]['ISO'][idx])
# Go to the correspondant directory
dir = r"C:\Users\aralmeida\Downloads\LibreViewRawData-final_sims\1yr_npy_files\{}".format(id)
os.chdir(dir)
# Load dictionary
with open('test_results_dictionary.json', 'r') as fp:
curr_results = json.load(fp)
RMSE_StackedLSTM.append(curr_results[str(NUM_OF_DAYS_TEST_SET)][str(PH)][DL_models[1]]['RMSE'][idx])
PARKES_StackedLSTM.append(curr_results[str(NUM_OF_DAYS_TEST_SET)][str(PH)][DL_models[1]]['PARKES'][idx])
ISO_StackedLSTM.append(curr_results[str(NUM_OF_DAYS_TEST_SET)][str(PH)][DL_models[1]]['ISO'][idx])
# Go to the correspondant directory
dir = r"C:\Users\aralmeida\Downloads\LibreViewRawData-final_sims\1yr_npy_files\{}".format(id)