-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathplugin.bib
1658 lines (1542 loc) · 185 KB
/
plugin.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% HCX Protocol v0.9: web-published specification for health claims data exchange.
@standard{2023hcx,
  title = {{{HCX Protocol}} v0.9},
  date = {2023-12-01},
  langid = {english},
  url = {http://hcxprotocol.io/},
  urldate = {2024-09-18},
  abstract = {Open-source, community driven protocol for Health Claims data Exchange},
  file = {/Users/dkapitan/Zotero/storage/7R9WM4UH/hcxprotocol.io.html},
}
% IJCNN 2022 conference paper on Vertical Split Learning and its predictive performance.
% The conference city is stored in venue; location is reserved for the publisher's place.
@inproceedings{allaart2022vertical,
  author = {Allaart, Corinne G. and Keyser, Bjorn and Bal, Henri and Van Halteren, Aart},
  title = {Vertical {{Split Learning}} - an Exploration of Predictive Performance in Medical and Other Use Cases},
  booktitle = {2022 {{International Joint Conference}} on {{Neural Networks}} ({{IJCNN}})},
  eventtitle = {2022 {{International Joint Conference}} on {{Neural Networks}} ({{IJCNN}})},
  venue = {Padua, Italy},
  date = {2022-07-18},
  pages = {1--8},
  publisher = {IEEE},
  isbn = {978-1-72818-671-9},
  doi = {10.1109/IJCNN55064.2022.9891964},
  url = {https://ieeexplore.ieee.org/document/9891964/},
  urldate = {2024-10-07},
  langid = {english},
  abstract = {In healthcare and other fields, data of an individual is often vertically partitioned across multiple organizations. Creating a centralized data store for AI algorithm development is cumbersome in such cases because of concerns like privacy and data ownership. Methods of distributed learning over vertically partitioned data could offer a solution here. While several studies have evaluated the feasibility, privacy and efficiency of such methods, an extensive evaluation of their impact on predictive performance compared to a centralized approach is missing. Vertical Split Learning (VSL) aims to provide vertical distributed learning through distributed neural network architectures. Our study adapts and applies VSL to 8 datasets, both in medicine and beyond, evaluating the impact of different network and (vertical) feature distributions on predictive performance. In most configurations VSL yields comparable predictive performance to its centralized counterparts. However, certain data and network distributions give an unexpected and severe loss of performance. Based on our findings we give some initial recommendations under which conditions VSL can be applied as a suitable alternative for data centralization.},
  file = {/Users/dkapitan/Zotero/storage/X2SM7XDJ/Allaart et al. - 2022 - Vertical Split Learning - an exploration of predictive performance in medical and other use cases.pdf},
}
% Systematic review and architecture proposal on federated learning for healthcare (ACM TIST).
% Second author uses the extended name format so the particle "da" belongs to the family name
% and "André" to the given names (the export had them split the wrong way round).
@article{antunes2022federated,
  author = {Antunes, Rodolfo Stoffel and family=Costa, given=Cristiano André, prefix=da, useprefix=true and Küderle, Arne and Yari, Imrana Abdullahi and Eskofier, Björn},
  title = {Federated {{Learning}} for {{Healthcare}}: {{Systematic Review}} and {{Architecture Proposal}}},
  shorttitle = {Federated {{Learning}} for {{Healthcare}}},
  journaltitle = {ACM Transactions on Intelligent Systems and Technology},
  shortjournal = {ACM Trans. Intell. Syst. Technol.},
  volume = {13},
  number = {4},
  pages = {1--23},
  date = {2022-08-31},
  issn = {2157-6904, 2157-6912},
  doi = {10.1145/3501813},
  url = {https://dl.acm.org/doi/10.1145/3501813},
  urldate = {2023-04-23},
  langid = {english},
  abstract = {FL enables the training of ML models locally (at the location of the data) and only shares the resulting model, which is not reverse-engineerable, with the requesting party. Therefore, FL avoids the need to share the private datasets and sensitive data to others, preventing exposition to entities conducting studies and enabling data usage for broader purposes [11]. A central entity manages the learning process and distributes the training algorithm to each participating data holder. Each participant generates a local model trained with their private data and shares the resulting parameters with the central entity. Finally, the central entity employs an aggregation algorithm to combine the parameters of all local models into a single global model.},
  file = {/Users/dkapitan/Zotero/storage/94L35GLV/Antunes et al. - 2022 - Federated Learning for Healthcare Systematic Revi.pdf},
}
% Apache Arrow project website (in-memory analytics platform).
@standard{apache-arrow,
  title = {Apache {{Arrow}}},
  date = {2024-09-20},
  langid = {american},
  url = {https://arrow.apache.org/},
  urldate = {2024-09-20},
  abstract = {A cross-language development platform for in-memory analytics},
  file = {/Users/dkapitan/Zotero/storage/TIBXI35H/arrow.apache.org.html},
}
% Apache Iceberg project website.
@standard{apache-iceberg,
  title = {Apache {{Iceberg}}},
  url = {https://iceberg.apache.org/},
  urldate = {2024-09-20},
  file = {/Users/dkapitan/Zotero/storage/WDUZH5EF/iceberg.apache.org.html},
}
% Apache Parquet project website.
@standard{apache-parquet,
  title = {Apache {{Parquet}}},
  date = {2024-09-20},
  langid = {english},
  url = {https://parquet.apache.org/},
  urldate = {2024-09-20},
  abstract = {The Apache Parquet Website},
  file = {/Users/dkapitan/Zotero/storage/6ZYX6IMX/parquet.apache.org.html},
}
% CIDR 2021 paper introducing the Lakehouse architecture.
% booktitle is required for this entry type and is taken from the event title;
% the original pages value was a page count, so it is recorded as pagetotal instead of a range.
@inproceedings{armbrust2021lakehouse,
  author = {Armbrust, Michael and Ghodsi, Ali and Xin, Reynold and Zaharia, Matei},
  title = {Lakehouse: {{A New Generation}} of {{Open Platforms}} That {{Unify Data Warehousing}} and {{Advanced Analytics}}},
  booktitle = {11th {{Annual Conference}} on {{Innovative Data Systems Research}} ({{CIDR}} ’21)},
  eventtitle = {11th {{Annual Conference}} on {{Innovative Data Systems Research}} ({{CIDR}} ’21)},
  date = {2021},
  pagetotal = {8},
  langid = {english},
  abstract = {This paper argues that the data warehouse architecture as we know it today will wither in the coming years and be replaced by a new architectural pattern, the Lakehouse, which will (i) be based on open direct-access data formats, such as Apache Parquet, (ii) have firstclass support for machine learning and data science, and (iii) offer state-of-the-art performance. Lakehouses can help address several major challenges with data warehouses, including data staleness, reliability, total cost of ownership, data lock-in, and limited use-case support. We discuss how the industry is already moving toward Lakehouses and how this shift may affect work in data management. We also report results from a Lakehouse system using Parquet that is competitive with popular cloud data warehouses on TPC-DS.},
  file = {/Users/dkapitan/Zotero/storage/8XX2TSTM/Armbrust et al. - 2021 - Lakehouse A New Generation of Open Platforms that.pdf},
}
% AMIA Summits proceedings article on the FHIR module for OpenMRS; identified by PMID and PMCID.
@article{bacher2024fhiring,
  author = {Bacher, I and Goodrich, M and Kimaina, A and Seaton, M and Faulkenberry, G and Vaish, S and Flowers, J and family=Fraser, given=HS, given-i=HS},
  title = {{{FHIRing}} up {{OpenMRS}}: {{Architecture}}, {{Implementation}} and {{Real-World Use-Cases}} in {{Global Health}}},
  shorttitle = {{{FHIRing}} up {{OpenMRS}}},
  journaltitle = {AMIA Summits on Translational Science Proceedings},
  shortjournal = {AMIA Jt Summits Transl Sci Proc},
  volume = {2024},
  pages = {162--171},
  date = {2024-05-31},
  issn = {2153-4063},
  eprint = {38827065},
  eprinttype = {pmid},
  pmcid = {PMC11141833},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC11141833/},
  urldate = {2024-06-04},
  abstract = {HL7 FHIR was created almost a decade ago and is seeing increasingly wide use in high income settings. Although some initial work was carried out in low and middle income (LMIC) settings there has been little impact until recently. The need for reliable and easy to implement interoperability between health information systems in LMICs is growing with large scale deployments of EHRs, national reporting systems and mHealth applications. The OpenMRS open source EHR has been deployed in more than 44 LMIC with increasing needs for interoperability with other HIS. We describe here the development and deployment of a new FHIR module supporting the latest standards and its use in interoperability with laboratory systems, mHealth applications, pharmacy dispensing system and as a tool for supporting advanced user interface designs. We also show how it facilitates date science projects and deployment of machine leaning based CDSS and precision medicine in LMICs.},
  file = {/Users/dkapitan/Zotero/storage/IXLQFUR2/Bacher et al. - 2024 - FHIRing up OpenMRS Architecture, Implementation a.pdf},
}
% Journal article arguing for trust-based governance of health data research.
@article{bak2023trustbased,
  author = {Bak, Marieke A. R. and Ploem, M. Corrette and Tan, Hanno L. and Blom, M. T. and Willems, Dick L.},
  title = {Towards Trust-Based Governance of Health Data Research},
  journaltitle = {Medicine, Health Care and Philosophy},
  shortjournal = {Med Health Care and Philos},
  date = {2023-01-12},
  issn = {1572-8633},
  doi = {10.1007/s11019-022-10134-8},
  url = {https://doi.org/10.1007/s11019-022-10134-8},
  urldate = {2023-04-24},
  langid = {english},
  keywords = {Big data,Data sharing,ESCAPE-NET,GDPR,Governance,Privacy,Research ethics,Solidarity,Trust},
  abstract = {Developments in medical big data analytics may bring societal benefits but are also challenging privacy and other ethical values. At the same time, an overly restrictive data protection regime can form a serious threat to valuable observational studies. Discussions about whether data privacy or data solidarity should be the foundational value of research policies, have remained unresolved. We add to this debate with an empirically informed ethical analysis. First, experiences with the implementation of the General Data Protection Regulation (GDPR) within a European research consortium demonstrate a gap between the aims of the regulation and its effects in practice. Namely, strictly formalised data protection requirements may cause routinisation among researchers instead of substantive ethical reflection, and may crowd out trust between actors in the health data research ecosystem; while harmonisation across Europe and data sharing between countries is hampered by different interpretations of the law, which partly stem from different views about ethical values. Then, building on these observations, we use theory to argue that the concept of trust provides an escape from the privacy-solidarity debate. Lastly, the paper details three aspects of trust that can help to create a responsible research environment and to mitigate the encountered challenges: trust as multi-agent concept; trust as a rational and democratic value; and trust as method for priority setting. Mutual cooperation in research—among researchers and with data subjects—is grounded in trust, which should be more explicitly recognised in the governance of health data research.},
  file = {/Users/dkapitan/Zotero/storage/UNPATT4L/Bak et al. - 2023 - Towards trust-based governance of health data rese.pdf},
}
% Nature Machine Intelligence comment on the data-ethics limits of federated learning.
% url points at the canonical article page; the exported link carried a personal sharing token.
@article{bak2024federated,
  author = {Bak, Marieke and Madai, Vince I. and Celi, Leo Anthony and Kaissis, Georgios A. and Cornet, Ronald and Maris, Menno and Rueckert, Daniel and Buyx, Alena and McLennan, Stuart},
  title = {Federated Learning Is Not a Cure-All for Data Ethics},
  journaltitle = {Nature Machine Intelligence},
  shortjournal = {Nat Mach Intell},
  pages = {1--3},
  date = {2024-03-18},
  publisher = {Nature Publishing Group},
  issn = {2522-5839},
  doi = {10.1038/s42256-024-00813-x},
  url = {https://www.nature.com/articles/s42256-024-00813-x},
  urldate = {2024-03-18},
  langid = {english},
  keywords = {Computational models,Ethics,Machine learning,Medical ethics},
  abstract = {Although federated learning is often seen as a promising solution to allow AI innovation while addressing privacy concerns, we argue that this technology does not fix all underlying data ethics concerns. Benefiting from federated learning in digital health requires acknowledgement of its limitations.},
}
% JMIR Medical Informatics scoping review of machine-learning clinical information systems built on FHIR.
% Publisher name and publisher city are kept in separate fields (publisher / location).
@article{balch2023machine,
  author = {Balch, Jeremy A. and Ruppert, Matthew M. and Loftus, Tyler J. and Guan, Ziyuan and Ren, Yuanfang and Upchurch, Gilbert R. and Ozrazgat-Baslanti, Tezcan and Rashidi, Parisa and Bihorac, Azra},
  title = {Machine {{Learning}}–{{Enabled Clinical Information Systems Using Fast Healthcare Interoperability Resources Data Standards}}: {{Scoping Review}}},
  shorttitle = {Machine {{Learning}}–{{Enabled Clinical Information Systems Using Fast Healthcare Interoperability Resources Data Standards}}},
  journaltitle = {JMIR Medical Informatics},
  volume = {11},
  number = {1},
  pages = {e48297},
  date = {2023-08-24},
  publisher = {JMIR Publications Inc.},
  location = {Toronto, Canada},
  doi = {10.2196/48297},
  url = {https://medinform.jmir.org/2023/1/e48297},
  urldate = {2024-10-10},
  langid = {english},
  abstract = {Background: Machine Learning-Enabled Clinical Information Systems (ML-CIS) have the potential to drive healthcare delivery and research. The Fast Healthcare Interoperability Resources (FHIR) data standard is increasingly applied in developing these systems. However, methods for applying FHIR to ML-CIS are variable. Objective: This study evaluates and compares the functionalities, strengths, and weaknesses of existing systems and proposes guidelines for optimizing future work with ML-CIS. Methods: Embase, PubMed, and Web of Science were searched for articles describing machine-learning systems used for clinical data analytics or decision support in compliance with FHIR standards. Information regarding each system’s functionality, data sources, formats, security, performance, resource requirements, scalability, strengths, and limitations were compared across systems. Results: 39 articles describing FHIR-based ML-CIS were divided into three categories according to their primary focus: Clinical Decision Support Systems (CDSSs) (n=18), data management and analytic platforms (n=10), or auxiliary modules and application programming interfaces (n=11). Model strengths included novel use of cloud systems, Bayesian networks, visualization strategies, and techniques for translating unstructured or free text data to FHIR frameworks. Most intelligent systems lacked electronic health record interoperability and externally validated evidence of clinical efficacy. Conclusions: Shortcomings in current ML-CIS can be addressed by incorporating modular and interoperable data management, analytic platforms, secure inter-institutional data exchange, and application programming interfaces with adequate scalability to support both real-time and prospective clinical applications using electronic health record platforms with diverse implementations.},
  file = {/Users/dkapitan/Zotero/storage/ZUT8FMDJ/Balch et al. - 2023 - Machine Learning–Enabled Clinical Information Systems Using Fast Healthcare Interoperability Resourc.pdf;/Users/dkapitan/Zotero/storage/BZSWAX22/e48297.html},
}
% Communications of the ACM article on the hourglass model of layered services.
@article{beck2019hourglass,
  author = {Beck, Micah},
  title = {On the Hourglass Model},
  journaltitle = {Communications of the ACM},
  volume = {62},
  number = {7},
  pages = {48--57},
  date = {2019-06},
  issn = {0001-0782, 1557-7317},
  doi = {10.1145/3274770},
  langid = {english},
  abstract = {Used in the design of the Internet and Unix, the layered services of the hourglass model have enabled viral adoption and deployment scalability.},
  file = {/Users/dkapitan/Zotero/storage/NKGQZL5V/Beck - 2019 - On the hourglass model.pdf},
}
% Beda EMR product web page.
@online{beda-emr,
  title = {Beda {{EMR}}},
  url = {https://beda.software/emr},
  urldate = {2024-12-30},
  abstract = {Beda EMR System with no-code customisation},
  file = {/Users/dkapitan/Zotero/storage/NCC2GQR5/emr.html},
}
% JAMIA article describing the conversion of MIMIC-IV into FHIR resources.
% journaltitle holds the full journal name only; the abbreviation lives in shortjournal.
@article{bennett2023mimiciva,
  author = {Bennett, Alex M. and Ulrich, Hannes and family=Damme, given=Philip, prefix=van, useprefix=true and Wiedekopf, Joshua and Johnson, Alistair E. W.},
  title = {{{MIMIC-IV}} on {{FHIR}}: Converting a Decade of in-Patient Data into an Exchangeable, Interoperable Format},
  shorttitle = {{{MIMIC-IV}} on {{FHIR}}},
  journaltitle = {Journal of the American Medical Informatics Association},
  shortjournal = {J Am Med Inform Assoc},
  volume = {30},
  number = {4},
  pages = {718--725},
  date = {2023-03-16},
  issn = {1527-974X},
  doi = {10.1093/jamia/ocad002},
  eprint = {36688534},
  eprinttype = {pmid},
  pmcid = {PMC10018258},
  langid = {english},
  keywords = {Datasets as Topic,electronic health records,Electronic Health Records,fast healthcare interoperability resources,Health Level Seven,HL7 FHIR,Humans,Information Dissemination,Information Storage and Retrieval,interoperability,MIMIC-IV,open data,Patients,Reproducibility of Results},
  abstract = {OBJECTIVE: Convert the Medical Information Mart for Intensive Care (MIMIC)-IV database into Health Level 7 Fast Healthcare Interoperability Resources (FHIR). Additionally, generate and publish an openly available demo of the resources, and create a FHIR Implementation Guide to support and clarify the usage of MIMIC-IV on FHIR. MATERIALS AND METHODS: FHIR profiles and terminology system of MIMIC-IV were modeled from the base FHIR R4 resources. Data and terminology were reorganized from the relational structure into FHIR according to the profiles. Resources generated were validated for conformance with the FHIR profiles. Finally, FHIR resources were published as newline delimited JSON files and the profiles were packaged into an implementation guide. RESULTS: The modeling of MIMIC-IV in FHIR resulted in 25 profiles, 2 extensions, 35 ValueSets, and 34 CodeSystems. An implementation guide encompassing the FHIR modeling can be accessed at mimic.mit.edu/fhir/mimic. The generated demo dataset contained 100 patients and over 915~000 resources. The full dataset contained 315~000 patients covering approximately 5~840~000 resources. The final datasets in NDJSON format are accessible on PhysioNet. DISCUSSION: Our work highlights the challenges and benefits of generating a real-world FHIR store. The challenges arise from terminology mapping and profiling modeling decisions. The benefits come from the extensively validated openly accessible data created as a result of the modeling work. CONCLUSION: The newly created MIMIC-IV on FHIR provides one of the first accessible deidentified critical care FHIR datasets. The extensive real-world data found in MIMIC-IV on FHIR will be invaluable for research and the development of healthcare applications.},
  file = {/Users/dkapitan/Zotero/storage/TN5X7YHN/Bennett et al. - 2023 - MIMIC-IV on FHIR converting a decade of in-patient data into an exchangeable, interoperable format.pdf},
}
% Scientific Data article presenting a metadata crosswalk between FHIR, OMOP, CDISC and openEHR.
% The issue number is recorded once, in number; a duplicate issue field would print it twice.
@article{bonisch2022harvesting,
  author = {Bönisch, Caroline and Kesztyüs, Dorothea and Kesztyüs, Tibor},
  title = {Harvesting Metadata in Clinical Care: A Crosswalk between {{FHIR}}, {{OMOP}}, {{CDISC}} and {{openEHR}} Metadata},
  shorttitle = {Harvesting Metadata in Clinical Care},
  journaltitle = {Scientific Data},
  shortjournal = {Sci Data},
  volume = {9},
  number = {1},
  pages = {659},
  date = {2022-10-28},
  publisher = {Nature Publishing Group},
  issn = {2052-4463},
  doi = {10.1038/s41597-022-01792-7},
  url = {https://www.nature.com/articles/s41597-022-01792-7},
  urldate = {2023-06-21},
  langid = {english},
  keywords = {Health care,Public health},
  abstract = {Metadata describe information about data source, type of creation, structure, status and semantics and are prerequisite for preservation and reuse of medical data. To overcome the hurdle of disparate data sources and repositories with heterogeneous data formats a metadata crosswalk was initiated, based on existing standards. FAIR Principles were included, as well as data format specifications. The metadata crosswalk is the foundation of data provision between a Medical Data Integration Center (MeDIC) and researchers, providing a selection of metadata information for research design and requests. Based on the crosswalk, metadata items were prioritized and categorized to demonstrate that not one single predefined standard meets all requirements of a MeDIC and only a maximum data set of metadata is suitable for use. The development of a convergence format including the maximum data set is the anticipated solution for an automated transformation of metadata in a MeDIC.},
  file = {/Users/dkapitan/Zotero/storage/JH8VKLQ9/Bönisch et al. - 2022 - Harvesting metadata in clinical care a crosswalk .pdf},
}
% Project deliverable D2.2 on the European EHR exchange format (EHRxF).
% type and institution are required for report entries.
% NOTE(review): institution is inferred from the "WP2-ISCTE" suffix of the title - confirm against the PDF.
@report{carmo2024d22,
  author = {Carmo, Anderson and Martins, Henrique},
  title = {D2.2 - {{EHRxF}} in a Nutshell-{{WP2-ISCTE}}},
  type = {Project deliverable},
  number = {D2.2},
  institution = {{ISCTE}},
  date = {2024-07-04},
  url = {https://ehr-exchange-format.eu/wp-content/uploads/2024/10/D2.2-v20240704-EHRxF-in-a-Nutshell-WP2-ISCTE.pdf},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/TJJZ24PP/Carmo - D2.2 - EHRxF in a nutshell-WP2-ISCTE.pdf},
}
% Article on conformal prediction sets for multiclass and multilabel problems.
% NOTE(review): journal, volume, number, pages and year were added from the published
% Journal of Machine Learning Research version - verify against the JMLR record.
@article{cauchoisknowing,
  author = {Cauchois, Maxime and Gupta, Suyash and Duchi, John C.},
  title = {Knowing What {{You Know}}: Valid and Validated Confidence Sets in Multiclass and Multilabel Prediction},
  journaltitle = {Journal of Machine Learning Research},
  volume = {22},
  number = {81},
  pages = {1--42},
  date = {2021},
  langid = {english},
  abstract = {We develop conformal prediction methods for constructing valid predictive confidence sets in multiclass and multilabel problems without assumptions on the data generating distribution. A challenge here is that typical conformal prediction methods—which give marginal validity (coverage) guarantees—provide uneven coverage, in that they address easy examples at the expense of essentially ignoring difficult examples. By leveraging ideas from quantile regression, we build methods that always guarantee correct coverage but additionally provide (asymptotically consistent) conditional coverage for both multiclass and multilabel prediction problems. To address the potential challenge of exponentially large confidence sets in multilabel prediction, we build tree-structured classifiers that efficiently account for interactions between labels. Our methods can be bolted on top of any classification model—neural network, random forest, boosted tree—to guarantee its validity. We also provide an empirical evaluation, simultaneously providing new validation methods, that suggests the more robust coverage of our confidence sets.},
  file = {/Users/dkapitan/Zotero/storage/IVV2P7PT/Cauchois et al. - Knowing what You Know valid and validated confidence sets in multiclass and multilabel prediction.pdf},
}
% JMIR Medical Informatics article on deep-learning-based automatic ICD-10 coding.
% Publisher name and city are split into publisher / location. The less-than sign in the
% abstract uses the textless macro, because the exported backslash-less-than sequence is only
% defined inside a tabbing environment. A stray line break inside the abstract was joined.
@article{chen2021automatic,
  author = {Chen, Pei-Fu and Wang, Ssu-Ming and Liao, Wei-Chih and Kuo, Lu-Cheng and Chen, Kuan-Chih and Lin, Yu-Cheng and Yang, Chi-Yu and Chiu, Chi-Hao and Chang, Shu-Chih and Lai, Feipei},
  title = {Automatic {{ICD-10 Coding}} and {{Training System}}: {{Deep Neural Network Based}} on {{Supervised Learning}}},
  shorttitle = {Automatic {{ICD-10 Coding}} and {{Training System}}},
  journaltitle = {JMIR Medical Informatics},
  volume = {9},
  number = {8},
  pages = {e23230},
  date = {2021-08-31},
  publisher = {JMIR Publications Inc.},
  location = {Toronto, Canada},
  doi = {10.2196/23230},
  url = {https://medinform.jmir.org/2021/8/e23230},
  urldate = {2024-04-11},
  langid = {english},
  abstract = {Background: The International Classification of Diseases (ICD) code is widely used as the reference in medical system and billing purposes. However, classifying diseases into ICD codes still mainly relies on humans reading a large amount of written material as the basis for coding. Coding is both laborious and time-consuming. Since the conversion of ICD-9 to ICD-10, the coding task became much more complicated, and deep learning– and natural language processing–related approaches have been studied to assist disease coders. Objective: This paper aims at constructing a deep learning model for ICD-10 coding, where the model is meant to automatically determine the corresponding diagnosis and procedure codes based solely on free-text medical notes to improve accuracy and reduce human effort. Methods: We used diagnosis records of the National Taiwan University Hospital as resources and apply natural language processing techniques, including global vectors, word to vectors, embeddings from language models, bidirectional encoder representations from transformers, and single head attention recurrent neural network, on the deep neural network architecture to implement ICD-10 auto-coding. Besides, we introduced the attention mechanism into the classification model to extract the keywords from diagnoses and visualize the coding reference for training freshmen in ICD-10. Sixty discharge notes were randomly selected to examine the change in the F1-score and the coding time by coders before and after using our model. Results: In experiments on the medical data set of National Taiwan University Hospital, our prediction results revealed F1-scores of 0.715 and 0.618 for the ICD-10 Clinical Modification code and Procedure Coding System code, respectively, with a bidirectional encoder representations from transformers embedding approach in the Gated Recurrent Unit classification model. The well-trained models were applied on the ICD-10 web service for coding and training to ICD-10 users. With this service, coders can code with the F1-score significantly increased from a median of 0.832 to 0.922 (P{\textless}.05), but not in a reduced interval. Conclusions: The proposed model significantly improved the F1-score but did not decrease the time consumed in coding by disease coders.},
  file = {/Users/dkapitan/Zotero/storage/JPYLNNH4/Chen et al. - 2021 - Automatic ICD-10 Coding and Training System Deep .pdf},
}
% Springer conference paper on running Personal Health Train federated analyses over FHIR data.
@inproceedings{choudhury2020personal,
  author = {Choudhury, Ananya and family=Soest, given=Johan, prefix=van, useprefix=true and Nayak, Stuti and Dekker, Andre},
  editor = {Bhattacharjee, Arup and Borgohain, Samir Kr. and Soni, Badal and Verma, Gyanendra and Gao, Xiao-Zhi},
  title = {Personal {{Health Train}} on {{FHIR}}: {{A Privacy Preserving Federated Approach}} for {{Analyzing FAIR Data}} in {{Healthcare}}},
  shorttitle = {Personal {{Health Train}} on {{FHIR}}},
  booktitle = {Machine {{Learning}}, {{Image Processing}}, {{Network Security}} and {{Data Sciences}}},
  series = {Communications in {{Computer}} and {{Information Science}}},
  pages = {85--95},
  date = {2020},
  publisher = {Springer},
  location = {Singapore},
  isbn = {9789811563157},
  doi = {10.1007/978-981-15-6315-7_7},
  langid = {english},
  keywords = {FAIR,FHIR,Personal health train},
  abstract = {Big data and machine learning applications focus on retrieving data on a central location for analysis. However, healthcare data can be sensitive in nature and as such difficult to share and make use for secondary purposes. Healthcare vendors are restricted to share data without proper consent from the patient. There is a rising awareness among individual patients as well regarding sharing their personal information due to ethical, legal and societal problems. The current data-sharing platforms in healthcare do not sufficiently handle these issues. The rationale of the Personal Health Train (PHT) approach shifts the focus from sharing data to sharing processing/analysis applications and their respective results. A prerequisite of the PHT-infrastructure is that the data is FAIR (findable, accessible, interoperable, reusable). The aim of the paper is to describe a methodology of finding the number of patients diagnosed with hypertension and calculate cohort statistics in a privacy-preserving federated manner. The whole process completes without individual patient data leaving the source. For this, we rely on the Fast Healthcare Interoperability Resources (FHIR) standard.},
  file = {/Users/dkapitan/Zotero/storage/MX4FY5MX/Choudhury et al. - 2020 - Personal Health Train on FHIR A Privacy Preservin.pdf},
}
% Book chapter on a deep learning pipeline for automated ICD-10 coding of French medical texts.
% The book series is stored in series rather than booktitle.
% NOTE(review): the volume title and volume number were added from the publisher record
% for this ISBN pair - verify against the IOS Press page.
@incollection{chraibi2021deep,
  author = {Chraibi, Abdelahad and Delerue, David and Taillard, Julien and Chaib Draa, Ismat and Beuscart, Régis and Hansske, Arnaud},
  editor = {Mantas, John and Stoicu-Tivadar, Lăcrămioara and Chronaki, Catherine and Hasman, Arie and Weber, Patrick and Gallos, Parisis and Crişan-Vida, Mihaela and Zoulias, Emmanouil and Chirila, Oana Sorina},
  title = {A {{Deep Learning Framework}} for {{Automated ICD-10 Coding}}},
  booktitle = {Public {{Health}} and {{Informatics}}},
  series = {Studies in {{Health Technology}} and {{Informatics}}},
  volume = {281},
  date = {2021-05-27},
  publisher = {IOS Press},
  isbn = {978-1-64368-184-9 978-1-64368-185-6},
  doi = {10.3233/SHTI210178},
  url = {https://ebooks.iospress.nl/doi/10.3233/SHTI210178},
  urldate = {2024-04-11},
  langid = {english},
  abstract = {The International Statistical Classification of Diseases and Related Health Problems (ICD) is one of the widely used classification system for diagnoses and procedures to assign diagnosis codes to Electronic Health Record (EHR) associated with a patient’s stay. The aim of this paper is to propose an automated coding system to assist physicians in the assignment of ICD codes to EHR. For this purpose, we created a pipeline of Natural Language Processing (NLP) and Deep Learning (DL) models able to extract the useful information from French medical texts and to perform classification. After the evaluation phase, our approach was able to predict 346 diagnosis codes from heterogeneous medical units with an accuracy average of 83\%. Our results were finally validated by physicians of the Medical Information Department (MID) in charge of coding hospital stays.},
  file = {/Users/dkapitan/Zotero/storage/7JVHUMR6/Chraibi et al. - 2021 - A Deep Learning Framework for Automated ICD-10 Cod.pdf},
}
@software{clickhouse,
title = {{{ClickHouse}}: {{Fast Open-Source OLAP DBMS}}},
author = {{ClickHouse}},
url = {https://clickhouse.com},
urldate = {2024-09-20},
abstract = {ClickHouse is a fast open-source column-oriented database management system that allows generating analytical data reports in real-time using SQL queries},
file = {/Users/dkapitan/Zotero/storage/N6RERXVA/clickhouse.com.html}
}
@online{conformal,
title = {Conformal {{Prediction Sets Improve Human Decision Making}}},
author = {Cresswell, Jesse C. and Sui, Yi and Kumar, Bhargava and Vouitsis, Noël},
date = {2024},
eprint = {2401.13744},
eprinttype = {arXiv},
url = {https://arxiv.org/html/2401.13744v3},
urldate = {2024-10-02},
pubstate = {prepublished},
file = {/Users/dkapitan/Zotero/storage/VJ3GD6ZW/2401.html}
}
@article{cremonesi2023need,
  author = {Cremonesi, Francesco and Planat, Vincent and Kalokyri, Varvara and Kondylakis, Haridimos and Sanavia, Tiziana and Miguel Mateos Resinas, Victor and Singh, Babita and Uribe, Silvia},
  title = {The Need for Multimodal Health Data Modeling: {{A}} Practical Approach for a Federated-Learning Healthcare Platform},
  shorttitle = {The Need for Multimodal Health Data Modeling},
  journaltitle = {Journal of Biomedical Informatics},
  shortjournal = {Journal of Biomedical Informatics},
  date = {2023-05-01},
  volume = {141},
  pages = {104338},
  issn = {1532-0464},
  doi = {10.1016/j.jbi.2023.104338},
  url = {https://www.sciencedirect.com/science/article/pii/S153204642300059X},
  urldate = {2024-01-16},
  keywords = {Data model,Federated learning,Healthcare,Lessons learned,Medical research,Omics},
  abstract = {Federated learning initiatives in healthcare are being developed to collaboratively train predictive models without the need to centralize sensitive personal data. GenoMed4All is one such project, with the goal of connecting European clinical and –omics data repositories on rare diseases through a federated learning platform. Currently, the consortium faces the challenge of a lack of well-established international datasets and interoperability standards for federated learning applications on rare diseases. This paper presents our practical approach to select and implement a Common Data Model (CDM) suitable for the federated training of predictive models applied to the medical domain, during the initial design phase of our federated learning platform. We describe our selection process, composed of identifying the consortium’s needs, reviewing our functional and technical architecture specifications, and extracting a list of business requirements. We review the state of the art and evaluate three widely-used approaches (FHIR, OMOP and Phenopackets) based on a checklist of requirements and specifications. We discuss the pros and cons of each approach considering the use cases specific to our consortium as well as the generic issues of implementing a European federated learning healthcare platform. A list of lessons learned from the experience in our consortium is discussed, from the importance of establishing the proper communication channels for all stakeholders to technical aspects related to –omics data. For federated learning projects focused on secondary use of health data for predictive modeling, encompassing multiple data modalities, a phase of data model convergence is sorely needed to gather different data representations developed in the context of medical research, interoperability of clinical care software, imaging, and –omics analysis into a coherent, unified data model. 
Our work identifies this need and presents our experience and a list of actionable lessons learned for future work in this direction.},
  file = {/Users/dkapitan/Zotero/storage/C5RQXIRH/Cremonesi et al. - 2023 - The need for multimodal health data modeling A pr.pdf;/Users/dkapitan/Zotero/storage/K2A9EKFC/S153204642300059X.html},
}
@article{dalhatu2023paper,
title = {From {{Paper Files}} to {{Web-Based Application}} for {{Data-Driven Monitoring}} of {{HIV Programs}}: {{Nigeria}}'s {{Journey}} to a {{National Data Repository}} for {{Decision-Making}} and {{Patient Care}}},
shorttitle = {From {{Paper Files}} to {{Web-Based Application}} for {{Data-Driven Monitoring}} of {{HIV Programs}}},
author = {Dalhatu, Ibrahim and Aniekwe, Chinedu and Bashorun, Adebobola and Abdulkadir, Alhassan and Dirlikov, Emilio and Ohakanu, Stephen and Adedokun, Oluwasanmi and Oladipo, Ademola and Jahun, Ibrahim and Murie, Lisa and Yoon, Steven and Abdu-Aguye, Mubarak G. and Sylvanus, Ahmed and Indyer, Samuel and Abbas, Isah and Bello, Mustapha and Nalda, Nannim and Alagi, Matthias and Odafe, Solomon and Adebajo, Sylvia and Ogorry, Otse and Akpu, Murphy and Okoye, Ifeanyi and Kakanfo, Kunle and Onovo, Amobi Andrew and Ashefor, Gregory and Nzelu, Charles and Ikpeazu, Akudo and Aliyu, Gambo and Ellerbrock, Tedd and Boyd, Mary and Stafford, Kristen A. and Swaminathan, Mahesh},
date = {2023-09},
journaltitle = {Methods of Information in Medicine},
shortjournal = {Methods Inf Med},
volume = {62},
number = {03/04},
pages = {130--139},
issn = {0026-1270, 2511-705X},
doi = {10.1055/s-0043-1768711},
url = {http://www.thieme-connect.de/DOI/DOI?10.1055/s-0043-1768711},
urldate = {2024-03-25},
abstract = {Background: Timely and reliable data are crucial for clinical, epidemiologic, and program management decision making. Electronic health information systems provide platforms for managing large longitudinal patient records. Nigeria implemented the National Data Repository (NDR) to create a central data warehouse of all people living with human immunodeficiency virus (PLHIV) while providing useful functionalities to aid decision making at different levels of program implementation. Objective: We describe the Nigeria NDR and its development process, including its use for surveillance, research, and national HIV program monitoring toward achieving HIV epidemic control. Methods: Stakeholder engagement meetings were held in 2013 to gather information on data elements and vocabulary standards for reporting patient-level information, technical infrastructure, human capacity requirements, and information flow. Findings from these meetings guided the development of the NDR. An implementation guide provided common terminologies and data reporting structures for data exchange between the NDR and the electronic medical record (EMR) systems. Data from the EMR were encoded in extensible markup language and sent to the NDR over secure hypertext transfer protocol after going through a series of validation processes. Results: By June 30, 2021, the NDR had up-to-date records of 1,477,064 (94.4\%) patients receiving HIV treatment across 1,985 health facilities, of which 1,266,512 (85.7\%) patient records had fingerprint template data to support unique patient identification and record linkage to prevent registration of the same patient under different identities. Data from the NDR was used to support HIV program monitoring, case-based surveillance and production of products like the monthly lists of patients who have treatment interruptions and dashboards for monitoring HIV test and start. Conclusion: The NDR enabled the availability of reliable and timely data for surveillance, research, and HIV program monitoring to guide program improvements to accelerate progress toward epidemic control.},
langid = {english},
file = {/Users/dkapitan/Zotero/storage/TGU3P9XW/Dalhatu et al. - 2023 - From Paper Files to Web-Based Application for Data.pdf}
}
@software{dbt,
title = {{{dbt}}},
author = {{dbt Labs}},
url = {https://www.getdbt.com/index},
urldate = {2024-09-20},
abstract = {Use dbt to build reliable data models quickly and collaboratively—featuring version control, automated documentation, and integrated testing.},
file = {/Users/dkapitan/Zotero/storage/UBVUQEMW/www.getdbt.com.html}
}
@unpublished{deltomme2024federated,
title = {Federated {{Health Innovation Network}} ({{FHIN}})},
author = {Deltomme, Camille and Denturck, Kim and De Jaeger, Peter and Willems, Wouter and De Caluwe, Bram and Hellebaut, Geert and Pierlet, Noëlla and Van Brantegem, Karel and Heirman, Peter and Thorrez, Yves and Deschepper, Mieke},
date = {2024-09-20},
note = {Poster presented at the OHDSI Europe Symposium 2024},
url = {https://www.ohdsi-europe.org/images/symposium-2024/Posters/poster%20OHDSI%20FHIN%20Camille%20Deltomme%20-%20Camille%20Deltomme.pdf}
}
@article{delussu2024survey,
  author = {Delussu, Giovanni and Frexia, Francesca and Mascia, Cecilia and Sulis, Alessandro and Meloni, Vittorio and Del Rio, Mauro and Lianas, Luca},
  title = {A Survey of {{openEHR Clinical Data Repositories}}},
  journaltitle = {International Journal of Medical Informatics},
  shortjournal = {International Journal of Medical Informatics},
  date = {2024-11-01},
  volume = {191},
  pages = {105591},
  issn = {1386-5056},
  doi = {10.1016/j.ijmedinf.2024.105591},
  url = {https://www.sciencedirect.com/science/article/pii/S1386505624002545},
  urldate = {2025-02-05},
  keywords = {Archetype,CDR,Clinical Data Repository,ISO 13606,openEHR,Survey},
  file = {/Users/dkapitan/Zotero/storage/UMZ4UUY2/S1386505624002545.html},
}
@article{demello2022semantic,
  author = {family=Mello, given=Blanda Helena, prefix=de, useprefix=true and Rigo, Sandro José and family=Costa, given=Cristiano André, prefix=da, useprefix=true and family=Rosa Righi, given=Rodrigo, prefix=da, useprefix=true and Donida, Bruna and Bez, Marta Rosecler and Schunke, Luana Carina},
  title = {Semantic Interoperability in Health Records Standards: A Systematic Literature Review},
  shorttitle = {Semantic Interoperability in Health Records Standards},
  journaltitle = {Health and Technology},
  shortjournal = {Health Technol.},
  date = {2022-03-01},
  volume = {12},
  number = {2},
  pages = {255--272},
  issn = {2190-7196},
  doi = {10.1007/s12553-022-00639-w},
  url = {https://doi.org/10.1007/s12553-022-00639-w},
  urldate = {2024-06-03},
  langid = {english},
  keywords = {EHR,Health record,Health standard,Semantic interoperability,Systematic review},
  abstract = {The integration and exchange of information among health organizations and system providers are currently regarded as a challenge. Each organization usually has an internal ecosystem and a proprietary way to store electronic health records of the patient’s history. Recent research explores the advantages of an integrated ecosystem by exchanging information between the different inpatient care actors. Many efforts seek quality in health care, economy, and sustainability in process management. Some examples are reducing medical errors, disease control and monitoring, individualized patient care, and avoiding duplicate and fragmented entries in the electronic medical record. Likewise, some studies showed technologies to achieve this goal effectively and efficiently, with the ability to interoperate data, allowing the interpretation and use of health information. To that end, semantic interoperability aims to share data among all the sectors in the organization, clinicians, nurses, lab, the entire hospital. Therefore, avoiding data silos and keep data regardless of vendors, to exchange the information across organizational boundaries. This study presents a comprehensive systematic literature review of semantic interoperability in electronic health records. We searched seven databases of articles published between 2010 to September 2020. We showed the most chosen scenarios, technologies, and tools employed to solve interoperability problems, and we propose a taxonomy around semantic interoperability in health records. Also, we presented the main approaches to solve the exchange problem of legacy and heterogeneous data across healthcare organizations.},
  file = {/Users/dkapitan/Zotero/storage/2KJQL4X8/de Mello et al. - 2022 - Semantic interoperability in health records standa.pdf},
}
@article{dereuver2018digital,
  author = {family=Reuver, given=Mark, prefix=de, useprefix=true and Sørensen, Carsten and Basole, Rahul C.},
  title = {The {{Digital Platform}}: {{A Research Agenda}}},
  shorttitle = {The {{Digital Platform}}},
  journaltitle = {Journal of Information Technology},
  date = {2018-06-01},
  volume = {33},
  number = {2},
  pages = {124--135},
  publisher = {SAGE Publications Ltd},
  issn = {0268-3962},
  doi = {10.1057/s41265-016-0033-3},
  url = {https://doi.org/10.1057/s41265-016-0033-3},
  urldate = {2023-02-15},
  langid = {english},
  abstract = {As digital platforms are transforming almost every industry today, they are slowly finding their way into the mainstream information systems (ISs) literature. Digital platforms are a challenging research object because of their distributed nature and intertwinement with institutions, markets and technologies. New research challenges arise as a result of the exponentially growing scale of platform innovation, the increasing complexity of platform architectures and the spread of digital platforms to many different industries. This paper develops a research agenda for digital platforms research in IS. We recommend researchers seek to (1) advance conceptual clarity by providing clear definitions that specify the unit of analysis, degree of digitality and the sociotechnical nature of digital platforms; (2) define the proper scoping of digital platform concepts by studying platforms on different architectural levels and in different industry settings; and (3) advance methodological rigour by employing embedded case studies, longitudinal studies, design research, data-driven modelling and visualisation techniques. Considering current developments in the business domain, we suggest six questions for further research: (1) Are platforms here to stay? (2) How should platforms be designed? (3) How do digital platforms transform industries? (4) How can data-driven approaches inform digital platforms research? (5) How should researchers develop theory for digital platforms? and (6) How do digital platforms affect everyday life?},
  file = {/Users/dkapitan/Zotero/storage/Z2GNA5YA/de Reuver et al. - 2018 - The Digital Platform A Research Agenda.pdf},
}
@inproceedings{dereuver2022openness,
  author = {family=Reuver, given=Mark, prefix=de, useprefix=true and Ofe, Hosea and Agahari, Wirawan and Abbas, Antragama Ewa and Zuiderwijk, Anneke},
  title = {The Openness of Data Platforms: A Research Agenda},
  shorttitle = {The Openness of Data Platforms},
  booktitle = {Proceedings of the 1st {{International Workshop}} on {{Data Economy}}},
  series = {{{DE}} '22},
  date = {2022-12-06},
  pages = {34--41},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  isbn = {978-1-4503-9923-4},
  doi = {10.1145/3565011.3569056},
  url = {https://doi.org/10.1145/3565011.3569056},
  urldate = {2023-02-15},
  keywords = {data ecosystem,data marketplace,data platform,platform openness},
  abstract = {Data platforms are the keystone of the data economy. When opened up, data platforms allow data owners, data consumers and third parties to interact. Yet, openness may also harm business and societal interests. Literature on platform openness does not cover data platforms, and data economy scholars rarely study platform openness. Therefore, this paper develops a research agenda on the openness of data platforms. We explore how data platforms differ from conventional digital platforms (e.g., software platforms). From those differentiating characteristics, we identify areas for future work: (1) The specific characteristics of data require reconceptualizing the object of platform openness; (2) New ways in which data platforms can be opened should be conceptualized; (3) As data platforms are tailored to specific industries, platform-to-platform openness should be a novel unit of analysis; (4) Because opening up data platforms create novel risks, new reasons to (not) open up data platforms should be studied.},
  file = {/Users/dkapitan/Zotero/storage/CSL6NCAX/de Reuver et al. - 2022 - The openness of data platforms a research agenda.pdf},
}
@online{digitalpublicgoods,
title = {Digital {{Public Goods Alliance}}},
date = {2024},
url = {https://digitalpublicgoods.net/},
urldate = {2024-02-05},
abstract = {The Digital Public Goods Alliance is a multi-stakeholder initiative that accelerates the attainment of the sustainable development goals by facilitating the discovery, development, use of, and investment in digital public goods.},
organization = {Digital Public Goods Alliance},
file = {/Users/dkapitan/Zotero/storage/J5Y9ZM7S/digitalpublicgoods.net.html}
}
@online{duckdb,
title = {An In-Process {{SQL OLAP}} Database Management System},
author = {{DuckDB}},
url = {https://duckdb.org/},
urldate = {2024-10-10},
abstract = {DuckDB is an in-process SQL OLAP database management system. Simple, feature-rich, fast \& open source.},
langid = {english},
organization = {DuckDB},
file = {/Users/dkapitan/Zotero/storage/K5X9337J/duckdb.org.html}
}
@article{duda2022hl7,
title = {{{HL7 FHIR-based}} Tools and Initiatives to Support Clinical Research: A Scoping Review},
shorttitle = {{{HL7 FHIR-based}} Tools and Initiatives to Support Clinical Research},
author = {Duda, Stephany N and Kennedy, Nan and Conway, Douglas and Cheng, Alex C and Nguyen, Viet and Zayas-Cabán, Teresa and Harris, Paul A},
date = {2022-09-01},
journaltitle = {Journal of the American Medical Informatics Association},
shortjournal = {Journal of the American Medical Informatics Association},
volume = {29},
number = {9},
pages = {1642--1653},
issn = {1527-974X},
doi = {10.1093/jamia/ocac105},
url = {https://doi.org/10.1093/jamia/ocac105},
urldate = {2023-01-20},
abstract = {The HL7® fast healthcare interoperability resources (FHIR®) specification has emerged as the leading interoperability standard for the exchange of healthcare data. We conducted a scoping review to identify trends and gaps in the use of FHIR for clinical research. We reviewed published literature, federally funded project databases, application websites, and other sources to discover FHIR-based papers, projects, and tools (collectively, “FHIR projects”) available to support clinical research activities. Our search identified 203 different FHIR projects applicable to clinical research. Most were associated with preparations to conduct research, such as data mapping to and from FHIR formats (n\,=\,66, 32.5\%) and managing ontologies with FHIR (n\,=\,30, 14.8\%), or post-study data activities, such as sharing data using repositories or registries (n\,=\,24, 11.8\%), general research data sharing (n\,=\,23, 11.3\%), and management of genomic data (n\,=\,21, 10.3\%). With the exception of phenotyping (n\,=\,19, 9.4\%), fewer FHIR-based projects focused on needs within the clinical research process itself. Funding and usage of FHIR-enabled solutions for research are expanding, but most projects appear focused on establishing data pipelines and linking clinical systems such as electronic health records, patient-facing data systems, and registries, possibly due to the relative newness of FHIR and the incentives for FHIR integration in health information systems. Fewer FHIR projects were associated with research-only activities. The FHIR standard is becoming an essential component of the clinical research enterprise. To develop FHIR’s full potential for clinical research, funding and operational stakeholders should address gaps in FHIR-based research tools and methods.},
file = {/Users/dkapitan/Zotero/storage/J4EIBEFS/Duda et al. - 2022 - HL7 FHIR-based tools and initiatives to support cl.pdf;/Users/dkapitan/Zotero/storage/5P3NTBII/6639865.html}
}
@online{ehds2pilot,
title = {Recommendations of Standards for Data Interoperability, Querying and Exchange and on {{QC}}/{{QA}} \& Provenance ({{WP8}})},
date = {2024-12-17T10:34:03+00:00},
url = {https://ehds2pilot.eu/upcoming_results/recommendations-of-standards-for-data-interoperability-querying-and-exchange-2/},
urldate = {2024-12-30},
abstract = {These deliverables, led by BBMRI-ERIC, in the framework of WP8, will define and document the journeys of the use cases, outlining their experiences and the steps undertaken to achieve data interoperability and quality. It will collect feedback from these journeys, identifying both successes and challenges encountered. This feedback will then be compared to an “ideal”},
langid = {american},
organization = {EHDS2 Pilot},
file = {/Users/dkapitan/Zotero/storage/IUYKR4T3/recommendations-of-standards-for-data-interoperability-querying-and-exchange-2.html}
}
@software{ehrbase,
title = {{{EHRbase}}},
date = {2024-03-19T18:10:44+01:00},
url = {https://www.ehrbase.org/},
urldate = {2024-09-20},
abstract = {Creating the right data environment for tomorrow. EHRbase is the leading open source electronic health record backend.},
file = {/Users/dkapitan/Zotero/storage/7TS4ELF2/www.ehrbase.org.html}
}
@article{estrin2010health,
title = {{{Open mHealth}} Architecture: An Engine for Health Care Innovation},
shorttitle = {{{Open mHealth}} Architecture},
author = {Estrin, Deborah and Sim, Ida},
date = {2010-11-05},
journaltitle = {Science},
shortjournal = {Science},
volume = {330},
number = {6005},
eprint = {21051617},
eprinttype = {pmid},
pages = {759--760},
issn = {1095-9203},
doi = {10.1126/science.1196187},
langid = {english},
keywords = {Cell Phone,Delivery of Health Care,Humans,Internet,Medical Informatics,Software,Telemedicine},
file = {/Users/dkapitan/Zotero/storage/9CHH5MAQ/Estrin and Sim - 2010 - Health care delivery. Open mHealth architecture a.pdf}
}
@online{fhir-implementations,
title = {{{FHIR Open Source Implementations}}},
author = {{HL7 International}},
date = {2024-09-20},
url = {https://confluence.hl7.org/display/FHIR/Open+Source+Implementations},
urldate = {2024-09-20},
file = {/Users/dkapitan/Zotero/storage/9NXY7PER/Open+Source+Implementations.html}
}
@software{fhirconnectspec,
title = {{{FHIR Connect}} Specification},
date = {2024-10-10T12:26:01Z},
origdate = {2022-12-14T09:08:27Z},
url = {https://github.com/better-care/fhir-connect-mapping-spec},
urldate = {2025-02-04},
organization = {Better}
}
@report{firely2023fhir,
title = {{{FHIR}} in {{US}} Healthcare Regulations},
author = {{Firely}},
date = {2023-10-26},
type = {Report},
institution = {Firely},
url = {https://simplifier.net/organization/firely/news/153},
urldate = {2024-05-30},
langid = {english},
file = {/Users/dkapitan/Zotero/storage/RTZU9IAH/firely2023fhir-in-us.pdf}
}
@online{garst2023comprehensive,
title = {A Comprehensive Experimental Comparison between Federated and Centralized Learning},
author = {Garst, Swier and Dekker, Julian and Reinders, Marcel},
date = {2023-07-29},
eprint = {2023.07.26.550615},
eprinttype = {bioRxiv},
eprintclass = {New Results},
doi = {10.1101/2023.07.26.550615},
url = {https://www.biorxiv.org/content/10.1101/2023.07.26.550615v1},
urldate = {2024-10-24},
abstract = {Purpose Federated learning is an upcoming machine learning paradigm which allows data from multiple sources to be used for training of classifiers without the data leaving the source it originally resides. This can be highly valuable for use cases such as medical research, where gathering data at a central location can be quite complicated due to privacy and legal concerns of the data. In such cases, federated learning has the potential to vastly speed up the research cycle. Although federated and central learning have been compared from a theoretical perspective, an extensive experimental comparison of performances and learning behavior still lacks. Methods We have performed a comprehensive experimental comparison between federated and centralized learning. We evaluated various classifiers on various datasets exploring influences of different sample distributions as well as different class distributions across the clients. Results The results show similar performances under a wide variety of settings between the federated and central learning strategies. Federated learning is able to deal with various imbalances in the data distributions. It is sensitive to batch effects between different datasets when they coincide with location, similar as with central learning, but this setting might go unobserved more easily. Conclusion Federated learning seems robust to various challenges such as skewed data distributions, high data dimensionality, multiclass problems and complex models. Taken together, the insights from our comparison gives much promise for applying federated learning as an alternative to sharing data.},
langid = {english},
pubstate = {prepublished},
file = {/Users/dkapitan/Zotero/storage/VWMUG34U/Garst et al. - 2023 - A comprehensive experimental comparison between federated and centralized learning.pdf}
}
@article{gentner2023data,
title = {Data {{Lakes}} in {{Healthcare}}: {{Applications}} and {{Benefits}} from the {{Perspective}} of {{Data Sources}} and {{Players}}},
shorttitle = {Data {{Lakes}} in {{Healthcare}}},
author = {Gentner, Tobias and Neitzel, Timon and Schulze, Jacob and Gerschner, Felix and Theissler, Andreas},
date = {2023},
journaltitle = {Procedia Computer Science},
shortjournal = {Procedia Computer Science},
volume = {225},
pages = {1302--1311},
issn = {1877-0509},
doi = {10.1016/j.procs.2023.10.118},
url = {https://linkinghub.elsevier.com/retrieve/pii/S1877050923012760},
urldate = {2024-12-02},
abstract = {As the amount of available data in healthcare has increased significantly and only 20\% of electronic health record data are in a structured format, data lakes have become a common solution for managing heterogeneous data in the healthcare domain. Nowadays, these are utilized far below their capabilities in medical research. Since previous reviews only partly address data lakes in the healthcare domain, a systematic literature review on this topic is missing. Therefore, this paper provides an overview of applications in the healthcare domain that benefit from data lakes. We review the literature and structure it according to data sources and players, and we identify applications and future research needs of data lakes in the healthcare domain. Overall, it turned out that all players could benefit from the capabilities of data lakes. We found that data lakes are currently not broadly implemented in the field, and the viewpoint of hospital operators and healthcare insurers seems to be an underresearched topic compared to the other players.},
langid = {english},
file = {/Users/dkapitan/Zotero/storage/55AX3GRC/Gentner et al. - 2023 - Data Lakes in Healthcare Applications and Benefits from the Perspective of Data Sources and Players.pdf}
}
@inproceedings{giebler2020zone,
title = {A {{Zone Reference Model}} for {{Enterprise-Grade Data Lake Management}}},
booktitle = {2020 {{IEEE}} 24th {{International Enterprise Distributed Object Computing Conference}} ({{EDOC}})},
author = {Giebler, Corinna and Gröger, Christoph and Hoos, Eva and Schwarz, Holger and Mitschang, Bernhard},
date = {2020-10},
pages = {57--66},
publisher = {IEEE},
venue = {Eindhoven, Netherlands},
doi = {10.1109/EDOC49727.2020.00017},
url = {https://ieeexplore.ieee.org/document/9233155/},
urldate = {2024-12-02},
abstract = {Data lakes are on the rise as data platforms for any kind of analytics, from data exploration to machine learning. They achieve the required flexibility by storing heterogeneous data in their raw format, and by avoiding the need for pre-defined use cases. However, storing only raw data is inefficient, as for many applications, the same data processing has to be applied repeatedly. To foster the reuse of processing steps, literature proposes to store data in different degrees of processing in addition to their raw format. To this end, data lakes are typically structured in zones. There exists various zone models, but they are varied, vague, and no assessments are given. It is unclear which of these zone models is applicable in a practical data lake implementation in enterprises. In this work, we assess existing zone models using requirements derived from multiple representative data analytics use cases of a real-world industry case. We identify the shortcomings of existing work and develop a zone reference model for enterprise-grade data lake management in a detailed manner. We assess the reference model’s applicability through a prototypical implementation for a real-world enterprise data lake use case. This assessment shows that the zone reference model meets the requirements relevant in practice and is ready for industry use.},
eventtitle = {2020 {{IEEE}} 24th {{International Enterprise Distributed Object Computing Conference}} ({{EDOC}})},
isbn = {978-1-72816-473-1},
langid = {english},
file = {/Users/dkapitan/Zotero/storage/F67S5QE4/Giebler et al. - 2020 - A Zone Reference Model for Enterprise-Grade Data Lake Management.pdf}
}
@misc{grievefhir,
title = {{{FHIR}} or {{Relational Model}} for {{Storing Data}}},
author = {Grieve, Grahame},
langid = {english},
file = {/Users/dkapitan/Zotero/storage/YSYGRIWM/Grieve - FHIR or Relational Model for Storing Data.pdf}
}
@article{gruendner2019ketos,
  author = {Gruendner, Julian and Schwachhofer, Thorsten and Sippl, Phillip and Wolf, Nicolas and Erpenbeck, Marcel and Gulden, Christian and Kapsner, Lorenz A. and Zierk, Jakob and Mate, Sebastian and Stürzl, Michael and Croner, Roland and Prokosch, Hans-Ulrich and Toddenroth, Dennis},
  title = {{{KETOS}}: {{Clinical}} Decision Support and Machine Learning as a Service – {{A}} Training and Deployment Platform Based on {{Docker}}, {{OMOP-CDM}}, and {{FHIR Web Services}}},
  shorttitle = {{{KETOS}}},
  journaltitle = {PLOS ONE},
  shortjournal = {PLOS ONE},
  date = {2019-10-03},
  volume = {14},
  number = {10},
  pages = {e0223010},
  publisher = {Public Library of Science},
  issn = {1932-6203},
  doi = {10.1371/journal.pone.0223010},
  url = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0223010},
  urldate = {2024-05-27},
  langid = {english},
  keywords = {Colorectal cancer,Consortia,Machine learning,Machine learning algorithms,Physicians,Preprocessing,Prototypes,Statistical data},
  abstract = {Background and objective To take full advantage of decision support, machine learning, and patient-level prediction models, it is important that models are not only created, but also deployed in a clinical setting. The KETOS platform demonstrated in this work implements a tool for researchers allowing them to perform statistical analyses and deploy resulting models in a secure environment. Methods The proposed system uses Docker virtualization to provide researchers with reproducible data analysis and development environments, accessible via Jupyter Notebook, to perform statistical analysis and develop, train and deploy models based on standardized input data. The platform is built in a modular fashion and interfaces with web services using the Health Level 7 (HL7) Fast Healthcare Interoperability Resources (FHIR) standard to access patient data. In our prototypical implementation we use an OMOP common data model (OMOP-CDM) database. The architecture supports the entire research lifecycle from creating a data analysis environment, retrieving data, and training to final deployment in a hospital setting. Results We evaluated the platform by establishing and deploying an analysis and end user application for hemoglobin reference intervals within the University Hospital Erlangen. To demonstrate the potential of the system to deploy arbitrary models, we loaded a colorectal cancer dataset into an OMOP database and built machine learning models to predict patient outcomes and made them available via a web service. We demonstrated both the integration with FHIR as well as an example end user application. Finally, we integrated the platform with the open source DataSHIELD architecture to allow for distributed privacy preserving data analysis and training across networks of hospitals. Conclusion The KETOS platform takes a novel approach to data analysis, training and deploying decision support models in a hospital or healthcare setting. 
It does so in a secure and privacy-preserving manner, combining the flexibility of Docker virtualization with the advantages of standardized vocabularies, a widely applied database schema (OMOP-CDM), and a standardized way to exchange medical data (FHIR).},
  file = {/Users/dkapitan/Zotero/storage/RWWQ7PJA/Gruendner et al. - 2019 - KETOS Clinical decision support and machine learn.pdf},
}
% Publisher name and place of publication are kept in separate fields (publisher / location).
@article{gruendner2021framework,
title = {A {{Framework}} for {{Criteria-Based Selection}} and {{Processing}} of {{Fast Healthcare Interoperability Resources}} ({{FHIR}}) {{Data}} for {{Statistical Analysis}}: {{Design}} and {{Implementation Study}}},
shorttitle = {A {{Framework}} for {{Criteria-Based Selection}} and {{Processing}} of {{Fast Healthcare Interoperability Resources}} ({{FHIR}}) {{Data}} for {{Statistical Analysis}}},
author = {Gruendner, Julian and Gulden, Christian and Kampf, Marvin and Mate, Sebastian and Prokosch, Hans-Ulrich and Zierk, Jakob},
date = {2021-04-01},
journaltitle = {JMIR Medical Informatics},
volume = {9},
number = {4},
pages = {e25645},
publisher = {JMIR Publications Inc.},
location = {Toronto, Canada},
doi = {10.2196/25645},
url = {https://medinform.jmir.org/2021/4/e25645},
urldate = {2024-05-27},
abstract = {Background: The harmonization and standardization of digital medical information for research purposes is a challenging and ongoing collaborative effort. Current research data repositories typically require extensive efforts in harmonizing and transforming original clinical data. The Fast Healthcare Interoperability Resources (FHIR) format was designed primarily to represent clinical processes; therefore, it closely resembles the clinical data model and is more widely available across modern electronic health records. However, no common standardized data format is directly suitable for statistical analyses, and data need to be preprocessed before statistical analysis. Objective: This study aimed to elucidate how FHIR data can be queried directly with a preprocessing service and be used for statistical analyses. Methods: We propose that the binary JavaScript Object Notation format of the PostgreSQL (PSQL) open source database is suitable for not only storing FHIR data, but also extending it with preprocessing and filtering services, which directly transform data stored in FHIR format into prepared data subsets for statistical analysis. We specified an interface for this preprocessor, implemented and deployed it at University Hospital Erlangen-Nürnberg, generated 3 sample data sets, and analyzed the available data. Results: We imported real-world patient data from 2016 to 2018 into a standard PSQL database, generating a dataset of approximately 35.5 million FHIR resources, including “Patient,” “Encounter,” “Condition” (diagnoses specified using International Classification of Diseases codes), “Procedure,” and “Observation” (laboratory test results). We then integrated the developed preprocessing service with the PSQL database and the locally installed web-based KETOS analysis platform. 
Advanced statistical analyses were feasible using the developed framework using 3 clinically relevant scenarios (data-driven establishment of hemoglobin reference intervals, assessment of anemia prevalence in patients with cancer, and investigation of the adverse effects of drugs). Conclusions: This study shows how the standard open source database PSQL can be used to store FHIR data and be integrated with a specifically developed preprocessing and analysis framework. This enables dataset generation with advanced medical criteria and the integration of subsequent statistical analysis. The web-based preprocessing service can be deployed locally at the hospital level, protecting patients’ privacy while being integrated with existing open source data analysis tools currently being developed across Germany.},
langid = {english},
file = {/Users/dkapitan/Zotero/storage/D2P7B27M/Gruendner et al. - 2021 - A Framework for Criteria-Based Selection and Proce.pdf}
}
% Publisher name and place of publication are kept in separate fields (publisher / location).
@article{gruendner2022architecture,
title = {The {{Architecture}} of a {{Feasibility Query Portal}} for {{Distributed COVID-19 Fast Healthcare Interoperability Resources}} ({{FHIR}}) {{Patient Data Repositories}}: {{Design}} and {{Implementation Study}}},
shorttitle = {The {{Architecture}} of a {{Feasibility Query Portal}} for {{Distributed COVID-19 Fast Healthcare Interoperability Resources}} ({{FHIR}}) {{Patient Data Repositories}}},
author = {Gruendner, Julian and Deppenwiese, Noemi and Folz, Michael and Köhler, Thomas and Kroll, Björn and Prokosch, Hans-Ulrich and Rosenau, Lorenz and Rühle, Mathias and Scheidl, Marc-Anton and Schüttler, Christina and Sedlmayr, Brita and Twrdik, Alexander and Kiel, Alexander and Majeed, Raphael W.},
date = {2022-05-25},
journaltitle = {JMIR Medical Informatics},
volume = {10},
number = {5},
pages = {e36709},
publisher = {JMIR Publications Inc.},
location = {Toronto, Canada},
doi = {10.2196/36709},
url = {https://medinform.jmir.org/2022/5/e36709},
urldate = {2024-05-27},
abstract = {Background: An essential step in any medical research project after identifying the research question is to determine if there are sufficient patients available for a study and where to find them. Pursuing digital feasibility queries on available patient data registries has proven to be an excellent way of reusing existing real-world data sources. To support multicentric research, these feasibility queries should be designed and implemented to run across multiple sites and securely access local data. Working across hospitals usually involves working with different data formats and vocabularies. Recently, the Fast Healthcare Interoperability Resources (FHIR) standard was developed by Health Level Seven to address this concern and describe patient data in a standardized format. The Medical Informatics Initiative in Germany has committed to this standard and created data integration centers, which convert existing data into the FHIR format at each hospital. This partially solves the interoperability problem; however, a distributed feasibility query platform for the FHIR standard is still missing. Objective: This study described the design and implementation of the components involved in creating a cross-hospital feasibility query platform for researchers based on FHIR resources. This effort was part of a large COVID-19 data exchange platform and was designed to be scalable for a broad range of patient data. Methods: We analyzed and designed the abstract components necessary for a distributed feasibility query. This included a user interface for creating the query, backend with an ontology and terminology service, middleware for query distribution, and FHIR feasibility query execution service. Results: We implemented the components described in the Methods section. The resulting solution was distributed to 33 German university hospitals. 
The functionality of the comprehensive network infrastructure was demonstrated using a test data set based on the German Corona Consensus Data Set. A performance test using specifically created synthetic data revealed the applicability of our solution to data sets containing millions of FHIR resources. The solution can be easily deployed across hospitals and supports feasibility queries, combining multiple inclusion and exclusion criteria using standard Health Level Seven query languages such as Clinical Quality Language and FHIR Search. Developing a platform based on multiple microservices allowed us to create an extendable platform and support multiple Health Level Seven query languages and middleware components to allow integration with future directions of the Medical Informatics Initiative. Conclusions: We designed and implemented a feasibility platform for distributed feasibility queries, which works directly on FHIR-formatted data and distributed it across 33 university hospitals in Germany. We showed that developing a feasibility platform directly on the FHIR standard is feasible.},
langid = {english},
file = {/Users/dkapitan/Zotero/storage/JWTLQ68A/Gruendner et al. - 2022 - The Architecture of a Feasibility Query Portal for.pdf}
}
% Journal article: the exporter's eventtitle field (a copy of the journal name) is dropped, since eventtitle is meant for conferences.
@article{hai2023data,
title = {Data {{Lakes}}: {{A Survey}} of {{Functions}} and {{Systems}}},
shorttitle = {Data {{Lakes}}},
author = {Hai, Rihan and Koutras, Christos and Quix, Christoph and Jarke, Matthias},
date = {2023-12},
journaltitle = {IEEE Transactions on Knowledge and Data Engineering},
volume = {35},
number = {12},
pages = {12571--12590},
issn = {1558-2191},
doi = {10.1109/TKDE.2023.3270101},
url = {https://ieeexplore.ieee.org/abstract/document/10107808},
urldate = {2024-01-11},
abstract = {Data lakes are becoming increasingly prevalent for Big Data management and data analytics. In contrast to traditional ‘schema-on-write’ approaches such as data warehouses, data lakes are repositories storing raw data in its original formats and providing a common access interface. Despite the strong interest raised from both academia and industry, there is a large body of ambiguity regarding the definition, functions and available technologies for data lakes. A complete, coherent picture of data lake challenges and solutions is still missing. This survey reviews the development, architectures, and systems of data lakes. We provide a comprehensive overview of research questions for designing and building data lakes. We classify the existing approaches and systems based on their provided functions for data lakes, which makes this survey a useful technical reference for designing, implementing and deploying data lakes. We hope that the thorough comparison of existing solutions and the discussion of open research challenges in this survey will motivate the future development of data lake research and practice.},
file = {/Users/dkapitan/Zotero/storage/R87AQA44/Hai et al. - 2023 - Data Lakes A Survey of Functions and Systems.pdf}
}
@software{hapi-fhir,
  title   = {{{HAPI FHIR}} - {{The Open Source FHIR API}} for {{Java}}},
  url     = {https://hapifhir.io/},
  urldate = {2024-09-20},
  file    = {/Users/dkapitan/Zotero/storage/Z5VJXY8A/hapifhir.io.html},
}
% The conference city is recorded in venue; location is reserved for the publisher's place of publication.
@inproceedings{harby2022data,
title = {From {{Data Warehouse}} to {{Lakehouse}}: {{A Comparative Review}}},
shorttitle = {From {{Data Warehouse}} to {{Lakehouse}}},
booktitle = {2022 {{IEEE International Conference}} on {{Big Data}} ({{Big Data}})},
author = {Harby, Ahmed A. and Zulkernine, Farhana},
date = {2022-12-17},
pages = {389--395},
publisher = {IEEE},
venue = {Osaka, Japan},
doi = {10.1109/BigData55660.2022.10020719},
url = {https://ieeexplore.ieee.org/document/10020719/},
urldate = {2023-04-23},
abstract = {Digital information systems currently generate a vast amount of data every minute which emphasizes the continuing need to advance big data management systems with efficient data ingestion and knowledge extraction capabilities. To address the ‘big data’ problems due to high volume, velocity, variety, and veracity, data management systems evolved from structured databases to big data storage systems, graph databases, data warehouses, and data lakes but each solution has its strengths and shortcomings. The need to produce actionable knowledge fast from unstructured data ingested from distributed sources requires a marriage of data warehouses and data lakes to create a data Lakehouse (LH). The objective is to use the strengths of the data warehouse in producing insights fast from processed merged data, and of the data lake in ingesting and storing high-speed unstructured data with post-storage transformation and analytics capabilities. In this paper, we present a comparative review of the existing data warehouse and data lake technology to highlight their strengths and weaknesses and propose the desired and necessary features of the LH architecture, which has recently gained a lot of attention in the big data management research community.},
eventtitle = {2022 {{IEEE International Conference}} on {{Big Data}} ({{Big Data}})},
isbn = {978-1-66548-045-1},
langid = {english},
file = {/Users/dkapitan/Zotero/storage/H7SQYVUI/Harby and Zulkernine - 2022 - From Data Warehouse to Lakehouse A Comparative Re.pdf}
}
@online{harby2024data,
  author     = {Harby, Ahmed A. and Zulkernine, Farhana},
  title      = {Data {{Lakehouse}}: {{A Survey}} and {{Experimental Study}}},
  shorttitle = {Data {{Lakehouse}}},
  type       = {SSRN Scholarly Paper},
  number     = {4765588},
  date       = {2024-03-20},
  location   = {Rochester, NY},
  pubstate   = {prepublished},
  doi        = {10.2139/ssrn.4765588},
  url        = {https://papers.ssrn.com/abstract=4765588},
  urldate    = {2024-03-27},
  langid     = {english},
  keywords   = {Big data,Data Lake,Data Lakehouse,Data Warehouse},
  abstract   = {Efficient big data management is a dire necessity to manage the exponential growth in data generated by digital information systems to produce usable knowledge. Structured databases, data lakes, and warehouses have each provided a solution with varying degrees of success. However, a new and superior solution, the data Lakehouse, has emerged to extract actionable insights from unstructured data ingested from distributed sources. By combining the strengths of data warehouses and data lakes, the data Lakehouse can process and merge data quickly while ingesting and storing high-speed unstructured data with post-storage transformation and analytics capabilities. The Lakehouse architecture offers the necessary features for optimal functionality and has gained significant attention in the big data management research community. In this paper, we compare data lake, warehouse, and lakehouse systems, highlight their strengths and shortcomings, identify the desired features to handle the evolving challenges in big data management and analysis and propose an advanced data Lakehouse architecture. We also demonstrate the performance of three state-of-the-art data management systems namely HDFS data lake, Hive data warehouse, and Delta lakehouse in managing data for analytical query responses through an experimental study.},
  file       = {/Users/dkapitan/Zotero/storage/3THYQL7N/Harby and Zulkernine - 2024 - Data Lakehouse A Survey and Experimental Study.pdf},
}
% Corporate author is double-braced so it is never split into name parts; the wiki-space breadcrumb that the exporter appended to the page title is kept in titleaddon.
@online{healthri2024agreements,
type = {wiki},
title = {Agreements on the {{National Health Data Infrastructure}} for {{Research}}, {{Policy}} and {{Innovation}}},
titleaddon = {Health-RI Nationale Gezondheidsdata-infrastructuur, Confluence},
author = {{Health-RI}},
date = {2024-01-29},
url = {https://health-ri.atlassian.net/wiki/spaces/HNG/pages/249073646/Agreements+on+the+National+Health+Data+Infrastructure+for+Research+Policy+and+Innovation},
urldate = {2024-06-03},
file = {/Users/dkapitan/Zotero/storage/TF49QFQ4/Agreements+on+the+National+Health+Data+Infrastructure+for+Research+Policy+and+Innovation.html}
}
% Report type added (the URL path files this document under white-papers).
% NOTE(review): urldate (2024-04-04) is earlier than date (2024-05); one of the two is probably wrong, confirm against the source.
@report{hl72024state,
title = {The {{State}} of {{FHIR}} 2024 {{Survey Results}}},
type = {White Paper},
date = {2024-05},
institution = {HL7},
url = {https://www.hl7.org/documentcenter/public/white-papers/2024%20StateofFHIRSurveyResults_final.pdf},
urldate = {2024-04-04},
file = {/Users/dkapitan/Zotero/storage/NRK88P5M/2024 StateofFHIRSurveyResults_final.pdf}
}
% The site name that the exporter appended to the page title is stored in organization instead.
@online{hl7a,
title = {{{HL7}} v2 vs. {{FHIR}}: {{Key Data Standard Differences}}},
shorttitle = {{{HL7}} v2 vs. {{FHIR}}},
organization = {Cloudticity},
url = {https://blog.cloudticity.com/hl7-vs-fhir-key-differences-healthcare-data-exchange},
urldate = {2024-09-20},
abstract = {What's the difference between HL7v2 and FHIR and why is FHIR better? What this means for developers as well as providers, payers, and patients.},
langid = {english},
file = {/Users/dkapitan/Zotero/storage/NS2HAJXT/hl7-vs-fhir-key-differences-healthcare-data-exchange.html}
}
@report{india2020national,
  title       = {National {{Digital Health Mission}}},
  institution = {India National Health Authority},
  date        = {2020},
  file        = {/Users/dkapitan/Zotero/storage/GEK2SB32/ndhm_strategy_overview.pdf},
}
@software{instant-openhie-v2,
  title   = {Instant {{OpenHIE}} V2},
  date    = {2024-07-03},
  url     = {https://jembi.gitbook.io/instant-v2/},
  urldate = {2024-09-20},
  file    = {/Users/dkapitan/Zotero/storage/GHEPC2SQ/instant-v2.html},
}
@online{integration,
  title   = {Integration of {{OMOP}} and {{OHDSI}} Tools in Vantage6},
  url     = {https://distributedlearning.ai/news/omop-integration/},
  urldate = {2024-05-30},
}
% The conference city is recorded in venue; location is reserved for the publisher's place of publication. URL uses https.
@inproceedings{ismail2016hl7,
title = {{{HL7 FHIR Compliant Data Access Model}} for {{Maternal Health Information System}}},
booktitle = {2016 {{IEEE}} 16th {{International Conference}} on {{Bioinformatics}} and {{Bioengineering}} ({{BIBE}})},
author = {Ismail, Saadia and Alshmari, Majed and Qamar, Usman and Butt, Wasi Haider and Latif, Khalid and Ahmad, Hafiz Farooq},
date = {2016-10},
pages = {51--56},
publisher = {IEEE},
venue = {Taichung, Taiwan},
doi = {10.1109/BIBE.2016.9},
url = {https://ieeexplore.ieee.org/document/7789959/},
urldate = {2023-03-19},
abstract = {Effective decision-making to improve healthcare for people depends essentially upon availability of reliable health data. Several developing countries have maternal health indicators lagging behind as compared to international targets set by the UN as Millennium or Sustainable Development Goals. One of the major reasons is poor and non-standardized maternal health record keeping that affect data quality and undermines evidence-based decision making. The aim of this research is the design and development of HL7 FHIR compliant data access model for maintaining maternal health data as FHIR resources to enable effective exchange of health data. The proposed model is implemented as restful web services and data is stored in a NoSQL database for flexibility. To evaluate effectiveness, the system was reviewed by healthcare providers and expectant women. Their feedback highlights the usefulness of the proposed system as compared to traditional record keeping techniques. It is anticipated that the proposed system will lay the foundation of a comprehensive maternal healthcare information system. This shall enable trend analysis for policy-making to help accelerate the efforts for meeting global maternal health targets.},
eventtitle = {2016 {{IEEE}} 16th {{International Conference}} on {{Bioinformatics}} and {{Bioengineering}} ({{BIBE}})},
isbn = {978-1-5090-3834-3},
langid = {english},
file = {/Users/dkapitan/Zotero/storage/Z3WLVGRH/Ismail et al. - 2016 - HL7 FHIR Compliant Data Access Model for Maternal .pdf}
}
% The abstract's code link is stored as a plain URL (the export had turned a LaTeX url macro into escaped literal text).
% PMLR series and volume added; the volume number is the one in the proceedings URL.
@inproceedings{jager2024data,
title = {From {{Data Imputation}} to {{Data Cleaning}} — {{Automated Cleaning}} of {{Tabular Data Improves Downstream Predictive Performance}}},
booktitle = {Proceedings of {{The}} 27th {{International Conference}} on {{Artificial Intelligence}} and {{Statistics}}},
author = {Jäger, Sebastian and Biessmann, Felix},
date = {2024-04-18},
series = {Proceedings of {{Machine Learning Research}}},
volume = {238},
pages = {3394--3402},
publisher = {PMLR},
issn = {2640-3498},
url = {https://proceedings.mlr.press/v238/jager24a.html},
urldate = {2024-11-29},
abstract = {The translation of Machine Learning (ML) research innovations to real-world applications and the maintenance of ML components are hindered by reoccurring challenges, such as reaching high predictive performance, robustness, complying with regulatory constraints, or meeting ethical standards. Many of these challenges are related to data quality and, in particular, to the lack of automation in data pipelines upstream of ML components. Automated data cleaning remains challenging since many approaches neglect the dependency structure of the data errors and require task-specific heuristics or human input for calibration. In this study, we develop and evaluate an application-agnostic ML-based data cleaning approach using well-established imputation techniques for automated detection and cleaning of erroneous values. To improve the degree of automation, we combine imputation techniques with conformal prediction (CP), a model-agnostic and distribution-free method to quantify and calibrate the uncertainty of ML models. Extensive empirical evaluations demonstrate that Conformal Data Cleaning (CDC) improves predictive performance in downstream ML tasks in the majority of cases. Our code is available on GitHub: https://github.com/se-jaeger/conformal-data-cleaning.},
eventtitle = {International {{Conference}} on {{Artificial Intelligence}} and {{Statistics}}},
langid = {english},
file = {/Users/dkapitan/Zotero/storage/2LHQGLF9/Jäger and Biessmann - 2024 - From Data Imputation to Data Cleaning — Automated Cleaning of Tabular Data Improves Downstream Predi.pdf}
}
% Conference paper rather than a journal article, so it is typed as inproceedings with a booktitle.
% NOTE(review): venue (CIDR 2023) and URL were reconstructed from the attached file name p92-jain.pdf; confirm against the proceedings site.
@inproceedings{jain2023analyzing,
title = {Analyzing and {{Comparing Lakehouse Storage Systems}}},
booktitle = {Conference on {{Innovative Data Systems Research}} ({{CIDR}})},
author = {Jain, Paras and Kraft, Peter and Power, Conor and Das, Tathagata and Stoica, Ion and Zaharia, Matei},
date = {2023},
url = {https://www.cidrdb.org/cidr2023/papers/p92-jain.pdf},
abstract = {Lakehouse storage systems that implement ACID transactions and other management features over data lake storage, such as Delta Lake, Apache Hudi and Apache Iceberg, have rapidly grown in popularity, replacing traditional data lakes at many organizations. These open storage systems with rich management features promise to simplify management of large datasets, accelerate SQL workloads, and offer fast, direct file access for other workloads, such as machine learning. However, the research community has not explored the tradeoffs in designing lakehouse systems in detail. In this paper, we analyze the designs of the three most popular lakehouse storage systems—Delta Lake, Hudi and Iceberg—and compare their performance and features among varying axes based on these designs. We also release a simple benchmark, LHBench, that researchers can use to compare other designs. LHBench is available at https://github.com/lhbench/lhbench.},
langid = {english},
file = {/Users/dkapitan/Zotero/storage/AQ2TN7SR/p92-jain.pdf;/Users/dkapitan/Zotero/storage/KLZG5RE5/Jain et al. - 2023 - Analyzing and Comparing Lakehouse Storage Systems.pdf}
}
% Journal article captured as a web page: the journal name is moved out of the title into journaltitle, and year, issue and article id are taken from the URL path 2017/4/e44.
% NOTE(review): the volume is inferred from the publication year and the author list was added from the published article; confirm both against the publisher page.
@article{jmir,
title = {{{Open-Source Electronic Health Record Systems}} for {{Low-Resource Settings}}: {{Systematic Review}}},
author = {Syzdykova, Assel and Malta, André and Zolfo, Maria and Diro, Ermias and Oliveira, José Luis},
date = {2017},
journaltitle = {JMIR Medical Informatics},
volume = {5},
number = {4},
pages = {e44},
url = {https://medinform.jmir.org/2017/4/e44/},
urldate = {2024-06-04},
file = {/Users/dkapitan/Zotero/storage/689HC47Z/e44.html}
}
% PMLR series and volume added; the volume number is the one in the proceedings URL.
@inproceedings{johansson2021calibrating,
title = {Calibrating Multi-Class Models},
booktitle = {Proceedings of the {{Tenth Symposium}} on {{Conformal}} and {{Probabilistic Prediction}} and {{Applications}}},
author = {Johansson, Ulf and Löfström, Tuwe and Boström, Henrik},
date = {2021-09-20},
series = {Proceedings of {{Machine Learning Research}}},
volume = {152},
pages = {111--130},
publisher = {PMLR},
issn = {2640-3498},
url = {https://proceedings.mlr.press/v152/johansson21a.html},
urldate = {2024-10-01},
abstract = {Predictive models communicating algorithmic confidence are very informative, but only if well-calibrated and sharp, i.e., providing accurate probability estimates adjusted for each instance. While almost all machine learning algorithms are able to produce probability estimates, these are often poorly calibrated, thus requiring external calibration. For multiclass problems, external calibration has typically been done using one-vs-all or all-vs-all schemes, thus adding to the computational complexity, but also making it impossible to analyze and inspect the predictive models. In this paper, we suggest a novel approach for calibrating inherently multi-class models. Instead of providing a probability distribution over all labels, the estimation is of the probability that the class label predicted by the underlying model is correct. In an extensive empirical study, it is shown that the suggested approach, when applied to both Platt scaling and Venn-Abers, is able to improve the probability estimates from decision trees, random forests and extreme gradient boosting.},
eventtitle = {Conformal and {{Probabilistic Prediction}} and {{Applications}}},
langid = {english},
file = {/Users/dkapitan/Zotero/storage/DL8W5H5S/Johansson et al. - 2021 - Calibrating multi-class models.pdf}
}
@article{johnson2015data,
  author       = {Johnson, Steven G. and Speedie, Stuart and Simon, Gyorgy and Kumar, Vipin and Westra, Bonnie L.},
  title        = {A {{Data Quality Ontology}} for the {{Secondary Use}} of {{EHR Data}}},
  journaltitle = {AMIA Annual Symposium Proceedings},
  shortjournal = {AMIA Annu Symp Proc},
  date         = {2015-11-05},
  volume       = {2015},
  pages        = {1937--1946},
  issn         = {1942-597X},
  eprint       = {26958293},
  eprinttype   = {pmid},
  pmcid        = {PMC4765682},
  url          = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4765682/},
  urldate      = {2024-09-19},
  abstract     = {The secondary use of EHR data for research is expected to improve health outcomes for patients, but the benefits will only be realized if the data in the EHR is of sufficient quality to support these uses. A data quality (DQ) ontology was developed to rigorously define concepts and enable automated computation of data quality measures. The healthcare data quality literature was mined for the important terms used to describe data quality concepts and harmonized into an ontology. Four high-level data quality dimensions (“correctness”, “consistency”, “completeness” and “currency”) categorize 19 lower level measures. The ontology serves as an unambiguous vocabulary, which defines concepts more precisely than natural language; it provides a mechanism to automatically compute data quality measures; and is reusable across domains and use cases. A detailed example is presented to demonstrate its utility. The DQ ontology can make data validation more common and reproducible.},
  file         = {/Users/dkapitan/Zotero/storage/EAFLQV4S/Johnson et al. - 2015 - A Data Quality Ontology for the Secondary Use of E.pdf},
}
% shortjournal holds the abbreviated journal title; the export had copied the full name into it.
@article{jones2021landscape,
title = {A Landscape Survey of Planned {{SMART}}/{{HL7}} Bulk {{FHIR}} Data Access {{API}} Implementations and Tools},
author = {Jones, James and Gottlieb, Daniel and Mandel, Joshua C and Ignatov, Vladimir and Ellis, Alyssa and Kubick, Wayne and Mandl, Kenneth D},
date = {2021-06-01},
journaltitle = {Journal of the American Medical Informatics Association},
shortjournal = {J Am Med Inform Assoc},
volume = {28},
number = {6},
pages = {1284--1287},
issn = {1527-974X},
doi = {10.1093/jamia/ocab028},
url = {https://doi.org/10.1093/jamia/ocab028},
urldate = {2023-01-20},
abstract = {The Office of National Coordinator for Health Information Technology final rule implementing the interoperability and information blocking provisions of the 21st Century Cures Act requires support for two SMART (Substitutable Medical Applications, Reusable Technologies) application programming interfaces (APIs) and instantiates Health Level Seven International (HL7) Fast Healthcare Interoperability Resources (FHIR) as a lingua franca for health data. We sought to assess the current state and near-term plans for the SMART/HL7 Bulk FHIR Access API implementation across organizations including electronic health record vendors, cloud vendors, public health contractors, research institutions, payors, FHIR tooling developers, and other purveyors of health information technology platforms. We learned that many organizations not required through regulation to use standardized bulk data are rapidly implementing the API for a wide array of use cases. This may portend an unprecedented level of standardized population-level health data exchange that will support an apps and analytics ecosystem. Feedback from early adopters on the API’s limitations and unsolved problems in the space of population health are highlighted.},
file = {/Users/dkapitan/Zotero/storage/PG68K5CE/Jones et al. - 2021 - A landscape survey of planned SMARTHL7 bulk FHIR .pdf;/Users/dkapitan/Zotero/storage/3NTEYRHE/6155897.html}
}
% The coined term e-Chaos is brace-protected as a whole word so styles keep its lower-case e (the abstract spells it e-chaos).
@article{karamagi2022ehealth,
title = {{{eHealth}} or {{e-Chaos}}: {{The}} Use of {{Digital Health Interventions}} for {{Health Systems Strengthening}} in Sub-{{Saharan Africa}} over the Last 10 Years: {{A}} Scoping Review},
shorttitle = {{{eHealth}} or {{e-Chaos}}},
author = {Karamagi, Humphrey C and Muneene, Derrick and Droti, Benson and Jepchumba, Violet and Okeibunor, Joseph C and Nabyonga, Juliet and Asamani, James Avoka and Traore, Moussa and Kipruto, Hillary},
date = {2022-12-03},
journaltitle = {Journal of Global Health},
shortjournal = {J Glob Health},
volume = {12},
pages = {04090},
issn = {2047-2978, 2047-2986},
doi = {10.7189/jogh.12.04090},
url = {https://jogh.org/2022/jogh-12-04090},
urldate = {2024-02-05},
abstract = {Background Digital health solutions are a potent and complementary intervention in health system strengthening to accelerate universal access to health services. Implementing scalable, sustainable, and integrated digital solutions in a coordinated manner is necessary to experience the benefits of digital interventions in health systems. We sought to establish the breadth and scope of available digital health interventions (DHIs) and their functions in sub-Saharan Africa. Methods: We conducted a scoping review according to the Joanne Briggs Institute’s reviewers manual and followed the Preferred Reporting Items for Systematic Reviews and Meta-Analyses - Extension for Scoping Reviews (PRISMA-ScR) checklist and explanation. We retrieved data from the WHO Digital Health Atlas (DHA), the WHO e-Health country profiles report of 2015, and electronic databases. The protocol has been deposited in an open-source platform – the Open Science Framework at https://osf.io/5kzq7. Results The researchers retrieved 983 digital tools used to strengthen health systems in sub-Saharan Africa over the past 10 years. We included 738 DHIs in the analysis while 245 were excluded for not meeting the inclusion criteria. We observed a disproportionate distribution of DHIs towards service delivery (81.7\%, n\,=\,603), health care providers (91.8\%, n\,=\,678), and access and use of information (84.1\%, n\,=\,621). Fifty-three percent (53.4\%, n\,=\,394) of the solutions are established and 47.5\% (n\,=\,582) were aligned to 20\% (n\,=\,5) of the system categories. Conclusions Sub-Saharan Africa is endowed with digital health solutions in both numbers and distinct functions. It is lacking in coordination, integration, scalability, sustainability, and equitable distribution of investments in digital health. 
Digital health policymakers in sub-Saharan Africa need to urgently institute coordination mechanisms to terminate unending duplication and disjointed vertical implementations and manage solutions for scale. Central to this would be to build digital health leadership in countries within SSA, adopt standards and interoperability frameworks; advocate for more investments into lagging components, and promote multi-purpose solutions to halt the seeming “e-chaos” and progress to sustainable e-health solutions.},
langid = {english},
file = {/Users/dkapitan/Zotero/storage/549QGYAI/jogh-12-04090.pdf;/Users/dkapitan/Zotero/storage/U2E7IBUE/jogh-12-04090-s001.pdf}
}
@article{keller2021paradox,
  author       = {Keller, Paul and Tarkowski, Alek},
  title        = {The {{Paradox}} of {{Open}}},
  journaltitle = {Open Future},
  publisher    = {Open Future Foundation},
  date         = {2021-03-05},
  url          = {https://openfuture.pubpub.org/pub/paradox-of-open/release/1},
  urldate      = {2024-03-25},
  langid       = {english},
  abstract     = {In today’s digital environment, openness serves as both a challenge to concentrations of power and its enabler. Solving this paradox is at the heart of our work, which focuses on three objectives.},
  file         = {/Users/dkapitan/Zotero/storage/GRW8I2Y9/Keller and Tarkowski - 2021 - The Paradox of Open.pdf},
}
% shortjournal holds the abbreviated journal title; the export had copied the full name into it.
@article{khalid2021standardized,
title = {A Standardized Analytics Pipeline for Reliable and Rapid Development and Validation of Prediction Models Using Observational Health Data},
author = {Khalid, Sara and Yang, Cynthia and Blacketer, Clair and Duarte-Salles, Talita and Fernández-Bertolín, Sergio and Kim, Chungsoo and Park, Rae Woong and Park, Jimyung and Schuemie, Martijn J. and Sena, Anthony G. and Suchard, Marc A. and You, Seng Chan and Rijnbeek, Peter R. and Reps, Jenna M.},
date = {2021-11-01},
journaltitle = {Computer Methods and Programs in Biomedicine},
shortjournal = {Comput Methods Programs Biomed},
volume = {211},
pages = {106394},
issn = {0169-2607},
doi = {10.1016/j.cmpb.2021.106394},
url = {https://www.sciencedirect.com/science/article/pii/S0169260721004685},
urldate = {2024-05-27},
abstract = {Background and objective As a response to the ongoing COVID-19 pandemic, several prediction models in the existing literature were rapidly developed, with the aim of providing evidence-based guidance. However, none of these COVID-19 prediction models have been found to be reliable. Models are commonly assessed to have a risk of bias, often due to insufficient reporting, use of non-representative data, and lack of large-scale external validation. In this paper, we present the Observational Health Data Sciences and Informatics (OHDSI) analytics pipeline for patient-level prediction modeling as a standardized approach for rapid yet reliable development and validation of prediction models. We demonstrate how our analytics pipeline and open-source software tools can be used to answer important prediction questions while limiting potential causes of bias (e.g., by validating phenotypes, specifying the target population, performing large-scale external validation, and publicly providing all analytical source code). Methods We show step-by-step how to implement the analytics pipeline for the question: ‘In patients hospitalized with COVID-19, what is the risk of death 0 to 30 days after hospitalization?’. We develop models using six different machine learning methods in a USA claims database containing over 20,000 COVID-19 hospitalizations and externally validate the models using data containing over 45,000 COVID-19 hospitalizations from South Korea, Spain, and the USA. Results Our open-source software tools enabled us to efficiently go end-to-end from problem design to reliable Model Development and evaluation. When predicting death in patients hospitalized with COVID-19, AdaBoost, random forest, gradient boosting machine, and decision tree yielded similar or lower internal and external validation discrimination performance compared to L1-regularized logistic regression, whereas the MLP neural network consistently resulted in lower discrimination. 
L1-regularized logistic regression models were well calibrated. Conclusion Our results show that following the OHDSI analytics pipeline for patient-level prediction modelling can enable the rapid development towards reliable prediction models. The OHDSI software tools and pipeline are open source and available to researchers from all around the world.},
keywords = {COVID-19,Data harmonization,Data quality control,Distributed data network,Machine learning,Risk prediction},
file = {/Users/dkapitan/Zotero/storage/D7HZSKV3/Khalid et al. - 2021 - A standardized analytics pipeline for reliable and.pdf}
}
% Structured literature review of vertical federated learning (preprint).
% NOTE(review): co-authors, date and arXiv identifier were added by hand to
% complete the record; confirm them against the arXiv listing before release.
@online{khanvertical,
  title = {Vertical {{Federated Learning}}: {{A Structured Literature Review}}},
  author = {Khan, Afsana and family=Thij, given=Marijn, prefix=ten, useprefix=false and Wilbik, Anna},
  date = {2022-12},
  eprint = {2212.00622},
  eprinttype = {arXiv},
  abstract = {Federated Learning (FL) has emerged as a promising distributed learning paradigm with an added advantage of data privacy. With the growing interest in having collaboration among data owners, FL has gained significant attention of organizations. The idea of FL is to enable collaborating participants train machine learning (ML) models on decentralized data without breaching privacy. In simpler words, federated learning is the approach of “bringing the model to the data, instead of bringing the data to the model”. Federated learning, when applied to data which is partitioned vertically across participants, is able to build a complete ML model by combining local models trained only using the data with distinct features at the local sites. This architecture of FL is referred to as vertical federated learning (VFL), which differs from the conventional FL on horizontally partitioned data. As VFL is different from conventional FL, it comes with its own issues and challenges. In this paper, we present a structured literature review discussing the state-of-the-art approaches in VFL. Additionally, the literature review highlights the existing solutions to challenges in VFL and provides potential research directions in this domain.},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/D59A9FNF/Khan - Vertical Federated Learning A Structured Literature Review.pdf}
}
% Journal article: blueprint for harmonising disease registries for federated analysis.
% The abstract was cleaned of share-button residue left by the web import; the
% article number from the DOI is recorded in eid because no page range exists.
@article{kroes2022blueprint,
  title = {Blueprint for Harmonising Unstandardised Disease Registries to Allow Federated Data Analysis: Prepare for the Future},
  shorttitle = {Blueprint for Harmonising Unstandardised Disease Registries to Allow Federated Data Analysis},
  author = {Kroes, Johannes A. and Bansal, Aruna T. and Berret, Emmanuelle and Christian, Nils and Kremer, Andreas and Alloni, Anna and Gabetta, Matteo and Marshall, Chris and Wagers, Scott and Djukanovic, Ratko and Porsbjerg, Celeste and Hamerlijnck, Dominique and Fulton, Olivia and family=Brinke, given=Anneke, prefix=ten, useprefix=false and Bel, Elisabeth H. and Sont, Jacob K.},
  date = {2022-10-01},
  journaltitle = {ERJ Open Research},
  volume = {8},
  number = {4},
  eid = {00168-2022},
  publisher = {European Respiratory Society},
  issn = {2312-0541},
  doi = {10.1183/23120541.00168-2022},
  url = {https://openres.ersjournals.com/content/8/4/00168-2022},
  urldate = {2024-06-04},
  abstract = {Real-world evidence from multinational disease registries is becoming increasingly important not only for confirming the results of randomised controlled trials, but also for identifying phenotypes, monitoring disease progression, predicting response to new drugs and early detection of rare side-effects. With new open-access technologies, it has become feasible to harmonise patient data from different disease registries and use it for data analysis without compromising privacy rules. Here, we provide a blueprint for how a clinical research collaboration can successfully use real-world data from existing disease registries to perform federated analyses. We describe how the European severe asthma clinical research collaboration SHARP (Severe Heterogeneous Asthma Research collaboration, Patient-centred) fulfilled the harmonisation process from nonstandardised clinical registry data to the Observational Medical Outcomes Partnership Common Data Model and built a strong network of collaborators from multiple disciplines and countries. The blueprint covers organisational, financial, conceptual, technical, analytical and research aspects, and discusses both the challenges and the lessons learned. All in all, setting up a federated data network is a complex process that requires thorough preparation, but above all, it is a worthwhile investment for all clinical research collaborations, especially in view of the emerging applications of artificial intelligence and federated learning. Tweetable abstract: Harmonising real-world patient data from diverse registries to allow federated analyses is a complex process that requires thorough preparation but is above all a valuable investment, especially in view of emerging applications of artificial intelligence. https://bit.ly/3NEKKnV},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/CZ6TISLJ/Kroes et al. - 2022 - Blueprint for harmonising unstandardised disease r.pdf}
}
% Conference paper on an OpenSRP-driven midwife client management application in Indonesia.
% booktitle is required for inproceedings entries; it is derived here from the
% eventtitle field. NOTE(review): confirm the exact proceedings title with the publisher page.
% The abstract is truncated in the imported record and is kept as imported.
@inproceedings{kurniawan2019midwife,
  title = {Midwife Service Coverage, Quality of Work, and Client Health Improved after Deployment of an {{OpenSRP-driven}} Client Management Application in {{Indonesia}}},
  author = {Kurniawan, Kevin and FitriaSyah, Inraini and Jayakusuma, Ahmad Rafi and Armis, Resty Asmauryanah and Lubis, Yusran and Haryono, Muhammad Abdi and Harefa, Benyamin and Shankar, Anuraj},
  date = {2019-11},
  booktitle = {Proceedings of the 5th {{International Conference}} on {{Health Sciences}} ({{ICHS}} 2018)},
  eventtitle = {5th {{International Conference}} on {{Health Sciences}} ({{ICHS}} 2018)},
  pages = {155--162},
  publisher = {Atlantis Press},
  issn = {2468-5739},
  isbn = {978-94-6252-824-6},
  doi = {10.2991/ichs-18.2019.21},
  url = {https://www.atlantis-press.com/proceedings/ichs-18/125921329},
  urldate = {2024-01-22},
  abstract = {The quality of maternal and child health services remains suboptimal in most low and middle-income countries (LMIC). Data are routinely collected with paper-based systems but are incomplete, underutilized, and reported as poorly-usable aggregated indicators. Therefore, we developed the Open Smart Register Platform (OpenSRP) application for midwives in...},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/AV5DFWKS/Kurniawan2019midwife.pdf;/Users/dkapitan/Zotero/storage/JB3NXXH3/Kurniawan et al. - 2019 - Midwife service coverage, quality of work, and cli.pdf}
}
% Web page describing the FeederNet platform in Korea, hosted on ohdsi.org.
% The site-name suffix that the web import appended to the page title has been
% moved out of the title into the organization field.
@online{lee2022feedernet,
  title = {{{FeederNet}} ({{Federated E-Health Big Data}} for {{Evidence Renovation Network}}) Platform in {{Korea}}},
  author = {Lee, Seongwon and Kim, Chungsoo and Chang, Junyuk and Park, Rae Woong},
  date = {2022},
  organization = {OHDSI},
  url = {https://www.ohdsi.org/2022showcase-33/},
  urldate = {2024-06-04},
  langid = {american},
  file = {/Users/dkapitan/Zotero/storage/SCV5WC6H/2022showcase-33.html}
}
% Journal article: feasibility of federated learning on an OMOP CDM research network.
% The imported record had no date; the year follows from the DOI (hir.2023.29.2.168)
% together with volume 29, number 2. NOTE(review): month assumes issue 2 of a
% quarterly volume appears in April - confirm against the PubMed record.
@article{leefeasibility,
  title = {Feasibility {{Study}} of {{Federated Learning}} on the {{Distributed Research Network}} of {{OMOP Common Data Model}}},
  author = {Lee, Geun Hyeong and Park, Jonggul and Kim, Jihyeong and Kim, Yeesuk and Choi, Byungjin and Park, Rae Woong and Rhee, Sang Youl and Shin, Soo-Yong},
  date = {2023-04},
  journaltitle = {Healthcare Informatics Research},
  shortjournal = {Healthc Inform Res},
  volume = {29},
  number = {2},
  pages = {168--173},
  issn = {2093-3681},
  doi = {10.4258/hir.2023.29.2.168},
  eprint = {37190741},
  eprinttype = {pmid},
  pmcid = {PMC10209729},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10209729/},
  urldate = {2024-06-04},
  abstract = {Objectives Since protecting patients’ privacy is a major concern in clinical research, there has been a growing need for privacy-preserving data analysis platforms. For this purpose, a federated learning (FL) method based on the Observational Medical Outcomes Partnership (OMOP) common data model (CDM) was implemented, and its feasibility was demonstrated. Methods We implemented an FL platform on FeederNet, which is a distributed clinical data analysis platform based on the OMOP CDM in Korea. We trained it through an artificial neural network (ANN) using data from patients who received steroid prescriptions or injections, with the aim of predicting the occurrence of side effects depending on the prescribed dose. The ANN was trained using the FL platform with the OMOP CDMs of Kyung Hee University Medical Center (KHMC) and Ajou University Hospital (AUH). Results The area under the receiver operating characteristic curves (AUROCs) for predicting bone fracture, osteonecrosis, and osteoporosis using only data from each hospital were 0.8426, 0.6920, and 0.7727 for KHMC and 0.7891, 0.7049, and 0.7544 for AUH, respectively. In contrast, when using FL, the corresponding AUROCs were 0.8260, 0.7001, and 0.7928 for KHMC and 0.7912, 0.8076, and 0.7441 for AUH, respectively. In particular, FL led to a 14\% improvement in performance for osteonecrosis at AUH. Conclusions FL can be performed with the OMOP CDM, and FL often shows better performance than using only a single institution’s data. Therefore, research using OMOP CDM has been expanded from statistical analysis to machine learning so that researchers can conduct more diverse research.},
  file = {/Users/dkapitan/Zotero/storage/PGHXSLFQ/Lee et al. - Feasibility Study of Federated Learning on the Dis.pdf}
}
@article{mamuye2022health,
title = {Health Information Exchange Policy and Standards for Digital Health Systems in Africa: {{A}} Systematic Review},
shorttitle = {Health Information Exchange Policy and Standards for Digital Health Systems in Africa},
author = {Mamuye, Adane L. and Yilma, Tesfahun M. and Abdulwahab, Ahmad and Broomhead, Sean and Zondo, Phumzule and Kyeng, Mercy and Maeda, Justin and Abdulaziz, Mohammed and Wuhib, Tadesse and Tilahun, Binyam C.},
date = {2022-10-10},
journaltitle = {PLOS Digital Health},
shortjournal = {PLOS Digital Health},
volume = {1},
number = {10},
pages = {e0000118},
publisher = {Public Library of Science},
issn = {2767-3170},
doi = {10.1371/journal.pdig.0000118},