% plugin.bib

% HCX Protocol v0.9: protocol specification for health-claims data exchange, cited as a web resource.
@standard{2023hcx,
  title        = {{{HCX Protocol}} v0.9},
  date         = {2023-12-01},
  url          = {http://hcxprotocol.io/},
  urldate      = {2024-09-18},
  langid       = {english},
  abstract     = {Open-source, community driven protocol for Health Claims data Exchange},
  file         = {/Users/dkapitan/Zotero/storage/7R9WM4UH/hcxprotocol.io.html}
}

% Allaart et al. (IJCNN 2022): predictive performance of Vertical Split Learning on 8 datasets.
% The conference city is stored in `venue`; biblatex reserves `location` for the
% publisher's address, so "Padua, Italy" must not be attached to IEEE.
@inproceedings{allaart2022vertical,
  author       = {Allaart, Corinne G. and Keyser, Bjorn and Bal, Henri and Van Halteren, Aart},
  title        = {Vertical {{Split Learning}} - an Exploration of Predictive Performance in Medical and Other Use Cases},
  booktitle    = {2022 {{International Joint Conference}} on {{Neural Networks}} ({{IJCNN}})},
  eventtitle   = {2022 {{International Joint Conference}} on {{Neural Networks}} ({{IJCNN}})},
  venue        = {Padua, Italy},
  date         = {2022-07-18},
  pages        = {1--8},
  publisher    = {IEEE},
  isbn         = {978-1-72818-671-9},
  doi          = {10.1109/IJCNN55064.2022.9891964},
  url          = {https://ieeexplore.ieee.org/document/9891964/},
  urldate      = {2024-10-07},
  langid       = {english},
  abstract     = {In healthcare and other fields, data of an individual is often vertically partitioned across multiple organizations. Creating a centralized data store for AI algorithm development is cumbersome in such cases because of concerns like privacy and data ownership. Methods of distributed learning over vertically partitioned data could offer a solution here. While several studies have evaluated the feasibility, privacy and efficiency of such methods, an extensive evaluation of their impact on predictive performance compared to a centralized approach is missing. Vertical Split Learning (VSL) aims to provide vertical distributed learning through distributed neural network architectures. Our study adapts and applies VSL to 8 datasets, both in medicine and beyond, evaluating the impact of different network and (vertical) feature distributions on predictive performance. In most configurations VSL yields comparable predictive performance to its centralized counterparts. However, certain data and network distributions give an unexpected and severe loss of performance. Based on our findings we give some initial recommendations under which conditions VSL can be applied as a suitable alternative for data centralization.},
  file         = {/Users/dkapitan/Zotero/storage/X2SM7XDJ/Allaart et al. - 2022 - Vertical Split Learning - an exploration of predictive performance in medical and other use cases.pdf}
}

% Antunes et al. (ACM TIST 2022): systematic review of federated learning in healthcare.
% Second author is "Cristiano André da Costa": the given name "André" had been absorbed into
% the family name. Written in Biber's extended name format so the "da" particle is kept as a prefix.
@article{antunes2022federated,
  author       = {Antunes, Rodolfo Stoffel and family=Costa, given=Cristiano André, prefix=da, useprefix=true and Küderle, Arne and Yari, Imrana Abdullahi and Eskofier, Björn},
  title        = {Federated {{Learning}} for {{Healthcare}}: {{Systematic Review}} and {{Architecture Proposal}}},
  shorttitle   = {Federated {{Learning}} for {{Healthcare}}},
  journaltitle = {ACM Transactions on Intelligent Systems and Technology},
  shortjournal = {ACM Trans. Intell. Syst. Technol.},
  date         = {2022-08-31},
  volume       = {13},
  number       = {4},
  pages        = {1--23},
  issn         = {2157-6904, 2157-6912},
  doi          = {10.1145/3501813},
  url          = {https://dl.acm.org/doi/10.1145/3501813},
  urldate      = {2023-04-23},
  langid       = {english},
  abstract     = {FL enables the training of ML models locally (at the location of the data) and only shares the resulting model, which is not reverse-engineerable, with the requesting party. Therefore, FL avoids the need to share the private datasets and sensitive data to others, preventing exposition to entities conducting studies and enabling data usage for broader purposes [11]. A central entity manages the learning process and distributes the training algorithm to each participating data holder. Each participant generates a local model trained with their private data and shares the resulting parameters with the central entity. Finally, the central entity employs an aggregation algorithm to combine the parameters of all local models into a single global model.},
  file         = {/Users/dkapitan/Zotero/storage/94L35GLV/Antunes et al. - 2022 - Federated Learning for Healthcare Systematic Revi.pdf}
}

% Apache Arrow project home page, cited as a web resource.
% NOTE(review): `date` equals `urldate` - presumably the capture date rather than a publication date; confirm.
@standard{apache-arrow,
  title        = {Apache {{Arrow}}},
  date         = {2024-09-20},
  url          = {https://arrow.apache.org/},
  urldate      = {2024-09-20},
  langid       = {american},
  abstract     = {A cross-language development platform for in-memory analytics},
  file         = {/Users/dkapitan/Zotero/storage/TIBXI35H/arrow.apache.org.html}
}

% Apache Iceberg project home page, cited as a web resource (no date recorded).
@standard{apache-iceberg,
  title        = {Apache {{Iceberg}}},
  url          = {https://iceberg.apache.org/},
  urldate      = {2024-09-20},
  file         = {/Users/dkapitan/Zotero/storage/WDUZH5EF/iceberg.apache.org.html}
}

% Apache Parquet project home page, cited as a web resource.
@standard{apache-parquet,
  title        = {Apache {{Parquet}}},
  date         = {2024-09-20},
  url          = {https://parquet.apache.org/},
  urldate      = {2024-09-20},
  langid       = {english},
  abstract     = {The Apache Parquet Website},
  file         = {/Users/dkapitan/Zotero/storage/6ZYX6IMX/parquet.apache.org.html}
}

% Armbrust et al. (CIDR 2021): position paper introducing the Lakehouse architecture.
% `booktitle` mirrors `eventtitle` because @inproceedings needs it to print the venue.
% The paper is 8 pages long, so the count lives in `pagetotal`; `pages` is for a page range.
% NOTE(review): `url` points at the CIDR online proceedings - confirm it resolves and add `urldate`.
@inproceedings{armbrust2021lakehouse,
  author       = {Armbrust, Michael and Ghodsi, Ali and Xin, Reynold and Zaharia, Matei},
  title        = {Lakehouse: {{A New Generation}} of {{Open Platforms}} That {{Unify Data Warehousing}} and {{Advanced Analytics}}},
  booktitle    = {11th {{Annual Conference}} on {{Innovative Data Systems Research}} ({{CIDR}} ’21)},
  eventtitle   = {11th {{Annual Conference}} on {{Innovative Data Systems Research}} ({{CIDR}} ’21)},
  date         = {2021},
  pagetotal    = {8},
  url          = {https://www.cidrdb.org/cidr2021/papers/cidr2021_paper17.pdf},
  langid       = {english},
  abstract     = {This paper argues that the data warehouse architecture as we know it today will wither in the coming years and be replaced by a new architectural pattern, the Lakehouse, which will (i) be based on open direct-access data formats, such as Apache Parquet, (ii) have first-class support for machine learning and data science, and (iii) offer state-of-the-art performance. Lakehouses can help address several major challenges with data warehouses, including data staleness, reliability, total cost of ownership, data lock-in, and limited use-case support. We discuss how the industry is already moving toward Lakehouses and how this shift may affect work in data management. We also report results from a Lakehouse system using Parquet that is competitive with popular cloud data warehouses on TPC-DS.},
  file         = {/Users/dkapitan/Zotero/storage/8XX2TSTM/Armbrust et al. - 2021 - Lakehouse A New Generation of Open Platforms that.pdf}
}

% Bacher et al. (AMIA Summits 2024): FHIR module for the OpenMRS EHR and its use in LMIC deployments.
% Identified by PubMed id (eprint) and PMC id; no DOI is recorded.
@article{bacher2024fhiring,
  author       = {Bacher, I and Goodrich, M and Kimaina, A and Seaton, M and Faulkenberry, G and Vaish, S and Flowers, J and family=Fraser, given=HS, given-i=HS},
  title        = {{{FHIRing}} up {{OpenMRS}}: {{Architecture}}, {{Implementation}} and {{Real-World Use-Cases}} in {{Global Health}}},
  shorttitle   = {{{FHIRing}} up {{OpenMRS}}},
  journaltitle = {AMIA Summits on Translational Science Proceedings},
  shortjournal = {AMIA Jt Summits Transl Sci Proc},
  date         = {2024-05-31},
  volume       = {2024},
  pages        = {162--171},
  issn         = {2153-4063},
  eprint       = {38827065},
  eprinttype   = {pmid},
  pmcid        = {PMC11141833},
  url          = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC11141833/},
  urldate      = {2024-06-04},
  abstract     = {HL7 FHIR was created almost a decade ago and is seeing increasingly wide use in high income settings. Although some initial work was carried out in low and middle income (LMIC) settings there has been little impact until recently. The need for reliable and easy to implement interoperability between health information systems in LMICs is growing with large scale deployments of EHRs, national reporting systems and mHealth applications. The OpenMRS open source EHR has been deployed in more than 44 LMIC with increasing needs for interoperability with other HIS. We describe here the development and deployment of a new FHIR module supporting the latest standards and its use in interoperability with laboratory systems, mHealth applications, pharmacy dispensing system and as a tool for supporting advanced user interface designs. We also show how it facilitates date science projects and deployment of machine leaning based CDSS and precision medicine in LMICs.},
  file         = {/Users/dkapitan/Zotero/storage/IXLQFUR2/Bacher et al. - 2024 - FHIRing up OpenMRS Architecture, Implementation a.pdf}
}

% Bak et al. (Medicine, Health Care and Philosophy, 2023): trust as a basis for governing health data research.
% NOTE(review): no volume/number/pages recorded - presumably exported while the article was
% online-first; confirm against the DOI record.
@article{bak2023trustbased,
  author       = {Bak, Marieke A. R. and Ploem, M. Corrette and Tan, Hanno L. and Blom, M. T. and Willems, Dick L.},
  title        = {Towards Trust-Based Governance of Health Data Research},
  journaltitle = {Medicine, Health Care and Philosophy},
  shortjournal = {Med Health Care and Philos},
  date         = {2023-01-12},
  issn         = {1572-8633},
  doi          = {10.1007/s11019-022-10134-8},
  url          = {https://doi.org/10.1007/s11019-022-10134-8},
  urldate      = {2023-04-24},
  langid       = {english},
  keywords     = {Big data,Data sharing,ESCAPE-NET,GDPR,Governance,Privacy,Research ethics,Solidarity,Trust},
  abstract     = {Developments in medical big data analytics may bring societal benefits but are also challenging privacy and other ethical values. At the same time, an overly restrictive data protection regime can form a serious threat to valuable observational studies. Discussions about whether data privacy or data solidarity should be the foundational value of research policies, have remained unresolved. We add to this debate with an empirically informed ethical analysis. First, experiences with the implementation of the General Data Protection Regulation (GDPR) within a European research consortium demonstrate a gap between the aims of the regulation and its effects in practice. Namely, strictly formalised data protection requirements may cause routinisation among researchers instead of substantive ethical reflection, and may crowd out trust between actors in the health data research ecosystem; while harmonisation across Europe and data sharing between countries is hampered by different interpretations of the law, which partly stem from different views about ethical values. Then, building on these observations, we use theory to argue that the concept of trust provides an escape from the privacy-solidarity debate. Lastly, the paper details three aspects of trust that can help to create a responsible research environment and to mitigate the encountered challenges: trust as multi-agent concept; trust as a rational and democratic value; and trust as method for priority setting. Mutual cooperation in research—among researchers and with data subjects—is grounded in trust, which should be more explicitly recognised in the governance of health data research.},
  file         = {/Users/dkapitan/Zotero/storage/UNPATT4L/Bak et al. - 2023 - Towards trust-based governance of health data rese.pdf}
}

% Bak et al. (Nature Machine Intelligence, 2024): comment on the ethical limits of federated learning.
% `url` is the canonical article page; the previous value was a personal share link
% (".epdf?sharing_token=...") that exposes a token and is not a stable address.
% NOTE(review): pages 1--3 with no volume looks like online-first pagination - confirm against the DOI record.
@article{bak2024federated,
  author       = {Bak, Marieke and Madai, Vince I. and Celi, Leo Anthony and Kaissis, Georgios A. and Cornet, Ronald and Maris, Menno and Rueckert, Daniel and Buyx, Alena and McLennan, Stuart},
  title        = {Federated Learning Is Not a Cure-All for Data Ethics},
  journaltitle = {Nature Machine Intelligence},
  shortjournal = {Nat Mach Intell},
  date         = {2024-03-18},
  pages        = {1--3},
  publisher    = {Nature Publishing Group},
  issn         = {2522-5839},
  doi          = {10.1038/s42256-024-00813-x},
  url          = {https://www.nature.com/articles/s42256-024-00813-x},
  urldate      = {2024-03-18},
  langid       = {english},
  keywords     = {Computational models,Ethics,Machine learning,Medical ethics},
  abstract     = {Although federated learning is often seen as a promising solution to allow AI innovation while addressing privacy concerns, we argue that this technology does not fix all underlying data ethics concerns. Benefiting from federated learning in digital health requires acknowledgement of its limitations.}
}

% Balch et al. (JMIR Medical Informatics, 2023): scoping review of FHIR-based machine-learning clinical systems.
% The publisher's city is kept in `location` instead of being appended to the publisher name,
% so styles can format (or omit) it independently.
@article{balch2023machine,
  author       = {Balch, Jeremy A. and Ruppert, Matthew M. and Loftus, Tyler J. and Guan, Ziyuan and Ren, Yuanfang and Upchurch, Gilbert R. and Ozrazgat-Baslanti, Tezcan and Rashidi, Parisa and Bihorac, Azra},
  title        = {Machine {{Learning}}–{{Enabled Clinical Information Systems Using Fast Healthcare Interoperability Resources Data Standards}}: {{Scoping Review}}},
  shorttitle   = {Machine {{Learning}}–{{Enabled Clinical Information Systems Using Fast Healthcare Interoperability Resources Data Standards}}},
  journaltitle = {JMIR Medical Informatics},
  date         = {2023-08-24},
  volume       = {11},
  number       = {1},
  pages        = {e48297},
  publisher    = {JMIR Publications Inc.},
  location     = {Toronto, Canada},
  doi          = {10.2196/48297},
  url          = {https://medinform.jmir.org/2023/1/e48297},
  urldate      = {2024-10-10},
  langid       = {english},
  abstract     = {Background: Machine Learning-Enabled Clinical Information Systems (ML-CIS) have the potential to drive healthcare delivery and research. The Fast Healthcare Interoperability Resources (FHIR) data standard is increasingly applied in developing these systems. However, methods for applying FHIR to ML-CIS are variable. Objective: This study evaluates and compares the functionalities, strengths, and weaknesses of existing systems and proposes guidelines for optimizing future work with ML-CIS. Methods: Embase, PubMed, and Web of Science were searched for articles describing machine-learning systems used for clinical data analytics or decision support in compliance with FHIR standards. Information regarding each system’s functionality, data sources, formats, security, performance, resource requirements, scalability, strengths, and limitations were compared across systems. Results: 39 articles describing FHIR-based ML-CIS were divided into three categories according to their primary focus: Clinical Decision Support Systems (CDSSs) (n=18), data management and analytic platforms (n=10), or auxiliary modules and application programming interfaces (n=11). Model strengths included novel use of cloud systems, Bayesian networks, visualization strategies, and techniques for translating unstructured or free text data to FHIR frameworks. Most intelligent systems lacked electronic health record interoperability and externally validated evidence of clinical efficacy. Conclusions: Shortcomings in current ML-CIS can be addressed by incorporating modular and interoperable data management, analytic platforms, secure inter-institutional data exchange, and application programming interfaces with adequate scalability to support both real-time and prospective clinical applications using electronic health record platforms with diverse implementations.},
  file         = {/Users/dkapitan/Zotero/storage/ZUT8FMDJ/Balch et al. - 2023 - Machine Learning–Enabled Clinical Information Systems Using Fast Healthcare Interoperability Resourc.pdf;/Users/dkapitan/Zotero/storage/BZSWAX22/e48297.html}
}

% Beck (Communications of the ACM, 2019): the hourglass model of layered system design.
@article{beck2019hourglass,
  author       = {Beck, Micah},
  title        = {On the Hourglass Model},
  journaltitle = {Communications of the ACM},
  date         = {2019-06},
  volume       = {62},
  number       = {7},
  pages        = {48--57},
  issn         = {0001-0782, 1557-7317},
  doi          = {10.1145/3274770},
  langid       = {english},
  abstract     = {Used in the design of the Internet and Unix, the layered services of the hourglass model have enabled viral adoption and deployment scalability.},
  file         = {/Users/dkapitan/Zotero/storage/NKGQZL5V/Beck - 2019 - On the hourglass model.pdf}
}

% Beda EMR product page, cited as a web resource (no author or date recorded).
@online{beda-emr,
  title        = {Beda {{EMR}}},
  url          = {https://beda.software/emr},
  urldate      = {2024-12-30},
  abstract     = {Beda EMR System with no-code customisation},
  file         = {/Users/dkapitan/Zotero/storage/NCC2GQR5/emr.html}
}

% Bennett et al. (JAMIA, 2023): conversion of the MIMIC-IV database to HL7 FHIR.
% Carries both a DOI and a PubMed id (eprint) plus the PMC id.
@article{bennett2023mimiciva,
  author       = {Bennett, Alex M. and Ulrich, Hannes and family=Damme, given=Philip, prefix=van, useprefix=true and Wiedekopf, Joshua and Johnson, Alistair E. W.},
  title        = {{{MIMIC-IV}} on {{FHIR}}: Converting a Decade of in-Patient Data into an Exchangeable, Interoperable Format},
  shorttitle   = {{{MIMIC-IV}} on {{FHIR}}},
  journaltitle = {Journal of the American Medical Informatics Association: JAMIA},
  shortjournal = {J Am Med Inform Assoc},
  date         = {2023-03-16},
  volume       = {30},
  number       = {4},
  pages        = {718--725},
  issn         = {1527-974X},
  doi          = {10.1093/jamia/ocad002},
  eprint       = {36688534},
  eprinttype   = {pmid},
  pmcid        = {PMC10018258},
  langid       = {english},
  keywords     = {Datasets as Topic,electronic health records,Electronic Health Records,fast healthcare interoperability resources,Health Level Seven,HL7 FHIR,Humans,Information Dissemination,Information Storage and Retrieval,interoperability,MIMIC-IV,open data,Patients,Reproducibility of Results},
  abstract     = {OBJECTIVE: Convert the Medical Information Mart for Intensive Care (MIMIC)-IV database into Health Level 7 Fast Healthcare Interoperability Resources (FHIR). Additionally, generate and publish an openly available demo of the resources, and create a FHIR Implementation Guide to support and clarify the usage of MIMIC-IV on FHIR. MATERIALS AND METHODS: FHIR profiles and terminology system of MIMIC-IV were modeled from the base FHIR R4 resources. Data and terminology were reorganized from the relational structure into FHIR according to the profiles. Resources generated were validated for conformance with the FHIR profiles. Finally, FHIR resources were published as newline delimited JSON files and the profiles were packaged into an implementation guide. RESULTS: The modeling of MIMIC-IV in FHIR resulted in 25 profiles, 2 extensions, 35 ValueSets, and 34 CodeSystems. An implementation guide encompassing the FHIR modeling can be accessed at mimic.mit.edu/fhir/mimic. The generated demo dataset contained 100 patients and over 915~000 resources. The full dataset contained 315~000 patients covering approximately 5~840~000 resources. The final datasets in NDJSON format are accessible on PhysioNet. DISCUSSION: Our work highlights the challenges and benefits of generating a real-world FHIR store. The challenges arise from terminology mapping and profiling modeling decisions. The benefits come from the extensively validated openly accessible data created as a result of the modeling work. CONCLUSION: The newly created MIMIC-IV on FHIR provides one of the first accessible deidentified critical care FHIR datasets. The extensive real-world data found in MIMIC-IV on FHIR will be invaluable for research and the development of healthcare applications.},
  file         = {/Users/dkapitan/Zotero/storage/TN5X7YHN/Bennett et al. - 2023 - MIMIC-IV on FHIR converting a decade of in-patient data into an exchangeable, interoperable format.pdf}
}

% Bönisch et al. (Scientific Data, 2022): metadata crosswalk between FHIR, OMOP, CDISC and openEHR.
% Only `number` carries the issue; the duplicate `issue = {1}` was dropped because biblatex
% prints `issue` in addition to `number`, which repeats the "1" in the rendered reference.
@article{bonisch2022harvesting,
  author       = {Bönisch, Caroline and Kesztyüs, Dorothea and Kesztyüs, Tibor},
  title        = {Harvesting Metadata in Clinical Care: A Crosswalk between {{FHIR}}, {{OMOP}}, {{CDISC}} and {{openEHR}} Metadata},
  shorttitle   = {Harvesting Metadata in Clinical Care},
  journaltitle = {Scientific Data},
  shortjournal = {Sci Data},
  date         = {2022-10-28},
  volume       = {9},
  number       = {1},
  pages        = {659},
  publisher    = {Nature Publishing Group},
  issn         = {2052-4463},
  doi          = {10.1038/s41597-022-01792-7},
  url          = {https://www.nature.com/articles/s41597-022-01792-7},
  urldate      = {2023-06-21},
  langid       = {english},
  keywords     = {Health care,Public health},
  abstract     = {Metadata describe information about data source, type of creation, structure, status and semantics and are prerequisite for preservation and reuse of medical data. To overcome the hurdle of disparate data sources and repositories with heterogeneous data formats a metadata crosswalk was initiated, based on existing standards. FAIR Principles were included, as well as data format specifications. The metadata crosswalk is the foundation of data provision between a Medical Data Integration Center (MeDIC) and researchers, providing a selection of metadata information for research design and requests. Based on the crosswalk, metadata items were prioritized and categorized to demonstrate that not one single predefined standard meets all requirements of a MeDIC and only a maximum data set of metadata is suitable for use. The development of a convergence format including the maximum data set is the anticipated solution for an automated transformation of metadata in a MeDIC.},
  file         = {/Users/dkapitan/Zotero/storage/JH8VKLQ9/Bönisch et al. - 2022 - Harvesting metadata in clinical care a crosswalk .pdf}
}

% Carmo and Martins (2024): deliverable D2.2, "EHRxF in a nutshell".
% `type`, `number` and `institution` are supplied because biblatex's @report data model
% expects a report type and an issuing institution.
% NOTE(review): institution is taken from the "WP2-ISCTE" suffix in the title - confirm the issuing body.
@report{carmo2024d22,
  author       = {Carmo, Anderson and Martins, Henrique},
  title        = {D2.2 - {{EHRxF}} in a Nutshell-{{WP2-ISCTE}}},
  type         = {Project deliverable},
  number       = {D2.2},
  institution  = {{ISCTE}},
  date         = {2024-07-04},
  url          = {https://ehr-exchange-format.eu/wp-content/uploads/2024/10/D2.2-v20240704-EHRxF-in-a-Nutshell-WP2-ISCTE.pdf},
  langid       = {english},
  file         = {/Users/dkapitan/Zotero/storage/TJJZ24PP/Carmo - D2.2 - EHRxF in a nutshell-WP2-ISCTE.pdf}
}

% Cauchois, Gupta and Duchi: conformal prediction sets for multiclass and multilabel problems.
% The entry had no journal, date, volume or pages, so an @article could not be rendered;
% the publication data below is for the Journal of Machine Learning Research version.
% NOTE(review): publication details were added from the published JMLR record - verify
% volume/number/pages and the URL against jmlr.org before relying on them.
@article{cauchoisknowing,
  author       = {Cauchois, Maxime and Gupta, Suyash and Duchi, John C.},
  title        = {Knowing What {{You Know}}: Valid and Validated Confidence Sets in Multiclass and Multilabel Prediction},
  journaltitle = {Journal of Machine Learning Research},
  date         = {2021},
  volume       = {22},
  number       = {81},
  pages        = {1--42},
  url          = {http://jmlr.org/papers/v22/20-753.html},
  langid       = {english},
  abstract     = {We develop conformal prediction methods for constructing valid predictive confidence sets in multiclass and multilabel problems without assumptions on the data generating distribution. A challenge here is that typical conformal prediction methods—which give marginal validity (coverage) guarantees—provide uneven coverage, in that they address easy examples at the expense of essentially ignoring difficult examples. By leveraging ideas from quantile regression, we build methods that always guarantee correct coverage but additionally provide (asymptotically consistent) conditional coverage for both multiclass and multilabel prediction problems. To address the potential challenge of exponentially large confidence sets in multilabel prediction, we build tree-structured classifiers that efficiently account for interactions between labels. Our methods can be bolted on top of any classification model—neural network, random forest, boosted tree—to guarantee its validity. We also provide an empirical evaluation, simultaneously providing new validation methods, that suggests the more robust coverage of our confidence sets.},
  file         = {/Users/dkapitan/Zotero/storage/IVV2P7PT/Cauchois et al. - Knowing what You Know valid and validated conﬁdence sets in multiclass and multilabel prediction.pdf}
}

% Chen et al. (JMIR Medical Informatics, 2021): deep-learning model and training system for ICD-10 coding.
% The abstract's "(P<.05)" had been exported as the escaped HTML entity "\&lt;", which would
% typeset as literal "&lt;"; it is restored to "<".
% The publisher's city is kept in `location` instead of being appended to the publisher name.
@article{chen2021automatic,
  author       = {Chen, Pei-Fu and Wang, Ssu-Ming and Liao, Wei-Chih and Kuo, Lu-Cheng and Chen, Kuan-Chih and Lin, Yu-Cheng and Yang, Chi-Yu and Chiu, Chi-Hao and Chang, Shu-Chih and Lai, Feipei},
  title        = {Automatic {{ICD-10 Coding}} and {{Training System}}: {{Deep Neural Network Based}} on {{Supervised Learning}}},
  shorttitle   = {Automatic {{ICD-10 Coding}} and {{Training System}}},
  journaltitle = {JMIR Medical Informatics},
  date         = {2021-08-31},
  volume       = {9},
  number       = {8},
  pages        = {e23230},
  publisher    = {JMIR Publications Inc.},
  location     = {Toronto, Canada},
  doi          = {10.2196/23230},
  url          = {https://medinform.jmir.org/2021/8/e23230},
  urldate      = {2024-04-11},
  langid       = {english},
  abstract     = {Background: The International Classification of Diseases (ICD) code is widely used as the reference in medical system and billing purposes. However, classifying diseases into ICD codes still mainly relies on humans reading a large amount of written material as the basis for coding. Coding is both laborious and time-consuming. Since the conversion of ICD-9 to ICD-10, the coding task became much more complicated, and deep learning– and natural language processing–related approaches have been studied to assist disease coders. Objective: This paper aims at constructing a deep learning model for ICD-10 coding, where the model is meant to automatically determine the corresponding diagnosis and procedure codes based solely on free-text medical notes to improve accuracy and reduce human effort. Methods: We used diagnosis records of the National Taiwan University Hospital as resources and apply natural language processing techniques, including global vectors, word to vectors, embeddings from language models, bidirectional encoder representations from transformers, and single head attention recurrent neural network, on the deep neural network architecture to implement ICD-10 auto-coding. Besides, we introduced the attention mechanism into the classification model to extract the keywords from diagnoses and visualize the coding reference for training freshmen in ICD-10. Sixty discharge notes were randomly selected to examine the change in the F1-score and the coding time by coders before and after using our model. Results: In experiments on the medical data set of National Taiwan University Hospital, our prediction results revealed F1-scores of 0.715 and 0.618 for the ICD-10 Clinical Modification code and Procedure Coding System code, respectively, with a bidirectional encoder representations from transformers embedding approach in the Gated Recurrent Unit classification model.
The well-trained models were applied on the ICD-10 web service for coding and training to ICD-10 users. With this service, coders can code with the F1-score significantly increased from a median of 0.832 to 0.922 (P<.05), but not in a reduced interval. Conclusions: The proposed model significantly improved the F1-score but did not decrease the time consumed in coding by disease coders.},
  file         = {/Users/dkapitan/Zotero/storage/JPYLNNH4/Chen et al. - 2021 - Automatic ICD-10 Coding and Training System Deep .pdf}
}

% Choudhury et al. (2020): Personal Health Train on FHIR - privacy-preserving federated cohort statistics.
% Published in a Springer proceedings volume in the series named in `series`.
@inproceedings{choudhury2020personal,
  author       = {Choudhury, Ananya and family=Soest, given=Johan, prefix=van, useprefix=true and Nayak, Stuti and Dekker, Andre},
  editor       = {Bhattacharjee, Arup and Borgohain, Samir Kr. and Soni, Badal and Verma, Gyanendra and Gao, Xiao-Zhi},
  title        = {Personal {{Health Train}} on {{FHIR}}: {{A Privacy Preserving Federated Approach}} for {{Analyzing FAIR Data}} in {{Healthcare}}},
  shorttitle   = {Personal {{Health Train}} on {{FHIR}}},
  booktitle    = {Machine {{Learning}}, {{Image Processing}}, {{Network Security}} and {{Data Sciences}}},
  series       = {Communications in {{Computer}} and {{Information Science}}},
  date         = {2020},
  pages        = {85--95},
  publisher    = {Springer},
  location     = {Singapore},
  isbn         = {9789811563157},
  doi          = {10.1007/978-981-15-6315-7_7},
  langid       = {english},
  keywords     = {FAIR,FHIR,Personal health train},
  abstract     = {Big data and machine learning applications focus on retrieving data on a central location for analysis. However, healthcare data can be sensitive in nature and as such difficult to share and make use for secondary purposes. Healthcare vendors are restricted to share data without proper consent from the patient. There is a rising awareness among individual patients as well regarding sharing their personal information due to ethical, legal and societal problems. The current data-sharing platforms in healthcare do not sufficiently handle these issues. The rationale of the Personal Health Train (PHT) approach shifts the focus from sharing data to sharing processing/analysis applications and their respective results. A prerequisite of the PHT-infrastructure is that the data is FAIR (findable, accessible, interoperable, reusable). The aim of the paper is to describe a methodology of finding the number of patients diagnosed with hypertension and calculate cohort statistics in a privacy-preserving federated manner. The whole process completes without individual patient data leaving the source. For this, we rely on the Fast Healthcare Interoperability Resources (FHIR) standard.},
  file         = {/Users/dkapitan/Zotero/storage/MX4FY5MX/Choudhury et al. - 2020 - Personal Health Train on FHIR A Privacy Preservin.pdf}
}

% Chraibi et al. (2021): NLP/deep-learning pipeline for ICD-10 coding of French medical texts.
% Chapter in an IOS Press volume; `isbn` holds two ISBNs separated by a space.
@incollection{chraibi2021deep,
  author       = {Chraibi, Abdelahad and Delerue, David and Taillard, Julien and Chaib Draa, Ismat and Beuscart, Régis and Hansske, Arnaud},
  editor       = {Mantas, John and Stoicu-Tivadar, Lăcrămioara and Chronaki, Catherine and Hasman, Arie and Weber, Patrick and Gallos, Parisis and Crişan-Vida, Mihaela and Zoulias, Emmanouil and Chirila, Oana Sorina},
  title        = {A {{Deep Learning Framework}} for {{Automated ICD-10 Coding}}},
  booktitle    = {Studies in {{Health Technology}} and {{Informatics}}},
  date         = {2021-05-27},
  publisher    = {IOS Press},
  isbn         = {978-1-64368-184-9 978-1-64368-185-6},
  doi          = {10.3233/SHTI210178},
  url          = {https://ebooks.iospress.nl/doi/10.3233/SHTI210178},
  urldate      = {2024-04-11},
  langid       = {english},
  abstract     = {The International Statistical Classification of Diseases and Related Health Problems (ICD) is one of the widely used classification system for diagnoses and procedures to assign diagnosis codes to Electronic Health Record (EHR) associated with a patient’s stay. The aim of this paper is to propose an automated coding system to assist physicians in the assignment of ICD codes to EHR. For this purpose, we created a pipeline of Natural Language Processing (NLP) and Deep Learning (DL) models able to extract the useful information from French medical texts and to perform classification. After the evaluation phase, our approach was able to predict 346 diagnosis codes from heterogeneous medical units with an accuracy average of 83\%. Our results were finally validated by physicians of the Medical Information Department (MID) in charge of coding hospital stays.},
  file         = {/Users/dkapitan/Zotero/storage/7JVHUMR6/Chraibi et al. - 2021 - A Deep Learning Framework for Automated ICD-10 Cod.pdf}
}

% ClickHouse: open-source column-oriented OLAP database, cited as software via its home page.
% The author is a corporate name, so it is double-braced to stop name parsing and abbreviation.
% The title uses the product's own spelling ("ClickHouse", as in the author and abstract
% fields) and brace-protects it against style-driven recasing.
@software{clickhouse,
  author       = {{ClickHouse}},
  title        = {{{ClickHouse}}: {{Fast Open-Source OLAP DBMS}}},
  url          = {https://clickhouse.com},
  urldate      = {2024-09-20},
  abstract     = {ClickHouse is a fast open-source column-oriented database management system that allows generating analytical data reports in real-time using SQL queries},
  file         = {/Users/dkapitan/Zotero/storage/N6RERXVA/clickhouse.com.html}
}

% arXiv preprint 2401.13744 (the identifier is taken from the recorded URL).
% `eprint`/`eprinttype` make the arXiv id a first-class field, so the reference survives
% changes to the HTML rendering URL.
% NOTE(review): authors and year were added from the arXiv record for 2401.13744 - verify
% them against arxiv.org/abs/2401.13744.
@online{conformal,
  author       = {Cresswell, Jesse C. and Sui, Yi and Kumar, Bhargava and Vouitsis, Noël},
  title        = {Conformal {{Prediction Sets Improve Human Decision Making}}},
  date         = {2024},
  eprint       = {2401.13744},
  eprinttype   = {arXiv},
  url          = {https://arxiv.org/html/2401.13744v3},
  urldate      = {2024-10-02},
  file         = {/Users/dkapitan/Zotero/storage/VJ3GD6ZW/2401.html}
}

% Cremonesi et al. (Journal of Biomedical Informatics, 2023): choosing a common data model
% (FHIR, OMOP, Phenopackets) for a federated-learning healthcare platform.
@article{cremonesi2023need,
  author       = {Cremonesi, Francesco and Planat, Vincent and Kalokyri, Varvara and Kondylakis, Haridimos and Sanavia, Tiziana and Miguel Mateos Resinas, Victor and Singh, Babita and Uribe, Silvia},
  title        = {The Need for Multimodal Health Data Modeling: {{A}} Practical Approach for a Federated-Learning Healthcare Platform},
  shorttitle   = {The Need for Multimodal Health Data Modeling},
  journaltitle = {Journal of Biomedical Informatics},
  shortjournal = {Journal of Biomedical Informatics},
  date         = {2023-05-01},
  volume       = {141},
  pages        = {104338},
  issn         = {1532-0464},
  doi          = {10.1016/j.jbi.2023.104338},
  url          = {https://www.sciencedirect.com/science/article/pii/S153204642300059X},
  urldate      = {2024-01-16},
  keywords     = {Data model,Federated learning,Healthcare,Lessons learned,Medical research,Omics},
  abstract     = {Federated learning initiatives in healthcare are being developed to collaboratively train predictive models without the need to centralize sensitive personal data. GenoMed4All is one such project, with the goal of connecting European clinical and –omics data repositories on rare diseases through a federated learning platform. Currently, the consortium faces the challenge of a lack of well-established international datasets and interoperability standards for federated learning applications on rare diseases. This paper presents our practical approach to select and implement a Common Data Model (CDM) suitable for the federated training of predictive models applied to the medical domain, during the initial design phase of our federated learning platform. We describe our selection process, composed of identifying the consortium’s needs, reviewing our functional and technical architecture specifications, and extracting a list of business requirements. We review the state of the art and evaluate three widely-used approaches (FHIR, OMOP and Phenopackets) based on a checklist of requirements and specifications. We discuss the pros and cons of each approach considering the use cases specific to our consortium as well as the generic issues of implementing a European federated learning healthcare platform. A list of lessons learned from the experience in our consortium is discussed, from the importance of establishing the proper communication channels for all stakeholders to technical aspects related to –omics data. For federated learning projects focused on secondary use of health data for predictive modeling, encompassing multiple data modalities, a phase of data model convergence is sorely needed to gather different data representations developed in the context of medical research, interoperability of clinical care software, imaging, and –omics analysis into a coherent, unified data model. 
Our work identifies this need and presents our experience and a list of actionable lessons learned for future work in this direction.},
  file         = {/Users/dkapitan/Zotero/storage/C5RQXIRH/Cremonesi et al. - 2023 - The need for multimodal health data modeling A pr.pdf;/Users/dkapitan/Zotero/storage/K2A9EKFC/S153204642300059X.html}
}

@article{dalhatu2023paper,
  title = {From {{Paper Files}} to {{Web-Based Application}} for {{Data-Driven Monitoring}} of {{HIV Programs}}: {{Nigeria}}'s {{Journey}} to a {{National Data Repository}} for {{Decision-Making}} and {{Patient Care}}},
  shorttitle = {From {{Paper Files}} to {{Web-Based Application}} for {{Data-Driven Monitoring}} of {{HIV Programs}}},
  author = {Dalhatu, Ibrahim and Aniekwe, Chinedu and Bashorun, Adebobola and Abdulkadir, Alhassan and Dirlikov, Emilio and Ohakanu, Stephen and Adedokun, Oluwasanmi and Oladipo, Ademola and Jahun, Ibrahim and Murie, Lisa and Yoon, Steven and Abdu-Aguye, Mubarak G. and Sylvanus, Ahmed and Indyer, Samuel and Abbas, Isah and Bello, Mustapha and Nalda, Nannim and Alagi, Matthias and Odafe, Solomon and Adebajo, Sylvia and Ogorry, Otse and Akpu, Murphy and Okoye, Ifeanyi and Kakanfo, Kunle and Onovo, Amobi Andrew and Ashefor, Gregory and Nzelu, Charles and Ikpeazu, Akudo and Aliyu, Gambo and Ellerbrock, Tedd and Boyd, Mary and Stafford, Kristen A. and Swaminathan, Mahesh},
  date = {2023-09},
  journaltitle = {Methods of Information in Medicine},
  shortjournal = {Methods Inf Med},
  volume = {62},
  number = {03/04},
  pages = {130--139},
  issn = {0026-1270, 2511-705X},
  doi = {10.1055/s-0043-1768711},
  url = {http://www.thieme-connect.de/DOI/DOI?10.1055/s-0043-1768711},
  urldate = {2024-03-25},
  abstract = {Background: Timely and reliable data are crucial for clinical, epidemiologic, and program management decision making. Electronic health information systems provide platforms for managing large longitudinal patient records. Nigeria implemented the National Data Repository (NDR) to create a central data warehouse of all people living with human immunodeficiency virus (PLHIV) while providing useful functionalities to aid decision making at different levels of program implementation. Objective: We describe the Nigeria NDR and its development process, including its use for surveillance, research, and national HIV program monitoring toward achieving HIV epidemic control. Methods: Stakeholder engagement meetings were held in 2013 to gather information on data elements and vocabulary standards for reporting patient-level information, technical infrastructure, human capacity requirements, and information flow. Findings from these meetings guided the development of the NDR. An implementation guide provided common terminologies and data reporting structures for data exchange between the NDR and the electronic medical record (EMR) systems. Data from the EMR were encoded in extensible markup language and sent to the NDR over secure hypertext transfer protocol after going through a series of validation processes. Results: By June 30, 2021, the NDR had up-to-date records of 1,477,064 (94.4\%) patients receiving HIV treatment across 1,985 health facilities, of which 1,266,512 (85.7\%) patient records had fingerprint template data to support unique patient identification and record linkage to prevent registration of the same patient under different identities. Data from the NDR was used to support HIV program monitoring, case-based surveillance and production of products like the monthly lists of patients who have treatment interruptions and dashboards for monitoring HIV test and start. Conclusion: The NDR enabled the availability of reliable and timely data for surveillance, research, and HIV program monitoring to guide program improvements to accelerate progress toward epidemic control.},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/TGU3P9XW/Dalhatu et al. - 2023 - From Paper Files to Web-Based Application for Data.pdf}
}

@software{dbt,
  title = {{{dbt}}},
  url = {https://www.getdbt.com/index},
  urldate = {2024-09-20},
  abstract = {Use dbt to build reliable data models quickly and collaboratively—featuring version control, automated documentation, and integrated testing.},
  file = {/Users/dkapitan/Zotero/storage/UBVUQEMW/www.getdbt.com.html}
}

@unpublished{deltomme2024federated,
  title = {Federated {{Health Innovation Network}} ({{FHIN}})},
  author = {Deltomme, Camille and Denturck, Kim and De Jaeger, Peter and Willems, Wouter and De Caluwe, Bram and Hellebaut, Geert and Pierlet, Noëlla and Van Brantegem, Karel and Heirman, Peter and Thorrez, Yves and Deschepper, Mieke},
  date = {2024-09-20},
  note = {Poster, {{OHDSI Europe Symposium}} 2024},
  url = {https://www.ohdsi-europe.org/images/symposium-2024/Posters/poster%20OHDSI%20FHIN%20Camille%20Deltomme%20-%20Camille%20Deltomme.pdf}
}

@article{delussu2024survey,
  author       = {Delussu, Giovanni and Frexia, Francesca and Mascia, Cecilia and Sulis, Alessandro and Meloni, Vittorio and Del Rio, Mauro and Lianas, Luca},
  title        = {A Survey of {{openEHR Clinical Data Repositories}}},
  date         = {2024-11-01},
  journaltitle = {International Journal of Medical Informatics},
  shortjournal = {International Journal of Medical Informatics},
  volume       = {191},
  pages        = {105591},
  issn         = {1386-5056},
  doi          = {10.1016/j.ijmedinf.2024.105591},
  url          = {https://www.sciencedirect.com/science/article/pii/S1386505624002545},
  urldate      = {2025-02-05},
  keywords     = {Archetype,CDR,Clinical Data Repository,ISO 13606,openEHR,Survey},
  file         = {/Users/dkapitan/Zotero/storage/UMZ4UUY2/S1386505624002545.html}
}

@article{demello2022semantic,
  author       = {family=Mello, given=Blanda Helena, prefix=de, useprefix=true and Rigo, Sandro José and family=Costa, given=Cristiano André, prefix=da, useprefix=true and family=Rosa Righi, given=Rodrigo, prefix=da, useprefix=true and Donida, Bruna and Bez, Marta Rosecler and Schunke, Luana Carina},
  title        = {Semantic Interoperability in Health Records Standards: A Systematic Literature Review},
  shorttitle   = {Semantic Interoperability in Health Records Standards},
  date         = {2022-03-01},
  journaltitle = {Health and Technology},
  shortjournal = {Health Technol.},
  volume       = {12},
  number       = {2},
  pages        = {255--272},
  issn         = {2190-7196},
  doi          = {10.1007/s12553-022-00639-w},
  url          = {https://doi.org/10.1007/s12553-022-00639-w},
  urldate      = {2024-06-03},
  langid       = {english},
  keywords     = {EHR,Health record,Health standard,Semantic interoperability,Systematic review},
  abstract     = {The integration and exchange of information among health organizations and system providers are currently regarded as a challenge. Each organization usually has an internal ecosystem and a proprietary way to store electronic health records of the patient’s history. Recent research explores the advantages of an integrated ecosystem by exchanging information between the different inpatient care actors. Many efforts seek quality in health care, economy, and sustainability in process management. Some examples are reducing medical errors, disease control and monitoring, individualized patient care, and avoiding duplicate and fragmented entries in the electronic medical record. Likewise, some studies showed technologies to achieve this goal effectively and efficiently, with the ability to interoperate data, allowing the interpretation and use of health information. To that end, semantic interoperability aims to share data among all the sectors in the organization, clinicians, nurses, lab, the entire hospital. Therefore, avoiding data silos and keep data regardless of vendors, to exchange the information across organizational boundaries. This study presents a comprehensive systematic literature review of semantic interoperability in electronic health records. We searched seven databases of articles published between 2010 to September 2020. We showed the most chosen scenarios, technologies, and tools employed to solve interoperability problems, and we propose a taxonomy around semantic interoperability in health records. Also, we presented the main approaches to solve the exchange problem of legacy and heterogeneous data across healthcare organizations.},
  file         = {/Users/dkapitan/Zotero/storage/2KJQL4X8/de Mello et al. - 2022 - Semantic interoperability in health records standa.pdf}
}

@article{dereuver2018digital,
  author       = {family=Reuver, given=Mark, prefix=de, useprefix=true and Sørensen, Carsten and Basole, Rahul C.},
  title        = {The {{Digital Platform}}: {{A Research Agenda}}},
  shorttitle   = {The {{Digital Platform}}},
  date         = {2018-06-01},
  journaltitle = {Journal of Information Technology},
  volume       = {33},
  number       = {2},
  pages        = {124--135},
  publisher    = {SAGE Publications Ltd},
  issn         = {0268-3962},
  doi          = {10.1057/s41265-016-0033-3},
  url          = {https://doi.org/10.1057/s41265-016-0033-3},
  urldate      = {2023-02-15},
  langid       = {english},
  abstract     = {As digital platforms are transforming almost every industry today, they are slowly finding their way into the mainstream information systems (ISs) literature. Digital platforms are a challenging research object because of their distributed nature and intertwinement with institutions, markets and technologies. New research challenges arise as a result of the exponentially growing scale of platform innovation, the increasing complexity of platform architectures and the spread of digital platforms to many different industries. This paper develops a research agenda for digital platforms research in IS. We recommend researchers seek to (1) advance conceptual clarity by providing clear definitions that specify the unit of analysis, degree of digitality and the sociotechnical nature of digital platforms; (2) define the proper scoping of digital platform concepts by studying platforms on different architectural levels and in different industry settings; and (3) advance methodological rigour by employing embedded case studies, longitudinal studies, design research, data-driven modelling and visualisation techniques. Considering current developments in the business domain, we suggest six questions for further research: (1) Are platforms here to stay? (2) How should platforms be designed? (3) How do digital platforms transform industries? (4) How can data-driven approaches inform digital platforms research? (5) How should researchers develop theory for digital platforms? and (6) How do digital platforms affect everyday life?},
  file         = {/Users/dkapitan/Zotero/storage/Z2GNA5YA/de Reuver et al. - 2018 - The Digital Platform A Research Agenda.pdf}
}

@inproceedings{dereuver2022openness,
  author     = {family=Reuver, given=Mark, prefix=de, useprefix=true and Ofe, Hosea and Agahari, Wirawan and Abbas, Antragama Ewa and Zuiderwijk, Anneke},
  title      = {The Openness of Data Platforms: A Research Agenda},
  shorttitle = {The Openness of Data Platforms},
  booktitle  = {Proceedings of the 1st {{International Workshop}} on {{Data Economy}}},
  series     = {{{DE}} '22},
  date       = {2022-12-06},
  pages      = {34--41},
  publisher  = {Association for Computing Machinery},
  location   = {New York, NY, USA},
  isbn       = {978-1-4503-9923-4},
  doi        = {10.1145/3565011.3569056},
  url        = {https://doi.org/10.1145/3565011.3569056},
  urldate    = {2023-02-15},
  keywords   = {data ecosystem,data marketplace,data platform,platform openness},
  abstract   = {Data platforms are the keystone of the data economy. When opened up, data platforms allow data owners, data consumers and third parties to interact. Yet, openness may also harm business and societal interests. Literature on platform openness does not cover data platforms, and data economy scholars rarely study platform openness. Therefore, this paper develops a research agenda on the openness of data platforms. We explore how data platforms differ from conventional digital platforms (e.g., software platforms). From those differentiating characteristics, we identify areas for future work: (1) The specific characteristics of data require reconceptualizing the object of platform openness; (2) New ways in which data platforms can be opened should be conceptualized; (3) As data platforms are tailored to specific industries, platform-to-platform openness should be a novel unit of analysis; (4) Because opening up data platforms create novel risks, new reasons to (not) open up data platforms should be studied.},
  file       = {/Users/dkapitan/Zotero/storage/CSL6NCAX/de Reuver et al. - 2022 - The openness of data platforms a research agenda.pdf}
}

@online{digitalpublicgoods,
  title = {Digital {{Public Goods Alliance}}},
  date = {2024},
  url = {https://digitalpublicgoods.net/},
  urldate = {2024-02-05},
  abstract = {Unlocking the potential of open-source technologies for a more equitable world. The Digital Public Goods Alliance is a multi-stakeholder initiative that accelerates the attainment of the sustainable development goals by facilitating the discovery, development, use of, and investment in digital public goods.},
  organization = {Digital Public Goods Alliance},
  file = {/Users/dkapitan/Zotero/storage/J5Y9ZM7S/digitalpublicgoods.net.html}
}

@online{duckdb,
  title = {An In-Process {{SQL OLAP}} Database Management System},
  url = {https://duckdb.org/},
  urldate = {2024-10-10},
  abstract = {DuckDB is an in-process SQL OLAP database management system. Simple, feature-rich, fast \& open source.},
  langid = {english},
  organization = {DuckDB},
  file = {/Users/dkapitan/Zotero/storage/K5X9337J/duckdb.org.html}
}

@article{duda2022hl7,
  title = {{{HL7 FHIR-based}} Tools and Initiatives to Support Clinical Research: A Scoping Review},
  shorttitle = {{{HL7 FHIR-based}} Tools and Initiatives to Support Clinical Research},
  author = {Duda, Stephany N. and Kennedy, Nan and Conway, Douglas and Cheng, Alex C. and Nguyen, Viet and Zayas-Cabán, Teresa and Harris, Paul A.},
  date = {2022-09-01},
  journaltitle = {Journal of the American Medical Informatics Association},
  shortjournal = {Journal of the American Medical Informatics Association},
  volume = {29},
  number = {9},
  pages = {1642--1653},
  issn = {1527-974X},
  doi = {10.1093/jamia/ocac105},
  url = {https://doi.org/10.1093/jamia/ocac105},
  urldate = {2023-01-20},
  abstract = {The HL7® fast healthcare interoperability resources (FHIR®) specification has emerged as the leading interoperability standard for the exchange of healthcare data. We conducted a scoping review to identify trends and gaps in the use of FHIR for clinical research. We reviewed published literature, federally funded project databases, application websites, and other sources to discover FHIR-based papers, projects, and tools (collectively, “FHIR projects”) available to support clinical research activities. Our search identified 203 different FHIR projects applicable to clinical research. Most were associated with preparations to conduct research, such as data mapping to and from FHIR formats (n\,=\,66, 32.5\%) and managing ontologies with FHIR (n\,=\,30, 14.8\%), or post-study data activities, such as sharing data using repositories or registries (n\,=\,24, 11.8\%), general research data sharing (n\,=\,23, 11.3\%), and management of genomic data (n\,=\,21, 10.3\%). With the exception of phenotyping (n\,=\,19, 9.4\%), fewer FHIR-based projects focused on needs within the clinical research process itself. Funding and usage of FHIR-enabled solutions for research are expanding, but most projects appear focused on establishing data pipelines and linking clinical systems such as electronic health records, patient-facing data systems, and registries, possibly due to the relative newness of FHIR and the incentives for FHIR integration in health information systems. Fewer FHIR projects were associated with research-only activities. The FHIR standard is becoming an essential component of the clinical research enterprise. To develop FHIR’s full potential for clinical research, funding and operational stakeholders should address gaps in FHIR-based research tools and methods.},
  file = {/Users/dkapitan/Zotero/storage/J4EIBEFS/Duda et al. - 2022 - HL7 FHIR-based tools and initiatives to support cl.pdf;/Users/dkapitan/Zotero/storage/5P3NTBII/6639865.html}
}

@online{ehds2pilot,
  title = {Recommendations of Standards for Data Interoperability, Querying and Exchange and on {{QC}}/{{QA}} \& Provenance ({{WP8}})},
  date = {2024-12-17T10:34:03+00:00},
  url = {https://ehds2pilot.eu/upcoming_results/recommendations-of-standards-for-data-interoperability-querying-and-exchange-2/},
  urldate = {2024-12-30},
  abstract = {These deliverables, led by BBMRI-ERIC, in the framework of WP8, will define and document the journeys of the use cases, outlining their experiences and the steps undertaken to achieve data interoperability and quality. It will collect feedback from these journeys, identifying both successes and challenges encountered. This feedback will then be compared to an “ideal”},
  langid = {american},
  organization = {EHDS2 Pilot},
  file = {/Users/dkapitan/Zotero/storage/IUYKR4T3/recommendations-of-standards-for-data-interoperability-querying-and-exchange-2.html}
}

@software{ehrbase,
  title    = {{{EHRbase}} 2.0 Website},
  date     = {2024-03-19T18:10:44+01:00},
  url      = {https://www.ehrbase.org/},
  urldate  = {2024-09-20},
  abstract = {Creating the right data environment for tomorrow. EHRbase is the leading open source electronic health record backend. → Learn more},
  file     = {/Users/dkapitan/Zotero/storage/7TS4ELF2/www.ehrbase.org.html}
}

@article{estrin2010health,
  title = {Health Care Delivery. {{Open mHealth}} Architecture: An Engine for Health Care Innovation},
  shorttitle = {Health Care Delivery. {{Open mHealth}} Architecture},
  author = {Estrin, Deborah and Sim, Ida},
  date = {2010-11-05},
  journaltitle = {Science},
  shortjournal = {Science},
  volume = {330},
  number = {6005},
  eprint = {21051617},
  eprinttype = {pmid},
  pages = {759--760},
  issn = {1095-9203},
  doi = {10.1126/science.1196187},
  langid = {english},
  keywords = {Cell Phone,Delivery of Health Care,Humans,Internet,Medical Informatics,Software,Telemedicine},
  file = {/Users/dkapitan/Zotero/storage/9CHH5MAQ/Estrin and Sim - 2010 - Health care delivery. Open mHealth architecture a.pdf}
}

@online{fhir-implementations,
  title   = {{{FHIR Open Source Implementations}}},
  date    = {2024-09-20},
  url     = {https://confluence.hl7.org/display/FHIR/Open+Source+Implementations},
  urldate = {2024-09-20},
  file    = {/Users/dkapitan/Zotero/storage/9NXY7PER/Open+Source+Implementations.html}
}

@software{fhirconnectspec,
  title = {{{FHIR Connect}} Specification},
  date = {2024-10-10T12:26:01Z},
  origdate = {2022-12-14T09:08:27Z},
  url = {https://github.com/better-care/fhir-connect-mapping-spec},
  urldate = {2025-02-04},
  organization = {Better}
}

@report{firely2023fhir,
  title = {{{FHIR}} in {{US}} Healthcare Regulations},
  author = {{Firely}},
  date = {2023-10-26},
  institution = {Firely},
  url = {https://simplifier.net/organization/firely/news/153},
  urldate = {2024-05-30},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/RTZU9IAH/firely2023fhir-in-us.pdf}
}

@online{garst2023comprehensive,
  author      = {Garst, Swier and Dekker, Julian and Reinders, Marcel},
  title       = {A Comprehensive Experimental Comparison between Federated and Centralized Learning},
  date        = {2023-07-29},
  eprinttype  = {bioRxiv},
  eprintclass = {New Results},
  pubstate    = {prepublished},
  pages       = {2023.07.26.550615},
  doi         = {10.1101/2023.07.26.550615},
  url         = {https://www.biorxiv.org/content/10.1101/2023.07.26.550615v1},
  urldate     = {2024-10-24},
  langid      = {english},
  abstract    = {Purpose Federated learning is an upcoming machine learning paradigm which allows data from multiple sources to be used for training of classifiers without the data leaving the source it originally resides. This can be highly valuable for use cases such as medical research, where gathering data at a central location can be quite complicated due to privacy and legal concerns of the data. In such cases, federated learning has the potential to vastly speed up the research cycle. Although federated and central learning have been compared from a theoretical perspective, an extensive experimental comparison of performances and learning behavior still lacks. Methods We have performed a comprehensive experimental comparison between federated and centralized learning. We evaluated various classifiers on various datasets exploring influences of different sample distributions as well as different class distributions across the clients. Results The results show similar performances under a wide variety of settings between the federated and central learning strategies. Federated learning is able to deal with various imbalances in the data distributions. It is sensitive to batch effects between different datasets when they coincide with location, similar as with central learning, but this setting might go unobserved more easily. Conclusion Federated learning seems robust to various challenges such as skewed data distributions, high data dimensionality, multiclass problems and complex models. Taken together, the insights from our comparison gives much promise for applying federated learning as an alternative to sharing data.},
  file        = {/Users/dkapitan/Zotero/storage/VWMUG34U/Garst et al. - 2023 - A comprehensive experimental comparison between federated and centralized learning.pdf}
}

@article{gentner2023data,
  title = {Data {{Lakes}} in {{Healthcare}}: {{Applications}} and {{Benefits}} from the {{Perspective}} of {{Data Sources}} and {{Players}}},
  shorttitle = {Data {{Lakes}} in {{Healthcare}}},
  author = {Gentner, Tobias and Neitzel, Timon and Schulze, Jacob and Gerschner, Felix and Theissler, Andreas},
  date = {2023},
  journaltitle = {Procedia Computer Science},
  shortjournal = {Procedia Computer Science},
  volume = {225},
  pages = {1302--1311},
  issn = {1877-0509},
  doi = {10.1016/j.procs.2023.10.118},
  url = {https://linkinghub.elsevier.com/retrieve/pii/S1877050923012760},
  urldate = {2024-12-02},
  abstract = {As the amount of available data in healthcare has increased significantly and only 20\% of electronic health record data are in a structured format, data lakes have become a common solution for managing heterogeneous data in the healthcare domain. Nowadays, these are utilized far below their capabilities in medical research. Since previous reviews only partly address data lakes in the healthcare domain, a systematic literature review on this topic is missing. Therefore, this paper provides an overview of applications in the healthcare domain that benefit from data lakes. We review the literature and structure it according to data sources and players, and we identify applications and future research needs of data lakes in the healthcare domain. Overall, it turned out that all players could benefit from the capabilities of data lakes. We found that data lakes are currently not broadly implemented in the field, and the viewpoint of hospital operators and healthcare insurers seems to be an underresearched topic compared to the other players.},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/55AX3GRC/Gentner et al. - 2023 - Data Lakes in Healthcare Applications and Benefits from the Perspective of Data Sources and Players.pdf}
}

@inproceedings{giebler2020zone,
  title = {A {{Zone Reference Model}} for {{Enterprise-Grade Data Lake Management}}},
  booktitle = {2020 {{IEEE}} 24th {{International Enterprise Distributed Object Computing Conference}} ({{EDOC}})},
  author = {Giebler, Corinna and Gröger, Christoph and Hoos, Eva and Schwarz, Holger and Mitschang, Bernhard},
  date = {2020-10},
  pages = {57--66},
  publisher = {IEEE},
  location = {Eindhoven, Netherlands},
  doi = {10.1109/EDOC49727.2020.00017},
  url = {https://ieeexplore.ieee.org/document/9233155/},
  urldate = {2024-12-02},
  abstract = {Data lakes are on the rise as data platforms for any kind of analytics, from data exploration to machine learning. They achieve the required flexibility by storing heterogeneous data in their raw format, and by avoiding the need for pre-defined use cases. However, storing only raw data is inefficient, as for many applications, the same data processing has to be applied repeatedly. To foster the reuse of processing steps, literature proposes to store data in different degrees of processing in addition to their raw format. To this end, data lakes are typically structured in zones. There exists various zone models, but they are varied, vague, and no assessments are given. It is unclear which of these zone models is applicable in a practical data lake implementation in enterprises. In this work, we assess existing zone models using requirements derived from multiple representative data analytics use cases of a real-world industry case. We identify the shortcomings of existing work and develop a zone reference model for enterprise-grade data lake management in a detailed manner. We assess the reference model’s applicability through a prototypical implementation for a real-world enterprise data lake use case. This assessment shows that the zone reference model meets the requirements relevant in practice and is ready for industry use.},
  eventtitle = {2020 {{IEEE}} 24th {{International Enterprise Distributed Object Computing Conference}} ({{EDOC}})},
  isbn = {978-1-72816-473-1},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/F67S5QE4/Giebler et al. - 2020 - A Zone Reference Model for Enterprise-Grade Data Lake Management.pdf}
}

@misc{grievefhir,
  title = {{{FHIR}} or {{Relational Model}} for {{Storing Data}}},
  author = {Grieve, Grahame},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/YSYGRIWM/Grieve - FHIR or Relational Model for Storing Data.pdf}
}

@article{gruendner2019ketos,
  author       = {Gruendner, Julian and Schwachhofer, Thorsten and Sippl, Phillip and Wolf, Nicolas and Erpenbeck, Marcel and Gulden, Christian and Kapsner, Lorenz A. and Zierk, Jakob and Mate, Sebastian and Stürzl, Michael and Croner, Roland and Prokosch, Hans-Ulrich and Toddenroth, Dennis},
  title        = {{{KETOS}}: {{Clinical}} Decision Support and Machine Learning as a Service – {{A}} Training and Deployment Platform Based on {{Docker}}, {{OMOP-CDM}}, and {{FHIR Web Services}}},
  shorttitle   = {{{KETOS}}},
  date         = {2019-10-03},
  journaltitle = {PLOS ONE},
  shortjournal = {PLOS ONE},
  volume       = {14},
  number       = {10},
  pages        = {e0223010},
  publisher    = {Public Library of Science},
  issn         = {1932-6203},
  doi          = {10.1371/journal.pone.0223010},
  url          = {https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0223010},
  urldate      = {2024-05-27},
  langid       = {english},
  keywords     = {Colorectal cancer,Consortia,Machine learning,Machine learning algorithms,Physicians,Preprocessing,Prototypes,Statistical data},
  abstract     = {Background and objective To take full advantage of decision support, machine learning, and patient-level prediction models, it is important that models are not only created, but also deployed in a clinical setting. The KETOS platform demonstrated in this work implements a tool for researchers allowing them to perform statistical analyses and deploy resulting models in a secure environment. Methods The proposed system uses Docker virtualization to provide researchers with reproducible data analysis and development environments, accessible via Jupyter Notebook, to perform statistical analysis and develop, train and deploy models based on standardized input data. The platform is built in a modular fashion and interfaces with web services using the Health Level 7 (HL7) Fast Healthcare Interoperability Resources (FHIR) standard to access patient data. In our prototypical implementation we use an OMOP common data model (OMOP-CDM) database. The architecture supports the entire research lifecycle from creating a data analysis environment, retrieving data, and training to final deployment in a hospital setting. Results We evaluated the platform by establishing and deploying an analysis and end user application for hemoglobin reference intervals within the University Hospital Erlangen. To demonstrate the potential of the system to deploy arbitrary models, we loaded a colorectal cancer dataset into an OMOP database and built machine learning models to predict patient outcomes and made them available via a web service. We demonstrated both the integration with FHIR as well as an example end user application. Finally, we integrated the platform with the open source DataSHIELD architecture to allow for distributed privacy preserving data analysis and training across networks of hospitals. Conclusion The KETOS platform takes a novel approach to data analysis, training and deploying decision support models in a hospital or healthcare setting. 
It does so in a secure and privacy-preserving manner, combining the flexibility of Docker virtualization with the advantages of standardized vocabularies, a widely applied database schema (OMOP-CDM), and a standardized way to exchange medical data (FHIR).},
  file         = {/Users/dkapitan/Zotero/storage/RWWQ7PJA/Gruendner et al. - 2019 - KETOS Clinical decision support and machine learn.pdf}
}

@article{gruendner2021framework,
  title = {A {{Framework}} for {{Criteria-Based Selection}} and {{Processing}} of {{Fast Healthcare Interoperability Resources}} ({{FHIR}}) {{Data}} for {{Statistical Analysis}}: {{Design}} and {{Implementation Study}}},
  shorttitle = {A {{Framework}} for {{Criteria-Based Selection}} and {{Processing}} of {{Fast Healthcare Interoperability Resources}} ({{FHIR}}) {{Data}} for {{Statistical Analysis}}},
  author = {Gruendner, Julian and Gulden, Christian and Kampf, Marvin and Mate, Sebastian and Prokosch, Hans-Ulrich and Zierk, Jakob},
  date = {2021-04-01},
  journaltitle = {JMIR Medical Informatics},
  volume = {9},
  number = {4},
  pages = {e25645},
  publisher = {JMIR Publications Inc.},
  location = {Toronto, Canada},
  doi = {10.2196/25645},
  url = {https://medinform.jmir.org/2021/4/e25645},
  urldate = {2024-05-27},
  abstract = {Background: The harmonization and standardization of digital medical information for research purposes is a challenging and ongoing collaborative effort. Current research data repositories typically require extensive efforts in harmonizing and transforming original clinical data. The Fast Healthcare Interoperability Resources (FHIR) format was designed primarily to represent clinical processes; therefore, it closely resembles the clinical data model and is more widely available across modern electronic health records. However, no common standardized data format is directly suitable for statistical analyses, and data need to be preprocessed before statistical analysis. Objective: This study aimed to elucidate how FHIR data can be queried directly with a preprocessing service and be used for statistical analyses. Methods: We propose that the binary JavaScript Object Notation format of the PostgreSQL (PSQL) open source database is suitable for not only storing FHIR data, but also extending it with preprocessing and filtering services, which directly transform data stored in FHIR format into prepared data subsets for statistical analysis. We specified an interface for this preprocessor, implemented and deployed it at University Hospital Erlangen-Nürnberg, generated 3 sample data sets, and analyzed the available data. Results: We imported real-world patient data from 2016 to 2018 into a standard PSQL database, generating a dataset of approximately 35.5 million FHIR resources, including “Patient,” “Encounter,” “Condition” (diagnoses specified using International Classification of Diseases codes), “Procedure,” and “Observation” (laboratory test results). We then integrated the developed preprocessing service with the PSQL database and the locally installed web-based KETOS analysis platform. Advanced statistical analyses were feasible using the developed framework using 3 clinically relevant scenarios (data-driven establishment of hemoglobin reference intervals, assessment of anemia prevalence in patients with cancer, and investigation of the adverse effects of drugs). Conclusions: This study shows how the standard open source database PSQL can be used to store FHIR data and be integrated with a specifically developed preprocessing and analysis framework. This enables dataset generation with advanced medical criteria and the integration of subsequent statistical analysis. The web-based preprocessing service can be deployed locally at the hospital level, protecting patients’ privacy while being integrated with existing open source data analysis tools currently being developed across Germany.},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/D2P7B27M/Gruendner et al. - 2021 - A Framework for Criteria-Based Selection and Proce.pdf}
}

% Journal article in JMIR Medical Informatics 10(5), 2022.
% The publisher's city is kept in its own location field instead of being appended to the publisher name.
@article{gruendner2022architecture,
  title = {The {{Architecture}} of a {{Feasibility Query Portal}} for {{Distributed COVID-19 Fast Healthcare Interoperability Resources}} ({{FHIR}}) {{Patient Data Repositories}}: {{Design}} and {{Implementation Study}}},
  shorttitle = {The {{Architecture}} of a {{Feasibility Query Portal}} for {{Distributed COVID-19 Fast Healthcare Interoperability Resources}} ({{FHIR}}) {{Patient Data Repositories}}},
  author = {Gruendner, Julian and Deppenwiese, Noemi and Folz, Michael and Köhler, Thomas and Kroll, Björn and Prokosch, Hans-Ulrich and Rosenau, Lorenz and Rühle, Mathias and Scheidl, Marc-Anton and Schüttler, Christina and Sedlmayr, Brita and Twrdik, Alexander and Kiel, Alexander and Majeed, Raphael W.},
  date = {2022-05-25},
  journaltitle = {JMIR Medical Informatics},
  volume = {10},
  number = {5},
  pages = {e36709},
  publisher = {JMIR Publications Inc.},
  location = {Toronto, Canada},
  doi = {10.2196/36709},
  url = {https://medinform.jmir.org/2022/5/e36709},
  urldate = {2024-05-27},
  abstract = {Background: An essential step in any medical research project after identifying the research question is to determine if there are sufficient patients available for a study and where to find them. Pursuing digital feasibility queries on available patient data registries has proven to be an excellent way of reusing existing real-world data sources. To support multicentric research, these feasibility queries should be designed and implemented to run across multiple sites and securely access local data. Working across hospitals usually involves working with different data formats and vocabularies. Recently, the Fast Healthcare Interoperability Resources (FHIR) standard was developed by Health Level Seven to address this concern and describe patient data in a standardized format. The Medical Informatics Initiative in Germany has committed to this standard and created data integration centers, which convert existing data into the FHIR format at each hospital. This partially solves the interoperability problem; however, a distributed feasibility query platform for the FHIR standard is still missing. Objective: This study described the design and implementation of the components involved in creating a cross-hospital feasibility query platform for researchers based on FHIR resources. This effort was part of a large COVID-19 data exchange platform and was designed to be scalable for a broad range of patient data. Methods: We analyzed and designed the abstract components necessary for a distributed feasibility query. This included a user interface for creating the query, backend with an ontology and terminology service, middleware for query distribution, and FHIR feasibility query execution service. Results: We implemented the components described in the Methods section. The resulting solution was distributed to 33 German university hospitals. 
The functionality of the comprehensive network infrastructure was demonstrated using a test data set based on the German Corona Consensus Data Set. A performance test using specifically created synthetic data revealed the applicability of our solution to data sets containing millions of FHIR resources. The solution can be easily deployed across hospitals and supports feasibility queries, combining multiple inclusion and exclusion criteria using standard Health Level Seven query languages such as Clinical Quality Language and FHIR Search. Developing a platform based on multiple microservices allowed us to create an extendable platform and support multiple Health Level Seven query languages and middleware components to allow integration with future directions of the Medical Informatics Initiative. Conclusions: We designed and implemented a feasibility platform for distributed feasibility queries, which works directly on FHIR-formatted data and distributed it across 33 university hospitals in Germany. We showed that developing a feasibility platform directly on the FHIR standard is feasible.},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/JWTLQ68A/Gruendner et al. - 2022 - The Architecture of a Feasibility Query Portal for.pdf}
}

% Journal article in IEEE Transactions on Knowledge and Data Engineering 35(12), 2023.
% The former eventtitle field only repeated the journal name and has been dropped; langid added to match sibling entries.
@article{hai2023data,
  title = {Data {{Lakes}}: {{A Survey}} of {{Functions}} and {{Systems}}},
  shorttitle = {Data {{Lakes}}},
  author = {Hai, Rihan and Koutras, Christos and Quix, Christoph and Jarke, Matthias},
  date = {2023-12},
  journaltitle = {IEEE Transactions on Knowledge and Data Engineering},
  volume = {35},
  number = {12},
  pages = {12571--12590},
  issn = {1558-2191},
  doi = {10.1109/TKDE.2023.3270101},
  url = {https://ieeexplore.ieee.org/abstract/document/10107808},
  urldate = {2024-01-11},
  abstract = {Data lakes are becoming increasingly prevalent for Big Data management and data analytics. In contrast to traditional ‘schema-on-write’ approaches such as data warehouses, data lakes are repositories storing raw data in its original formats and providing a common access interface. Despite the strong interest raised from both academia and industry, there is a large body of ambiguity regarding the definition, functions and available technologies for data lakes. A complete, coherent picture of data lake challenges and solutions is still missing. This survey reviews the development, architectures, and systems of data lakes. We provide a comprehensive overview of research questions for designing and building data lakes. We classify the existing approaches and systems based on their provided functions for data lakes, which makes this survey a useful technical reference for designing, implementing and deploying data lakes. We hope that the thorough comparison of existing solutions and the discussion of open research challenges in this survey will motivate the future development of data lake research and practice.},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/R87AQA44/Hai et al. - 2023 - Data Lakes A Survey of Functions and Systems.pdf}
}

% Software entry for the HAPI FHIR project website; no author or release date is recorded.
@software{hapi-fhir,
  title = {{{HAPI FHIR}} - {{The Open Source FHIR API}} for {{Java}}},
  file = {/Users/dkapitan/Zotero/storage/Z5VJXY8A/hapifhir.io.html},
  url = {https://hapifhir.io/},
  urldate = {2024-09-20}
}

% Conference paper, 2022 IEEE International Conference on Big Data; fields ordered author-first.
@inproceedings{harby2022data,
  author = {Harby, Ahmed A. and Zulkernine, Farhana},
  title = {From {{Data Warehouse}} to {{Lakehouse}}: {{A Comparative Review}}},
  shorttitle = {From {{Data Warehouse}} to {{Lakehouse}}},
  date = {2022-12-17},
  booktitle = {2022 {{IEEE International Conference}} on {{Big Data}} ({{Big Data}})},
  eventtitle = {2022 {{IEEE International Conference}} on {{Big Data}} ({{Big Data}})},
  location = {Osaka, Japan},
  publisher = {IEEE},
  pages = {389--395},
  isbn = {978-1-66548-045-1},
  doi = {10.1109/BigData55660.2022.10020719},
  url = {https://ieeexplore.ieee.org/document/10020719/},
  urldate = {2023-04-23},
  langid = {english},
  abstract = {Digital information systems currently generate a vast amount of data every minute which emphasizes the continuing need to advance big data management systems with efficient data ingestion and knowledge extraction capabilities. To address the ‘big data’ problems due to high volume, velocity, variety, and veracity, data management systems evolved from structured databases to big data storage systems, graph databases, data warehouses, and data lakes but each solution has its strengths and shortcomings. The need to produce actionable knowledge fast from unstructured data ingested from distributed sources requires a marriage of data warehouses and data lakes to create a data Lakehouse (LH). The objective is to use the strengths of the data warehouse in producing insights fast from processed merged data, and of the data lake in ingesting and storing high-speed unstructured data with post-storage transformation and analytics capabilities. In this paper, we present a comparative review of the existing data warehouse and data lake technology to highlight their strengths and weaknesses and propose the desired and necessary features of the LH architecture, which has recently gained a lot of attention in the big data management research community.},
  file = {/Users/dkapitan/Zotero/storage/H7SQYVUI/Harby and Zulkernine - 2022 - From Data Warehouse to Lakehouse A Comparative Re.pdf}
}

% SSRN preprint (pubstate prepublished), paper number 4765588; fields ordered author-first.
@online{harby2024data,
  author = {Harby, Ahmed A. and Zulkernine, Farhana},
  title = {Data {{Lakehouse}}: {{A Survey}} and {{Experimental Study}}},
  shorttitle = {Data {{Lakehouse}}},
  type = {SSRN Scholarly Paper},
  number = {4765588},
  date = {2024-03-20},
  location = {Rochester, NY},
  pubstate = {prepublished},
  doi = {10.2139/ssrn.4765588},
  url = {https://papers.ssrn.com/abstract=4765588},
  urldate = {2024-03-27},
  langid = {english},
  keywords = {Big data,Data Lake,Data Lakehouse,Data Warehouse},
  abstract = {Efficient big data management is a dire necessity to manage the exponential growth in data generated by digital information systems to produce usable knowledge. Structured databases, data lakes, and warehouses have each provided a solution with varying degrees of success. However, a new and superior solution, the data Lakehouse, has emerged to extract actionable insights from unstructured data ingested from distributed sources. By combining the strengths of data warehouses and data lakes, the data Lakehouse can process and merge data quickly while ingesting and storing high-speed unstructured data with post-storage transformation and analytics capabilities. The Lakehouse architecture offers the necessary features for optimal functionality and has gained significant attention in the big data management research community. In this paper, we compare data lake, warehouse, and lakehouse systems, highlight their strengths and shortcomings, identify the desired features to handle the evolving challenges in big data management and analysis and propose an advanced data Lakehouse architecture. We also demonstrate the performance of three state-of-the-art data management systems namely HDFS data lake, Hive data warehouse, and Delta lakehouse in managing data for analytical query responses through an experimental study.},
  file = {/Users/dkapitan/Zotero/storage/3THYQL7N/Harby and Zulkernine - 2024 - Data Lakehouse A Survey and Experimental Study.pdf}
}

% Wiki page published by Health-RI.
% The corporate author is double-braced so it is never parsed as a personal name; the wiki space name scraped
% into the page title is kept in titleaddon, and the hosting platform name is dropped from the title.
@online{healthri2024agreements,
  type = {wiki},
  title = {Agreements on the {{National Health Data Infrastructure}} for {{Research}}, {{Policy}} and {{Innovation}}},
  titleaddon = {Health-RI Nationale Gezondheidsdata-infrastructuur},
  author = {{Health-RI}},
  date = {2024-01-29},
  url = {https://health-ri.atlassian.net/wiki/spaces/HNG/pages/249073646/Agreements+on+the+National+Health+Data+Infrastructure+for+Research+Policy+and+Innovation},
  urldate = {2024-06-03},
  file = {/Users/dkapitan/Zotero/storage/TF49QFQ4/Agreements+on+the+National+Health+Data+Infrastructure+for+Research+Policy+and+Innovation.html}
}

% HL7 survey report. Corporate author and report type added (both are required for report entries);
% the type follows the white-papers path in the URL.
% NOTE(review): urldate (2024-04-04) precedes date (2024-05); confirm which of the two is correct.
@report{hl72024state,
  title = {The {{State}} of {{FHIR}} 2024 {{Survey Results}}},
  author = {{HL7 International}},
  date = {2024-05},
  type = {White Paper},
  institution = {HL7},
  url = {https://www.hl7.org/documentcenter/public/white-papers/2024%20StateofFHIRSurveyResults_final.pdf},
  urldate = {2024-04-04},
  file = {/Users/dkapitan/Zotero/storage/NRK88P5M/2024 StateofFHIRSurveyResults_final.pdf}
}

% Blog post on the Cloudticity blog; no author or publication date is recorded.
% The site name scraped into the page title now lives in the organization field.
@online{hl7a,
  title = {{{HL7}} v2 vs. {{FHIR}}: {{Key Data Standard Differences}}},
  shorttitle = {{{HL7}} v2 vs. {{FHIR}}},
  organization = {Cloudticity},
  url = {https://blog.cloudticity.com/hl7-vs-fhir-key-differences-healthcare-data-exchange},
  urldate = {2024-09-20},
  abstract = {What's the difference between HL7v2 and FHIR and why is FHIR better? What this means for developers as well as providers, payers, and patients.},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/NS2HAJXT/hl7-vs-fhir-key-differences-healthcare-data-exchange.html}
}

% Government report. Corporate author (mirrors the institution) and report type added, since both are
% required for report entries.
% NOTE(review): the type is taken from the attached PDF name (ndhm_strategy_overview.pdf); confirm against
% the document, and add a url if a stable one exists.
@report{india2020national,
  title = {National {{Digital Health Mission}}},
  author = {{India National Health Authority}},
  date = {2020},
  type = {Strategy Overview},
  institution = {India National Health Authority},
  file = {/Users/dkapitan/Zotero/storage/GEK2SB32/ndhm_strategy_overview.pdf}
}

% Software entry for the Instant OpenHIE v2 documentation site; no author is recorded.
@software{instant-openhie-v2,
  title = {Instant {{OpenHIE}} V2},
  date = {2024-07-03},
  file = {/Users/dkapitan/Zotero/storage/GHEPC2SQ/instant-v2.html},
  url = {https://jembi.gitbook.io/instant-v2/},
  urldate = {2024-09-20}
}

% News item on distributedlearning.ai; no author, date or local attachment is recorded.
@online{integration,
  title = {Integration of {{OMOP}} and {{OHDSI}} Tools in Vantage6},
  urldate = {2024-05-30},
  url = {https://distributedlearning.ai/news/omop-integration/}
}

% Conference paper, IEEE BIBE 2016. The IEEE Xplore link uses https, like the other IEEE entries in this file.
@inproceedings{ismail2016hl7,
  title = {{{HL7 FHIR Compliant Data Access Model}} for {{Maternal Health Information System}}},
  booktitle = {2016 {{IEEE}} 16th {{International Conference}} on {{Bioinformatics}} and {{Bioengineering}} ({{BIBE}})},
  author = {Ismail, Saadia and Alshmari, Majed and Qamar, Usman and Butt, Wasi Haider and Latif, Khalid and Ahmad, Hafiz Farooq},
  date = {2016-10},
  pages = {51--56},
  publisher = {IEEE},
  location = {Taichung, Taiwan},
  doi = {10.1109/BIBE.2016.9},
  url = {https://ieeexplore.ieee.org/document/7789959/},
  urldate = {2023-03-19},
  abstract = {Effective decision-making to improve healthcare for people depends essentially upon availability of reliable health data. Several developing countries have maternal health indicators lagging behind as compared to international targets set by the UN as Millennium or Sustainable Development Goals. One of the major reasons is poor and non-standardized maternal health record keeping that affect data quality and undermines evidence-based decision making. The aim of this research is the design and development of HL7 FHIR compliant data access model for maintaining maternal health data as FHIR resources to enable effective exchange of health data. The proposed model is implemented as restful web services and data is stored in a NoSQL database for flexibility. To evaluate effectiveness, the system was reviewed by healthcare providers and expectant women. Their feedback highlights the usefulness of the proposed system as compared to traditional record keeping techniques. It is anticipated that the proposed system will lay the foundation of a comprehensive maternal healthcare information system. This shall enable trend analysis for policy-making to help accelerate the efforts for meeting global maternal health targets.},
  eventtitle = {2016 {{IEEE}} 16th {{International Conference}} on {{Bioinformatics}} and {{Bioengineering}} ({{BIBE}})},
  isbn = {978-1-5090-3834-3},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/Z3WLVGRH/Ismail et al. - 2016 - HL7 FHIR Compliant Data Access Model for Maternal .pdf}
}

% Conference paper, AISTATS 2024, published by PMLR.
% Series and volume added (volume 238 matches the v238 path of the proceedings URL); the escaped LaTeX url
% macro left in the abstract by the exporter is replaced with the plain link.
@inproceedings{jager2024data,
  title = {From {{Data Imputation}} to {{Data Cleaning}} — {{Automated Cleaning}} of {{Tabular Data Improves Downstream Predictive Performance}}},
  booktitle = {Proceedings of {{The}} 27th {{International Conference}} on {{Artificial Intelligence}} and {{Statistics}}},
  author = {Jäger, Sebastian and Biessmann, Felix},
  date = {2024-04-18},
  series = {Proceedings of {{Machine Learning Research}}},
  volume = {238},
  pages = {3394--3402},
  publisher = {PMLR},
  issn = {2640-3498},
  url = {https://proceedings.mlr.press/v238/jager24a.html},
  urldate = {2024-11-29},
  abstract = {The translation of Machine Learning (ML) research innovations to real-world applications and the maintenance of ML components are hindered by reoccurring challenges, such as reaching high predictive performance, robustness, complying with regulatory constraints, or meeting ethical standards. Many of these challenges are related to data quality and, in particular, to the lack of automation in data pipelines upstream of ML components. Automated data cleaning remains challenging since many approaches neglect the dependency structure of the data errors and require task-specific heuristics or human input for calibration. In this study, we develop and evaluate an application-agnostic ML-based data cleaning approach using well-established imputation techniques for automated detection and cleaning of erroneous values. To improve the degree of automation, we combine imputation techniques with conformal prediction (CP), a model-agnostic and distribution-free method to quantify and calibrate the uncertainty of ML models. Extensive empirical evaluations demonstrate that Conformal Data Cleaning (CDC) improves predictive performance in downstream ML tasks in the majority of cases. Our code is available on GitHub: https://github.com/se-jaeger/conformal-data-cleaning.},
  eventtitle = {International {{Conference}} on {{Artificial Intelligence}} and {{Statistics}}},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/2LHQGLF9/Jäger and Biessmann - 2024 - From Data Imputation to Data Cleaning — Automated Cleaning of Tabular Data Improves Downstream Predi.pdf}
}

% Conference paper. The entry was typed as an article but carried no journal; it is recorded here as a
% paper in the CIDR 2023 proceedings.
% NOTE(review): venue, location and url were identified from the attached p92-jain.pdf; confirm against
% the PDF and add an urldate when the link is next checked.
@inproceedings{jain2023analyzing,
  title = {Analyzing and {{Comparing Lakehouse Storage Systems}}},
  booktitle = {{{Conference}} on {{Innovative Data Systems Research}} ({{CIDR}})},
  author = {Jain, Paras and Kraft, Peter and Power, Conor and Das, Tathagata and Stoica, Ion and Zaharia, Matei},
  date = {2023},
  location = {Amsterdam, The Netherlands},
  url = {https://www.cidrdb.org/cidr2023/papers/p92-jain.pdf},
  abstract = {Lakehouse storage systems that implement ACID transactions and other management features over data lake storage, such as Delta Lake, Apache Hudi and Apache Iceberg, have rapidly grown in popularity, replacing traditional data lakes at many organizations. These open storage systems with rich management features promise to simplify management of large datasets, accelerate SQL workloads, and offer fast, direct file access for other workloads, such as machine learning. However, the research community has not explored the tradeoffs in designing lakehouse systems in detail. In this paper, we analyze the designs of the three most popular lakehouse storage systems—Delta Lake, Hudi and Iceberg—and compare their performance and features among varying axes based on these designs. We also release a simple benchmark, LHBench, that researchers can use to compare other designs. LHBench is available at https://github.com/lhbench/lhbench.},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/AQ2TN7SR/p92-jain.pdf;/Users/dkapitan/Zotero/storage/KLZG5RE5/Jain et al. - 2023 - Analyzing and Comparing Lakehouse Storage Systems.pdf}
}

% Web snapshot of a JMIR Medical Informatics page. The site name scraped into the title is moved to
% titleaddon, and the year is taken from the URL path.
% NOTE(review): the URL (2017, issue 4, e44) is a journal article; consider re-importing it from the
% publisher as an article entry so that authors, volume and DOI are captured.
@online{jmir,
  title = {{{Open-Source Electronic Health Record Systems}} for {{Low-Resource Settings}}: {{Systematic Review}}},
  titleaddon = {JMIR Medical Informatics},
  date = {2017},
  url = {https://medinform.jmir.org/2017/4/e44/},
  urldate = {2024-06-04},
  file = {/Users/dkapitan/Zotero/storage/689HC47Z/e44.html}
}

% Conference paper, COPA 2021, published by PMLR.
% Series and volume added (volume 152 matches the v152 path of the proceedings URL); double spaces left in
% the abstract by line wrapping are collapsed.
@inproceedings{johansson2021calibrating,
  title = {Calibrating Multi-Class Models},
  booktitle = {Proceedings of the {{Tenth Symposium}} on {{Conformal}} and {{Probabilistic Prediction}} and {{Applications}}},
  author = {Johansson, Ulf and Löfström, Tuwe and Boström, Henrik},
  date = {2021-09-20},
  series = {Proceedings of {{Machine Learning Research}}},
  volume = {152},
  pages = {111--130},
  publisher = {PMLR},
  issn = {2640-3498},
  url = {https://proceedings.mlr.press/v152/johansson21a.html},
  urldate = {2024-10-01},
  abstract = {Predictive models communicating algorithmic confidence are very informative, but only if well-calibrated and sharp, i.e., providing accurate probability estimates adjusted for each instance. While almost all machine learning algorithms are able to produce probability estimates, these are often poorly calibrated, thus requiring external calibration. For multiclass problems, external calibration has typically been done using one-vs-all or all-vs-all schemes, thus adding to the computational complexity, but also making it impossible to analyze and inspect the predictive models. In this paper, we suggest a novel approach for calibrating inherently multi-class models. Instead of providing a probability distribution over all labels, the estimation is of the probability that the class label predicted by the underlying model is correct. In an extensive empirical study, it is shown that the suggested approach, when applied to both Platt scaling and Venn-Abers, is able to improve the probability estimates from decision trees, random forests and extreme gradient boosting.},
  eventtitle = {Conformal and {{Probabilistic Prediction}} and {{Applications}}},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/DL8W5H5S/Johansson et al. - 2021 - Calibrating multi-class models.pdf}
}

% Article in AMIA Annual Symposium Proceedings, volume 2015, identified by PMID and PMCID; fields ordered author-first.
@article{johnson2015data,
  author = {Johnson, Steven G. and Speedie, Stuart and Simon, Gyorgy and Kumar, Vipin and Westra, Bonnie L.},
  title = {A {{Data Quality Ontology}} for the {{Secondary Use}} of {{EHR Data}}},
  date = {2015-11-05},
  journaltitle = {AMIA Annual Symposium Proceedings},
  shortjournal = {AMIA Annu Symp Proc},
  volume = {2015},
  pages = {1937--1946},
  issn = {1942-597X},
  eprinttype = {pmid},
  eprint = {26958293},
  pmcid = {PMC4765682},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4765682/},
  urldate = {2024-09-19},
  abstract = {The secondary use of EHR data for research is expected to improve health outcomes for patients, but the benefits will only be realized if the data in the EHR is of sufficient quality to support these uses. A data quality (DQ) ontology was developed to rigorously define concepts and enable automated computation of data quality measures. The healthcare data quality literature was mined for the important terms used to describe data quality concepts and harmonized into an ontology. Four high-level data quality dimensions (“correctness”, “consistency”, “completeness” and “currency”) categorize 19 lower level measures. The ontology serves as an unambiguous vocabulary, which defines concepts more precisely than natural language; it provides a mechanism to automatically compute data quality measures; and is reusable across domains and use cases. A detailed example is presented to demonstrate its utility. The DQ ontology can make data validation more common and reproducible.},
  file = {/Users/dkapitan/Zotero/storage/EAFLQV4S/Johnson et al. - 2015 - A Data Quality Ontology for the Secondary Use of E.pdf}
}

% Journal article in JAMIA 28(6), 2021.
% shortjournal now holds the abbreviated journal name instead of a copy of the full title.
@article{jones2021landscape,
  title = {A Landscape Survey of Planned {{SMART}}/{{HL7}} Bulk {{FHIR}} Data Access {{API}} Implementations and Tools},
  author = {Jones, James and Gottlieb, Daniel and Mandel, Joshua C and Ignatov, Vladimir and Ellis, Alyssa and Kubick, Wayne and Mandl, Kenneth D},
  date = {2021-06-01},
  journaltitle = {Journal of the American Medical Informatics Association},
  shortjournal = {J Am Med Inform Assoc},
  volume = {28},
  number = {6},
  pages = {1284--1287},
  issn = {1527-974X},
  doi = {10.1093/jamia/ocab028},
  url = {https://doi.org/10.1093/jamia/ocab028},
  urldate = {2023-01-20},
  abstract = {The Office of National Coordinator for Health Information Technology final rule implementing the interoperability and information blocking provisions of the 21st Century Cures Act requires support for two SMART (Substitutable Medical Applications, Reusable Technologies) application programming interfaces (APIs) and instantiates Health Level Seven International (HL7) Fast Healthcare Interoperability Resources (FHIR) as a lingua franca for health data. We sought to assess the current state and near-term plans for the SMART/HL7 Bulk FHIR Access API implementation across organizations including electronic health record vendors, cloud vendors, public health contractors, research institutions, payors, FHIR tooling developers, and other purveyors of health information technology platforms. We learned that many organizations not required through regulation to use standardized bulk data are rapidly implementing the API for a wide array of use cases. This may portend an unprecedented level of standardized population-level health data exchange that will support an apps and analytics ecosystem. Feedback from early adopters on the API’s limitations and unsolved problems in the space of population health are highlighted.},
  file = {/Users/dkapitan/Zotero/storage/PG68K5CE/Jones et al. - 2021 - A landscape survey of planned SMARTHL7 bulk FHIR .pdf;/Users/dkapitan/Zotero/storage/3NTEYRHE/6155897.html}
}

% Scoping review in Journal of Global Health, volume 12, 2022; fields ordered author-first.
@article{karamagi2022ehealth,
  author = {Karamagi, Humphrey C and Muneene, Derrick and Droti, Benson and Jepchumba, Violet and Okeibunor, Joseph C and Nabyonga, Juliet and Asamani, James Avoka and Traore, Moussa and Kipruto, Hillary},
  title = {{{eHealth}} or E-{{Chaos}}: {{The}} Use of {{Digital Health Interventions}} for {{Health Systems Strengthening}} in Sub-{{Saharan Africa}} over the Last 10 Years: {{A}} Scoping Review},
  shorttitle = {{{eHealth}} or E-{{Chaos}}},
  date = {2022-12-03},
  journaltitle = {Journal of Global Health},
  shortjournal = {J Glob Health},
  volume = {12},
  pages = {04090},
  issn = {2047-2978, 2047-2986},
  doi = {10.7189/jogh.12.04090},
  url = {https://jogh.org/2022/jogh-12-04090},
  urldate = {2024-02-05},
  langid = {english},
  abstract = {Background Digital health solutions are a potent and complementary intervention in health system strengthening to accelerate universal access to health services. Implementing scalable, sustainable, and integrated digital solutions in a coordinated manner is necessary to experience the benefits of digital interventions in health systems. We sought to establish the breadth and scope of available digital health interventions (DHIs) and their functions in sub-Saharan Africa. Methods: We conducted a scoping review according to the Joanne Briggs Institute’s reviewers manual and followed the Preferred Reporting Items for Systematic Reviews and Meta-Analyses - Extension for Scoping Reviews (PRISMA-ScR) checklist and explanation. We retrieved data from the WHO Digital Health Atlas (DHA), the WHO e-Health country profiles report of 2015, and electronic databases. The protocol has been deposited in an open-source platform – the Open Science Framework at https://osf.io/5kzq7. Results The researchers retrieved 983 digital tools used to strengthen health systems in sub-Saharan Africa over the past 10 years. We included 738 DHIs in the analysis while 245 were excluded for not meeting the inclusion criteria. We observed a disproportionate distribution of DHIs towards service delivery (81.7\%, n\,=\,603), health care providers (91.8\%, n\,=\,678), and access and use of information (84.1\%, n\,=\,621). Fifty-three percent (53.4\%, n\,=\,394) of the solutions are established and 47.5\% (n\,=\,582) were aligned to 20\% (n\,=\,5) of the system categories. Conclusions Sub-Saharan Africa is endowed with digital health solutions in both numbers and distinct functions. It is lacking in coordination, integration, scalability, sustainability, and equitable distribution of investments in digital health. 
Digital health policymakers in sub-Saharan Africa need to urgently institute coordination mechanisms to terminate unending duplication and disjointed vertical implementations and manage solutions for scale. Central to this would be to build digital health leadership in countries within SSA, adopt standards and interoperability frameworks; advocate for more investments into lagging components, and promote multi-purpose solutions to halt the seeming “e-chaos” and progress to sustainable e-health solutions.},
  file = {/Users/dkapitan/Zotero/storage/549QGYAI/jogh-12-04090.pdf;/Users/dkapitan/Zotero/storage/U2E7IBUE/jogh-12-04090-s001.pdf}
}

% Essay published by the Open Future Foundation, 2021; fields ordered author-first.
@article{keller2021paradox,
  author = {Keller, Paul and Tarkowski, Alek},
  title = {The {{Paradox}} of {{Open}}},
  date = {2021-03-05},
  journaltitle = {Open Future},
  publisher = {Open Future Foundation},
  url = {https://openfuture.pubpub.org/pub/paradox-of-open/release/1},
  urldate = {2024-03-25},
  langid = {english},
  abstract = {In today’s digital environment, openness serves as both a challenge to concentrations of power and its enabler. Solving this paradox is at the heart of our work, which focuses on three objectives.},
  file = {/Users/dkapitan/Zotero/storage/GRW8I2Y9/Keller and Tarkowski - 2021 - The Paradox of Open.pdf}
}

% Journal article in Computer Methods and Programs in Biomedicine, volume 211, 2021.
% shortjournal now holds the abbreviated journal name instead of a copy of the full title.
@article{khalid2021standardized,
  title = {A Standardized Analytics Pipeline for Reliable and Rapid Development and Validation of Prediction Models Using Observational Health Data},
  author = {Khalid, Sara and Yang, Cynthia and Blacketer, Clair and Duarte-Salles, Talita and Fernández-Bertolín, Sergio and Kim, Chungsoo and Park, Rae Woong and Park, Jimyung and Schuemie, Martijn J. and Sena, Anthony G. and Suchard, Marc A. and You, Seng Chan and Rijnbeek, Peter R. and Reps, Jenna M.},
  date = {2021-11-01},
  journaltitle = {Computer Methods and Programs in Biomedicine},
  shortjournal = {Comput Methods Programs Biomed},
  volume = {211},
  pages = {106394},
  issn = {0169-2607},
  doi = {10.1016/j.cmpb.2021.106394},
  url = {https://www.sciencedirect.com/science/article/pii/S0169260721004685},
  urldate = {2024-05-27},
  abstract = {Background and objective As a response to the ongoing COVID-19 pandemic, several prediction models in the existing literature were rapidly developed, with the aim of providing evidence-based guidance. However, none of these COVID-19 prediction models have been found to be reliable. Models are commonly assessed to have a risk of bias, often due to insufficient reporting, use of non-representative data, and lack of large-scale external validation. In this paper, we present the Observational Health Data Sciences and Informatics (OHDSI) analytics pipeline for patient-level prediction modeling as a standardized approach for rapid yet reliable development and validation of prediction models. We demonstrate how our analytics pipeline and open-source software tools can be used to answer important prediction questions while limiting potential causes of bias (e.g., by validating phenotypes, specifying the target population, performing large-scale external validation, and publicly providing all analytical source code). Methods We show step-by-step how to implement the analytics pipeline for the question: ‘In patients hospitalized with COVID-19, what is the risk of death 0 to 30 days after hospitalization?’. We develop models using six different machine learning methods in a USA claims database containing over 20,000 COVID-19 hospitalizations and externally validate the models using data containing over 45,000 COVID-19 hospitalizations from South Korea, Spain, and the USA. Results Our open-source software tools enabled us to efficiently go end-to-end from problem design to reliable Model Development and evaluation. When predicting death in patients hospitalized with COVID-19, AdaBoost, random forest, gradient boosting machine, and decision tree yielded similar or lower internal and external validation discrimination performance compared to L1-regularized logistic regression, whereas the MLP neural network consistently resulted in lower discrimination. 
L1-regularized logistic regression models were well calibrated. Conclusion Our results show that following the OHDSI analytics pipeline for patient-level prediction modelling can enable the rapid development towards reliable prediction models. The OHDSI software tools and pipeline are open source and available to researchers from all around the world.},
  keywords = {COVID-19,Data harmonization,Data quality control,Distributed data network,Machine learning,Risk prediction},
  file = {/Users/dkapitan/Zotero/storage/D7HZSKV3/Khalid et al. - 2021 - A standardized analytics pipeline for reliable and.pdf}
}

% NOTE(review): this entry records no date, journaltitle, or eprint identifier; it looks like a preprint export - confirm the venue, year, and full author list.
@article{khanvertical,
  author   = {Khan, Afsana},
  title    = {Vertical {{Federated Learning}}: {{A Structured Literature Review}}},
  langid   = {english},
  abstract = {Federated Learning (FL) has emerged as a promising distributed learning paradigm with an added advantage of data privacy. With the growing interest in having collaboration among data owners, FL has gained significant attention of organizations. The idea of FL is to enable collaborating participants train machine learning (ML) models on decentralized data without breaching privacy. In simpler words, federated learning is the approach of “bringing the model to the data, instead of bringing the data to the model”. Federated learning, when applied to data which is partitioned vertically across participants, is able to build a complete ML model by combining local models trained only using the data with distinct features at the local sites. This architecture of FL is referred to as vertical federated learning (VFL), which differs from the conventional FL on horizontally partitioned data. As VFL is different from conventional FL, it comes with its own issues and challenges. In this paper, we present a structured literature review discussing the state-of-the-art approaches in VFL. Additionally, the literature review highlights the existing solutions to challenges in VFL and provides potential research directions in this domain.},
  file     = {/Users/dkapitan/Zotero/storage/D59A9FNF/Khan - Vertical Federated Learning A Structured Literature Review.pdf}
}

@article{kroes2022blueprint,
  title = {Blueprint for Harmonising Unstandardised Disease Registries to Allow Federated Data Analysis: Prepare for the Future},
  shorttitle = {Blueprint for Harmonising Unstandardised Disease Registries to Allow Federated Data Analysis},
  author = {Kroes, Johannes A. and Bansal, Aruna T. and Berret, Emmanuelle and Christian, Nils and Kremer, Andreas and Alloni, Anna and Gabetta, Matteo and Marshall, Chris and Wagers, Scott and Djukanovic, Ratko and Porsbjerg, Celeste and Hamerlijnck, Dominique and Fulton, Olivia and family=Brinke, given=Anneke, prefix=ten, useprefix=false and Bel, Elisabeth H. and Sont, Jacob K.},
  date = {2022-10-01},
  journaltitle = {ERJ Open Research},
  volume = {8},
  number = {4},
  eid = {00168-2022},
  publisher = {European Respiratory Society},
  issn = {2312-0541},
  doi = {10.1183/23120541.00168-2022},
  url = {https://openres.ersjournals.com/content/8/4/00168-2022},
  urldate = {2024-06-04},
  abstract = {Real-world evidence from multinational disease registries is becoming increasingly important not only for confirming the results of randomised controlled trials, but also for identifying phenotypes, monitoring disease progression, predicting response to new drugs and early detection of rare side-effects. With new open-access technologies, it has become feasible to harmonise patient data from different disease registries and use it for data analysis without compromising privacy rules. Here, we provide a blueprint for how a clinical research collaboration can successfully use real-world data from existing disease registries to perform federated analyses. We describe how the European severe asthma clinical research collaboration SHARP (Severe Heterogeneous Asthma Research collaboration, Patient-centred) fulfilled the harmonisation process from nonstandardised clinical registry data to the Observational Medical Outcomes Partnership Common Data Model and built a strong network of collaborators from multiple disciplines and countries. The blueprint covers organisational, financial, conceptual, technical, analytical and research aspects, and discusses both the challenges and the lessons learned. All in all, setting up a federated data network is a complex process that requires thorough preparation, but above all, it is a worthwhile investment for all clinical research collaborations, especially in view of the emerging applications of artificial intelligence and federated learning. Tweetable abstract: Harmonising real-world patient data from diverse registries to allow federated analyses is a complex process that requires thorough preparation but is above all a valuable investment, especially in view of emerging applications of artificial intelligence. https://bit.ly/3NEKKnV},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/CZ6TISLJ/Kroes et al. - 2022 - Blueprint for harmonising unstandardised disease r.pdf}
}

@inproceedings{kurniawan2019midwife,
  title = {Midwife Service Coverage, Quality of Work, and Client Health Improved after Deployment of an {{OpenSRP-driven}} Client Management Application in {{Indonesia}}},
  booktitle = {Proceedings of the 5th {{International Conference}} on {{Health Sciences}} ({{ICHS}} 2018)},
  author = {Kurniawan, Kevin and FitriaSyah, Inraini and Jayakusuma, Ahmad Rafi and Armis, Resty Asmauryanah and Lubis, Yusran and Haryono, Muhammad Abdi and Harefa, Benyamin and Shankar, Anuraj},
  date = {2019-11},
  pages = {155--162},
  publisher = {Atlantis Press},
  issn = {2468-5739},
  doi = {10.2991/ichs-18.2019.21},
  url = {https://www.atlantis-press.com/proceedings/ichs-18/125921329},
  urldate = {2024-01-22},
  abstract = {The quality of maternal and child health services remains suboptimal in most low and middle-income countries (LMIC). Data are routinely collected with paper-based systems but are incomplete, underutilized, and reported as poorly-usable aggregated indicators. Therefore, we developed the Open Smart Register Platform (OpenSRP) application for midwives in...},
  eventtitle = {5th {{International Conference}} on {{Health Sciences}} ({{ICHS}} 2018)},
  isbn = {978-94-6252-824-6},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/AV5DFWKS/Kurniawan2019midwife.pdf;/Users/dkapitan/Zotero/storage/JB3NXXH3/Kurniawan et al. - 2019 - Midwife service coverage, quality of work, and cli.pdf}
}

@online{lee2022feedernet,
  title = {{{FeederNet}} ({{Federated E-Health Big Data}} for {{Evidence Renovation Network}}) Platform in {{Korea}}},
  author = {Lee, Seongwon and Kim, Chungsoo and Chang, Junyuk and Park, Rae Woong},
  date = {2022},
  organization = {OHDSI},
  url = {https://www.ohdsi.org/2022showcase-33/},
  urldate = {2024-06-04},
  langid = {american},
  file = {/Users/dkapitan/Zotero/storage/SCV5WC6H/2022showcase-33.html}
}

@article{leefeasibility,
  title = {Feasibility {{Study}} of {{Federated Learning}} on the {{Distributed Research Network}} of {{OMOP Common Data Model}}},
  author = {Lee, Geun Hyeong and Park, Jonggul and Kim, Jihyeong and Kim, Yeesuk and Choi, Byungjin and Park, Rae Woong and Rhee, Sang Youl and Shin, Soo-Yong},
  date = {2023-04},
  journaltitle = {Healthcare Informatics Research},
  shortjournal = {Healthc Inform Res},
  volume = {29},
  number = {2},
  eprint = {37190741},
  eprinttype = {pmid},
  pages = {168--173},
  issn = {2093-3681},
  doi = {10.4258/hir.2023.29.2.168},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10209729/},
  urldate = {2024-06-04},
  abstract = {Objectives Since protecting patients’ privacy is a major concern in clinical research, there has been a growing need for privacy-preserving data analysis platforms. For this purpose, a federated learning (FL) method based on the Observational Medical Outcomes Partnership (OMOP) common data model (CDM) was implemented, and its feasibility was demonstrated. Methods We implemented an FL platform on FeederNet, which is a distributed clinical data analysis platform based on the OMOP CDM in Korea. We trained it through an artificial neural network (ANN) using data from patients who received steroid prescriptions or injections, with the aim of predicting the occurrence of side effects depending on the prescribed dose. The ANN was trained using the FL platform with the OMOP CDMs of Kyung Hee University Medical Center (KHMC) and Ajou University Hospital (AUH). Results The area under the receiver operating characteristic curves (AUROCs) for predicting bone fracture, osteonecrosis, and osteoporosis using only data from each hospital were 0.8426, 0.6920, and 0.7727 for KHMC and 0.7891, 0.7049, and 0.7544 for AUH, respectively. In contrast, when using FL, the corresponding AUROCs were 0.8260, 0.7001, and 0.7928 for KHMC and 0.7912, 0.8076, and 0.7441 for AUH, respectively. In particular, FL led to a 14\% improvement in performance for osteonecrosis at AUH. Conclusions FL can be performed with the OMOP CDM, and FL often shows better performance than using only a single institution’s data. Therefore, research using OMOP CDM has been expanded from statistical analysis to machine learning so that researchers can conduct more diverse research.},
  langid = {english},
  pmcid = {PMC10209729},
  file = {/Users/dkapitan/Zotero/storage/PGHXSLFQ/Lee et al. - Feasibility Study of Federated Learning on the Dis.pdf}
}

@article{mamuye2022health,
  author       = {Mamuye, Adane L. and Yilma, Tesfahun M. and Abdulwahab, Ahmad and Broomhead, Sean and Zondo, Phumzule and Kyeng, Mercy and Maeda, Justin and Abdulaziz, Mohammed and Wuhib, Tadesse and Tilahun, Binyam C.},
  title        = {Health Information Exchange Policy and Standards for Digital Health Systems in Africa: {{A}} Systematic Review},
  shorttitle   = {Health Information Exchange Policy and Standards for Digital Health Systems in Africa},
  date         = {2022-10-10},
  journaltitle = {PLOS Digital Health},
  shortjournal = {PLOS Digital Health},
  volume       = {1},
  number       = {10},
  pages        = {e0000118},
  publisher    = {Public Library of Science},
  issn         = {2767-3170},
  doi          = {10.1371/journal.pdig.0000118},
  url          = {https://journals.plos.org/digitalhealth/article?id=10.1371/journal.pdig.0000118},
  urldate      = {2023-06-12},
  langid       = {english},
  keywords     = {Africa,Communication in health care,Computer architecture,Data management,Global health,Health care policy,Health services administration and management,Health systems strengthening},
  abstract     = {Lack of interoperability and integration between heterogeneous health systems is a big challenge to realize the potential benefits of eHealth. To best move from siloed applications to interoperable eHealth solutions, health information exchange (HIE) policy and standards are necessary to be established. However, there is no comprehensive evidence on the current status of HIE policy and standards on the African continent. Therefore, this paper aimed to systematically review the status of HIE policy and standards which are currently in practice in Africa. A systematic search of the literature was conducted from Medical Literature Analysis and Retrieval System Online (MEDLINE), Scopus, Web of Science, and Excerpta Medica Database (EMBASE), and a total of 32 papers (21 strategic documents and 11 peer-reviewed papers) were selected based on predefined criteria for synthesis. Results revealed that African countries have paid attention to the development, improvement, adoption, and implementation of HIE architecture for interoperability and standards. Synthetic and semantic interoperability standards were identified for the implementation of HIE in Africa. Based on this comprehensive review, we recommend that comprehensive interoperable technical standards should be set at each national level and should be guided by appropriate governance and legal frameworks, data ownership and use agreements, and health data privacy and security guidelines. On top of the policy issues, there is a need to identify a set of standards (health system standards, communication, messaging standards, terminology/vocabulary standards, patient profile standards, privacy and security, and risk assessment) and implement them throughout all levels of the health system. On top of this, we recommend that the Africa Union (AU) and regional bodies provide the necessary human resource and high-level technical support to African countries to implement HIE policy and standards. 
To realize the full potential of eHealth in the continent, it is recommended that African countries need to have a common HIE policy, interoperable technical standards, and health data privacy and security guidelines. Currently, there is an ongoing effort by the Africa Centres for Disease Control and Prevention (Africa CDC) towards promoting HIE on the continent. A task force has been established from Africa CDC, Health Information Service Provider (HISP) partners, and African and global HIE subject matter experts to provide expertise and guidance in the development of AU policy and standards for HIE. Although the work is still ongoing, the African Union shall continue to support the implementation of HIE policy and standards in the continent. The authors of this review are currently working under the umbrella of the African Union to develop the HIE policy and standard to be endorsed by the head of states of the Africa Union. As a follow-up publication to this, the result will be published in mid-2022.},
  file         = {/Users/dkapitan/Zotero/storage/E6B963RU/Mamuye et al. - 2022 - Health information exchange policy and standards f.pdf}
}

% NOTE(review): this entry records no date and no journaltitle or booktitle; confirm the publication venue and year before citing.
@article{mancohealer,
  author   = {Manco, Carlo and Dolci, Tommaso and Azzalini, Fabio and Barbierato, Enrico and Gribaudo, Marco and Tanca, Letizia},
  title    = {{{HEALER}}: {{A Data Lake Architecture}} for {{Healthcare}}},
  langid   = {english},
  abstract = {With the growth of the Internet of Things and the rapid progress of social networks, everything appears to generate data. The ever-increasing number of connected devices is accompanied by a growth of the volume of data, produced at an ever-increasing rate, and this massive flow includes data types that are difficult to process using standard database techniques. One of the most critical scenarios is healthcare, whose activities need to store and manage a variety of data types – reports written in natural language, medical images, genomic data and waveforms of vital signs – which do not have a well-defined structure. In order to benefit from this large amount of complex data, Data Lakes have recently emerged as a solution to grant central storage and flexible analysis for all types of data. However, there is no Data Lake architecture that fits all the possible scenarios, since the architecture depends heavily on the application domain and, so far, there are no Data Lake architectures that support the specific needs of the healthcare domain. This work proposes HEALER: a Data Lake architecture that effectively performs data ingestion, data storage, and data access with the aim of providing a single central repository for efficient storage of different types of healthcare data. The architecture also enables the analysis and querying of the data, which can be loaded into the Data Lake regardless of their format and type. To verify the effectiveness of the architecture, a proof-of-concept of HEALER has been developed, that allows ingestion of various data, performs waveforms processing to make them more interpretable to researchers and analysts, grants access to the saved data and allows the analysis of natural language reports. Finally we studied the performance of the system in each of its main phases: ingestion, processing, data access and analysis. 
The results lead us to some important considerations to be taken into account when using and configuring the system components.},
  file     = {/Users/dkapitan/Zotero/storage/TWCJ2CUD/Manco et al. - HEALER A Data Lake Architecture for Healthcare.pdf}
}

@article{mandl2020push,
  author       = {Mandl, Kenneth D. and Gottlieb, Daniel and Mandel, Joshua C. and Ignatov, Vladimir and Sayeed, Raheel and Grieve, Grahame and Jones, James and Ellis, Alyssa and Culbertson, Adam},
  title        = {Push {{Button Population Health}}: {{The SMART}}/{{HL7 FHIR Bulk Data Access Application Programming Interface}}},
  shorttitle   = {Push {{Button Population Health}}},
  date         = {2020-11},
  journaltitle = {npj Digital Medicine},
  volume       = {3},
  number       = {1},
  pages        = {1--9},
  publisher    = {Nature Publishing Group},
  issn         = {2398-6352},
  doi          = {10.1038/s41746-020-00358-4},
  copyright    = {2020 The Author(s)},
  langid       = {english},
  keywords     = {Health policy,Outcomes research},
  abstract     = {The 21st Century Cures Act requires that certified health information technology have an application programming interface (API) giving access to all data elements of a patient's electronic health record, “without special effort”. In the spring of 2020, the Office of the National Coordinator of Health Information Technology (ONC) published a rule—21st Century Cures Act Interoperability, Information Blocking, and the ONC Health IT Certification Program—regulating the API requirement along with protections against information blocking. The rule specifies the SMART/HL7 FHIR Bulk Data Access API, which enables access to patient-level data across a patient population, supporting myriad use cases across healthcare, research, and public health ecosystems. The API enables “push button population health” in that core data elements can readily and standardly be extracted from electronic health records, enabling local, regional, and national-scale data-driven innovation.},
  file         = {/Users/dkapitan/Zotero/storage/3JGH9Q26/Mandl et al. - 2020 - Push Button Population Health The SMARTHL7 FHIR .pdf;/Users/dkapitan/Zotero/storage/J3I6PXM2/s41746-020-00358-4.html}
}

@article{mateus2024data,
  author       = {Mateus, Pedro and Moonen, Justine and Beran, Magdalena and Jaarsma, Eva and family=Landen, given=Sophie M., prefix=van der, useprefix=true and Heuvelink, Joost and Birhanu, Mahlet and Harms, Alexander G. J. and Bron, Esther and Wolters, Frank J. and Cats, Davy and Mei, Hailiang and Oomens, Julie and Jansen, Willemijn and Schram, Miranda T. and Dekker, Andre and Bermejo, Inigo},
  title        = {Data Harmonization and Federated Learning for Multi-Cohort Dementia Research Using the {{OMOP}} Common Data Model: {{A Netherlands}} Consortium of Dementia Cohorts Case Study},
  shorttitle   = {Data Harmonization and Federated Learning for Multi-Cohort Dementia Research Using the {{OMOP}} Common Data Model},
  date         = {2024-07-01},
  journaltitle = {Journal of Biomedical Informatics},
  shortjournal = {Journal of Biomedical Informatics},
  volume       = {155},
  pages        = {104661},
  issn         = {1532-0464},
  doi          = {10.1016/j.jbi.2024.104661},
  url          = {https://www.sciencedirect.com/science/article/pii/S1532046424000790},
  urldate      = {2024-06-04},
  keywords     = {CDM,Cohort studies,Data harmonization,ETL,Federated learning,OMOP},
  abstract     = {Background Establishing collaborations between cohort studies has been fundamental for progress in health research. However, such collaborations are hampered by heterogeneous data representations across cohorts and legal constraints to data sharing. The first arises from a lack of consensus in standards of data collection and representation across cohort studies and is usually tackled by applying data harmonization processes. The second is increasingly important due to raised awareness for privacy protection and stricter regulations, such as the GDPR. Federated learning has emerged as a privacy-preserving alternative to transferring data between institutions through analyzing data in a decentralized manner. Methods In this study, we set up a federated learning infrastructure for a consortium of nine Dutch cohorts with appropriate data available to the etiology of dementia, including an extract, transform, and load (ETL) pipeline for data harmonization. Additionally, we assessed the challenges of transforming and standardizing cohort data using the Observational Medical Outcomes Partnership (OMOP) common data model (CDM) and evaluated our tool in one of the cohorts employing federated algorithms. Results We successfully applied our ETL tool and observed a complete coverage of the cohorts’ data by the OMOP CDM. The OMOP CDM facilitated the data representation and standardization, but we identified limitations for cohort-specific data fields and in the scope of the vocabularies available. Specific challenges arise in a multi-cohort federated collaboration due to technical constraints in local environments, data heterogeneity, and lack of direct access to the data. Conclusion In this article, we describe the solutions to these challenges and limitations encountered in our study. Our study shows the potential of federated learning as a privacy-preserving solution for multi-cohort studies that enhance reproducibility and reuse of both data and analyses.},
  file         = {/Users/dkapitan/Zotero/storage/ARV4T27E/S1532046424000790.html}
}

@article{mehl2020open,
  title = {Open {{Smart Register Platform}} ({{OpenSRP}})},
  author = {Mehl, Garrett},
  date = {2020-10-14},
  journaltitle = {mHealth Compendium},
  shortjournal = {mHealth Compendium},
  volume = {5},
  pages = {42--43},
  url = {https://lib.digitalsquare.io/handle/123456789/77592},
  urldate = {2023-01-21},
  abstract = {The Open Smart Register Platform (OpenSRP) can run on any Android device and integrates previously discrete proven innovations in mHealth. OpenSRP complements other widely-deployed information technology solutions, including DHIS2 and OpenMRS.},
  langid = {english},
  annotation = {Accepted: 2020-10-14T10:19:22Z},
  file = {/Users/dkapitan/Zotero/storage/AYEIEQ8L/Garrett Mehl - 2020 - Open Smart Register Platform (OpenSRP).pdf}
}

@article{mehl2023fullstac,
  author       = {Mehl, Garrett L and Seneviratne, Martin G and Berg, Matt L and Bidani, Suhel and Distler, Rebecca L and Gorgens, Marelize and Kallander, Karin E and Labrique, Alain B and Landry, Mark S and Leitner, Carl and Lubell-Doughtie, Peter B and Marcelo, Alvin D and Matias, Yossi and Nelson, Jennifer and Nguyen, Von and Nsengimana, Jean Philbert and Orton, Maeghan and Otzoy Garcia, Daniel R and Oyaole, Daniel R and Ratanaprayul, Natschja and Roth, Susann and Schaefer, Merrick P and Settle, Dykki and Tang, Jing and Tien-Wahser, Barakissa and Wanyee, Steven and Hersch, Fred},
  title        = {A Full-{{STAC}} Remedy for Global Digital Health Transformation: Open Standards, Technologies, Architectures and Content},
  shorttitle   = {A Full-{{STAC}} Remedy for Global Digital Health Transformation},
  date         = {2023-01-01},
  journaltitle = {Oxford Open Digital Health},
  shortjournal = {Oxford Open Digital Health},
  volume       = {1},
  pages        = {oqad018},
  issn         = {2754-4591},
  doi          = {10.1093/oodh/oqad018},
  url          = {https://doi.org/10.1093/oodh/oqad018},
  urldate      = {2024-02-09},
  abstract     = {The global digital health ecosystem is project-centric: point solutions are developed for vertical health programs and financed through vertical funding allocations. This results in data fragmentation and technology lock-in, compromising health care delivery. A convergence of trends enabled by interoperability and digital governance makes possible a shift towards person-focused health. Together, open Standards, open Technologies, open Architectures and open Content represent a next-generation ‘full-STAC’ remedy for digital health transformation. Local developers and implementers can avoid reinventing the wheel, and instead build digital tools suited to local needs—where data travels with an individual over time, evidence-based practice is easily integrated, and insights are gleaned from harmonized data. This is the culmination of the vision endorsed by 194 WHO Member States in the Global Strategy on Digital Health 2020 to 2025.},
  file         = {/Users/dkapitan/Zotero/storage/4D3EIB6H/Mehl et al. - 2023 - A full-STAC remedy for global digital health trans.pdf;/Users/dkapitan/Zotero/storage/9ILKUCUU/7475299.html}
}

@article{moncada-torres2021vantage6,
  author       = {Moncada-Torres, Arturo and Martin, Frank and Sieswerda, Melle and Van Soest, Johan and Geleijnse, Gijs},
  title        = {{{VANTAGE6}}: An Open Source {{priVAcy preserviNg federaTed leArninG infrastructurE}} for {{Secure Insight eXchange}}},
  shorttitle   = {{{VANTAGE6}}},
  date         = {2021-01-25},
  journaltitle = {AMIA Annual Symposium Proceedings},
  shortjournal = {AMIA Annu Symp Proc},
  volume       = {2020},
  pages        = {870--877},
  issn         = {1942-597X},
  eprint       = {33936462},
  eprinttype   = {pmid},
  pmcid        = {PMC8075508},
  url          = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8075508/},
  urldate      = {2024-09-21},
  abstract     = {Answering many of the research questions in the field of cancer informatics requires incorporating and centralizing data that are hosted by different parties. Federated Learning (FL) has emerged as a new approach in which a global model can be generated without disclosing private patient data by keeping them at their original location. Flexible, user-friendly, and robust infrastructures are crucial for bringing FL solutions to the day-to-day work of the cancer epidemiologist. In this paper, we present an open source priVAcy preserviNg federaTed leArninG infrastructurE for Secure Insight eXchange, VANTAGE6. We provide a detailed description of its conceptual design, modular architecture, and components. We also show a few examples where VANTAGE6 has been successfully used in research on observational cancer data. Developing and deploying technology to support federated analyses~– such as VANTAGE6~– will pave the way for the adoption and mainstream practice of this new approach for analyzing decentralized data.},
  file         = {/Users/dkapitan/Zotero/storage/IBF6X9ZZ/Moncada-Torres et al. - 2021 - VANTAGE6 an open source priVAcy preserviNg federa.pdf}
}

% NOTE(review): same work as moncada-torres2021vantage6; the metadata below is copied from that entry so both keys render identically. Consider consolidating citations onto a single key.
@article{moncada-torresvantage6,
  title = {{{VANTAGE6}}: An Open Source {{priVAcy preserviNg federaTed leArninG infrastructurE}} for {{Secure Insight eXchange}}},
  shorttitle = {{{VANTAGE6}}},
  author = {Moncada-Torres, Arturo and Martin, Frank and Sieswerda, Melle and Van Soest, Johan and Geleijnse, Gijs},
  date = {2021-01-25},
  journaltitle = {AMIA Annual Symposium Proceedings},
  shortjournal = {AMIA Annu Symp Proc},
  volume = {2020},
  eprint = {33936462},
  eprinttype = {pmid},
  pages = {870--877},
  issn = {1942-597X},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8075508/},
  urldate = {2024-09-21},
  abstract = {Answering many of the research questions in the field of cancer informatics requires incorporating and centralizing data that are hosted by different parties. Federated Learning (FL) has emerged as a new approach in which a global model can be generated without disclosing private patient data by keeping them at their original location. Flexible, user-friendly, and robust infrastructures are crucial for bringing FL solutions to the day-to-day work of the cancer epidemiologist. In this paper, we present an open source priVAcy preserviNg federaTed leArninG infrastructurE for Secure Insight eXchange, VANTAGE6. We provide a detailed description of its conceptual design, modular architecture, and components. We also show a few examples where VANTAGE6 has been successfully used in research on observational cancer data. Developing and deploying technology to support federated analyses – such as VANTAGE6 – will pave the way for the adoption and mainstream practice of this new approach for analyzing decentralized data.},
  langid = {english},
  pmcid = {PMC8075508},
  file = {/Users/dkapitan/Zotero/storage/XTXQ3S6U/Moncada-Torres et al. - VANTAGE6 an open source priVAcy preserviNg federa.pdf}
}

@article{mullie2023coda,
  title = {{{CODA}}: An Open-Source Platform for Federated Analysis and Machine Learning on Distributed Healthcare Data},
  shorttitle = {{{CODA}}},
  author = {Mullie, Louis and Afilalo, Jonathan and Archambault, Patrick and Bouchakri, Rima and Brown, Kip and Buckeridge, David L and Cavayas, Yiorgos Alexandros and Turgeon, Alexis F and Martineau, Denis and Lamontagne, François and Lebrasseur, Martine and Lemieux, Renald and Li, Jeffrey and Sauthier, Michaël and St-Onge, Pascal and Tang, An and Witteman, William and Chassé, Michaël},
  date = {2023-12-21},
  journaltitle = {Journal of the American Medical Informatics Association},
  shortjournal = {Journal of the American Medical Informatics Association},
  pages = {ocad235},
  issn = {1527-974X},
  doi = {10.1093/jamia/ocad235},
  url = {https://doi.org/10.1093/jamia/ocad235},
  urldate = {2024-01-08},
  abstract = {Distributed computations facilitate multi-institutional data analysis while avoiding the costs and complexity of data pooling. Existing approaches lack crucial features, such as built-in medical standards and terminologies, no-code data visualizations, explicit disclosure control mechanisms, and support for basic statistical computations, in addition to gradient-based optimization capabilities. We describe the development of the Collaborative Data Analysis (CODA) platform, and the design choices undertaken to address the key needs identified during our survey of stakeholders. We use a public dataset (MIMIC-IV) to demonstrate end-to-end multi-modal FL using CODA. We assessed the technical feasibility of deploying the CODA platform at 9 hospitals in Canada, describe implementation challenges, and evaluate its scalability on large patient populations. The CODA platform was designed, developed, and deployed between January 2020 and January 2023. Software code, documentation, and technical documents were released under an open-source license. Multi-modal federated averaging is illustrated using the MIMIC-IV and MIMIC-CXR datasets. To date, 8 out of the 9 participating sites have successfully deployed the platform, with a total enrolment of >1M patients. Mapping data from legacy systems to FHIR was the biggest barrier to implementation. The CODA platform was developed and successfully deployed in a public healthcare setting in Canada, with heterogeneous information technology systems and capabilities. Ongoing efforts will use the platform to develop and prospectively validate models for risk assessment, proactive monitoring, and resource usage. Further work will also make tools available to facilitate migration from legacy formats to FHIR and DICOM.},
  file = {/Users/dkapitan/Zotero/storage/8Q7H7F9E/Mullie et al. - 2023 - CODA an open-source platform for federated analys.pdf;/Users/dkapitan/Zotero/storage/9WIQCS33/7486840.html}
}

@article{nguyen2018computerassisted,
  author       = {Nguyen, Anthony N. and Truran, Donna and Kemp, Madonna and Koopman, Bevan and Conlan, David and O’Dwyer, John and Zhang, Ming and Karimi, Sarvnaz and Hassanzadeh, Hamed and Lawley, Michael J. and Green, Damian},
  title        = {Computer-{{Assisted Diagnostic Coding}}: {{Effectiveness}} of an {{NLP-based}} Approach Using {{SNOMED CT}} to {{ICD-10}} Mappings},
  shorttitle   = {Computer-{{Assisted Diagnostic Coding}}},
  date         = {2018-12-05},
  journaltitle = {AMIA Annual Symposium Proceedings},
  shortjournal = {AMIA Annu Symp Proc},
  volume       = {2018},
  pages        = {807--816},
  issn         = {1942-597X},
  eprint       = {30815123},
  eprinttype   = {pmid},
  pmcid        = {PMC6371260},
  url          = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6371260/},
  urldate      = {2024-04-11},
  abstract     = {Computer-assisted (diagnostic) coding (CAC) aims to improve the operational productivity and accuracy of clinical coders. The level of accuracy, especially for a wide range of complex and less prevalent clinical cases, remains an open research problem. This study investigates this problem on a broad spectrum of diagnostic codes and, in particular, investigates the effectiveness of utilising SNOMED CT for ICD-10 diagnosis coding. Hospital progress notes were used to provide the narrative rich electronic patient records for the investigation. A natural language processing (NLP) approach using mappings between SNOMED CT and ICD-10-AM (Australian Modification) was used to guide the coding. The proposed approach achieved 54.1\% sensitivity and 70.2\% positive predictive value. Given the complexity of the task, this was encouraging given the simplicity of the approach and what was projected as possible from a manual diagnosis code validation study (76.3\% sensitivity). The results show the potential for advanced NLP-based approaches that leverage SNOMED CT to ICD-10 mapping for hospital in-patient coding.},
  file         = {/Users/dkapitan/Zotero/storage/8XI9PD43/Nguyen et al. - 2018 - Computer-Assisted Diagnostic Coding Effectiveness.pdf}
}

@article{nsaghurwe2021one,
  author       = {Nsaghurwe, Alpha and Dwivedi, Vikas and Ndesanjo, Walter and Bamsi, Haji and Busiga, Moses and Nyella, Edwin and Massawe, Japhet Victor and Smith, Dasha and Onyejekwe, Kate and Metzger, Jonathan and Taylor, Patricia},
  title        = {One Country's Journey to Interoperability: {{Tanzania}}'s Experience Developing and Implementing a National Health Information Exchange},
  shorttitle   = {One Country's Journey to Interoperability},
  journaltitle = {BMC Medical Informatics and Decision Making},
  shortjournal = {BMC Medical Informatics and Decision Making},
  date         = {2021-04-29},
  volume       = {21},
  number       = {1},
  pages        = {139},
  issn         = {1472-6947},
  doi          = {10.1186/s12911-021-01499-6},
  url          = {https://doi.org/10.1186/s12911-021-01499-6},
  urldate      = {2023-06-01},
  keywords     = {Architecture,Governance,Health,Interoperability,Standards},
  abstract     = {Robust, flexible, and integrated health information (HIS) systems are essential to achieving national and international goals in health and development. Such systems are still uncommon in most low and middle income countries. This article describes a first-phase activity in Tanzania to integrate the country’s vertical health management information system with the help of an interoperability layer that enables cross-program data exchange.},
  file         = {/Users/dkapitan/Zotero/storage/YNEPA9TZ/Nsaghurwe et al. - 2021 - One country's journey to interoperability Tanzani.pdf;/Users/dkapitan/Zotero/storage/7XW5ART6/s12911-021-01499-6.html},
}

@online{ohdsi-implementations,
  title   = {Software {{Tools}} – {{OHDSI}}},
  langid  = {american},
  url     = {https://www.ohdsi.org/software-tools/},
  urldate = {2024-09-20},
  file    = {/Users/dkapitan/Zotero/storage/HHSWTQ3I/software-tools.html},
}

@software{omoponfhir,
  title   = {{{OMOPonFHIR}}},
  url     = {https://omoponfhir.org/},
  urldate = {2024-09-20},
  file    = {/Users/dkapitan/Zotero/storage/G28T3BX9/omoponfhir.org.html},
}

@software{open-health-stack,
  title    = {Open {{Health Stack}}},
  url      = {https://developers.google.com/open-health-stack},
  urldate  = {2024-09-20},
  abstract = {Building blocks for creating next-gen, data-driven healthcare solutions. Currently in beta.},
  file     = {/Users/dkapitan/Zotero/storage/RPNU8MEL/open-health-stack.html},
}

@online{openehr-implementations,
  title = {{{openEHR Platform}}},
  author = {Beale, Thomas and Heard, Sam},
  url = {https://openehr.org/products_tools/platform/},
  urldate = {2024-09-20},
  abstract = {openEHR provides open source specifications and reference implementations of future proof EHR systems.},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/NYUQCHBP/platform.html}
}

@software{openfhir,
  title = {{{openFHIR}} Documentation},
  version = {0.9.3},
  url = {https://open-fhir.com/documentation/index.html},
  urldate = {2025-02-04},
  file = {/Users/dkapitan/Zotero/storage/HCTSHCZI/index.html}
}

@standard{openhie,
  title    = {{{OpenHIE Framework}} v5.2-En},
  date     = {2024-08-01},
  langid   = {american},
  url      = {https://ohie.org/},
  urldate  = {2024-08-27},
  abstract = {OpenHIE is a community of individuals who strive to develop health information exchanges that improve patient care, health, and so much more.},
  file     = {/Users/dkapitan/Zotero/storage/55NPDMZ3/ohie.org.html},
}

@incollection{papadopoulos2014crossconformal,
  title = {A {{Cross-Conformal Predictor}} for {{Multi-label Classification}}},
  booktitle = {Artificial {{Intelligence Applications}} and {{Innovations}}},
  author = {Papadopoulos, Harris},
  date = {2014},
  series = {{{IFIP Advances}} in {{Information}} and {{Communication Technology}}},
  volume = {437},
  eprint = {2211.16238},
  eprinttype = {arXiv},
  eprintclass = {cs.LG},
  pages = {241--250},
  publisher = {Springer},
  doi = {10.1007/978-3-662-44722-2_26},
  url = {http://arxiv.org/abs/2211.16238},
  urldate = {2024-10-01},
  abstract = {Unlike the typical classification setting where each instance is associated with a single class, in multi-label learning each instance is associated with multiple classes simultaneously. Therefore the learning task in this setting is to predict the subset of classes to which each instance belongs. This work examines the application of a recently developed framework called Conformal Prediction (CP) to the multi-label learning setting. CP complements the predictions of machine learning algorithms with reliable measures of confidence. As a result the proposed approach instead of just predicting the most likely subset of classes for a new unseen instance, also indicates the likelihood of each predicted subset being correct. This additional information is especially valuable in the multi-label setting where the overall uncertainty is extremely high.},
  keywords = {Computer Science - Machine Learning},
  file = {/Users/dkapitan/Zotero/storage/9HJCVPUI/Papadopoulos - 2014 - A Cross-Conformal Predictor for Multi-label Classification.pdf;/Users/dkapitan/Zotero/storage/HS2LASR8/2211.html}
}

@inproceedings{patil2024processinga,
  title = {Processing {{FHIR}} in Modern {{Data Lakehouse}}},
  booktitle = {2024 {{IEEE}} 20th {{International Conference}} on E-{{Science}} (e-{{Science}})},
  author = {Patil, Shreyas and Belloum, Adam S. Z.},
  date = {2024-09-16},
  pages = {1--7},
  publisher = {IEEE},
  venue = {Osaka, Japan},
  doi = {10.1109/e-Science62913.2024.10678734},
  url = {https://ieeexplore.ieee.org/document/10678734/},
  urldate = {2024-12-12},
  eventtitle = {2024 {{IEEE}} 20th {{International Conference}} on E-{{Science}} (e-{{Science}})},
  isbn = {9798350365610},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/39ICAVCD/Patil and Belloum - 2024 - Processing FHIR in modern Data Lakehouse.pdf}
}

@article{pedreira2023composable,
  title = {The {{Composable Data Management System Manifesto}}},
  author = {Pedreira, Pedro and Erling, Orri and Karanasos, Konstantinos and Schneider, Scott and McKinney, Wes and Valluri, Satya R. and Zait, Mohamed and Nadeau, Jacques},
  date = {2023-06},
  journaltitle = {Proceedings of the VLDB Endowment},
  shortjournal = {Proc. VLDB Endow.},
  volume = {16},
  number = {10},
  pages = {2679--2685},
  issn = {2150-8097},
  doi = {10.14778/3603581.3603604},
  url = {https://dl.acm.org/doi/10.14778/3603581.3603604},
  urldate = {2023-12-27},
  abstract = {The requirement for specialization in data management systems has evolved faster than our software development practices. After decades of organic growth, this situation has created a siloed landscape composed of hundreds of products developed and maintained as monoliths, with limited reuse between systems. This fragmentation has resulted in developers often reinventing the wheel, increased maintenance costs, and slowed down innovation. It has also affected the end users, who are often required to learn the idiosyncrasies of dozens of incompatible SQL and non-SQL API dialects, and settle for systems with incomplete functionality and inconsistent semantics. In this vision paper, considering the recent popularity of open source projects aimed at standardizing different aspects of the data stack, we advocate for a paradigm shift in how data management systems are designed. We believe that by decomposing these into a modular stack of reusable components, development can be streamlined while creating a more consistent experience for users. Towards that goal, we describe the state-of-the-art, principal open source technologies, and highlight open questions and areas where additional research is needed. We hope this work will foster collaboration, motivate further research, and promote a more composable future for data management.},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/SVVTZQSR/Pedreira et al. - 2023 - The Composable Data Management System Manifesto.pdf}
}

@article{peng2023etlprocess,
  title = {An {{ETL-process}} Design for Data Harmonization to Participate in International Research with {{German}} Real-World Data Based on {{FHIR}} and {{OMOP CDM}}},
  author = {Peng, Yuan and Henke, Elisa and Reinecke, Ines and Zoch, Michéle and Sedlmayr, Martin and Bathelt, Franziska},
  date = {2023-01},
  journaltitle = {International Journal of Medical Informatics},
  shortjournal = {International Journal of Medical Informatics},
  volume = {169},
  pages = {104925},
  issn = {1386-5056},
  doi = {10.1016/j.ijmedinf.2022.104925},
  url = {https://linkinghub.elsevier.com/retrieve/pii/S1386505622002398},
  urldate = {2024-06-04},
  abstract = {Background: International studies are increasingly needed in order to gain more unbiased evidence from real-world data. To achieve this goal across the European Union, the EMA set up the DARWIN EU project based on OMOP CDM established by the OHDSI community. The harmonization of heterogeneous local health data in OMOP CDM is an essential step to participate in such networks. Using the widespread communication standard HL7 FHIR can reduce the complexity of the transformation process to OMOP CDM. Enabling German university hospitals to participate in such networks requires an Extract, Transform and Load (ETL)-process that satisfies the following criteria: 1) transforming German patient data from FHIR to OMOP CDM, 2) processing huge amount of data at once and 3) flexibility to cope with changes in FHIR profiles. Method: A mapping of German patient data from FHIR to OMOP CDM was accomplished, validated by an interdisciplinary team and checked through the OHDSI Data Quality Dashboard (DQD). To satisfy criteria 2–3, we decided to use SpringBatch-Framework according to its chunk-oriented design and reusable functions for processing large amounts of data. Results: We have successfully developed an ETL-process that fulfills the defined criteria of transforming German patient data from FHIR into OMOP CDM. To measure the validity of the mapping conformance and performance of the ETL-process, it was tested with 392,022 FHIR resources. The ETL execution lasted approximately one minute and the DQD result shows 99\% conformance in OMOP CDM. Conclusions: Our ETL-process has been successfully tested and integrated at 10 German university hospitals. The data harmonization utilizing international recognized standards like FHIR and OMOP fosters their ability to participate in international observational studies. Additionally, the ETL process can help to prepare more German hospitals with their data harmonization journey based on existing standards.},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/RGACTPF2/Peng et al. - 2023 - An ETL-process design for data harmonization to pa.pdf}
}

@online{plugin,
  title = {{{PLUGIN}} – {{Platform}} voor {{Uitwisseling}} en {{Hergebruik}} van {{Klinische Data Nederland}}},
  url = {https://plugin.healthcare/},
  urldate = {2024-09-21},
  langid = {dutch},
  file = {/Users/dkapitan/Zotero/storage/UDD4RD4R/plugin.healthcare.html}
}

@incollection{pohjonen2022norway,
  author    = {Pohjonen, Hanna},
  title     = {Norway, {{Sweden}}, and {{Finland}} as Forerunners in Open Ecosystems and {{openEHR}}},
  editor    = {Hovenga, Evelyn and Grain, Heather},
  booktitle = {Roadmap to {{Successful Digital Health Ecosystems}}},
  date      = {2022-01-01},
  pages     = {457--471},
  publisher = {Academic Press},
  isbn      = {978-0-12-823413-6},
  doi       = {10.1016/B978-0-12-823413-6.00011-2},
  url       = {https://www.sciencedirect.com/science/article/pii/B9780128234136000112},
  urldate   = {2025-02-05},
  keywords  = {Data exchange,Data sharing,EHR,Modular,Nordic,Open ecosystem,openEHR},
  abstract  = {Norway, Sweden, and Finland may be considered as forerunners in adopting openEHR and moving towards open ecosystems. Cross-organisational data sharing, or data exchange, has historically been the objective in all Nordic countries. The needs to share data finally resulted in national document-based data sharing infrastructures in all Nordic countries. Document-based sharing is, however, somewhat limited, and the need for complementary structured sharing has started to emerge. openEHR projects typically start as data exchange projects and gradually develop into a modular EHR. In the Nordics, however, the dominant EHR market leaders chose openEHR as their own data model and started to modernise their solutions as their own internal decision. This is not necessarily the approach taken in other countries. The Nordic experience shows that in order to build a true ecosystem, it is important to increase awareness amongst all stakeholders. A monolithic solution may seem lucrative unless there is sufficient understanding of the benefits of modularity and openEHR.},
  file      = {/Users/dkapitan/Zotero/storage/UQT5YXBQ/Pohjonen - 2022 - Chapter 20 - Norway, Sweden, and Finland as forerunners in open ecosystems and openEHR.pdf;/Users/dkapitan/Zotero/storage/UTVGDW2T/B9780128234136000112.html},
}

@article{rauniyar2024federated,
  author       = {Rauniyar, Ashish and Hagos, Desta Haileselassie and Jha, Debesh and Håkegård, Jan Erik and Bagci, Ulas and Rawat, Danda B. and Vlassov, Vladimir},
  title        = {Federated {{Learning}} for {{Medical Applications}}: {{A Taxonomy}}, {{Current Trends}}, {{Challenges}}, and {{Future Research Directions}}},
  shorttitle   = {Federated {{Learning}} for {{Medical Applications}}},
  journaltitle = {IEEE Internet of Things Journal},
  shortjournal = {IEEE Internet Things J.},
  date         = {2024-03-01},
  volume       = {11},
  number       = {5},
  pages        = {7374--7398},
  issn         = {2327-4662, 2372-2541},
  doi          = {10.1109/JIOT.2023.3329061},
  url          = {https://ieeexplore.ieee.org/document/10304218/},
  urldate      = {2025-01-24},
  langid       = {english},
  file         = {/Users/dkapitan/Zotero/storage/438PLYBW/Rauniyar et al. - 2024 - Federated Learning for Medical Applications A Taxonomy, Current Trends, Challenges, and Future Rese.pdf},
}

@article{reynolds2011open,
  title = {Open {{Source}}, {{Open Standards}}, and {{Health Care Information Systems}}},
  author = {Reynolds, Carl J. and Wyatt, Jeremy C.},
  date = {2011-02-17},
  journaltitle = {Journal of Medical Internet Research},
  volume = {13},
  number = {1},
  pages = {e24},
  publisher = {JMIR Publications Inc., Toronto, Canada},
  doi = {10.2196/jmir.1521},
  url = {https://www.jmir.org/2011/1/e24},
  urldate = {2024-05-29},
  abstract = {Recognition of the improvements in patient safety, quality of patient care, and efficiency that health care information systems have the potential to bring has led to significant investment. Globally the sale of health care information systems now represents a multibillion dollar industry. As policy makers, health care professionals, and patients, we have a responsibility to maximize the return on this investment. To this end we analyze alternative licensing and software development models, as well as the role of standards. We describe how licensing affects development. We argue for the superiority of open source licensing to promote safer, more effective health care information systems. We claim that open source licensing in health care information systems is essential to rational procurement strategy.},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/NPY3U838/Reynolds and Wyatt - 2011 - Open Source, Open Standards, and Health Care Infor.pdf}
}

@article{rieke2020future,
  title = {The Future of Digital Health with Federated Learning},
  author = {Rieke, Nicola and Hancox, Jonny and Li, Wenqi and Milletarì, Fausto and Roth, Holger R. and Albarqouni, Shadi and Bakas, Spyridon and Galtier, Mathieu N. and Landman, Bennett A. and Maier-Hein, Klaus and Ourselin, Sébastien and Sheller, Micah and Summers, Ronald M. and Trask, Andrew and Xu, Daguang and Baust, Maximilian and Cardoso, M. Jorge},
  date = {2020-09-14},
  journaltitle = {npj Digital Medicine},
  shortjournal = {npj Digit. Med.},
  volume = {3},
  number = {1},
  pages = {1--7},
  publisher = {Nature Publishing Group},
  issn = {2398-6352},
  doi = {10.1038/s41746-020-00323-1},
  url = {https://www.nature.com/articles/s41746-020-00323-1},
  urldate = {2023-04-23},
  abstract = {Data-driven machine learning (ML) has emerged as a promising approach for building accurate and robust statistical models from medical data, which is collected in huge volumes by modern healthcare systems. Existing medical data is not fully exploited by ML primarily because it sits in data silos and privacy concerns restrict access to this data. However, without access to sufficient data, ML will be prevented from reaching its full potential and, ultimately, from making the transition from research to clinical practice. This paper considers key factors contributing to this issue, explores how federated learning (FL) may provide a solution for the future of digital health and highlights the challenges and considerations that need to be addressed.},
  langid = {english},
  keywords = {Medical imaging,Medical research},
  file = {/Users/dkapitan/Zotero/storage/DJI9BHQP/Rieke et al. - 2020 - The future of digital health with federated learni.pdf}
}

@article{rosenau2024uncovering,
  author       = {Rosenau, Lorenz and Behrend, Paul and Wiedekopf, Joshua and Gruendner, Julian and Ingenerf, Josef},
  title        = {Uncovering {{Harmonization Potential}} in {{Health Care Data Through Iterative Refinement}} of {{Fast Healthcare Interoperability Resources Profiles Based}} on {{Retrospective Discrepancy Analysis}}: {{Case Study}}},
  shorttitle   = {Uncovering {{Harmonization Potential}} in {{Health Care Data Through Iterative Refinement}} of {{Fast Healthcare Interoperability Resources Profiles Based}} on {{Retrospective Discrepancy Analysis}}},
  journaltitle = {JMIR Medical Informatics},
  shortjournal = {JMIR Med Inform},
  date         = {2024-07-23},
  volume       = {12},
  pages        = {e57005},
  issn         = {2291-9694},
  doi          = {10.2196/57005},
  url          = {https://medinform.jmir.org/2024/1/e57005},
  urldate      = {2024-08-12},
  langid       = {english},
  abstract     = {Background: Cross-institutional interoperability between health care providers remains a recurring challenge worldwide. The German Medical Informatics Initiative, a collaboration of 37 university hospitals in Germany, aims to enable interoperability between partner sites by defining Fast Healthcare Interoperability Resources (FHIR) profiles for the cross-institutional exchange of health care data, the Core Data Set (CDS). The current CDS and its extension modules define elements representing patients’ health care records. All university hospitals in Germany have made significant progress in providing routine data in a standardized format based on the CDS. In addition, the central research platform for health, the German Portal for Medical Research Data feasibility tool, allows medical researchers to query the available CDS data items across many participating hospitals. Objective: In this study, we aimed to evaluate a novel approach of combining the current top-down generated FHIR profiles with the bottom-up generated knowledge gained by the analysis of respective instance data. This allowed us to derive options for iteratively refining FHIR profiles using the information obtained from a discrepancy analysis. Methods: We developed an FHIR validation pipeline and opted to derive more restrictive profiles from the original CDS profiles. This decision was driven by the need to align more closely with the specific assumptions and requirements of the central feasibility platform’s search ontology. While the original CDS profiles offer a generic framework adaptable for a broad spectrum of medical informatics use cases, they lack the specificity to model the nuanced criteria essential for medical researchers. A key example of this is the necessity to represent specific laboratory codings and values interdependencies accurately. 
The validation results allow us to identify discrepancies between the instance data at the clinical sites and the profiles specified by the feasibility platform and addressed in the future. Results: A total of 20 university hospitals participated in this study. Historical factors, lack of harmonization, a wide range of source systems, and case sensitivity of coding are some of the causes for the discrepancies identified. While in our case study, Conditions, Procedures, and Medications have a high degree of uniformity in the coding of instance data due to legislative requirements for billing in Germany, we found that laboratory values pose a significant data harmonization challenge due to their interdependency between coding and value. Conclusions: While the CDS achieves interoperability, different challenges for federated data access arise, requiring more specificity in the profiles to make assumptions on the instance data. We further argue that further harmonization of the instance data can significantly lower required retrospective harmonization efforts. We recognize that discrepancies cannot be resolved},
  file         = {/Users/dkapitan/Zotero/storage/JVB7L4HM/Rosenau et al. - 2024 - Uncovering Harmonization Potential in Health Care Data Through Iterative Refinement of Fast Healthca.pdf},
}

@article{secondary,
  title = {The {{Secondary Use}} of {{Electronic Health Records}} for {{Data Mining}}: {{Data Characteristics}} and {{Challenges}}},
  journaltitle = {ACM Computing Surveys},
  doi = {10.1145/3490234},
  url = {https://dl.acm.org/doi/full/10.1145/3490234},
  urldate = {2024-09-19},
  file = {/Users/dkapitan/Zotero/storage/GQNF2Y2T/The Secondary Use of Electronic Health Records for.pdf;/Users/dkapitan/Zotero/storage/5HSI52JE/3490234.html}
}

@article{sinaci2023dataa,
  title = {A {{Data Transformation Methodology}} to {{Create Findable}}, {{Accessible}}, {{Interoperable}}, and {{Reusable Health Data}}: {{Software Design}}, {{Development}}, and {{Evaluation Study}}},
  shorttitle = {A {{Data Transformation Methodology}} to {{Create Findable}}, {{Accessible}}, {{Interoperable}}, and {{Reusable Health Data}}},
  author = {Sinaci, A. Anil and Gencturk, Mert and Teoman, Huseyin Alper and Laleci Erturkmen, Gokce Banu and Alvarez-Romero, Celia and Martinez-Garcia, Alicia and Poblador-Plou, Beatriz and Carmona-Pírez, Jonás and Löbe, Matthias and Parra-Calderon, Carlos Luis},
  date = {2023-03-08},
  journaltitle = {Journal of Medical Internet Research},
  shortjournal = {J Med Internet Res},
  volume = {25},
  eprint = {36884270},
  eprinttype = {pmid},
  pages = {e42822},
  issn = {1439-4456},
  doi = {10.2196/42822},
  url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10034606/},
  urldate = {2025-01-09},
  abstract = {Background Sharing health data is challenging because of several technical, ethical, and regulatory issues. The Findable, Accessible, Interoperable, and Reusable (FAIR) guiding principles have been conceptualized to enable data interoperability. Many studies provide implementation guidelines, assessment metrics, and software to achieve FAIR-compliant data, especially for health data sets. Health Level 7 (HL7) Fast Healthcare Interoperability Resources (FHIR) is a health data content modeling and exchange standard. Objective Our goal was to devise a new methodology to extract, transform, and load existing health data sets into HL7 FHIR repositories in line with FAIR principles, develop a Data Curation Tool to implement the methodology, and evaluate it on health data sets from 2 different but complementary institutions. We aimed to increase the level of compliance with FAIR principles of existing health data sets through standardization and facilitate health data sharing by eliminating the associated technical barriers. Methods Our approach automatically processes the capabilities of a given FHIR end point and directs the user while configuring mappings according to the rules enforced by FHIR profile definitions. Code system mappings can be configured for terminology translations through automatic use of FHIR resources. The validity of the created FHIR resources can be automatically checked, and the software does not allow invalid resources to be persisted. At each stage of our data transformation methodology, we used particular FHIR-based techniques so that the resulting data set could be evaluated as FAIR. We performed a data-centric evaluation of our methodology on health data sets from 2 different institutions. Results Through an intuitive graphical user interface, users are prompted to configure the mappings into FHIR resource types with respect to the restrictions of selected profiles. 
Once the mappings are developed, our approach can syntactically and semantically transform existing health data sets into HL7 FHIR without loss of data utility according to our privacy-concerned criteria. In addition to the mapped resource types, behind the scenes, we create additional FHIR resources to satisfy several FAIR criteria. According to the data maturity indicators and evaluation methods of the FAIR Data Maturity Model, we achieved the maximum level (level 5) for being Findable, Accessible, and Interoperable and level 3 for being Reusable. Conclusions We developed and extensively evaluated our data transformation approach to unlock the value of existing health data residing in disparate data silos to make them available for sharing according to the FAIR principles. We showed that our method can successfully transform existing health data sets into HL7 FHIR without loss of data utility, and the result is FAIR in terms of the FAIR Data Maturity Model. We support institutional migration to HL7 FHIR, which not only leads to FAIR data sharing but also eases the integration with different research networks.},
  pmcid = {PMC10034606},
  file = {/Users/dkapitan/Zotero/storage/TH78N8S3/Sinaci et al. - 2023 - A Data Transformation Methodology to Create Findable, Accessible, Interoperable, and Reusable Health.pdf}
}

@article{sinaci2024privacypreserving,
  author       = {Sinaci, A. Anil and Gencturk, Mert and Alvarez-Romero, Celia and Laleci Erturkmen, Gokce Banu and Martinez-Garcia, Alicia and Escalona-Cuaresma, María José and Parra-Calderon, Carlos Luis},
  title        = {Privacy-Preserving Federated Machine Learning on {{FAIR}} Health Data: {{A}} Real-World Application},
  shorttitle   = {Privacy-Preserving Federated Machine Learning on {{FAIR}} Health Data},
  journaltitle = {Computational and Structural Biotechnology Journal},
  shortjournal = {Computational and Structural Biotechnology Journal},
  date         = {2024-12-01},
  volume       = {24},
  pages        = {136--145},
  issn         = {2001-0370},
  doi          = {10.1016/j.csbj.2024.02.014},
  url          = {https://www.sciencedirect.com/science/article/pii/S2001037024000382},
  urldate      = {2024-06-04},
  keywords     = {Distributed datasets,FAIR data,Federated machine learning,Privacy-preserving machine learning},
  abstract     = {Objective This paper introduces a privacy-preserving federated machine learning (ML) architecture built upon Findable, Accessible, Interoperable, and Reusable (FAIR) health data. It aims to devise an architecture for executing classification algorithms in a federated manner, enabling collaborative model-building among health data owners without sharing their datasets. Materials and methods Utilizing an agent-based architecture, a privacy-preserving federated ML algorithm was developed to create a global predictive model from various local models. This involved formally defining the algorithm in two steps: data preparation and federated model training on FAIR health data and constructing the architecture with multiple components facilitating algorithm execution. The solution was validated by five healthcare organizations using their specific health datasets. Results Five organizations transformed their datasets into Health Level 7 Fast Healthcare Interoperability Resources via a common FAIRification workflow and software set, thereby generating FAIR datasets. Each organization deployed a Federated ML Agent within its secure network, connected to a cloud-based Federated ML Manager. System testing was conducted on a use case aiming to predict 30-day readmission risk for chronic obstructive pulmonary disease patients and the federated model achieved an accuracy rate of 87\%. Discussion The paper demonstrated a practical application of privacy-preserving federated ML among five distinct healthcare entities, highlighting the value of FAIR health data in machine learning when utilized in a federated manner that ensures privacy protection without sharing data. Conclusion This solution effectively leverages FAIR datasets from multiple healthcare organizations for federated ML while safeguarding sensitive health datasets, meeting legislative privacy and security requirements.},
  file         = {/Users/dkapitan/Zotero/storage/JG84K6VB/Sinaci et al. - 2024 - Privacy-preserving federated machine learning on F.pdf;/Users/dkapitan/Zotero/storage/Q6KN6499/S2001037024000382.html},
}

@incollection{smits2022improved,
  author    = {Smits, Djura and Van Beusekom, Bart and Martin, Frank and Veen, Lourens and Geleijnse, Gijs and Moncada-Torres, Arturo},
  title     = {An {{Improved Infrastructure}} for {{Privacy-Preserving Analysis}} of {{Patient Data}}},
  editor    = {Mantas, John and Gallos, Parisis and Zoulias, Emmanouil and Hasman, Arie and Househ, Mowafa S. and Diomidous, Marianna and Liaskos, Joseph and Charalampidou, Martha},
  booktitle = {Studies in {{Health Technology}} and {{Informatics}}},
  date      = {2022-06-29},
  publisher = {IOS Press},
  isbn      = {978-1-64368-290-7 978-1-64368-291-4},
  doi       = {10.3233/SHTI220682},
  url       = {https://ebooks.iospress.nl/doi/10.3233/SHTI220682},
  urldate   = {2023-04-26},
  langid    = {english},
  abstract  = {Incorporating healthcare data from different sources is crucial for a better understanding of patient (sub)populations. However, data centralization raises concerns about data privacy and governance. In this work, we present an improved infrastructure that allows privacy-preserving analysis of patient data: vantage6 v3. For this new version, we describe its architecture and upgraded functionality, which allows algorithms running at each party to communicate with one another through a virtual private network (while still being isolated from the public internet to reduce the risk of data leakage). This allows the execution of different types of algorithms (e.g., multi-party computation) that were practically infeasible before, as showcased by the included examples. The (continuous) development of this type of infrastructure is fundamental to meet the current and future demands of healthcare research with a strong emphasis on preserving the privacy of sensitive patient data.},
  file      = {/Users/dkapitan/Zotero/storage/BCT5T3VP/Smits et al. - 2022 - An Improved Infrastructure for Privacy-Preserving .pdf},
}

@standard{sql-on-fhir-v2,
  title   = {{{SQL}} on {{FHIR}} v2.0.0-Pre},
  url     = {https://build.fhir.org/ig/FHIR/sql-on-fhir-v2/},
  urldate = {2024-09-20},
  file    = {/Users/dkapitan/Zotero/storage/WBHTLDSS/sql-on-fhir-v2.html},
}

@online{summitinstitutefordevelopment2023bunda,
  title = {{{BUNDA App}}},
  author = {{Summit Institute for Development}},
  date = {2023-05-09T17:00:00},
  url = {https://www.sid-indonesia.org/post/bunda-app},
  urldate = {2024-01-18},
  abstract = {Summit Institute for Development works closely with partners to develop cuffless optical BP measurement (OptiBP) integrated with antenatal decision-support and client tracking (BUNDA App) to improve health provider performance to detect hypertension and improve pregnancy outcomes. In this video, one of the village midwives in Lombok, West Nusa Tenggara, Indonesia, was sharing her experiences of using the BUNDA App. Please join us in making the best care for maternal and child health services. We},
  langid = {english},
  organization = {Summit Institute},
  file = {/Users/dkapitan/Zotero/storage/RLAL2KAQ/bunda-app.html}
}

@article{syzdykova2017opensource,
  author       = {Syzdykova, Assel and Malta, André and Zolfo, Maria and Diro, Ermias and Oliveira, José Luis},
  title        = {Open-{{Source Electronic Health Record Systems}} for {{Low-Resource Settings}}: {{Systematic Review}}},
  shorttitle   = {Open-{{Source Electronic Health Record Systems}} for {{Low-Resource Settings}}},
  journaltitle = {JMIR Medical Informatics},
  date         = {2017-11},
  volume       = {5},
  number       = {4},
  pages        = {e44},
  issn         = {2291-9694},
  doi          = {10.2196/medinform.8131},
  eprint       = {29133283},
  eprinttype   = {pmid},
  pmcid        = {PMC5703976},
  abstract     = {Background Despite the great impact of information and communication technologies on clinical practice and on the quality of health services, this trend has been almost exclusive to developed countries, whereas countries with poor resources suffer from many economic and social issues that have hindered the real benefits of electronic health (eHealth) tools. As a component of eHealth systems, electronic health records (EHRs) play a fundamental role in patient management and effective medical care services. Thus, the adoption of EHRs in regions with a lack of infrastructure, untrained staff, and ill-equipped health care providers is an important task. However, the main barrier to adopting EHR software in low- and middle-income countries is the cost of its purchase and maintenance, which highlights the open-source approach as a good solution for these underserved areas. Objective The aim of this study was to conduct a systematic review of open-source EHR systems based on the requirements and limitations of low-resource settings. Methods First, we reviewed existing literature on the comparison of available open-source solutions. In close collaboration with the University of Gondar Hospital, Ethiopia, we identified common limitations in poor resource environments and also the main requirements that EHRs should support. Then, we extensively evaluated the current open-source EHR solutions, discussing their strengths and weaknesses, and their appropriateness to fulfill a predefined set of features relevant for low-resource settings. 
Results The evaluation methodology allowed assessment of several key aspects of available solutions that are as follows: (1) integrated applications, (2) configurable reports, (3) custom reports, (4) custom forms, (5) interoperability, (6) coding systems, (7) authentication methods, (8) patient portal, (9) access control model, (10) cryptographic features, (11) flexible data model, (12) offline support, (13) native client, (14) Web client,(15) other clients, (16) code-based language, (17) development activity, (18) modularity, (19) user interface, (20) community support, and (21) customization. The quality of each feature is discussed for each of the evaluated solutions and a final comparison is presented. Conclusions There is a clear demand for open-source, reliable, and flexible EHR systems in low-resource settings. In this study, we have evaluated and compared five open-source EHR systems following a multidimensional methodology that can provide informed recommendations to other implementers, developers, and health care professionals. We hope that the results of this comparison can guide decision making when needing to adopt, install, and maintain an open-source EHR solution in low-resource settings.},
  file         = {/Users/dkapitan/Zotero/storage/FRAHQRX5/Syzdykova et al. - 2017 - Open-Source Electronic Health Record Systems for L.pdf}
}

@article{tayefi2021challenges,
  author       = {Tayefi, Maryam and Ngo, Phuong and Chomutare, Taridzo and Dalianis, Hercules and Salvi, Elisa and Budrionis, Andrius and Godtliebsen, Fred},
  title        = {Challenges and Opportunities beyond Structured Data in Analysis of Electronic Health Records},
  journaltitle = {WIREs Computational Statistics},
  date         = {2021},
  volume       = {13},
  number       = {6},
  pages        = {e1549},
  issn         = {1939-0068},
  doi          = {10.1002/wics.1549},
  url          = {https://onlinelibrary.wiley.com/doi/abs/10.1002/wics.1549},
  urldate      = {2024-09-19},
  langid       = {english},
  keywords     = {electronic health records,machine learning,statistical methods,unstructured data},
  abstract     = {Electronic health records (EHR) contain a lot of valuable information about individual patients and the whole population. Besides structured data, unstructured data in EHRs can provide extra, valuable information but the analytics processes are complex, time-consuming, and often require excessive manual effort. Among unstructured data, clinical text and images are the two most popular and important sources of information. Advanced statistical algorithms in natural language processing, machine learning, deep learning, and radiomics have increasingly been used for analyzing clinical text and images. Although there exist many challenges that have not been fully addressed, which can hinder the use of unstructured data, there are clear opportunities for well-designed diagnosis and decision support tools that efficiently incorporate both structured and unstructured data for extracting useful information and provide better outcomes. However, access to clinical data is still very restricted due to data sensitivity and ethical issues. Data quality is also an important challenge in which methods for improving data completeness, conformity and plausibility are needed. Further, generalizing and explaining the result of machine learning models are important problems for healthcare, and these are open challenges. A possible solution to improve data quality and accessibility of unstructured data is developing machine learning methods that can generate clinically relevant synthetic data, and accelerating further research on privacy preserving techniques such as deidentification and pseudonymization of clinical text. This article is categorized under: Applications of Computational Statistics {$>$} Health and Medical Data/Informatics},
  file         = {/Users/dkapitan/Zotero/storage/4RHWQBTC/Tayefi et al. - 2021 - Challenges and opportunities beyond structured dat.pdf;/Users/dkapitan/Zotero/storage/MGS4FDSA/wics.html}
}

@article{teo2024federated,
  title = {Federated Machine Learning in Healthcare: {{A}} Systematic Review on Clinical Applications and Technical Architecture},
  shorttitle = {Federated Machine Learning in Healthcare},
  author = {Teo, Zhen Ling and Jin, Liyuan and Liu, Nan and Li, Siqi and Miao, Di and Zhang, Xiaoman and Ng, Wei Yan and Tan, Ting Fang and Lee, Deborah Meixuan and Chua, Kai Jie and Heng, John and Liu, Yong and Goh, Rick Siow Mong and Ting, Daniel Shu Wei},
  date = {2024-02},
  journaltitle = {Cell Reports Medicine},
  shortjournal = {Cell Reports Medicine},
  volume = {5},
  number = {2},
  pages = {101419},
  issn = {2666-3791},
  doi = {10.1016/j.xcrm.2024.101419},
  url = {https://linkinghub.elsevier.com/retrieve/pii/S2666379124000429},
  urldate = {2024-06-03},
  abstract = {Federated learning (FL) is a distributed machine learning framework that is gaining traction in view of increasing health data privacy protection needs. By conducting a systematic review of FL applications in healthcare, we identify relevant articles in scientific, engineering, and medical journals in English up to August 31st, 2023. Out of a total of 22,693 articles under review, 612 articles are included in the final analysis. The majority of articles are proof-of-concepts studies, and only 5.2\% are studies with real-life application of FL. Radiology and internal medicine are the most common specialties involved in FL. FL is robust to a variety of machine learning models and data types, with neural networks and medical imaging being the most common, respectively. We highlight the need to address the barriers to clinical translation and to assess its realworld impact in this new digital data-driven healthcare scene.},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/84BC73II/Teo et al. - 2024 - Federated machine learning in healthcare A system.pdf}
}

@article{thaiya2021adoption,
  title = {Adoption of {{ICT}} to {{Enhance Access}} to {{Healthcare}} in {{Kenya}}},
  author = {Thaiya, Mbugua Samuel and Julia, Korongo and Joram, Mutai and Benard, Masese and Nambiro, Alice},
  date = {2021-03-01},
  journaltitle = {IOSR Journal of Computer Engineering},
  shortjournal = {IOSR-JCE},
  volume = {23},
  number = {2},
  pages = {45--50},
  abstract = {Kenya like other developing countries is faced with a burden of disease aggravated by poverty. Communicable and non- communicable diseases are increasing at an alarming rate and straining already overburdened and weak heath systems. Faced with a rapidly growing population, poorly equipped health workforce and meager resources to support the healthcare systems, there is a continuous search for tools to offer better healthcare in these resource constrained settings. ICT for development is one tool that has been adopted to improve access to quality healthcare for populations in resource constrained settings. While the tool has been shown to impact the health care sector positively, the use of ICT has brought in challenges of interoperability and usability of electronic systems in health. In this paper we investigate how Kenya has utilized ICT to spur development in the health sector and the measures she is undertaking to address these challenges. We advance that ICT for development is a tool that needs to be fully exploited by developing countries in order to bridge gaps in healthcare emanating from limited finances, poor infrastructure and an over-stretched healthcare workforce.},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/7KRWYANH/Thaiya et al. - Adoption of ICT to Enhance Access to Healthcare in.pdf}
}

@report{tilahun2023african,
  author   = {Tilahun, Binyam and Mamuye, Adane and Yilma, Tesfahun and Shehata, Yasser},
  title    = {African {{Union Health Information Exchange Guidelines}} and {{Standards}}},
  date     = {2023-03-28},
  abstract = {The application of digital health technology is growing at a rapid rate in Africa, with the goals of improving the delivery of healthcare services and more effectively reaching out to remote and underserved communities. The lack of enabling guidelines and standards across the continent, on the other hand, makes it difficult to share data in a meaningful way across the continent. Considering this, Africa Centres for Disease Control and Prevention (Africa CDC) established a task force of 24 members to provide expertise and guidance in the development of AU HIE guidelines and standards. Members of the task force were subject matter experts working in Africa and internationally on the collection, analysis, and exchange of health information. Some of these experts had been involved in previous consultations on defining Africa CDC’s health information systems strategy. A chairperson, co-chairperson, and secretary were elected to engage the task force members in different technical working groups. The technical leadership and coordination of this work was done by the University of Gondar and HISP SA. A chairperson, co-chairperson, and secretary were elected to engage the task force members. Three technical working groups were constituted to lead the drafting of the three sections of the document: 1. HIE Policy directions 2. HIE Standards 3. Implementation Use Cases Following several virtual meetings and consultations, a draft document on AU HIE guidelines and standards was developed. The methodology employed involved the reviewing of scientific publications and governmental reports on HIE guidelines and standards in Africa; reviewing Internationally known comprehensive and robust data exchange standards; and incorporating recommendations from the Africa CDC HIE Task Force Members. 
Additionally, the document was critically reviewed and validated by Member States in all AU regions through several validation workshops; West Africa (validation workshop took place in Senegal), Central Africa (Congo-Brazaville), East Africa (Rwanda), South Africa (Namibia) and North Africa (Mauritania) through validation workshops. Representatives of Africa CDC, Regional},
  file     = {/Users/dkapitan/Zotero/storage/94KNBUND/Tilahun et al. - 2023 - African Union Health Information Exchange Guidelin.pdf}
}

@article{tsafnat2024converge,
  title = {Converge or {{Collide}}? {{Making Sense}} of a {{Plethora}} of {{Open Data Standards}} in {{Health Care}}},
  shorttitle = {Converge or {{Collide}}?},
  author = {Tsafnat, Guy and Dunscombe, Rachel and Gabriel, Davera and Grieve, Grahame and Reich, Christian},
  date = {2024-04-09},
  journaltitle = {Journal of Medical Internet Research},
  volume = {26},
  number = {1},
  pages = {e55779},
  publisher = {JMIR Publications Inc.},
  location = {Toronto, Canada},
  doi = {10.2196/55779},
  url = {https://www.jmir.org/2024/1/e55779},
  urldate = {2024-09-21},
  abstract = {Practitioners of digital health are familiar with disjointed data environments that often inhibit effective communication among different elements of the ecosystem. This fragmentation leads in turn to issues such as inconsistencies in services versus payments, wastage, and notably, care delivered being less than best-practice. Despite the long-standing recognition of interoperable data as a potential solution, efforts in achieving interoperability have been disjointed and inconsistent, resulting in numerous incompatible standards, despite the widespread agreement that fewer standards would enhance interoperability. This paper introduces a framework for understanding health care data needs, discussing the challenges and opportunities of open data standards in the field. It emphasizes the necessity of acknowledging diverse data standards, each catering to specific viewpoints and needs, while proposing a categorization of health care data into three domains, each with its distinct characteristics and challenges, along with outlining overarching design requirements applicable to all domains and specific requirements unique to each domain.},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/HCRU7DUV/e55779.html}
}

@inproceedings{vandaalen2023federated,
  title = {Federated {{Bayesian Network Ensembles}}},
  booktitle = {2023 {{Eighth International Conference}} on {{Fog}} and {{Mobile Edge Computing}} ({{FMEC}})},
  author = {family=Daalen, given=Florian, prefix=van, useprefix=true and Ippel, Lianne and Dekker, Andre and Bermejo, Inigo},
  date = {2023-09},
  pages = {22--33},
  doi = {10.1109/FMEC59375.2023.10306230},
  url = {https://ieeexplore.ieee.org/document/10306230},
  urldate = {2024-10-24},
  abstract = {Federated learning allows us to run machine learning algorithms on decentralized data when data sharing is not permitted due to privacy concerns. Ensemble-based learning works by training multiple (weak) classifiers whose output is aggregated. Federated ensembles are ensembles applied to a federated setting, where each classifier in the ensemble is trained on one data location. In this article, we explore the use of federated Bayesian network ensembles (FBNE) in a range of experiments and compare their performance with both locally trained models and models trained with VertiBayes, a federated learning algorithm to train Bayesian networks from decentralized data. Our results show that FBNE outperform local models and provides, among other advantages, a significant increase in training speed compared with VertiBayes while maintaining a similar performance in most settings. We show that FBNE are a potentially useful tool within the federated learning toolbox, especially when local populations are heavily biased, or there is a strong imbalance in population size across parties. We discuss the advantages and disadvantages of this approach in terms of time complexity, model accuracy, privacy protection, and model interpretability.},
  eventtitle = {2023 {{Eighth International Conference}} on {{Fog}} and {{Mobile Edge Computing}} ({{FMEC}})},
  keywords = {Bayesian network,Data models,Ensemble Learning,Federated Ensembles,Federated learning,Multi-access edge computing,Privacy,privacy preserving,Runtime,Sociology,Training},
  file = {/Users/dkapitan/Zotero/storage/67UJDRZ8/van Daalen et al. - 2023 - Federated Bayesian Network Ensembles.pdf;/Users/dkapitan/Zotero/storage/NGGXQVQT/10306230.html}
}

@inproceedings{vandenberg2024empirical,
  author     = {family=Berg, given=Martin, prefix=van den, useprefix=true and Gerlings, Julie and Kim, Jenia},
  editor     = {Nowaczyk, Sławomir and Biecek, Przemysław and Chung, Neo Christopher and Vallati, Mauro and Skruch, Paweł and Jaworek-Korjakowska, Joanna and Parkinson, Simon and Nikitas, Alexandros and Atzmüller, Martin and Kliegr, Tomáš and Schmid, Ute and Bobek, Szymon and Lavrac, Nada and Peeters, Marieke and family=Dierendonck, given=Roland, prefix=van, useprefix=true and Robben, Saskia and Mercier-Laurent, Eunika and Kayakutlu, Gülgün and Owoc, Mieczyslaw Lech and Mason, Karl and Wahid, Abdul and Bruno, Pierangela and Calimeri, Francesco and Cauteruccio, Francesco and Terracina, Giorgio and Wolter, Diedrich and Leidner, Jochen L. and Kohlhase, Michael and Dimitrova, Vania},
  title      = {Empirical {{Research}} on {{Ensuring Ethical AI}} in {{Fraud Detection}} of {{Insurance Claims}}: {{A Field Study}} of {{Dutch Insurers}}},
  shorttitle = {Empirical {{Research}} on {{Ensuring Ethical AI}} in {{Fraud Detection}} of {{Insurance Claims}}},
  booktitle  = {Artificial {{Intelligence}}. {{ECAI}} 2023 {{International Workshops}}},
  date       = {2024},
  pages      = {106--114},
  publisher  = {Springer Nature Switzerland},
  location   = {Cham},
  isbn       = {978-3-031-50485-3},
  doi        = {10.1007/978-3-031-50485-3_9},
  langid     = {english},
  keywords   = {Ethical AI,Fraud detection,Insurance,Responsible AI},
  abstract   = {The insurance industry in the Netherlands applies artificial intelligence (AI) in different processes and acknowledges that AI should be implemented in an ethical and responsible manner. Therefore, the Dutch Association of Insurers supported the industry by publishing an ethical framework. However, the framework is a set of high-level requirements, and the question is how these requirements are translated into local practices. Our research question is how ethical requirements are applied by insurance companies when using AI systems to detect fraud in insurance claims. To answer this question, we conducted interviews with representatives of four different organizations. The study demonstrates the awareness amongst interviewees that AI needs to be applied in a responsible way. The ethical framework provides a good starting point for insurers to develop their own practical ethical guidelines. Empirical evidence confirms that accountability, safety, transparency, non-discrimination, and human agency are priorities in the process of AI implementation. The research shows that translation of the ethical framework into operational and actionable instructions is done in-house by each organization and requires a multidisciplinary approach and cooperation between teams.},
  file       = {/Users/dkapitan/Zotero/storage/YL2LC5JJ/van den Berg et al. - 2024 - Empirical Research on Ensuring Ethical AI in Fraud.pdf}
}

@article{vansoest2018using,
  title = {Using the {{Personal Health Train}} for {{Automated}} and {{Privacy-Preserving Analytics}} on {{Vertically Partitioned Data}}},
  author = {family=Soest, given=Johan, prefix=van, useprefix=true and Sun, Chang and Mussmann, Ole and Puts, Marco and Malic, Alexander and family=Oppen, given=Claudia, prefix=van, useprefix=true and Towend, David and Dekker, Andre and Dumontier, Michel},
  date = {2018},
  abstract = {Conventional data mining algorithms are unable to satisfy the current requirements on analyzing big data in some fields such as medicine, policy making, judicial, and tax records. However, applying diverse datasets from different institutes (both healthcare and non-healthcare related) can enrich information and insights. So far, analyzing this data in an automated, privacy-preserving manner does not exist to our knowledge. In this work, we propose an infrastructure, and proof-of-concept for privacy-preserving analytics on vertically partitioned data.},
  langid = {english},
  file = {/Users/dkapitan/Zotero/storage/W2JCD9IA/van SOEST et al. - Using the Personal Health Train for Automated and .pdf}
}

@article{visweswaran2022atomic,
  title = {An Atomic Approach to the Design and Implementation of a Research Data Warehouse},
  author = {Visweswaran, Shyam and McLay, Brian and Cappella, Nickie and Morris, Michele and Milnes, John T and Reis, Steven E and Silverstein, Jonathan C and Becich, Michael J},
  date = {2022-04-01},
  journaltitle = {Journal of the American Medical Informatics Association},
  shortjournal = {Journal of the American Medical Informatics Association},
  volume = {29},
  number = {4},
  pages = {601--608},
  issn = {1527-974X},
  doi = {10.1093/jamia/ocab204},
  url = {https://doi.org/10.1093/jamia/ocab204},
  urldate = {2024-12-30},
  abstract = {As a long-standing Clinical and Translational Science Awards (CTSA) Program hub, the University of Pittsburgh and the University of Pittsburgh Medical Center (UPMC) developed and implemented a modern research data warehouse (RDW) to efficiently provision electronic patient data for clinical and translational research. We designed and implemented an RDW named Neptune to serve the specific needs of our CTSA. Neptune uses an atomic design where data are stored at a high level of granularity as represented in source systems. Neptune contains robust patient identity management tailored for research; integrates patient data from multiple sources, including electronic health records (EHRs), health plans, and research studies; and includes knowledge for mapping to standard terminologies. Neptune contains data for more than 5 million patients longitudinally organized as Health Insurance Portability and Accountability Act (HIPAA) Limited Data with dates and includes structured EHR data, clinical documents, health insurance claims, and research data. Neptune is used as a source for patient data for hundreds of institutional review board-approved research projects by local investigators and for national projects. The design of Neptune was heavily influenced by the large size of UPMC, the varied data sources, and the rich partnership between the University and the healthcare system. It includes several unique aspects, including the physical warehouse straddling the University and UPMC networks and management under an HIPAA Business Associates Agreement. We describe the design and implementation of an RDW at a large academic healthcare system that uses a distinctive atomic design where data are stored at a high level of granularity.},
  file = {/Users/dkapitan/Zotero/storage/MASKIJLK/Visweswaran et al. - 2022 - An atomic approach to the design and implementation of a research data warehouse.pdf;/Users/dkapitan/Zotero/storage/ACAKSTXB/6382237.html}
}

@inreference{wikipedia-gsm,
  title      = {{{GSM}}},
  booktitle  = {Wikipedia},
  date       = {2024-09-14T12:53:57Z},
  url        = {https://en.wikipedia.org/w/index.php?title=GSM&oldid=1245675274},
  urldate    = {2024-09-20},
  langid     = {english},
  annotation = {Page Version ID: 1245675274},
  abstract   = {The Global System for Mobile Communications (GSM) is a standard developed by the European Telecommunications Standards Institute (ETSI) to describe the protocols for second-generation (2G) digital cellular networks used by mobile devices such as mobile phones and tablets. GSM is also a trade mark owned by the GSM Association. GSM may also refer to the Full Rate voice codec. It was first implemented in Finland in December 1991. By the mid-2010s, it became a global standard for mobile communications achieving over 90\% market share, and operating in over 193 countries and territories. 2G networks developed as a replacement for first generation (1G) analog cellular networks. The GSM standard originally described a digital, circuit-switched network optimized for full duplex voice telephony. This expanded over time to include data communications, first by circuit-switched transport, then by packet data transport via General Packet Radio Service (GPRS), and Enhanced Data Rates for GSM Evolution (EDGE). Subsequently, the 3GPP developed third-generation (3G) UMTS standards, followed by the fourth-generation (4G) LTE Advanced and the fifth-generation 5G standards, which do not form part of the ETSI GSM standard. Beginning in the late 2010s, various carriers worldwide started to shut down their GSM networks. Nevertheless, as a result of the network's widespread use, the acronym "GSM" is still used as a generic term for the plethora of G mobile phone technologies evolved from it.},
  file       = {/Users/dkapitan/Zotero/storage/PE243J9U/GSM.html}
}

@online{zhang2023secure,
  title = {Secure and {{Private Healthcare Analytics}}: {{A Feasibility Study}} of {{Federated Deep Learning}} with {{Personal Health Train}}},
  shorttitle = {Secure and {{Private Healthcare Analytics}}},
  author = {Zhang, Chong and Choudhury, Ananya and Volmer, Leroy and family=Soest, given=Johan, prefix=van, useprefix=true and Bermejo, Inigo and Dekker, Andre and Gomes, Aiara Lobo and Wee, Leonard},
  date = {2023-07-19},
  eprinttype = {Research Square},
  issn = {2693-5015},
  doi = {10.21203/rs.3.rs-3158418/v1},
  url = {https://www.researchsquare.com/article/rs-3158418/v1},
  urldate = {2024-10-24},
  abstract = {Objective In this article, we aim to present a new open-source Federated Learning infrastructure by conducting several proof-of-concept experiments. We seek to prove the reliability of the infrastructure to develop global models without sharing private patient data. Materials and Methods We applied the Personal Health Train (PHT) principles using the Vantage6 software to train a neural network to classify head and neck cancer patients' distant metastasis using federated learning algorithms in a privacy preserving manner. Head and neck cancer patient data from four cohorts were assigned to two data stations. During each training iteration, model weights were averaged and sent back to central node. Results We compared the area under the receiver operating characteristic curves (AUCs) and model weights between the centralized and federated learning scenarios. The results showed that our federated infrastructure was able to achieve similar predicting power as in the centralized case. Different federated learning model weights aggregation methods were tested. The experiment results showed that federated learning models reached best performance when we aggregate model weights per epoch. Discussion and Conclusion PHT and FAIR data principles can efficiently calculate quality indicators in a privacy-preserving federated approach and the work can be scaled up both nationally and internationally. Despite this, application of the methodology was largely hampered by ELSI issues. However, the lessons learned from this study can provide other hospitals and researchers to adapt to the process easily and take effective measures in building quality of care infrastructures.},
  pubstate = {prepublished},
  file = {/Users/dkapitan/Zotero/storage/QSIP8H5B/Zhang et al. - 2023 - Secure and Private Healthcare Analytics A Feasibility Study of Federated Deep Learning with Persona.pdf}
}

@incollection{zohner2019challenges,
  author    = {Zohner, Jochen and Marquardt, Kurt and Schneider, Henning and Michel Backofen, Achim},
  title     = {Challenges and {{Opportunities}} in {{Changing Data Structures}} of {{Clinical Document Archives}} from {{HL7-V2}} to {{FHIR-Based Archive Solutions}}},
  booktitle = {{{MEDINFO}} 2019: {{Health}} and {{Wellbeing}} e-{{Networks}} for {{All}}},
  date      = {2019},
  pages     = {492--495},
  publisher = {IOS Press},
  doi       = {10.3233/SHTI190270},
  url       = {https://ebooks.iospress.nl/doi/10.3233/SHTI190270},
  urldate   = {2024-09-20},
  file      = {/Users/dkapitan/Zotero/storage/4MIA8SPZ/Zohner et al. - 2019 - Challenges and Opportunities in Changing Data Stru.pdf}
}