<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JME</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Educ</journal-id>
      <journal-title>JMIR Medical Education</journal-title>
      <issn pub-type="epub">2369-3762</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i1e51388</article-id>
      <article-id pub-id-type="pmid">38227356</article-id>
      <article-id pub-id-type="doi">10.2196/51388</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Viewpoint</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Viewpoint</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Enriching Data Science and Health Care Education: Application and Impact of Synthetic Data Sets Through the Health Gym Project</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Venkatesh</surname>
            <given-names>Kaushik</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Kamel Boulos</surname>
            <given-names>Maged N.</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Seevanayanagam</surname>
            <given-names>Siven</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Black</surname>
            <given-names>Michaela</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Kuo</surname>
            <given-names>Nicholas I-Hsien</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Centre for Big Data Research in Health</institution>
            <institution>The University of New South Wales</institution>
            <addr-line>Level 2, AGSM Building (G27), Botany St, Kensington NSW</addr-line>
            <addr-line>Sydney, 2052</addr-line>
            <country>Australia</country>
            <phone>61 0293850645</phone>
            <email>n.kuo@unsw.edu.au</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8749-7280</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Perez-Concha</surname>
            <given-names>Oscar</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8823-7090</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Hanly</surname>
            <given-names>Mark</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9279-7453</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Mnatzaganian</surname>
            <given-names>Emmanuel</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0009-5091-2642</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Hao</surname>
            <given-names>Brandon</given-names>
          </name>
          <degrees>BA</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0009-6237-1783</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Di Sipio</surname>
            <given-names>Marcus</given-names>
          </name>
          <degrees>BHSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0007-9271-755X</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>Guolin</given-names>
          </name>
          <degrees>BA</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0008-9382-1882</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Vanjara</surname>
            <given-names>Jash</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0003-3524-0696</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Valerie</surname>
            <given-names>Ivy Cerelia</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6361-1587</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>de Oliveira Costa</surname>
            <given-names>Juliana</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8355-023X</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author">
          <name name-style="western">
            <surname>Churches</surname>
            <given-names>Timothy</given-names>
          </name>
          <degrees>MBBS</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7905-5877</ext-link>
        </contrib>
        <contrib id="contrib12" contrib-type="author">
          <name name-style="western">
            <surname>Lujic</surname>
            <given-names>Sanja</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9555-0261</ext-link>
        </contrib>
        <contrib id="contrib13" contrib-type="author">
          <name name-style="western">
            <surname>Hegarty</surname>
            <given-names>Jo</given-names>
          </name>
          <degrees>BIT</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0001-7445-2179</ext-link>
        </contrib>
        <contrib id="contrib14" contrib-type="author">
          <name name-style="western">
            <surname>Jorm</surname>
            <given-names>Louisa</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0390-661X</ext-link>
        </contrib>
        <contrib id="contrib15" contrib-type="author">
          <name name-style="western">
            <surname>Barbieri</surname>
            <given-names>Sebastiano</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5919-372X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Centre for Big Data Research in Health</institution>
        <institution>The University of New South Wales</institution>
        <addr-line>Sydney</addr-line>
        <country>Australia</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>The University of New South Wales</institution>
        <addr-line>Sydney</addr-line>
        <country>Australia</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Medicines Intelligence Research Program</institution>
        <institution>School of Population Health</institution>
        <institution>The University of New South Wales</institution>
        <addr-line>Sydney</addr-line>
        <country>Australia</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>School of Clinical Medicine</institution>
        <institution>University of New South Wales</institution>
        <addr-line>Sydney</addr-line>
        <country>Australia</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Ingham Institute of Applied Medical Research</institution>
        <institution>Liverpool</institution>
        <addr-line>Sydney</addr-line>
        <country>Australia</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Sydney Local Health District</institution>
        <addr-line>Sydney</addr-line>
        <country>Australia</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Nicholas I-Hsien Kuo <email>n.kuo@unsw.edu.au</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>16</day>
        <month>1</month>
        <year>2024</year>
      </pub-date>
      <volume>10</volume>
      <elocation-id>e51388</elocation-id>
      <history>
        <date date-type="received">
          <day>30</day>
          <month>7</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>14</day>
          <month>10</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>20</day>
          <month>10</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>8</day>
          <month>11</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Nicholas I-Hsien Kuo, Oscar Perez-Concha, Mark Hanly, Emmanuel Mnatzaganian, Brandon Hao, Marcus Di Sipio, Guolin Yu, Jash Vanjara, Ivy Cerelia Valerie, Juliana de Oliveira Costa, Timothy Churches, Sanja Lujic, Jo Hegarty, Louisa Jorm, Sebastiano Barbieri. Originally published in JMIR Medical Education (https://mededu.jmir.org), 16.01.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Education, is properly cited. The complete bibliographic information, a link to the original publication on https://mededu.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://mededu.jmir.org/2024/1/e51388" xlink:type="simple"/>
      <abstract>
        <p>Large-scale medical data sets are vital for hands-on education in health data science but are often inaccessible due to privacy concerns. Addressing this gap, we developed the Health Gym project, a free and open-source platform designed to generate synthetic health data sets applicable to various areas of data science education, including machine learning, data visualization, and traditional statistical models. Initially, we generated 3 synthetic data sets for sepsis, acute hypotension, and antiretroviral therapy for HIV infection. This paper discusses the educational applications of Health Gym’s synthetic data sets. We illustrate this through their use in postgraduate health data science courses delivered by the University of New South Wales, Australia, and a Datathon event, involving academics, students, clinicians, and local health district professionals. We also include adaptable worked examples using our synthetic data sets, designed to enrich hands-on tutorial and workshop experiences. Although we highlight the potential of these data sets in advancing data science education and health care artificial intelligence, we also emphasize the need for continued research into the inherent limitations of synthetic data.</p>
      </abstract>
      <kwd-group>
        <kwd>medical education</kwd>
        <kwd>generative model</kwd>
        <kwd>generative adversarial networks</kwd>
        <kwd>privacy</kwd>
        <kwd>antiretroviral therapy (ART)</kwd>
        <kwd>human immunodeficiency virus (HIV)</kwd>
        <kwd>data science</kwd>
        <kwd>educational purposes</kwd>
        <kwd>accessibility</kwd>
        <kwd>data privacy</kwd>
        <kwd>data sets</kwd>
        <kwd>sepsis</kwd>
        <kwd>hypotension</kwd>
        <kwd>HIV</kwd>
        <kwd>science education</kwd>
        <kwd>health care AI</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Clinical data gathered from health care institutions are crucial for enhancing health care quality [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. These data sets can feed into artificial intelligence (AI) and machine learning (ML) models to refine patient prognosis [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>], diagnosis [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>], and treatment optimization [<xref ref-type="bibr" rid="ref8">8</xref>]. Furthermore, statistical models applied to these data sets can uncover association and causal paths [<xref ref-type="bibr" rid="ref9">9</xref>]. However, stringent privacy regulations protecting patient confidentiality often hamper the prompt availability of these data sets for research and educational usage [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref14">14</xref>].</p>
      <p>Gaining access to clinical and health care data sets is a critical aspect of health data science education. This exposure provides trainees with invaluable practical experience, offering profound insights into the complexities of real-world health care scenarios [<xref ref-type="bibr" rid="ref15">15</xref>]. However, obtaining access to these sensitive data sets is a challenging endeavor—often involving a lengthy process of securing ethics approvals, institutional support, and data clearance [<xref ref-type="bibr" rid="ref16">16</xref>]. Moreover, the approved users may be required to work on-site under the direct supervision of the data custodian to prevent data leakage [<xref ref-type="bibr" rid="ref17">17</xref>]. These rigorous security measures, while essential for patient confidentiality, can hamper scalable training of future health data scientists.</p>
      <p>During this era of big data, with a soaring demand for skilled health data scientists [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>], synthetic data sets can bridge the gap between analytical skills and health context comprehension. As Kolaczyk et al [<xref ref-type="bibr" rid="ref20">20</xref>] astutely asserted, “Theory informs principle, and principle informs practice; practice, in turn, informs theory.”</p>
      <p>A promising solution to the lack of clinical and health care data is the utilization of generative AI to generate synthetic data sets. These data sets provide controlled, context-specific learning experiences that parallel real-world situations while maintaining patient privacy. The Health Gym project exemplifies this approach [<xref ref-type="bibr" rid="ref21">21</xref>]. Leveraging generative adversarial networks (GANs) [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>], Health Gym creates synthetic medical data sets, establishing a secure yet realistic platform for trainees to hone their health data analytical skills. The data sets, covering key health conditions such as sepsis, acute hypotension, and antiretroviral therapy (ART) for HIV infection, can be accessed at [<xref ref-type="bibr" rid="ref25">25</xref>]. The project’s open-source code is also available on GitHub at [<xref ref-type="bibr" rid="ref26">26</xref>] under the MIT License [<xref ref-type="bibr" rid="ref27">27</xref>].</p>
      <p>As an integral part of the Master of Science in Health Data Science Program at the University of New South Wales (UNSW), Australia [<xref ref-type="bibr" rid="ref28">28</xref>] and a Datathon event [<xref ref-type="bibr" rid="ref29">29</xref>], the Health Gym synthetic data sets have proven their versatility and effectiveness in enriching health care education. They are freely accessible to the wider research and education community while complying with stringent security standards such as those specified by Health Canada [<xref ref-type="bibr" rid="ref30">30</xref>] and the European Medicines Agency [<xref ref-type="bibr" rid="ref31">31</xref>], thus minimizing patient data disclosure risks.</p>
      <p>In this viewpoint paper, we discuss the application of Health Gym synthetic data sets, their role in health data science education, and their potential in nurturing proficient health data scientists. We provide adaptable worked examples (accessible through Section A in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) by using our synthetic data sets, crafted to enrich hands-on tutorial and workshop experiences. We underline the importance of acknowledging the limitations of synthetic data to ensure their valid use in the creation of statistical models and AI applications in health care and the enhancement of health care education. Although synthetic data sets cannot supersede real-world data, they are a vital tool for training future health data scientists and supporting data-driven innovative approaches in health care.</p>
      <sec>
        <title>Ethics Approval</title>
        <p>We applied GANs to longitudinal data extracted from the MIMIC-III (Medical Information Mart for Intensive Care) [<xref ref-type="bibr" rid="ref32">32</xref>] and the EuResist [<xref ref-type="bibr" rid="ref33">33</xref>] databases to generate our synthetic data sets. This study was approved by the UNSW’s human research ethics committee (application HC210661). For patients in MIMIC-III, the requirement for individual consent was waived because the project did not impact clinical care and all protected health information was deidentified [<xref ref-type="bibr" rid="ref32">32</xref>]. For people in the EuResist integrated database, all data providers obtained informed consent for the execution of retrospective studies and inclusion in merged cohorts [<xref ref-type="bibr" rid="ref34">34</xref>].</p>
      </sec>
      <sec>
        <title>Health Gym</title>
        <p>The currently available synthetic data sets for the Health Gym project were derived from MIMIC-III [<xref ref-type="bibr" rid="ref32">32</xref>] and EuResist [<xref ref-type="bibr" rid="ref33">33</xref>] databases. MIMIC-III is a comprehensive database of anonymized health data associated with patients admitted to the critical care units of the Beth Israel Deaconess Medical Center, including data on laboratory tests, procedures, and medications. The EuResist network aims to develop a decision support system to optimize ART for individuals living with HIV, leveraging extensive clinical and virological data.</p>
        <p>After applying published selection or exclusion criteria, we extracted relevant data from databases that could facilitate the development of patient care algorithms. These data sets, focusing on sepsis, acute hypotension, and ART for HIV, served as the basis for our synthetic data creation. The synthetic data generation employed in the Health Gym was accomplished using GANs. The GAN model, as shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>, consists of 2 primary components: a generator and a discriminator. The process starts by sampling real patient records (depicted in pink) and employing the generator to create synthetic patient records (depicted in violet). Both the real and synthetic records are then forwarded to the discriminator network, which is tasked with differentiating the genuine data from the counterfeit. Both networks are trained in an adversarial process—the generator is updated to create more realistic records, while the discriminator is refined to identify generated records more accurately. As a result, the quality of the synthetic data is progressively enhanced, and the synthetic patient records become increasingly representative of the ground truth. The iterative training concludes when the discriminator can no longer reliably distinguish the synthetic records from the real records. Refer to Kuo et al [<xref ref-type="bibr" rid="ref21">21</xref>] for more details.</p>
        <p>Leveraging generative AI, Health Gym provides highly authentic clinical data sets, enriching health care education. Each data set undergoes rigorous quality assessment and security verification (detailed in Section B of <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). These synthetic data sets foster engaging learning experiences, aiding educators in developing tailored educational strategies. The following sections will illuminate the application of Health Gym in university-level courses, exemplified through the ART for HIV data set.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Generative adversarial network setup.</p>
          </caption>
          <graphic xlink:href="mededu_v10i1e51388_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Synthetic ART for HIV Data Set</title>
        <p>The Health Gym data sets contain mixed-type longitudinal data, including numerical, binary, and categorical variables. They encompass patient demographics, vital signs measurements, and pathology results. The data sets hence reflect the complexities of real-life data, thereby making them suitable for training health data scientists in university courses. This paper will primarily delve into the application of synthetic data in health care education focusing on the ART for HIV data set. Readers interested in the sepsis and the acute hypotension data sets should refer to Section C in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <sec>
          <title>Data Set Description</title>
          <p>Our synthetic HIV data set, informed by the selection or exclusion criteria proposed by Parbhoo et al [<xref ref-type="bibr" rid="ref35">35</xref>] and drawn from the EuResist database, targets individuals living with HIV who initiated therapy after 2015 per the World Health Organization’s guidelines [<xref ref-type="bibr" rid="ref36">36</xref>]. ART for HIV typically includes a mix of 3 or more antiretroviral agents from at least 2 distinct medication classes. The dynamism of ART lies in its frequent regimen modifications resulting from various circumstances such as treatment failure due to poor adherence or viral resistance, intolerance to ART, clinical events such as pregnancy or coinfections, or optimization of therapy to support better adherence, reduce drug-drug interactions, maximize ART response, or prevent the emergence of drug-resistant viral strains [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>].</p>
          <p>In addition to ART information, the data set encompasses vital indicators of ART success and disease progression, namely, viral load (VL) and CD4 cell count. Successful ART is often indicated by VL below 1000 copies/mL, while a CD4 cell count exceeding 500 cells/mm<sup>3</sup> signifies healthy immunological status [<xref ref-type="bibr" rid="ref36">36</xref>]. The complex interactions of these elements in our data set create a rich learning platform for health data science education.</p>
          <p><xref ref-type="table" rid="table1">Table 1</xref> encapsulates the data set’s 3 numeric, 5 binary, and 5 categorical variables. Numeric variables include VL, CD4 cell count, and relative CD4 laboratory test results. Treatment regimens follow those of Tang et al [<xref ref-type="bibr" rid="ref38">38</xref>], breaking down the ART regimen into several parts. The data set includes 50 combinations of 21 unique medications. The antiretroviral medication classes are nucleoside/nucleotide reverse transcriptase inhibitors (NRTIs), nonnucleoside reverse transcriptase inhibitors (NNRTIs), integrase inhibitors (INIs), protease inhibitors (PIs), and pharmacokinetic enhancers (pk-En). We deconstructed the ART regimen into its constituent parts: base drug combination (base drug combo), complementary INIs (comp INIs), comp NNRTIs, extra PIs, and extra pk-En. The base drug combo primarily consists of NRTIs, with inclusion of other antiretroviral classes as well.</p>
          <p>Recognizing the notable amount of missing data in the original EuResist database, we added a suffix (M) to variables to denote whether measurements were recorded at specific time points. In the authentic data set, measurements were reported at 24.27% (129,835/534,960) for VL (measured), 22.21% (118,815/534,960) for CD4 (measured), and 85.13% (455,411/534,960) for drug (measured). The absence of some CD4 and VL records may be attributable to specific clinical practices and the frequency of test requests [<xref ref-type="bibr" rid="ref39">39</xref>-<xref ref-type="bibr" rid="ref42">42</xref>]. For instance, it is common for clinicians to discontinue requesting a CD4 cell count if the previous result exceeded 500 cells/mm<sup>3</sup> and the individual had an undetectable VL. Similarly, VL is typically measured in the first 3 months, at 6 months, 12 months, and then annually.</p>
          <p>Constructed using the GAN model developed by Kuo et al [<xref ref-type="bibr" rid="ref43">43</xref>], this data set comprises 8916 synthetic patients tracked over 60 months, resulting in 534,960 records (8916 <italic>×</italic> 60). <xref rid="figure2" ref-type="fig">Figure 2</xref> showcases a sample generated by the code in <xref rid="figure3" ref-type="fig">Figure 3</xref> [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. Each record features 15 columns, including a patient identifier, a time point, and 13 ART for HIV variables highlighted in <xref ref-type="table" rid="table1">Table 1</xref>. The synthetic data sets can be freely accessed in [<xref ref-type="bibr" rid="ref46">46</xref>] and [<xref ref-type="bibr" rid="ref47">47</xref>] on Figshare, a digital platform for research output sharing.</p>
          <table-wrap position="float" id="table1">
            <label>Table 1</label>
            <caption>
              <p>The variables of antiretroviral therapy in the HIV data set.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="410"/>
              <col width="130"/>
              <col width="120"/>
              <col width="340"/>
              <thead>
                <tr valign="top">
                  <td>Variable name</td>
                  <td>Data type</td>
                  <td>Unit</td>
                  <td>Valid categorical options</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Viral load (VL)</td>
                  <td>numeric</td>
                  <td>copies/mL</td>
                  <td>N/A<sup>a</sup></td>
                </tr>
                <tr valign="top">
                  <td>Absolute count for CD4 (CD4)</td>
                  <td>numeric</td>
                  <td>cells/µL</td>
                  <td>N/A</td>
                </tr>
                <tr valign="top">
                  <td>Relative count for CD4 (Rel CD4)</td>
                  <td>numeric</td>
                  <td>cells/µL</td>
                  <td>N/A</td>
                </tr>
                <tr valign="top">
                  <td>Gender</td>
                  <td>binary</td>
                  <td>N/A</td>
                  <td>Male, Female</td>
                </tr>
                <tr valign="top">
                  <td>Ethnicity (Ethnic)</td>
                  <td>categorical</td>
                  <td>N/A</td>
                  <td>Asian, African, Caucasian, other</td>
                </tr>
                <tr valign="top">
                  <td>Base drug combination (Base drug combo)</td>
                  <td>categorical</td>
                  <td>N/A</td>
                  <td>FTC<sup>b</sup> + TDF<sup>c</sup>, 3TC<sup>d</sup> + ABC<sup>e</sup>, FTC + TAF<sup>f</sup>, DRV<sup>g</sup> + FTC + TDF, FTC + RTVB<sup>h</sup> + TDF, other</td>
                </tr>
                <tr valign="top">
                  <td>Complementary integrase inhibitor (Comp INI)</td>
                  <td>categorical</td>
                  <td>N/A</td>
                  <td>DTG<sup>i</sup>, RAL<sup>j</sup>, EVG<sup>k</sup>, not applied</td>
                </tr>
                <tr valign="top">
                  <td>Complementary nonnucleoside reverse transcriptase inhibitor (Comp NNRTI)</td>
                  <td>categorical</td>
                  <td>N/A</td>
                  <td>NVP<sup>l</sup>, EFV<sup>m</sup>, RPV<sup>n</sup>, not applied</td>
                </tr>
                <tr valign="top">
                  <td>Extra protease inhibitor (Extra PI)</td>
                  <td>categorical</td>
                  <td>N/A</td>
                  <td>DRV, RTVB, LPV<sup>o</sup>, RTV<sup>p</sup>, ATV<sup>q</sup>, not applied</td>
                </tr>
                <tr valign="top">
                  <td>Extra pharmacokinetic enhancer (Extra pk-En)</td>
                  <td>binary</td>
                  <td>N/A</td>
                  <td>False, True</td>
                </tr>
                <tr valign="top">
                  <td>Viral load measured (VL) (M)<sup>r</sup></td>
                  <td>binary</td>
                  <td>N/A</td>
                  <td>False, True</td>
                </tr>
                <tr valign="top">
                  <td>CD4 (M)</td>
                  <td>binary</td>
                  <td>N/A</td>
                  <td>False, True</td>
                </tr>
                <tr valign="top">
                  <td>Drug recorded (M)</td>
                  <td>binary</td>
                  <td>N/A</td>
                  <td>False, True</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table1fn1">
                <p><sup>a</sup>N/A: not applicable.</p>
              </fn>
              <fn id="table1fn2">
                <p><sup>b</sup>FTC: emtricitabine.</p>
              </fn>
              <fn id="table1fn3">
                <p><sup>c</sup>TDF: tenofovir disoproxil fumarate.</p>
              </fn>
              <fn id="table1fn4">
                <p><sup>d</sup>3TC: lamivudine.</p>
              </fn>
              <fn id="table1fn5">
                <p><sup>e</sup>ABC: abacavir.</p>
              </fn>
              <fn id="table1fn6">
                <p><sup>f</sup>TAF: tenofovir alafenamide.</p>
              </fn>
              <fn id="table1fn7">
                <p><sup>g</sup>DRV: darunavir.</p>
              </fn>
              <fn id="table1fn8">
                <p><sup>h</sup>RTVB: ritonavir.</p>
              </fn>
              <fn id="table1fn9">
                <p><sup>i</sup>DTG: dolutegravir.</p>
              </fn>
              <fn id="table1fn10">
                <p><sup>j</sup>RAL: raltegravir.</p>
              </fn>
              <fn id="table1fn11">
                <p><sup>k</sup>EVG: elvitegravir.</p>
              </fn>
              <fn id="table1fn12">
                <p><sup>l</sup>NVP: nevirapine.</p>
              </fn>
              <fn id="table1fn13">
                <p><sup>m</sup>EFV: efavirenz.</p>
              </fn>
              <fn id="table1fn14">
                <p><sup>n</sup>RPV: rilpivirine.</p>
              </fn>
              <fn id="table1fn15">
                <p><sup>o</sup>LPV: lopinavir.</p>
              </fn>
              <fn id="table1fn16">
                <p><sup>p</sup>RTV: ritonavir.</p>
              </fn>
              <fn id="table1fn17">
                <p><sup>q</sup>ATV: atazanavir.</p>
              </fn>
              <fn id="table1fn18">
                <p><sup>r</sup>(M): measured.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Inspecting the antiretroviral therapy for HIV data set (output of the code in Figure 3).</p>
            </caption>
            <graphic xlink:href="mededu_v10i1e51388_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <fig id="figure3" position="float">
            <label>Figure 3</label>
            <caption>
              <p>Code in Python for generating the output shown in Figure 2. This code uses pandas [<xref ref-type="bibr" rid="ref44">44</xref>] and NumPy [<xref ref-type="bibr" rid="ref45">45</xref>]. Base drug combo: base drug combination; comp INI: complementary integrase inhibitor; comp NNRTI: complementary nonnucleoside reverse transcriptase inhibitor; PI: protease inhibitor; pk-En: pharmacokinetic enhancer; VL: viral load.</p>
            </caption>
            <graphic xlink:href="mededu_v10i1e51388_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Applications and Case Studies</title>
          <p>This section highlights the use of our synthetic ART for HIV data set in a collaborative Datathon event and as an effective teaching tool at UNSW for medical education.</p>
        </sec>
      </sec>
      <sec>
        <title>Center for Big Data Research in Health Data Science Datathon</title>
        <p>The synthetic data set for ART for HIV was a central component of the UNSW Center for Big Data Research in Health Datathon [<xref ref-type="bibr" rid="ref48">48</xref>], an event merging theoretical learning with practical application. The Datathon was an enriching exercise in multidisciplinary collaboration. The event involved 6 teams, with a total of 24 participants, offering a tangible experience in data analysis. The student teams were supported by a group of mentors—a blend of data scientists, clinicians, health professionals, and government health informatics specialists from a local health district in Sydney, Australia [<xref ref-type="bibr" rid="ref49">49</xref>]. The data scientists and the panel of authors of the Health Gym project (ie, Kuo et al [<xref ref-type="bibr" rid="ref21">21</xref>]) elaborated on the technical aspects and navigated the participants through the intricacies of data analysis, including the assumptions we made to use the data (eg, time 0 corresponded to the date of ART initiation, the laboratory tests occurred before modifications in therapy). Meanwhile, clinicians and health professionals provided their expertise to guide students toward meaningful research questions (eg, discussing VL and CD4 count monitoring, drug-drug interactions, and metabolic toxicity [<xref ref-type="bibr" rid="ref50">50</xref>]). Government health informaticians, experienced in electronic medical records and real-world population health application and impact, evaluated the usefulness of the students’ findings.</p>
        <p>This collaborative effort facilitated a comprehensive learning experience, encompassing the development of analytical models, data visualization, and effective communication of research outcomes. Using our synthetic data sets, participants gained valuable insights into working with data sets that emulate real-world health scenarios, thereby providing a bridge between theoretical academia and practical execution.</p>
        <p>We summarize the findings of 2 of the participating teams below. Detailed reports for Team 1 and Team 2 can be found in Section D and Section E of <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, respectively. In addition, the associated codes for the 2 teams can be found in Section A of <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <sec>
          <title>Findings of Team 1</title>
          <p>Team 1 investigated the effectiveness of medications, categorized by antiretroviral class, in achieving HIV suppression. Utilizing survival analysis, they assessed the time from the initiation of ART to the first occurrence of viral suppression, defined as VL below 1000 copies/mL [<xref ref-type="bibr" rid="ref36">36</xref>]. They also assessed the time to CD4 cell count exceeding 500 cells/mm<sup>3</sup> [<xref ref-type="bibr" rid="ref51">51</xref>], which indicates a healthy immunological status.</p>
          <p>With Cox proportional hazards models [<xref ref-type="bibr" rid="ref52">52</xref>] featuring time-varying covariates, the team identified particular antiretroviral agents associated with viral suppression. These findings were purely associative due to data set limitations, which did not account for factors such as age, socioeconomic status, comorbidities, and concurrent medications (of other illnesses).</p>
        </sec>
        <sec>
          <title>Findings of Team 2</title>
          <p>Team 2 focused on predicting the necessity of altering an individual’s ART regimen over a 5-year time span, factoring in disease flare-ups, resistance, or side effects. They formulated a “sliding search” function that generated individual records for each 12-month period, with predictions for antiretroviral modification and adherence to therapy in the subsequent year by using neural networks. The team’s methodology produced promising results, with an accuracy rate of 78% in predicting antiretroviral modification and 93% in predicting adherence to therapy. The algorithm detected trends in CD4 and VL results across the 12-month periods, which appeared to be the key predictive features. In addition, the team suggested that there could be potential benefits from exploring recurrent neural networks (eg, long short-term memory [<xref ref-type="bibr" rid="ref53">53</xref>]).</p>
        </sec>
      </sec>
      <sec>
        <title>Serving as UNSW Coursework Materials</title>
        <p>Beyond their utility in the Datathon, our synthetic data sets contribute to UNSW courses in the Master of Science in Health Data Science Program [<xref ref-type="bibr" rid="ref54">54</xref>], namely, HDAT9800 Visualization &#38; Communication and HDAT9510 Machine Learning II.</p>
        <p>HDAT9800 teaches future health data scientists the skills to visually communicate complex data effectively to diverse audiences. The course emphasizes the significance of clear data visualization and advocates for transparency and reproducibility in scientific work. It employs R [<xref ref-type="bibr" rid="ref55">55</xref>] and Python [<xref ref-type="bibr" rid="ref56">56</xref>] to demonstrate best practices in data analysis and visualization. Our synthetic data sets provide rich resources to enhance the learning in this setting. For instance, Marchesi et al [<xref ref-type="bibr" rid="ref57">57</xref>] used our data sets to present patient states via t-distributed stochastic neighbor embedding visualization techniques [<xref ref-type="bibr" rid="ref58">58</xref>].</p>
        <p>Meanwhile, HDAT9510 explores advanced modern ML algorithms and methods such as convolutional neural networks [<xref ref-type="bibr" rid="ref59">59</xref>], autoencoders [<xref ref-type="bibr" rid="ref60">60</xref>], and reinforcement learning (RL) [<xref ref-type="bibr" rid="ref61">61</xref>]. As the synthetic data sets consist of time-series variables, students can develop both feedforward and recurrent neural networks. See example models built using our data set in Marchesi et al [<xref ref-type="bibr" rid="ref57">57</xref>] with recurrent neural networks and even decision trees [<xref ref-type="bibr" rid="ref62">62</xref>] and hidden Markov models [<xref ref-type="bibr" rid="ref63">63</xref>], as in a similar data set suggested by Wu et al [<xref ref-type="bibr" rid="ref64">64</xref>]. Furthermore, with the presence of nonnumeric variables, students can learn about embedding [<xref ref-type="bibr" rid="ref65">65</xref>]—transforming nonnumeric levels into real-valued vectors so that levels that are closer in the vector space carry more similar meanings. The presence of missing data in the synthetic data sets also encourages students to formulate plausible assumptions about the structure of the clinical data set prior to data modeling.</p>
        <p>We provide 3 adaptable worked examples using our ART for HIV data set, suitable for workshops and lectures. The associated codes for the worked examples can be found in Section A of <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Our synthetic data set supports a variety of student engagements, from understanding complex data structures to developing advanced RL algorithms for optimizing clinical interventions. Moreover, the low patient disclosure risk associated with our data sets (refer to Section B in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) eliminates the need for ethics approval [<xref ref-type="bibr" rid="ref66">66</xref>]. This makes these data sets ideal for a range of settings—from small seminars to larger lecture groups.</p>
        <sec>
          <title>Worked Example 1</title>
          <p>The first exercise, focused on data visualization using Python, compares VL trends over time among patients who commenced their ART with different base drug combos, against the general trend in all patients. The results of our worked example are depicted in <xref rid="figure4" ref-type="fig">Figure 4</xref>.</p>
          <p>This multifaceted exercise requires students to create sub–data sets based on specific starting base drug combos (ie, FTC + TDF [emtricitabine + tenofovir disoproxil fumarate] and 3TC + ABC [lamivudine + abacavir]), extract data for defined periods, and familiarize themselves with box and violin plots [<xref ref-type="bibr" rid="ref67">67</xref>]. They are also tasked with organizing the visual data as side-by-side plots.</p>
          <p>Through this exercise, students will understand the limitations of box plots, which cannot visualize underlying data distributions. They will learn about the additional insights provided by advanced plotting techniques such as violin plots. In addition, students will note that people who start with FTC + TDF and those who start with 3TC + ABC display patterns similar to those of the overall ART for HIV cohort. The overlap of the interquartile ranges across all box plots indicates a consistent behavior.</p>
          <fig id="figure4" position="float">
            <label>Figure 4</label>
            <caption>
              <p>Viral load distribution. Subplot (A) shows a box plot comparison of viral load across base drug combinations across time, and subplot (B) shows a violin plot comparison of viral load across base drug combinations across time. Grey indicates all patients, red indicates those initiating treatment with FTC + TDF (emtricitabine + tenofovir disoproxil fumarate), and blue indicates those initiating treatment with 3TC + ABC (lamivudine + abacavir). VL: viral load.</p>
            </caption>
            <graphic xlink:href="mededu_v10i1e51388_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Worked Example 2</title>
          <p>The second exercise delves into survival analysis using R [<xref ref-type="bibr" rid="ref55">55</xref>], building on insights from the initial data visualization task. The exercise continues to compare results among people starting with the base drug combo of FTC + TDF and those initiating with the base drug combo of 3TC + ABC. The goal is to estimate the time necessary for a person on ART to successfully suppress their VL. The results of our worked example are depicted in <xref rid="figure5" ref-type="fig">Figure 5</xref>.</p>
          <p>This task proves to be more complex than the first, requiring HIV domain knowledge, such as an understanding that a reasonable VL threshold for viral suppression under ART is 1000 copies/mL [<xref ref-type="bibr" rid="ref36">36</xref>]. A VL below this threshold indicates that viral replication and immune system damage have slowed. Thus, students should select patients who commence ART with VL above 1000 copies/mL (ie, not experiencing the outcome of interest at baseline).</p>
          <p>Creating an appropriate data set for survival analysis is key, as is pinpointing when each patient’s VL first drops to or below 1000 copies/mL. In addition, students need to grasp the concept of right censoring [<xref ref-type="bibr" rid="ref68">68</xref>] and utilize Kaplan-Meier curves [<xref ref-type="bibr" rid="ref69">69</xref>] for time-to-event estimations. This offers an opportunity to engage with the influential survival package [<xref ref-type="bibr" rid="ref70">70</xref>] in the R language. Upon examining the results in <xref rid="figure5" ref-type="fig">Figure 5</xref>, students will note no significant differences in the timing of VL suppression between people who started with the base drug combo of FTC + TDF and those who initiated with the base drug combo of 3TC + ABC.</p>
          <fig id="figure5" position="float">
            <label>Figure 5</label>
            <caption>
              <p>Time-to-event estimation of viral load suppression for viral load lower than 1000 copies/mL. Red indicates those initiating treatment with FTC + TDF (emtricitabine + tenofovir disoproxil fumarate), and blue indicates those initiating treatment with 3TC + ABC (lamivudine + abacavir).</p>
            </caption>
            <graphic xlink:href="mededu_v10i1e51388_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Worked Example 3</title>
          <p>The third exercise immerses students in the process of developing an RL agent using Python. RL is a type of ML that learns an evidence-based policy to connect states (the current scenario) to actions (the potential responses to that scenario). In the context of our HIV treatment example, states refer to the representation of the patient’s current health status and medication history, while action refers to the selection of medication to use in response to each state.</p>
          <p>The RL agent selects an action based on a policy that optimizes for maximum cumulative rewards, even as environments evolve. This approach has particular relevance to health care. Clinicians often need to adapt treatment plans to each patient’s unique circumstances, and RL can help them to individualize treatment durations, dosages, or types. For example, they may alter the regimen, class, or specific agents of medication to better serve the patient’s needs. The outcomes of our example are visualized in <xref rid="figure6" ref-type="fig">Figure 6</xref>. This exercise highlights the potential of RL to enhance patient care through personalization—an aspect that is becoming increasingly important in today’s medical landscape.</p>
          <p>This complex exercise is designed for advanced students, posing challenges across multiple dimensions. It commences with data wrangling, where students scrutinize numeric variable distributions and evaluate the necessity for transformations such as rescaling, normalization [<xref ref-type="bibr" rid="ref71">71</xref>], power transformation [<xref ref-type="bibr" rid="ref72">72</xref>], or Box-Cox transformation [<xref ref-type="bibr" rid="ref73">73</xref>].</p>
          <p>In the next stage, students encounter categorical feature representation for medication regimens, practicing their skills in implementing embeddings. Advanced students can explore transfer learning for feature representation [<xref ref-type="bibr" rid="ref74">74</xref>]. This exercise also presents real-world challenges, requiring students to handle mixed-type data progression. During the model fitting phase, students must employ suitable ML models, distinguishing between RL method archetypes [<xref ref-type="bibr" rid="ref75">75</xref>] and considering their clinical implications.</p>
          <p>Data visualization is the next task, encouraging students to articulate model-derived insights into digestible visuals for a diverse audience. The concluding phase involves refining assumptions and model performance, incorporating multiple tests to identify optimal hyperparameters [<xref ref-type="bibr" rid="ref76">76</xref>]. Here, students peek into the “black box” nature of ML and gain an intuition for effective module combinations [<xref ref-type="bibr" rid="ref77">77</xref>-<xref ref-type="bibr" rid="ref79">79</xref>]. This step becomes critical for causal inference tasks that necessitate rigorous input data validation [<xref ref-type="bibr" rid="ref80">80</xref>].</p>
          <p><xref rid="figure6" ref-type="fig">Figure 6</xref> showcases the strategy employed by an RL agent in HIV therapy. Heatmaps visualize the relative frequencies of chosen actions (ie, the selected antiretroviral), where each tile represents a unique action and its frequency as a proportion of all actions. The example output shows that the RL agent consistently suggests the EFV + RAL (efavirenz + raltegravir)—a combination of comp NNRTIs and comp INIs—4.39% of the time, while never recommending the RPV + RAL (rilpivirine + raltegravir) combination. More information on the steps taken to create the output for this task can be found in Section F of <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          <fig id="figure6" position="float">
            <label>Figure 6</label>
            <caption>
              <p>Visualizing the learned reinforcement learning policy. Comp INI: complementary integrase inhibitor; Comp NNRTI: complementary nonnucleoside reverse transcriptase inhibitor; DTG: dolutegravir; EFV: efavirenz; EVG: elvitegravir; NVP: nevirapine; RAL: raltegravir; RPV: rilpivirine.</p>
            </caption>
            <graphic xlink:href="mededu_v10i1e51388_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>This paper demonstrates the transformative potential of synthetic health data sets in health care education, especially in the evolving context of generative AI integration. These data sets provide a realistic representation of real-world health data complexities while preserving patient confidentiality, facilitating experiential learning, skills enhancement, and interdisciplinary collaboration. However, this significant stride toward AI integration in education is not without challenges, and the creation of AI models trained on curated quality data sets emerges as a promising research area.</p>
      <p>Despite our best efforts, the Health Gym synthetic data sets might not fully capture the complexity and diversity of real-world scenarios. For instance, some critical health determinants such as socioeconomic status [<xref ref-type="bibr" rid="ref81">81</xref>] and comorbidities [<xref ref-type="bibr" rid="ref82">82</xref>] are missing from the ART for HIV synthetic data sets. The absence of these factors mirrors the broader issues concerning data accessibility [<xref ref-type="bibr" rid="ref83">83</xref>], particularly when it involves specialized or rare disease information. Furthermore, synthetic data might overlook uncontrolled variables or confounders inherent in real-world data [<xref ref-type="bibr" rid="ref84">84</xref>,<xref ref-type="bibr" rid="ref85">85</xref>], posing pedagogical challenges. However, this limitation is not solely attributable to our methodology. Since the socioeconomic status variable is not present in the EuResist database, our model lacked the necessary reference data from the outset.</p>
      <p>In the field of health data science, proficient data set management and curation are essential due to the decentralized nature of health care data collection. Many entities contribute to health data, each using their own systems [<xref ref-type="bibr" rid="ref86">86</xref>]. Privacy laws such as Australia’s Privacy Act 1988 [<xref ref-type="bibr" rid="ref87">87</xref>] and the United States’ Health Insurance Portability and Accountability Act [<xref ref-type="bibr" rid="ref88">88</xref>] complicate the sharing of data, resulting in a fragmented view of patient information.</p>
      <p>Record linkage techniques [<xref ref-type="bibr" rid="ref89">89</xref>] such as probabilistic matching [<xref ref-type="bibr" rid="ref90">90</xref>] bridge this gap by linking disparate data records, offering a more comprehensive view of a patient’s health. Nevertheless, our synthetic data sets, despite their potential, carry limitations such as the absence of a master linkage key [<xref ref-type="bibr" rid="ref91">91</xref>], thereby reducing their applicability in university courses for data management and curation. Having such linked data sets is also great for health data science students to test hypotheses on the effects of comorbidities. Our experiences from the Datathon suggest that the Health Gym synthetic data sets are best used for creating algorithms to enhance patient care within specific disease management paradigms.</p>
      <p>Our Health Gym initiative leverages a unique application of generative AI, differing from those used in emerging AI-assisted chatbots, which have also shown promise as potent educational tools. AI chatbots, with their personalized and interactive responses using large language models, can significantly incite interest and foster self-directed learning in medical students [<xref ref-type="bibr" rid="ref92">92</xref>]. However, advanced AI tools such as OpenAI’s ChatGPT [<xref ref-type="bibr" rid="ref93">93</xref>] and Google’s BARD [<xref ref-type="bibr" rid="ref94">94</xref>] bring with them valid concerns around precision, reliability, potential misuse, and adherence to academic integrity [<xref ref-type="bibr" rid="ref95">95</xref>,<xref ref-type="bibr" rid="ref96">96</xref>]. In contrast, the synthetic clinical data sets, the generative product of our Health Gym project, offer controlled, scenario-specific learning environments that closely reflect real-world conditions while preserving patient privacy.</p>
      <p>Access to clinical data sets is integral to health data science education, but the necessity of maintaining patient confidentiality can hinder the training of future health data scientists on a larger scale. This may exacerbate the digital divide [<xref ref-type="bibr" rid="ref97">97</xref>,<xref ref-type="bibr" rid="ref98">98</xref>], which is a prominent challenge in the broader AI integration into education. As we shift toward AI-driven educational resources, it is essential to prioritize equitable access across varied socioeconomic backgrounds. Future research should evaluate the long-term effects of AI on student learning, clinical judgment, patient outcomes, and the development of educational resources for effective AI integration. The secure, realistic synthetic data sets of Health Gym may provide a valuable solution, potentially facilitating equal access to educational materials.</p>
      <sec>
        <title>Conclusion</title>
        <p>Despite their limitations, the Health Gym synthetic health data sets have demonstrated their value in educating and training future health data scientists. Their integration into interdisciplinary platforms such as Datathon illustrates their potential in promoting collaborative learning, skills enhancement, and innovative research. In addition, synthetic data sets offer a learning platform that balances realistic health scenario representation with data privacy preservation.</p>
        <p>Although we have primarily demonstrated the utility of Health Gym’s synthetic data sets by using the ART for HIV data set, we emphasize the importance of the additional acute hypotension and sepsis data sets that we have developed (see Section C in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). These data sets broaden the scope of medical education by providing insight into managing illnesses in intensive care units, encompassing a unique set of measurements and pathology information. As such, these synthetic data sets offer students an enriched, realistic learning environment for health data science education, complementing the HIV data set and furthering the applicability and versatility of synthetic health data.</p>
        <p>The majority of generative ML research is centered on computer vision [<xref ref-type="bibr" rid="ref99">99</xref>,<xref ref-type="bibr" rid="ref100">100</xref>] and, to a lesser extent, natural language processing [<xref ref-type="bibr" rid="ref101">101</xref>], leaving clinical health care data relatively unexplored. This gap suggests a valuable opportunity for future research, particularly considering that clinical data, being longitudinal, mixed-type time series variables, have a fundamentally different nature. As demonstrated in our prior studies [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref102">102</xref>], we have ascertained that our synthetic data sets attain a robust level of validity and are readily available to support both clinical research and medical pedagogy; predictive models instantiated on our synthetic data sets parallel those of the original data sets in their characteristics. We will focus our future work on comparing synthetic data sets created using various generative ML architectures, for example, GANs, variational autoencoders [<xref ref-type="bibr" rid="ref103">103</xref>], diffusion probabilistic models [<xref ref-type="bibr" rid="ref102">102</xref>,<xref ref-type="bibr" rid="ref104">104</xref>], and transformer-based models [<xref ref-type="bibr" rid="ref105">105</xref>].</p>
        <p>GANs, like other ML models, can only optimize according to predefined optimization functions. Given the current lack of research on the use of GANs in health care, more utility studies are necessary to fully comprehend the potential of our synthetic data sets. We are committed to continuing collaboration with clinicians and health professionals to better understand the practical strengths and weaknesses of synthetic data sets, including how to better evaluate and contain the risk of private information disclosure. Through these collective efforts, we aim to improve the quality of synthetic data sets, enhancing hands-on learning experiences for students in health data analytics.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Supplementary data.</p>
        <media xlink:href="mededu_v10i1e51388_app1.docx" xlink:title="DOCX File , 38 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">3TC</term>
          <def>
            <p>lamivudine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ABC</term>
          <def>
            <p>abacavir</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">ART</term>
          <def>
            <p>antiretroviral therapy</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">Base drug combo</term>
          <def>
            <p>base drug combination</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">Comp INI</term>
          <def>
            <p>complementary integrase inhibitor</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">EFV</term>
          <def>
            <p>efavirenz</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">FTC</term>
          <def>
            <p>emtricitabine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">GAN</term>
          <def>
            <p>generative adversarial network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">INI</term>
          <def>
            <p>integrase inhibitor</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">MIMIC</term>
          <def>
            <p>Medical Information Mart for Intensive Care</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">NNRTI</term>
          <def>
            <p>nonnucleoside reverse transcriptase inhibitor</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">NRTI</term>
          <def>
            <p>nucleotide reverse transcriptase inhibitor</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">PI</term>
          <def>
            <p>protease inhibitor</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">pk-En</term>
          <def>
            <p>pharmacokinetic enhancer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb17">RAL</term>
          <def>
            <p>raltegravir</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb18">RL</term>
          <def>
            <p>reinforcement learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb19">RPV</term>
          <def>
            <p>rilpivirine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb20">TDF</term>
          <def>
            <p>tenofovir disoproxil fumarate</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb21">UNSW</term>
          <def>
            <p>University of New South Wales</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb22">VL</term>
          <def>
            <p>viral load</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study benefited from data provided by the EuResist Network EIDB, and this project has been funded by a Wellcome Trust Open Research Fund (reference 219691/Z/19/Z). JdOC is supported by the Medicines Intelligence Center of Research Excellence (grant 1196900).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>Authors NI-HK and SB were responsible for the design, implementation, and validation of the deep learning models employed to generate the synthetic data sets for the Health Gym project. Datathon was conceived by OP-C and MH, who liaised with various disciplinary personnel to realize this initiative. JdOC contributed specialist knowledge on antiretroviral therapy for HIV to Datathon, while JH offered expertise in the evaluation of Datathon projects. Furthermore, TC and SL, alongside OP-C and MH, leveraged their extensive teaching experience to guide Datathon participants and explore further applications of the Health Gym synthetic data sets. LJ provided key insights on the potential risk of sensitive information disclosure. Datathon participants EM, BH, MDS, GY, JV, and ICV gave critical feedback on the strengths and shortcomings of the synthetic data sets, in addition to providing valuable reflections on the event itself. This manuscript was prepared by NI-HK. All authors contributed to interpreting the findings and revising the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alsuliman</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Humaidan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sliman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Machine learning and artificial intelligence in the service of medicine: necessity or potentiality?</article-title>
          <source>Curr Res Transl Med</source>
          <year>2020</year>
          <month>11</month>
          <volume>68</volume>
          <issue>4</issue>
          <fpage>245</fpage>
          <lpage>251</lpage>
          <pub-id pub-id-type="doi">10.1016/j.retram.2020.01.002</pub-id>
          <pub-id pub-id-type="medline">32029403</pub-id>
          <pub-id pub-id-type="pii">S2452-3186(20)30019-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Naseem</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Akhund</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Arshad</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ibrahim</surname>
              <given-names>MT</given-names>
            </name>
          </person-group>
          <article-title>Exploring the potential of artificial intelligence and machine learning to combat COVID-19 and existing opportunities for LMIC: a scoping review</article-title>
          <source>J Prim Care Community Health</source>
          <year>2020</year>
          <volume>11</volume>
          <fpage>2150132720963634</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/2150132720963634?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/2150132720963634</pub-id>
          <pub-id pub-id-type="medline">32996368</pub-id>
          <pub-id pub-id-type="pmcid">PMC7533955</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wood</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Wicked problems: using data for better public policy</article-title>
          <source>The Australian Parliamentary Budget Office</source>
          <access-date>2023-12-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pbo.gov.au/sites/default/files/2023-03/PBO%20Conference_Danielle%20Wood_Data%20and%20wicked%20problems.pdf">https://www.pbo.gov.au/sites/default/files/2023-03/PBO%20Conference_Danielle%20Wood_Data%20and%20wicked%20problems.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Gallego Luxan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hanly</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pratt</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>de Steiger</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Graves</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Jorm</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Estimating incidence rates of periprosthetic joint infection after hip and knee arthroplasty for osteoarthritis using linked registry and administrative health data</article-title>
          <source>The Bone &#38; Joint Journal</source>
          <year>2022</year>
          <month>09</month>
          <day>01</day>
          <volume>104-B</volume>
          <issue>9</issue>
          <fpage>1060</fpage>
          <lpage>1066</lpage>
          <pub-id pub-id-type="doi">10.1302/0301-620x.104b9.bjj-2022-0116.r1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Barbieri</surname>
              <given-names>Sebastiano</given-names>
            </name>
            <name name-style="western">
              <surname>Mehta</surname>
              <given-names>Suneela</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Billy</given-names>
            </name>
            <name name-style="western">
              <surname>Bharat</surname>
              <given-names>Chrianna</given-names>
            </name>
            <name name-style="western">
              <surname>Poppe</surname>
              <given-names>Katrina</given-names>
            </name>
            <name name-style="western">
              <surname>Jorm</surname>
              <given-names>Louisa</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>Rod</given-names>
            </name>
          </person-group>
          <article-title>Predicting cardiovascular risk from national administrative databases using a combined survival analysis and deep learning approach</article-title>
          <source>Int J Epidemiol</source>
          <year>2022</year>
          <month>06</month>
          <day>13</day>
          <volume>51</volume>
          <issue>3</issue>
          <fpage>931</fpage>
          <lpage>944</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34910160"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/ije/dyab258</pub-id>
          <pub-id pub-id-type="medline">34910160</pub-id>
          <pub-id pub-id-type="pii">6463074</pub-id>
          <pub-id pub-id-type="pmcid">PMC9189958</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>YZ</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>ZY</given-names>
            </name>
            <name name-style="western">
              <surname>Quiroz</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Rezazadegan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>PK</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>QT</given-names>
            </name>
            <name name-style="western">
              <surname>Qian</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>XF</given-names>
            </name>
            <name name-style="western">
              <surname>Berkovsky</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Coiera</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>XR</given-names>
            </name>
          </person-group>
          <article-title>Severity assessment and progression prediction of COVID-19 patients based on the LesionEncoder framework and chest CT</article-title>
          <source>J Med Internet Res</source>
          <comment>Preprint posted online on March 18, 2021</comment>
          <pub-id pub-id-type="doi">10.2196/preprints.28903</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bayer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Spark</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Krcmar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Formica</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gwyther</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Srivastava</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Selloni</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cotter</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hartmann</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Polari</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bilgrami</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Sarac</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yung</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>McGowan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>McGorry</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cecchi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Mizrahi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Corcoran</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>The SPEAK study rationale and design: A linguistic corpus-based approach to understanding thought disorder</article-title>
          <source>Schizophr Res</source>
          <year>2023</year>
          <month>09</month>
          <volume>259</volume>
          <fpage>80</fpage>
          <lpage>87</lpage>
          <pub-id pub-id-type="doi">10.1016/j.schres.2022.12.048</pub-id>
          <pub-id pub-id-type="medline">36732110</pub-id>
          <pub-id pub-id-type="pii">S0920-9964(22)00495-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC10387495</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bachmann</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Von Siebenthal</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Vongrad</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Turk</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Neumann</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Beerenwinkel</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Bogojeska</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fellay</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Kok</surname>
              <given-names>YL</given-names>
            </name>
            <name name-style="western">
              <surname>Thorball</surname>
              <given-names>CW</given-names>
            </name>
            <name name-style="western">
              <surname>Borghesi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Parbhoo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wieser</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Böni</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Perreau</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Klimkait</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yerly</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Battegay</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rauch</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffmann</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bernasconi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cavassini</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kouyos</surname>
              <given-names>RD</given-names>
            </name>
          </person-group>
          <article-title>Determinants of HIV-1 reservoir size and long-term dynamics during suppressive ART</article-title>
          <source>Nature Communications</source>
          <year>2019</year>
          <month>07</month>
          <day>19</day>
          <fpage>1</fpage>
          <pub-id pub-id-type="doi">10.1101/19013763</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Glymour</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Spirtes</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Review of causal discovery methods based on graphical models</article-title>
          <source>Front Genet</source>
          <year>2019</year>
          <volume>10</volume>
          <fpage>524</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31214249"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fgene.2019.00524</pub-id>
          <pub-id pub-id-type="medline">31214249</pub-id>
          <pub-id pub-id-type="pmcid">PMC6558187</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nosowsky</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Giordano</surname>
              <given-names>TJ</given-names>
            </name>
          </person-group>
          <article-title>The Health Insurance Portability and Accountability Act of 1996 (HIPAA) privacy rule: implications for clinical research</article-title>
          <source>Annu Rev Med</source>
          <year>2006</year>
          <volume>57</volume>
          <fpage>575</fpage>
          <lpage>590</lpage>
          <pub-id pub-id-type="doi">10.1146/annurev.med.57.121304.131257</pub-id>
          <pub-id pub-id-type="medline">16409167</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>O'Keefe</surname>
              <given-names>Christine M</given-names>
            </name>
            <name name-style="western">
              <surname>Connolly</surname>
              <given-names>CJ</given-names>
            </name>
          </person-group>
          <article-title>Privacy and the use of health data for research</article-title>
          <source>Med J Aust</source>
          <year>2010</year>
          <month>11</month>
          <day>01</day>
          <volume>193</volume>
          <issue>9</issue>
          <fpage>537</fpage>
          <lpage>541</lpage>
          <pub-id pub-id-type="doi">10.5694/j.1326-5377.2010.tb04041.x</pub-id>
          <pub-id pub-id-type="medline">21034389</pub-id>
          <pub-id pub-id-type="pii">oke11214_fm</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bentzen</surname>
              <given-names>HB</given-names>
            </name>
            <name name-style="western">
              <surname>Castro</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Fears</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Griffin</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ter Meulen</surname>
              <given-names>Volker</given-names>
            </name>
            <name name-style="western">
              <surname>Ursin</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Remove obstacles to sharing health data with researchers outside of the European Union</article-title>
          <source>Nat Med</source>
          <year>2021</year>
          <month>08</month>
          <volume>27</volume>
          <issue>8</issue>
          <fpage>1329</fpage>
          <lpage>1333</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34345050"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41591-021-01460-0</pub-id>
          <pub-id pub-id-type="medline">34345050</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-021-01460-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC8329618</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>de Oliveira Costa</surname>
              <given-names>Juliana</given-names>
            </name>
            <name name-style="western">
              <surname>Bruno</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schaffer</surname>
              <given-names>Andrea L</given-names>
            </name>
            <name name-style="western">
              <surname>Raichand</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Karanges</surname>
              <given-names>Emily A</given-names>
            </name>
            <name name-style="western">
              <surname>Pearson</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The changing face of Australian data reforms: impact on pharmacoepidemiology research</article-title>
          <source>Int J Popul Data Sci</source>
          <year>2021</year>
          <month>04</month>
          <day>15</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>1418</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34007904"/>
          </comment>
          <pub-id pub-id-type="doi">10.23889/ijpds.v6i1.1418</pub-id>
          <pub-id pub-id-type="medline">34007904</pub-id>
          <pub-id pub-id-type="pii">S239949082101418X</pub-id>
          <pub-id pub-id-type="pmcid">PMC8107783</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pearson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pratt</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>de Oliveira Costa</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zoega</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Laba</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Etherton-Beer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sanfilippo</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Morgan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kalisch Ellett</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bruno</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kelty</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>IJzerman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Preen</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Vajdic</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Henry</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Generating real-world evidence on the quality use, benefits and safety of medicines in Australia: history, challenges and a roadmap for the future</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2021</year>
          <month>12</month>
          <day>18</day>
          <volume>18</volume>
          <issue>24</issue>
          <fpage>13345</fpage>
          <pub-id pub-id-type="doi">10.3390/ijerph182413345</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dash</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shakyawar</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kaushik</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Big data in healthcare: management, analysis and future prospects</article-title>
          <source>J Big Data</source>
          <year>2019</year>
          <month>06</month>
          <day>19</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <pub-id pub-id-type="doi">10.1186/s40537-019-0217-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <article-title>Data availability and transparency bill 2022</article-title>
          <source>Australian Parliament House</source>
          <access-date>2023-12-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aph.gov.au/Parliamentary_Business/Bills_LEGislation/Bills_Search_Results/Result?bId=r6649">https://www.aph.gov.au/Parliamentary_Business/Bills_LEGislation/Bills_Search_Results/Result?bId=r6649</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <article-title>The Five Safes framework</article-title>
          <source>Australian Bureau of Statistics</source>
          <access-date>2023-12-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://tinyurl.com/4t3nnxpf">http://tinyurl.com/4t3nnxpf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>The Quant Crunch: how the demand for data science skills is disrupting the job market</article-title>
          <source>Business-Higher Education Forum</source>
          <year>2017</year>
          <access-date>2023-12-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.bhef.com/publications/quant-crunch-how-demand-data-science-skills-disrupting-job-market">https://www.bhef.com/publications/quant-crunch-how-demand-data-science-skills-disrupting-job-market</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Columbus</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>IBM predicts demand for data scientists will soar 28% by 2020</article-title>
          <source>Forbes</source>
          <year>2017</year>
          <month>05</month>
          <day>13</day>
          <access-date>2023-12-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.forbes.com/sites/louiscolumbus/2017/05/13/ibm-predicts-demand-for-data-scientists-will-soar-28-by-2020/?sh=7fe27cff7e3b">https://www.forbes.com/sites/louiscolumbus/2017/05/13/ibm-predicts-demand-for-data-scientists-will-soar-28-by-2020/?sh=7fe27cff7e3b</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kolaczyk</surname>
              <given-names>ED</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yajima</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Statistics practicum: placing 'practice' at the center of data science education</article-title>
          <source>Harvard Data Science Review</source>
          <year>2021</year>
          <month>01</month>
          <day>29</day>
          <fpage>1</fpage>
          <pub-id pub-id-type="doi">10.1162/99608f92.2d65fc70</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>NIH</given-names>
            </name>
            <name name-style="western">
              <surname>Polizzotto</surname>
              <given-names>MN</given-names>
            </name>
            <name name-style="western">
              <surname>Finfer</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sönnerborg</surname>
              <given-names>Anders</given-names>
            </name>
            <name name-style="western">
              <surname>Zazzi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Böhm</surname>
              <given-names>Michael</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jorm</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Barbieri</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The Health Gym: synthetic health-related datasets for the development of reinforcement learning algorithms</article-title>
          <source>Sci Data</source>
          <year>2022</year>
          <month>11</month>
          <day>11</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>693</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41597-022-01784-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41597-022-01784-7</pub-id>
          <pub-id pub-id-type="medline">36369205</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41597-022-01784-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC9652426</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goodfellow</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Pouget-Abadie</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mirza</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Warde-Farley</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ozair</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Courville</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Generative adversarial networks</article-title>
          <source>Commun ACM</source>
          <year>2020</year>
          <month>10</month>
          <day>22</day>
          <volume>63</volume>
          <issue>11</issue>
          <fpage>139</fpage>
          <lpage>144</lpage>
          <pub-id pub-id-type="doi">10.1145/3422622</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arjovsky</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chintala</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bottou</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Wasserstein Generative Adversarial Networks</article-title>
          <year>2017</year>
          <conf-name>International Conference on Machine Learning</conf-name>
          <conf-date>August 6</conf-date>
          <conf-loc>Sydney, Australia</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.mlr.press/v70/arjovsky17a.html"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gulrajani</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmed</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Arjovsky</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dumoulin</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Courville</surname>
              <given-names>AC</given-names>
            </name>
          </person-group>
          <article-title>Improved training of Wasserstein GANs</article-title>
          <year>2017</year>
          <month>12</month>
          <day>12</day>
          <conf-name>Neural Information Processing Systems</conf-name>
          <conf-date>2017</conf-date>
          <conf-loc>Long Beach, California</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>NIH</given-names>
            </name>
          </person-group>
          <article-title>The Health Gym</article-title>
          <source>HealthGym.ai</source>
          <access-date>2023-12-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://healthgym.ai/">https://healthgym.ai/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <article-title>Nic5472K / ScientificData2021_HealthGym</article-title>
          <source>GitHub</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://github.com/Nic5472K/ScientificData2021_HealthGym">https://github.com/Nic5472K/ScientificData2021_HealthGym</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rosen</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <source>Open Source Licensing: Software Freedom and Intellectual Property Law</source>
          <year>2004</year>
          <month>07</month>
          <day>01</day>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.immagic.com/eLibrary/ARCHIVES/EBOOKS/R050225R.pdf">https://www.immagic.com/eLibrary/ARCHIVES/EBOOKS/R050225R.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <article-title>Graduate certificate in Health Data Science</article-title>
          <source>The University of New South Wales</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.unsw.edu.au/study/postgraduate/graduate-certificate-in-health-data-science?studentType=Domestic">https://www.unsw.edu.au/study/postgraduate/graduate-certificate-in-health-data-science?studentType=Domestic</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <article-title>CBDRH Health Data Science Datathon 2023</article-title>
          <source>GitHub</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cbdrh-hds-datathon-2023.github.io/">https://cbdrh-hds-datathon-2023.github.io/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <article-title>Public release of clinical information: guidance document</article-title>
          <source>Government of Canada</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.canada.ca/en/health-canada/services/drug-health-product-review-approval/profile-public-release-clinical-information-guidance/document.html">https://www.canada.ca/en/health-canada/services/drug-health-product-review-approval/profile-public-release-clinical-information-guidance/document.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <article-title>Clinical data publication</article-title>
          <source>European Medicines Agency</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ema.europa.eu/en/human-regulatory-overview/marketing-authorisation/clinical-data-publication#:~:text=The%20Agency%20intends%20to%20gradually,%3A%2014%2D15%20December%202022">https://www.ema.europa.eu/en/human-regulatory-overview/marketing-authorisation/clinical-data-publication#:~:text=The%20Agency%20intends%20to%20gradually,%3A%2014%2D15%20December%202022</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Pollard</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ghassemi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moody</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>Leo Anthony</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>RG</given-names>
            </name>
          </person-group>
          <article-title>MIMIC-III, a freely accessible critical care database</article-title>
          <source>Sci Data</source>
          <year>2016</year>
          <month>05</month>
          <day>24</day>
          <volume>3</volume>
          <fpage>160035</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/sdata.2016.35"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/sdata.2016.35</pub-id>
          <pub-id pub-id-type="medline">27219127</pub-id>
          <pub-id pub-id-type="pii">sdata201635</pub-id>
          <pub-id pub-id-type="pmcid">PMC4878278</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zazzi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Incardona</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rosen-Zvi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prosperi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lengauer</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Altmann</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sönnerborg</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lavee</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Schülter</surname>
              <given-names>Eugen</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Predicting response to antiretroviral treatment by machine learning: the EuResist project</article-title>
          <source>Intervirology</source>
          <year>2012</year>
          <volume>55</volume>
          <issue>2</issue>
          <fpage>123</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1159/000332008"/>
          </comment>
          <pub-id pub-id-type="doi">10.1159/000332008</pub-id>
          <pub-id pub-id-type="medline">22286881</pub-id>
          <pub-id pub-id-type="pii">000332008</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Prosperi</surname>
              <given-names>MCF</given-names>
            </name>
            <name name-style="western">
              <surname>Rosen-Zvi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Altmann</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zazzi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Di Giambenedetto</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Schülter</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Struck</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sloot</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>van de Vijver</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Vandamme</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sönnerborg</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Correction: antiretroviral therapy optimisation without genotype resistance testing: a perspective on treatment history based models</article-title>
          <source>PLoS ONE</source>
          <year>2011</year>
          <month>4</month>
          <day>26</day>
          <volume>6</volume>
          <issue>4</issue>
          <fpage>1</fpage>
          <pub-id pub-id-type="doi">10.1371/annotation/d0254103-21b9-4078-836b-57ba5bd1c26a</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Parbhoo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bogojeska</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zazzi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Doshi-Velez</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Combining kernel and model based learning for HIV therapy selection</article-title>
          <source>AMIA Jt Summits Transl Sci Proc</source>
          <year>2017</year>
          <volume>2017</volume>
          <fpage>239</fpage>
          <lpage>248</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28815137"/>
          </comment>
          <pub-id pub-id-type="medline">28815137</pub-id>
          <pub-id pub-id-type="pmcid">PMC5543338</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <article-title>Consolidated guidelines on the use of antiretroviral drugs for treating and preventing HIV infection: recommendations for a public health approach, 2nd ed</article-title>
          <source>World Health Organization</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/publications/i/item/9789241549684">https://www.who.int/publications/i/item/9789241549684</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bennett</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Bertagnolio</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sutherland</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gilks</surname>
              <given-names>CF</given-names>
            </name>
          </person-group>
          <article-title>The World Health Organization's global strategy for prevention and assessment of HIV drug resistance</article-title>
          <source>Antiviral Therapy</source>
          <year>2008</year>
          <month>02</month>
          <day>01</day>
          <volume>13</volume>
          <issue>2_suppl</issue>
          <fpage>1</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.1177/135965350801302s03</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>TF</given-names>
            </name>
            <name name-style="western">
              <surname>Shafer</surname>
              <given-names>RW</given-names>
            </name>
          </person-group>
          <article-title>The HIVdb system for HIV-1 genotypic resistance interpretation</article-title>
          <source>Intervirology</source>
          <year>2012</year>
          <volume>55</volume>
          <issue>2</issue>
          <fpage>98</fpage>
          <lpage>101</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1159/000331998"/>
          </comment>
          <pub-id pub-id-type="doi">10.1159/000331998</pub-id>
          <pub-id pub-id-type="medline">22286876</pub-id>
          <pub-id pub-id-type="pii">000331998</pub-id>
          <pub-id pub-id-type="pmcid">PMC7068798</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fox</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Brennan</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Nattey</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>MacLeod</surname>
              <given-names>WB</given-names>
            </name>
            <name name-style="western">
              <surname>Harlow</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mlisana</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Maskew</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Carmona</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bor</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Delays in repeat HIV viral load testing for those with elevated viral loads: a national perspective from South Africa</article-title>
          <source>J Int AIDS Soc</source>
          <year>2020</year>
          <month>07</month>
          <volume>23</volume>
          <issue>7</issue>
          <fpage>e25542</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32640101"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/jia2.25542</pub-id>
          <pub-id pub-id-type="medline">32640101</pub-id>
          <pub-id pub-id-type="pmcid">PMC7343337</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hill</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenbloom</surname>
              <given-names>DIS</given-names>
            </name>
            <name name-style="western">
              <surname>Goldstein</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hanhauser</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kuritzkes</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Siliciano</surname>
              <given-names>RF</given-names>
            </name>
            <name name-style="western">
              <surname>Henrich</surname>
              <given-names>TJ</given-names>
            </name>
          </person-group>
          <article-title>Real-time predictions of reservoir size and rebound time during antiretroviral therapy interruption trials for HIV</article-title>
          <source>PLoS Pathog</source>
          <year>2016</year>
          <month>04</month>
          <volume>12</volume>
          <issue>4</issue>
          <fpage>e1005535</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.ppat.1005535"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.ppat.1005535</pub-id>
          <pub-id pub-id-type="medline">27119536</pub-id>
          <pub-id pub-id-type="pii">PPATHOGENS-D-15-02150</pub-id>
          <pub-id pub-id-type="pmcid">PMC4847932</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <article-title>What’s new in treatment monitoring: viral load and CD4 testing</article-title>
          <source>World Health Organization</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/publications/i/item/WHO-HIV-2017.22">https://www.who.int/publications/i/item/WHO-HIV-2017.22</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
          <article-title>NSW HIV strategy 2021-2025</article-title>
          <source>New South Wales Health</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.health.nsw.gov.au/endinghiv/Pages/nsw-hiv-strategy-2021-2025.aspx">https://www.health.nsw.gov.au/endinghiv/Pages/nsw-hiv-strategy-2021-2025.aspx</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>NIH</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sönnerborg</surname>
              <given-names>Anders</given-names>
            </name>
            <name name-style="western">
              <surname>Böhm</surname>
              <given-names>Michael</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zazzi</surname>
              <given-names>M</given-names>
            </name>
            <collab>EuResist Network study group</collab>
            <name name-style="western">
              <surname>Polizzotto</surname>
              <given-names>Mark</given-names>
            </name>
            <name name-style="western">
              <surname>Jorm</surname>
              <given-names>Louisa</given-names>
            </name>
            <name name-style="western">
              <surname>Barbieri</surname>
              <given-names>Sebastiano</given-names>
            </name>
          </person-group>
          <article-title>Generating synthetic clinical data that capture class imbalanced distributions with generative adversarial networks: Example using antiretroviral therapy for HIV</article-title>
          <source>J Biomed Inform</source>
          <year>2023</year>
          <month>08</month>
          <volume>144</volume>
          <fpage>104436</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(23)00157-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2023.104436</pub-id>
          <pub-id pub-id-type="medline">37451495</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(23)00157-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McKinney</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Data structures for statistical computing in Python</article-title>
          <source>Proceedings of the 9th Python in Science Conference (SciPy 2010)</source>
          <year>2010</year>
          <fpage>1</fpage>
          <pub-id pub-id-type="doi">10.25080/majora-92bf1922-00a</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van der Walt</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Colbert</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>The NumPy array: a structure for efficient numerical computation</article-title>
          <source>Comput Sci Eng</source>
          <year>2011</year>
          <month>03</month>
          <volume>13</volume>
          <issue>2</issue>
          <fpage>22</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1109/mcse.2011.37</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>NIH</given-names>
            </name>
          </person-group>
          <article-title>The Heath Gym synthetic HIV dataset</article-title>
          <source>Figshare</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://figshare.com/articles/dataset/The_Heath_Gym_Synthetic_HIV_Dataset/19838470">https://figshare.com/articles/dataset/The_Heath_Gym_Synthetic_HIV_Dataset/19838470</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>NIH</given-names>
            </name>
          </person-group>
          <article-title>The Health Gym v2.0 synthetic antiretroviral therapy (ART) for HIV dataset</article-title>
          <source>Figshare</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://figshare.com/articles/dataset/The_Health_Gym_v2_0_Synthetic_Antiretroviral_Therapy_ART_for_HIV_Dataset/22827878">https://figshare.com/articles/dataset/The_Health_Gym_v2_0_Synthetic_Antiretroviral_Therapy_ART_for_HIV_Dataset/22827878</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="web">
          <article-title>Datathon highlights</article-title>
          <source>CBDRH Health Data Science Datathon 2023</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cbdrh-hds-datathon-2023.github.io/review.html">https://cbdrh-hds-datathon-2023.github.io/review.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="web">
          <article-title>Sydney local health district</article-title>
          <source>New South Wales Health</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://slhd.health.nsw.gov.au/">https://slhd.health.nsw.gov.au/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>de Oliveira Costa</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lau</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Medland</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gibbons</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schaffer</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Pearson</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Potential drug-drug interactions due to concomitant medicine use among people living with HIV on antiretroviral therapy in Australia</article-title>
          <source>Br J Clin Pharmacol</source>
          <year>2023</year>
          <month>05</month>
          <volume>89</volume>
          <issue>5</issue>
          <fpage>1541</fpage>
          <lpage>1553</lpage>
          <pub-id pub-id-type="doi">10.1111/bcp.15614</pub-id>
          <pub-id pub-id-type="medline">36434744</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Garcia</surname>
              <given-names>SAB</given-names>
            </name>
            <name name-style="western">
              <surname>Guzman</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Acquired immune deficiency syndrome CD4+ count</article-title>
          <source>StatPearls</source>
          <year>2023</year>
          <month>08</month>
          <day>14</day>
          <fpage>1</fpage>
          <pub-id pub-id-type="medline">30020661</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cox</surname>
              <given-names>DR</given-names>
            </name>
          </person-group>
          <article-title>Regression models and life tables</article-title>
          <source>Journal of the Royal Statistical Society: Series B (Methodological)</source>
          <year>1972</year>
          <volume>34</volume>
          <issue>2</issue>
          <fpage>187</fpage>
          <lpage>202</lpage>
          <pub-id pub-id-type="doi">10.1111/j.2517-6161.1972.tb00899.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hochreiter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schmidhuber</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Long short-term memory</article-title>
          <source>Neural Comput</source>
          <year>1997</year>
          <month>11</month>
          <day>15</day>
          <volume>9</volume>
          <issue>8</issue>
          <fpage>1735</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.1162/neco.1997.9.8.1735</pub-id>
          <pub-id pub-id-type="medline">9377276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="web">
          <article-title>Master of Science in Health Data Science</article-title>
          <source>The University of New South Wales</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.unsw.edu.au/study/postgraduate/master-of-science?cq_plac=&#38;studentType=Domestic">https://www.unsw.edu.au/study/postgraduate/master-of-science?cq_plac=&#38;studentType=Domestic</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="web">
          <article-title>R: a language and environment for statistical computing</article-title>
          <source>R-Project</source>
          <access-date>2023-12-28</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.gbif.org/tool/81287/r-a-language-and-environment-for-statistical-computing">https://www.gbif.org/tool/81287/r-a-language-and-environment-for-statistical-computing</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Van Rossum</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Python tutorial</article-title>
          <source>Centrum Wiskunde &#38; Informatica Institutional Repository</source>
          <year>1995</year>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ir.cwi.nl/pub/5007">https://ir.cwi.nl/pub/5007</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marchesi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Micheletti</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Jurman</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Osmani</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Mitigating health data poverty: generative approaches versus resampling for time-series clinical data</article-title>
          <source>ArXiv</source>
          <comment>Preprint posted online on October 26, 2022</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2210.13958</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Van der Maaten</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Visualizing data using t-SNE</article-title>
          <source>Journal of Machine Learning Research</source>
          <year>2008</year>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmlr.org/papers/volume9/vandermaaten08a/vandermaaten08a.pdf">https://www.jmlr.org/papers/volume9/vandermaaten08a/vandermaaten08a.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>LeCun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Deep learning</article-title>
          <source>Nature</source>
          <year>2015</year>
          <month>05</month>
          <day>28</day>
          <volume>521</volume>
          <issue>7553</issue>
          <fpage>436</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.1038/nature14539</pub-id>
          <pub-id pub-id-type="medline">26017442</pub-id>
          <pub-id pub-id-type="pii">nature14539</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kramer</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Nonlinear principal component analysis using autoassociative neural networks</article-title>
          <source>AIChE Journal</source>
          <year>1991</year>
          <month>02</month>
          <volume>37</volume>
          <issue>2</issue>
          <fpage>233</fpage>
          <lpage>243</lpage>
          <pub-id pub-id-type="doi">10.1002/aic.690370209</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sutton</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Barto</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Reinforcement learning: an introduction</article-title>
          <source>IEEE Trans Neural Netw</source>
          <year>1998</year>
          <month>09</month>
          <volume>9</volume>
          <issue>5</issue>
          <fpage>1054</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.1109/tnn.1998.712192</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Winterfeldt</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Edwards</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <source>Decision Analysis and Behavioral Research</source>
          <year>1986</year>
          <month>08</month>
          <day>26</day>
          <publisher-loc>Cambridge, Massachusetts, USA</publisher-loc>
          <publisher-name>Cambridge University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baum</surname>
              <given-names>LE</given-names>
            </name>
            <name name-style="western">
              <surname>Petrie</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Statistical inference for probabilistic functions of finite state Markov chains</article-title>
          <source>Ann Math Statist</source>
          <year>1966</year>
          <month>12</month>
          <volume>37</volume>
          <issue>6</issue>
          <fpage>1554</fpage>
          <lpage>1563</lpage>
          <pub-id pub-id-type="doi">10.1214/aoms/1177699147</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Parbhoo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zazzi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Doshi-Velez</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Beyond sparsity: tree regularization of deep models for interpretability</article-title>
          <source>AAAI</source>
          <year>2018</year>
          <conf-name>AAAI Conference on Artificial Intelligence</conf-name>
          <conf-date>April 25</conf-date>
          <conf-loc>Chicago, Illinois, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1609/aaai.v32i1.11501</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Efficient estimation of word representations in vector space</article-title>
          <source>ArXiv</source>
          <comment>Preprint posted online on September 7, 2013</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1301.3781"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1301.3781</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Barnett</surname>
              <given-names>AG</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shield</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Farrington</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Page</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gardner</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mitchell</surname>
              <given-names>BG</given-names>
            </name>
            <name name-style="western">
              <surname>Graves</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>The high costs of getting ethical and site-specific approvals for multi-centre research</article-title>
          <source>Res Integr Peer Rev</source>
          <year>2016</year>
          <volume>1</volume>
          <fpage>16</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://researchintegrityjournal.biomedcentral.com/articles/10.1186/s41073-016-0023-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s41073-016-0023-6</pub-id>
          <pub-id pub-id-type="medline">29451546</pub-id>
          <pub-id pub-id-type="pii">23</pub-id>
          <pub-id pub-id-type="pmcid">PMC5803625</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hunter</surname>
              <given-names>JD</given-names>
            </name>
          </person-group>
          <article-title>Matplotlib: A 2D graphics environment</article-title>
          <source>Comput Sci Eng</source>
          <year>2007</year>
          <month>05</month>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>90</fpage>
          <lpage>95</lpage>
          <pub-id pub-id-type="doi">10.1109/mcse.2007.55</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Estimation and comparison of changes in the presence of informative right censoring by modeling the censoring process</article-title>
          <source>Biometrics</source>
          <year>1988</year>
          <month>03</month>
          <volume>44</volume>
          <issue>1</issue>
          <fpage>175</fpage>
          <pub-id pub-id-type="doi">10.2307/2531905</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaplan</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>Meier</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Nonparametric estimation from incomplete observations</article-title>
          <source>Journal of the American Statistical Association</source>
          <year>1958</year>
          <month>06</month>
          <volume>53</volume>
          <issue>282</issue>
          <fpage>457</fpage>
          <lpage>481</lpage>
          <pub-id pub-id-type="doi">10.1080/01621459.1958.10501452</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Therneau</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Lumley</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Atkinson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Crowson</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>survival: Survival Analysis</article-title>
          <source>The Comprehensive R Archive Network</source>
          <year>2015</year>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cran.r-project.org/web/packages/survival/index.html">https://cran.r-project.org/web/packages/survival/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Patro</surname>
              <given-names>SGK</given-names>
            </name>
            <name name-style="western">
              <surname>Sahu</surname>
              <given-names>KK</given-names>
            </name>
          </person-group>
          <article-title>Normalization: a preprocessing stage</article-title>
          <source>ArXiv</source>
          <comment>Preprint posted online on March 19, 2015</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1503.06462"/>
          </comment>
          <pub-id pub-id-type="doi">10.17148/iarjset.2015.2305</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ruppert</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>On prediction and the power transformation family</article-title>
          <source>Biometrika</source>
          <year>1981</year>
          <volume>68</volume>
          <issue>3</issue>
          <fpage>609</fpage>
          <lpage>615</lpage>
          <pub-id pub-id-type="doi">10.1093/biomet/68.3.609</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Box</surname>
              <given-names>GEP</given-names>
            </name>
            <name name-style="western">
              <surname>Cox</surname>
              <given-names>DR</given-names>
            </name>
          </person-group>
          <article-title>An analysis of transformations</article-title>
          <source>Journal of the Royal Statistical Society: Series B (Methodological)</source>
          <year>1964</year>
          <volume>26</volume>
          <issue>2</issue>
          <fpage>211</fpage>
          <lpage>243</lpage>
          <pub-id pub-id-type="doi">10.1111/j.2517-6161.1964.tb00553.x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Deep learning of representations for unsupervised and transfer learning</article-title>
          <year>2011</year>
          <conf-name>International Conference on Machine Learning Unsupervised and Transfer Learning Workshop</conf-name>
          <conf-date>July 2</conf-date>
          <conf-loc>Bellevue, Washington, USA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.mlr.press/v27/bengio12a/bengio12a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref75">
        <label>75</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Levine</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tucker</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Offline reinforcement learning: tutorial, review, and perspectives on open problems</article-title>
          <source>ArXiv</source>
          <comment>Preprint posted online on November 1, 2020</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2005.01643"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2005.01643</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref76">
        <label>76</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bergstra</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yamins</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Cox</surname>
              <given-names>DD</given-names>
            </name>
          </person-group>
          <article-title>Making a science of model search</article-title>
          <year>2013</year>
          <month>06</month>
          <day>21</day>
          <conf-name>International Conference on Machine Learning</conf-name>
          <conf-date>June 21</conf-date>
          <conf-loc>Atlanta, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref77">
        <label>77</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>NIH</given-names>
            </name>
          </person-group>
          <article-title>Understanding and modifying dynamical Hopfield neural networks for generating multiple coherent patterns [PhD thesis]</article-title>
          <source>The University of Auckland</source>
          <year>2017</year>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://researchspace.auckland.ac.nz/handle/2292/34849">https://researchspace.auckland.ac.nz/handle/2292/34849</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref78">
        <label>78</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>NIH</given-names>
            </name>
            <name name-style="western">
              <surname>Harandi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fourrier</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Walder</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ferraro</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Suominen</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>An input residual connection for simplifying gated recurrent neural networks</article-title>
          <year>2020</year>
          <conf-name>International Joint Conference on Neural Networks</conf-name>
          <conf-date>July 19</conf-date>
          <conf-loc>Glasgow, United Kingdom</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ijcnn48605.2020.9207238</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref79">
        <label>79</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>NIH</given-names>
            </name>
            <name name-style="western">
              <surname>Harandi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fourrier</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Walder</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ferraro</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Suominen</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Plastic and stable gated classifiers for continual learning</article-title>
          <year>2021</year>
          <conf-name>IEEE Computer Society Conference on Computer Vision and Pattern Recognition Workshops</conf-name>
          <conf-date>June 19</conf-date>
          <conf-loc>Online</conf-loc>
          <pub-id pub-id-type="doi">10.1109/cvprw53098.2021.00394</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref80">
        <label>80</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Walker</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Luque</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Le Pelley</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Beesley</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>The role of uncertainty in attentional and choice exploration</article-title>
          <source>Psychon Bull Rev</source>
          <year>2019</year>
          <month>12</month>
          <volume>26</volume>
          <issue>6</issue>
          <fpage>1911</fpage>
          <lpage>1916</lpage>
          <pub-id pub-id-type="doi">10.3758/s13423-019-01653-2</pub-id>
          <pub-id pub-id-type="medline">31429060</pub-id>
          <pub-id pub-id-type="pii">10.3758/s13423-019-01653-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref81">
        <label>81</label>
        <nlm-citation citation-type="web">
          <article-title>Socioeconomic indexes for areas</article-title>
          <source>Australian Bureau of Statistics</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.abs.gov.au/websitedbs/censushome.nsf/home/seifa">https://www.abs.gov.au/websitedbs/censushome.nsf/home/seifa</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref82">
        <label>82</label>
        <nlm-citation citation-type="web">
          <article-title>Chronic conditions and multimorbidity</article-title>
          <source>Australian Institute of Health and Welfare</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aihw.gov.au/reports/australias-health/chronic-conditions-and-multimorbidity">https://www.aihw.gov.au/reports/australias-health/chronic-conditions-and-multimorbidity</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref83">
        <label>83</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Filkins</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Hultner</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Castillo</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Ducom</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Topol</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Steinhubl</surname>
              <given-names>SR</given-names>
            </name>
          </person-group>
          <article-title>Privacy and security in the era of digital health: what should translational researchers know and do about it?</article-title>
          <source>Am J Transl Res</source>
          <year>2016</year>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>1560</fpage>
          <lpage>1580</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27186282"/>
          </comment>
          <pub-id pub-id-type="medline">27186282</pub-id>
          <pub-id pub-id-type="pmcid">PMC4859641</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref84">
        <label>84</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Corley</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Jensen</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Marks</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>WK</given-names>
            </name>
            <name name-style="western">
              <surname>de Boer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Levin</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Doubeni</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Fireman</surname>
              <given-names>BH</given-names>
            </name>
            <name name-style="western">
              <surname>Quesenberry</surname>
              <given-names>CP</given-names>
            </name>
          </person-group>
          <article-title>Variation of adenoma prevalence by age, sex, race, and colon location in a large population: implications for screening and quality programs</article-title>
          <source>Clinical Gastroenterology and Hepatology</source>
          <year>2013</year>
          <month>02</month>
          <volume>11</volume>
          <issue>2</issue>
          <fpage>172</fpage>
          <lpage>180</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cgh.2012.09.010</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref85">
        <label>85</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Earnshaw</surname>
              <given-names>VA</given-names>
            </name>
            <name name-style="western">
              <surname>Bogart</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Dovidio</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>DR</given-names>
            </name>
          </person-group>
          <article-title>Stigma and racial/ethnic HIV disparities: moving toward resilience</article-title>
          <source>Am Psychol</source>
          <year>2013</year>
          <volume>68</volume>
          <issue>4</issue>
          <fpage>225</fpage>
          <lpage>236</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23688090"/>
          </comment>
          <pub-id pub-id-type="doi">10.1037/a0032705</pub-id>
          <pub-id pub-id-type="medline">23688090</pub-id>
          <pub-id pub-id-type="pii">2013-17443-003</pub-id>
          <pub-id pub-id-type="pmcid">PMC3740715</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref86">
        <label>86</label>
        <nlm-citation citation-type="web">
          <article-title>Datasets - CHeReL</article-title>
          <source>Centre for Health Record Linkage</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cherel.org.au/datasets">https://www.cherel.org.au/datasets</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref87">
        <label>87</label>
        <nlm-citation citation-type="web">
          <article-title>Privacy act 1988</article-title>
          <source>The Australian Government Federal Register of Legislation</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.legislation.gov.au/Details/C2014C00076">https://www.legislation.gov.au/Details/C2014C00076</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref88">
        <label>88</label>
        <nlm-citation citation-type="web">
          <article-title>Health information privacy</article-title>
          <source>The US Department of Health &amp; Human Services</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.hhs.gov/hipaa/index.html">https://www.hhs.gov/hipaa/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref89">
        <label>89</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fellegi</surname>
              <given-names>IP</given-names>
            </name>
            <name name-style="western">
              <surname>Sunter</surname>
              <given-names>AB</given-names>
            </name>
          </person-group>
          <article-title>A theory for record linkage</article-title>
          <source>Journal of the American Statistical Association</source>
          <year>1969</year>
          <month>12</month>
          <volume>64</volume>
          <issue>328</issue>
          <fpage>1183</fpage>
          <lpage>1210</lpage>
          <pub-id pub-id-type="doi">10.1080/01621459.1969.10501049</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref90">
        <label>90</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blakely</surname>
              <given-names>Tony</given-names>
            </name>
            <name name-style="western">
              <surname>Salmond</surname>
              <given-names>Clare</given-names>
            </name>
          </person-group>
          <article-title>Probabilistic record linkage and a method to calculate the positive predictive value</article-title>
          <source>Int J Epidemiol</source>
          <year>2002</year>
          <month>12</month>
          <volume>31</volume>
          <issue>6</issue>
          <fpage>1246</fpage>
          <lpage>1252</lpage>
          <pub-id pub-id-type="doi">10.1093/ije/31.6.1246</pub-id>
          <pub-id pub-id-type="medline">12540730</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref91">
        <label>91</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lujic</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Randall</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Simpson</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Falster</surname>
              <given-names>MO</given-names>
            </name>
            <name name-style="western">
              <surname>Jorm</surname>
              <given-names>LR</given-names>
            </name>
          </person-group>
          <article-title>Interaction effects of multimorbidity and frailty on adverse health outcomes in elderly hospitalised patients</article-title>
          <source>Sci Rep</source>
          <year>2022</year>
          <month>08</month>
          <day>19</day>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>14139</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-022-18346-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-022-18346-x</pub-id>
          <pub-id pub-id-type="medline">35986045</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-022-18346-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC9391344</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref92">
        <label>92</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Analysis of the effect of an artificial intelligence chatbot educational program on non-face-to-face classes: a quasi-experimental study</article-title>
          <source>BMC Med Educ</source>
          <year>2022</year>
          <month>12</month>
          <day>01</day>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>830</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmededuc.biomedcentral.com/articles/10.1186/s12909-022-03898-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12909-022-03898-3</pub-id>
          <pub-id pub-id-type="medline">36457086</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12909-022-03898-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC9713176</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref93">
        <label>93</label>
        <nlm-citation citation-type="web">
          <article-title>Introducing ChatGPT</article-title>
          <source>OpenAI</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openai.com/blog/chatgpt">https://openai.com/blog/chatgpt</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref94">
        <label>94</label>
        <nlm-citation citation-type="web">
          <article-title>An important next step on our AI journey</article-title>
          <source>Google</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://blog.google/intl/en-africa/products/explore-get-answers/an-important-next-step-on-our-ai-journey/">https://blog.google/intl/en-africa/products/explore-get-answers/an-important-next-step-on-our-ai-journey/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref95">
        <label>95</label>
        <nlm-citation citation-type="web">
          <article-title>‘We are a little bit scared’: OpenAI CEO warns of risks of artificial intelligence</article-title>
          <source>The Guardian</source>
          <access-date>2023-12-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.theguardian.com/technology/2023/mar/17/openai-sam-altman-artificial-intelligence-warning-gpt4">https://www.theguardian.com/technology/2023/mar/17/openai-sam-altman-artificial-intelligence-warning-gpt4</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref96">
        <label>96</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Karabacak</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ozkara</surname>
              <given-names>BB</given-names>
            </name>
            <name name-style="western">
              <surname>Margetis</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wintermark</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bisdas</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The advent of generative language models in medical education</article-title>
          <source>JMIR Med Educ</source>
          <year>2023</year>
          <month>06</month>
          <day>06</day>
          <volume>9</volume>
          <fpage>e48163</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2023/1/e48163/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/48163</pub-id>
          <pub-id pub-id-type="medline">37279048</pub-id>
          <pub-id pub-id-type="pii">v9i1e48163</pub-id>
          <pub-id pub-id-type="pmcid">PMC10282912</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref97">
        <label>97</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lembani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gunter</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Breines</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dalu</surname>
              <given-names>MTB</given-names>
            </name>
          </person-group>
          <article-title>The same course, different access: the digital divide between urban and rural distance education students in South Africa</article-title>
          <source>Journal of Geography in Higher Education</source>
          <year>2019</year>
          <month>11</month>
          <day>22</day>
          <volume>44</volume>
          <issue>1</issue>
          <fpage>70</fpage>
          <lpage>84</lpage>
          <pub-id pub-id-type="doi">10.1080/03098265.2019.1694876</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref98">
        <label>98</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van de Werfhorst</surname>
              <given-names>HG</given-names>
            </name>
            <name name-style="western">
              <surname>Kessenich</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Geven</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The digital divide in online education: Inequality in digital readiness of students and schools</article-title>
          <source>Computers and Education Open</source>
          <year>2022</year>
          <month>12</month>
          <volume>3</volume>
          <fpage>100100</fpage>
          <pub-id pub-id-type="doi">10.1016/j.caeo.2022.100100</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref99">
        <label>99</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kazeminia</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Baur</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kuijper</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>van Ginneken</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Navab</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Albarqouni</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mukhopadhyay</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>GANs for medical image analysis</article-title>
          <source>Artif Intell Med</source>
          <year>2020</year>
          <month>09</month>
          <volume>109</volume>
          <fpage>101938</fpage>
          <pub-id pub-id-type="doi">10.1016/j.artmed.2020.101938</pub-id>
          <pub-id pub-id-type="medline">34756215</pub-id>
          <pub-id pub-id-type="pii">S0933-3657(19)31151-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref100">
        <label>100</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Armanious</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Fischer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Küstner</surname>
              <given-names>Thomas</given-names>
            </name>
            <name name-style="western">
              <surname>Hepp</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Nikolaou</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gatidis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>MedGAN: Medical image translation using GANs</article-title>
          <source>Comput Med Imaging Graph</source>
          <year>2020</year>
          <month>01</month>
          <volume>79</volume>
          <fpage>101684</fpage>
          <pub-id pub-id-type="doi">10.1016/j.compmedimag.2019.101684</pub-id>
          <pub-id pub-id-type="medline">31812132</pub-id>
          <pub-id pub-id-type="pii">S0895-6111(19)30099-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref101">
        <label>101</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bose</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Srinivasan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sleeman</surname>
              <given-names>WC</given-names>
            </name>
            <name name-style="western">
              <surname>Palta</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kapoor</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ghosh</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>A survey on recent named entity recognition and relationship extraction techniques on clinical texts</article-title>
          <source>Applied Sciences</source>
          <year>2021</year>
          <month>09</month>
          <day>08</day>
          <volume>11</volume>
          <issue>18</issue>
          <fpage>8319</fpage>
          <pub-id pub-id-type="doi">10.3390/app11188319</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref102">
        <label>102</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>NIH</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sonnerborg</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bohm</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zazzi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jorm</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Barbieri</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Synthetic health-related longitudinal data with mixed-type variables generated using diffusion models</article-title>
          <source>ArXiv</source>
          <comment>Preprint posted online on March 22, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openreview.net/forum?id=1MV49Ug6q9"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2303.12281</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref103">
        <label>103</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kingma</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>Welling</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Auto-encoding variational Bayes</article-title>
          <source>ArXiv</source>
          <comment>Preprint posted online on December 10, 2022</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1312.6114"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1312.6114</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref104">
        <label>104</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sohl-Dickstein</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Maheswaranathan</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ganguli</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Deep unsupervised learning using nonequilibrium thermodynamics</article-title>
          <source>ArXiv</source>
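          <comment>Preprint posted online on November 18, 2015</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1503.03585"/>
          </comment>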
          <pub-id pub-id-type="doi">10.48550/arXiv.1503.03585</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref105">
        <label>105</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>OpenAI</collab>
          </person-group>
          <article-title>GPT-4 technical report</article-title>
          <source>ArXiv</source>
          <year>2023</year>
          <comment>Preprint posted online on December 19, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2303.08774"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2303.08774</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
