<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JME</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Educ</journal-id>
      <journal-title>JMIR Medical Education</journal-title>
      <issn pub-type="epub">2369-3762</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v12i1e87102</article-id>
      <article-id pub-id-type="pmid">41678789</article-id>
      <article-id pub-id-type="doi">10.2196/87102</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Using AI to Train Future Clinicians in Depression Assessment: Feasibility Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Stone</surname>
            <given-names>Alicia</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Leung</surname>
            <given-names>Tiffany</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Schmidt</surname>
            <given-names>Simone</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Shin</surname>
            <given-names>Daun</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Holderried</surname>
            <given-names>Friederike</given-names>
          </name>
          <degrees>MME, MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1828-0920</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Sonanini</surname>
            <given-names>Alessandra</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0009-4893-3989</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Philipps</surname>
            <given-names>Annika</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0008-9938-3158</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Stegemann-Philipps</surname>
            <given-names>Christian</given-names>
          </name>
          <degrees>Dr rer nat</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8122-5724</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Herschbach</surname>
            <given-names>Lea</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>TIME - Tübingen Institute for Medical Education</institution>
            <institution>University of Tübingen</institution>
            <addr-line>Elfriede-Aulhorn-Strasse 10</addr-line>
            <addr-line>Tübingen, 72076</addr-line>
            <country>Germany</country>
            <phone>49 70712973715</phone>
            <email>lea.herschbach@med.uni-tuebingen.de</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0005-6378-5073</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Festl-Wietek</surname>
            <given-names>Teresa</given-names>
          </name>
          <degrees>Dr rer nat</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1450-1757</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Zipfel</surname>
            <given-names>Stephan</given-names>
          </name>
          <degrees>Prof Dr Med</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1659-4440</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Erschens</surname>
            <given-names>Rebecca</given-names>
          </name>
          <degrees>Dr rer nat</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4433-9378</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Herrmann-Werner</surname>
            <given-names>Anne</given-names>
          </name>
          <degrees>MME, Prof Dr Med</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2413-7047</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>TIME - Tübingen Institute for Medical Education</institution>
        <institution>University of Tübingen</institution>
        <addr-line>Tübingen, Baden-Württemberg</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Psychosomatic Medicine and Psychotherapy</institution>
        <institution>University Hospital Tübingen</institution>
        <institution>University of Tübingen</institution>
        <addr-line>Tübingen</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>German Center for Mental Health (DZPG), Partner Site Tübingen</institution>
        <addr-line>Tübingen</addr-line>
        <country>Germany</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Lea Herschbach <email>lea.herschbach@med.uni-tuebingen.de</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2026</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>12</day>
        <month>2</month>
        <year>2026</year>
      </pub-date>
      <volume>12</volume>
      <elocation-id>e87102</elocation-id>
      <history>
        <date date-type="received">
          <day>4</day>
          <month>11</month>
          <year>2025</year>
        </date>
        <date date-type="rev-request">
          <day>20</day>
          <month>11</month>
          <year>2025</year>
        </date>
        <date date-type="accepted">
          <day>29</day>
          <month>12</month>
          <year>2025</year>
        </date>
      </history>
      <copyright-statement>©Friederike Holderried, Alessandra Sonanini, Annika Philipps, Christian Stegemann-Philipps, Lea Herschbach, Teresa Festl-Wietek, Stephan Zipfel, Rebecca Erschens, Anne Herrmann-Werner. Originally published in JMIR Medical Education (https://mededu.jmir.org), 12.02.2026.</copyright-statement>
      <copyright-year>2026</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Education, is properly cited. The complete bibliographic information, a link to the original publication on https://mededu.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://mededu.jmir.org/2026/1/e87102" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Depression is a major global health care challenge, not only causing significant individual distress but also contributing to a substantial global burden. Timely and accurate diagnosis is crucial. To help future clinicians develop these essential skills, we trained a generative pretrained transformer (GPT)–powered chatbot to simulate patients with varying degrees of depression and suicidality.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to evaluate the applicability and transferability of our GPT-4–powered chatbot for psychosomatic cases. Specifically, we aim to investigate how accurately the chatbot can simulate patients exhibiting various stages of depression and phases of suicidal ideation, while adhering to a predefined role script and maintaining a sufficient level of authenticity. Additionally, we want to analyze to what level the chatbot is suitable for practicing correctly diagnosing depressive disorders in patients, as well as assessing suicidality stages.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We developed 3 virtual patient role scripts depicting complex, realistic cases of depression and varying degrees of suicidality collaboratively with field experts and aligned with mental health assessment guidelines. These cases were integrated into a GPT-4–powered chatbot for practicing clinical history-taking. A total of 148 medical students, with an average age of 22.71 years and mostly in their sixth semester, interacted individually with one of the randomly assigned virtual patients through chat. Following this, they completed a questionnaire assessing their demographics and user experience. Chats were analyzed descriptively to assess diagnostic accuracy and suicidality assessments, as well as the role script adherence and authenticity of the artificial intelligence (AI). This was done to gain further insight into the chatbot’s behavior and the students’ diagnostic accuracy.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>In over 90% (725/778) of the answers, the chatbot maintained its assigned role. On average, students correctly identified the severity of depression in 60% (81/135) and the phase of suicidality in 67% (91/135) of the cases. Notably, the majority either failed to address or insufficiently explored the topic of suicidality despite explicit instructions beforehand.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study demonstrates that a GPT-powered chatbot can simulate patients with depression fairly accurately. More than two-thirds of participants perceived the AI-simulated patients with depression as authentic, and nearly 80% (106/135) indicated they would like to use the application for further practice, highlighting its potential as a training tool. While a small proportion of students expressed reservations, and the overall diagnostic accuracy varied depending on the severity of the case, the findings overall support the feasibility and educational value of AI-based role-playing in clinical training. AI-supported virtual patients provide a highly flexible, standardized, and readily available training tool, independent of real-life constraints.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>depression diagnosis</kwd>
        <kwd>generative pretrained transformer</kwd>
        <kwd>GPT-powered chatbot</kwd>
        <kwd>large language model</kwd>
        <kwd>LLM</kwd>
        <kwd>medical students</kwd>
        <kwd>suicidality</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Depression affects an estimated 280 million people globally, representing one of the world’s most prevalent mental health conditions [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. This disorder, along with anxiety, contributes to a US $1 trillion annual economic burden [<xref ref-type="bibr" rid="ref3">3</xref>]. Despite the availability of structured diagnostic criteria through the <italic>ICD-10</italic> (<italic>International Statistical Classification of Diseases, Tenth Revision</italic>) and the <italic>DSM-5</italic> (<italic>Diagnostic and Statistical Manual of Mental Disorders</italic> [Fifth Edition]) [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>], accurately identifying depression remains challenging. Symptom interpretation is often complex, as presentations can vary significantly between individuals [<xref ref-type="bibr" rid="ref6">6</xref>]. Additionally, factors such as stigma and limited access to care contribute to the high rate of undiagnosed cases [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. Effective diagnosis, therefore, requires not only familiarity with the formal criteria but also clinical expertise to contextualize symptoms within the broader patient history and presentation [<xref ref-type="bibr" rid="ref6">6</xref>]. Worldwide, organizations have respective guidelines on diagnosing and treating depression [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. Thorough patient assessment is essential for classification and therapy of depression, but also suicidality—as currently over 700,000 suicide deaths occur annually [<xref ref-type="bibr" rid="ref12">12</xref>]. However, studies show that diagnostic accuracy is inconsistent among physicians [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. 
Respective training is crucial to close this gap and has proven to be successful [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref22">22</xref>].</p>
      <p>Artificial intelligence (AI)–simulated patients allow learners to practice clinical interviews flexibly, repeatedly, and at scale, providing realistic interactions to train diagnostic accuracy [<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]. Our research group has already demonstrated the effectiveness of a generative pretrained transformer (GPT)–powered chatbot used as an AI-driven virtual patient for clinical history-taking exercises of somatic diseases, with the capability to provide automated feedback [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. Despite these advances, empirical research on AI-driven virtual patients in the field of mental health training is still limited [<xref ref-type="bibr" rid="ref28">28</xref>]. In particular, little is known about the stability of large language model (LLM)–generated symptom presentations, their ability to represent nuanced depressive or suicidal symptoms, or their impact on students’ diagnostic reasoning. Therefore, we developed a training module for medical students within their regular teaching in psychosomatic medicine and psychotherapy (PSM), using AI-driven virtual patients with depressive symptoms and varying degrees of suicidality. Virtual patients are well-established in medical education and have been shown to help students apply communication skills in clinical practice [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref31">31</xref>]. Recent advances in natural language processing have enhanced interactions with virtual patients to increase user engagement [<xref ref-type="bibr" rid="ref32">32</xref>]. 
LLMs are particularly useful in mental health training and have been shown to accurately answer questions related to depression, even outperforming general practitioners in direct comparisons of treatment recommendations [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]. They have also been used as interviewee models for depression screening [<xref ref-type="bibr" rid="ref34">34</xref>].</p>
      <p>In this study, we aim to evaluate the applicability and transferability of our GPT-4–powered chatbot for psychosomatic cases, specifically addressing the following research questions: (1) How accurately can a GPT-4–powered chatbot simulate patients exhibiting various stages of depression and phases of suicidal ideation, while adhering to a predefined role script and maintaining a sufficient level of authenticity? What specific challenges and limitations can be observed? (2) To what level is the GPT-4–powered chatbot suitable for practicing correctly diagnosing depressive disorders in patients, as well as assessing suicidality stages?</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Creation of AI-Simulated Patients</title>
        <p>For this study, we created 3 different virtual patient role scripts, each describing a complex, realistic patient history and current symptomatology of depression or suicidality stage (<xref rid="figure1" ref-type="fig">Figure 1</xref> provides further details). The case vignettes and role scripts were collaboratively developed and critically revised by field experts and aligned with guidelines for mental health assessment in virtual patients [<xref ref-type="bibr" rid="ref35">35</xref>]. All case vignettes were created exclusively by clinical experts and were not generated or assisted by any AI system. These patient cases were integrated into our GPT-4–powered chatbot, which was prompted for the purpose of practicing clinical history-taking [<xref ref-type="bibr" rid="ref26">26</xref>] and adapted to the new challenges within the mental health context. ChatGPT (OpenAI) was selected due to its wide availability, established performance in health-related natural language processing tasks, and strong conversational capabilities. At the time of data collection, GPT-4 was one of the leading models and the most advanced OpenAI model. We had prior experience with GPT-4 in complex role-playing settings and were thus able to ensure a high level of quality of responses. In addition, the OpenAI account we used to access the application programming interface had a sufficiently high token limit to allow usage in groups of 10-20 people simultaneously. Temperature for GPT-4 was set to 0.1 to minimize the risk of deviation from the provided information. While slightly repetitive, the answers were usually regarded as natural enough, even with this low setting.</p>
        <p>Patient cases, student interactions, and questionnaires were conducted in German, and all examples cited in this document were translated into English via DeepL [<xref ref-type="bibr" rid="ref36">36</xref>]. Textbox S3 and Figure S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref37">37</xref>] provide an exemplary chat history and screenshots of the application.</p>
        <p>To categorize the levels of depression, we used the <italic>ICD-10</italic> diagnostic criteria from the World Health Organization’s classification system [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref38">38</xref>], which are also integrated into the curriculum at German universities (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). To classify suicidality, 4 stages have been defined, ranging from passive death wish (stage 1), suicidal ideation (stage 2), and suicide plans/preparations (stage 3) to suicide actions (stage 4) [<xref ref-type="bibr" rid="ref37">37</xref>] (Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Study outline. AI: artificial intelligence; PSM: psychosomatic medicine and psychotherapy; QAP: question-answer pair.</p>
          </caption>
          <graphic xlink:href="mededu_v12i1e87102_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Experts</title>
        <p>Case vignettes were developed by 2 experts in the field of PSM. Subsequently, 2 other experts evaluated the student chats, and 3 further experts with longstanding clinical experience in PSM and mental health reevaluated the vignettes diagnostically to check concordance. Finally, question-answer pairs (QAPs) were qualitatively coded for recurring errors by different experts in the field of PSM (Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
      </sec>
      <sec>
        <title>Data Collection</title>
        <p><xref rid="figure1" ref-type="fig">Figure 1</xref> provides detailed information about the setting, participants, and study design. The data collection took place between April and July 2024; the AI component of the session lasted 1 hour. Students, equipped with laptops, were introduced to a preconfigured web interface. Students were randomly assigned to interact with virtual patients exhibiting varying levels of depression severity (mild, moderate, and severe). Provided with general case details (setting, gender, and age of the virtual patient), they engaged in a 20-minute individual interaction with the AI-driven patient suffering from depression; the medical conversation was simulated through chat. After the digital interaction, students supplied demographic information and completed a questionnaire. The first section of the questionnaire focused on students’ subjective evaluation of the application, assessing the authenticity of the virtual patient and the likelihood of using the application regularly.</p>
        <p>The second part of the questionnaire evaluated the students’ diagnostic accuracy. This included identifying the relevant <italic>ICD-10</italic> diagnostic criteria for depression, assessing the severity and recurrence of the episode, and determining the degree of suicidality.</p>
        <p>The session concluded with an interactive discussion with a teacher from the department of PSM present, which involved reviewing correct answers to the questions and exploring additional insights into the diagnosis and treatment of depression aside from the encountered AI case.</p>
      </sec>
      <sec>
        <title>Prompt Engineering</title>
        <p>Generally, we built on the techniques described in preceding publications [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. Main aspects of the prompting are general descriptions of the patients’ situation and character, as well as a customizable, case-dependent list of medical categories. For each category, information is provided in the form of clinical information or as example answers. For this study, we included depression criteria and phases of suicidal ideation as categories in order to accurately describe the patients’ situation. We also expanded the general prompting to account for the required emotional depth of the simulated interviews. To increase authenticity, we adapted the prompts and the role scripts as described next.</p>
        <p>We refined the virtual patient’s scripts by adding explanatory notes for ambiguous terms and structuring responses progressively, revealing details only upon user inquiry to enhance authenticity. To improve realism in sensitive discussions, we instructed the model to simulate patient reluctance and provided example dialogues. Additionally, we implemented safeguards to prevent prompt extraction attacks, that is, inputs that make the model repeat the hidden system prompt in the running chat, exposing it to the user. In our case, this would have included the complete character definition. These safeguards were thus also meant to ensure the model maintains its role without disclosing unintended information (Textboxes S1 and S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> provide prompt engineering techniques and full prompting).</p>
      </sec>
      <sec>
        <title>Data Analysis</title>
        <p>An overview of the data analysis process is presented in <xref rid="figure1" ref-type="fig">Figure 1</xref>. After completing data collection, all data were exported. Due to various reasons, 13 chats were excluded, leaving a total of 135 chats for inclusion in phase 1 of the data analysis.</p>
        <sec>
          <title>Demographic Data</title>
          <p>The dataset was analyzed descriptively in terms of demography and the students’ assessment of the severity of depression, the episode, the presence of the individual diagnostic criteria, and the stage of suicidality. Furthermore, questions concerning the authenticity of simulated patients and the likelihood of using the application in the future were evaluated descriptively. Frequencies and distributions were calculated for all variables mentioned above to provide an overview of students’ assessments and demographic characteristics.</p>
        </sec>
        <sec>
          <title>Students’ Evaluation of Diagnosis and Suicidality</title>
          <p>In data analysis phase 2, a total of 23 chats were selected out of the 135 chats for a more detailed exploration at the QAP level. To maximize issue detection, we selected chats in which students incorrectly assessed depression severity and suicidal ideation based on the classification established during the development of the role scripts. From this group, a random sample was drawn, stratified by the observed error rates across severity levels: 11 chats for moderate depression (highest error rate), 7 for mild (lower error rate), and 5 for severe depression (lowest error rate). In the 23 exemplary chats, 778 QAPs were recorded, with a median of 29 per conversation. To assess interrater reliability, 2 experts independently evaluated 23 randomly selected chats in terms of the extent to which each criterion had been addressed (not addressed at all, partially addressed, or fully addressed). The criteria evaluated were authenticity, episode, severity, suicidality, as well as the 10 diagnostic criteria of depression. Prior work shows that users interpret authenticity as a cue for sincerity, interpersonal alignment, and emotional credibility in chatbot interactions [<xref ref-type="bibr" rid="ref39">39</xref>], which aligns closely with the evaluative focus of this study. More specifically, factors that influenced the authenticity of the interaction in this study included instances in which the response did not correspond with either the question or the prompt, or in which the interaction was considered to be inauthentic for other reasons, such as repetitions or the use of unconventional phrasing. Since the sources of error already occurred repeatedly in the selected chats, it can be assumed that the findings can also be transferred to the entire dataset. The QAPs served as the units of analysis. Both experts examined them independently in a Microsoft Excel 2408 sheet (Microsoft Corporation). They achieved Cohen κ=0.794 (QAP level) and κ=0.846 (chat level). Due to the high level of agreement, 1 rating was selected randomly. Statistical analysis was performed using SPSS (version 28; IBM Corp).</p>
          <p>First, all QAPs were analyzed to assess the extent to which students considered the diagnostic criteria for depression and suicidality. Subsequently, the 23 selected chats were analyzed to identify reasons for incorrect statements, including insufficient discussion of the criteria, misinterpretation of the AI answers by the students, or AI-driven conversations that led to incorrect diagnoses.</p>
        </sec>
        <sec>
          <title>Expert Validation of Case Vignettes</title>
          <p>As we identified cases in which misleading prompts led to incorrect assessments and were based on content ambiguities in the prompt, the prompts were retrospectively reviewed independently by 3 additional experts in the field of psychosomatic medicine with regard to the diagnostic criteria for all 3 case vignettes. The experts received the same chat information as the participants to assess whether they could correctly identify the severity of depression and suicidality; the prompts were not modified or retested as part of this study.</p>
        </sec>
        <sec>
          <title>Error Analysis and Prompt Associations</title>
          <p>To examine patterns in students’ diagnostic thinking and deviations in the role of AI, we applied an inductive, theme-oriented coding approach based on the principles of data-driven analysis developed by Braun and Clarke [<xref ref-type="bibr" rid="ref40">40</xref>]. Two expert raters independently used qualitative coding to identify recurring patterns in the QAPs, including errors in students’ diagnostic reasoning and role deviations, nonresponsive answers, and inappropriate repetitions. After discussing discrepancies to reach consensus, findings were summarized descriptively to highlight common issues and inform recommendations for improving AI-driven training interactions.</p>
        </sec>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>In phase 1, descriptive statistics (frequencies and percentages) were calculated for all variables to provide an overview of students’ assessments and demographic characteristics. In phase 2, all QAPs were aggregated and stored in a Microsoft Excel 2408 sheet. Two raters separately assessed authenticity and diagnostic criteria of each QAP. Cohen κ was calculated to determine the interrater reliability between the 2 raters.</p>
        <p>Statistical analysis was performed using SPSS (version 28). Mean values, associated SDs, frequencies, and percentages were calculated. Figure generation was performed using Excel 2408.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>The Ethics Committee of the Faculty of Medicine at University Hospital Tübingen approved the study (209/2024BO2). Participation was voluntary, with informed consent obtained; no compensation was offered for participation. All data were anonymized. All procedures were conducted in accordance with the Declaration of Helsinki and ethical guidelines for human participants. Informed consent for participation in this study was obtained verbally from all participants. Verbal consent was chosen over written consent because the study was conducted within an educational setting as part of the participants’ course activities. Additionally, since no sensitive or personally identifiable information was collected, verbal consent was deemed appropriate and approved by the Ethics Committee. Furthermore, participants also confirmed their agreement to data collection and processing by ticking the respective box on the computer. Participants were fully informed about the study’s purpose, procedures, and their right to withdraw at any time and without negative consequences before providing their consent.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Demographic Data</title>
        <p>A total of 148 chat datasets were collected, with 13 excluded (<xref rid="figure1" ref-type="fig">Figure 1</xref>), leaving 135 for analysis. Participants included 89 females, 45 males, and 1 diverse participant, with a mean age of 22.71 (SD 2.44; range 19-30) years. The majority (n=130) of the participants were in their sixth semester, 1 each in their fourth and seventh semesters, and 3 in their fifth semester. According to the curriculum, while most students have had some prior exposure to the topics of depression and suicidality, they have not yet studied them in a structured seminar format. Cohort sizes vary by topic and are clearly indicated to ensure accurate interpretation.</p>
      </sec>
      <sec>
        <title>Authenticity</title>
        <p>Based on the dataset shown above (n=135), <xref ref-type="table" rid="table1">Table 1</xref> shows that more than two-thirds of the students rated the AI-controlled presentation of the patient with depression as convincing, very convincing, or humanlike. A total of 78.52% (106/135) of respondents said they would use this application again (sometimes, often, or whenever possible).</p>
        <p>To assess the perceived authenticity, AI adherence to the role script, and students’ chats in relation to the suspected diagnoses, 2 experts analyzed 23 selected chats in detail (Data Analysis section). Factors influencing authenticity, including frequent repetitions, inappropriate use of medical terminology, and unclear references to the prompt (65/778, 8.35%), were assessed. Based on these criteria, 87.15% (678/778) of QAPs were classified as authentic, while 12.85% (100/778) of QAPs were classified as inauthentic.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Students’ opinions on the application.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="750"/>
            <col width="0"/>
            <col width="220"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Questions and responses</td>
                <td>Frequency, n/N (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>How convincing was the AI<sup>a</sup></bold>
                  <bold>in the patient role?</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not convincing</td>
                <td colspan="2">2/135 (1.48)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Acceptable</td>
                <td colspan="2">40/135 (29.63)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Convincing</td>
                <td colspan="2">54/135 (40)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Very convincing</td>
                <td colspan="2">36/135 (26.67)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Humanlike</td>
                <td colspan="2">3/135 (2.22)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>How likely would it be for you to use the application if it was available on a regular basis?</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not at all</td>
                <td colspan="2">7/135 (5.19)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Rarely</td>
                <td colspan="2">22/135 (16.30)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Sometimes</td>
                <td colspan="2">70/135 (51.85)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Often</td>
                <td colspan="2">29/135 (21.48)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Whenever possible</td>
                <td colspan="2">7/135 (5.19)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>AI: artificial intelligence.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Role Script Adherence</title>
        <p>For content categorization, QAPs were classified as “relevant” (diagnostic criteria or suicidality) or “not relevant” (other medical topics or general conversation elements). A total of 49.36% (384/778) of QAPs were considered relevant for diagnosis. For role conformity, expert raters assessed whether GPT-4’s responses (1) matched the question asked and (2) remained consistent with the predefined role script. As shown in <xref ref-type="table" rid="table2">Table 2</xref>, GPT-4 deviated from the given role in 6.81% (53/778) of the QAPs.</p>
        <p>Of the initial 6.81% (53/778) deviations, a total of 1.54% (12/778) stemmed from “not relevant” topics outside depression or suicidality (eg, other medical conditions) and were excluded from analysis. The remaining 5.27% (41/778) of “relevant” QAPs were distributed across the 5 key diagnostic criteria: “loss of interest,” “loss of drive,” “pessimistic future prospects,” “changes in appetite,” and “impaired concentration” (<xref ref-type="table" rid="table2">Table 2</xref>).</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Role adherence of the chatbot.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="730"/>
            <col width="0"/>
            <col width="240"/>
            <thead>
              <tr valign="top">
                <td colspan="3">GPT-4 roleplay outcome and QAP<sup>a</sup> type</td>
                <td>Frequency, n/N (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">GPT<sup>b</sup> stays in role</td>
                <td>725/778 (93.19)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>GPT falls out of role, QAP concerning diagnostic criteria</bold>
                </td>
                <td>41/778 (5.27)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Loss of interest</td>
                <td colspan="2">14/778 (1.80)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Loss of drive</td>
                <td colspan="2">5/778 (0.64)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Pessimistic future prospects</td>
                <td colspan="2">7/778 (0.90)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Changes in appetite</td>
                <td colspan="2">8/778 (1.03)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Impaired concentration</td>
                <td colspan="2">6/778 (0.77)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Other</td>
                <td colspan="2">1/778 (0.13)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">GPT falls out of role, QAP concerning other medical information</td>
                <td>12/778 (1.54)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>QAP: question-answer pair.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>GPT: generative pretrained transformer.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Qualitative Analysis</title>
        <p>Identified erroneous criteria were qualitatively coded by the 2 expert raters to detect recurring patterns. The qualitative analysis of role deviations revealed 3 key issues (Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> provides detailed examples). First, GPT-4 struggled with temporal consistency; time jumps (patient memories) or inconsistent timelines in patient history (“she has gradually started engaging in her previous leisure activities”) led to role-deviant behavior. This highlights the need to clearly separate current symptoms from previous history and to provide explicit timelines in the model. Second, inconsistency within the role scripts showed that overemphasis on isolated case details could lead GPT-4 to disproportionately amplify these aspects, resulting in misleading responses (“[My Concentration] has gotten a bit better since I started taking the new medication (…) But it’s still not as good as it used to be”). Third, we observed a tendency toward symptom aggravation, especially in mild cases, likely influenced by common training data patterns and negatively phrased diagnostic criteria. In 97% (33/34) of the analyzed misleading answers, there was a negative shift, resulting in a depiction of greater depression than intended in the prompt (Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> summarizes the findings). These findings underscore the importance of neutral, precisely structured prompts to ensure stable and realistic virtual patient behavior.</p>
      </sec>
      <sec>
        <title>Students’ Diagnostic Accuracy</title>
        <p>After the patient interviews, students provided a suspected diagnosis, including episode type, depression severity, and suicidality stage. Comparing their assessments (n=135) with the predefined case diagnoses showed a 94.07% (127/135) agreement on episode classification, with minor case-related deviations. Agreement on depression severity, particularly relevant for therapy decisions, averaged 60.00% (81/135). The distribution across the subgroups is particularly noteworthy: the case with severe depression reached an agreement of 100%, while the other cases reached only 40.82% (20/49; moderate depression) and 47.92% (23/48; mild depression) agreement. Regarding the phase of suicidality, agreement across all subgroups was 67.41% (91/135) on average, with a tendency toward better assessment of passive death wishes. To examine discrepancies in severity assessment, we reviewed the diagnostic criteria identified by students. Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> shows correctly recognized criteria (blue) and those incorrectly selected despite not being part of the role script (gray). For mild depression, students struggled to recognize “feelings of guilt” as present, while “reduced self-esteem” and “loss of drive” were mistakenly classified as present. In contrast, 100% correctly identified “sleep disorders” as absent. For severe depression, all main and secondary criteria were present, preventing false positives. The main challenge was identifying “impaired concentration” (28/38, 73.68% correct), while all other criteria were correctly assessed by at least 86.84% (33/38). Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> visualizes these findings.</p>
      </sec>
      <sec>
        <title>Expert Validation of Case Vignettes</title>
        <p><xref rid="figure2" ref-type="fig">Figure 2</xref> shows the diagnostic criteria for the case developed as moderate depression. Notably, students frequently assessed “loss of interest,” “concentration disorders,” “changes in appetite,” and “pessimistic future prospects” as present, although these were not defined as present by 2 mental health field experts who had created the case vignettes. To verify this discrepancy, all 3 vignettes were reevaluated by 3 additional field experts. These experts received the same prompt as GPT-4 and assessed the virtual patient according to the diagnostic criteria. For the cases of mild depression and severe depression, there were no relevant deviations between the initial developing expert group and the reassessing expert group. For moderate depression, significant deviations occurred in individual diagnostic criteria (<xref rid="figure2" ref-type="fig">Figure 2</xref>). Student selections (monochrome) are contrasted with expert assessments (striped). Blue indicates agreement with the initial role script classification, while gray indicates criteria not originally included, highlighting discrepancies. Two criteria—“decline in interests” (core symptom) and “changes in appetite” (additional symptom)—were consistently rated as present by all 3 additional experts. In contrast, “impaired concentration” and “pessimistic future prospects” were selected as present by more than half of the students, while the majority of experts did not consider them to be present.</p>
        <p>If the additional experts’ classification is used as the reference, the case originally labeled as moderate depression would instead be classified as severe. Based on this revised classification, 59% (29/49) of students correctly assessed the severity level.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Students’ and experts’ assessment of a moderate depression case vignette. Percentages of students (monochrome bars) and experts (striped bars) who identified the displayed International Classification of Diseases, Tenth Revision (ICD-10) diagnostic criteria for depression as present in the case vignette are shown. Blue bars indicate agreement with the initial role-script classification, whereas gray bars represent criteria not originally included, highlighting discrepancies between the vignette and the standard diagnostic criteria.</p>
          </caption>
          <graphic xlink:href="mededu_v12i1e87102_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Error Analysis and Prompt Associations</title>
        <p>Due to discrepancies in the diagnostic criteria, a detailed analysis was conducted to verify the intended diagnostic combinations for each case. Figure S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> shows the overall and case-related results. Even with a margin of error of ±1 criterion, only for severe depression did a significant number of students (32/38) select the correct combination. For mild and revised severe (formerly moderate) depression, fewer than one-third (13/48 mild and 12/49 revised severe) achieved this goal.</p>
        <p>To identify error sources unrelated to case vignette creation, 2 independent experts analyzed all QAPs from 23 chats across 3 cases. Three main causes were identified: (1) misleading AI responses, (2) misinterpretation of AI responses by students, and (3) insufficient exploration of the criterion by students (<xref rid="figure3" ref-type="fig">Figure 3</xref>).</p>
        <p>Diagnostic criteria were assessed with varying accuracy. “Depressed mood” and “sleep disorders” were correctly identified by 100% of students, while “impaired concentration” and “decline in interests” (13/23, 56.52% each) posed the greatest challenges, mainly due to misleading AI responses. Only 1.74% (4/230) of errors resulted from misinterpretation by students, while 10.00% (23/230) resulted from insufficient follow-up questions. Correct selections also included cases in which students guessed without a solid chat-based assessment.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Detailed analysis of errors at the chat level. Errors were categorized according to their origin: (1) errors due to insufficient addressing of the user’s query, (2) errors arising from misinterpretation of the input, and (3) errors resulting from misleading or incorrect responses generated by the large language model (LLM).</p>
          </caption>
          <graphic xlink:href="mededu_v12i1e87102_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Students’ Evaluation of Suicidality Stages</title>
        <p>To analyze the assessment of suicidality in more detail, a differentiated evaluation of this part of the anamnesis interviews was carried out. This involved categorizing the information based on how the topic was addressed and an assessment by the experts regarding the degree to which the students evaluated this symptomatology. It showed that 21.74% (5/23) of the students did not address the issue of suicidality at all. Another 39.13% (9/23) did not address it sufficiently for a proper assessment, and only 39.13% (9/23) addressed it comprehensively enough.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Overview</title>
        <p>This study shows GPT-4’s capability to accurately simulate patients exhibiting different levels of depression and stages of suicidality in a virtual patient chat format while adhering to the role script and maintaining authenticity. Additionally, the detailed analysis of student anamnesis and their interpretation of collected patient information in relation to diagnostic criteria and suspected diagnoses reveals learning challenges inherent to the diagnostic process of depression and suicidal ideation, which this training approach helps to make more transparent and analyzable.</p>
        <p>To ensure quality and enable further optimization, evaluating GPT-4’s role stability—especially with regard to different severity levels and stages of suicidality—is essential. Our findings suggest that the current prompting approach already provides a high degree of role stability. While in some cases inauthentic behavior was observed, these cases were rare and mostly due to structural deficiencies in the prompt design rather than inherent AI inconsistencies. Given the importance of role stability in medical applications, we propose specific recommendations for further improvements.</p>
        <p>Our findings are consistent with previous studies [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>] indicating that observed deviations are mainly due to structural issues in prompt design rather than from inconsistent AI responses. This suggests that case presentations can be further refined by addressing known sources of error. Other studies confirm that effective prompt engineering significantly enhances chatbot performance [<xref ref-type="bibr" rid="ref41">41</xref>-<xref ref-type="bibr" rid="ref43">43</xref>]. However, existing recommendations remain general, and to our knowledge, no medical education application uses a patient chatbot at this level of detail. Standard prompts like “avoid technical terms” often fail to produce consistent results. Instead, our approach relies on detailed, structured example responses, requiring careful case development but yielding significantly more precise and authentic AI-generated answers. Given the limited peer-reviewed literature, we also considered open-source online resources that document prompt engineering strategies [<xref ref-type="bibr" rid="ref44">44</xref>].</p>
      </sec>
      <sec>
        <title>In-Depth Analysis of Role-Playing</title>
        <p>Our qualitative analysis of the chats that deviated from the predefined role led to several detailed recommendations for prompt design, particularly with regard to example responses (<xref ref-type="boxed-text" rid="box1">Textbox 1</xref>). A comprehensive list is provided in Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        <boxed-text id="box1" position="float">
          <title>In-depth analysis of role-playing</title>
          <p>
            <bold>Temporal structure</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Temporal changes in symptoms, especially in chronic, psychiatric, or recurrent illnesses, must be clearly structured in case vignettes. Since LLMs have difficulties with the consistent presentation of time courses, we recommend a subdivision into “current state” (eg, last 2 weeks) and “previous findings” (course of the disease), as well as the addition of a timeline to emphasize diagnostically relevant time periods.</p>
            </list-item>
          </list>
          <p>
            <bold>Formal consistency</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Formal consistency is crucial for complex cases that require a detailed role script. Placing more emphasis on individual aspects can lead GPT-4 to overemphasize them, thereby misleading users.</p>
            </list-item>
          </list>
          <p>
            <bold>Symptom aggravation</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>We observed a tendency toward symptom aggravation, especially in mild depression. Large language models generally tend to shift toward normal variants because they reflect frequent patterns from the training data [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. Possible causes are that the model associates “typical depression” with more symptoms or tends toward moderate levels, thus creating deviations from the role script. Additionally, negatively formulated diagnostic criteria (eg, “decreased energy” in <italic>International Classification of Diseases, Tenth Revision</italic>) may have influenced the responses. Future prompts should be neutrally formulated (“energy”) and clearly distinguish between the presence and absence of a symptom.</p>
            </list-item>
          </list>
        </boxed-text>
        <p>Overall, the AI performed convincingly, aligning with previous findings [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. The suggested optimizations concern fine details, and the cost–benefit ratio depends on the application context. In virtual training, absolute accuracy is not required because real patients can also provide incorrect information due to memory errors, misunderstandings, or shame [<xref ref-type="bibr" rid="ref47">47</xref>], and medical students need to be trained for these situations. Standardized patients, who are frequently used in medical education, also show variability, although their authenticity is often discussed as a limitation [<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>]. However, for unsupervised student feedback or exam assessments, a high degree of role stability is an essential requirement.</p>
        <p>AI-supported virtual patients offer even greater availability, flexibility, and standardization than simulated patients [<xref ref-type="bibr" rid="ref50">50</xref>]. A key factor for acceptance in skill learning is perceived authenticity [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref52">52</xref>]. In this study, over two-thirds of students rated the tool as at least “convincing,” with a further 29.63% (40/135) rating it as “acceptable,” indicating great potential for future use.</p>
        <p>While our results confirm the high role stability of GPT-4, certain discrepancies were found in the interpretation of the severity classifications. Due to divergent assessments by our experts—particularly regarding the diagnostic criteria “loss of interest” and “appetite changes”—the original classification of the case as “moderate depression” could not be reliably maintained. To ensure clarity and accuracy, the results were reevaluated, with this case reclassified as “revised severe (formerly moderate) depression” and the results carefully contextualized in the discussion. Where reevaluation was not feasible, these data were excluded from further interpretation.</p>
      </sec>
      <sec>
        <title>AI Simulated Patients to Improve Students’ Diagnostic Accuracy</title>
        <p>We found that students’ diagnostic accuracy in assessing the severity of depression depends heavily on its actual severity. While severe cases were consistently recognized, the fact that fewer than half (23/48, 47.92%) correctly diagnosed mild depression indicates deficits in the recognition of mild but clinically relevant cases. This highlights the need for more targeted training to ensure that future doctors can identify depression at all levels of severity at an early stage—especially those that would otherwise be overlooked and thus delay timely treatment.</p>
        <p>This becomes particularly evident when examining the selection of diagnostic criteria used in forming a suspected diagnosis. Figure S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> reveals a low success rate in correctly identifying all relevant criteria per case. Even with a margin of error of 1 criterion, only 27.08% (13/48) of students achieved this goal for mild depression and 24.49% (12/49) for formerly moderate/revised severe depression. The results were best for severe depression: two-thirds of students selected the criteria correctly, presumably due to the higher number of symptoms present. It is important to note that students’ performance was affected by inconsistent case vignettes and AI role-playing errors, which must be taken into account when interpreting the results. In addition, they had limited time to reach a diagnosis and only little prior training on depression and suicidality. All of this should be examined more closely in future research. The difficulty of distinguishing between mild and moderate depression contributes to the lower success rates—a problem that has also been observed in general practitioners [<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref54">54</xref>]. According to Kroenke [<xref ref-type="bibr" rid="ref55">55</xref>], primary care physicians correctly diagnose depression only about 50% of the time, and are more likely to over-diagnose (n=15 per 100 patients) than under-diagnose (n=10) or correctly diagnose (n=10). These findings highlight that even experienced clinicians struggle with accurate classification. This underscores the need for improved diagnostic training at all levels. <italic>ICD-10</italic> makes this distinction even more difficult, as it often requires only a single additional minor criterion to distinguish between moderate and mild depression [<xref ref-type="bibr" rid="ref38">38</xref>]. 
Mental health cases pose an additional layer of complexity. In our experience, such cases require a higher face validity than somatic illnesses in order to compensate for potential uncertainty or overlap between individual diagnostic criteria [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. A potential key improvement could be the future use of <italic>ICD-11</italic> (<italic>International Classification of Diseases, 11th Revision</italic>), which is intended to provide a less rigid discrimination of the severity levels of depression [<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref57">57</xref>]. This was not used in this study, as it was not part of the curriculum at the time of data collection; however, it could be seamlessly integrated into the application in the future.</p>
        <p>To further evaluate students’ diagnostic accuracy, we conducted a detailed analysis at the QAP level (n=778). While the criteria “sleep disorder” and “depressive mood” were consistently well recognized, errors mainly occurred when students did not explore key aspects of the medical history in sufficient depth. This was particularly evident in the diagnostic criteria “feelings of guilt” and “reduced self-esteem.” It is noteworthy that misinterpretations of the AI-generated answers were rare (less than 2%) and mainly related to suicidal ideation. This suggests that diagnostic difficulties were due more to insufficient follow-up questions than to a misunderstanding of the AI answers.</p>
      </sec>
      <sec>
        <title>Neglecting Suicidal Tendencies</title>
        <p>Even more concerning is that the majority of students (14/23, 60.87%) either did not address the topic of potential suicidality at all or did so only insufficiently—despite explicit instruction during the introduction. Our qualitative analysis shows that the LLM sometimes responded very avoidantly to questions about suicidality, which may have given students a false impression of the actual severity of the topic. However, the model never denied existing suicidality but was simply reluctant to talk about it, which adequately mirrors some real patients’ behavior. Future prompts should, nonetheless, contain clearer risk indicators to guide students toward more targeted questions and avoid making the scenarios too challenging, especially at such early career training stages. Additionally, although students knew that asking explicit questions about suicidal thoughts is a standard procedure in mental health assessment, many still found it difficult to address, causing considerable discomfort and uncertainty. Literature shows that suicidality is often a shameful topic for medical students, and they fear that addressing it will intensify suicidal thoughts [<xref ref-type="bibr" rid="ref58">58</xref>,<xref ref-type="bibr" rid="ref59">59</xref>]. However, the opposite is the case, which underscores the need for targeted training in how to deal with suicidality [<xref ref-type="bibr" rid="ref58">58</xref>,<xref ref-type="bibr" rid="ref60">60</xref>,<xref ref-type="bibr" rid="ref61">61</xref>]. The integration of AI-supported virtual patients into mental health curricula, in combination with structured feedback sessions, could offer sustainable opportunities to practice assessing suicidality and reduce discomfort and uncertainty among students.</p>
      </sec>
      <sec>
        <title>Implications for AI-Supported Medical Training</title>
        <p>Our results show that AI-supported online training for the diagnosis of depression and suicidality is a feasible and effective method. Nevertheless, we recommend integrating it into face-to-face formats—either fully, as in this study, or in a blended learning approach.</p>
        <p>While other studies have shown that online training for suicide prevention can be equally effective or even superior [<xref ref-type="bibr" rid="ref62">62</xref>,<xref ref-type="bibr" rid="ref63">63</xref>], our results suggest that the immersive nature of virtual patient encounters contributes to a high level of perceived realism. In the feedback sessions, some students reported feeling emotionally affected, particularly when dealing with suicidality, which underscores the importance of structured support in such training formats.</p>
        <p>The extent to which these observations can be generalized beyond our specific context remains to be investigated. Nevertheless, the potential for broader application in various medical education contexts appears promising.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study has several limitations. First, it was conducted at a single medical school, limiting generalizability. Second, students had limited time to interact with the AI-supported patient, reflecting real-world time constraints but possibly insufficient for a practice scenario. Since the chatbot provides more information in a limited time than a real patient typically would, the diagnostic skills practiced may not fully transfer to real-world settings, and it must be assumed that performance in actual clinical encounters could be somewhat lower. Furthermore, the participants’ prior experience with AI-based systems was not assessed, which may have influenced their assessment of the tool and their interaction with it. This variable should be systematically evaluated in future studies.</p>
        <p>Additionally, only 23 of 135 chats were analyzed at the QAP level. This selection was made because we aimed to maximize issue detection by focusing on chats in which students incorrectly assessed depression severity or suicidal ideation. This procedure ensured that the analyzed subset was both representative and information-rich. Nevertheless, analyzing full chat transcripts of incorrect diagnoses might have provided further insights.</p>
        <p>Despite these limitations, this study provides valuable insights into the use of AI-based virtual patients to assess depression and suicidality. Future research should further develop chatbot models to support diagnostic training more comprehensively—in particular, through more precise and reliable feedback, improved conversational techniques (eg, question formulation and depth of exploration), and targeted support for clinical reasoning processes to better understand underlying thought patterns. Furthermore, it would also be interesting to examine the students’ reflections on the ethical aspects of interacting with chatbot patients.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>In summary, the chatbot can successfully simulate patients with depression and various stages of suicidality with high authenticity and adherence to predefined role scripts. Students’ diagnostic accuracy was influenced by the actual severity of depression, with moderate cases posing the greatest challenge. Notably, the majority either failed to address or insufficiently explored the topic of suicidality, which negatively influenced the correct classification of the phase of suicidality. AI-supported virtual patients provide a feasible and valuable tool for psychiatric history-taking training when cases are carefully developed and reviewed by experts. Potential risks, such as misinformation due to AI limitations and ethical considerations, should be addressed in future implementations.</p>
        <p>Future medical education cannot be envisioned without integration of AI to practice realistic patient encounters—rigorous research on how these models should be programmed and used best is thus necessary. The AI tool presented here could benefit not only students and trainees but also general practitioners by increasing awareness of depression. Due to its online nature, it can be transferred to a variety of different settings and languages; nevertheless, it may require adaptation for less commonly used languages or specific cultural contexts. Ideally, this could contribute to improving the care of patients with depressive symptoms and thus to better management of this increasing societal challenge.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Additional material and prompts.</p>
        <media xlink:href="mededu_v12i1e87102_app1.docx" xlink:title="DOCX File, 271 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">DSM-5</term>
          <def>
            <p>Diagnostic and Statistical Manual of Mental Disorders, Fifth Edition</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">GPT</term>
          <def>
            <p>generative pretrained transformer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">ICD-10</term>
          <def>
            <p>International Statistical Classification of Diseases, Tenth Revision</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">ICD-11</term>
          <def>
            <p>International Classification of Diseases, 11th Revision</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">PSM</term>
          <def>
            <p>psychosomatic medicine and psychotherapy</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">QAP</term>
          <def>
            <p>question-answer pair</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We acknowledge the support of the Open Access Publishing Funds of the University of Tuebingen. The authors declare the use of generative artificial intelligence (GAI) in the research and writing process. According to the GAIDeT taxonomy [<xref ref-type="bibr" rid="ref64">64</xref>], the following tasks were delegated to GAI tools under full human supervision: literature search and systematization, adapting and adjusting emotional tone, and translation. The GAI tools used were ChatGPT-5 and DeepL Write. Responsibility for the final manuscript lies entirely with the authors. GAI tools are not listed as authors and do not bear responsibility for the final outcomes. This declaration was submitted by LH. After using these tools, the authors thoroughly reviewed, revised, and edited the content, and take full responsibility for the final version of the manuscript.</p>
    </ack>
    <notes>
      <sec>
        <title>Funding</title>
        <p>No external financial support or grants were received from any public, commercial, or not-for-profit entities for the research, authorship, or publication of this article.</p>
      </sec>
    </notes>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The datasets generated or analyzed during this study are available from the corresponding author (LH) on reasonable request.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>FH and AHW conceptualized the study. FH and TFW were responsible for data curation. AP, RE, and AS performed the formal analysis, and AP and RE conducted the investigation. LH, TFW, and AS were responsible for the methodology and interpretation. LH contributed to the visualization, and CSP was responsible for the software. AHW did the project administration, while SZ and AHW provided resources and supervision. FH and AHW drafted the manuscript, and all authors participated in critically revising and editing it. All authors provided final approval of the version to be published.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>GBD 2019 Diseases and Injuries Collaborators</collab>
          </person-group>
          <article-title>Global burden of 369 diseases and injuries in 204 countries and territories, 1990-2019: a systematic analysis for the Global Burden of Disease Study 2019</article-title>
          <source>Lancet</source>
          <year>2020</year>
          <volume>396</volume>
          <issue>10258</issue>
          <fpage>1204</fpage>
          <lpage>1222</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://air.unimi.it/handle/2434/776336"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(20)30925-9</pub-id>
          <pub-id pub-id-type="medline">33069326</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(20)30925-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC7567026</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Institute for Health Metrics and Evaluation</collab>
          </person-group>
          <source>GBD Results Tool</source>
          <year>2023</year>
          <access-date>2024-08-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://vizhub.healthdata.org/gbd-results">https://vizhub.healthdata.org/gbd-results</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="book">
          <source>WHO Guidelines on Mental Health at Work. First edition</source>
          <year>2022</year>
          <publisher-loc>Geneva</publisher-loc>
          <publisher-name>World Health Organization</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="book">
          <source>Diagnostic and Statistical Manual of Mental Disorders. Fifth edition</source>
          <year>2013</year>
          <publisher-loc>Washington</publisher-loc>
          <publisher-name>American Psychiatric Association</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="book">
          <source>The ICD-10 Classification of Mental and Behavioural Disorders: Clinical Descriptions and Diagnostic Guidelines</source>
          <year>1993</year>
          <publisher-loc>Geneva</publisher-loc>
          <publisher-name>World Health Organization</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lynch</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gunning</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Liston</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Causes and consequences of diagnostic heterogeneity in depression: paths to discovering novel biological depression subtypes</article-title>
          <source>Biol Psychiatry</source>
          <year>2020</year>
          <volume>88</volume>
          <issue>1</issue>
          <fpage>83</fpage>
          <lpage>94</lpage>
          <pub-id pub-id-type="doi">10.1016/j.biopsych.2020.01.012</pub-id>
          <pub-id pub-id-type="medline">32171465</pub-id>
          <pub-id pub-id-type="pii">S0006-3223(20)30046-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Handy</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mangal</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Stead</surname>
              <given-names>TS</given-names>
            </name>
            <name name-style="western">
              <surname>Coffee</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Ganti</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Prevalence and impact of diagnosed and undiagnosed depression in the United States</article-title>
          <source>Cureus</source>
          <year>2022</year>
          <volume>14</volume>
          <issue>8</issue>
          <fpage>e28011</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36134073"/>
          </comment>
          <pub-id pub-id-type="doi">10.7759/cureus.28011</pub-id>
          <pub-id pub-id-type="medline">36134073</pub-id>
          <pub-id pub-id-type="pmcid">PMC9470500</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pelletier</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>O'Donnell</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dykxhoorn</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>McRae</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Patten</surname>
              <given-names>SB</given-names>
            </name>
          </person-group>
          <article-title>Under-diagnosis of mood disorders in Canada</article-title>
          <source>Epidemiol Psychiatr Sci</source>
          <year>2016</year>
          <volume>26</volume>
          <issue>4</issue>
          <fpage>414</fpage>
          <lpage>423</lpage>
          <pub-id pub-id-type="doi">10.1017/s2045796016000329</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="web">
          <article-title>Depression in adults: treatment and management (NICE guideline NG222)</article-title>
          <source>National Institute for Health and Care Excellence</source>
          <year>2022</year>
          <access-date>2024-08-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nice.org.uk/guidance/ng222">https://www.nice.org.uk/guidance/ng222</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>American Psychological Association</collab>
          </person-group>
          <source>Clinical Practice Guideline for the Treatment of Depression Across Three Age Cohorts</source>
          <year>2019</year>
          <access-date>2024-08-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.apa.org/depression-guideline">https://www.apa.org/depression-guideline</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>Arbeitsgemeinschaft der Wissenschaftlichen Medizinischen Fachgesellschaften</collab>
          </person-group>
          <source>Nationale VersorgungsLeitlinie Unipolare Depression – Langfassung, Version 3</source>
          <year>2023</year>
          <access-date>2024-08-18</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.leitlinien.de/themen/depression/version-3">https://www.leitlinien.de/themen/depression/version-3</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
          <article-title>Depressive disorder (depression)</article-title>
          <source>World Health Organization</source>
          <year>2023</year>
          <access-date>2025-01-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/news-room/fact-sheets/detail/depression">https://www.who.int/news-room/fact-sheets/detail/depression</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mojtabai</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Clinician-identified depression in community settings: concordance with structured-interview diagnoses</article-title>
          <source>Psychother Psychosom</source>
          <year>2013</year>
          <volume>82</volume>
          <issue>3</issue>
          <fpage>161</fpage>
          <lpage>169</lpage>
          <pub-id pub-id-type="doi">10.1159/000345968</pub-id>
          <pub-id pub-id-type="medline">23548817</pub-id>
          <pub-id pub-id-type="pii">000345968</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cepoiu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>McCusker</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cole</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Sewitch</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Belzile</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ciampi</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Recognition of depression by non-psychiatric physicians: a systematic literature review and meta-analysis</article-title>
          <source>J Gen Intern Med</source>
          <year>2008</year>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>25</fpage>
          <lpage>36</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/17968628"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s11606-007-0428-5</pub-id>
          <pub-id pub-id-type="medline">17968628</pub-id>
          <pub-id pub-id-type="pmcid">PMC2173927</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Houston</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Haw</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Townsend</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hawton</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>General practitioner contacts with patients before and after deliberate self harm</article-title>
          <source>Br J Gen Pract</source>
          <year>2003</year>
          <volume>53</volume>
          <issue>490</issue>
          <fpage>365</fpage>
          <lpage>370</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bjgp.org/lookup/pmidlookup?view=long&#38;pmid=12830563"/>
          </comment>
          <pub-id pub-id-type="medline">12830563</pub-id>
          <pub-id pub-id-type="pmcid">PMC1314595</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Ryan</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>McCabe</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Groom</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Hickie</surname>
              <given-names>IB</given-names>
            </name>
          </person-group>
          <article-title>Barriers to the effective management of depression in general practice</article-title>
          <source>Aust N Z J Psychiatry</source>
          <year>2004</year>
          <volume>38</volume>
          <issue>10</issue>
          <fpage>795</fpage>
          <lpage>803</lpage>
          <pub-id pub-id-type="doi">10.1080/j.1440-1614.2004.01464.x</pub-id>
          <pub-id pub-id-type="medline">15369538</pub-id>
          <pub-id pub-id-type="pii">ANP1464</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Levkovich</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Elyoseph</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Identifying depression and its determinants upon initiating treatment: ChatGPT versus primary care physicians</article-title>
          <source>Fam Med Community Health</source>
          <year>2023</year>
          <volume>11</volume>
          <issue>4</issue>
          <fpage>e002391</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://fmch.bmj.com/lookup/pmidlookup?view=long&#38;pmid=37844967"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/fmch-2023-002391</pub-id>
          <pub-id pub-id-type="medline">37844967</pub-id>
          <pub-id pub-id-type="pii">fmch-2023-002391</pub-id>
          <pub-id pub-id-type="pmcid">PMC10582915</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Coppens</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Van Audenhove</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gusmão</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Purebl</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Székely</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Effectiveness of general practitioner training to improve suicide awareness and knowledge and skills towards depression</article-title>
          <source>J Affect Disord</source>
          <year>2018</year>
          <volume>227</volume>
          <fpage>17</fpage>
          <lpage>23</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://core.ac.uk/reader/158340293?utm_source=linkout"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jad.2017.09.039</pub-id>
          <pub-id pub-id-type="medline">29049931</pub-id>
          <pub-id pub-id-type="pii">S0165-0327(17)30455-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Os</surname>
              <given-names>TWDP</given-names>
            </name>
            <name name-style="western">
              <surname>van den Brink</surname>
              <given-names>RHS</given-names>
            </name>
            <name name-style="western">
              <surname>Jenner</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>van der Meer</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tiemens</surname>
              <given-names>BG</given-names>
            </name>
            <name name-style="western">
              <surname>Ormel</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Effects on depression pharmacotherapy of a Dutch general practitioner training program</article-title>
          <source>J Affect Disord</source>
          <year>2002</year>
          <volume>71</volume>
          <issue>1-3</issue>
          <fpage>105</fpage>
          <lpage>111</lpage>
          <pub-id pub-id-type="doi">10.1016/s0165-0327(01)00415-3</pub-id>
          <pub-id pub-id-type="medline">12167506</pub-id>
          <pub-id pub-id-type="pii">S0165032701004153</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Henriksson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Isacsson</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Increased antidepressant use and fewer suicides in Jämtland county, Sweden, after a primary care educational programme on the treatment of depression</article-title>
          <source>Acta Psychiatr Scand</source>
          <year>2006</year>
          <volume>114</volume>
          <issue>3</issue>
          <fpage>159</fpage>
          <lpage>167</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1600-0447.2006.00822.x</pub-id>
          <pub-id pub-id-type="medline">16889586</pub-id>
          <pub-id pub-id-type="pii">ACP822</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <source>Public Health Action for the Prevention of Suicide: A Framework</source>
          <year>2012</year>
          <access-date>2024-12-18</access-date>
          <publisher-loc>Geneva</publisher-loc>
          <publisher-name>World Health Organization</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://iris.who.int/handle/10665/75166">https://iris.who.int/handle/10665/75166</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="web">
          <source>Preventing Suicide: A Global Imperative</source>
          <year>2014</year>
          <access-date>2025-01-07</access-date>
          <publisher-loc>Geneva</publisher-loc>
          <publisher-name>World Health Organization</publisher-name>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://iris.who.int/handle/10665/131056">https://iris.who.int/handle/10665/131056</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yamamoto</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Koda</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ogawa</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Miyoshi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Maeda</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Otsuka</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Enhancing medical interview skills through AI-simulated patient interactions: nonrandomized controlled trial</article-title>
          <source>JMIR Med Educ</source>
          <year>2024</year>
          <volume>10</volume>
          <fpage>e58753</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2024/1/e58753/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/58753</pub-id>
          <pub-id pub-id-type="medline">39312284</pub-id>
          <pub-id pub-id-type="pii">v10i1e58753</pub-id>
          <pub-id pub-id-type="pmcid">PMC11459107</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cook</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>Creating virtual patients using large language models: scalable, global, and low cost</article-title>
          <source>Med Teach</source>
          <year>2025</year>
          <volume>47</volume>
          <issue>1</issue>
          <fpage>40</fpage>
          <lpage>42</lpage>
          <pub-id pub-id-type="doi">10.1080/0142159X.2024.2376879</pub-id>
          <pub-id pub-id-type="medline">38992981</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Potter</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jefferies</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Enhancing communication and clinical reasoning in medical education: building virtual patients with generative AI</article-title>
          <source>Future Healthc J</source>
          <year>2024</year>
          <volume>11</volume>
          <fpage>100043</fpage>
          <pub-id pub-id-type="doi">10.1016/j.fhj.2024.100043</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Holderried</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Stegemann-Philipps</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Herschbach</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Moldt</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Nevins</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Griewatz</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A generative pretrained transformer (GPT)-powered chatbot as a simulated patient to practice history taking: prospective, mixed methods study</article-title>
          <source>JMIR Med Educ</source>
          <year>2024</year>
          <volume>10</volume>
          <fpage>e53961</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2024/1/e53961/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/53961</pub-id>
          <pub-id pub-id-type="medline">38227363</pub-id>
          <pub-id pub-id-type="pii">v10i1e53961</pub-id>
          <pub-id pub-id-type="pmcid">PMC10828948</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Holderried</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Stegemann-Philipps</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Herrmann-Werner</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Festl-Wietek</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Holderried</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Eickhoff</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A language model–powered simulated patient with automated feedback for history taking: prospective study</article-title>
          <source>JMIR Med Educ</source>
          <year>2024</year>
          <volume>10</volume>
          <fpage>e59213</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2024/1/e59213/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/59213</pub-id>
          <pub-id pub-id-type="medline">39150749</pub-id>
          <pub-id pub-id-type="pii">v10i1e59213</pub-id>
          <pub-id pub-id-type="pmcid">PMC11364946</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Atapattu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Thilakaratne</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Do</surname>
              <given-names>DN</given-names>
            </name>
            <name name-style="western">
              <surname>Herath</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Falkner</surname>
              <given-names>KE</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Che</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Nabende</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shutova</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Pilehvar</surname>
              <given-names>MT</given-names>
            </name>
          </person-group>
          <article-title>Exploring the role of mental health conversational agents in training medical students and professionals: a systematic literature review</article-title>
          <source>Findings of the Association for Computational Linguistics: ACL 2025</source>
          <year>2025</year>
          <publisher-loc>Vienna</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>20785</fpage>
          <lpage>20798</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cook</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Erwin</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Triola</surname>
              <given-names>MM</given-names>
            </name>
          </person-group>
          <article-title>Computerized virtual patients in health professions education: a systematic review and meta-analysis</article-title>
          <source>Acad Med</source>
          <year>2010</year>
          <volume>85</volume>
          <issue>10</issue>
          <fpage>1589</fpage>
          <lpage>1602</lpage>
          <pub-id pub-id-type="doi">10.1097/acm.0b013e3181edfe13</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Foster</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chaudhary</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Waller</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Borish</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Using virtual patients to teach empathy</article-title>
          <source>Simul Healthc</source>
          <year>2016</year>
          <volume>11</volume>
          <issue>3</issue>
          <fpage>181</fpage>
          <lpage>189</lpage>
          <pub-id pub-id-type="doi">10.1097/sih.0000000000000142</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chaby</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Benamara</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pino</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Prigent</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ravenet</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Embodied virtual patients as a simulation-based framework for training clinician-patient communication skills: an overview of their use in psychiatric and geriatric care</article-title>
          <source>Front Virtual Real</source>
          <year>2022</year>
          <volume>3</volume>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/frvir.2022.827312"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/frvir.2022.827312</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Campillos-Llanos</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bilinski</surname>
              <given-names>É</given-names>
            </name>
            <name name-style="western">
              <surname>Neuraz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rosset</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zweigenbaum</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Lessons learned from the usability evaluation of a simulated patient dialogue system</article-title>
          <source>J Med Syst</source>
          <year>2021</year>
          <volume>45</volume>
          <issue>7</issue>
          <fpage>69</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hal.archives-ouvertes.fr/hal-03887098"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10916-021-01737-4</pub-id>
          <pub-id pub-id-type="medline">33999302</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10916-021-01737-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sezgin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chekeni</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Keim</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Clinical accuracy of large language models and Google search responses to postpartum depression questions: cross-sectional study</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <volume>25</volume>
          <fpage>e49240</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e49240/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/49240</pub-id>
          <pub-id pub-id-type="medline">37695668</pub-id>
          <pub-id pub-id-type="pii">v25i1e49240</pub-id>
          <pub-id pub-id-type="pmcid">PMC10520763</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rosenman</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Wolf</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hendler</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>LLM questionnaire completion for automatic psychiatric assessment</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on June 12, 2024</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2406.06636</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dupuy</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>de Sevin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cassoudesalle</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ballot</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Dehail</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Aouizerate</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Cuny</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Micoulaud-Franchi</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Philip</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Guidelines for the design of a virtual patient for psychiatric interview training</article-title>
          <source>J Multimodal User Interfaces</source>
          <year>2020</year>
          <volume>15</volume>
          <issue>2</issue>
          <fpage>99</fpage>
          <lpage>107</lpage>
          <pub-id pub-id-type="doi">10.1007/s12193-020-00338-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <source>DeepL Translator</source>
          <year>2023</year>
          <access-date>2025-01-07</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.deepl.com/de/translator">https://www.deepl.com/de/translator</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Althaus</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hegerl</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>Ursachen, Diagnose und Therapie von Suizidalität [Causes, diagnosis and treatment of suicidality]</article-title>
          <source>Nervenarzt</source>
          <year>2004</year>
          <volume>75</volume>
          <issue>11</issue>
          <fpage>1123</fpage>
          <lpage>1135</lpage>
          <pub-id pub-id-type="doi">10.1007/s00115-004-1824-2</pub-id>
          <pub-id pub-id-type="medline">15480527</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Müssigbrodt</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Michels</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Malchow</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Dilling</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Munk-Jørgensen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bertelsen</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Use of the ICD-10 classification in psychiatry: an international survey</article-title>
          <source>Psychopathology</source>
          <year>2000</year>
          <volume>33</volume>
          <issue>2</issue>
          <fpage>94</fpage>
          <lpage>99</lpage>
          <pub-id pub-id-type="doi">10.1159/000029127</pub-id>
          <pub-id pub-id-type="medline">10705253</pub-id>
          <pub-id pub-id-type="pii">29127</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Seitz</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Artificial empathy in healthcare chatbots: does it feel authentic?</article-title>
          <source>Comput Hum Behav Artif Hum</source>
          <year>2024</year>
          <volume>2</volume>
          <issue>1</issue>
          <fpage>100067</fpage>
          <pub-id pub-id-type="doi">10.1016/j.chbah.2024.100067</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Braun</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Clarke</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Using thematic analysis in psychology</article-title>
          <source>Qual Res Psychol</source>
          <year>2006</year>
          <volume>3</volume>
          <issue>2</issue>
          <fpage>77</fpage>
          <lpage>101</lpage>
          <pub-id pub-id-type="doi">10.1191/1478088706qp063oa</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>KQ</given-names>
            </name>
            <name name-style="western">
              <surname>Lan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>LLM-empowered chatbots for psychiatrist and patient simulation: application and evaluation</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on May 24, 2023</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2305.13614</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Milani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chiu</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Zhi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Eack</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Labrum</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>PATIENT-Ψ: using large language models to simulate patients for training mental health professionals</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on May 28, 2024</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2405.19660</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Application of large language models in medical training evaluation—using ChatGPT as a standardized patient: multimetric assessment</article-title>
          <source>J Med Internet Res</source>
          <year>2025</year>
          <volume>27</volume>
          <fpage>e59435</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2025/1/e59435/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/59435</pub-id>
          <pub-id pub-id-type="medline">39742453</pub-id>
          <pub-id pub-id-type="pii">v27i1e59435</pub-id>
          <pub-id pub-id-type="pmcid">PMC11736217</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="web">
          <article-title>Character design</article-title>
          <source>SillyTavern</source>
          <year>2025</year>
          <access-date>2026-01-24</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://docs.sillytavern.app/usage/core-concepts/characterdesign/">https://docs.sillytavern.app/usage/core-concepts/characterdesign/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Salewski</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Alaniz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rio-Torto</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Akata</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>In-context impersonation reveals large language models’ strengths and biases</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on May 24, 2023</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2305.14930</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shrivastava</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Deshpande</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kalyan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sabharwal</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Bias runs deep: implicit reasoning biases in persona-assigned LLMs</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on November 8, 2023</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2311.04892</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kessels</surname>
              <given-names>RPC</given-names>
            </name>
          </person-group>
          <article-title>Patients' memory for medical information</article-title>
          <source>J R Soc Med</source>
          <year>2003</year>
          <volume>96</volume>
          <issue>5</issue>
          <fpage>219</fpage>
          <lpage>222</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/12724430"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/014107680309600504</pub-id>
          <pub-id pub-id-type="medline">12724430</pub-id>
          <pub-id pub-id-type="pmcid">PMC539473</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kühne</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ay</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Otterbeck</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Weck</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Standardized patients in clinical psychology and psychotherapy: a scoping review of barriers and facilitators for implementation</article-title>
          <source>Acad Psychiatry</source>
          <year>2018</year>
          <volume>42</volume>
          <issue>6</issue>
          <fpage>773</fpage>
          <lpage>781</lpage>
          <pub-id pub-id-type="doi">10.1007/s40596-018-0886-6</pub-id>
          <pub-id pub-id-type="medline">29423828</pub-id>
          <pub-id pub-id-type="pii">10.1007/s40596-018-0886-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kühne</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Maaß</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Weck</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Standardized patients in clinical psychology: from research to practice</article-title>
          <source>Verhaltenstherapie</source>
          <year>2021</year>
          <volume>32</volume>
          <issue>Suppl 1</issue>
          <fpage>245</fpage>
          <lpage>253</lpage>
          <pub-id pub-id-type="doi">10.1159/000510049</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Barrows</surname>
              <given-names>HS</given-names>
            </name>
          </person-group>
          <article-title>An overview of the uses of standardized patients for teaching and evaluating clinical skills</article-title>
          <source>Acad Med</source>
          <year>1993</year>
          <volume>68</volume>
          <issue>6</issue>
          <fpage>443</fpage>
          <lpage>451</lpage>
          <pub-id pub-id-type="doi">10.1097/00001888-199306000-00002</pub-id>
          <pub-id pub-id-type="medline">8507309</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kneebone</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Nestel</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wetzel</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Black</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jacklin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Aggarwal</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>The human face of simulation: patient-focused simulation training</article-title>
          <source>Acad Med</source>
          <year>2006</year>
          <volume>81</volume>
          <issue>10</issue>
          <fpage>919</fpage>
          <lpage>924</lpage>
          <pub-id pub-id-type="doi">10.1097/01.acm.0000238323.73623.c2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lane</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rollnick</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The use of simulated patients and role-play in communication skills training: a review of the literature to August 2005</article-title>
          <source>Patient Educ Couns</source>
          <year>2007</year>
          <volume>67</volume>
          <issue>1-2</issue>
          <fpage>13</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1016/j.pec.2007.02.011</pub-id>
          <pub-id pub-id-type="medline">17493780</pub-id>
          <pub-id pub-id-type="pii">S0738-3991(07)00091-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krupinski</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tiller</surname>
              <given-names>JWG</given-names>
            </name>
          </person-group>
          <article-title>The identification and treatment of depression by general practitioners</article-title>
          <source>Aust N Z J Psychiatry</source>
          <year>2001</year>
          <volume>35</volume>
          <issue>6</issue>
          <fpage>827</fpage>
          <lpage>832</lpage>
          <pub-id pub-id-type="doi">10.1046/j.1440-1614.2001.00960.x</pub-id>
          <pub-id pub-id-type="medline">11990894</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goldman</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Nielsen</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Champion</surname>
              <given-names>HC</given-names>
            </name>
          </person-group>
          <article-title>Awareness, diagnosis, and treatment of depression</article-title>
          <source>J Gen Intern Med</source>
          <year>1999</year>
          <volume>14</volume>
          <issue>9</issue>
          <fpage>569</fpage>
          <lpage>580</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/10491249"/>
          </comment>
          <pub-id pub-id-type="doi">10.1046/j.1525-1497.1999.03478.x</pub-id>
          <pub-id pub-id-type="medline">10491249</pub-id>
          <pub-id pub-id-type="pii">jgi03478</pub-id>
          <pub-id pub-id-type="pmcid">PMC1496741</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kroenke</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Review: GPs accurately diagnose about 50% of patients with depression and accurately classify 81% of nondepressed patients</article-title>
          <source>Ann Intern Med</source>
          <year>2010</year>
          <volume>152</volume>
          <issue>8</issue>
          <fpage>JC4</fpage>
          <pub-id pub-id-type="doi">10.7326/0003-4819-152-8-201004200-02013</pub-id>
          <pub-id pub-id-type="medline">20404370</pub-id>
          <pub-id pub-id-type="pii">152/8/JC4-13</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gaebel</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Stricker</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kerst</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Changes from ICD-10 to ICD-11 and future directions in psychiatric classification</article-title>
          <source>Dialogues Clin Neurosci</source>
          <year>2020</year>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>7</fpage>
          <lpage>15</lpage>
          <pub-id pub-id-type="doi">10.31887/dcns.2020.22.1/wgaebel</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gaebel</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Stricker</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Riesbeck</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zielasek</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kerst</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Meisenzahl-Lechner</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Accuracy of diagnostic classification and clinical utility assessment of ICD-11 compared to ICD-10 in 10 mental disorders: findings from a web-based field study</article-title>
          <source>Eur Arch Psychiatry Clin Neurosci</source>
          <year>2020</year>
          <volume>270</volume>
          <issue>3</issue>
          <fpage>281</fpage>
          <lpage>289</lpage>
          <pub-id pub-id-type="doi">10.1007/s00406-019-01076-z</pub-id>
          <pub-id pub-id-type="medline">31654119</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00406-019-01076-z</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zapata</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Huzij</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Examination of medical student and physician attitudes towards suicide reveals need for required training</article-title>
          <source>Front Public Health</source>
          <year>2024</year>
          <volume>12</volume>
          <fpage>1331208</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38633234"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpubh.2024.1331208</pub-id>
          <pub-id pub-id-type="medline">38633234</pub-id>
          <pub-id pub-id-type="pmcid">PMC11021567</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jamison</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Brady</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bùi</surname>
              <given-names>TN</given-names>
            </name>
            <name name-style="western">
              <surname>Wolk</surname>
              <given-names>CB</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>A qualitative examination of clinician anxiety about suicide prevention and its impact on clinical practice</article-title>
          <source>Community Ment Health J</source>
          <year>2025</year>
          <volume>61</volume>
          <issue>3</issue>
          <fpage>568</fpage>
          <lpage>575</lpage>
          <pub-id pub-id-type="doi">10.1007/s10597-024-01364-6</pub-id>
          <pub-id pub-id-type="medline">39361092</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10597-024-01364-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Omerov</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Steineck</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dyregrov</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Runeson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Nyberg</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>The ethics of doing nothing: suicide-bereavement and research—ethical and methodological considerations</article-title>
          <source>Psychol Med</source>
          <year>2014</year>
          <volume>44</volume>
          <issue>16</issue>
          <fpage>3409</fpage>
          <lpage>3420</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23867073"/>
          </comment>
          <pub-id pub-id-type="doi">10.1017/S0033291713001670</pub-id>
          <pub-id pub-id-type="medline">23867073</pub-id>
          <pub-id pub-id-type="pii">S0033291713001670</pub-id>
          <pub-id pub-id-type="pmcid">PMC4255316</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dazzi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gribble</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Wessely</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Fear</surname>
              <given-names>NT</given-names>
            </name>
          </person-group>
          <article-title>Does asking about suicide and related behaviours induce suicidal ideation? What is the evidence?</article-title>
          <source>Psychol Med</source>
          <year>2014</year>
          <volume>44</volume>
          <issue>16</issue>
          <fpage>3361</fpage>
          <lpage>3363</lpage>
          <pub-id pub-id-type="doi">10.1017/s0033291714001299</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Holmes</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Clacy</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hamilton</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kõlves</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Online versus in-person gatekeeper suicide prevention training: comparison in a community sample</article-title>
          <source>J Ment Health</source>
          <year>2024</year>
          <volume>33</volume>
          <issue>5</issue>
          <fpage>605</fpage>
          <lpage>612</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.tandfonline.com/doi/10.1080/09638237.2024.2332811?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/09638237.2024.2332811</pub-id>
          <pub-id pub-id-type="medline">38602188</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pechek</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Vincenzes</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Forziat-Pytel</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Nowakowski</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Romero-Lucero</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Teaching suicide assessment and intervention online: a model of practice</article-title>
          <source>Prof Couns</source>
          <year>2024</year>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>100</fpage>
          <lpage>112</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://tpcjournal.nbcc.org/teaching-suicide-assessment-and-intervention-online-a-model-of-practice/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Suchikova</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tsybuliak</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Teixeira da Silva</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Nazarovets</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>GAIDeT (Generative AI Delegation Taxonomy): a taxonomy for humans to delegate tasks to generative artificial intelligence in scientific research and publishing</article-title>
          <source>Accountability Res</source>
          <year>2025</year>
          <fpage>1</fpage>
          <lpage>27</lpage>
          <pub-id pub-id-type="doi">10.1080/08989621.2025.2544331</pub-id>
          <pub-id pub-id-type="medline">40781729</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
