<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Educ</journal-id><journal-id journal-id-type="publisher-id">mededu</journal-id><journal-id journal-id-type="index">20</journal-id><journal-title>JMIR Medical Education</journal-title><abbrev-journal-title>JMIR Med Educ</abbrev-journal-title><issn pub-type="epub">2369-3762</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v11i1e77332</article-id><article-id pub-id-type="doi">10.2196/77332</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Utility of Generative Artificial Intelligence for Japanese Medical Interview Training: Randomized Crossover Pilot Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Hirosawa</surname><given-names>Takanobu</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yokose</surname><given-names>Masashi</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Sakamoto</surname><given-names>Tetsu</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Harada</surname><given-names>Yukinori</given-names></name><degrees>MD, 
PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Tokumasu</surname><given-names>Kazuki</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Mizuta</surname><given-names>Kazuya</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Shimizu</surname><given-names>Taro</given-names></name><degrees>MD, MSc, MPH, MBA, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Diagnostic and Generalist Medicine, Dokkyo Medical University</institution><addr-line>880 Kitakobayashi, Mibu-cho</addr-line><addr-line>Shimotsuga</addr-line><country>Japan</country></aff><aff id="aff2"><institution>Department of General Medicine, Graduate School of Medicine, Dentistry and Pharmaceutical Sciences, Okayama University</institution><addr-line>Okayama</addr-line><country>Japan</country></aff><aff id="aff3"><institution>Department of Intensive Care Medicine, Kameda Medical Center</institution><addr-line>Chiba</addr-line><country>Japan</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Gentges</surname><given-names>Joshua</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Potla</surname><given-names>Ravi Teja</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Izquierdo-Alvarez</surname><given-names>Vanessa</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to  Takanobu Hirosawa, MD, PhD, Department of Diagnostic and Generalist Medicine, Dokkyo Medical University, 880 Kitakobayashi, Mibu-cho, Shimotsuga, 321-0293, Japan, 81 282861111; 
<email>t.hirosawa1983@gmail.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>1</day><month>8</month><year>2025</year></pub-date><volume>11</volume><elocation-id>e77332</elocation-id><history><date date-type="received"><day>12</day><month>05</month><year>2025</year></date><date date-type="rev-recd"><day>08</day><month>06</month><year>2025</year></date><date date-type="accepted"><day>12</day><month>06</month><year>2025</year></date></history><copyright-statement>&#x00A9; Takanobu Hirosawa, Masashi Yokose, Tetsu Sakamoto, Yukinori Harada, Kazuki Tokumasu, Kazuya Mizuta, Taro Shimizu. Originally published in JMIR Medical Education (<ext-link ext-link-type="uri" xlink:href="https://mededu.jmir.org">https://mededu.jmir.org</ext-link>), 1.8.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Education, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://mededu.jmir.org/">https://mededu.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://mededu.jmir.org/2025/1/e77332"/><abstract><sec><title>Background</title><p>The medical interview remains a cornerstone of clinical training. There is growing interest in applying generative artificial intelligence (AI) in medical education, including medical interview training. 
However, its utility in culturally and linguistically specific contexts, including Japanese, remains underexplored. This study investigated the utility of generative AI for Japanese medical interview training.</p></sec><sec><title>Objective</title><p>This pilot study aimed to evaluate the utility of generative AI as a tool for medical interview training by comparing its performance with that of traditional face-to-face training methods using a simulated patient.</p></sec><sec sec-type="methods"><title>Methods</title><p>We conducted a randomized crossover pilot study involving 20 postgraduate year 1&#x2010;2 physicians from a university hospital. Participants were randomly allocated into 2 groups. Group A began with an AI-based station on a case involving abdominal pain, followed by a traditional station with a standardized patient presenting chest pain. Group B followed the reverse order, starting with the traditional station for abdominal pain and subsequently proceeding to the AI-based station for the chest pain scenario. In the AI-based stations, participants interacted with a GPT-configured platform that simulated patient behaviors. GPTs are customizable versions of ChatGPT adapted for specific purposes. The traditional stations involved face-to-face interviews with a simulated patient. Both groups used identical, standardized case scenarios to ensure uniformity. Two independent evaluators, blinded to the study conditions, assessed participants&#x2019; performances using 6 defined metrics: patient care and communication, history taking, physical examination, accuracy and clarity of transcription, clinical reasoning, and patient management. A 6-point Likert scale was used for scoring. The discrepancy between the evaluators was resolved through discussion. 
To ensure cultural and linguistic authenticity, all interviews and evaluations were conducted in Japanese.</p></sec><sec sec-type="results"><title>Results</title><p>AI-based stations scored lower across most categories, particularly in patient care and communication, than traditional stations (4.48 vs 4.95; <italic>P</italic>=.009). However, AI-based stations demonstrated comparable performance in clinical reasoning, with a nonsignificant difference (4.43 vs 4.85; <italic>P</italic>=.10).</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The comparable performance of generative AI in clinical reasoning highlights its potential as a complementary tool in medical interview training. One of its main advantages lies in enabling self-learning, allowing trainees to independently practice interviews without the need for simulated patients. Nonetheless, the lower scores in patient care and communication underline the importance of maintaining traditional methods that capture the nuances of human interaction. These findings support the adoption of hybrid training models that combine generative AI with conventional approaches to enhance the overall effectiveness of medical interview training in Japan.</p></sec><sec><title>Trial Registration</title><p>UMIN-CTR UMIN000053747; https://center6.umin.ac.jp/cgi-open-bin/ctr_e/ctr_view.cgi?recptno=R000061336</p></sec></abstract><kwd-group><kwd>artificial intelligence</kwd><kwd>generative artificial intelligence</kwd><kwd>medical interview training</kwd><kwd>mock patient</kwd><kwd>simulation education</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Medical Interview Training</title><p>Medical interview training is an essential part of medical education, significantly influencing clinical competence, patient satisfaction, and treatment outcomes [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. 
Effective medical interviewing skills are crucial not only for accurate diagnosis but also for establishing trust and rapport among health care professionals, patients, and their families [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. For example, several studies revealed that proper diagnoses can often be made based mainly on an effective medical interview rather than investigations [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. These findings highlighted the pivotal role of communication skills in clinical practice.</p></sec><sec id="s1-2"><title>Barriers to Medical Interview Training</title><p>Despite its importance, medical interview training often faces several barriers [<xref ref-type="bibr" rid="ref14">14</xref>]. For instance, traditional training methods typically involve simulated patient interactions, which are resource-intensive, requiring substantial time commitments from both medical trainees and educators [<xref ref-type="bibr" rid="ref15">15</xref>]. While simulation training can provide valuable experiential learning [<xref ref-type="bibr" rid="ref16">16</xref>-<xref ref-type="bibr" rid="ref18">18</xref>], its scalability is often limited by resource and financial constraints [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref22">22</xref>]. 
Consequently, medical students and junior physicians may not receive sufficient opportunities for comprehensive and repeated practice, limiting their development of essential communication and clinical reasoning skills [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>].</p></sec><sec id="s1-3"><title>Potential of Artificial Intelligence for Medical Interview Training</title><p>In response to these challenges, artificial intelligence (AI) has emerged as a promising tool in medical education [<xref ref-type="bibr" rid="ref25">25</xref>-<xref ref-type="bibr" rid="ref28">28</xref>]. Until recent breakthroughs, AI performance remained inadequate due to technical limitations [<xref ref-type="bibr" rid="ref29">29</xref>]. However, the current development of suitable technologies, including Compute Unified Device Architecture and advanced graphics processing units, has remarkably enhanced AI capabilities [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref33">33</xref>]. AI-driven platforms offer scalable, consistent, and flexible training experiences that allow trainees to practice extensively [<xref ref-type="bibr" rid="ref34">34</xref>]. These tools have the potential to bridge gaps in access to traditional training by enabling frequent, independent practice [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>].</p></sec><sec id="s1-4"><title>Potential of Generative AI for Medical Interview Training</title><p>Generative AI, a subset of AI that generates human-like responses and interactions [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>], presents exciting potential for medical interview training [<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. 
It often incorporates natural language processing and large language models, which enable it to generate and respond to human dialogue in contextually appropriate ways [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. Unlike traditional training methods, generative AI can simulate diverse and complex patient scenarios, providing interactive, responsive, and personalized feedback [<xref ref-type="bibr" rid="ref43">43</xref>]. This capability not only enhances clinical reasoning but also facilitates self-learning, allowing students to practice repeatedly at their convenience [<xref ref-type="bibr" rid="ref44">44</xref>-<xref ref-type="bibr" rid="ref46">46</xref>].</p></sec><sec id="s1-5"><title>Prior Work</title><p>Recent studies have explored the application of generative AI in medical interview training, particularly in the context of Objective Structured Clinical Examinations (OSCEs). For example, research in Japan reported that GPT-4 (legacy)-based stations outperformed traditional stations for medical students [<xref ref-type="bibr" rid="ref47">47</xref>]. However, direct comparison with previous work is limited by differences in AI versions, participant populations, clinical cases, and study designs. Further, earlier studies found that previous versions of GPT occasionally generated implausible responses [<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>]. Additionally, a comparison between ChatGPT-4 (legacy) and human physicians in conducting medical interviews revealed comparable aggregate scores across 5 components on a 5-point Likert scale (15/25 vs 15/25; <italic>P</italic>&#x003C;.28) [<xref ref-type="bibr" rid="ref50">50</xref>].</p></sec><sec id="s1-6"><title>Research Gap and Aim of the Study</title><p>Despite these advances, there is still a lack of research evaluating the utility of generative AI tools in Japanese clinical contexts. 
Cultural and linguistic nuances, including those of Japanese, play a significant role in effective communication [<xref ref-type="bibr" rid="ref51">51</xref>-<xref ref-type="bibr" rid="ref53">53</xref>]. However, there is insufficient research evaluating the effectiveness and adaptability of generative AI tools within the Japanese clinical context. To the best of our knowledge, there is limited research regarding the effectiveness and applicability of generative AI-driven training tools for Japanese medical trainees [<xref ref-type="bibr" rid="ref47">47</xref>]. Therefore, this study aimed to evaluate the utility and limitations of generative AI by comparing AI-driven medical interview scenarios with traditional mock patient interactions among postgraduate physicians in Japan.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Setting</title><p>This pilot study was conducted in the Department of Diagnostic and Generalist Medicine (general internal medicine [GIM]) at Dokkyo Medical University, Tochigi, Japan.</p><p>To minimize variability in participants&#x2019; medical interview skills, a randomized crossover design was used [<xref ref-type="bibr" rid="ref54">54</xref>]. All interviews and evaluations were conducted in Japanese to preserve cultural and linguistic integrity. The study consisted of 3 main components: participant recruitment, medical interview implementation, and interview evaluation. This study adhered to the CONSORT-EHEALTH (Consolidated Standards of Reporting Trials of Electronic and Mobile Health Applications and Online Telehealth) guidelines (the CONSORT-EHEALTH checklist is provided in <xref ref-type="supplementary-material" rid="app4">Checklist 1</xref>).</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>Ethics approval was obtained from the Institutional Review Board at Dokkyo Medical University Hospital (number R-79&#x2010;14J). 
The research adhered strictly to the Helsinki Declaration guidelines to ensure ethical conduct in human participant research.</p></sec><sec id="s2-3"><title>Participant Inclusion</title><p>Participants included postgraduate year 1&#x2010;2 physicians rotating through the GIM department at Dokkyo Medical University Hospital between April 2024 and January 2025. All eligible physicians during this period were invited to participate. Exclusion criteria included hearing loss or unwillingness to attend the research. Before enrollment, all participants received detailed explanations regarding the study&#x2019;s objectives, procedures, and confidentiality protocols from researchers. Written informed consent was obtained from each participant.</p></sec><sec id="s2-4"><title>Medical Interview</title><sec id="s2-4-1"><title>Overview</title><p>Participants were randomly allocated into 2 groups through block randomization to ensure an equal group size [<xref ref-type="bibr" rid="ref55">55</xref>]. The random allocation sequence was generated by an independent researcher (KM) using Microsoft Excel. This ensured balanced distribution and minimized potential confounding from individual differences.</p><p>Each participant completed 2 types of medical interview stations&#x2014;an AI-based station using the GPTs platform and a traditional station with face-to-face interviews with a trained actor simulating the patient (simulated patient). The 2 stations covered separate clinical cases: abdominal pain and chest pain. In the AI-based stations, participants typed their questions and responses into a laptop computer to interact with the GPTs platform. In the traditional stations, participants engaged in spoken conversation with a simulated patient to conduct the medical interview.</p><p>Participants in Group A started with the AI-based interview on abdominal pain, followed by the traditional interview on chest pain. 
Group B began with the traditional interview on abdominal pain and proceeded to the AI-based interview on chest pain.</p></sec><sec id="s2-4-2"><title>Station Structure</title><p>Both the AI-based and traditional stations followed an identical structure based on The OSCE [<xref ref-type="bibr" rid="ref56">56</xref>]. Initially, participants reviewed the simulated patient&#x2019;s basic information for 1 minute. The medical interview, including questions relevant to physical examination, was conducted over 15 minutes. Physical examinations were not actually performed in either station to maintain consistency with the text-based interaction in the AI-based station. Following the medical interview, participants had 6 minutes to formulate an assessment and plan. Brief feedback and learning points were then provided for several minutes, after which the participants moved to the next station.</p></sec><sec id="s2-4-3"><title>GPTs Setting</title><p>GPTs are custom versions of ChatGPT that we can adjust for a specific purpose without programming [<xref ref-type="bibr" rid="ref57">57</xref>]. In this study, the systems were configured to simulate a patient based on detailed case information provided in Japanese. Importantly, the GPTs were not trained or fine-tuned on the Japanese medical language. The systems did not provide a final diagnosis, even if participants asked. Furthermore, if a participant inputted medical jargon [<xref ref-type="bibr" rid="ref58">58</xref>], GPTs responded with queries such as &#x201C;What is XXX?&#x201D; to simulate realistic patient confusion. Additional configuration details, with English translation, are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-4-4"><title>Simulated Patient</title><p>The traditional simulated patient interviews were conducted by researcher TH, who was trained to ensure consistency in responses and demeanor. 
This approach was chosen because the researcher serves not only as a trained actor simulating symptoms but also as an educator providing brief feedback to the participants at the end of each session. Identical clinical scenarios were used across both groups, based on a widely used and standardized textbook for medical interview training [<xref ref-type="bibr" rid="ref59">59</xref>].</p></sec></sec><sec id="s2-5"><title>Evaluation for Medical Interview</title><p>Traditional stations were video-recorded and transcribed. AI-based stations used the saved text logs. For consistency in evaluation, the transcriptions were refined to match the same structures between stations. For example, headers labeled as &#x201C;GPTs&#x201D; in the AI-based stations were changed to &#x201C;Patient.&#x201D; Self-introduction parts were removed. The corresponding text files were also anonymized. Sample transcript with translation in English is available in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p><p>Two experienced physicians, MK and TSa, independently evaluated the transcripts. The evaluators did not take part in the previous participant recruitment and medical interview implementation. Evaluators used a structured scoring system using a 6-point Likert scale, where 1 is inferior and 6 is excellent. Assessments were based on six key domains: (1) patient care and communication skills, (2) thoroughness of history-taking, (3) physical examination proficiency, (4) accuracy and clarity of transcription, (5) clinical reasoning capability, and (6) overall patient management strategies. The discrepancy was resolved through discussion. Evaluators were blinded to interview methods and participant identity. They assessed transcripts in random order. 
The scoring system is also based on The OSCE [<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref60">60</xref>].</p></sec><sec id="s2-6"><title>Statistical Analysis</title><sec id="s2-6-1"><title>Outcome</title><p>The primary outcomes were the comparison of mean scores between AI-based and traditional stations overall and for each assessment component. The secondary outcome measures involved comparisons within each clinical case (abdominal pain and chest pain) by interview style.</p></sec><sec id="s2-6-2"><title>Data Collection</title><p>Baseline characteristics data were collected, including years since obtaining a degree in medicine and sex. All medical interviews were also recorded to ensure accurate transcription: traditional stations were video-recorded, and AI-based stations preserved the conversation logs as text.</p></sec><sec id="s2-6-3"><title>Analysis</title><p>For both primary and secondary outcomes, scores on the 6-point Likert scale were presented as means with 95% CIs. To assess the appropriateness of statistical tests, the normality of the paired score differences between AI-based and traditional stations was checked using the Shapiro-Wilk test [<xref ref-type="bibr" rid="ref61">61</xref>]. As the score differences were not normally distributed, the Mann-Whitney <italic>U</italic> test was used as the primary method for comparing paired outcomes between the 2 stations. A <italic>P</italic> value &#x003C;.05 was considered statistically significant. For reference, the 95% CIs are provided to supplement the <italic>P</italic> values (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> contains detailed normality test results and detailed mean differences).</p><p>The continuous variable related to participant characteristics is presented as median and IQR and was compared using the Mann-Whitney <italic>U</italic> test. The categorical variable was compared using the Fisher exact test. 
All statistical analyses were conducted using R (version 4.2.2; The R Foundation for Statistical Computing) for MacOS X.</p></sec></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Participant Characteristics</title><p>A total of 20 postgraduate physicians were enrolled (<xref ref-type="fig" rid="figure1">Figure 1</xref>). Among them, 11 (55%) physicians were in their first year after graduation, while 9 (45%) physicians were in their second year. Two (10%) female participants were included. There were no statistical differences in participant characteristics between group A and group B, as shown in <xref ref-type="table" rid="table1">Table 1</xref>.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Flow chart of participant enrollment and group allocation.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mededu_v11i1e77332_fig01.png"/></fig><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Participants' characteristics.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Variable</td><td align="left" valign="bottom">Group A (N=10)</td><td align="left" valign="bottom">Group B (N=10)</td><td align="left" valign="bottom"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top">Female, n (%)</td><td align="char" char="." valign="top">0 (0)</td><td align="char" char="." valign="top">2 (20)</td><td align="char" char="." valign="top">.47<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td></tr><tr><td align="left" valign="top">Years after graduation (years), median (IQR)</td><td align="char" char="." valign="top">1.5 (1.0)</td><td align="char" char="." valign="top">1.0 (1.0)</td><td align="char" char="." 
valign="top">.69<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>Fisher exact test.</p></fn><fn id="table1fn2"><p><sup>b</sup>Mann-Whitney <italic>U</italic> test.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Evaluation Outcomes</title><p>Performance scores were compared between the AI-based and traditional stations across overall and 6 assessment domains, as shown in <xref ref-type="table" rid="table2">Table 2</xref>. Overall, the total score was 4.89 in the AI-based stations compared with 5.47 in the traditional stations (<italic>P</italic>&#x003C;.001).</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Performance scores were compared between the artificial intelligence&#x2013;based and traditional stations across overall and 6 assessment domains.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Scoring system with a 6-point Likert scale</td><td align="left" valign="bottom">Artificial intelligence&#x2013;based (GPTs) stations (N=20<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>), 95% CI</td><td align="left" valign="bottom">Traditional stations (N=20<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>), 95% CI</td><td align="left" valign="bottom"><italic>P</italic> value<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">Overall</td><td align="left" valign="top">4.89 (4.74&#x2010;5.04)</td><td align="left" valign="top">5.47 (5.35&#x2010;5.58)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">Patient care and communication</td><td align="left" valign="top">5.05 (4.73&#x2010;5.37)</td><td align="left" valign="top">5.45 (5.06&#x2010;5.84)</td><td align="left" valign="top">.04</td></tr><tr><td align="left" valign="top">History taking</td><td 
align="left" valign="top">4.90 (4.69&#x2010;5.11)</td><td align="left" valign="top">5.30 (4.96&#x2010;5.65)</td><td align="left" valign="top">.04</td></tr><tr><td align="left" valign="top">Physical examination</td><td align="left" valign="top">5.10 (4.73&#x2010;5.47)</td><td align="left" valign="top">5.80 (5.61&#x2010;5.99)</td><td align="left" valign="top">.001</td></tr><tr><td align="left" valign="top">Accuracy and clarity of transcription</td><td align="left" valign="top">4.70 (4.36&#x2010;5.05)</td><td align="left" valign="top">5.40 (5.16&#x2010;5.64)</td><td align="left" valign="top">.002</td></tr><tr><td align="left" valign="top">Clinical reasoning</td><td align="left" valign="top">4.75 (4.23&#x2010;5.27)</td><td align="left" valign="top">5.30 (4.96&#x2010;5.64)</td><td align="left" valign="top">.13</td></tr><tr><td align="left" valign="top">Management</td><td align="left" valign="top">4.85 (4.34&#x2010;5.36)</td><td align="left" valign="top">5.55 (5.31&#x2010;5.79)</td><td align="left" valign="top">.02</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>Crossover participants with 10 chest pain cases and 10 abdominal pain cases.</p></fn><fn id="table2fn2"><p><sup>b</sup>Mann-Whitney <italic>U</italic> test.</p></fn></table-wrap-foot></table-wrap><p>AI-based stations yielded slightly lower scores in patient care and communication (mean score: 5.05 vs 5.45; <italic>P</italic>=.04). Scores in other domains such as history taking (4.90 vs 5.30; <italic>P</italic>=.04), physical examination (5.10 vs 5.80; <italic>P</italic>=.001), accuracy and clarity of transcription (4.70 vs 5.40; <italic>P</italic>=.002), and management (4.85 vs 5.55; <italic>P</italic>=.02) were also lower for the AI-based stations. 
In contrast, the domain of clinical reasoning showed no significant difference between AI-based and traditional stations (4.75 vs 5.30; <italic>P</italic>=.13).</p></sec><sec id="s3-3"><title>Subgroup Analysis</title><sec id="s3-3-1"><title>Overview</title><p>Subgroup analyses were performed to compare the AI-based and traditional stations for each clinical case individually. The initial case presented to participants was abdominal pain, followed sequentially by a chest pain case.</p></sec><sec id="s3-3-2"><title>Abdominal Pain Cases</title><p>For the abdominal pain case, as shown in <xref ref-type="table" rid="table3">Table 3</xref>, the overall score was significantly lower in the AI-based stations compared with the traditional stations (4.70 vs 5.48; <italic>P</italic>&#x003C;.001). Notably, scores for clinical reasoning (4.30 vs 5.50; <italic>P</italic>=.01) and accuracy and clarity of the transcript (4.40 vs 5.40; <italic>P</italic>=.009) were significantly lower in the AI-based stations. 
While other domains such as patient care and communication (5.00 vs 5.50; <italic>P</italic>=.06), physical examination (5.20 vs 5.80; <italic>P</italic>=.06), and management (4.60 vs 5.50; <italic>P</italic>=.07) were lower in the AI-based stations than in the traditional stations, these did not reach statistical significance.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Subgroup analysis for abdominal pain cases compared the artificial intelligence-based and traditional stations across overall and 6 assessment domains.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Scoring system with a 6-point Likert scale</td><td align="left" valign="bottom">Artificial intelligence-based (GPTs) stations (N=10), 95% CI</td><td align="left" valign="bottom">Traditional stations (N=10), 95% CI</td><td align="left" valign="bottom"><italic>P</italic> value<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">Overall</td><td align="left" valign="top">4.70 (4.47&#x2010;4.93)</td><td align="left" valign="top">5.48 (5.31&#x2010;5.66)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">Patient care and communication</td><td align="left" valign="top">5.00 (4.52&#x2010;5.48)</td><td align="left" valign="top">5.50 (4.80&#x2010;6.20)</td><td align="left" valign="top">.06</td></tr><tr><td align="left" valign="top">History taking</td><td align="left" valign="top">4.70 (4.35&#x2010;5.05)</td><td align="left" valign="top">5.20 (4.54&#x2010;5.86)</td><td align="left" valign="top">.17</td></tr><tr><td align="left" valign="top">Physical examination</td><td align="left" valign="top">5.20 (4.64&#x2010;5.76)</td><td align="left" valign="top">5.80 (5.50&#x2010;6.10)</td><td align="left" valign="top">.06</td></tr><tr><td align="left" valign="top">Accuracy and clarity of transcription</td><td align="left" 
valign="top">4.40 (3.78&#x2010;5.00)</td><td align="left" valign="top">5.40 (5.03&#x2010;5.77)</td><td align="left" valign="top">.009</td></tr><tr><td align="left" valign="top">Clinical reasoning</td><td align="left" valign="top">4.30 (3.54&#x2010;5.06)</td><td align="left" valign="top">5.50 (5.12&#x2010;5.88)</td><td align="left" valign="top">.01</td></tr><tr><td align="left" valign="top">Management</td><td align="left" valign="top">4.60 (3.70&#x2010;5.50)</td><td align="left" valign="top">5.50 (5.12&#x2010;5.88)</td><td align="left" valign="top">.07</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>Mann-Whitney <italic>U</italic> test.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3-3"><title>Chest Pain Cases</title><p>In the case of chest pain, as shown in <xref ref-type="table" rid="table4">Table 4</xref>, the AI-based stations scored slightly lower in overall scores compared with those in the traditional stations (5.08 vs 5.45; <italic>P</italic>=.004). Physical examination skills were also significantly lower in the AI-based stations (5.00 vs 5.80; <italic>P</italic>=.009). Other domains, including patient care and communication (5.10 vs 5.40; <italic>P</italic>=.37), history taking (5.10 vs 5.40; <italic>P</italic>=.14), and transcription clarity (5.00 vs 5.40; <italic>P</italic>=.09), demonstrated trends in favor of the traditional stations but did not reach significance. 
Clinical reasoning scores were comparable between the 2 stations (5.20 vs 5.10; <italic>P</italic>=.72), indicating consistent reasoning performance regardless of the interview modality.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Subgroup analysis for chest pain cases compared the artificial intelligence&#x2013;based and traditional stations across overall and 6 assessment domains.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Scoring system with a 6-point Likert scale</td><td align="left" valign="bottom">Artificial intelligence-based (GPTs) stations (N=10), 95% CI</td><td align="left" valign="bottom">Traditional stations (N=10), 95% CI</td><td align="left" valign="bottom"><italic>P</italic> value<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">Overall</td><td align="char" char="." valign="top">5.08 (4.90&#x2010;5.27)</td><td align="char" char="." valign="top">5.45 (5.29&#x2010;5.61)</td><td align="left" valign="top">.004</td></tr><tr><td align="left" valign="top">Patient care and communication</td><td align="char" char="." valign="top">5.10 (4.57&#x2010;5.63)</td><td align="char" char="." valign="top">5.40 (4.90&#x2010;5.90)</td><td align="left" valign="top">.37</td></tr><tr><td align="left" valign="top">History taking</td><td align="char" char="." valign="top">5.10 (4.87&#x2010;5.33)</td><td align="char" char="." valign="top">5.40 (5.03&#x2010;5.77)</td><td align="left" valign="top">.14</td></tr><tr><td align="left" valign="top">Physical examination</td><td align="char" char="." valign="top">5.00 (4.42&#x2010;5.58)</td><td align="char" char="." valign="top">5.80 (5.50&#x2010;6.10)</td><td align="left" valign="top">.009</td></tr><tr><td align="left" valign="top">Accuracy and clarity of transcription</td><td align="char" char="." valign="top">5.00 (4.66&#x2010;5.34)</td><td align="char" char="." 
valign="top">5.40 (5.03&#x2010;5.77)</td><td align="left" valign="top">.09</td></tr><tr><td align="left" valign="top">Clinical reasoning</td><td align="char" char="." valign="top">5.20 (4.46&#x2010;5.94)</td><td align="char" char="." valign="top">5.10 (4.47&#x2010;5.73)</td><td align="left" valign="top">.72</td></tr><tr><td align="left" valign="top">Management</td><td align="char" char="." valign="top">5.10 (4.47&#x2010;5.73)</td><td align="char" char="." valign="top">5.60 (5.23&#x2010;5.97)</td><td align="left" valign="top">.20</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>Mann-Whitney <italic>U</italic> test.</p></fn></table-wrap-foot></table-wrap></sec></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>This study evaluated the utility of generative AI in medical interview training compared with traditional simulated patient interactions among postgraduate physicians in Japan. The principal findings indicate that while AI-based stations provide alternative training methods, they generally yield lower performance scores across several critical domains, including patient care and communication, thoroughness of history-taking, physical examination proficiency, accuracy and clarity of transcription, and management. Participants may have found it difficult to express empathy or engage in natural conversation through typed exchanges [<xref ref-type="bibr" rid="ref62">62</xref>], limiting the development of interpersonal skills in the GPT stations. While generative AI demonstrates the potential for medical interview training, our findings suggest that it is best suited as a supplementary tool rather than a replacement for traditional simulated patient interactions. 
The lower performance observed in domains dependent on human interaction&#x2014;such as communication and patient care&#x2014;highlights current limitations in AI&#x2019;s ability to simulate empathy and nonverbal cues. Traditional stations, facilitated by trained actors or simulated patients, remain essential for developing advanced interpersonal and communication skills.</p><p>A key methodological aspect of this study was configuring the GPT instance to realistically simulate Japanese patient interactions. The GPTs were set up to operate entirely in Japanese, with patient cases presented in culturally appropriate language. To enhance authenticity, the system was instructed to respond using typical expressions. Furthermore, the GPTs were directed to avoid using medical terminology.</p><p>Despite the limitations in interpersonal skill development, domains such as clinical reasoning remained comparable between GPTs and traditional stations. This finding reinforces the potential of AI-based stations in supporting cognitive aspects of clinical assessment. At the same time, the lower scores in interpersonal domains highlight the enduring value of traditional stations, where human dynamics and emotional responsiveness can be authentically practiced and assessed.</p><p>Subgroup analyses further demonstrated these differences across specific clinical scenarios. In the abdominal pain case, AI-based stations scored significantly lower in overall performance, clinical reasoning, and transcription clarity. Although other domains such as patient care and physical examination were also lower, they did not reach statistical significance. For the chest pain case, while the overall scores were also lower in the GPT stations, the difference was narrower, with physical examination skills showing the most significant disparity. Interestingly, the subgroup analysis of abdominal pain cases revealed a significantly lower clinical reasoning score in the AI-based station. 
This disparity may be attributed to differences in case complexity or the broader differential diagnoses associated with abdominal presentations. In particular, abdominal pain may demand a nuanced interpretation of information [<xref ref-type="bibr" rid="ref63">63</xref>], suggesting that the limited interactivity of the AI-based format may have constrained diagnostic reasoning. This finding, which was not apparent in the overall analysis, provides an important supplementary insight. It highlights the need to account for case-specific characteristics when selecting cases or designing AI-driven educational tools [<xref ref-type="bibr" rid="ref64">64</xref>].</p></sec><sec id="s4-2"><title>Limitations</title><p>Several limitations must be acknowledged. First, this study was designed as a feasibility and exploratory trial and was not fully powered or intended for formal hypothesis testing. The small sample size (n=20) and limited number of stations constrain the generalizability of the findings. The primary goal was to assess the feasibility and gather preliminary data to inform future larger-scale studies. Second, the study only included postgraduate physicians from a single institution, potentially restricting the diversity and representativeness of the findings. Results may not be directly applicable to undergraduate medical students, other health care professionals, or participants from different institutions or backgrounds. Third, the mode of interaction differed between the AI-based stations (typed input) and the traditional stations (spoken conversation), which may have inherently biased communication-related scores. Furthermore, physical examinations were not actually performed in either station to unify the format for the text-based interaction in the AI-based station, which could have influenced how this domain was assessed. Fourth, the blinded evaluators may have been able to discern the interview modality indirectly, potentially introducing bias. 
Fifth, it should also be noted that there was some difference in difficulty between the abdominal pain and chest pain cases. This discrepancy arose because it is inherently challenging to create cases of identical complexity based on different primary concerns. Such differences in case difficulty may have influenced performance results and should be considered when interpreting subgroup analyses. Finally, the study was conducted in a single language using only one generative AI platform, GPTs, limiting its applicability to other languages, cultural contexts, and AI technologies.</p></sec><sec id="s4-3"><title>Comparison With Prior Work</title><p>The current findings expand upon the existing literature. Previous research on OSCEs in Japan found that GPT-4 (legacy)-based stations outperformed traditional stations among medical students, with significantly higher total scores across 5 components of a 6-point Likert scale (28.1/31 vs 27.1/31; <italic>P</italic>=.01) [<xref ref-type="bibr" rid="ref47">47</xref>]. Several differences between the previous study and the current findings limit direct comparison. These include variations in the AI versions used (GPT-4 legacy vs GPTs), participant demographics (medical students vs physicians), cases, and study design (nonrandomized vs randomized crossover).</p><p>In relation to the quality of simulated patient responses, previous research on GPT-3.5 and GPT-4 (legacy) indicated implausible response rates of 2% (14/842) and 0.7% (13/1894), respectively [<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>]. In this study using the latest GPTs, responses were almost entirely plausible, with only one instance where GPTs prematurely revealed full physical exam results. 
This highlights rare but relevant issues in prompt sensitivity.</p><p>These findings are particularly promising for resource-limited settings or educational scenarios where access to trained professionals for mock interviews is constrained [<xref ref-type="bibr" rid="ref65">65</xref>]. However, caution remains warranted in extrapolating these outcomes to real-world clinical environments.</p></sec><sec id="s4-4"><title>Future Direction</title><p>To expand the utility of generative AI in medical interview training, future research should aim for broader validation across diverse educational settings, languages, and digital technology platforms. Improvements in multimodal AI and the integration of voice-based interactions may enhance the realism and interpersonal aspects of AI simulations [<xref ref-type="bibr" rid="ref66">66</xref>]. Multimodal AI processes and understands information from different types of data, including text, images, audio, video, and sometimes even sensor data [<xref ref-type="bibr" rid="ref67">67</xref>]. Future investigations should also explore the longitudinal impacts of repeated practice with AI-driven tools to better evaluate the long-term benefits [<xref ref-type="bibr" rid="ref68">68</xref>]. Additionally, studies comparing hybrid models&#x2014;such as AI-assisted interviews followed by human debriefing&#x2014;may offer insights into how best to combine the strengths of both methods [<xref ref-type="bibr" rid="ref69">69</xref>,<xref ref-type="bibr" rid="ref70">70</xref>].</p></sec><sec id="s4-5"><title>Conclusions</title><p>This study provides important proof-of-concept evidence for the use of generative AI, specifically GPTs, as a tool in medical interview training among postgraduate physicians. While the AI-based (GPT) station underperformed compared with traditional stations across several domains, including patient care and communication, the performance in clinical reasoning was comparable. 
These results suggest that generative AI could serve as a supplemental tool for medical education in cognitive components of clinical assessment.</p><p>The practical implications for medical education are important. Generative AI can enable self-directed, scalable, and accessible medical interview practice. However, the current findings also reinforce the value of human interaction in developing nuanced communication and empathy. Therefore, the adoption of hybrid educational models may be particularly effective. This approach draws on the complementary strengths of AI and human educators in simulation-based learning environments.</p><p>Nevertheless, these conclusions are preliminary. The small sample size, single-institution setting, and limited number of clinical cases restrict the generalizability of our findings. The crossover design, differences in case complexity, modality of interaction (typed vs spoken), and the use of a single AI language model and language all further limit broad application. These feasibility findings warrant cautious interpretation and highlight the need for larger, multicenter, and longitudinal studies to establish comparative effectiveness and assess the long-term educational impact of AI-assisted training.</p><p>Future research should explore the integration of multimodal AI systems to enhance the realism and authenticity of patient simulations. Additionally, multiple institutional collaborations, broader participant demographics, and studies in other languages and contexts are needed to determine the true potential and limitations of AI in medical education.</p></sec></sec></body><back><ack><p>This study was made possible using the resources from the Department of Diagnostic and Generalist Medicine, Dokkyo Medical University.</p></ack><fn-group><fn fn-type="con"><p>TH, MY, TSa, YH, KT, KM, and TSh contributed to the study's conceptualization and design. 
TH served as a simulated patient, and MY was responsible for participant allocation using block randomization. KM and TSa independently evaluated the interview transcripts. TH conducted the statistical analyses and drafted the manuscript. YH, KT, and TSh provided critical revisions to the manuscript for intellectual content. All authors reviewed and approved the final version of the manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">CONSORT-EHEALTH</term><def><p>Consolidated Standards of Reporting Trials of Electronic and Mobile Health Applications and Online Telehealth</p></def></def-item><def-item><term id="abb3">GIM</term><def><p>general internal medicine</p></def></def-item><def-item><term id="abb4">OSCE</term><def><p>Objective Structured Clinical Examination</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lipkin</surname><given-names>M</given-names>  <suffix>Jr</suffix></name><name name-style="western"><surname>Quill</surname><given-names>TE</given-names> </name><name name-style="western"><surname>Napodano</surname><given-names>RJ</given-names> </name></person-group><article-title>The medical interview: a core curriculum for residencies in internal medicine</article-title><source>Ann Intern Med</source><year>1984</year><month>02</month><volume>100</volume><issue>2</issue><fpage>277</fpage><lpage>284</lpage><pub-id pub-id-type="doi">10.7326/0003-4819-100-2-277</pub-id><pub-id pub-id-type="medline">6362513</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Stoeckle</surname><given-names>JD</given-names> </name><name name-style="western"><surname>Billings</surname><given-names>JA</given-names> </name></person-group><article-title>A history of history-taking: the medical interview</article-title><source>J Gen Intern Med</source><year>1987</year><volume>2</volume><issue>2</issue><fpage>119</fpage><lpage>127</lpage><pub-id pub-id-type="doi">10.1007/BF02596310</pub-id><pub-id pub-id-type="medline">3550009</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Seitz</surname><given-names>T</given-names> </name><name name-style="western"><surname>Raschauer</surname><given-names>B</given-names> </name><name name-style="western"><surname>L&#x00E4;ngle</surname><given-names>AS</given-names> </name><name name-style="western"><surname>L&#x00F6;ffler-Stastka</surname><given-names>H</given-names> </name></person-group><article-title>Competency in medical history taking-the training physicians&#x2019; view</article-title><source>Wien Klin Wochenschr</source><year>2019</year><month>01</month><volume>131</volume><issue>1-2</issue><fpage>17</fpage><lpage>22</lpage><pub-id pub-id-type="doi">10.1007/s00508-018-1431-z</pub-id><pub-id pub-id-type="medline">30569233</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Keifenheim</surname><given-names>KE</given-names> </name><name name-style="western"><surname>Teufel</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ip</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Teaching history taking to medical students: a systematic review</article-title><source>BMC Med 
Educ</source><year>2015</year><month>09</month><day>28</day><volume>15</volume><fpage>159</fpage><pub-id pub-id-type="doi">10.1186/s12909-015-0443-x</pub-id><pub-id pub-id-type="medline">26415941</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Lichstein</surname><given-names>PR</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>Walker</surname><given-names>HK</given-names> </name><name name-style="western"><surname>Hall</surname><given-names>WD</given-names> </name><name name-style="western"><surname>Hurst</surname><given-names>JW</given-names> </name></person-group><article-title>The medical interview</article-title><source>Clinical Methods: The History, Physical, and Laboratory Examinations</source><year>1990</year><edition>3</edition><publisher-name>Butterworths</publisher-name></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Novack</surname><given-names>DH</given-names> </name><name name-style="western"><surname>Dub&#x00E9;</surname><given-names>C</given-names> </name><name name-style="western"><surname>Goldstein</surname><given-names>MG</given-names> </name></person-group><article-title>Teaching medical interviewing. 
a basic course on interviewing and the physician-patient relationship</article-title><source>Arch Intern Med</source><year>1992</year><month>09</month><volume>152</volume><issue>9</issue><fpage>1814</fpage><lpage>1820</lpage><pub-id pub-id-type="doi">10.1001/archinte.152.9.1814</pub-id><pub-id pub-id-type="medline">1520048</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eggly</surname><given-names>S</given-names> </name></person-group><article-title>Physician-patient co-construction of illness narratives in the medical interview</article-title><source>Health Commun</source><year>2002</year><volume>14</volume><issue>3</issue><fpage>339</fpage><lpage>360</lpage><pub-id pub-id-type="doi">10.1207/S15327027HC1403_3</pub-id><pub-id pub-id-type="medline">12186492</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Derksen</surname><given-names>F</given-names> </name><name name-style="western"><surname>Bensing</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lagro-Janssen</surname><given-names>A</given-names> </name></person-group><article-title>Effectiveness of empathy in general practice: a systematic review</article-title><source>Br J Gen Pract</source><year>2013</year><month>01</month><volume>63</volume><issue>606</issue><fpage>e76</fpage><lpage>84</lpage><pub-id pub-id-type="doi">10.3399/bjgp13X660814</pub-id><pub-id pub-id-type="medline">23336477</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hatem</surname><given-names>DS</given-names> </name><name name-style="western"><surname>Barrett</surname><given-names>SV</given-names> </name><name 
name-style="western"><surname>Hewson</surname><given-names>M</given-names> </name><name name-style="western"><surname>Steele</surname><given-names>D</given-names> </name><name name-style="western"><surname>Purwono</surname><given-names>U</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>R</given-names> </name></person-group><article-title>Teaching the medical interview: methods and key learning issues in a faculty development course</article-title><source>J Gen Intern Med</source><year>2007</year><month>12</month><volume>22</volume><issue>12</issue><fpage>1718</fpage><lpage>1724</lpage><pub-id pub-id-type="doi">10.1007/s11606-007-0408-9</pub-id><pub-id pub-id-type="medline">17952511</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Foronda</surname><given-names>C</given-names> </name><name name-style="western"><surname>MacWilliams</surname><given-names>B</given-names> </name><name name-style="western"><surname>McArthur</surname><given-names>E</given-names> </name></person-group><article-title>Interprofessional communication in healthcare: an integrative review</article-title><source>Nurse Educ Pract</source><year>2016</year><month>07</month><volume>19</volume><fpage>36</fpage><lpage>40</lpage><pub-id pub-id-type="doi">10.1016/j.nepr.2016.04.005</pub-id><pub-id pub-id-type="medline">27428690</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dang</surname><given-names>BN</given-names> </name><name name-style="western"><surname>Westbrook</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Njue</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Giordano</surname><given-names>TP</given-names> 
</name></person-group><article-title>Building trust and rapport early in the new doctor-patient relationship: a longitudinal qualitative study</article-title><source>BMC Med Educ</source><year>2017</year><month>02</month><day>2</day><volume>17</volume><issue>1</issue><fpage>32</fpage><pub-id pub-id-type="doi">10.1186/s12909-017-0868-5</pub-id><pub-id pub-id-type="medline">28148254</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hampton</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Harrison</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Mitchell</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Prichard</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Seymour</surname><given-names>C</given-names> </name></person-group><article-title>Relative contributions of history-taking, physical examination, and laboratory investigation to diagnosis and management of medical outpatients</article-title><source>BMJ</source><year>1975</year><month>05</month><day>31</day><volume>2</volume><issue>5969</issue><fpage>486</fpage><lpage>489</lpage><pub-id pub-id-type="doi">10.1136/bmj.2.5969.486</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Peterson</surname><given-names>MC</given-names> </name><name name-style="western"><surname>Holbrook</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Von Hales</surname><given-names>D</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>NL</given-names> </name><name name-style="western"><surname>Staker</surname><given-names>LV</given-names> </name></person-group><article-title>Contributions of the 
history, physical examination, and laboratory investigation in making medical diagnoses</article-title><source>West J Med</source><year>1992</year><month>02</month><volume>156</volume><issue>2</issue><fpage>163</fpage><lpage>165</lpage><pub-id pub-id-type="medline">1536065</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Oliveira Franco</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Martins Machado</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Satovschi Grinbaum</surname><given-names>R</given-names> </name><name name-style="western"><surname>Martiniano Porf&#x00ED;rio</surname><given-names>GJ</given-names> </name></person-group><article-title>Barriers to outpatient education for medical students: a narrative review</article-title><source>Int J Med Educ</source><year>2019</year><month>09</month><day>27</day><volume>10</volume><fpage>180</fpage><lpage>190</lpage><pub-id pub-id-type="doi">10.5116/ijme.5d76.32c5</pub-id><pub-id pub-id-type="medline">31562805</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Purva</surname><given-names>M</given-names> </name><name name-style="western"><surname>Baxendale</surname><given-names>B</given-names> </name><name name-style="western"><surname>Scales</surname><given-names>E</given-names> </name><name name-style="western"><surname>Anderson</surname><given-names>A</given-names> </name><name name-style="western"><surname>Nicklin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Howes</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Simulation-based education in healthcare standards framework and guidance</article-title><source>Association for Simulated 
Practice in Healthcare</source><access-date>2023-04-20</access-date><publisher-name>NHS Health Education England</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://aspih.org.uk/wp-content/uploads/2017/07/standards-framework.pdf">https://aspih.org.uk/wp-content/uploads/2017/07/standards-framework.pdf</ext-link></comment></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Higham</surname><given-names>H</given-names> </name></person-group><article-title>Simulation past, present and future-a decade of progress in simulation-based education in the UK</article-title><source>BMJ Simul Technol Enhanc Learn</source><year>2021</year><volume>7</volume><issue>5</issue><fpage>404</fpage><lpage>409</lpage><pub-id pub-id-type="doi">10.1136/bmjstel-2020-000601</pub-id><pub-id pub-id-type="medline">35515719</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Beal</surname><given-names>MD</given-names> </name><name name-style="western"><surname>Kinnear</surname><given-names>J</given-names> </name><name name-style="western"><surname>Anderson</surname><given-names>CR</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>TD</given-names> </name><name name-style="western"><surname>Wamboldt</surname><given-names>R</given-names> </name><name name-style="western"><surname>Hooper</surname><given-names>L</given-names> </name></person-group><article-title>The effectiveness of medical simulation in teaching medical students critical care medicine: a systematic review and meta-analysis</article-title><source>Simul Healthc</source><year>2017</year><month>04</month><volume>12</volume><issue>2</issue><fpage>104</fpage><lpage>116</lpage><pub-id pub-id-type="doi">10.1097/SIH.0000000000000189</pub-id><pub-id 
pub-id-type="medline">28704288</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kononowicz</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Woodham</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Edelbring</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Virtual patient simulations in health professions education: systematic review and meta-analysis by the digital health education collaboration</article-title><source>J Med Internet Res</source><year>2019</year><month>07</month><day>2</day><volume>21</volume><issue>7</issue><fpage>e14676</fpage><pub-id pub-id-type="doi">10.2196/14676</pub-id><pub-id pub-id-type="medline">31267981</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cleland</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Abe</surname><given-names>K</given-names> </name><name name-style="western"><surname>Rethans</surname><given-names>JJ</given-names> </name></person-group><article-title>The use of simulated patients in medical education: AMEE Guide No 42</article-title><source>Med Teach</source><year>2009</year><month>06</month><volume>31</volume><issue>6</issue><fpage>477</fpage><lpage>486</lpage><pub-id pub-id-type="doi">10.1080/01421590903002821</pub-id><pub-id pub-id-type="medline">19811162</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bosse</surname><given-names>HM</given-names> </name><name name-style="western"><surname>Nickel</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Huwendiek</surname><given-names>S</given-names> </name><name name-style="western"><surname>Schultz</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Nikendei</surname><given-names>C</given-names> </name></person-group><article-title>Cost-effectiveness of peer role play and standardized patients in undergraduate communication training</article-title><source>BMC Med Educ</source><year>2015</year><month>10</month><day>24</day><volume>15</volume><fpage>183</fpage><pub-id pub-id-type="doi">10.1186/s12909-015-0468-1</pub-id><pub-id pub-id-type="medline">26498479</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Al Odhayani</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ratnapalan</surname><given-names>S</given-names> </name></person-group><article-title>Teaching communication skills</article-title><source>Can Fam Physician</source><year>2011</year><month>10</month><volume>57</volume><issue>10</issue><fpage>1216</fpage><lpage>1218</lpage><pub-id pub-id-type="medline">21998240</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Maloney</surname><given-names>S</given-names> </name><name name-style="western"><surname>Haines</surname><given-names>T</given-names> </name></person-group><article-title>Issues of cost-benefit and cost-effectiveness for simulation in health professions education</article-title><source>Adv Simul (Lond)</source><year>2016</year><volume>1</volume><fpage>13</fpage><pub-id pub-id-type="doi">10.1186/s41077-016-0020-3</pub-id><pub-id pub-id-type="medline">29449982</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Elendu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Amaechi</surname><given-names>DC</given-names> </name><name name-style="western"><surname>Okatta</surname><given-names>AU</given-names> </name><etal/></person-group><article-title>The impact of simulation-based training in medical education: a review</article-title><source>Medicine (Baltimore)</source><year>2024</year><month>07</month><day>5</day><volume>103</volume><issue>27</issue><fpage>e38813</fpage><pub-id pub-id-type="doi">10.1097/MD.0000000000038813</pub-id><pub-id pub-id-type="medline">38968472</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abe</surname><given-names>K</given-names> </name><name name-style="western"><surname>Suzuki</surname><given-names>T</given-names> </name><name name-style="western"><surname>Fujisaki</surname><given-names>K</given-names> </name><name name-style="western"><surname>Ban</surname><given-names>N</given-names> </name></person-group><article-title>Demographic characteristics of standardized patients (SPs) and their satisfaction and burdensome in Japan: the first report of a nationwide survey</article-title><source>Igaku Kyoiku</source><year>2007</year><volume>38</volume><issue>5</issue><fpage>301</fpage><lpage>307</lpage></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>L</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>P</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>Z</given-names> </name></person-group><article-title>Artificial intelligence in education: a review</article-title><source>IEEE 
Access</source><year>2020</year><volume>8</volume><fpage>75264</fpage><lpage>75278</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2020.2988510</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>R</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>T</given-names> </name></person-group><article-title>Evolution of artificial intelligence in medical education from 2000 to 2024: bibliometric analysis</article-title><source>Interact J Med Res</source><year>2025</year><month>01</month><day>30</day><volume>14</volume><fpage>e63775</fpage><pub-id pub-id-type="doi">10.2196/63775</pub-id><pub-id pub-id-type="medline">39883926</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>AS</given-names> </name><name name-style="western"><surname>Li</surname><given-names>D</given-names> </name><name name-style="western"><surname>Kulasegaram</surname><given-names>KM</given-names> </name></person-group><article-title>Artificial intelligence in undergraduate medical education: a scoping review</article-title><source>Acad Med</source><year>2021</year><month>11</month><day>1</day><volume>96</volume><issue>11S</issue><fpage>S62</fpage><lpage>S70</lpage><pub-id pub-id-type="doi">10.1097/ACM.0000000000004291</pub-id><pub-id pub-id-type="medline">34348374</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Adamopoulou</surname><given-names>E</given-names> </name><name name-style="western"><surname>Moussiades</surname><given-names>L</given-names> 
</name></person-group><article-title>Chatbots: history, technology, and applications</article-title><source>Mach Learn Appl</source><year>2020</year><month>12</month><volume>2</volume><fpage>100006</fpage><pub-id pub-id-type="doi">10.1016/j.mlwa.2020.100006</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Delipetrev</surname><given-names>B</given-names> </name><name name-style="western"><surname>Tsinaraki</surname><given-names>C</given-names> </name><name name-style="western"><surname>Kostic</surname><given-names>U</given-names> </name></person-group><source>Historical Evolution of Artificial Intelligence</source><year>2020</year><publisher-name>Publications Office of the European Union</publisher-name></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Jeon</surname><given-names>W</given-names> </name><name name-style="western"><surname>Ko</surname><given-names>G</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ha</surname><given-names>D</given-names> </name><name name-style="western"><surname>Ro</surname><given-names>WW</given-names> </name></person-group><article-title>Deep learning with GPUs</article-title><source>Advances in Computers</source><year>2021</year><volume>122</volume><publisher-name>Elsevier</publisher-name><fpage>167</fpage><lpage>215</lpage><pub-id pub-id-type="doi">10.1016/bs.adcom.2020.11.003</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pandey</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Fernandez</surname><given-names>M</given-names> </name><name name-style="western"><surname>Gentile</surname><given-names>F</given-names> </name><etal/></person-group><article-title>The transformational role of GPU computing and deep learning in drug discovery</article-title><source>Nat Mach Intell</source><year>2022</year><volume>4</volume><issue>3</issue><fpage>211</fpage><lpage>221</lpage><pub-id pub-id-type="doi">10.1038/s42256-022-00463-x</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Akkisetty</surname><given-names>PK</given-names> </name></person-group><person-group person-group-type="editor"><name name-style="western"><surname>AMR</surname><given-names>PRC</given-names> </name><name name-style="western"><surname>Colby</surname><given-names>R</given-names> </name><name name-style="western"><surname>Nagasubramanian</surname><given-names>G</given-names> </name><name name-style="western"><surname>Ranganath</surname><given-names>S</given-names> </name></person-group><article-title>An overview of AI platforms, frameworks, libraries, and processors</article-title><source>Model Optimization Methods for Efficient and Edge AI: Federated Learning Architectures, Frameworks and Applications</source><year>2024</year><publisher-name>Wiley</publisher-name><fpage>43</fpage><lpage>55</lpage><pub-id pub-id-type="doi">10.1002/9781394219230</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Krizhevsky</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sutskever</surname><given-names>I</given-names> </name><name name-style="western"><surname>Hinton</surname><given-names>GE</given-names> </name></person-group><article-title>Imagenet classification with deep convolutional 
neural networks</article-title><source>Commun ACM</source><year>2017</year><volume>60</volume><issue>6</issue><fpage>84</fpage><lpage>90</lpage><pub-id pub-id-type="doi">10.1145/3065386</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Schaekermann</surname><given-names>M</given-names> </name><name name-style="western"><surname>Palepu</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Towards conversational diagnostic artificial intelligence</article-title><source>Nature</source><year>2025</year><month>06</month><volume>642</volume><issue>8067</issue><fpage>442</fpage><lpage>450</lpage><pub-id pub-id-type="doi">10.1038/s41586-025-08866-7</pub-id><pub-id pub-id-type="medline">40205050</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stamer</surname><given-names>T</given-names> </name><name name-style="western"><surname>Steinh&#x00E4;user</surname><given-names>J</given-names> </name><name name-style="western"><surname>Fl&#x00E4;gel</surname><given-names>K</given-names> </name></person-group><article-title>Artificial intelligence supporting the training of communication skills in the education of health care professions: scoping review</article-title><source>J Med Internet Res</source><year>2023</year><month>06</month><day>19</day><volume>25</volume><fpage>e43311</fpage><pub-id pub-id-type="doi">10.2196/43311</pub-id><pub-id pub-id-type="medline">37335593</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Okonkwo</surname><given-names>CW</given-names> </name><name 
name-style="western"><surname>Ade-Ibijola</surname><given-names>A</given-names> </name></person-group><article-title>Chatbots applications in education: a systematic review</article-title><source>Comput Educ Artif Intell</source><year>2021</year><volume>2</volume><fpage>100033</fpage><pub-id pub-id-type="doi">10.1016/j.caeai.2021.100033</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sai</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gaur</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sai</surname><given-names>R</given-names> </name><name name-style="western"><surname>Chamola</surname><given-names>V</given-names> </name><name name-style="western"><surname>Guizani</surname><given-names>M</given-names> </name><name name-style="western"><surname>Rodrigues</surname><given-names>JJPC</given-names> </name></person-group><article-title>Generative AI for transformative healthcare: a comprehensive study of emerging models, applications, case studies, and limitations</article-title><source>IEEE Access</source><year>2024</year><volume>12</volume><fpage>31078</fpage><lpage>31106</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2024.3367715</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de Vere Hunt</surname><given-names>IJ</given-names> </name><name name-style="western"><surname>Jin</surname><given-names>KX</given-names> </name><name name-style="western"><surname>Linos</surname><given-names>E</given-names> </name></person-group><article-title>A framework for considering the use of generative AI for health</article-title><source>NPJ Digit Med</source><year>2025</year><month>05</month><day>21</day><volume>8</volume><issue>1</issue><fpage>297</fpage><pub-id 
pub-id-type="doi">10.1038/s41746-025-01695-y</pub-id><pub-id pub-id-type="medline">40399429</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sardesai</surname><given-names>N</given-names> </name><name name-style="western"><surname>Russo</surname><given-names>P</given-names> </name><name name-style="western"><surname>Martin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Sardesai</surname><given-names>A</given-names> </name></person-group><article-title>Utilizing generative conversational artificial intelligence to create simulated patient encounters: a pilot study for anaesthesia training</article-title><source>Postgrad Med J</source><year>2024</year><month>03</month><day>18</day><volume>100</volume><issue>1182</issue><fpage>237</fpage><lpage>241</lpage><pub-id pub-id-type="doi">10.1093/postmj/qgad137</pub-id><pub-id pub-id-type="medline">38240054</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abdelnabi</surname><given-names>AAB</given-names> </name><name name-style="western"><surname>Soykan</surname><given-names>B</given-names> </name><name name-style="western"><surname>Bhatti</surname><given-names>D</given-names> </name><name name-style="western"><surname>Rabadi</surname><given-names>G</given-names> </name></person-group><article-title>Usefulness of large language models (LLMs) for student feedback on H&#x0026;P during clerkship: artificial intelligence for personalized learning</article-title><source>ACM Trans Comput Healthcare</source><year>2025</year><pub-id pub-id-type="doi">10.1145/371229</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Otter</surname><given-names>DW</given-names> </name><name name-style="western"><surname>Medina</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Kalita</surname><given-names>JK</given-names> </name></person-group><article-title>A survey of the usages of deep learning for natural language processing</article-title><source>IEEE Trans Neural Netw Learning Syst</source><year>2020</year><volume>32</volume><issue>2</issue><fpage>604</fpage><lpage>624</lpage><pub-id pub-id-type="doi">10.1109/TNNLS.2020.2979670</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>J</given-names> </name><etal/></person-group><article-title>A survey on evaluation of large language models</article-title><source>ACM Trans Intell Syst Technol</source><year>2024</year><month>06</month><day>30</day><volume>15</volume><issue>3</issue><fpage>1</fpage><lpage>45</lpage><pub-id pub-id-type="doi">10.1145/3641289</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>White</surname><given-names>CB</given-names> </name><name name-style="western"><surname>Wendling</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lampotang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lizdas</surname><given-names>D</given-names> </name><name name-style="western"><surname>Cordar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lok</surname><given-names>B</given-names> </name></person-group><article-title>The role for virtual 
patients in the future of medical education</article-title><source>Acad Med</source><year>2017</year><month>01</month><volume>92</volume><issue>1</issue><fpage>9</fpage><lpage>10</lpage><pub-id pub-id-type="doi">10.1097/ACM.0000000000001487</pub-id><pub-id pub-id-type="medline">28027092</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Parente</surname><given-names>DJ</given-names> </name></person-group><article-title>Generative artificial intelligence and large language models in primary care medical education</article-title><source>Fam Med</source><year>2024</year><month>10</month><volume>56</volume><issue>9</issue><fpage>534</fpage><lpage>540</lpage><pub-id pub-id-type="doi">10.22454/FamMed.2024.775525</pub-id><pub-id pub-id-type="medline">39207784</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eysenbach</surname><given-names>G</given-names> </name></person-group><article-title>The role of ChatGPT, generative language models, and artificial intelligence in medical education: a conversation with ChatGPT and a call for papers</article-title><source>JMIR Med Educ</source><year>2023</year><month>03</month><day>6</day><volume>9</volume><fpage>e46885</fpage><pub-id pub-id-type="doi">10.2196/46885</pub-id><pub-id pub-id-type="medline">36863937</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mohammad</surname><given-names>B</given-names> </name><name name-style="western"><surname>Supti</surname><given-names>T</given-names> </name><name name-style="western"><surname>Alzubaidi</surname><given-names>M</given-names> </name><etal/></person-group><article-title>The pros and cons of using ChatGPT 
in medical education: a scoping review</article-title><source>Stud Health Technol Inform</source><year>2023</year><month>06</month><day>29</day><volume>305</volume><fpage>644</fpage><lpage>647</lpage><pub-id pub-id-type="doi">10.3233/SHTI230580</pub-id><pub-id pub-id-type="medline">37387114</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yamamoto</surname><given-names>A</given-names> </name><name name-style="western"><surname>Koda</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ogawa</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Enhancing medical interview skills through AI-simulated patient interactions: nonrandomized controlled trial</article-title><source>JMIR Med Educ</source><year>2024</year><month>09</month><day>23</day><volume>10</volume><fpage>e58753</fpage><pub-id pub-id-type="doi">10.2196/58753</pub-id><pub-id pub-id-type="medline">39312284</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Holderried</surname><given-names>F</given-names> </name><name name-style="western"><surname>Stegemann-Philipps</surname><given-names>C</given-names> </name><name name-style="western"><surname>Herschbach</surname><given-names>L</given-names> </name><etal/></person-group><article-title>A generative pretrained transformer (GPT)-powered chatbot as a simulated patient to practice history taking: prospective, mixed methods study</article-title><source>JMIR Med Educ</source><year>2024</year><month>01</month><day>16</day><volume>10</volume><fpage>e53961</fpage><pub-id pub-id-type="doi">10.2196/53961</pub-id><pub-id pub-id-type="medline">38227363</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Holderried</surname><given-names>F</given-names> </name><name name-style="western"><surname>Stegemann-Philipps</surname><given-names>C</given-names> </name><name name-style="western"><surname>Herrmann-Werner</surname><given-names>A</given-names> </name><etal/></person-group><article-title>A language model-powered simulated patient with automated feedback for history taking: prospective study</article-title><source>JMIR Med Educ</source><year>2024</year><month>08</month><day>16</day><volume>10</volume><fpage>e59213</fpage><pub-id pub-id-type="doi">10.2196/59213</pub-id><pub-id pub-id-type="medline">39150749</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>TY</given-names> </name><name name-style="western"><surname>Hsieh</surname><given-names>PH</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>YC</given-names> </name></person-group><article-title>Performance comparison of junior residents and ChatGPT in the objective structured clinical examination (OSCE) for medical history taking and documentation of medical records: development and usability study</article-title><source>JMIR Med Educ</source><year>2024</year><month>11</month><day>21</day><volume>10</volume><fpage>e59902</fpage><pub-id pub-id-type="doi">10.2196/59902</pub-id><pub-id pub-id-type="medline">39622713</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schouten</surname><given-names>BC</given-names> </name><name name-style="western"><surname>Meeuwesen</surname><given-names>L</given-names> </name></person-group><article-title>Cultural differences in medical communication: a review of the literature</article-title><source>Patient 
Educ Couns</source><year>2006</year><month>12</month><volume>64</volume><issue>1-3</issue><fpage>21</fpage><lpage>34</lpage><pub-id pub-id-type="doi">10.1016/j.pec.2005.11.014</pub-id><pub-id pub-id-type="medline">16427760</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hyd&#x00E9;n</surname><given-names>LC</given-names> </name><name name-style="western"><surname>Mishler</surname><given-names>EG</given-names> </name></person-group><article-title>Language and medicine</article-title><source>Ann Rev Appl Linguist</source><year>1999</year><month>01</month><volume>19</volume><fpage>174</fpage><lpage>192</lpage><pub-id pub-id-type="doi">10.1017/S0267190599190093</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Meyer</surname><given-names>A</given-names> </name><name name-style="western"><surname>Riese</surname><given-names>J</given-names> </name><name name-style="western"><surname>Streichert</surname><given-names>T</given-names> </name></person-group><article-title>Comparison of the performance of GPT-3.5 and GPT-4 with that of medical students on the written German medical licensing examination: observational study</article-title><source>JMIR Med Educ</source><year>2024</year><month>02</month><day>8</day><volume>10</volume><fpage>e50965</fpage><pub-id pub-id-type="doi">10.2196/50965</pub-id><pub-id pub-id-type="medline">38329802</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dale MacLaine</surname><given-names>T</given-names> </name><name name-style="western"><surname>Lowe</surname><given-names>N</given-names> </name><name 
name-style="western"><surname>Dale</surname><given-names>J</given-names> </name></person-group><article-title>The use of simulation in medical student education on the topic of breaking bad news: a systematic review</article-title><source>Patient Educ Couns</source><year>2021</year><month>11</month><volume>104</volume><issue>11</issue><fpage>2670</fpage><lpage>2681</lpage><pub-id pub-id-type="doi">10.1016/j.pec.2021.04.004</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Broglio</surname><given-names>K</given-names> </name></person-group><article-title>Randomization in clinical trials: permuted blocks and stratification</article-title><source>JAMA</source><year>2018</year><month>06</month><day>5</day><volume>319</volume><issue>21</issue><fpage>2223</fpage><lpage>2224</lpage><pub-id pub-id-type="doi">10.1001/jama.2018.6360</pub-id><pub-id pub-id-type="medline">29872845</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Madrazo</surname><given-names>L</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>CB</given-names> </name><name name-style="western"><surname>McConnell</surname><given-names>M</given-names> </name><name name-style="western"><surname>Khamisa</surname><given-names>K</given-names> </name></person-group><article-title>Self-assessment differences between genders in a low-stakes objective structured clinical examination (OSCE)</article-title><source>BMC Res Notes</source><year>2018</year><month>06</month><day>15</day><volume>11</volume><issue>1</issue><fpage>393</fpage><pub-id pub-id-type="doi">10.1186/s13104-018-3494-3</pub-id><pub-id pub-id-type="medline">29903050</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation 
citation-type="web"><article-title>Introducing GPTs</article-title><source>OpenAI</source><year>2023</year><access-date>2025-05-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://openai.com/index/introducing-gpts">https://openai.com/index/introducing-gpts</ext-link></comment></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hersh</surname><given-names>L</given-names> </name><name name-style="western"><surname>Salzman</surname><given-names>B</given-names> </name><name name-style="western"><surname>Snyderman</surname><given-names>D</given-names> </name></person-group><article-title>Health literacy in primary care practice</article-title><source>Am Fam Physician</source><year>2015</year><month>07</month><day>15</day><volume>92</volume><issue>2</issue><fpage>118</fpage><lpage>124</lpage><pub-id pub-id-type="medline">26176370</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Le</surname><given-names>T</given-names> </name><name name-style="western"><surname>Bhushan</surname><given-names>V</given-names> </name><name name-style="western"><surname>Sheikh-Ali</surname><given-names>M</given-names> </name><name name-style="western"><surname>Shahin</surname><given-names>FA</given-names> </name></person-group><source>First Aid for the USMLE Step 2 CS</source><year>2012</year><edition>4</edition><publisher-name>McGraw-Hill Medical</publisher-name></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="web"><person-group person-group-type="author"><collab>Common Achievement Tests Organization</collab></person-group><article-title>Learning and assessment items related to the skills and attitudes required of students participating in clinical participatory clinical practice (version 
4.2)</article-title><source>CATO</source><year>2022</year><access-date>2025-05-12</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.cato.or.jp/pdf/osce_42.pdf">https://www.cato.or.jp/pdf/osce_42.pdf</ext-link></comment></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Mardia</surname><given-names>KV</given-names> </name></person-group><article-title>Tests of univariate and multivariate normality</article-title><source>Handbook of Statistics</source><year>1980</year><volume>1</volume><publisher-name>Elsevier</publisher-name><fpage>279</fpage><lpage>320</lpage><pub-id pub-id-type="doi">10.1016/S0169-7161(80)01011-5</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Limpanopparat</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gibson</surname><given-names>E</given-names> </name><name name-style="western"><surname>Harris</surname><given-names>DA</given-names> </name></person-group><article-title>User engagement, attitudes, and the effectiveness of chatbots as a mental health intervention: a systematic review</article-title><source>Comput Hum Behav Artif Hum</source><year>2024</year><month>08</month><volume>2</volume><issue>2</issue><fpage>100081</fpage><pub-id pub-id-type="doi">10.1016/j.chbah.2024.100081</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cartwright</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Knudson</surname><given-names>MP</given-names> </name></person-group><article-title>Evaluation of acute abdominal pain in adults</article-title><source>Am Fam 
Physician</source><year>2008</year><month>04</month><day>1</day><volume>77</volume><issue>7</issue><fpage>971</fpage><lpage>978</lpage><pub-id pub-id-type="medline">18441863</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lafleur</surname><given-names>A</given-names> </name><name name-style="western"><surname>C&#x00F4;t&#x00E9;</surname><given-names>L</given-names> </name><name name-style="western"><surname>Leppink</surname><given-names>J</given-names> </name></person-group><article-title>Influences of OSCE design on students&#x2019; diagnostic reasoning</article-title><source>Med Educ</source><year>2015</year><month>02</month><volume>49</volume><issue>2</issue><fpage>203</fpage><lpage>214</lpage><pub-id pub-id-type="doi">10.1111/medu.12635</pub-id><pub-id pub-id-type="medline">25626751</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dangi</surname><given-names>RR</given-names> </name><name name-style="western"><surname>Sharma</surname><given-names>A</given-names> </name><name name-style="western"><surname>Vageriya</surname><given-names>V</given-names> </name></person-group><article-title>Transforming healthcare in low-resource settings with artificial intelligence: recent developments and outcomes</article-title><source>Public Health Nurs</source><year>2025</year><volume>42</volume><issue>2</issue><fpage>1017</fpage><lpage>1030</lpage><pub-id pub-id-type="doi">10.1111/phn.13500</pub-id><pub-id pub-id-type="medline">39629887</pub-id></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kalyan</surname><given-names>KS</given-names> </name><name 
name-style="western"><surname>Sangeetha</surname><given-names>S</given-names> </name></person-group><article-title>SECNLP: a survey of embeddings in clinical natural language processing</article-title><source>J Biomed Inform</source><year>2020</year><month>01</month><volume>101</volume><fpage>103323</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2019.103323</pub-id><pub-id pub-id-type="medline">31711972</pub-id></nlm-citation></ref><ref id="ref67"><label>67</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Acosta</surname><given-names>JN</given-names> </name><name name-style="western"><surname>Falcone</surname><given-names>GJ</given-names> </name><name name-style="western"><surname>Rajpurkar</surname><given-names>P</given-names> </name><name name-style="western"><surname>Topol</surname><given-names>EJ</given-names> </name></person-group><article-title>Multimodal biomedical AI</article-title><source>Nat Med</source><year>2022</year><month>09</month><volume>28</volume><issue>9</issue><fpage>1773</fpage><lpage>1784</lpage><pub-id pub-id-type="doi">10.1038/s41591-022-01981-2</pub-id><pub-id pub-id-type="medline">36109635</pub-id></nlm-citation></ref><ref id="ref68"><label>68</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Feigerlova</surname><given-names>E</given-names> </name><name name-style="western"><surname>Hani</surname><given-names>H</given-names> </name><name name-style="western"><surname>Hothersall-Davies</surname><given-names>E</given-names> </name></person-group><article-title>A systematic review of the impact of artificial intelligence on educational outcomes in health professions education</article-title><source>BMC Med Educ</source><year>2025</year><month>01</month><day>27</day><volume>25</volume><issue>1</issue><fpage>129</fpage><pub-id pub-id-type="doi">10.1186/s12909-025-06719-5</pub-id><pub-id 
pub-id-type="medline">39871336</pub-id></nlm-citation></ref><ref id="ref69"><label>69</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Duan</surname><given-names>W</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>S</given-names> </name><name name-style="western"><surname>Scalia</surname><given-names>MJ</given-names> </name><etal/></person-group><article-title>Understanding the evolvement of trust over time within human-AI teams</article-title><source>Proc ACM Hum-Comput Interact</source><year>2024</year><month>11</month><day>7</day><volume>8</volume><issue>CSCW2</issue><fpage>1</fpage><lpage>31</lpage><pub-id pub-id-type="doi">10.1145/3687060</pub-id></nlm-citation></ref><ref id="ref70"><label>70</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Raisch</surname><given-names>S</given-names> </name><name name-style="western"><surname>Fomina</surname><given-names>K</given-names> </name></person-group><article-title>Combining human and artificial intelligence: hybrid problem-solving in organizations</article-title><source>Acad Manage Rev</source><year>2025</year><month>04</month><volume>50</volume><issue>2</issue><fpage>441</fpage><lpage>464</lpage><pub-id pub-id-type="doi">10.5465/amr.2021.0421</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Details of GPTs setting for artificial intelligence (AI)&#x2013;based medical interview training.</p><media xlink:href="mededu_v11i1e77332_app1.docx" xlink:title="DOCX File, 26 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>An example of transcription.</p><media xlink:href="mededu_v11i1e77332_app2.docx" xlink:title="DOCX File, 25 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix
3</label><p>Supplementary statistical analysis.</p><media xlink:href="mededu_v11i1e77332_app3.docx" xlink:title="DOCX File, 27 KB"/></supplementary-material><supplementary-material id="app4"><label>Checklist 1</label><p>CONSORT-EHEALTH (Consolidated Standards of Reporting Trials of Electronic and Mobile Health Applications and Online Telehealth) checklist.</p><media xlink:href="mededu_v11i1e77332_app4.pdf" xlink:title="PDF File, 824 KB"/></supplementary-material></app-group></back></article>