<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Educ</journal-id><journal-id journal-id-type="publisher-id">mededu</journal-id><journal-id journal-id-type="index">20</journal-id><journal-title>JMIR Medical Education</journal-title><abbrev-journal-title>JMIR Med Educ</abbrev-journal-title><issn pub-type="epub">2369-3762</issn></journal-meta><article-meta><article-id pub-id-type="publisher-id">54067</article-id><article-id pub-id-type="doi">10.2196/54067</article-id><title-group><article-title>Using ChatGPT in Psychiatry to Design Script Concordance Tests in Undergraduate Medical Education: Mixed Methods Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Hudon</surname><given-names>Alexandre</given-names></name><degrees>BEng, MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kiepura</surname><given-names>Barnab&#x00E9;</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Pelletier</surname><given-names>Myriam</given-names></name><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Phan</surname><given-names>V&#x00E9;ronique</given-names></name><degrees>MSc, MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Psychiatry and Addictology, University of Montreal</institution>, <addr-line>Montreal</addr-line><addr-line>QC</addr-line>, 
<country>Canada</country></aff><aff id="aff2"><institution>Faculty of Medicine, Universit&#x00E9; Laval</institution>, <addr-line>Qu&#x00E9;bec</addr-line><addr-line>QC</addr-line>, <country>Canada</country></aff><aff id="aff3"><institution>Department of Pediatrics, Universit&#x00E9; de Montr&#x00E9;al</institution>, <addr-line>Montreal</addr-line><addr-line>QC</addr-line>, <country>Canada</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Eysenbach</surname><given-names>Gunther</given-names></name></contrib><contrib contrib-type="editor"><name name-style="western"><surname>Mogali</surname><given-names>Sreenivasulu Reddy</given-names></name></contrib><contrib contrib-type="editor"><name name-style="western"><surname>Cardoso</surname><given-names>Taiane de Azevedo</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Mlakar</surname><given-names>Izidor</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Kimmerle</surname><given-names>Joachim</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Alexandre Hudon, BEng, MD<email>alexandre.hudon.1@umontreal.ca</email></corresp></author-notes><pub-date pub-type="collection"><year>2024</year></pub-date><pub-date pub-type="epub"><day>4</day><month>4</month><year>2024</year></pub-date><volume>10</volume><elocation-id>e54067</elocation-id><history><date date-type="received"><day>28</day><month>10</month><year>2023</year></date><date date-type="rev-recd"><day>06</day><month>03</month><year>2024</year></date><date date-type="accepted"><day>07</day><month>03</month><year>2024</year></date></history><copyright-statement>&#x00A9; Alexandre Hudon, Barnab&#x00E9; Kiepura, Myriam Pelletier, V&#x00E9;ronique Phan. 
Originally published in JMIR Medical Education (<ext-link ext-link-type="uri" xlink:href="https://mededu.jmir.org">https://mededu.jmir.org</ext-link>), 4.4.2024. </copyright-statement><copyright-year>2024</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Education, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://mededu.jmir.org/">https://mededu.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://mededu.jmir.org/2024/1/e54067"/><abstract><sec><title>Background</title><p>Undergraduate medical studies represent a wide range of learning opportunities served in the form of various teaching-learning modalities for medical learners. A clinical scenario is frequently used as a modality, followed by multiple-choice and open-ended questions among other learning and teaching methods. As such, script concordance tests (SCTs) can be used to promote a higher level of clinical reasoning. 
Recent technological developments have made generative artificial intelligence (AI)&#x2013;based systems such as ChatGPT (OpenAI) available to assist clinician-educators in creating instructional materials.</p></sec><sec><title>Objective</title><p>The main objective of this project is to explore how SCTs generated by ChatGPT compared to SCTs produced by clinical experts on 3 major elements: the scenario (stem), clinical questions, and expert opinion.</p></sec><sec sec-type="methods"><title>Methods</title><p>This mixed method study evaluated 3 ChatGPT-generated SCTs with 3 expert-created SCTs using a predefined framework. Clinician-educators as well as resident doctors in psychiatry involved in undergraduate medical education in Quebec, Canada, evaluated via a web-based survey the 6 SCTs on 3 criteria: the scenario, clinical questions, and expert opinion. They were also asked to describe the strengths and weaknesses of the SCTs.</p></sec><sec sec-type="results"><title>Results</title><p>A total of 102 respondents assessed the SCTs. There were no significant distinctions between the 2 types of SCTs concerning the scenario (<italic>P</italic>=.84), clinical questions (<italic>P</italic>=.99), and expert opinion (<italic>P</italic>=.07), as interpreted by the respondents. Indeed, respondents struggled to differentiate between ChatGPT- and expert-generated SCTs. ChatGPT showcased promise in expediting SCT design, aligning well with <italic>Diagnostic and Statistical Manual of Mental Disorders, Fifth Edition</italic> criteria, albeit with a tendency toward caricatured scenarios and simplistic content.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study is the first to concentrate on the design of SCTs supported by AI in a period where medicine is changing swiftly and where technologies generated from AI are expanding much faster. 
This study suggests that ChatGPT can be a valuable tool in creating educational materials, and further validation is essential to ensure educational efficacy and accuracy.</p></sec></abstract><kwd-group><kwd>psychiatry</kwd><kwd>artificial intelligence</kwd><kwd>medical education</kwd><kwd>concordance scripts</kwd><kwd>machine learning</kwd><kwd>ChatGPT</kwd><kwd>evaluation</kwd><kwd>education</kwd><kwd>medical learners</kwd><kwd>learning</kwd><kwd>teaching</kwd><kwd>design</kwd><kwd>support</kwd><kwd>tool</kwd><kwd>validation</kwd><kwd>educational</kwd><kwd>accuracy</kwd><kwd>clinical questions</kwd><kwd>educators</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Undergraduate Medical Education</title><p>Undergraduate medical studies offer a wide range of learning opportunities through various teaching methods for medical students [<xref ref-type="bibr" rid="ref1">1</xref>]. The competencies required are partly dictated by the Medical Council of Canada, and these skills are regularly assessed throughout the undergraduate medical education (UGME) program. Training programs must incorporate clinical reasoning instruction to aid students in developing this crucial competency [<xref ref-type="bibr" rid="ref2">2</xref>]. The Bloom taxonomy is a useful tool for clearly identifying the cognitive level targeted by different teaching methods [<xref ref-type="bibr" rid="ref3">3</xref>]. The taxonomy helps determine the appropriate methods for teaching and evaluating students based on the desired level of competency. Although various teaching methods are used, clinical situations followed by multiple-choice questions, as well as open-ended questions, are commonly used initially [<xref ref-type="bibr" rid="ref4">4</xref>]. However, these types of questions have limitations when it comes to assessing a student&#x2019;s analysis and clinical reasoning [<xref ref-type="bibr" rid="ref5">5</xref>]. 
To address this, script concordance tests (SCTs) can be used to enhance the development of higher-level clinical reasoning skills [<xref ref-type="bibr" rid="ref6">6</xref>].</p></sec><sec id="s1-2"><title>The Use of SCTs</title><p>Methods such as SCTs are grounded in clinical cases designed to mirror real-life clinical scenarios, where information may be incomplete or unclear. The process involves presenting an initial vignette with some preliminary hypotheses, followed by additional information given to the student. SCTs assess how this new information influences the likelihood of the initial hypotheses being considered as correct or relevant [<xref ref-type="bibr" rid="ref6">6</xref>]. Students express the impact on the initial hypothesis using a 5-level Likert scale ranging from &#x201C;much less likely&#x201D; to &#x201C;much more likely.&#x201D; This process serves as a proxy for clinical reasoning, aiming to replicate decision-making in actual clinical practice. Typically, specialists in the subject develop the cases, and a robust SCT should comprise a minimum of 60 questions for strong internal validity [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. The student&#x2019;s responses are then compared to those of an expert panel, ideally consisting of at least 10 experts. Research suggests that 15 experts are necessary for high-impact testing, with minimal added benefit beyond 20 experts [<xref ref-type="bibr" rid="ref10">10</xref>]. A notable limitation of SCTs is acceptability; a study on SCT acceptability with surgical residents revealed that experts tend to be more satisfied than students. Experts found the questions to be representative of real-life clinical settings [<xref ref-type="bibr" rid="ref11">11</xref>]. However, SCTs may potentially provide a more precise assessment of students&#x2019; clinical reasoning compared to multiple-choice questions [<xref ref-type="bibr" rid="ref12">12</xref>]. 
In psychiatry, the use of SCTs is emerging. Early data indicate good internal validity, with a correlation between learners&#x2019; education level, test scores, and improvement in evaluations tested before and after a psychiatry rotation [<xref ref-type="bibr" rid="ref13">13</xref>].</p><p>The creation of SCTs demands a substantial investment of human resources [<xref ref-type="bibr" rid="ref14">14</xref>]. Moreover, the questions are influenced by the designers&#x2019; inherent biases, necessitating multiple rounds of refinement with field experts [<xref ref-type="bibr" rid="ref15">15</xref>]. This iterative process can lead to delays in developing educational materials. In a time when efficiency is crucial&#x2014;such as during the COVID-19 pandemic or in situations with limited teaching resources&#x2014;swift adaptations and improvements in the effectiveness of certain teaching methods may be imperative to uphold the quality of medical training [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>].</p></sec><sec id="s1-3"><title>Large Language Models and Their Uses in SCT Design</title><p>For clinician-educators seeking assistance in crafting educational materials, recent advancements include the availability of generative artificial intelligence (AI) tools, including large language models (LLM) such as ChatGPT (OpenAI) [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. Originally designed for the public, these tools are currently under scrutiny by various companies and educational institutions to assess their limitations and advantages [<xref ref-type="bibr" rid="ref20">20</xref>]. Numerous studies highlight the tool&#x2019;s utility in developing clinical vignettes within medical studies and other health science domains [<xref ref-type="bibr" rid="ref21">21</xref>]. However, to date, there is no study demonstrating the educational quality of SCT vignettes produced using ChatGPT. 
Before integrating tools such as ChatGPT into the design of educational materials, it is crucial to evaluate the quality of scenarios, questions, and related expertise generated by ChatGPT, as well as its ability to assess clinical reasoning. It is equally important to consider the potential limitations in using such tools for medical education material design. Although these generative models can be beneficial, they may also introduce errors that limit their usefulness [<xref ref-type="bibr" rid="ref18">18</xref>]. As for medical students&#x2019; attitude toward AI, a recent study on the subject reported that medical students viewed AI in medicine as reliable, trustworthy, and technically competent, although they expressed limited confidence in its capabilities. While acknowledging AI&#x2019;s intelligence, they did not consider it to be anthropomorphic. The consensus was that fundamental AI knowledge, covering its operation, ethics, applications, reliability, and potential risks, should be integrated into medical education [<xref ref-type="bibr" rid="ref22">22</xref>].</p></sec><sec id="s1-4"><title>Objective and Hypotheses</title><p>The primary goal of this project is to investigate how SCTs generated by ChatGPT compare to those produced by clinical experts in 3 key aspects: the scenario (stem), clinical questions, and expert opinion. A secondary objective is to assess whether blind evaluators can distinguish between an SCT generated by ChatGPT and one crafted by experts. Additionally, another subobjective aims to identify the advantages and limitations of the clinical vignettes under examination. Our hypothesis posits that the clinical SCTs created by ChatGPT will likely be considered acceptable by the medical community in terms of scenarios and clinical questions. However, we anticipate that their use with learners may necessitate supervision from clinical experts. 
Preliminary studies have indicated that AI is a promising tool to aid clinician-educators in designing clinical scenarios. Still, given that the underlying algorithms rely on potentially erroneous data, it is crucial to validate and fine-tune the content before using them as educational materials for learners.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Ethical Considerations</title><p>This study received the approval of the ethics of research committee of the Universit&#x00E9; de Montr&#x00E9;al (approval 2023-4906). Participants were given a description of the study in the letter they received and were asked for their consent for their data to be used. Data were anonymized. The participants received no compensation for this study.</p></sec><sec id="s2-2"><title>Recruitment</title><p>The project was aimed at residents and clinician-educators in the field of psychiatry since SCTs are already used in UGME programs. To be included in the study, participants needed to be either clinician-educators in the field of psychiatry or medical residents in psychiatry affiliated with 1 of Qu&#x00E9;bec&#x2019;s 4 universities that offer UGME programs (McGill University, Universit&#x00E9; de Montr&#x00E9;al, Universit&#x00E9; de Sherbrooke, and Universit&#x00E9; Laval). Psychiatrists not involved in a UGME program were excluded. A total of 100 participants were anticipated for this study, according to similar studies to determine whether there were significant differences between clinical vignettes developed by ChatGPT or those developed by experts [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. 
Convenience sampling was conducted with the help of the departments of psychiatry of the 4 universities listed above, and a letter was sent out by email that included a link to a survey that contained all the questions from this study.</p></sec><sec id="s2-3"><title>Data Collection</title><p>A web-based survey, hosted on LimeSurvey (LimeSurvey GmbH), featured 3 SCTs generated by ChatGPT and 3 SCTs previously crafted by experts in the field, currently used in the digital learning environment at the Universit&#x00E9; de Montr&#x00E9;al. The experts consisted of experienced psychiatrists and primary care physicians who underwent training in SCT concepts. As the primary language for the participants is French, the survey was conducted in French. The original, comprehensive survey in French is available in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, with an English translation provided in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. Participants assessed the SCTs based on their respective roles. Due to the anonymous nature of the survey and the inclusion criteria requiring respondents to be either psychiatry residents or physicians, additional demographic data were not collected. The study did, however, document information on the participants&#x2019; level of training (resident doctors vs clinician-educators) and their level of clinical experience (0-5, 6-10, or &#x2265;10 y).</p><p>Each SCT was evaluated by the participants using the conceptual framework developed by Fournier et al [<xref ref-type="bibr" rid="ref9">9</xref>] for creating SCTs. This conceptual framework provides a general guideline for SCTs. The SCTs involve real-life medical situations, each described in a short scenario with some uncertainty. To solve the problem presented in each scenario, there are multiple relevant options available for the medical student. Each scenario, along with its questions, is considered an item. 
The questions are divided into 3 parts. The first part provides a relevant diagnostic or management option. The second part introduces a new clinical finding, such as a physical sign or test result. The third part uses a 5-point Likert scale for examinees to express their decision on how the new finding affects the option, considering direction (positive, negative, or neutral) and intensity. Examinees are tasked with determining the impact of the new information, and the Likert scale is used to capture their decisions, as script theory suggests that clinical reasoning involves qualitative judgments.</p><p>Three components are evaluated by this framework when constructing SCTs: the scenario, clinical questions, and expert opinion. The scenario refers to the stem presented by the SCTs. The clinical questions are the individual questions adding a key element to the stem to stimulate clinical reasoning. The expert opinion refers to the opinion of an expert in the field giving a subjective appreciation as to the ability of the SCT to generate clinical reasoning. The elements of this framework are presented in <xref ref-type="table" rid="table1">Table 1</xref>. A common SCT template was used for both SCTs generated by ChatGPT and the experts in the field to ensure that the presentation of the SCTs does not create bias.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>The script concordance test (SCT) components with their relevant questions as per the framework by Fournier et al [<xref ref-type="bibr" rid="ref9">9</xref>] for the evaluation and conception of SCTs.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">SCT components and questions</td><td align="left" valign="bottom">Potential answers</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="3"><bold>Scenarios</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">S1. 
Describes a challenging circumstance, even for experts</td><td align="left" valign="top">Yes or no</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">S2. Describes an appropriate situation for test takers</td><td align="left" valign="top">Yes or no</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">S3. The scenario is necessary to understand the question and to set the context</td><td align="left" valign="top">Yes or no</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">S4. The clinical presentation is typical</td><td align="left" valign="top">Yes or no</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">S5. The scenario is well written</td><td align="left" valign="top">Yes or no</td></tr><tr><td align="left" valign="top" colspan="3"><bold>Clinical questions</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Q1. The questions are developed using a key element approach</td><td align="left" valign="top">Yes or no</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Q2. In the opinion of experts, the options are relevant</td><td align="left" valign="top">Yes or no</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Q3. The same option is not found in 2 consecutive questions</td><td align="left" valign="top">Yes or no</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Q4. The new information (second column) makes it possible to test the link between the new information and the option (first column) in the context described</td><td align="left" valign="top">Yes or no</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Q5. Likert-scale anchors are clearly defined and unambiguous</td><td align="left" valign="top">Yes or no</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Q6. 
Questions are expanded to distribute responses equally across all Likert-scale values</td><td align="left" valign="top">Yes or no</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Q7. Questions are designed to provide a balance between low and high variability</td><td align="left" valign="top">Yes or no</td></tr></tbody></table></table-wrap></sec><sec id="s2-4"><title>Expert Opinion</title><p>The participants needed to state if the SCT was generated (or not) by ChatGPT (single-blinded mode), give their main hypothesis as to the main diagnosis studied in the SCT, and state in free-text style the strengths and weaknesses of each SCT.</p></sec><sec id="s2-5"><title>Creating SCTs With ChatGPT</title><p>The ChatGPT tool operates through commands or prompts to enhance its performance. These prompts must offer a context of use, an expertise level, and a specific task. Following the typical steps involved in creating SCTs, we designed the prompts based on the approach outlined in Fournier et al [<xref ref-type="bibr" rid="ref9">9</xref>]. In this initial study on the subject, we did not explore different sets of prompts, and the generated SCTs were used without modification.</p><p>The following commands were entered into ChatGPT to create the SCTs:</p><disp-quote><p>1. Act as an expert in university pedagogy of health sciences, in the field of psychiatry.</p><p>2. Also acts as an expert in designing thumbnails by script matching.</p><p>3. Generates a script matching vignette that includes three questions for the following diagnosis: (diagnosis name), according to DSM-5.</p><p>4. 
Create questions linked to the vignette which start with if you think of &#x201C;a diagnostic hypothesis&#x201D; and you find &#x201C;a sign or a symptom&#x201D;, this hypothesis is probable or not (from &#x2212;2 to 2, using a Likert scale)</p></disp-quote><p>Choosing the ChatGPT 3.5 algorithm as the main LLM for this task made sense for a few key reasons. This algorithm has a vast knowledge base covering a wide array of medical topics, making it an adequate tool for instructors crafting medical questions for medical students [<xref ref-type="bibr" rid="ref25">25</xref>]. Its natural language comprehension, used in various medical fields, aids in question development [<xref ref-type="bibr" rid="ref26">26</xref>]. The model&#x2019;s flexibility allows educators to create different types of questions to suit various learning styles and assessment methods. Notably, ChatGPT 3.5 supports multiple languages, including French, making it accessible for instructors in French-speaking regions. The model&#x2019;s ability to grasp context enables the creation of questions that build on existing knowledge, providing a more cohesive learning experience [<xref ref-type="bibr" rid="ref27">27</xref>]. Educators can save time with the model&#x2019;s human-like text generation based on specific prompts or instructions. It is also crucial to highlight that this algorithm is open access and free, a substantial consideration when cost is a factor in choosing educational tools. Additionally, it is noteworthy that generating an SCT takes less than a minute on average with this tool.</p></sec><sec id="s2-6"><title>Selecting Existing Expert-Created SCTs</title><p>Three SCTs were chosen at random from the 10 SCTs currently available to learners on the digital learning platform for the clinical psychiatry clerkship rotation at Universit&#x00E9; de Montr&#x00E9;al. 
As stated above, a total of 3 ChatGPT-generated SCTs and 3 expert-created SCTs were chosen to limit the possibility that chance alone would identify the SCTs generated by ChatGPT from those produced by experts.</p></sec><sec id="s2-7"><title>Statistical Analysis</title><p>A combined mixed method analysis was conducted with qualitative and quantitative components.</p><sec id="s2-7-1"><title>Qualitative Analysis</title><p>We conducted a content analysis by examining participants&#x2019; open responses regarding the advantages and drawbacks of the presented SCTs. The objective was to pinpoint the primary types of benefits and limitations for emphasis. After receiving the open-ended survey responses, we individually extracted emergent themes from respondents using the grounded theory design framework [<xref ref-type="bibr" rid="ref28">28</xref>]. Subsequently, AH and MP created an initial classification scheme based on these emerging themes. They applied this scheme to annotate the open-ended responses using the Qualitative Data Analysis Miner program (Provalis Research). Any discrepancies in annotations among responders were deliberated upon until a consensus was reached.</p></sec><sec id="s2-7-2"><title>Quantitative Analysis</title><p>We conducted a descriptive statistical analysis to showcase the proportion of participants accurately identifying SCTs generated by ChatGPT compared to those crafted by experts. This same approach was applied to diagnostic hypotheses.</p><p>Additionally, we performed a descriptive statistical analysis to compare SCT scores based on the domains of the scenario and clinical questions, following the conceptual framework by Fournier et al [<xref ref-type="bibr" rid="ref9">9</xref>]. Using a <italic>&#x03C7;</italic><sup>2</sup> test, we assessed the average results within each domain for the SCTs generated by ChatGPT and those by the experts. 
This allowed us to observe any statistical differences in the responses (yes or no) for various criteria within the scenario and clinical questions domains. We established a statistical significance threshold of <italic>P</italic>&#x003C;.05 to identify noteworthy observations between the 2 types of SCTs.</p></sec></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Participant Characteristics</title><p>A total of 102 participants completed the survey. Considering that there are an estimated 400 teaching clinicians in psychiatry in Quebec (about a third of the 1200 practicing psychiatrists), as well as 235 medical residents in psychiatry, this represents 16.1% (102/635) of the pool of potential responders. From the 102 participants, 45 (44.1%) identified as medical residents in psychiatry, 2 (2%) identified as teaching psychiatrists with less than 5 years of experience, 16 (15.7%) identified as teaching psychiatrists with between 6 and 10 years of experience, and 39 (38.2%) identified as teaching psychiatrists with more than 10 years of experience.</p></sec><sec id="s3-2"><title>SCT Evaluation</title><p>The pooled averages of evaluations of the SCTs for each domain of assessment, stratified by the respondent categories, are shown in <xref ref-type="table" rid="table2">Table 2</xref>. A complete table reporting the evaluations of the respondents for each individual component of the domains of assessment is available in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>. SCTs 2, 3, and 4 were generated by ChatGPT. It can be observed that there was no significant distinction between the pooled results for the SCTs generated by ChatGPT as compared to those generated by experts in the field. 
The questions related to the scenario component of the SCTs received better approval from the participants as compared to the clinical questions component.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Responses for every component of the script concordance test (SCT) evaluations for the 6 SCTs, stratified by respondent categories. &#x201C;Yes&#x201D; indicates that the respondents agreed that the domain was elaborated appropriately.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">SCT and evaluated component</td><td align="left" valign="top">Medical residents (n=45), n (%)</td><td align="left" valign="top">Teaching physicians (&#x2264;5 y; n=2), n (%)</td><td align="left" valign="top">Teaching physicians (6-10 y; n=16), n (%)</td><td align="left" valign="top">Teaching physicians (&#x2265;10 y; n=39), n (%)</td><td align="left" valign="top">Pooled average<break/>(N=102), n (%)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="7"><bold>SCT 1</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Scenario (yes)</td><td align="left" valign="top">30 (67)</td><td align="left" valign="top">2 (100)</td><td align="left" valign="top">12 (75)</td><td align="left" valign="top">31 (79)</td><td align="left" valign="top">75 (74)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Clinical questions (yes)</td><td align="left" valign="top">29 (64)</td><td align="left" valign="top">2 (100)</td><td align="left" valign="top">13 (81)</td><td align="left" valign="top">28 (72)</td><td align="left" valign="top">72 (71)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Is it a ChatGPT-generated scenario? 
(correct answers)</td><td align="left" valign="top">25 (44)</td><td align="left" valign="top">1 (50)</td><td align="left" valign="top">6 (38)</td><td align="left" valign="top">18 (54)</td><td align="left" valign="top">50 (49)</td></tr><tr><td align="left" valign="top" colspan="7"><bold>SCT 2</bold><sup><xref ref-type="table-fn" rid="table2fn1"><bold>a</bold></xref></sup></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Scenario (yes)</td><td align="left" valign="top">29 (64)</td><td align="left" valign="top">2 (100)</td><td align="left" valign="top">13 (81)</td><td align="left" valign="top">25 (64)</td><td align="left" valign="top">69 (68)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Clinical questions (yes)</td><td align="left" valign="top">30 (67)</td><td align="left" valign="top">2 (100)</td><td align="left" valign="top">14 (88)</td><td align="left" valign="top">25 (64)</td><td align="left" valign="top">71 (70)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Is it a ChatGPT-generated scenario? 
(correct answers)</td><td align="left" valign="top">22 (49)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">6 (38)</td><td align="left" valign="top">18 (46)</td><td align="left" valign="top">46 (45)</td></tr><tr><td align="left" valign="top" colspan="7"><bold>SCT 3</bold><sup><xref ref-type="table-fn" rid="table2fn1"><bold>a</bold></xref></sup></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Scenario (yes)</td><td align="left" valign="top">28 (62)</td><td align="left" valign="top">2 (100)</td><td align="left" valign="top">12 (75)</td><td align="left" valign="top">26 (67)</td><td align="left" valign="top">68 (67)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Clinical questions (yes)</td><td align="left" valign="top">28 (62)</td><td align="left" valign="top">2 (100)</td><td align="left" valign="top">13 (81)</td><td align="left" valign="top">25 (64)</td><td align="left" valign="top">68 (67)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Is it a ChatGPT-generated scenario? 
(correct answers)</td><td align="left" valign="top">16 (36)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">4 (25)</td><td align="left" valign="top">16 (41)</td><td align="left" valign="top">36 (35)</td></tr><tr><td align="left" valign="top" colspan="7"><bold>SCT 4</bold><sup><xref ref-type="table-fn" rid="table2fn1"><bold>a</bold></xref></sup></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Scenario (yes)</td><td align="left" valign="top">28 (62)</td><td align="left" valign="top">2 (100)</td><td align="left" valign="top">11 (69)</td><td align="left" valign="top">26 (67)</td><td align="left" valign="top">67 (66)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Clinical questions (yes)</td><td align="left" valign="top">25 (56)</td><td align="left" valign="top">2 (100)</td><td align="left" valign="top">14 (88)</td><td align="left" valign="top">28 (72)</td><td align="left" valign="top">69 (68)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Is it a ChatGPT-generated scenario? 
(correct answers)</td><td align="left" valign="top">19 (42)</td><td align="left" valign="top">1 (50)</td><td align="left" valign="top">6 (38)</td><td align="left" valign="top">12 (31)</td><td align="left" valign="top">38 (37)</td></tr><tr><td align="left" valign="top" colspan="7"><bold>SCT 5</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Scenario (yes)</td><td align="left" valign="top">26 (58)</td><td align="left" valign="top">2 (100)</td><td align="left" valign="top">11 (69)</td><td align="left" valign="top">26 (67)</td><td align="left" valign="top">65 (64)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Clinical questions (yes)</td><td align="left" valign="top">27 (60)</td><td align="left" valign="top">2 (100)</td><td align="left" valign="top">13 (81)</td><td align="left" valign="top">28 (72)</td><td align="left" valign="top">70 (69)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Is it a ChatGPT-generated scenario? 
(correct answers)</td><td align="left" valign="top">21 (53)</td><td align="left" valign="top">2 (100)</td><td align="left" valign="top">8 (50)</td><td align="left" valign="top">23 (59)</td><td align="left" valign="top">54 (53)</td></tr><tr><td align="left" valign="top" colspan="7"><bold>SCT 6</bold></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Scenario (yes)</td><td align="left" valign="top">27 (60)</td><td align="left" valign="top">2 (100)</td><td align="left" valign="top">12 (75)</td><td align="left" valign="top">26 (67)</td><td align="left" valign="top">67 (66)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Clinical questions (yes)</td><td align="left" valign="top">24 (53)</td><td align="left" valign="top">2 (100)</td><td align="left" valign="top">13 (81)</td><td align="left" valign="top">27 (69)</td><td align="left" valign="top">66 (65)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Is it a ChatGPT-generated scenario? (correct answers)</td><td align="left" valign="top">21 (53)</td><td align="left" valign="top">1 (50)</td><td align="left" valign="top">8 (50)</td><td align="left" valign="top">18 (46)</td><td align="left" valign="top">48 (47)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>Script concordance tests created by ChatGPT.</p></fn></table-wrap-foot></table-wrap><p>Participants could not identify which SCT was created by ChatGPT from those created by experts in the field, as observed in <xref ref-type="table" rid="table2">Table 2</xref>. 
Teaching clinicians with 10 or more years of experience tended to better recognize SCTs generated by ChatGPT than their peers with less experience and medical residents, except for SCT 4.
id="s3-4"><title>Reported Strengths and Weaknesses of the SCTs</title><sec id="s3-4-1"><title>Overview</title><p>Only 39 (38.2%) of the 102 participants wrote at least 1 comment on the strengths or weaknesses for each of individual SCT. The strengths and weaknesses of the SCTs generated by ChatGPT were similarly reported across all the respondents and resembled those identified for the SCTs generated by experts in the field. Respondents reported that SCTs generated by ChatGPT were well aligned with the <italic>Diagnostic and Statistical Manual of Mental Disorders, Fifth Edition</italic> (<italic>DSM-5</italic>) but were also too caricatural.</p></sec><sec id="s3-4-2"><title>Strengths of the SCTs Generated by Experts in the Field</title><p>Overall, 3 (8%) of the 39 respondents indicated for 1 or more SCTs generated by experts in the field that the scenario represented typical clinical challenges. Most of the respondents (27/39, 69%) reported that the SCTs used clear prompts to test clinical reasoning. Sample responses included the following:</p><disp-quote><p>This concordance test was easy to follow as because the scenarios were concise and the prompts were clear. [Respondent 1]</p><p>In terms of clarity, the prompts were well written and it was very simple to see how they could elicit clinical reasoning. [Respondent 9]</p></disp-quote></sec><sec id="s3-4-3"><title>Strengths of the SCTs Generated by ChatGPT</title><p>Almost all respondents (32/39, 82%) mentioned that the SCTs were using typical clinical signs and symptoms reported in the <italic>DSM-5</italic>. Some (5/39, 13%) indicated that the SCTs were very well nuanced. Sample responses included the following:</p><disp-quote><p>This scenario corresponds to the textbook&#x2019;s description of the presented diagnosis. [Respondent 4]</p><p>I see that these prompts do not try to derive too much from the differential diagnoses intended for the suggested clinical presentation. 
They offered a degree of flexibility to enable the student to use their clinical reasoning. [Respondent 71]</p></disp-quote></sec><sec id="s3-4-4"><title>Limitations of the SCTs Generated by Experts in the Field</title><p>In all, 2 (5%) of the 39 respondents mentioned that they found the SCTs straightforward and unchallenging. There were no other comments regarding the limitations of the SCTs generated by experts in the field. Sample responses included the following:</p><disp-quote><p>This scenario is too easy. I find little value as it is clear for the student that we are looking at the specific diagnosis. [Respondent 1]</p><p>I don&#x2019;t see how this is challenging for the medical student who is going to take this test. [Respondent 80]</p></disp-quote></sec><sec id="s3-4-5"><title>Limitations of the SCTs Generated by ChatGPT</title><p>Most respondents (29/39, 74%) reported the SCTs generated by ChatGPT as caricatural or stereotypical clinical presentations as observed in textbooks with little regard to atypical presentations. A total of 7 (18%) respondents indicated that the SCTs generated by ChatGPT were too simple, as they tended to include additional information that were too trivial when attempting to challenge the responder&#x2019;s clinical reasoning. Sample responses included the following:</p><disp-quote><p>This is very trivial. I mean, it is not very difficult to find out what are the answers to these prompts as they clearly hint towards the same diagnosis. [Respondent 3]</p><p>It would be interesting to add more challenging prompts as they tend to be very simplistic and poorly represent complex clinical cases as they are very stereotypical to what is found in the DSM-5. 
[Respondent 4]</p></disp-quote></sec></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>The aim of this study was to compare SCTs created by ChatGPT to SCTs produced by clinical specialists on the scenario (stem), clinical questions, and expert opinions. There were no significant distinctions between the SCTs generated by ChatGPT as compared to those developed by experts in the field for the evaluated components. The strengths and weaknesses were similar across the 2 types of SCT. Respondents reported that the SCTs generated by ChatGPT were well aligned with the <italic>DSM-5</italic> but were also too caricatural.</p></sec><sec id="s4-2"><title>Comparison With Prior Work</title><p>Since the creation of ChatGPT, it has been used in various areas of medical education such as digital teaching assistants and personalized education [<xref ref-type="bibr" rid="ref29">29</xref>]. As a recent exploration study on the role of LLMs such as ChatGPT demonstrated, these models can provide interactive cases in a medical education context [<xref ref-type="bibr" rid="ref30">30</xref>]. Considering these previous studies of ChatGPT in the development of medical education tools, it is possible that the inability to recognize a SCT generated by ChatGPT from one developed by experts in the field can be explained by the generative nature of this LLM. As such, a recent review on the use of ChatGPT in health care has identified that this form of AI can be used for problem-based learning and critical thinking in health care education [<xref ref-type="bibr" rid="ref31">31</xref>]. 
However, it is mentioned in the literature that although the quality of the scenarios (or information) generated by ChatGPT might appear impressive, there is a need for an expert to assess the content generated, as it might be an amalgamation of erroneous information [<xref ref-type="bibr" rid="ref32">32</xref>].</p><p>Although a few comments were provided regarding the strengths and limitations of both types of SCTs, they align with what is commonly reported in the literature for similar tasks. Some respondents noted caricature-like scenarios, possibly attributed to the robotic and dehumanized nature often associated with vignettes produced by LLMs [<xref ref-type="bibr" rid="ref33">33</xref>]. It is plausible that more intricate prompts could have resulted in more nuanced scenarios. Therefore, the mentioned strengths of the scenarios and clinical questions, particularly their clinical alignment with the <italic>DSM-5</italic>, may be tied to the fact that this was one of the prompts used when conceptualizing interactions with ChatGPT during the creation of the SCTs.</p><p>In the field of psychiatry, applications of ChatGPT to medical education are limited. Among the limited available evidence, a novel study tested the knowledge of ChatGPT by exposing it to 100 clinical cases vignettes, and it performed extremely well [<xref ref-type="bibr" rid="ref34">34</xref>]. Another similar use of ChatGPT was as an aid to answer clinical questions. A recent study evaluated the performance of users (psychiatrist and medical residents in the Netherlands) using ChatGPT as compared to nonusers for answering several questions in psychiatry, and it was observed that the users had better and faster responses as compared to nonusers [<xref ref-type="bibr" rid="ref35">35</xref>]. 
Although these applications differ from this study, they might hint that ChatGPT currently has a database that holds relevant data in the field of psychiatry, which might explain the realism of scenarios and prompts observed for SCTs 2, 3, and 4.</p><p>There are substantial ethical considerations that must be accounted for when using such tool to assist medical educators. As an example, it is important to consider that ChatGPT (and other LLMs) are bound to the data they have been trained with along with their inherent biases [<xref ref-type="bibr" rid="ref36">36</xref>]. Cross-validation of the generated information is often necessary to ensure that learners are not exposed to false information [<xref ref-type="bibr" rid="ref37">37</xref>].</p></sec><sec id="s4-3"><title>Limitations</title><p>Although web-based surveys offer convenience in distribution, they struggle with the challenge of accurately identifying the characteristics of the assessed population [<xref ref-type="bibr" rid="ref38">38</xref>]. In our survey, we did not differentiate between those formally trained in SCTs and those who merely encountered them during their medical training, thus introducing potential limitations in generalizing the results. It is plausible that clinicians more experienced with SCTs were more likely to participate in the survey, but our recruitment from psychiatry departments exclusively helps mitigate this bias. Interpretation biases may also be present, as not all participants might be familiar with the framework used in this study. We did not explore acceptability regarding the use of generative AI in SCT creation, marking another limitation. Additionally, we did not compare different prompts, and it is conceivable that alternative sets of prompts could have produced better results for the SCTs generated by ChatGPT. 
Opting for a different language model might have yielded varied performances, and it is plausible that alternative models could outperform ChatGPT in this context.</p></sec><sec id="s4-4"><title>Conclusions</title><p>In an era of rapidly evolving medicine and where technologies derived from AI are growing even more quickly, this study is the first to focus on the design of SCTs assisted by AI. The primary goal of this study highlighted that no statistical differences were found between the SCTs generated by ChatGPT and those created by clinical experts in the field of psychiatry for the elaboration of a scenario and the clinical questions presented in the SCTs. On average, the respondents incorrectly identified which SCTs were created with the help of AI. The major strength of SCTs generated by ChatGPT was that they were consistent with the <italic>DSM-5</italic>, whereas the caricatural quality or triviality of the SCTs generated by ChatGPT were the main weaknesses reported by the respondents. A possible way to mitigate this effect would be to provide more complex prompts to the generative AI or editing some details of the vignette. This study opens the door to larger-scale studies in this area to assess the impact of such aid on the academic success of medical students and how it can be used to improve efficiencies.</p></sec></sec></body><back><ack><p>This study did not receive any financial support.</p></ack><fn-group><fn fn-type="con"><p>AH, BK, MP, and VP contributed to the study conceptualization and writing of the original manuscript. All authors participated in the investigation and validation process. 
All authors edited the manuscript draft and reviewed the final manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">DSM-5</term><def><p>Diagnostic and Statistical Manual of Mental Disorders, Fifth Edition</p></def></def-item><def-item><term id="abb3">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb4">SCT</term><def><p>script concordance test</p></def></def-item><def-item><term id="abb5">UGME</term><def><p>undergraduate medical education</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Frank</surname><given-names>JR</given-names></name><name name-style="western"><surname>Snell</surname><given-names>LS</given-names></name><name name-style="western"><surname>Cate</surname><given-names>OT</given-names></name><etal/></person-group><article-title>Competency-based medical education: theory to practice</article-title><source>Med Teach</source><year>2010</year><month>08</month><volume>32</volume><issue>8</issue><fpage>638</fpage><lpage>645</lpage><pub-id pub-id-type="doi">10.3109/0142159X.2010.501190</pub-id><pub-id pub-id-type="medline">20662574</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Connor</surname><given-names>DM</given-names></name><name name-style="western"><surname>Durning</surname><given-names>SJ</given-names></name><name name-style="western"><surname>Rencic</surname><given-names>JJ</given-names></name></person-group><article-title>Clinical reasoning as a core competency</article-title><source>Acad 
Med</source><year>2020</year><month>08</month><volume>95</volume><issue>8</issue><fpage>1166</fpage><lpage>1171</lpage><pub-id pub-id-type="doi">10.1097/ACM.0000000000003027</pub-id><pub-id pub-id-type="medline">31577583</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Adams</surname><given-names>NE</given-names></name></person-group><article-title>Bloom&#x2019;s taxonomy of cognitive learning objectives</article-title><source>J Med Libr Assoc</source><year>2015</year><month>07</month><volume>103</volume><issue>3</issue><fpage>152</fpage><lpage>153</lpage><pub-id pub-id-type="doi">10.3163/1536-5050.103.3.010</pub-id><pub-id pub-id-type="medline">26213509</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Heist</surname><given-names>BS</given-names></name><name name-style="western"><surname>Gonzalo</surname><given-names>JD</given-names></name><name name-style="western"><surname>Durning</surname><given-names>S</given-names></name><name name-style="western"><surname>Torre</surname><given-names>D</given-names></name><name name-style="western"><surname>Elnicki</surname><given-names>DM</given-names></name></person-group><article-title>Exploring clinical reasoning strategies and test-taking behaviors during clinical vignette style multiple-choice examinations: a mixed methods study</article-title><source>J Grad Med Educ</source><year>2014</year><month>12</month><volume>6</volume><issue>4</issue><fpage>709</fpage><lpage>714</lpage><pub-id pub-id-type="doi">10.4300/JGME-D-14-00176.1</pub-id><pub-id pub-id-type="medline">26140123</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Butler</surname><given-names>AC</given-names></name></person-group><article-title>Multiple-choice testing in education: are the best practices for assessment also good for learning?</article-title><source>J Appl Res Mem Cogn</source><year>2018</year><month>07</month><volume>7</volume><issue>3</issue><fpage>323</fpage><lpage>331</lpage><pub-id pub-id-type="doi">10.1016/j.jarmac.2018.07.002</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Charlin</surname><given-names>B</given-names></name><name name-style="western"><surname>Roy</surname><given-names>L</given-names></name><name name-style="western"><surname>Brailovsky</surname><given-names>C</given-names></name><name name-style="western"><surname>Goulet</surname><given-names>F</given-names></name><name name-style="western"><surname>van der Vleuten</surname><given-names>C</given-names></name></person-group><article-title>The script concordance test: a tool to assess the reflective clinician</article-title><source>Teach Learn Med</source><year>2000</year><volume>12</volume><issue>4</issue><fpage>189</fpage><lpage>195</lpage><pub-id pub-id-type="doi">10.1207/S15328015TLM1204_5</pub-id><pub-id pub-id-type="medline">11273368</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Giet</surname><given-names>D</given-names></name><name name-style="western"><surname>Massart</surname><given-names>V</given-names></name><name name-style="western"><surname>Gagnon</surname><given-names>R</given-names></name><name name-style="western"><surname>Charlin</surname><given-names>B</given-names></name></person-group><article-title>Le test de concordance de script en 20 questions. 
Twenty questions on script concordance tests [Article in French]</article-title><source>P&#x00E9;dagogie M&#x00E9;dicale</source><year>2013</year><month>02</month><day>4</day><volume>14</volume><issue>1</issue><fpage>39</fpage><lpage>48</lpage><pub-id pub-id-type="doi">10.1051/pmed/2012026</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Petrucci</surname><given-names>AM</given-names></name><name name-style="western"><surname>Nouh</surname><given-names>T</given-names></name><name name-style="western"><surname>Boutros</surname><given-names>M</given-names></name><name name-style="western"><surname>Gagnon</surname><given-names>R</given-names></name><name name-style="western"><surname>Meterissian</surname><given-names>SH</given-names></name></person-group><article-title>Assessing clinical judgment using the script concordance test: the importance of using specialty-specific experts to develop the scoring key</article-title><source>Am J Surg</source><year>2013</year><month>02</month><volume>205</volume><issue>2</issue><fpage>137</fpage><lpage>140</lpage><pub-id pub-id-type="doi">10.1016/j.amjsurg.2012.09.002</pub-id><pub-id pub-id-type="medline">23246286</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fournier</surname><given-names>JP</given-names></name><name name-style="western"><surname>Demeester</surname><given-names>A</given-names></name><name name-style="western"><surname>Charlin</surname><given-names>B</given-names></name></person-group><article-title>Script concordance tests: guidelines for construction</article-title><source>BMC Med Inform Decis Mak</source><year>2008</year><month>05</month><day>6</day><volume>8</volume><fpage>18</fpage><pub-id pub-id-type="doi">10.1186/1472-6947-8-18</pub-id><pub-id 
pub-id-type="medline">18460199</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dory</surname><given-names>V</given-names></name><name name-style="western"><surname>Gagnon</surname><given-names>R</given-names></name><name name-style="western"><surname>Vanpee</surname><given-names>D</given-names></name><name name-style="western"><surname>Charlin</surname><given-names>B</given-names></name></person-group><article-title>How to construct and implement script concordance tests: insights from a systematic review</article-title><source>Med Educ</source><year>2012</year><month>06</month><volume>46</volume><issue>6</issue><fpage>552</fpage><lpage>563</lpage><pub-id pub-id-type="doi">10.1111/j.1365-2923.2011.04211.x</pub-id><pub-id pub-id-type="medline">22626047</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Leclerc</surname><given-names>AA</given-names></name><name name-style="western"><surname>Nguyen</surname><given-names>LHP</given-names></name><name name-style="western"><surname>Charlin</surname><given-names>B</given-names></name><name name-style="western"><surname>Lubarsky</surname><given-names>S</given-names></name><name name-style="western"><surname>Ayad</surname><given-names>T</given-names></name></person-group><article-title>Assessing the acceptability of script concordance testing: a nationwide study in otolaryngology</article-title><source>Can J Surg</source><year>2021</year><month>05</month><day>26</day><volume>64</volume><issue>3</issue><fpage>E317</fpage><lpage>E323</lpage><pub-id pub-id-type="doi">10.1503/cjs.014919</pub-id><pub-id pub-id-type="medline">34038060</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>See</surname><given-names>KC</given-names></name><name name-style="western"><surname>Tan</surname><given-names>KL</given-names></name><name name-style="western"><surname>Lim</surname><given-names>TK</given-names></name></person-group><article-title>The script concordance test for clinical reasoning: re-examining its utility and potential weakness</article-title><source>Med Educ</source><year>2014</year><month>11</month><volume>48</volume><issue>11</issue><fpage>1069</fpage><lpage>1077</lpage><pub-id pub-id-type="doi">10.1111/medu.12514</pub-id><pub-id pub-id-type="medline">25307634</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kazour</surname><given-names>F</given-names></name><name name-style="western"><surname>Richa</surname><given-names>S</given-names></name><name name-style="western"><surname>Zoghbi</surname><given-names>M</given-names></name><name name-style="western"><surname>El-Hage</surname><given-names>W</given-names></name><name name-style="western"><surname>Haddad</surname><given-names>FG</given-names></name></person-group><article-title>Using the script concordance test to evaluate clinical reasoning skills in psychiatry</article-title><source>Acad Psychiatry</source><year>2017</year><month>02</month><volume>41</volume><issue>1</issue><fpage>86</fpage><lpage>90</lpage><pub-id pub-id-type="doi">10.1007/s40596-016-0539-6</pub-id><pub-id pub-id-type="medline">27178278</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Charlin</surname><given-names>B</given-names></name><name name-style="western"><surname>Gagnon</surname><given-names>R</given-names></name><name 
name-style="western"><surname>Lubarsky</surname><given-names>S</given-names></name><etal/></person-group><article-title>Assessment in the context of uncertainty using the script concordance test: more meaning for scores</article-title><source>Teach Learn Med</source><year>2010</year><month>07</month><volume>22</volume><issue>3</issue><fpage>180</fpage><lpage>186</lpage><pub-id pub-id-type="doi">10.1080/10401334.2010.488197</pub-id><pub-id pub-id-type="medline">20563937</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lineberry</surname><given-names>M</given-names></name><name name-style="western"><surname>Kreiter</surname><given-names>CD</given-names></name><name name-style="western"><surname>Bordage</surname><given-names>G</given-names></name></person-group><article-title>Threats to validity in the use and interpretation of script concordance test scores</article-title><source>Med Educ</source><year>2013</year><month>12</month><volume>47</volume><issue>12</issue><fpage>1175</fpage><lpage>1183</lpage><pub-id pub-id-type="doi">10.1111/medu.12283</pub-id><pub-id pub-id-type="medline">24206151</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Walters</surname><given-names>M</given-names></name><name name-style="western"><surname>Alonge</surname><given-names>T</given-names></name><name name-style="western"><surname>Zeller</surname><given-names>M</given-names></name></person-group><article-title>Impact of COVID-19 on medical education: perspectives from students</article-title><source>Acad Med</source><year>2022</year><month>03</month><day>1</day><volume>97</volume><issue>3S</issue><fpage>S40</fpage><lpage>S48</lpage><pub-id pub-id-type="doi">10.1097/ACM.0000000000004525</pub-id><pub-id 
pub-id-type="medline">34789656</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Saeki</surname><given-names>S</given-names></name><name name-style="western"><surname>Okada</surname><given-names>R</given-names></name><name name-style="western"><surname>Shane</surname><given-names>PY</given-names></name></person-group><article-title>Medical education during the COVID-19: a review of guidelines and policies adapted during the 2020 pandemic</article-title><source>Healthcare (Basel)</source><year>2023</year><month>03</month><day>16</day><volume>11</volume><issue>6</issue><fpage>867</fpage><pub-id pub-id-type="doi">10.3390/healthcare11060867</pub-id><pub-id pub-id-type="medline">36981524</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Khan</surname><given-names>RA</given-names></name><name name-style="western"><surname>Jawaid</surname><given-names>M</given-names></name><name name-style="western"><surname>Khan</surname><given-names>AR</given-names></name><name name-style="western"><surname>Sajjad</surname><given-names>M</given-names></name></person-group><article-title>ChatGPT - reshaping medical education and clinical management</article-title><source>Pak J Med Sci</source><year>2023</year><volume>39</volume><issue>2</issue><fpage>605</fpage><lpage>607</lpage><pub-id pub-id-type="doi">10.12669/pjms.39.2.7653</pub-id><pub-id pub-id-type="medline">36950398</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="web"><article-title>ChatGPT</article-title><source>OpenAI</source><access-date>2024-03-20</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://chat.openai.com/">https://chat.openai.com/</ext-link></comment></nlm-citation></ref><ref 
id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mohammad</surname><given-names>B</given-names></name><name name-style="western"><surname>Supti</surname><given-names>T</given-names></name><name name-style="western"><surname>Alzubaidi</surname><given-names>M</given-names></name><etal/></person-group><article-title>The pros and cons of using ChatGPT in medical education: a scoping review</article-title><source>Stud Health Technol Inform</source><year>2023</year><month>06</month><day>29</day><volume>305</volume><fpage>644</fpage><lpage>647</lpage><pub-id pub-id-type="doi">10.3233/SHTI230580</pub-id><pub-id pub-id-type="medline">37387114</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hirosawa</surname><given-names>T</given-names></name><name name-style="western"><surname>Kawamura</surname><given-names>R</given-names></name><name name-style="western"><surname>Harada</surname><given-names>Y</given-names></name><etal/></person-group><article-title>ChatGPT-generated differential diagnosis lists for complex case-derived clinical vignettes: diagnostic accuracy evaluation</article-title><source>JMIR Med Inform</source><year>2023</year><month>10</month><day>9</day><volume>11</volume><fpage>e48808</fpage><pub-id pub-id-type="doi">10.2196/48808</pub-id><pub-id pub-id-type="medline">37812468</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kimmerle</surname><given-names>J</given-names></name><name name-style="western"><surname>Timm</surname><given-names>J</given-names></name><name name-style="western"><surname>Festl-Wietek</surname><given-names>T</given-names></name><name 
name-style="western"><surname>Cress</surname><given-names>U</given-names></name><name name-style="western"><surname>Herrmann-Werner</surname><given-names>A</given-names></name></person-group><article-title>Medical students' attitudes toward AI in medicine and their expectations for medical education</article-title><source>J Med Educ Curric Dev</source><year>2023</year><month>12</month><day>6</day><volume>10</volume><fpage>23821205231219346</fpage><pub-id pub-id-type="doi">10.1177/23821205231219346</pub-id><pub-id pub-id-type="medline">38075443</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mart&#x00ED;nez-Mesa</surname><given-names>J</given-names></name><name name-style="western"><surname>Gonz&#x00E1;lez-Chica</surname><given-names>DA</given-names></name><name name-style="western"><surname>Bastos</surname><given-names>JL</given-names></name><name name-style="western"><surname>Bonamigo</surname><given-names>RR</given-names></name><name name-style="western"><surname>Duquia</surname><given-names>RP</given-names></name></person-group><article-title>Sample size: how many participants do I need in my research?</article-title><source>An Bras Dermatol</source><year>2014</year><volume>89</volume><issue>4</issue><fpage>609</fpage><lpage>615</lpage><pub-id pub-id-type="doi">10.1590/abd1806-4841.20143705</pub-id><pub-id pub-id-type="medline">25054748</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Asiamah</surname><given-names>N</given-names></name><name name-style="western"><surname>Mensah</surname><given-names>H</given-names></name><name name-style="western"><surname>Oteng-Abayie</surname><given-names>EF</given-names></name></person-group><article-title>Do larger samples really lead to more precise estimates? 
a simulation study</article-title><source>Am J Educ Res</source><year>2017</year><month>01</month><volume>5</volume><issue>1</issue><fpage>9</fpage><lpage>17</lpage><pub-id pub-id-type="doi">10.12691/education-5-1-2</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gilson</surname><given-names>A</given-names></name><name name-style="western"><surname>Safranek</surname><given-names>CW</given-names></name><name name-style="western"><surname>Huang</surname><given-names>T</given-names></name><etal/></person-group><article-title>How does ChatGPT perform on the United States Medical Licensing Examination (USMLE)? the implications of large language models for medical education and knowledge assessment</article-title><source>JMIR Med Educ</source><year>2023</year><month>02</month><day>8</day><volume>9</volume><fpage>e45312</fpage><pub-id pub-id-type="doi">10.2196/45312</pub-id><pub-id pub-id-type="medline">36753318</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dave</surname><given-names>T</given-names></name><name name-style="western"><surname>Athaluri</surname><given-names>SA</given-names></name><name name-style="western"><surname>Singh</surname><given-names>S</given-names></name></person-group><article-title>ChatGPT in medicine: an overview of its applications, advantages, limitations, future prospects, and ethical considerations</article-title><source>Front Artif Intell</source><year>2023</year><month>05</month><day>4</day><volume>6</volume><fpage>1169595</fpage><pub-id pub-id-type="doi">10.3389/frai.2023.1169595</pub-id><pub-id pub-id-type="medline">37215063</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Lin</surname><given-names>Z</given-names></name></person-group><article-title>Why and how to embrace AI such as ChatGPT in your academic life</article-title><source>R Soc Open Sci</source><year>2023</year><month>08</month><day>23</day><volume>10</volume><issue>8</issue><fpage>230658</fpage><pub-id pub-id-type="doi">10.1098/rsos.230658</pub-id><pub-id pub-id-type="medline">37621662</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chun Tie</surname><given-names>Y</given-names></name><name name-style="western"><surname>Birks</surname><given-names>M</given-names></name><name name-style="western"><surname>Francis</surname><given-names>K</given-names></name></person-group><article-title>Grounded theory research: a design framework for novice researchers</article-title><source>SAGE Open Med</source><year>2019</year><month>01</month><day>2</day><volume>7</volume><fpage>2050312118822927</fpage><pub-id pub-id-type="doi">10.1177/2050312118822927</pub-id><pub-id pub-id-type="medline">30637106</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>H</given-names></name></person-group><article-title>The rise of ChatGPT: exploring its potential in medical education</article-title><source>Anat Sci Educ</source><year>2023</year><month>03</month><day>10</day><pub-id pub-id-type="doi">10.1002/ase.2270</pub-id><pub-id pub-id-type="medline">36916887</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Safranek</surname><given-names>CW</given-names></name><name name-style="western"><surname>Sidamon-Eristoff</surname><given-names>AE</given-names></name><name 
name-style="western"><surname>Gilson</surname><given-names>A</given-names></name><name name-style="western"><surname>Chartash</surname><given-names>D</given-names></name></person-group><article-title>The role of large language models in medical education: applications and implications</article-title><source>JMIR Med Educ</source><year>2023</year><month>08</month><day>14</day><volume>9</volume><fpage>e50945</fpage><pub-id pub-id-type="doi">10.2196/50945</pub-id><pub-id pub-id-type="medline">37578830</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sallam</surname><given-names>M</given-names></name></person-group><article-title>ChatGPT utility in healthcare education, research, and practice: systematic review on the promising perspectives and valid concerns</article-title><source>Healthcare (Basel)</source><year>2023</year><month>03</month><day>19</day><volume>11</volume><issue>6</issue><fpage>887</fpage><pub-id pub-id-type="doi">10.3390/healthcare11060887</pub-id><pub-id pub-id-type="medline">36981544</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Homolak</surname><given-names>J</given-names></name></person-group><article-title>Opportunities and risks of ChatGPT in medicine, science, and academic publishing: a modern Promethean dilemma</article-title><source>Croat Med J</source><year>2023</year><month>02</month><day>28</day><volume>64</volume><issue>1</issue><fpage>1</fpage><lpage>3</lpage><pub-id pub-id-type="doi">10.3325/cmj.2023.64.1</pub-id><pub-id pub-id-type="medline">36864812</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ashraf</surname><given-names>H</given-names></name><name 
name-style="western"><surname>Ashfaq</surname><given-names>H</given-names></name></person-group><article-title>The role of ChatGPT in medical research: progress and limitations</article-title><source>Ann Biomed Eng</source><year>2024</year><month>03</month><volume>52</volume><issue>3</issue><fpage>458</fpage><lpage>461</lpage><pub-id pub-id-type="doi">10.1007/s10439-023-03311-0</pub-id><pub-id pub-id-type="medline">37452215</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Franco D&#x2019;Souza</surname><given-names>R</given-names></name><name name-style="western"><surname>Amanullah</surname><given-names>S</given-names></name><name name-style="western"><surname>Mathew</surname><given-names>M</given-names></name><name name-style="western"><surname>Surapaneni</surname><given-names>KM</given-names></name></person-group><article-title>Appraising the performance of ChatGPT in psychiatry using 100 clinical case vignettes</article-title><source>Asian J Psychiatr</source><year>2023</year><month>11</month><volume>89</volume><fpage>103770</fpage><pub-id pub-id-type="doi">10.1016/j.ajp.2023.103770</pub-id><pub-id pub-id-type="medline">37812998</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Luykx</surname><given-names>JJ</given-names></name><name name-style="western"><surname>Gerritse</surname><given-names>F</given-names></name><name name-style="western"><surname>Habets</surname><given-names>PC</given-names></name><name name-style="western"><surname>Vinkers</surname><given-names>CH</given-names></name></person-group><article-title>The performance of ChatGPT in generating answers to clinical questions in psychiatry: a two-layer assessment</article-title><source>World 
Psychiatry</source><year>2023</year><month>10</month><volume>22</volume><issue>3</issue><fpage>479</fpage><lpage>480</lpage><pub-id pub-id-type="doi">10.1002/wps.21145</pub-id><pub-id pub-id-type="medline">37713576</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Karabacak</surname><given-names>M</given-names></name><name name-style="western"><surname>Ozkara</surname><given-names>BB</given-names></name><name name-style="western"><surname>Margetis</surname><given-names>K</given-names></name><name name-style="western"><surname>Wintermark</surname><given-names>M</given-names></name><name name-style="western"><surname>Bisdas</surname><given-names>S</given-names></name></person-group><article-title>The advent of generative language models in medical education</article-title><source>JMIR Med Educ</source><year>2023</year><month>06</month><day>6</day><volume>9</volume><fpage>e48163</fpage><pub-id pub-id-type="doi">10.2196/48163</pub-id><pub-id pub-id-type="medline">37279048</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jeyaraman</surname><given-names>M</given-names></name><name name-style="western"><surname>Ramasubramanian</surname><given-names>S</given-names></name><name name-style="western"><surname>Balaji</surname><given-names>S</given-names></name><name name-style="western"><surname>Jeyaraman</surname><given-names>N</given-names></name><name name-style="western"><surname>Nallakumarasamy</surname><given-names>A</given-names></name><name name-style="western"><surname>Sharma</surname><given-names>S</given-names></name></person-group><article-title>ChatGPT in action: harnessing artificial intelligence potential and addressing ethical challenges in medicine, education, and scientific 
research</article-title><source>World J Methodol</source><year>2023</year><month>09</month><day>20</day><volume>13</volume><issue>4</issue><fpage>170</fpage><lpage>178</lpage><pub-id pub-id-type="doi">10.5662/wjm.v13.i4.170</pub-id><pub-id pub-id-type="medline">37771867</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Andrade</surname><given-names>C</given-names></name></person-group><article-title>The limitations of online surveys</article-title><source>Indian J Psychol Med</source><year>2020</year><month>10</month><day>13</day><volume>42</volume><issue>6</issue><fpage>575</fpage><lpage>576</lpage><pub-id pub-id-type="doi">10.1177/0253717620957496</pub-id><pub-id pub-id-type="medline">33354086</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Original survey in French.</p><media xlink:href="mededu_v10i1e54067_app1.pdf" xlink:title="PDF File, 502 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Translated survey in English.</p><media xlink:href="mededu_v10i1e54067_app2.pdf" xlink:title="PDF File, 949 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Responses for every component of the script concordance test (SCT) evaluations for the 6 SCTs, stratified by the category of respondents.</p><media xlink:href="mededu_v10i1e54067_app3.docx" xlink:title="DOCX File, 27 KB"/></supplementary-material></app-group></back></article>