<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="letter"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Educ</journal-id><journal-id journal-id-type="publisher-id">mededu</journal-id><journal-id journal-id-type="index">20</journal-id><journal-title>JMIR Medical Education</journal-title><abbrev-journal-title>JMIR Med Educ</abbrev-journal-title><issn pub-type="epub">2369-3762</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v11i1e72034</article-id><article-id pub-id-type="doi">10.2196/72034</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Letter</subject></subj-group></article-categories><title-group><article-title>Assessment of Large Language Model Performance on Medical School Essay-Style Concept Appraisal Questions: Exploratory Study</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Mehta</surname><given-names>Seysha</given-names></name><degrees>BA</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Haddad</surname><given-names>Eliot N</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Burke</surname><given-names>Indira Bhavsar</given-names></name><degrees>MHPE, MD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Majors</surname><given-names>Alana K</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Maeda</surname><given-names>Rie</given-names></name><degrees>BA</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Burke</surname><given-names>Sean M</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Deshpande</surname><given-names>Abhishek</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Nowacki</surname><given-names>Amy S</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lindenmeyer</surname><given-names>Christina C</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Mehta</surname><given-names>Neil</given-names></name><degrees>MBBS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Cleveland Clinic Lerner College of Medicine, School of Medicine, Case Western Reserve University</institution><addr-line>9500 Euclid Ave, G10</addr-line><addr-line>Cleveland</addr-line><addr-line>OH</addr-line><country>United States</country></aff><aff id="aff2"><institution>Department of Internal Medicine, The University of Texas Southwestern Medical Center</institution><addr-line>Dallas</addr-line><addr-line>TX</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Car</surname><given-names>Lorainne 
Tudor</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Chartash</surname><given-names>David</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Yang</surname><given-names>Ren</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Neil Mehta, MBBS, Cleveland Clinic Lerner College of Medicine, School of Medicine, Case Western Reserve University, 9500 Euclid Ave, G10, Cleveland, OH, 44195, United States, 1 2164456512, 1 2164451007; <email>mehtan@ccf.org</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>16</day><month>6</month><year>2025</year></pub-date><volume>11</volume><elocation-id>e72034</elocation-id><history><date date-type="received"><day>02</day><month>02</month><year>2025</year></date><date date-type="rev-recd"><day>11</day><month>05</month><year>2025</year></date><date date-type="accepted"><day>16</day><month>05</month><year>2025</year></date></history><copyright-statement>&#x00A9; Seysha Mehta, Eliot N Haddad, Indira Bhavsar Burke, Alana K Majors, Rie Maeda, Sean M Burke, Abhishek Deshpande, Amy S Nowacki, Christina C Lindenmeyer, Neil Mehta. Originally published in JMIR Medical Education (<ext-link ext-link-type="uri" xlink:href="https://mededu.jmir.org">https://mededu.jmir.org</ext-link>), 16.6.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Education, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://mededu.jmir.org/">https://mededu.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://mededu.jmir.org/2025/1/e72034"/><abstract><p>Bing Chat (subsequently renamed Microsoft Copilot)&#x2014;a ChatGPT 4.0&#x2013;based large language model&#x2014;demonstrated comparable performance to medical students in answering essay-style concept appraisals, while assessors struggled to differentiate artificial intelligence (AI) responses from human responses. These results highlight the need to prepare students and educators for a future world of AI by fostering reflective learning practices and critical thinking.</p></abstract><kwd-group><kwd>essay-type questions</kwd><kwd>large language models</kwd><kwd>generative AI</kwd><kwd>Microsoft Copilot</kwd><kwd>artificial intelligence</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Large language models (LLMs) are of growing interest in medical education. 
LLMs have demonstrated passing scores on the United States Medical Licensing Examination (USMLE), raising questions about their impact on assessment frameworks [<xref ref-type="bibr" rid="ref1">1</xref>], including whether artificial intelligence (AI) can successfully answer essay-style, reasoning-based questions and whether assessors can distinguish AI-generated and student-written responses. Our medical school&#x2019;s preclinical students complete application-level, essay-type questions&#x2014;concept appraisals (CAPPs)&#x2014;every week (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) [<xref ref-type="bibr" rid="ref2">2</xref>]. We evaluated LLMs&#x2019; performance on CAPPs and examined assessors&#x2019; ability to distinguish AI-generated and human responses.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>Ten retired CAPP questions were selected, ensuring representation from multiple preclinical organ-system blocks, including gastroenterology, endocrinology, musculoskeletal science, cardiorespiratory medicine, hematology, renal biology, and immunology. Retired CAPPs were used, so that currently used ones were not exposed to students. Answering these required literature review and application of knowledge to clinical scenarios.</p><p>Five student responses from previous classes (before availability of LLMs) were randomly selected and deidentified. Individuals at various medical training levels generated AI responses via Bing Chat (subsequently renamed Microsoft Copilot; <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), which used GPT-4 algorithms and had similar performance on medical tasks as ChatGPT 4.0&#x2014;the most advanced LLM at the time of study [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. 
Users first prompted Bing Chat by using the original CAPP text and then iteratively refined prompts to generate more comprehensive answers and match institutional standards without manual editing (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p><p>Ten expert assessors graded responses to 1 CAPP question each. While unaware that any responses had been AI-generated, they graded 5 deidentified student responses and 2 AI-generated responses (presented in random order) for their CAPP question, using a standard rubric (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). For 2 CAPPs, 4 student responses were used instead of 5 due to lack of consent for inclusion in the registry. Grading each CAPP took approximately 30 minutes; thus, a larger sample size was infeasible for this exploratory study. Afterward, assessors identified whether responses were AI- or student-generated and provided their rationales.</p><p>Scoring differences between human- and AI-generated responses and identification accuracy were evaluated, using descriptive statistics. Thematic analysis was conducted on assessors&#x2019; classification rationales; 2 team members independently analyzed reasons to identify themes, compared findings, and reconciled differences (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>This study used deidentified data from the Cleveland Clinic Institutional Review Board&#x2013;approved registry #6600. Since this was a registry for which students had already provided informed consent, separate informed consent was not required. 
Each CAPP reviewer was paid US $100.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>AI responses received scores higher than or equal to those for human responses for most questions, with substantial performance variability; AI scored better than, equivalent to, or worse than humans, depending on the CAPP question (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Average of human vs AI scores for each question. CAPP questions were answered either by students (human) or by prompting Microsoft Copilot (AI). Expert graders scored the CAPP questions based on a rubric. The average scores received by humans and AI are shown by question (colored vs open circles, respectively). AI responses received scores higher than or equal to those for human responses for most questions. Each question had a unique maximum score. This figure illustrates the relative scores of humans vs AI. AI: artificial intelligence; CAPP: concept appraisal.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mededu_v11i1e72034_fig01.png"/></fig><p>Assessors correctly identified response sources 53% (36/68) of the time (student responses: 27/48, 56%; AI-generated responses: 9/20, 45%). Only 1 assessor correctly classified all responses. 
Consistent with other studies, 1 assessor who used AI detection tools did not have much success [<xref ref-type="bibr" rid="ref5">5</xref>] (<xref ref-type="table" rid="table1">Table 1</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Percentage of responses correctly identified as human or artificial intelligence (AI) responses for each concept appraisal (CAPP) question.<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Question number</td><td align="left" valign="top">Correctly identified responses, n/N (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Q1</td><td align="left" valign="top">3/6 (50)</td></tr><tr><td align="left" valign="top">Q2</td><td align="left" valign="top">3/7 (43)</td></tr><tr><td align="left" valign="top">Q3</td><td align="left" valign="top">3/7 (43)</td></tr><tr><td align="left" valign="top">Q4</td><td align="left" valign="top">6/7 (86)</td></tr><tr><td align="left" valign="top">Q5</td><td align="left" valign="top">3/6 (50)</td></tr><tr><td align="left" valign="top">Q6</td><td align="left" valign="top">2/7 (29)</td></tr><tr><td align="left" valign="top">Q7<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td><td align="left" valign="top">0/7 (0)</td></tr><tr><td align="left" valign="top">Q8</td><td align="left" valign="top">5/7 (71)</td></tr><tr><td align="left" valign="top">Q9</td><td align="left" valign="top">4/7 (57)</td></tr><tr><td align="left" valign="top">Q10<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="top">7/7 (100)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>Responses for each question were graded by 1 expert. 
Expert graders were blinded and were not told which responses were generated by humans vs AI.</p></fn><fn id="table1fn2"><p><sup>b</sup>Despite utilization of AI detection tools, 1 assessor did not correctly classify any of the responses (Q7).</p></fn><fn id="table1fn3"><p><sup>c</sup>Only 1 assessor correctly classified all responses for their CAPP question (Q10).</p></fn></table-wrap-foot></table-wrap><p>Thematic analysis showed that the most cited reason for identification was the perceived &#x201C;writing style,&#x201D; though many assessors noted an inability to distinguish categories (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>We demonstrate that AI can provide high-quality answers to essay-style medical education questions requiring detailed research and knowledge application. Content experts struggled to distinguish AI-generated and human-written responses, underscoring the challenges of identifying academic misuse of generative AI.</p><p>Iterative prompting of Microsoft Copilot was essential for generating acceptable responses. This process mirrors students&#x2019; typical workflow for refining drafts through edits; thus, iterative prompting does not necessarily disadvantage AI. Our findings highlight concerns about potential overreliance on AI and its implications for assessment validity, especially as recent survey data suggest that 89% of students use ChatGPT during self-study [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>Given AI responses&#x2019; similarity to human responses, institutions must consider frameworks for integrating AI into assessments without compromising academic integrity [<xref ref-type="bibr" rid="ref8">8</xref>]. 
Potential strategies include structured classroom use of AI during collaborative group work (eg, requiring students to assess AI responses and cite primary evidence to support or refute them) [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref9">9</xref>].</p><p>Study limitations include a small sample of AI-generated responses and the research&#x2019;s exploratory nature. Expanding the sample size and including additional questions could provide insights on AI&#x2019;s performance (relative to humans) for specific question types (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Additionally, the findings prompt further discussions on ethically integrating generative AI into medical curricula while ensuring students develop critical appraisal and independent reasoning skills [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref10">10</xref>].</p><p>AI&#x2019;s performance suggests its potential as a learning enhancement tool. However, medical educators must implement strategies for preventing overreliance on AI, fostering reflective learning practices and critical thinking, and maintaining assessment integrity.</p></sec></body><back><ack><p>The authors would like to thank the following individuals for serving as concept appraisal (CAPP) graders: William Albabish, William Cantrell, Thomas Crilley, Ryan Ellis, Andrew Ford, Emily Frisch, Jeffrey Schwartz, Michael Smith, Mohammad Sohail, and Anirudh Yalamanchali. Financial support was received from The Jones Day Endowment Fund.</p></ack><fn-group><fn fn-type="con"><p>IBB and NM contributed to the literature review. NM, AKM, and CCL contributed to the conceptual design. SM, NM, ASN, and AD contributed to data analysis and visualization. IBB and SMB contributed to thematic analysis. SM, ENH, and NM contributed to manuscript writing. 
All authors contributed to the critical revision of the manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">CAPP</term><def><p>concept appraisal</p></def></def-item><def-item><term id="abb3">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb4">USMLE</term><def><p>United States Medical Licensing Examination</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Preiksaitis</surname><given-names>C</given-names> </name><name name-style="western"><surname>Rose</surname><given-names>C</given-names> </name></person-group><article-title>Opportunities, challenges, and future directions of generative artificial intelligence in medical education: scoping review</article-title><source>JMIR Med Educ</source><year>2023</year><month>10</month><day>20</day><volume>9</volume><fpage>e48785</fpage><pub-id pub-id-type="doi">10.2196/48785</pub-id><pub-id pub-id-type="medline">37862079</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bierer</surname><given-names>SB</given-names> </name><name name-style="western"><surname>Dannefer</surname><given-names>EF</given-names> </name><name name-style="western"><surname>Taylor</surname><given-names>C</given-names> </name><name name-style="western"><surname>Hall</surname><given-names>P</given-names> </name><name name-style="western"><surname>Hull</surname><given-names>AL</given-names> </name></person-group><article-title>Methods to assess students&#x2019; acquisition, application and integration of basic science knowledge in an innovative 
competency-based curriculum</article-title><source>Med Teach</source><year>2008</year><volume>30</volume><issue>7</issue><fpage>e171</fpage><lpage>e177</lpage><pub-id pub-id-type="doi">10.1080/01421590802139740</pub-id><pub-id pub-id-type="medline">18777415</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cai</surname><given-names>LZ</given-names> </name><name name-style="western"><surname>Shaheen</surname><given-names>A</given-names> </name><name name-style="western"><surname>Jin</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Performance of generative large language models on ophthalmology board-style questions</article-title><source>Am J Ophthalmol</source><year>2023</year><month>10</month><volume>254</volume><fpage>141</fpage><lpage>149</lpage><pub-id pub-id-type="doi">10.1016/j.ajo.2023.05.024</pub-id><pub-id pub-id-type="medline">37339728</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Amin</surname><given-names>KS</given-names> </name><name name-style="western"><surname>Davis</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Doshi</surname><given-names>R</given-names> </name><name name-style="western"><surname>Haims</surname><given-names>AH</given-names> </name><name name-style="western"><surname>Khosla</surname><given-names>P</given-names> </name><name name-style="western"><surname>Forman</surname><given-names>HP</given-names> </name></person-group><article-title>Accuracy of ChatGPT, Google Bard, and Microsoft Bing for simplifying radiology reports</article-title><source>Radiology</source><year>2023</year><month>11</month><volume>309</volume><issue>2</issue><fpage>e232561</fpage><pub-id pub-id-type="doi">10.1148/radiol.232561</pub-id><pub-id 
pub-id-type="medline">37987662</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Elkhatat</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Elsaid</surname><given-names>K</given-names> </name><name name-style="western"><surname>Almeer</surname><given-names>S</given-names> </name></person-group><article-title>Evaluating the efficacy of AI content detection tools in differentiating between human and AI-generated text</article-title><source>Int J Educ Integr</source><year>2023</year><month>09</month><day>1</day><volume>19</volume><issue>1</issue><fpage>17</fpage><pub-id pub-id-type="doi">10.1007/s40979-023-00140-5</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Westfall</surname><given-names>C</given-names> </name></person-group><article-title>Educators battle plagiarism as 89% of students admit to using OpenAI&#x2019;s ChatGPT for homework</article-title><source>Forbes</source><year>2023</year><month>01</month><day>28</day><access-date>2025-04-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.forbes.com/sites/chriswestfall/2023/01/28/educators-battle-plagiarism-as-89-of-students-admit-to-using-open-ais-chatgpt-for-homework/">https://www.forbes.com/sites/chriswestfall/2023/01/28/educators-battle-plagiarism-as-89-of-students-admit-to-using-open-ais-chatgpt-for-homework/</ext-link></comment></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mehta</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mehta</surname><given-names>N</given-names> </name></person-group><article-title>Embracing the illusion of explanatory depth: a 
strategic framework for using iterative prompting for integrating large language models in healthcare education</article-title><source>Med Teach</source><year>2025</year><month>02</month><volume>47</volume><issue>2</issue><fpage>208</fpage><lpage>211</lpage><pub-id pub-id-type="doi">10.1080/0142159X.2024.2382863</pub-id><pub-id pub-id-type="medline">39058399</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Silverman</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Ali</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Rybak</surname><given-names>A</given-names> </name><name name-style="western"><surname>van Goudoever</surname><given-names>JB</given-names> </name><name name-style="western"><surname>Leleiko</surname><given-names>NS</given-names> </name></person-group><article-title>Generative AI: potential and pitfalls in academic publishing</article-title><source>JPGN Rep</source><year>2023</year><month>11</month><day>8</day><volume>4</volume><issue>4</issue><fpage>e387</fpage><pub-id pub-id-type="doi">10.1097/PG9.0000000000000387</pub-id><pub-id pub-id-type="medline">38034432</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jowsey</surname><given-names>T</given-names> </name><name name-style="western"><surname>Stokes-Parish</surname><given-names>J</given-names> </name><name name-style="western"><surname>Singleton</surname><given-names>R</given-names> </name><name name-style="western"><surname>Todorovic</surname><given-names>M</given-names> </name></person-group><article-title>Medical education empowered by generative artificial intelligence large language models</article-title><source>Trends Mol 
Med</source><year>2023</year><month>12</month><volume>29</volume><issue>12</issue><fpage>971</fpage><lpage>973</lpage><pub-id pub-id-type="doi">10.1016/j.molmed.2023.08.012</pub-id><pub-id pub-id-type="medline">37718142</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Halkiopoulos</surname><given-names>C</given-names> </name><name name-style="western"><surname>Gkintoni</surname><given-names>E</given-names> </name></person-group><article-title>Leveraging AI in e-learning: personalized learning and adaptive assessment through cognitive neuropsychology&#x2014;a systematic analysis</article-title><source>Electronics (Basel)</source><year>2024</year><month>09</month><day>22</day><volume>13</volume><issue>18</issue><fpage>3762</fpage><pub-id pub-id-type="doi">10.3390/electronics13183762</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Supplementary materials regarding concept appraisal questions and grading, Bing Chat (subsequently renamed Microsoft Copilot), the iterative prompting used in this study, and the thematic analysis.</p><media xlink:href="mededu_v11i1e72034_app1.docx" xlink:title="DOCX File, 148 KB"/></supplementary-material></app-group></back></article>