<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="letter"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Educ</journal-id><journal-id journal-id-type="publisher-id">mededu</journal-id><journal-id journal-id-type="index">20</journal-id><journal-title>JMIR Medical Education</journal-title><abbrev-journal-title>JMIR Med Educ</abbrev-journal-title><issn pub-type="epub">2369-3762</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v10i1e63129</article-id><article-id pub-id-type="doi">10.2196/63129</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Letter</subject></subj-group></article-categories><title-group><article-title>Performance of ChatGPT-4o on the Japanese Medical Licensing Examination: Evaluation of Accuracy in Text-Only and Image-Based Questions</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Miyazaki</surname><given-names>Yuki</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hata</surname><given-names>Masahiro</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Omori</surname><given-names>Hisaki</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name
name-style="western"><surname>Hirashima</surname><given-names>Atsuya</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Nakagawa</surname><given-names>Yuta</given-names></name><degrees>DR</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Eto</surname><given-names>Mitsuhiro</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Takahashi</surname><given-names>Shun</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff5">5</xref><xref ref-type="aff" rid="aff6">6</xref><xref ref-type="aff" rid="aff7">7</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ikeda</surname><given-names>Manabu</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Psychiatry, Osaka University Graduate School of Medicine</institution><addr-line>Suita</addr-line><country>Japan</country></aff><aff id="aff2"><institution>Department of Psychiatry, Shichiyama Hospital</institution><addr-line>Sennan District</addr-line><country>Japan</country></aff><aff id="aff3"><institution>Department of Psychiatry, Osaka Psychiatric Medical Center</institution><addr-line>Hirakata</addr-line><country>Japan</country></aff><aff id="aff4"><institution>Department of Psychiatry, Asakayama General Hospital</institution><addr-line>Sakai</addr-line><country>Japan</country></aff><aff id="aff5"><institution>Clinical Research and Education Center, Asakayama General 
Hospital</institution><addr-line>Sakai</addr-line><country>Japan</country></aff><aff id="aff6"><institution>Graduate School of Rehabilitation Science, Osaka Metropolitan University</institution><addr-line>Habikino</addr-line><country>Japan</country></aff><aff id="aff7"><institution>Department of Neuropsychiatry, Wakayama Medical University</institution><addr-line>Wakayama</addr-line><country>Japan</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Lesselroth</surname><given-names>Blake</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Mall</surname><given-names>Rajib</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Cheng</surname><given-names>Yih-Dih</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Yuki Miyazaki, MD, Department of Psychiatry, Osaka University Graduate School of Medicine, 2-2 D3, Yamadaoka, Suita, 565-0871, Japan, 81 6-6879-3051, 81 6-6879-3059; <email>miyazaki@psy.med.osaka-u.ac.jp</email></corresp></author-notes><pub-date pub-type="collection"><year>2024</year></pub-date><pub-date pub-type="epub"><day>24</day><month>12</month><year>2024</year></pub-date><volume>10</volume><elocation-id>e63129</elocation-id><history><date date-type="received"><day>13</day><month>06</month><year>2024</year></date><date date-type="rev-recd"><day>20</day><month>09</month><year>2024</year></date><date date-type="accepted"><day>23</day><month>11</month><year>2024</year></date></history><copyright-statement>&#x00A9; Yuki Miyazaki, Masahiro Hata, Hisaki Omori, Atsuya Hirashima, Yuta Nakagawa, Mitsuhiro Eto, Shun Takahashi, Manabu Ikeda. Originally published in JMIR Medical Education (<ext-link ext-link-type="uri" xlink:href="https://mededu.jmir.org">https://mededu.jmir.org</ext-link>), 24.12.2024. 
</copyright-statement><copyright-year>2024</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Education, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://mededu.jmir.org/">https://mededu.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://mededu.jmir.org/2024/1/e63129"/><abstract><p>This study evaluated the performance of ChatGPT with GPT-4 Omni (GPT-4o) on the 118th Japanese Medical Licensing Examination. The study focused on both text-only and image-based questions. The model demonstrated a high level of accuracy overall, with no significant difference in performance between text-only and image-based questions. 
Common errors included clinical judgment mistakes and prioritization issues, underscoring the need for further improvement in the integration of artificial intelligence into medical education and practice.</p></abstract><kwd-group><kwd>medical education</kwd><kwd>artificial intelligence</kwd><kwd>clinical decision-making</kwd><kwd>GPT-4o</kwd><kwd>medical licensing examination</kwd><kwd>Japan</kwd><kwd>images</kwd><kwd>accuracy</kwd><kwd>AI technology</kwd><kwd>application</kwd><kwd>decision-making</kwd><kwd>image-based</kwd><kwd>reliability</kwd><kwd>ChatGPT</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Artificial intelligence (AI) models, like ChatGPT [<xref ref-type="bibr" rid="ref1">1</xref>], have shown promise in answering medical questions and assisting in clinical decision-making. Previous studies have evaluated AI performance on medical exams such as the United States Medical Licensing Examination (USMLE), where ChatGPT (GPT-3) achieved correct response rates of 42%&#x2010;64% on step 1 and 2 exams [<xref ref-type="bibr" rid="ref2">2</xref>]. Studies on the Japanese Medical Licensing Examination (JMLE) reported that GPT-4 achieved 77.7% correct responses on 292 questions in 2022 (the 116th JMLE) [<xref ref-type="bibr" rid="ref3">3</xref>] and 79.9% on 254 questions in 2023 (the 117th JMLE) [<xref ref-type="bibr" rid="ref4">4</xref>]. GPT-4, using prompt tuning, achieved 82.7% on essential questions and 77.2% on basic and clinical questions among 336 questions [<xref ref-type="bibr" rid="ref5">5</xref>]. GPT-4 Vision scored 78.2% on 386 questions, with significantly lower performance on image-based (71.9%) and table-based questions (35%) [<xref ref-type="bibr" rid="ref6">6</xref>]. No studies have evaluated an AI model on all 400 JMLE questions. 
ChatGPT with GPT-4 Omni (GPT-4o), released May 13, 2024, represents significantly more natural human-computer interaction; it can accept input as text, audio, images, and video and create output as text, audio, and images [<xref ref-type="bibr" rid="ref7">7</xref>], promising improved performance on image-based questions. Recent research has shown that GPT-4 has superior performance on psychiatric licensing examinations, emphasizing its potential in various medical fields [<xref ref-type="bibr" rid="ref8">8</xref>]. As generative AI is increasingly applied in medical education, understanding its limitations will be essential for effectively integrating it into learning and practice. This study aimed to evaluate the performance of ChatGPT-4o on the JMLE, specifically assessing its ability to handle both text- and image-based questions. We hypothesized that ChatGPT-4o would demonstrate high proficiency in answering these questions, potentially meeting the JMLE passing criteria.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>ChatGPT-4o was used from May 13 to May 19, 2024, to complete all 400 questions of the 118th JMLE, which was held in February 2024 [<xref ref-type="bibr" rid="ref9">9</xref>]. The model, updated with data up to May 2023, was assessed on both text-only and image-based questions. The Japanese-language questions and multiple-choice responses were input verbatim without prompt engineering or memory functions. Images were also input when present.</p></sec><sec id="s2-2"><title>Statistical Analysis</title><p>To compare the correct response rates between the image-based and text-only questions, an independent sample, 2-tailed <italic>t</italic>-test was used. Statistical significance was set at <italic>P</italic>&#x003C;.05 for all 2-tailed tests. 
All statistical analyses used Python&#x2019;s <italic>SciPy</italic> library (v1.13.1).</p></sec><sec id="s2-3"><title>Ethical Considerations</title><p>This study used previously available data and no human participants. Therefore, ethics approval was not mandated.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Evaluation Outcomes</title><p>Accuracy overall was 93.25%, with 93.48% for image-based questions and 93.18% for text-only questions (<xref ref-type="table" rid="table1">Table 1</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Performance comparison of ChatGPT with GPT-4 Omni across different sections in the 118th Japanese Medical Licensing Examination.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Characteristics</td><td align="left" valign="top">Correct responses among all questions, n/N (%)</td><td align="left" valign="top">Correct responses among text-only questions, n/N (%)</td><td align="left" valign="top">Correct responses among image-based questions, n/N (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Overall</td><td align="left" valign="top">373/400 (93.2)</td><td align="left" valign="top">287/308 (93.2)</td><td align="left" valign="top">86/92 (93.5)</td></tr><tr><td align="left" valign="top">Section A (A001-A075)</td><td align="left" valign="top">71/75 (94.7)</td><td align="left" valign="top">42/43 (97.7)</td><td align="left" valign="top">29/32 (90.6)</td></tr><tr><td align="left" valign="top">Section B (B001-B050)</td><td align="left" valign="top">46/50 (92)</td><td align="left" valign="top">39/43 (90.7)</td><td align="left" valign="top">7/7 (100)</td></tr><tr><td align="left" valign="top">Section C (C001-C075)</td><td align="left" valign="top">68/75 (90.7)</td><td align="left" valign="top">61/68 (89.7)</td><td align="left" valign="top">7/7 (100)</td></tr><tr><td align="left" valign="top">Section D 
(D001-D075)</td><td align="left" valign="top">71/75 (94.7)</td><td align="left" valign="top">43/45 (95.6)</td><td align="left" valign="top">28/30 (93.3)</td></tr><tr><td align="left" valign="top">Section E (E001-E050)</td><td align="left" valign="top">48/50 (96)</td><td align="left" valign="top">46/48 (95.8)</td><td align="left" valign="top">2/2 (100)</td></tr><tr><td align="left" valign="top">Section F (F001-F075)</td><td align="left" valign="top">69/75 (92)</td><td align="left" valign="top">56/61 (91.8)</td><td align="left" valign="top">13/14 (92.9)</td></tr></tbody></table></table-wrap><p>The correct response rate was not significantly different for text-only and image-based questions (<italic>t</italic><sub>5</sub>=&#x2212;1.190; <italic>P</italic>=.26).</p></sec><sec id="s3-2"><title>Error Classification</title><p>Errors made by ChatGPT-4o were analyzed and classified into 4 categories: diagnostic, logical, medical knowledge, and clinical judgment (<xref ref-type="table" rid="table2">Table 2</xref>). 
This classification system was developed and applied by multiple researchers with medical backgrounds; discrepancies were resolved through discussion.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Classification and details of all errors of ChatGPT with GPT-4 Omni in the 118th Japanese Medical Licensing Examination.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Problem number</td><td align="left" valign="bottom">Classification</td><td align="left" valign="bottom">Error details</td></tr></thead><tbody><tr><td align="left" valign="top">A021</td><td align="left" valign="top">Diagnostic error</td><td align="left" valign="top">Incorrect diagnosis: ChatGPT acknowledged multiple diagnostic possibilities but ultimately selected an incorrect option</td></tr><tr><td align="left" valign="top">A039</td><td align="left" valign="top">Logical error</td><td align="left" valign="top">Incorrect logic regarding risk reduction for blister package ingestion</td></tr><tr><td align="left" valign="top">A059</td><td align="left" valign="top">Medical knowledge error</td><td align="left" valign="top">Incorrect use of medical knowledge regarding the urea breath test</td></tr><tr><td align="left" valign="top">A061</td><td align="left" valign="top">Logical error</td><td align="left" valign="top">Incorrect final answer despite correct assessment of individual questions</td></tr><tr><td align="left" valign="top">B021</td><td align="left" valign="top">Medical knowledge error</td><td align="left" valign="top">Incorrect medical knowledge regarding the risk relationship of latex allergy after banana ingestion</td></tr><tr><td align="left" valign="top">B038</td><td align="left" valign="top">Medical knowledge error</td><td align="left" valign="top">Incorrect medical knowledge for classifying activity restriction</td></tr><tr><td align="left" valign="top">B047</td><td align="left" valign="top">Medical knowledge 
error</td><td align="left" valign="top">Incorrect medical knowledge about social support systems</td></tr><tr><td align="left" valign="top">B049</td><td align="left" valign="top">Medical knowledge error</td><td align="left" valign="top">Incorrect medical knowledge for describing the Trousseau sign</td></tr><tr><td align="left" valign="top">C012</td><td align="left" valign="top">Logical error</td><td align="left" valign="top">Correct medical knowledge but incorrect final answer (confusion between right and left)</td></tr><tr><td align="left" valign="top">C020</td><td align="left" valign="top">Medical knowledge error</td><td align="left" valign="top">Incorrect medical knowledge regarding occupational cataract risk</td></tr><tr><td align="left" valign="top">C040</td><td align="left" valign="top">Clinical judgment error</td><td align="left" valign="top">Incorrect triage decision, suggesting a black tag for a critically ill patient</td></tr><tr><td align="left" valign="top">C043</td><td align="left" valign="top">Clinical judgment error</td><td align="left" valign="top">Incorrect clinical judgment, prioritizing ultrasound over cardiotocogram</td></tr><tr><td align="left" valign="top">C055</td><td align="left" valign="top">Medical knowledge error</td><td align="left" valign="top">Incorrect medical knowledge regarding fetal rotation</td></tr><tr><td align="left" valign="top">C056</td><td align="left" valign="top">Logical error</td><td align="left" valign="top">Incorrect interpretation of the problem statement</td></tr><tr><td align="left" valign="top">C074</td><td align="left" valign="top">Medical knowledge error</td><td align="left" valign="top">In a case of hyperosmolar hyperglycemic syndrome, recommendation of a hypotonic solution instead of the correct choice of normal saline (0.9% sodium chloride)</td></tr><tr><td align="left" valign="top">D012</td><td align="left" valign="top">Medical knowledge error</td><td align="left" valign="top">Incorrect medical knowledge 
regarding chronic kidney disease severity classification</td></tr><tr><td align="left" valign="top">D017</td><td align="left" valign="top">Diagnostic error</td><td align="left" valign="top">Incorrect diagnosis: failure to accurately integrate textual and image data, leading to an erroneous diagnostic conclusion</td></tr><tr><td align="left" valign="top">D035</td><td align="left" valign="top">Medical knowledge error</td><td align="left" valign="top">In a case of metabolic alkalosis, failure to consider the importance of lactate-free solution</td></tr><tr><td align="left" valign="top">D047</td><td align="left" valign="top">Diagnostic error</td><td align="left" valign="top">Incorrect diagnosis: selection of the wrong option without considering or mentioning other differential diagnoses</td></tr><tr><td align="left" valign="top">E034</td><td align="left" valign="top">Medical knowledge error</td><td align="left" valign="top">Incorrect medical knowledge regarding postprandial blood glucose targets in gestational diabetes management</td></tr><tr><td align="left" valign="top">E041</td><td align="left" valign="top">Medical knowledge error</td><td align="left" valign="top">Incorrect medical knowledge for Glasgow Coma Scale motor response</td></tr><tr><td align="left" valign="top">F001</td><td align="left" valign="top">Medical knowledge error</td><td align="left" valign="top">Incorrect medical knowledge regarding the design principles of tactile paving</td></tr><tr><td align="left" valign="top">F010</td><td align="left" valign="top">Medical knowledge error</td><td align="left" valign="top">Incorrect medical knowledge regarding the peak population year in Japan</td></tr><tr><td align="left" valign="top">F018</td><td align="left" valign="top">Medical knowledge error</td><td align="left" valign="top">Correct image interpretation but incorrect medical knowledge regarding sagittal suture alignment</td></tr><tr><td align="left" valign="top">F054</td><td align="left" 
valign="top">Clinical judgment error</td><td align="left" valign="top">Incorrect decision on referring to a specialized hospital versus a community support hospital</td></tr><tr><td align="left" valign="top">F066</td><td align="left" valign="top">Logical error</td><td align="left" valign="top">Incorrect interpretation and judgment regarding wheelchair options</td></tr><tr><td align="left" valign="top">F068</td><td align="left" valign="top">Logical error</td><td align="left" valign="top">Incorrect interpretation of the problem statement regarding creatinine clearance calculation</td></tr></tbody></table></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>ChatGPT-4o achieved an overall correct response rate of 93.2% on the 2024 (118th) JMLE without prompt engineering or memory functions, surpassing prior GPT models. Its performance did not decline on image-based or table-based questions, marking a significant improvement in multimodal question handling. This suggests that integrating multimodal capabilities may have significantly enhanced its clinical decision-making skills.</p><p>ChatGPT-4o&#x2019;s performance meets the 118th JMLE passing criteria [<xref ref-type="bibr" rid="ref10">10</xref>], which require (1) at least 160/200 points for compulsory questions (sections B and F); (2) at least 230/300 points for noncompulsory questions (sections A, C, D, and E); and (3) no more than 3 incorrect choices in contraindicated options, which remain undisclosed.</p><p>Although ChatGPT-4o met criteria (1) and (2), some responses suggest problematic clinical judgment. In question C040, the model incorrectly suggested a black tag (deceased/expectant) for a critically ill patient during triage, when the correct answer was a red tag (an immediate life-threatening condition). This error could have severe consequences in real-world emergency situations, potentially denying urgent care to a rescuable patient. 
In question C043, it incorrectly prioritized ultrasound over cardiotocography in a clinical decision. These errors highlight the potential for AI models to make clinical errors in judgment, as GPT-4o struggled with questions requiring clinical prioritization. This critical skill will become increasingly important in medical education.</p><p>These findings underscore the need for continued enhancement of AI models to ensure reliable and accurate clinical decision-making. Understanding and addressing these limitations will be critical for effectively integrating AI into medical education and practice.</p></sec></body><back><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">GPT-4o</term><def><p>GPT-4 Omni</p></def></def-item><def-item><term id="abb3">JMLE</term><def><p>Japanese Medical Licensing Examination</p></def></def-item><def-item><term id="abb4">USMLE</term><def><p>United States Medical Licensing Examination</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>ChatGPT</article-title><source>OpenAI</source><year>2024</year><access-date>2024-05-31</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://openai.com/chatgpt/">https://openai.com/chatgpt/</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gilson</surname><given-names>A</given-names> </name><name name-style="western"><surname>Safranek</surname><given-names>CW</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>T</given-names> </name><etal/></person-group><article-title>How does ChatGPT perform on the United States Medical Licensing 
Examination (USMLE)? The implications of large language models for medical education and knowledge assessment</article-title><source>JMIR Med Educ</source><year>2023</year><month>02</month><day>8</day><volume>9</volume><fpage>e45312</fpage><pub-id pub-id-type="doi">10.2196/45312</pub-id><pub-id pub-id-type="medline">36753318</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yanagita</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yokokawa</surname><given-names>D</given-names> </name><name name-style="western"><surname>Uchida</surname><given-names>S</given-names> </name><name name-style="western"><surname>Tawara</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ikusaka</surname><given-names>M</given-names> </name></person-group><article-title>Accuracy of ChatGPT on medical questions in the National Medical Licensing Examination in Japan: evaluation study</article-title><source>JMIR Form Res</source><year>2023</year><month>10</month><day>13</day><volume>7</volume><fpage>e48023</fpage><pub-id pub-id-type="doi">10.2196/48023</pub-id><pub-id pub-id-type="medline">37831496</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Takagi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Watari</surname><given-names>T</given-names> </name><name name-style="western"><surname>Erabi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sakaguchi</surname><given-names>K</given-names> </name></person-group><article-title>Performance of GPT-3.5 and GPT-4 on the Japanese Medical Licensing Examination: comparison study</article-title><source>JMIR Med 
Educ</source><year>2023</year><month>06</month><day>29</day><volume>9</volume><fpage>e48002</fpage><pub-id pub-id-type="doi">10.2196/48002</pub-id><pub-id pub-id-type="medline">37384388</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tanaka</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Nakata</surname><given-names>T</given-names> </name><name name-style="western"><surname>Aiga</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Performance of generative pretrained transformer on the National Medical Licensing Examination in Japan</article-title><source>PLOS Dig Health</source><year>2024</year><month>01</month><volume>3</volume><issue>1</issue><fpage>e0000433</fpage><pub-id pub-id-type="doi">10.1371/journal.pdig.0000433</pub-id><pub-id pub-id-type="medline">38261580</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Takagi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Koda</surname><given-names>M</given-names> </name><name name-style="western"><surname>Watari</surname><given-names>T</given-names> </name></person-group><article-title>The performance of ChatGPT-4V in interpreting images and tables in the Japanese Medical Licensing Exam</article-title><source>JMIR Med Educ</source><year>2024</year><month>05</month><day>23</day><volume>10</volume><fpage>e54283</fpage><pub-id pub-id-type="doi">10.2196/54283</pub-id><pub-id pub-id-type="medline">38787024</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="web"><article-title>Hello GPT-4o</article-title><source>OpenAI</source><access-date>2024-05-31</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://openai.com/index/hello-gpt-4o/">https://openai.com/index/hello-gpt-4o/</ext-link></comment></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Kao</surname><given-names>YC</given-names> </name><name name-style="western"><surname>Tsai</surname><given-names>SJ</given-names> </name><etal/></person-group><article-title>Comparing the performance of ChatGPT GPT-4, Bard, and Llama-2 in the Taiwan Psychiatric Licensing Examination and in differential diagnosis with multi-center psychiatrists</article-title><source>Psychiatry Clin Neurosci</source><year>2024</year><month>06</month><volume>78</volume><issue>6</issue><fpage>347</fpage><lpage>352</lpage><pub-id pub-id-type="doi">10.1111/pcn.13656</pub-id><pub-id pub-id-type="medline">38404249</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="web"><article-title>The 118th National Medical Examination questions and correct answers [Japanese]</article-title><source>Ministry of Health, Labour and Welfare</source><access-date>2024-05-13</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.mhlw.go.jp/seisakunitsuite/bunya/kenkou_iryou/iryou/topics/tp240424-01.html">https://www.mhlw.go.jp/seisakunitsuite/bunya/kenkou_iryou/iryou/topics/tp240424-01.html</ext-link></comment></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="web"><article-title>Announcement of successful passage of the 118th National Medical Examination [Japanese]</article-title><source>Ministry of Health, Labour and Welfare</source><access-date>2024-05-31</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.mhlw.go.jp/content/10803000/001226841.pdf">https://www.mhlw.go.jp/content/10803000/001226841.pdf</ext-link></comment></nlm-citation></ref></ref-list></back></article>