<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Educ</journal-id><journal-id journal-id-type="publisher-id">mededu</journal-id><journal-id journal-id-type="index">20</journal-id><journal-title>JMIR Medical Education</journal-title><abbrev-journal-title>JMIR Med Educ</abbrev-journal-title><issn pub-type="epub">2369-3762</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v11i1e56850</article-id><article-id pub-id-type="doi">10.2196/56850</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Performance of ChatGPT-3.5 and ChatGPT-4 in the Taiwan National Pharmacist Licensing Examination: Comparative Evaluation Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Wang</surname><given-names>Ying-Mei</given-names></name><degrees>MBA</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Shen</surname><given-names>Hung-Wei</given-names></name><degrees>MBA</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Chen</surname><given-names>Tzeng-Ji</given-names></name><degrees>Dr Med</degrees><xref 
ref-type="aff" rid="aff5">5</xref><xref ref-type="aff" rid="aff6">6</xref><xref ref-type="aff" rid="aff7">7</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Chiang</surname><given-names>Shu-Chiung</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff8">8</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lin</surname><given-names>Ting-Guan</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff4">4</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Medical Education and Research, Taipei Veterans General Hospital Hsinchu Branch</institution><addr-line>81, Section 1, Zhongfeng Road, Zhudong</addr-line><addr-line>Hsinchu</addr-line><country>Taiwan</country></aff><aff id="aff2"><institution>Department of Pharmacy, Taipei Veterans General Hospital Hsinchu Branch</institution><addr-line>Hsinchu</addr-line><country>Taiwan</country></aff><aff id="aff3"><institution>School of Medicine, National Tsing Hua University</institution><addr-line>Hsinchu</addr-line><country>Taiwan</country></aff><aff id="aff4"><institution>Hsinchu County Pharmacists Association</institution><addr-line>Hsinchu</addr-line><country>Taiwan</country></aff><aff id="aff5"><institution>Department of Family Medicine, Taipei Veterans General Hospital Hsinchu Branch</institution><addr-line>Hsinchu</addr-line><country>Taiwan</country></aff><aff id="aff6"><institution>Department of Family Medicine, Taipei Veterans General Hospital</institution><addr-line>Taipei</addr-line><country>Taiwan</country></aff><aff id="aff7"><institution>Department of Post-Baccalaureate Medicine, National Chung Hsing University</institution><addr-line>Taichung</addr-line><country>Taiwan</country></aff><aff id="aff8"><institution>Institute of Hospital and Health Care Administration, School of Medicine, National Yang Ming Chiao Tung 
University</institution><addr-line>Taipei</addr-line><country>Taiwan</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Lesselroth</surname><given-names>Blake</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Farid</surname><given-names>Ghulam</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Zhai</surname><given-names>Suodi</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Jen</surname><given-names>Wang Yu</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Ying-Mei Wang, MBA, Department of Medical Education and Research, Taipei Veterans General Hospital Hsinchu Branch, 81, Section 1, Zhongfeng Road, Zhudong, Hsinchu, 310, Taiwan, 886 03-5962134 ext 127; <email>teresawang@vhct.gov.tw</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>17</day><month>1</month><year>2025</year></pub-date><volume>11</volume><elocation-id>e56850</elocation-id><history><date date-type="received"><day>28</day><month>01</month><year>2024</year></date><date date-type="rev-recd"><day>26</day><month>09</month><year>2024</year></date><date date-type="accepted"><day>17</day><month>12</month><year>2024</year></date></history><copyright-statement>&#x00A9; Ying-Mei Wang, Hung-Wei Shen, Tzeng-Ji Chen, Shu-Chiung Chiang, Ting-Guan Lin. Originally published in JMIR Medical Education (<ext-link ext-link-type="uri" xlink:href="https://mededu.jmir.org">https://mededu.jmir.org</ext-link>), 17.1.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Education, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://mededu.jmir.org/">https://mededu.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://mededu.jmir.org/2025/1/e56850"/><abstract><sec><title>Background</title><p>OpenAI released versions ChatGPT-3.5 and GPT-4 between 2022 and 2023. GPT-3.5 has demonstrated proficiency in various examinations, particularly the United States Medical Licensing Examination. However, GPT-4 has more advanced capabilities.</p></sec><sec><title>Objective</title><p>This study aims to examine the efficacy of GPT-3.5 and GPT-4 within the Taiwan National Pharmacist Licensing Examination and to ascertain their utility and potential application in clinical pharmacy and education.</p></sec><sec sec-type="methods"><title>Methods</title><p>The pharmacist examination in Taiwan consists of 2 stages: basic subjects and clinical subjects. In this study, exam questions were manually fed into the GPT-3.5 and GPT-4 models, and their responses were recorded; graphic-based questions were excluded. 
This study encompassed three steps: (1) determining the answering accuracy of GPT-3.5 and GPT-4, (2) categorizing question types and observing differences in model performance across these categories, and (3) comparing model performance on calculation and situational questions. Microsoft Excel and R software were used for statistical analyses.</p></sec><sec sec-type="results"><title>Results</title><p>GPT-4 achieved an accuracy rate of 72.9%, overshadowing GPT-3.5, which achieved 59.1% (<italic>P</italic>&#x003C;.001). In the basic subjects category, GPT-4 significantly outperformed GPT-3.5 (73.4% vs 53.2%; <italic>P</italic>&#x003C;.001). However, in clinical subjects, only minor differences in accuracy were observed. Specifically, GPT-4 outperformed GPT-3.5 in the calculation and situational questions.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study demonstrates that GPT-4 outperforms GPT-3.5 in the Taiwan National Pharmacist Licensing Examination, particularly in basic subjects. While GPT-4 shows potential for use in clinical practice and pharmacy education, its limitations warrant caution. 
Future research should focus on refining prompts, improving model stability, integrating medical databases, and designing questions that better assess student competence and minimize guessing.</p></sec></abstract><kwd-group><kwd>artificial intelligence</kwd><kwd>ChatGPT</kwd><kwd>chat generative pre-trained transformer</kwd><kwd>GPT-4</kwd><kwd>medical education</kwd><kwd>educational measurement</kwd><kwd>pharmacy licensure</kwd><kwd>Taiwan</kwd><kwd>Taiwan national pharmacist licensing examination</kwd><kwd>learning model</kwd><kwd>AI</kwd><kwd>Chatbot</kwd><kwd>pharmacist</kwd><kwd>evaluation and comparison study</kwd><kwd>pharmacy</kwd><kwd>statistical analyses</kwd><kwd>medical databases</kwd><kwd>medical decision-making</kwd><kwd>generative AI</kwd><kwd>machine learning</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>With the advent of the artificial intelligence (AI) era, applications of AI in the medical field have increased with ChatGPT (OpenAI) being the most notable examples. ChatGPT is a large language model based on a generative pretrained transformer developed by OpenAI. ChatGPT-3.5 (GPT-3.5) was the first publicly accessible version, while ChatGPT-4 (GPT-4) was the subscription version. GPT-4 surpasses GPT-3.5 in advanced reasoning, almost nearing human-level performance in professional and academic examinations [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. For instance, GPT-4 ranked in the top 10% of scores on a law examination, whereas GPT-3.5 ranked in the bottom 10% [<xref ref-type="bibr" rid="ref3">3</xref>]. Additionally, GPT-3.5 resolved 90% of false-belief tasks, achieving the level of a 7-year-old child, whereas GPT-4 resolved 95% of these tasks [<xref ref-type="bibr" rid="ref4">4</xref>]. 
Following its launch, ChatGPT has been extensively studied and discussed in both the medical and educational fields [<xref ref-type="bibr" rid="ref5">5</xref>]. The most widely recognized performance of GPT-3.5 has been on the United States Medical Licensing Examination (USMLE) [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]; however, GPT-3.5&#x2019;s performance did not meet expectations in other examinations [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. Gradually, Nori et al [<xref ref-type="bibr" rid="ref12">12</xref>] observed that the accuracy of GPT-4 was higher than that of GPT-3.5 on the USMLE, and further studies confirmed that GPT-4 outperforms GPT-3.5 [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. However, there has been limited research on its performance in pharmacy examinations.</p><p>In the field of pharmacy, GPT-3.5 has exhibited commendable performance in clinical toxicology and pharmacology [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>], although it has not passed the National Pharmacist Licensing Examination (NPLE) in Taiwan [<xref ref-type="bibr" rid="ref19">19</xref>]. However, GPT-4 has outperformed GPT-3.5 in drug information [<xref ref-type="bibr" rid="ref20">20</xref>] and China&#x2019;s Pharmacist Licensing Examination [<xref ref-type="bibr" rid="ref21">21</xref>]. 
Generative AI models, a large language model, has been applied in drug development and novel drug design [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>], pharmacovigilance [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>], pharmacokinetic model development [<xref ref-type="bibr" rid="ref27">27</xref>], pharmacy education, and research writing [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>].</p></sec><sec id="s1-2"><title>Goal of the Study</title><p>According to previous studies, GPT-3.5 failed to pass the NPLE, indicating its limitations in pharmacy education. Based on these findings, we hypothesized that GPT-4 would outperform GPT-3.5 in this context, demonstrating greater proficiency. To test this hypothesis, this study compared the performance of GPT-3.5 and GPT-4 on Taiwan&#x2019;s NPLE. Additionally, we conducted a comprehensive assessment of their performance across various question types, with a focus on pharmacy-related tasks such as pharmacokinetic calculation and clinical decision-making scenarios. This analysis aims to determine the practical applications of GPT-4 in pharmacy education and establish guidelines for its optimal use in this field.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Background</title><p>The NPLE in Taiwan is divided into 2 stages. The first stage focuses on 3 basic subjects: pharmacology and pharmaceutical chemistry, pharmaceutical analysis and pharmacognosy (including traditional Chinese medicine), and pharmaceutics and biopharmaceutics. The second stage focuses on 3 clinical subjects: dispensing and clinical pharmacy, pharmacotherapy, and pharmacy administration and pharmacy law. The first and second stages of the examination have 240 and 210 multiple-choice questions, respectively. 
Pharmacy students typically complete the first-stage exam after completing their third year of university coursework. They become eligible for the second-stage exam only after passing the first examination, completing their internships and obtaining their graduation certificates. After passing the second-stage examination, candidates receive their pharmacist certificate, allowing them to practice as a pharmacist legally.</p></sec><sec id="s2-2"><title>Data Source</title><p>This study used the 2-stage NPLE questions released by the Ministry of Examination in February 2023, with each subject exam lasting for 1 hour. The version of NPLE used in this study was the most recent available at the time of research. We used both GPT-3.5 (free version) and GPT-4 (licensed version). No temperature settings were applied. Examination questions were manually fed into GPT-4 and GPT-3.5 sequentially. To simulate student responses, complete questions were entered into the models without tailored prompts. One question was input at a time, and the responses were recorded for analysis. Since GPT-3.5 cannot process images and image functionality of GPT-4 was unavailable during the analysis, only text-based questions were used. Questions containing graphics, such as chemical structures, tables, symbols, and formulas were excluded. Both models were presented with the same set of questions under identical conditions. Due to the limitations on the number of times the model could be used and required cooling time between queries, all questions were answered sequentially and not timed to avoid any potential bias introduced by time constraints.</p></sec><sec id="s2-3"><title>Study Design</title><p>The study was divided into 3 parts; the first part compared the accuracy of GPT-4 and GPT-3.5, as well as in different subjects. The second part compared the accuracy of GPT-4 and GPT-3.5 across different question types. 
These questions were categorized into 4 types: memory-based questions (1 correct word answer out of 4 options, low-level thinking; <xref ref-type="fig" rid="figure1">Figure 1</xref>), judgment questions (1 correct statement out of 4, medium-level thinking; <xref ref-type="fig" rid="figure2">Figure 2</xref>), reverse questions (1 incorrect statement out of 4, medium to high-level thinking; <xref ref-type="fig" rid="figure3">Figure 3</xref>), and comprehension questions (multiple-choice or matching types, high-level thinking; <xref ref-type="fig" rid="figure4">Figure 4</xref>). One pharmacist classified the questions according to these established categories and the second pharmacist reviewed the classifications. In the event of disagreement, a third pharmacist was consulted for the final decision. All pharmacists had over 10 years of experience in medical center hospitals or community teaching hospitals. The third part compared the accuracy of GPT-4 and GPT-3.5 for calculation-based and case scenario questions (<xref ref-type="fig" rid="figure5">Figure 5</xref>). 
Model testing for this study was conducted from May 10 to July 20, 2023.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Template of a memory-based question (choose 1 correct word from 4 options, requiring low-level thinking).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mededu_v11i1e56850_fig01.png"/></fig><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Template of a judgment question (choose 1 correct statement from 4 options, requiring medium-level thinking).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mededu_v11i1e56850_fig02.png"/></fig><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Template of a reverse question (choose 1 incorrect statement from 4 options, requiring medium- to high-level thinking).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mededu_v11i1e56850_fig03.png"/></fig><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Template of a comprehension question (multiple-choice or matching types, requiring high-level thinking).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mededu_v11i1e56850_fig04.png"/></fig><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Template of a case scenario question.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mededu_v11i1e56850_fig05.png"/></fig></sec><sec id="s2-4"><title>Statistical Analysis</title><p>Microsoft Excel 2019 was used to compare the accuracy rates of the 2 models. <italic>&#x03C7;</italic><sup>2</sup> tests were used to compare the overall accuracy rates of answers obtained using GPT-3.5 and GPT-4. 
McNemar tests were used to compare the consistency in answers between GPT-3.5 and GPT-4, and for the calculation-based and situational question types using R software (version 4.2.2; R Foundation for Statistical Computing).</p></sec><sec id="s2-5"><title>Ethical Considerations</title><p>This study involved comparing the performance of ChatGPT-4 and ChatGPT-3.5 in the pharmacist licensing examination. It did not involve human participants. As per the guidelines of the 'Human Research Cases Exempted from Ethics Review Board' issued by the Ministry of Health and Welfare, Taiwan, this study was exempted from Ethics Review Board analysis.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Accuracy in Different Subjects</title><p>In total, 203 and 210 questions were included for analysis from the first- and second-stage examinations, respectively, after excluding 37 questions containing graphical elements (N=413) (<xref ref-type="fig" rid="figure6">Figure 6</xref>). GPT-4 had an overall accuracy of 72.9% (301/413), easily passing the test (60% threshold) and outperforming GPT-3.5 which achieved an accuracy of 59.1% (244/413; <italic>P</italic>&#x003C;.001). In terms of accuracy by stage, GPT-4&#x2019;s overall accuracy was significantly higher than that of GPT-3.5 (73.4% vs 53.2% or 149/203 vs 108/203; <italic>P</italic>&#x003C;.001) in basic subjects of the first stage. GPT-4 also significantly outperformed GPT-3.5 in each of the 3 basic subjects. In the clinical subjects of the second stage, GPT-4&#x2019;s accuracy was higher but not statistically significant than that of GPT-3.5 (72.4% vs 64.8% or 152/210 vs 136/210; <italic>P</italic>=.096). In pharmacy administration and pharmacy law, GPT-4&#x2019;s accuracy was lower than that of GPT-3.5 (56% vs 60% or 28/50 vs 30/50; <italic>P</italic>=.96). 
Among individual subjects, significant differences were observed in pharmacology and pharmaceutical chemistry (<italic>P</italic>=.02), pharmaceutical analysis and pharmacognosy (<italic>P</italic>=.02), and pharmaceutics and biopharmaceutics (<italic>P</italic>=.002). No significant differences were noted in dispensing pharmacy and clinical pharmacy (<italic>P</italic>=.07), pharmacotherapeutics (<italic>P</italic>=.10), and pharmacy administration and pharmacy law (<italic>P</italic>=.48).</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Accuracy comparison of ChatGPT-3.5 and ChatGPT-4 across different subjects. *<italic>P</italic>&#x003C;.05.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mededu_v11i1e56850_fig06.png"/></fig><p>The overall consistency among answers significantly differed between the 2 models (68%, <italic>P</italic>&#x003C;.001), with GPT-4 showing consistent correct answers in 49.4% (n=204) of cases and consistent incorrect answers in 18.6% (n=77) of cases (<xref ref-type="table" rid="table1">Table 1</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Performance comparison of consistency between ChatGPT-3.5 and ChatGPT-4.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">ChatGPT-3.5 responses</td><td align="left" valign="bottom" colspan="2">GPT-4</td></tr></thead><tbody><tr><td align="left" valign="top"/><td align="left" valign="top">Correct answers, n (%)</td><td align="left" valign="top">Incorrect answers, n (%)</td></tr><tr><td align="left" valign="top">Correct answer</td><td align="left" valign="top">204 (49.4)</td><td align="left" valign="top">38 (9.2)</td></tr><tr><td align="left" valign="top">Incorrect answer</td><td align="left" valign="top">94 (22.8)</td><td align="left" valign="top">77 (18.6)</td></tr></tbody></table></table-wrap></sec><sec id="s3-2"><title>Accuracy 
in Different Question Types</title><p>Among the 413 examination questions analyzed, memory-based questions were the most common (n=254, 61.5%), followed by judgment questions (n=82, 19.9%), reverse questions (n=46, 11.1%), and comprehension questions (n=31, 7.5%). GPT-4 and GPT-3.5 did not differ significantly in terms of accuracy of answers between question types (<italic>P</italic>=.461 vs <italic>P</italic>=.18; <xref ref-type="table" rid="table2">Table 2</xref>). GPT-4 is significantly better than GPT-3.5 in memory-based questions (<italic>P</italic>&#x003C;.001) and comprehension-based questions(<italic>P</italic>=.03).</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Accuracy comparison of ChatGPT-3.5 and ChatGPT-4 by question type.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Question type</td><td align="left" valign="bottom">GPT-3.5 Correct answers, n (%)</td><td align="left" valign="bottom">GPT-4 Correct answers, n (%)</td><td align="left" valign="bottom">Total, n (%)</td><td align="left" valign="bottom"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top">Memory-based questions</td><td align="left" valign="top">155 (61)</td><td align="left" valign="top">188 (74)</td><td align="left" valign="top">254 (61.5)</td><td align="char" char="." valign="top">&#x003C;.001<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td></tr><tr><td align="left" valign="top">Judgment questions</td><td align="left" valign="top">21 (45.7)</td><td align="left" valign="top">30 (65.2)</td><td align="left" valign="top">46 (11.1)</td><td align="char" char="." valign="top">.06</td></tr><tr><td align="left" valign="top">Reverse questions</td><td align="left" valign="top">51 (62.6)</td><td align="left" valign="top">56 (68.3)</td><td align="left" valign="top">82 (19.9)</td><td align="char" char="." 
valign="top">.41</td></tr><tr><td align="left" valign="top">Comprehension questions</td><td align="left" valign="top">16 (51.6)</td><td align="left" valign="top">24 (77.4)</td><td align="left" valign="top">31 (7.5)</td><td align="char" char="." valign="top">.03<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup><italic>P</italic>&#x003C;.05.</p></fn></table-wrap-foot></table-wrap><p><xref ref-type="fig" rid="figure7">Figure 7</xref> shows the performance comparison of GPT-3.5 and GPT-4 across question types. The data provided insights into the relative strengths and weaknesses of each model.</p><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>Performance comparison of GPT-3.5 and GPT-4 across question types (A) memory-based, (B) judgement, (C) reverse , and (D) comprehension. The heatmaps display the number of answers, with darker shades indicating higher counts of correct responses and highlighting model performance.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mededu_v11i1e56850_fig07.png"/></fig><p>Further analysis of the discrepancies between the models revealed no significant difference in questions answered incorrectly by GPT-3.5 but correctly by GPT-4 (n=94) and vice versa (n=38) across the 4 question types (<italic>P</italic>=.27 vs <italic>P</italic>=.95).</p><p>For calculation-based questions, GPT-4 showed higher accuracy than that of GPT-3.5 (80% vs 40%, <italic>P</italic>=.03), with the most pronounced difference in pharmaceutics and biopharmaceutics subjects. 
In scenario-based questions, GPT-4 also outperformed GPT-3.5 in terms of accuracy (63% vs 44.4%, <italic>P</italic>=.41), though the difference was nonsignificant.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>This study demonstrates that GPT-4 significantly outperformed GPT-3.5 in the Taiwan NPLE, surpassing the passing threshold, especially in basic pharmacy subjects. These subjects, which have only a 13.82% passing rate among human students, are particularly challenging. GPT-4 excelled in areas such as pharmacology, pharmaceutical chemistry, pharmaceutical analysis, and pharmaceutics, consistently providing correct answers and comprehensive explanations. Although GPT-4 also performed better than GPT-3.5 in clinical subjects such as dispensing pharmacy and therapeutics, the performance gap was narrower in these areas.</p><p>In specific subjects like pharmacodynamics, pharmacokinetics, and drug-related topics in the autonomic nervous system, GPT-4 consistently provided accurate responses, where GPT-3.5 often faltered. Additionally, GPT-4 exhibited superior accuracy in bioavailability, dosing, and pharmacokinetic calculations. However, GPT-4&#x2019;s accuracy dropped in topics like herbal medicines and pharmacy law, emphasizing the need for further model refinement in these areas [<xref ref-type="bibr" rid="ref30">30</xref>].</p></sec><sec id="s4-2"><title>Comparison with Literature</title><p>Previous studies have established that GPT-4 consistently outperforms GPT-3.5 in various medical exams, including the Australian Medical Licensing Examination [<xref ref-type="bibr" rid="ref31">31</xref>], Canadian Radiology Examination [<xref ref-type="bibr" rid="ref15">15</xref>], Turkish Medical Examination [<xref ref-type="bibr" rid="ref32">32</xref>], and Japanese Medical Licensing Examination [<xref ref-type="bibr" rid="ref33">33</xref>]. 
In many of these examinations, GPT-4 consistently achieved scores above 70% [<xref ref-type="bibr" rid="ref34">34</xref>-<xref ref-type="bibr" rid="ref36">36</xref>]. This study aligns with those findings, showing GPT-4&#x2019;s superior performance in the Taiwan NPLE. Unlike prior research that focused on real-world clinical applications [<xref ref-type="bibr" rid="ref37">37</xref>-<xref ref-type="bibr" rid="ref43">43</xref>], this study comprehensively assessed the models across various pharmacy domains.</p><p>A study by Choi [<xref ref-type="bibr" rid="ref44">44</xref>] reported that GPT-3.5 performed well on memory-based questions but struggled with problem-solving, whereas GPT-4 demonstrated better performance in comprehension and judgment tasks. Similarly, a radiology study suggested that GPT-4 outperformed GPT-3.5 on higher-order thinking questions but not on lower-order questions [<xref ref-type="bibr" rid="ref15">15</xref>]. These findings slightly differ from the results of our study, where GPT-3.5 exhibited higher accuracy in both memory-based (low-level thinking) and reverse (mid-level thinking) questions. However, GPT-4 surpassed GPT-3.5 across all question types, particularly in comprehension (high-level thinking) and memory-based (low-level thinking) questions. In judgment, reverse, and comprehension questions&#x2014;tasks that demand more advanced reasoning&#x2014;GPT-4 demonstrated superior accuracy with fewer errors compared to GPT-3.5. Additionally, GPT-4&#x2019;s ability to correct errors made by GPT-3.5 reinforces its potential as a more reliable model for pharmacy-related assessments.</p><p>Further, GPT-4 significantly outperformed GPT-3.5 in calculation questions. 
While GPT-3.5 provided step-by-step explanations but often guessed the final answer&#x2014;a phenomenon known as &#x2018;hallucination&#x2019; due to insufficient training&#x2014;GPT-4 exhibited stronger logical reasoning (<xref ref-type="fig" rid="figure8">Figure 8</xref>) with over 80% accuracy. However, it still made errors in 20% of cases, indicating the need for caution during its use [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. In clinical applications, modifying prompts has been shown to improve GPT&#x2019;s accuracy [<xref ref-type="bibr" rid="ref46">46</xref>]. For integrated analysis questions, GPT-4&#x2019;s performance was slightly better than that of GPT-3.5, consistent with findings from a nursing licensure examination in Japan [<xref ref-type="bibr" rid="ref14">14</xref>].</p><fig position="float" id="figure8"><label>Figure 8.</label><caption><p>Template of the questions for which GPT-4 exhibited stronger logical reasoning.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mededu_v11i1e56850_fig08.png"/></fig></sec><sec id="s4-3"><title>Implications for Education</title><p>The study highlights GPT-4&#x2019;s potential as an educational tool, particularly in pharmacy education. GPT-4 can offer extensive practice opportunities for pharmacy students across both basic and clinical subjects, providing both correct answers and detailed explanations [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref47">47</xref>] to enhance understanding. Given the lower passing rates among pharmacy students in basic subjects that were challenging, GPT-4 could assist in individualized learning. Its strength in comprehension and integrated analysis questions makes it a valuable resource for fostering critical thinking skills.</p><p>Despite its advancements over GPT-3.5, GPT-4&#x2019;s occasional inconsistencies suggest that model stability is not yet perfect. 
Questions correctly answered by GPT-3.5 were not always consistently answered by GPT-4. Nevertheless, GPT-4&#x2019;s accuracy, approaching 80% suggests that it can serve as an effective learning supplement, provided educators guide students in minimizing potential errors. For instance, specifying clearer prompts, such as &#x201C;Please do not add your own opinions&#x201D;, may help mitigate hallucinations and enhance its use in educational settings.</p><p>In addition, educators should consider adjusting the format of examinations by replacing memory-based questions with comprehension questions, which can reduce the chances of guessing and better assess students&#x2019; true intelligence.</p></sec><sec id="s4-4"><title>Limitations</title><p>The primary limitation of this study is the time frame during which the models were tested (ie, from May 10 to July 20, 2023), which may affect the reproducibility of the results if retested in the future. Additionally, both GPT-3.5 and GPT-4 struggled with recognizing structural diagrams, limiting their performance in areas such as pharmaceutical chemistry and pharmacognosy. These limitations, consistent with previous research, highlight the need for cautious application of GPT models in fields that require visual recognition [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>]. Additionally, the models showed poorer performance in subjects with less available training data and specific medical knowledge such as pharmacy law and traditional medicine, indicating potential biases in the models&#x2019; training. 
We suggest that future efforts in model development should focus on incorporating more diverse and comprehensive data to reduce such biases.</p></sec><sec id="s4-5"><title>Conclusions</title><p>This study demonstrates that GPT-4 outperforms GPT-3.5 in the Taiwan NPLE, particularly in pharmacy expertise, calculation ability, and situational case studies, with a notable advantage in basic subjects. It is recommended that GPT-4 be applied in clinical pharmacy practice (ie, patient education, drug consultation) and pharmacy education, particularly to support self-directed learning. However, given its limitations, caution is advised when integrating GPT-4 into clinical settings and educational programs. Future research should focus on refining prompts, improving model stability, integrating medical databases, and enhancing comprehensive questions to evaluate student competence more effectively while minimizing the chance of guessing correct answers.</p></sec></sec></body><back><ack><p>This work was supported by Taipei Veterans General Hospital Hsinchu Branch (2024-VHCT-P-0008) and the authors would like to thank Wallace Academic Editing (https://www.editing.tw/) for English language editing.</p></ack><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">GPT-3.5</term><def><p> ChatGPT-3.5</p></def></def-item><def-item><term id="abb2">GPT-4</term><def><p>ChatGPT-4</p></def></def-item><def-item><term id="abb3">NPLE</term><def><p>National Pharmacist Licensing Examination</p></def></def-item><def-item><term id="abb4">USMLE</term><def><p>United States Medical Licensing Examination</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>ChatGPT: optimizing language models for dialogue</article-title><source>OpenAI</source><access-date>2023-03-03</access-date><comment><ext-link 
ext-link-type="uri" xlink:href="https://chatgpt.r4wand.eu.org/">https://chatgpt.r4wand.eu.org/</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="web"><article-title>Research index</article-title><source>OpenAI</source><access-date>2023-08-03</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://openai.com/research/gpt-4">https://openai.com/research/gpt-4</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><collab>OpenAI</collab></person-group><article-title>GPT-4 technical report</article-title><source>arXiv</source><comment>Preprint posted online on  Mar 4, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2303.08774</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kosinski</surname><given-names>M</given-names> </name></person-group><article-title>Evaluating large language models in theory of mind tasks</article-title><source>Proc Natl Acad Sci U S A</source><year>2024</year><month>11</month><day>5</day><volume>121</volume><issue>45</issue><fpage>e2405460121</fpage><pub-id pub-id-type="doi">10.1073/pnas.2405460121</pub-id><pub-id pub-id-type="medline">39471222</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>YM</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>TJ</given-names> </name></person-group><article-title>ChatGPT surges ahead: GPT-4 has arrived in the arena of medical research</article-title><source>J Chin Med Assoc</source><year>2023</year><month>09</month><day>1</day><volume>86</volume><issue>9</issue><fpage>784</fpage><lpage>785</lpage><pub-id 
pub-id-type="doi">10.1097/JCMA.0000000000000955</pub-id><pub-id pub-id-type="medline">37406215</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gilson</surname><given-names>A</given-names> </name><name name-style="western"><surname>Safranek</surname><given-names>CW</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>T</given-names> </name><etal/></person-group><article-title>How does ChatGPT perform on the United States Medical Licensing Examination (USMLE)? The implications of large language models for medical education and knowledge assessment</article-title><source>JMIR Med Educ</source><year>2023</year><month>02</month><day>8</day><volume>9</volume><fpage>e45312</fpage><pub-id pub-id-type="doi">10.2196/45312</pub-id><pub-id pub-id-type="medline">36753318</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kung</surname><given-names>TH</given-names> </name><name name-style="western"><surname>Cheatham</surname><given-names>M</given-names> </name><name name-style="western"><surname>Medenilla</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Performance of ChatGPT on USMLE: potential for AI-assisted medical education using large language models</article-title><source>PLOS Dig Health</source><year>2023</year><month>02</month><volume>2</volume><issue>2</issue><fpage>e0000198</fpage><pub-id pub-id-type="doi">10.1371/journal.pdig.0000198</pub-id><pub-id pub-id-type="medline">36812645</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huh</surname><given-names>S</given-names> </name></person-group><article-title>Are ChatGPT&#x2019;s 
knowledge and interpretation ability comparable to those of medical students in Korea for taking a parasitology examination? A descriptive study</article-title><source>J Educ Eval Health Prof</source><year>2023</year><volume>20</volume><fpage>1</fpage><pub-id pub-id-type="doi">10.3352/jeehp.2023.20.1</pub-id><pub-id pub-id-type="medline">36627845</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fija&#x010D;ko</surname><given-names>N</given-names> </name><name name-style="western"><surname>Gosak</surname><given-names>L</given-names> </name><name name-style="western"><surname>&#x0160;tiglic</surname><given-names>G</given-names> </name><name name-style="western"><surname>Picard</surname><given-names>CT</given-names> </name><name name-style="western"><surname>John Douma</surname><given-names>M</given-names> </name></person-group><article-title>Can ChatGPT pass the life support exams without entering the American Heart Association course?</article-title><source>Resuscitation</source><year>2023</year><month>04</month><volume>185</volume><fpage>109732</fpage><pub-id pub-id-type="doi">10.1016/j.resuscitation.2023.109732</pub-id><pub-id pub-id-type="medline">36775020</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Weng</surname><given-names>TL</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>YM</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>TJ</given-names> </name><name name-style="western"><surname>Hwang</surname><given-names>SJ</given-names> </name></person-group><article-title>ChatGPT failed Taiwan&#x2019;s Family Medicine Board Exam</article-title><source>J Chin 
Med Assoc</source><year>2023</year><month>08</month><day>1</day><volume>86</volume><issue>8</issue><fpage>762</fpage><lpage>766</lpage><pub-id pub-id-type="doi">10.1097/JCMA.0000000000000946</pub-id><pub-id pub-id-type="medline">37294147</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Antaki</surname><given-names>F</given-names> </name><name name-style="western"><surname>Touma</surname><given-names>S</given-names> </name><name name-style="western"><surname>Milad</surname><given-names>D</given-names> </name><name name-style="western"><surname>El-Khoury</surname><given-names>J</given-names> </name><name name-style="western"><surname>Duval</surname><given-names>R</given-names> </name></person-group><article-title>Evaluating the performance of ChatGPT in ophthalmology: an analysis of its successes and shortcomings</article-title><source>Ophthalmol Sci</source><year>2023</year><month>12</month><volume>3</volume><issue>4</issue><fpage>100324</fpage><pub-id pub-id-type="doi">10.1016/j.xops.2023.100324</pub-id><pub-id pub-id-type="medline">37334036</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Nori</surname><given-names>H</given-names> </name><name name-style="western"><surname>King</surname><given-names>N</given-names> </name><name name-style="western"><surname>McKinney</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Carignan</surname><given-names>D</given-names> </name><name name-style="western"><surname>Horvitz</surname><given-names>E</given-names> </name></person-group><article-title>Capabilities of GPT-4 on medical challenge problems</article-title><source>arXiv</source><comment>Preprint posted online on  Apr 12, 2023</comment><pub-id 
pub-id-type="doi">10.48550/arXiv.2303.13375</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Roso&#x0142;</surname><given-names>M</given-names> </name><name name-style="western"><surname>G&#x0105;sior</surname><given-names>JS</given-names> </name><name name-style="western"><surname>&#x0141;aba</surname><given-names>J</given-names> </name><name name-style="western"><surname>Korzeniewski</surname><given-names>K</given-names> </name><name name-style="western"><surname>M&#x0142;y&#x0144;czak</surname><given-names>M</given-names> </name></person-group><article-title>Evaluation of the performance of GPT-3.5 and GPT-4 on the Polish Medical Final Examination</article-title><source>Sci Rep</source><year>2023</year><month>11</month><day>22</day><volume>13</volume><issue>1</issue><fpage>20512</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-46995-z</pub-id><pub-id pub-id-type="medline">37993519</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kaneda</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Takahashi</surname><given-names>R</given-names> </name><name name-style="western"><surname>Kaneda</surname><given-names>U</given-names> </name><etal/></person-group><article-title>Assessing the performance of GPT-3.5 and GPT-4 on the 2023 Japanese Nursing Examination</article-title><source>Cureus</source><year>2023</year><month>08</month><volume>15</volume><issue>8</issue><fpage>e42924</fpage><pub-id pub-id-type="doi">10.7759/cureus.42924</pub-id><pub-id pub-id-type="medline">37667724</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Bhayana</surname><given-names>R</given-names> </name><name name-style="western"><surname>Bleakney</surname><given-names>RR</given-names> </name><name name-style="western"><surname>Krishna</surname><given-names>S</given-names> </name></person-group><article-title>GPT-4 in radiology: improvements in advanced reasoning</article-title><source>Radiology</source><year>2023</year><month>06</month><volume>307</volume><issue>5</issue><fpage>e230987</fpage><pub-id pub-id-type="doi">10.1148/radiol.230987</pub-id><pub-id pub-id-type="medline">37191491</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Oh</surname><given-names>N</given-names> </name><name name-style="western"><surname>Choi</surname><given-names>GS</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>WY</given-names> </name></person-group><article-title>ChatGPT goes to the operating room: evaluating GPT-4 performance and its potential in surgical education and training in the era of large language models</article-title><source>Ann Surg Treat Res</source><year>2023</year><month>05</month><volume>104</volume><issue>5</issue><fpage>269</fpage><lpage>273</lpage><pub-id pub-id-type="doi">10.4174/astr.2023.104.5.269</pub-id><pub-id pub-id-type="medline">37179699</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sabry Abdel-Messih</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kamel Boulos</surname><given-names>MN</given-names> </name></person-group><article-title>ChatGPT in clinical toxicology</article-title><source>JMIR Med Educ</source><year>2023</year><month>03</month><day>8</day><volume>9</volume><fpage>e46876</fpage><pub-id pub-id-type="doi">10.2196/46876</pub-id><pub-id 
pub-id-type="medline">36867743</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Nisar</surname><given-names>S</given-names> </name><name name-style="western"><surname>Aslam</surname><given-names>MS</given-names> </name></person-group><article-title>Is ChatGPT a good tool for T&#x0026;CM students in studying pharmacology?</article-title><source>SSRN</source><comment>Preprint posted online on  Jan 17, 2023</comment><pub-id pub-id-type="doi">10.2139/ssrn.4324310</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>YM</given-names> </name><name name-style="western"><surname>Shen</surname><given-names>HW</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>TJ</given-names> </name></person-group><article-title>Performance of ChatGPT on the Pharmacist Licensing Examination in Taiwan</article-title><source>J Chin Med Assoc</source><year>2023</year><month>07</month><day>1</day><volume>86</volume><issue>7</issue><fpage>653</fpage><lpage>658</lpage><pub-id pub-id-type="doi">10.1097/JCMA.0000000000000942</pub-id><pub-id pub-id-type="medline">37227901</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>He</surname><given-names>N</given-names> </name><name name-style="western"><surname>Yan</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Chat GPT-4 significantly surpasses GPT-3.5 in drug information queries</article-title><source>J Telemed 
Telecare</source><year>2023</year><month>06</month><day>22</day><fpage>1357633X231181922</fpage><pub-id pub-id-type="doi">10.1177/1357633X231181922</pub-id><pub-id pub-id-type="medline">37350055</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>D</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>B</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>M</given-names> </name></person-group><article-title>ExplainCPE: A free-text explanation benchmark of Chinese Pharmacist Examination</article-title><source>arXiv</source><comment>Preprint posted online on  Oct 26, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2305.12945</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vert</surname><given-names>JP</given-names> </name></person-group><article-title>How will generative AI disrupt data science in drug discovery?</article-title><source>Nat Biotechnol</source><year>2023</year><month>06</month><volume>41</volume><issue>6</issue><fpage>750</fpage><lpage>751</lpage><pub-id pub-id-type="doi">10.1038/s41587-023-01789-6</pub-id><pub-id pub-id-type="medline">37156917</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Blanco-Gonz&#x00E1;lez</surname><given-names>A</given-names> </name><name name-style="western"><surname>Cabez&#x00F3;n</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Seco-Gonz&#x00E1;lez</surname><given-names>A</given-names> </name><etal/></person-group><article-title>The role of AI in drug discovery: challenges, opportunities, and strategies</article-title><source>Pharmaceuticals (Basel)</source><year>2023</year><month>06</month><day>18</day><volume>16</volume><issue>6</issue><fpage>891</fpage><pub-id pub-id-type="doi">10.3390/ph16060891</pub-id><pub-id pub-id-type="medline">37375838</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Savage</surname><given-names>N</given-names> </name></person-group><article-title>Drug discovery companies are customizing ChatGPT: here&#x2019;s how</article-title><source>Nat Biotechnol</source><year>2023</year><month>05</month><volume>41</volume><issue>5</issue><fpage>585</fpage><lpage>586</lpage><pub-id pub-id-type="doi">10.1038/s41587-023-01788-7</pub-id><pub-id pub-id-type="medline">37095351</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ding</surname><given-names>YJ</given-names> </name><name name-style="western"><surname>Luo</surname><given-names>Y</given-names> </name></person-group><article-title>Future of ChatGPT in pharmacovigilance</article-title><source>Drug Saf</source><year>2023</year><month>08</month><volume>46</volume><issue>8</issue><fpage>711</fpage><lpage>713</lpage><pub-id pub-id-type="doi">10.1007/s40264-023-01315-2</pub-id><pub-id pub-id-type="medline">37306853</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Carpenter</surname><given-names>KA</given-names> </name><name 
name-style="western"><surname>Altman</surname><given-names>RB</given-names> </name></person-group><article-title>Using GPT-3 to build a lexicon of drugs of abuse synonyms for social media pharmacovigilance</article-title><source>Biomolecules</source><year>2023</year><month>02</month><day>18</day><volume>13</volume><issue>2</issue><fpage>387</fpage><pub-id pub-id-type="doi">10.3390/biom13020387</pub-id><pub-id pub-id-type="medline">36830756</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cloesmeijer</surname><given-names>ME</given-names> </name><name name-style="western"><surname>Janssen</surname><given-names>A</given-names> </name><name name-style="western"><surname>Koopman</surname><given-names>SF</given-names> </name><name name-style="western"><surname>Cnossen</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Math&#x00F4;t</surname><given-names>RAA</given-names> </name><name name-style="western"><surname>consortium</surname><given-names>S</given-names> </name></person-group><article-title>ChatGPT in pharmacometrics? 
Potential opportunities and limitations</article-title><source>Br J Clin Pharmacol</source><year>2024</year><month>01</month><volume>90</volume><issue>1</issue><fpage>360</fpage><lpage>365</lpage><pub-id pub-id-type="doi">10.1111/bcp.15895</pub-id><pub-id pub-id-type="medline">37621112</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sallam</surname><given-names>M</given-names> </name><name name-style="western"><surname>Salim</surname><given-names>N</given-names> </name><name name-style="western"><surname>Barakat</surname><given-names>M</given-names> </name><name name-style="western"><surname>Al-Tammemi</surname><given-names>A</given-names> </name></person-group><article-title>ChatGPT applications in medical, dental, pharmacy, and public health education: a descriptive study highlighting the advantages and limitations</article-title><source>Narra J</source><year>2023</year><month>03</month><day>29</day><volume>3</volume><issue>1</issue><fpage>e103</fpage><pub-id pub-id-type="doi">10.52225/narra.v3i1.103</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Zhu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Han</surname><given-names>D</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zeng</surname><given-names>F</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>C</given-names> </name></person-group><article-title>How can ChatGPT benefit pharmacy: a case report on review writing</article-title><source>Preprints</source><comment>Preprint posted online on  Feb 20, 2023</comment><pub-id 
pub-id-type="doi">10.20944/preprints202302.0324.v1</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hsu</surname><given-names>HY</given-names> </name><name name-style="western"><surname>Hsu</surname><given-names>KC</given-names> </name><name name-style="western"><surname>Hou</surname><given-names>SY</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>CL</given-names> </name><name name-style="western"><surname>Hsieh</surname><given-names>YW</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>YD</given-names> </name></person-group><article-title>Examining real-world medication consultations and drug-herb interactions: ChatGPT performance evaluation</article-title><source>JMIR Med Educ</source><year>2023</year><month>08</month><day>21</day><volume>9</volume><fpage>e48433</fpage><pub-id pub-id-type="doi">10.2196/48433</pub-id><pub-id pub-id-type="medline">37561097</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kleinig</surname><given-names>O</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>C</given-names> </name><name name-style="western"><surname>Bacchi</surname><given-names>S</given-names> </name></person-group><article-title>This too shall pass: the performance of ChatGPT-3.5, ChatGPT-4 and New Bing in an Australian Medical Licensing Examination</article-title><source>Med J Aust</source><year>2023</year><month>09</month><day>4</day><volume>219</volume><issue>5</issue><fpage>237</fpage><pub-id pub-id-type="doi">10.5694/mja2.52061</pub-id><pub-id pub-id-type="medline">37528548</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="preprint"><person-group 
person-group-type="author"><name name-style="western"><surname>K&#x0131;l&#x0131;&#x00E7;</surname><given-names>ME</given-names> </name></person-group><article-title>AI in medical education: A comparative analysis of GPT-4 and GPT-3.5 on turkish medical specialization exam performance</article-title><source>medRxiv</source><comment>Preprint posted online on  Jul 12, 2023</comment><pub-id pub-id-type="doi">10.1101/2023.07.12.23292564</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Takagi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Watari</surname><given-names>T</given-names> </name><name name-style="western"><surname>Erabi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sakaguchi</surname><given-names>K</given-names> </name></person-group><article-title>Performance of GPT-3.5 and GPT-4 on the Japanese Medical Licensing Examination: comparison study</article-title><source>JMIR Med Educ</source><year>2023</year><month>06</month><day>29</day><volume>9</volume><fpage>e48002</fpage><pub-id pub-id-type="doi">10.2196/48002</pub-id><pub-id pub-id-type="medline">37384388</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guerra</surname><given-names>GA</given-names> </name><name name-style="western"><surname>Hofmann</surname><given-names>H</given-names> </name><name name-style="western"><surname>Sobhani</surname><given-names>S</given-names> </name><etal/></person-group><article-title>GPT-4 artificial intelligence model outperforms ChatGPT, medical students, and neurosurgery residents on neurosurgery written board-like questions</article-title><source>World 
Neurosurg</source><year>2023</year><month>11</month><volume>179</volume><fpage>e160</fpage><lpage>e165</lpage><pub-id pub-id-type="doi">10.1016/j.wneu.2023.08.042</pub-id><pub-id pub-id-type="medline">37597659</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lewandowski</surname><given-names>M</given-names> </name><name name-style="western"><surname>&#x0141;ukowicz</surname><given-names>P</given-names> </name><name name-style="western"><surname>&#x015A;wietlik</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bara&#x0144;ska-Rybak</surname><given-names>W</given-names> </name></person-group><article-title>ChatGPT-3.5 and ChatGPT-4 dermatological knowledge level based on the Specialty Certificate Examination in dermatology</article-title><source>Clin Exp Dermatol</source><year>2024</year><month>06</month><day>25</day><volume>49</volume><issue>7</issue><fpage>686</fpage><lpage>691</lpage><pub-id pub-id-type="doi">10.1093/ced/llad255</pub-id><pub-id pub-id-type="medline">37540015</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Khorshidi</surname><given-names>H</given-names> </name><name name-style="western"><surname>Mohammadi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Yousem</surname><given-names>DM</given-names> </name><etal/></person-group><article-title>Application of ChatGPT in multilingual medical education: How does ChatGPT fare in 2023&#x2019;s Iranian Residency Entrance Examination</article-title><source>Inform Med Unlocked</source><year>2023</year><volume>41</volume><fpage>101314</fpage><pub-id pub-id-type="doi">10.1016/j.imu.2023.101314</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Estau</surname><given-names>D</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Qin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Z</given-names> </name></person-group><article-title>Evaluating the performance of ChatGPT in clinical pharmacy: a comparative study of ChatGPT and clinical pharmacists</article-title><source>Br J Clin Pharmacol</source><year>2024</year><month>01</month><volume>90</volume><issue>1</issue><fpage>232</fpage><lpage>238</lpage><pub-id pub-id-type="doi">10.1111/bcp.15896</pub-id><pub-id pub-id-type="medline">37626010</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jairoun</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Al-Hemyari</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Shahwan</surname><given-names>M</given-names> </name><name name-style="western"><surname>Humaid Alnuaimi</surname><given-names>GR</given-names> </name><name name-style="western"><surname>Zyoud</surname><given-names>SH</given-names> </name><name name-style="western"><surname>Jairoun</surname><given-names>M</given-names> </name></person-group><article-title>ChatGPT: threat or boon to the future of pharmacy practice?</article-title><source>Res Social Adm Pharm</source><year>2023</year><month>07</month><volume>19</volume><issue>7</issue><fpage>975</fpage><lpage>976</lpage><pub-id pub-id-type="doi">10.1016/j.sapharm.2023.03.012</pub-id><pub-id 
pub-id-type="medline">37061346</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Juhi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pipil</surname><given-names>N</given-names> </name><name name-style="western"><surname>Santra</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mondal</surname><given-names>S</given-names> </name><name name-style="western"><surname>Behera</surname><given-names>JK</given-names> </name><name name-style="western"><surname>Mondal</surname><given-names>H</given-names> </name></person-group><article-title>The capability of ChatGPT in predicting and explaining common drug-drug interactions</article-title><source>Cureus</source><year>2023</year><month>03</month><volume>15</volume><issue>3</issue><fpage>e36272</fpage><pub-id pub-id-type="doi">10.7759/cureus.36272</pub-id><pub-id pub-id-type="medline">37073184</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Davies</surname><given-names>NM</given-names> </name></person-group><article-title>Adapting artificial intelligence into the evolution of pharmaceutical sciences and publishing: technological darwinism</article-title><source>J Pharm Pharm Sci</source><year>2023</year><volume>26</volume><fpage>11349</fpage><pub-id pub-id-type="doi">10.3389/jpps.2023.11349</pub-id><pub-id pub-id-type="medline">37034476</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kleebayoon</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wiwanitkit</surname><given-names>V</given-names> </name></person-group><article-title>Performance and risks of 
ChatGPT used in drug information: comment</article-title><source>Eur J Hosp Pharm</source><year>2023</year><month>12</month><day>27</day><volume>31</volume><issue>1</issue><fpage>85</fpage><lpage>86</lpage><pub-id pub-id-type="doi">10.1136/ejhpharm-2023-003864</pub-id><pub-id pub-id-type="medline">37339863</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mohammed</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>N</given-names> </name><name name-style="western"><surname>Zawiah</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Psychometric properties and assessment of knowledge, attitude, and practice towards ChatGPT in pharmacy practice and education: a study protocol</article-title><source>J Racial Ethn Health Disparities</source><year>2024</year><month>08</month><volume>11</volume><issue>4</issue><fpage>2284</fpage><lpage>2293</lpage><pub-id pub-id-type="doi">10.1007/s40615-023-01696-1</pub-id><pub-id pub-id-type="medline">37428357</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abu-Farha</surname><given-names>R</given-names> </name><name name-style="western"><surname>Fino</surname><given-names>L</given-names> </name><name name-style="western"><surname>Al-Ashwal</surname><given-names>FY</given-names> </name><etal/></person-group><article-title>Evaluation of community pharmacists&#x2019; perceptions and willingness to integrate ChatGPT into their pharmacy practice: a study from Jordan</article-title><source>J Am Pharm Assoc</source><year>2023</year><month>11</month><volume>63</volume><issue>6</issue><fpage>1761</fpage><lpage>1767</lpage><pub-id pub-id-type="doi">10.1016/j.japh.2023.08.020</pub-id></nlm-citation></ref><ref 
id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Choi</surname><given-names>W</given-names> </name></person-group><article-title>Assessment of the capacity of ChatGPT as a self-learning tool in medical pharmacology: a study using MCQs</article-title><source>BMC Med Educ</source><year>2023</year><month>11</month><day>13</day><volume>23</volume><issue>1</issue><fpage>864</fpage><pub-id pub-id-type="doi">10.1186/s12909-023-04832-x</pub-id><pub-id pub-id-type="medline">37957666</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Snoswell</surname><given-names>CL</given-names> </name><name name-style="western"><surname>Falconer</surname><given-names>N</given-names> </name><name name-style="western"><surname>Snoswell</surname><given-names>AJ</given-names> </name></person-group><article-title>Pharmacist vs machine: pharmacy services in the age of large language models</article-title><source>Res Social Adm Pharm</source><year>2023</year><month>06</month><volume>19</volume><issue>6</issue><fpage>843</fpage><lpage>844</lpage><pub-id pub-id-type="doi">10.1016/j.sapharm.2023.03.006</pub-id><pub-id pub-id-type="medline">36907776</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mesk&#x00F3;</surname><given-names>B</given-names> </name></person-group><article-title>Prompt engineering as an important emerging skill for medical professionals: tutorial</article-title><source>J Med Internet Res</source><year>2023</year><month>10</month><day>4</day><volume>25</volume><fpage>e50638</fpage><pub-id pub-id-type="doi">10.2196/50638</pub-id><pub-id pub-id-type="medline">37792434</pub-id></nlm-citation></ref><ref 
id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Krumborg</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Mikkelsen</surname><given-names>N</given-names> </name><name name-style="western"><surname>Damkier</surname><given-names>P</given-names> </name><etal/></person-group><article-title>ChatGPT: first glance from a perspective of clinical pharmacology</article-title><source>Basic Clin Pharma Tox</source><year>2023</year><month>07</month><volume>133</volume><issue>1</issue><fpage>3</fpage><lpage>5</lpage><pub-id pub-id-type="doi">10.1111/bcpt.13879</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fergus</surname><given-names>S</given-names> </name><name name-style="western"><surname>Botha</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ostovar</surname><given-names>M</given-names> </name></person-group><article-title>Evaluating academic answers generated using ChatGPT</article-title><source>J Chem Educ</source><year>2023</year><month>04</month><day>11</day><volume>100</volume><issue>4</issue><fpage>1672</fpage><lpage>1675</lpage><pub-id pub-id-type="doi">10.1021/acs.jchemed.3c00087</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Massey</surname><given-names>PA</given-names> </name><name name-style="western"><surname>Montgomery</surname><given-names>C</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>AS</given-names> </name></person-group><article-title>Comparison of ChatGPT-3.5, ChatGPT-4, and orthopaedic resident performance on orthopaedic assessment examinations</article-title><source>J Am Acad Orthop 
Surg</source><year>2023</year><month>12</month><day>1</day><volume>31</volume><issue>23</issue><fpage>1173</fpage><lpage>1179</lpage><pub-id pub-id-type="doi">10.5435/JAAOS-D-23-00396</pub-id><pub-id pub-id-type="medline">37671415</pub-id></nlm-citation></ref></ref-list></back></article>