<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JME</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Educ</journal-id>
      <journal-title>JMIR Medical Education</journal-title>
      <issn pub-type="epub">2369-3762</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i1e46939</article-id>
      <article-id pub-id-type="pmid">37428540</article-id>
      <article-id pub-id-type="doi">10.2196/46939</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Putting ChatGPT’s Medical Advice to the (Turing) Test: Survey Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Venkatesh</surname>
            <given-names>Kaushik</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Kamel Boulos</surname>
            <given-names>Maged N.</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Yan</surname>
            <given-names>Chao</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ropero</surname>
            <given-names>Jorge</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sebastian</surname>
            <given-names>Glorin</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Mungoli</surname>
            <given-names>Neelesh</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Nov</surname>
            <given-names>Oded</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Technology Management</institution>
            <institution>Tandon School of Engineering</institution>
            <institution>New York University</institution>
            <addr-line>5 Metrotech, Brooklyn</addr-line>
            <addr-line>New York, NY, 11201</addr-line>
            <country>United States</country>
            <phone>1 646 207 7864</phone>
            <email>onov@nyu.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6410-2995</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Singh</surname>
            <given-names>Nina</given-names>
          </name>
          <degrees>BSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4623-2451</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Mann</surname>
            <given-names>Devin</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2099-0852</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Technology Management</institution>
        <institution>Tandon School of Engineering</institution>
        <institution>New York University</institution>
        <addr-line>New York, NY</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Population Health</institution>
        <institution>Grossman School of Medicine</institution>
        <institution>New York University</institution>
        <addr-line>New York, NY</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Medical Center Information Technology</institution>
        <institution>Langone Health</institution>
        <institution>New York University</institution>
        <addr-line>New York, NY</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Oded Nov <email>onov@nyu.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>10</day>
        <month>7</month>
        <year>2023</year>
      </pub-date>
      <volume>9</volume>
      <elocation-id>e46939</elocation-id>
      <history>
        <date date-type="received">
          <day>2</day>
          <month>3</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>4</day>
          <month>5</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>26</day>
          <month>5</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>14</day>
          <month>6</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Oded Nov, Nina Singh, Devin Mann. Originally published in JMIR Medical Education (https://mededu.jmir.org), 10.07.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Education, is properly cited. The complete bibliographic information, a link to the original publication on https://mededu.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://mededu.jmir.org/2023/1/e46939" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Chatbots are being piloted to draft responses to patient questions, but patients’ ability to distinguish between provider and chatbot responses and patients’ trust in chatbots’ functions are not well established.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to assess the feasibility of using ChatGPT (Chat Generative Pre-trained Transformer) or a similar artificial intelligence–based chatbot for patient-provider communication.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>A survey study was conducted in January 2023. Ten representative, nonadministrative patient-provider interactions were extracted from the electronic health record. Patients’ questions were entered into ChatGPT with a request for the chatbot to respond using approximately the same word count as the human provider’s response. In the survey, each patient question was followed by a provider- or ChatGPT-generated response. Participants were informed that 5 responses were provider generated and 5 were chatbot generated. Participants were asked—and incentivized financially—to correctly identify the response source. Participants were also asked about their trust in chatbots’ functions in patient-provider communication, using a Likert scale from 1-5.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>A US-representative sample of 430 study participants aged 18 and older were recruited on Prolific, a crowdsourcing platform for academic studies. In all, 426 participants filled out the full survey. After removing participants who spent less than 3 minutes on the survey, 392 respondents remained. Overall, 53.3% (209/392) of respondents analyzed were women, and the average age was 47.1 (range 18-91) years. The correct classification of responses ranged from 49% (192/392) to 85.7% (336/392) for different questions. On average, chatbot responses were identified correctly in 65.5% (1284/1960) of the cases, and human provider responses were identified correctly in 65.1% (1276/1960) of the cases. On average, responses to questions about patients’ trust in chatbots’ functions were weakly positive (mean Likert score 3.4 out of 5), with lower trust as the health-related complexity of the task in the questions increased.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>ChatGPT responses to patient questions were weakly distinguishable from provider responses. Laypeople appear to trust the use of chatbots to answer lower-risk health questions. It is important to continue studying patient-chatbot interaction as chatbots move from administrative to more clinical roles in health care.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>artificial intelligence</kwd>
        <kwd>AI</kwd>
        <kwd>ChatGPT</kwd>
        <kwd>large language model</kwd>
        <kwd>patient-provider interaction</kwd>
        <kwd>chatbot</kwd>
        <kwd>feasibility</kwd>
        <kwd>ethics</kwd>
        <kwd>privacy</kwd>
        <kwd>language model</kwd>
        <kwd>machine learning</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Advances in large language models (LLMs) have enabled dramatic improvements in the quality of artificial intelligence (AI)–generated conversations. Recently, the launch of ChatGPT (Chat Generative Pre-trained Transformer; OpenAI) [<xref ref-type="bibr" rid="ref1">1</xref>] has prompted a surge of interest in AI-based chatbots, both from the health care field [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>] and the general public [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. Several health care systems, including University of California San Diego Health and University of Wisconsin Health, have already announced pilots of using the underlying Generative Pre-trained Transformer (GPT) technology as a means of drafting initial responses to patient portal messages [<xref ref-type="bibr" rid="ref6">6</xref>]. Other health care systems, including Stanford Health Care, are also preparing for pilots of GPT-drafted patient portal message responses [<xref ref-type="bibr" rid="ref6">6</xref>].</p>
      <p>This study assessed the feasibility of using ChatGPT or similar AI-based chatbots for answering patient portal messages directed at health care providers. ChatGPT is a chatbot created by OpenAI that is based on the LLM known as GPT [<xref ref-type="bibr" rid="ref1">1</xref>]. At a high level, it was trained to predict the most probable next word using a large body of text data from the internet, and it was optimized to respond to user queries using reinforcement learning with human feedback on its responses to questions. Although it is generally able to generate humanlike and accurate text, LLMs such as ChatGPT have several limitations. These include biases from the underlying data (eg, social biases such as racism and sexism) [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>], the ability to “hallucinate” information that is untrue [<xref ref-type="bibr" rid="ref9">9</xref>], and the lack of mental models that would allow for true reasoning rather than simply probabilistic text generation (leading it to make errors in response to queries such as simple arithmetic problems) [<xref ref-type="bibr" rid="ref10">10</xref>].</p>
      <p>Using ChatGPT or similar AI-based chatbots to respond to patient portal messages is of interest given the recently launched pilots, the increasing burden of patient messages being delivered to providers [<xref ref-type="bibr" rid="ref11">11</xref>], and the association between increased electronic health record (EHR) work and provider burnout [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Moreover, providers are generally not allocated time or reimbursement for answering patient messages. In an age when patients increasingly expect providers to be digitally accessible, it is likely that patient message load will continue to increase. As the technology behind AI-based chatbots matures, the time is ripe for exploring chatbots’ potential role in patient-provider communication.</p>
      <p>Recent studies have had health care professionals judge ChatGPT’s responses to health-related questions [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref16">16</xref>], with findings such as 84% of answers to cardiovascular disease prevention questions being appropriate [<xref ref-type="bibr" rid="ref15">15</xref>] and ChatGPT overall scoring higher for quality and empathy than health care providers [<xref ref-type="bibr" rid="ref16">16</xref>]. Fewer studies have examined patient attitudes toward ChatGPT providing responses to health-related questions [<xref ref-type="bibr" rid="ref17">17</xref>]. Here, we sought to understand how patients may perceive AI chatbot–generated responses to their questions. We reported on the ability of members of the public to distinguish between AI- and provider-generated responses to patients’ health questions. Further, we characterized participants’ trust in chatbots’ functions. Finally, we discussed the possible implications of the adoption of AI-based chatbots in patient messaging portals.</p>
      <p>Notably, we were not trying to distinguish whether AI- or human-generated responses are a better solution for patients. Rather, we studied whether patients can tell that the response is coming from AI versus a provider and whether they trust AI, which are separate questions.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>Ten representative, nonadministrative patient-provider interactions from one of the authors were extracted from the patient-provider interaction module of the EHR. All identifying details were removed, and typos in the provider’s response were fixed. Patients’ questions were entered into ChatGPT on January 19, 2023, with a request to respond using approximately the same word count as the provider’s response (see <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>). Chatbot response text that recommended consultation with the patient’s health care provider was removed. The response accuracy of the human and ChatGPT responses were not evaluated to provide as close as possible to an in-the-wild experience for participants.</p>
        <p>The 10 questions and responses were presented to a US-representative sample of 430 people aged 18 years and older who were recruited on Prolific, a crowdsourcing platform for academic studies. Participants provided written informed consent to take part in the study.</p>
        <p>Participants were informed that 5 of the responses were written by a human provider and 5 were generated by an AI-based chatbot. For each participant, each patient question was followed by either a provider- or ChatGPT-generated response. Participants were asked to determine which responses were written by a provider and which were generated by a chatbot. The setup of 5 human responses versus 5 chatbot responses follows Fisher’s [<xref ref-type="bibr" rid="ref18">18</xref>] seminal work on experimental design, which recommends an equal distribution of items and that participants be told in advance of the distribution. In doing so, we (1) establish a uniform prior belief in the probability associated with each advice source, (2) promote independent decision-making by participants regarding individual responses without considering other questions, and (3) avoid any influence that could sway participants’ preferences toward a specific advice source. The order of the 10 questions and answers, as well as the order of the choices presented to participants, were randomized. Participants were incentivized financially to distinguish between human and chatbot responses (US $2 baseline compensation, with up to a US $3 bonus for answering questions correctly).</p>
        <p>Participants were then asked questions about their trust in chatbots’ use in patient-provider communication using a Likert scale from 1-5 (see the <italic>Results</italic> section). They were asked about their trust in chatbots to provide different types of services (logistical information, preventative care advice, diagnostic advice, and treatment advice), their trust in AI chatbots to answer health questions compared to a Google search, and their overall trust in AI chatbots to help them make better health decisions.</p>
        <p>With respect to distinguishability, a chi-square test for proportions was used to determine if there was a difference in the proportion of correct identification by men versus women. A chi-square test for goodness of fit was used to investigate whether there were variations in the proportion of correct identification across different participant age groups. Similarly, differences across age and gender in participants’ responses to the survey’s trust questions were analyzed using ANOVA. Across all tests, results were considered significant if <italic>P</italic>&#60;.05.</p>
        <boxed-text id="box1" position="float">
          <title>Sample patient question (A) and answers (B and C).</title>
          <p>
            <bold>A. Patient question</bold>
          </p>
          <p>You indicated that based on lab tests conducted last month, my blood sugar levels were at a pre-diabetic stage. Could you provide me with steps I can take to reduce those levels, including any specific dietary recommendations. Thanks very much. </p>
          <p>
            <bold>B. Human response</bold>
          </p>
          <p>It comes down to weight loss with some physical activity which can help as well. How you achieve weight loss really depends on each person. Rules of thumb are to reduce carbohydrate intake both from simple sugars as well as starches like bread pasta and rice. In addition, limiting any juices or sweets helps as well. But most importantly is really increasing the amount of healthy fruit and vegetables and other high nutrient density foods and just limiting the portion sizes of the foods as well. Many people appreciate a more structured program for reducing the risk of diabetes and if you look at the federal government’s National diabetes prevention program they have lots of online materials and there are often group classes at YMCA’s or other locations in the community.</p>
          <p>
            <bold>C. Chatbot response</bold>
          </p>
          <p>To reduce your pre-diabetic blood sugar levels, the first step is to focus on making lifestyle changes. Specifically, you should focus on your diet and exercise. Eating a diet high in whole, unprocessed foods such as fruits, vegetables and whole grains, while limiting your intake of added sugars, saturated fats, and processed foods will help. Incorporating more protein-rich foods such as lean meats, fish, and beans and eating small, frequent meals throughout the day rather than a few large ones can also help. In terms of exercise, try to aim for at least 30 minutes of moderate physical activity per day, such as brisk walking, cycling or swimming. It is also very important to maintain a healthy weight, as being overweight or obese can increase your risk of developing type 2 diabetes. I recommend speaking with a registered dietitian or nutritionist who can help you create a personalized eating plan.</p>
        </boxed-text>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was certified and filed as a Quality Improvement study per NYU Langone Health’s Quality Improvement self-certification protocol. As a Quality Improvement study, institutional review board approval is not needed.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>Overall, 426 participants filled out the full survey. After removing participants who spent less than 3 minutes on the survey, 392 survey responses were used in the analysis. Of the 392 respondents, 53.3% (n=209) were women, and the average age was 47.1 (SD 16.0) years.</p>
      <p>The responses to patient questions varied widely in the participants’ ability to identify whether they were written by a human or chatbot, ranging from 49% (192/392) to 85.7% (336/392) for different questions. Each participant received a score between 0-10 based on the number of responses they identified correctly (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). On average, chatbot responses were identified correctly in 65.5% (1284/1960) of the cases, and human provider responses were identified correctly in 65.1% (1276/1960) of the cases. No substantial differences were found in response distinguishability or trust by demographic characteristics.</p>
      <p>On average, patients trusted chatbots (<xref ref-type="table" rid="table1">Table 1</xref>), yet trust was lower as the health-related complexity of the task in the questions increased. Logistical questions (eg, scheduling appointments and insurance questions) had the highest trust rating (mean Likert score 3.94, SD 0.92), followed by preventative care (eg, vaccines and cancer screenings; mean Likert score 3.52, SD 1.10). Diagnostic and treatment advice had the lowest trust ratings (mean Likert scores 2.90, SD 1.14 and 2.89, SD 1.12, respectively). No significant correlations were found between trust in health chatbots and demographics or the ability to correctly identify chatbot versus human responses (all <italic>P</italic>&#62;.05).</p>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Health chatbot trust questions and responses.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="580"/>
          <col width="210"/>
          <col width="210"/>
          <thead>
            <tr valign="top">
              <td>Question</td>
              <td>Patients with Likert response ≥4 (n=392), n (%)</td>
              <td>Likert response (range 1-5), mean (SD)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>I could trust answers from a health chatbot about logistical questions (such as scheduling appointments, insurance questions, medication requests).</td>
              <td>312 (79.6)</td>
              <td>3.94 (0.92)</td>
            </tr>
            <tr valign="top">
              <td>I could trust a chatbot to provide advice about preventative care, such as vaccines, or cancer screenings.</td>
              <td>248 (63.3)</td>
              <td>3.52 (1.10)</td>
            </tr>
            <tr valign="top">
              <td>I could trust a chatbot to provide diagnostic advice about symptoms.</td>
              <td>152 (38.8)</td>
              <td>2.90 (1.14)</td>
            </tr>
            <tr valign="top">
              <td>I could trust a chatbot to provide treatment advice.</td>
              <td>150 (38.3)</td>
              <td>2.89 (1.12)</td>
            </tr>
            <tr valign="top">
              <td>AI<sup>a</sup> chatbots can be a more trustworthy alternative to Google to answer my health questions.</td>
              <td>232 (59.2)</td>
              <td>3.56 (1.02)</td>
            </tr>
            <tr valign="top">
              <td>Health chatbots could help me make better decisions.</td>
              <td>236 (60.2)</td>
              <td>3.49 (0.91)</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table1fn1">
            <p><sup>a</sup>AI: artificial intelligence.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Patients increasingly expect <italic>consumer-grade</italic> health care experiences that mirror their experiences with the rest of their digital life. They want omnichannel and interactive communication, frictionless access to care, and personalized education. The resulting overwhelming volume of patient portal messages highlights an opportunity for chatbots to assist health care providers, one that is already being acted upon by several large health care systems [<xref ref-type="bibr" rid="ref6">6</xref>]. Early research on provider perception of these chatbot-generated responses has revealed high degrees of appropriateness [<xref ref-type="bibr" rid="ref15">15</xref>] and has even revealed higher quality and empathy ratings than human-generated responses [<xref ref-type="bibr" rid="ref16">16</xref>]. However, whether patients view chatbot communication as comparable to communication with human providers requires empirical investigation [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref21">21</xref>].</p>
        <p>In this study of a US-representative sample, compared to the benchmark of 50% representing random distinguishability and 100% representing perfect distinguishability, laypeople found responses from an AI-based chatbot to be weakly distinguishable from those from a human provider. Notably, there was very little difference between the distinguishability rate of chatbot versus human responses (65.5% vs 65.1%).</p>
        <p>It is likely that in the near future, the level of indistinguishability we found will represent a lower bound of performance, as chatbots trained on medical data specifically, or prompted with medical queries, will likely be less distinguishable [<xref ref-type="bibr" rid="ref14">14</xref>]. Another possible future development is for chatbots to reach a superhuman level as seen in other medical domains [<xref ref-type="bibr" rid="ref22">22</xref>]. The emerging group of vendors designing optimized prompt libraries for health systems is likely to further improve chatbots’ performance on health-related questions (eg, DocsGPT [<xref ref-type="bibr" rid="ref23">23</xref>]). It is important to note that products based on LLMs, such as ChatGPT, merely provide text that resembles good medical advice, and it is only with the addition of medical knowledge that useful health care provider–level advice could be provided.</p>
        <p>Respondents’ trust in chatbots’ functions was mildly positive. Notably, there was a lower level of trust in chatbots as the medical complexity of the task increased, with the highest acceptance for administrative tasks such as scheduling appointments and the lowest acceptance for treatment advice. This is broadly consistent with prior studies [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. In particular, a recent study of user intentions to use ChatGPT for self-diagnosis found that higher performance expectancy and positive risk-reward appraisals were associated with improved perception of decision-making outcomes [<xref ref-type="bibr" rid="ref17">17</xref>]. This improved perception in turn positively impacted participant intentions to use ChatGPT for self-diagnosis (78% of the 476 participants indicated that they were willing to do so) [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
        <p>Our study suggests that participants are overall willing to receive health advice from a chatbot (especially for low-risk topics) and are only weakly able to distinguish between ChatGPT- versus human-generated responses. Based on our findings, identifying appropriate scenarios for deploying chatbots within health care systems is an important next step. Although chatbots are widely used in health care administrative tasks (eg, scheduling), optimal clinical use cases are still emerging [<xref ref-type="bibr" rid="ref25">25</xref>]. Chatbots have been developed and deployed for highly specialized clinical scenarios such as symptom triage and postchemotherapy education [<xref ref-type="bibr" rid="ref26">26</xref>]. More generalized chatbots that are similar to ChatGPT represent a new opportunity to use chatbots in support of more common chronic disease management for conditions such as hypertension, diabetes, and asthma. Health care providers’ work may be transformed by using the products of generative AI (such as chatbots’ output) as raw material to construct patient-provider interaction, including advice, the explanation of test results, the discussion of side effects, and many other types of interactions that currently require a human health care provider. For example, chatbots could be deployed with home blood pressure monitoring to support patient questions about treatment plans, medication titrations, and potential side effects [<xref ref-type="bibr" rid="ref27">27</xref>].</p>
        <p>Potential deployment models include chatbots that directly interact with patients (eg, through patient portals) or serve as clinician assistants, generating draft text or transforming clinician documentation into more patient-friendly versions. For health care providers’ work, this would lead to a shift in focus from the <italic>creation</italic> of health care advice to the <italic>curation</italic> of advice in response to patient messages. Of note, it is critical that providers stay alert when curating rather than simply accepting the models’ answers. ChatGPT and other LLMs have known limitations including producing incorrect or biased answers [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>], and automation bias (ie, humans favoring suggestions from automated decision-making systems over their own judgment) is a key concern to watch for [<xref ref-type="bibr" rid="ref28">28</xref>]. Liability will also be a key concern that will necessitate careful curation of chatbot responses [<xref ref-type="bibr" rid="ref29">29</xref>].</p>
        <p>The appropriateness of each deployment model likely depends on the clinical complexity and severity of the condition. Higher-risk or -complexity clinical interactions could use chatbots to generate drafts for clinician editing or approval and lower-risk situations may allow for direct patient-chatbot interaction. Alternatively, it may be useful to have chatbots classify questions into administrative versus health questions, replying directly to administrative questions and drafting responses for provider approval to health questions. The role and impact of the disclosure of origination (human vs chatbot) also needs further exploration, especially with regards to ethics, effectiveness, and implications for the patient-provider relationship.</p>
        <p>Although our study addressed new questions with state-of-the-art technology, it has some key limitations. First, ChatGPT was not trained on medical data and could be inferior to medically trained chatbots such as Med-PaLM [<xref ref-type="bibr" rid="ref14">14</xref>]. Second, there was no specialized prompting of ChatGPT (eg, to be empathetic), which can help responses sound more human and could potentially increase patients’ willingness to accept AI chatbot–generated responses [<xref ref-type="bibr" rid="ref30">30</xref>]. Third, it is possible that individual style (of both the human provider and chatbot) can impact distinguishability, although the responses presented were for the most part short and impersonal. Fourth, it is possible that there were biases in the web-based survey since the participants were given the prior knowledge that 5 answers were human generated and 5 answers were chatbot generated. Fifth, this study was conducted using ChatGPT in January 2023 (based on GPT-3.5; OpenAI) [<xref ref-type="bibr" rid="ref1">1</xref>]. Since then, more advanced underlying GPT models such as GPT-4 have been released, and further development has integrated GPT with EHRs and adapted it to medical tasks such as responding to patient portal messages [<xref ref-type="bibr" rid="ref6">6</xref>]. Finally, this study used only 10 real-world questions with human responses from 1 provider. Further studies incorporating larger numbers of real-world questions and responses are warranted.</p>
        <p>In addition, future research may explore how to prompt chatbots to provide an optimal patient experience [<xref ref-type="bibr" rid="ref30">30</xref>], investigate if there are types of questions that chatbots are better at answering than others, and explore if patients feel more trusting if there is clinician review before chatbots respond. Continued studies investigating how model responses differ by patient demographics (eg, gender and race) [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>] will be critical to ensure the recognition and mitigation of model biases and work toward equitable responses. Research to mitigate risks of AI chatbot–generated responses, including the potential for patient harm caused by incorrect answers; cybersecurity vulnerabilities [<xref ref-type="bibr" rid="ref31">31</xref>]; and environmental, social, and financial risks [<xref ref-type="bibr" rid="ref32">32</xref>] should also be further explored.</p>
      </sec>
      <sec>
        <title>Conclusion</title>
        <p>Overall, our study shows that ChatGPT responses to patient questions were weakly distinguishable from provider responses. Furthermore, laypeople trusted chatbots to answer lower-risk health questions. It is important to continue studying how patients interact (objectively and emotionally) with chatbots as they become a commodity and move from administrative to more clinical roles in health care.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Distribution of correct responses.</p>
        <media xlink:href="mededu_v9i1e46939_app1.png" xlink:title="PNG File, 93 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ChatGPT</term>
          <def>
            <p>Chat Generative Pre-trained Transformer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">GPT</term>
          <def>
            <p>Generative Pre-trained Transformer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors receive financial support from the US National Science Foundation (awards 1928614 and 2129076) for the submitted work. The funding source had no further role in this study. We used the generative artificial intelligence tool ChatGPT (Chat Generative Pre-trained Transformer) by OpenAI [<xref ref-type="bibr" rid="ref1">1</xref>] to draft the chatbot responses for the research survey.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The anonymized data generated during and/or analyzed during this study are available from the corresponding author on reasonable request.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>ON, NS, and DM designed the study, selected the content for the experiment, and wrote the first draft of the manuscript. ON and NS implemented the experiment and performed the statistical analysis. All authors vouch for the data, analyses, and interpretations; critically reviewed and contributed to the preparation of the manuscript; and approved the final version.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>Introducing ChatGPT</article-title>
          <source>OpenAI</source>
          <year>2022</year>
          <access-date>2023-07-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openai.com/blog/chatgpt">https://openai.com/blog/chatgpt</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bubeck</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Petro</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Benefits, limits, and risks of GPT-4 as an AI chatbot for medicine</article-title>
          <source>N Engl J Med</source>
          <year>2023</year>
          <month>03</month>
          <day>30</day>
          <volume>388</volume>
          <issue>13</issue>
          <fpage>1233</fpage>
          <lpage>1239</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMsr2214184</pub-id>
          <pub-id pub-id-type="medline">36988602</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Biswas</surname>
              <given-names>SS</given-names>
            </name>
          </person-group>
          <article-title>Role of Chat GPT in public health</article-title>
          <source>Ann Biomed Eng</source>
          <year>2023</year>
          <month>05</month>
          <day>15</day>
          <volume>51</volume>
          <issue>5</issue>
          <fpage>868</fpage>
          <lpage>869</lpage>
          <pub-id pub-id-type="doi">10.1007/s10439-023-03172-7</pub-id>
          <pub-id pub-id-type="medline">36920578</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10439-023-03172-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bruni</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Will ChatGPT make me irrelevant?</article-title>
          <source>The New York Times</source>
          <year>2022</year>
          <month>12</month>
          <day>15</day>
          <access-date>2023-07-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nytimes.com/2022/12/15/opinion/chatgpt-artificial-intelligence.html">https://www.nytimes.com/2022/12/15/opinion/chatgpt-artificial-intelligence.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stern</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT wrote my AP English essay—and I passed</article-title>
          <source>The Wall Street Journal</source>
          <year>2022</year>
          <month>12</month>
          <day>21</day>
          <access-date>2023-07-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.wsj.com/articles/chatgpt-wrote-my-ap-english-essayand-i-passed-11671628256">https://www.wsj.com/articles/chatgpt-wrote-my-ap-english-essayand-i-passed-11671628256</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Turner</surname>
              <given-names>BEW</given-names>
            </name>
          </person-group>
          <article-title>Epic, Microsoft bring GPT-4 to EHRs</article-title>
          <source>Modern Healthcare</source>
          <year>2023</year>
          <month>04</month>
          <day>17</day>
          <access-date>2023-07-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.modernhealthcare.com/digital-health/himss-2023-epic-microsoft-bring-openais-gpt-4-ehrs">https://www.modernhealthcare.com/digital-health/himss-2023-epic-microsoft-bring-openais-gpt-4-ehrs</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abid</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Farooqi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zou</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Large language models associate Muslims with violence</article-title>
          <source>Nat Mach Intell</source>
          <year>2021</year>
          <month>06</month>
          <day>17</day>
          <volume>3</volume>
          <issue>6</issue>
          <fpage>461</fpage>
          <lpage>463</lpage>
          <pub-id pub-id-type="doi">10.1038/s42256-021-00359-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bolukbasi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Zou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Saligrama</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Kalai</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Man is to computer programmer as woman is to homemaker? debiasing word embeddings</article-title>
          <year>2016</year>
          <month>12</month>
          <day>5</day>
          <conf-name>NIPS'16: 30th International Conference on Neural Information Processing Systems</conf-name>
          <conf-date>December 5-10, 2016</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <fpage>4356</fpage>
          <lpage>4364</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/doi/10.5555/3157382.3157584"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Frieske</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ishii</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bang</surname>
              <given-names>YJ</given-names>
            </name>
            <name name-style="western">
              <surname>Madotto</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fung</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Survey of hallucination in natural language generation</article-title>
          <source>ACM Comput Surv</source>
          <year>2023</year>
          <month>03</month>
          <day>03</day>
          <volume>55</volume>
          <issue>12</issue>
          <fpage>1</fpage>
          <lpage>38</lpage>
          <pub-id pub-id-type="doi">10.1145/3571730</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>KC-C</given-names>
            </name>
          </person-group>
          <article-title>Towards reasoning in large language models: a survey</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on May 26, 2023</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2212.10403</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Holmgren</surname>
              <given-names>A Jay</given-names>
            </name>
            <name name-style="western">
              <surname>Downing</surname>
              <given-names>N Lance</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>Mitchell</given-names>
            </name>
            <name name-style="western">
              <surname>Sharp</surname>
              <given-names>Christopher</given-names>
            </name>
            <name name-style="western">
              <surname>Longhurst</surname>
              <given-names>Christopher</given-names>
            </name>
            <name name-style="western">
              <surname>Huckman</surname>
              <given-names>Robert S</given-names>
            </name>
          </person-group>
          <article-title>Assessing the impact of the COVID-19 pandemic on clinician ambulatory electronic health record use</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2022</year>
          <month>01</month>
          <day>29</day>
          <volume>29</volume>
          <issue>3</issue>
          <fpage>453</fpage>
          <lpage>460</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34888680"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab268</pub-id>
          <pub-id pub-id-type="medline">34888680</pub-id>
          <pub-id pub-id-type="pii">6458072</pub-id>
          <pub-id pub-id-type="pmcid">PMC8689796</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gardner</surname>
              <given-names>Rebekah L</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>Emily</given-names>
            </name>
            <name name-style="western">
              <surname>Haskell</surname>
              <given-names>Jacqueline</given-names>
            </name>
            <name name-style="western">
              <surname>Harris</surname>
              <given-names>Daniel A</given-names>
            </name>
            <name name-style="western">
              <surname>Poplau</surname>
              <given-names>Sara</given-names>
            </name>
            <name name-style="western">
              <surname>Kroth</surname>
              <given-names>Philip J</given-names>
            </name>
            <name name-style="western">
              <surname>Linzer</surname>
              <given-names>Mark</given-names>
            </name>
          </person-group>
          <article-title>Physician stress and burnout: the impact of health information technology</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2019</year>
          <month>02</month>
          <day>01</day>
          <volume>26</volume>
          <issue>2</issue>
          <fpage>106</fpage>
          <lpage>114</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30517663"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocy145</pub-id>
          <pub-id pub-id-type="medline">30517663</pub-id>
          <pub-id pub-id-type="pii">5230918</pub-id>
          <pub-id pub-id-type="pmcid">PMC7647171</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marmor</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Clay</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Millen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Savides</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Longhurst</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>The impact of physician EHR usage on patient satisfaction</article-title>
          <source>Appl Clin Inform</source>
          <year>2018</year>
          <month>01</month>
          <day>03</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>11</fpage>
          <lpage>14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29298451"/>
          </comment>
          <pub-id pub-id-type="doi">10.1055/s-0037-1620263</pub-id>
          <pub-id pub-id-type="medline">29298451</pub-id>
          <pub-id pub-id-type="pmcid">PMC5801886</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singhal</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Azizi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mahdavi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>HW</given-names>
            </name>
            <name name-style="western">
              <surname>Scales</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Large language models encode clinical knowledge</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on December 29, 2022</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2212.13138</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarraju</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bruemmer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Van Iterson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Rodriguez</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Laffin</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Appropriateness of cardiovascular disease prevention recommendations obtained from a popular online chat-based artificial intelligence model</article-title>
          <source>JAMA</source>
          <year>2023</year>
          <month>03</month>
          <day>14</day>
          <volume>329</volume>
          <issue>10</issue>
          <fpage>842</fpage>
          <lpage>844</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2023.1044</pub-id>
          <pub-id pub-id-type="medline">36735264</pub-id>
          <pub-id pub-id-type="pii">2801244</pub-id>
          <pub-id pub-id-type="pmcid">PMC10015303</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ayers</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Poliak</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Leas</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Kelley</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Faix</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Goodman</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Longhurst</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Hogarth</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <article-title>Comparing physician and artificial intelligence chatbot responses to patient questions posted to a public social media forum</article-title>
          <source>JAMA Intern Med</source>
          <year>2023</year>
          <month>06</month>
          <day>01</day>
          <volume>183</volume>
          <issue>6</issue>
          <fpage>589</fpage>
          <lpage>596</lpage>
          <pub-id pub-id-type="doi">10.1001/jamainternmed.2023.1838</pub-id>
          <pub-id pub-id-type="medline">37115527</pub-id>
          <pub-id pub-id-type="pii">2804309</pub-id>
          <pub-id pub-id-type="pmcid">PMC10148230</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shahsavar</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Choudhury</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>User intentions to use ChatGPT for self-diagnosis and health-related purposes: cross-sectional survey study</article-title>
          <source>JMIR Hum Factors</source>
          <year>2023</year>
          <month>05</month>
          <day>17</day>
          <volume>10</volume>
          <fpage>e47564</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://humanfactors.jmir.org/2023//e47564/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/47564</pub-id>
          <pub-id pub-id-type="medline">37195756</pub-id>
          <pub-id pub-id-type="pii">v10i1e47564</pub-id>
          <pub-id pub-id-type="pmcid">PMC10233444</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fisher</surname>
              <given-names>RA</given-names>
            </name>
          </person-group>
          <article-title>Design of experiments</article-title>
          <source>BMJ</source>
          <year>1936</year>
          <month>03</month>
          <day>14</day>
          <volume>1</volume>
          <issue>3923</issue>
          <fpage>554</fpage>
          <lpage>554</lpage>
          <pub-id pub-id-type="doi">10.1136/bmj.1.3923.554-a</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Young</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Amara</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bhattacharya</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>ML</given-names>
            </name>
          </person-group>
          <article-title>Patient and general public attitudes towards clinical artificial intelligence: a mixed methods systematic review</article-title>
          <source>Lancet Digit Health</source>
          <year>2021</year>
          <month>09</month>
          <volume>3</volume>
          <issue>9</issue>
          <fpage>e599</fpage>
          <lpage>e611</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2589-7500(21)00132-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S2589-7500(21)00132-1</pub-id>
          <pub-id pub-id-type="medline">34446266</pub-id>
          <pub-id pub-id-type="pii">S2589-7500(21)00132-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>IC</given-names>
            </name>
            <name name-style="western">
              <surname>Shih</surname>
              <given-names>YS</given-names>
            </name>
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>KM</given-names>
            </name>
          </person-group>
          <article-title>Why would you use medical chatbots? interview and survey</article-title>
          <source>Int J Med Inform</source>
          <year>2022</year>
          <month>09</month>
          <volume>165</volume>
          <fpage>104827</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2022.104827</pub-id>
          <pub-id pub-id-type="medline">35797921</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(22)00141-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hogg</surname>
              <given-names>HDJ</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Zubaidy</surname>
              <given-names>M</given-names>
            </name>
            <collab>Technology Enhanced Macular Services Study Reference Group</collab>
            <name name-style="western">
              <surname>Talks</surname>
              <given-names>James</given-names>
            </name>
            <name name-style="western">
              <surname>Denniston</surname>
              <given-names>Alastair K</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>Christopher J</given-names>
            </name>
            <name name-style="western">
              <surname>Malawana</surname>
              <given-names>Johann</given-names>
            </name>
            <name name-style="western">
              <surname>Papoutsi</surname>
              <given-names>Chrysanthi</given-names>
            </name>
            <name name-style="western">
              <surname>Teare</surname>
              <given-names>Marion Dawn</given-names>
            </name>
            <name name-style="western">
              <surname>Keane</surname>
              <given-names>Pearse A</given-names>
            </name>
            <name name-style="western">
              <surname>Beyer</surname>
              <given-names>Fiona R</given-names>
            </name>
            <name name-style="western">
              <surname>Maniatopoulos</surname>
              <given-names>Gregory</given-names>
            </name>
          </person-group>
          <article-title>Stakeholder perspectives of clinical artificial intelligence implementation: systematic review of qualitative evidence</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <month>01</month>
          <day>10</day>
          <volume>25</volume>
          <fpage>e39742</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e39742/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/39742</pub-id>
          <pub-id pub-id-type="medline">36626192</pub-id>
          <pub-id pub-id-type="pii">v25i1e39742</pub-id>
          <pub-id pub-id-type="pmcid">PMC9875023</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Attia</surname>
              <given-names>ZI</given-names>
            </name>
            <name name-style="western">
              <surname>Harmon</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Dugan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Manka</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lopez-Jimenez</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lerman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Siontis</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Noseworthy</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Klavetter</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Halamka</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Asirvatham</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Carter</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Leibovich</surname>
              <given-names>BC</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>Prospective evaluation of smartwatch-enabled detection of left ventricular dysfunction</article-title>
          <source>Nat Med</source>
          <year>2022</year>
          <month>12</month>
          <day>14</day>
          <volume>28</volume>
          <issue>12</issue>
          <fpage>2497</fpage>
          <lpage>2503</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36376461"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41591-022-02053-1</pub-id>
          <pub-id pub-id-type="medline">36376461</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-022-02053-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC9805528</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <article-title>DocsGPT</article-title>
          <source>Doximity</source>
          <year>2023</year>
          <access-date>2023-07-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.doximity.com/docs-gpt">https://www.doximity.com/docs-gpt</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nadarzynski</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Miles</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Cowie</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ridge</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Acceptability of artificial intelligence (AI)-led chatbot services in healthcare: a mixed-methods study</article-title>
          <source>Digit Health</source>
          <year>2019</year>
          <month>08</month>
          <day>21</day>
          <volume>5</volume>
          <fpage>2055207619871808</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/2055207619871808?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub++0pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/2055207619871808</pub-id>
          <pub-id pub-id-type="medline">31467682</pub-id>
          <pub-id pub-id-type="pii">10.1177_2055207619871808</pub-id>
          <pub-id pub-id-type="pmcid">PMC6704417</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Montenegro</surname>
              <given-names>JLZ</given-names>
            </name>
            <name name-style="western">
              <surname>da Costa</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>da Rosa Righi</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Survey of conversational agents in health</article-title>
          <source>Expert Syst Appl</source>
          <year>2019</year>
          <month>09</month>
          <volume>129</volume>
          <fpage>56</fpage>
          <lpage>67</lpage>
          <pub-id pub-id-type="doi">10.1016/j.eswa.2019.03.054</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Winn</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Somai</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fergestrom</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Crotty</surname>
              <given-names>BH</given-names>
            </name>
          </person-group>
          <article-title>Association of use of online symptom checkers with patients' plans for seeking care</article-title>
          <source>JAMA Netw Open</source>
          <year>2019</year>
          <month>12</month>
          <day>02</day>
          <volume>2</volume>
          <issue>12</issue>
          <fpage>e1918561</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31880791"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2019.18561</pub-id>
          <pub-id pub-id-type="medline">31880791</pub-id>
          <pub-id pub-id-type="pii">2757995</pub-id>
          <pub-id pub-id-type="pmcid">PMC6991310</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mann</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Lawrence</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Reimagining connected care in the era of digital medicine</article-title>
          <source>JMIR mHealth uHealth</source>
          <year>2022</year>
          <month>04</month>
          <day>15</day>
          <volume>10</volume>
          <issue>4</issue>
          <fpage>e34483</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mhealth.jmir.org/2022/4/e34483/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/34483</pub-id>
          <pub-id pub-id-type="medline">35436238</pub-id>
          <pub-id pub-id-type="pii">v10i4e34483</pub-id>
          <pub-id pub-id-type="pmcid">PMC9055469</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dratsch</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Rezazade Mehrizi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kloeckner</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mähringer-Kunz</surname>
              <given-names>Aline</given-names>
            </name>
            <name name-style="western">
              <surname>Püsken</surname>
              <given-names>Michael</given-names>
            </name>
            <name name-style="western">
              <surname>Baeßler</surname>
              <given-names>Bettina</given-names>
            </name>
            <name name-style="western">
              <surname>Sauer</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Maintz</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Pinto Dos Santos</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Automation bias in mammography: the impact of artificial intelligence BI-RADS suggestions on reader performance</article-title>
          <source>Radiology</source>
          <year>2023</year>
          <month>05</month>
          <volume>307</volume>
          <issue>4</issue>
          <fpage>e222176</fpage>
          <pub-id pub-id-type="doi">10.1148/radiol.222176</pub-id>
          <pub-id pub-id-type="medline">37129490</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mello</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Guha</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT and physicians' malpractice risk</article-title>
          <source>JAMA Health Forum</source>
          <year>2023</year>
          <month>05</month>
          <day>05</day>
          <volume>4</volume>
          <issue>5</issue>
          <fpage>e231938</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jamanetwork.com/article.aspx?doi=10.1001/jamahealthforum.2023.1938"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamahealthforum.2023.1938</pub-id>
          <pub-id pub-id-type="medline">37200013</pub-id>
          <pub-id pub-id-type="pii">2805334</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sebastian</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>George</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>George</given-names>
            </name>
          </person-group>
          <article-title>Persuading patients using rhetoric to improve artificial intelligence adoption: experimental study</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <month>03</month>
          <day>13</day>
          <volume>25</volume>
          <fpage>e41430</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e41430/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/41430</pub-id>
          <pub-id pub-id-type="medline">36912869</pub-id>
          <pub-id pub-id-type="pii">v25i1e41430</pub-id>
          <pub-id pub-id-type="pmcid">PMC10131865</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sebastian</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Do ChatGPT and other AI chatbots pose a cybersecurity risk?: an exploratory study</article-title>
          <source>International Journal of Security and Privacy in Pervasive Computing</source>
          <year>2023</year>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>11</lpage>
          <pub-id pub-id-type="doi">10.4018/ijsppc.320225</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bender</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Gebru</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>McMillan-Major</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shmitchell</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>On the dangers of stochastic parrots: can language models be too big?</article-title>
          <year>2021</year>
          <month>3</month>
          <conf-name>FAccT '21: 2021 ACM Conference on Fairness, Accountability, and Transparency</conf-name>
          <conf-date>March 3-10, 2021</conf-date>
          <conf-loc>Virtual event, Canada</conf-loc>
          <fpage>610</fpage>
          <lpage>623</lpage>
          <pub-id pub-id-type="doi">10.1145/3442188.3445922</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
