<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JME</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Med Educ</journal-id>
      <journal-title>JMIR Medical Education</journal-title>
      <issn pub-type="epub">2369-3762</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v11i1e81718</article-id>
      <article-id pub-id-type="pmid">41124694</article-id>
      <article-id pub-id-type="doi">10.2196/81718</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Automated Evaluation of Reflection and Feedback Quality in Workplace-Based Assessments by Using Natural Language Processing: Cross-Sectional Competency-Based Medical Education Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eriksen</surname>
            <given-names>Jeppe</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Valanci</surname>
            <given-names>Sofia</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Hsiao</surname>
            <given-names>Cheng-Ting</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lee</surname>
            <given-names>Li-Ang</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Hanmore</surname>
            <given-names>Tessa</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Jeng-Wen</given-names>
          </name>
          <degrees>MSc, MD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <address>
            <institution>Department of Otolaryngology–Head and Neck Surgery</institution>
            <institution>Cardinal Tien Hospital</institution>
            <institution>Fu Jen Catholic University</institution>
            <addr-line>362, ZhongZheng Rd</addr-line>
            <addr-line>Xindian Dist</addr-line>
            <addr-line>New Taipei City, 23148</addr-line>
            <country>Taiwan</country>
            <phone>886 2 22193391 ext 67451</phone>
            <fax>886 2 22195821</fax>
            <email>086365@mail.fju.edu.tw</email>
          </address>
          <xref rid="aff02" ref-type="aff">2</xref>
          <xref rid="aff03" ref-type="aff">3</xref>
          <xref rid="aff04" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3635-4815</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Tu</surname>
            <given-names>Hai-Lun</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff05" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0006-1080-6739</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Chang</surname>
            <given-names>Chun-Hsiang</given-names>
          </name>
          <degrees>MSc, MD</degrees>
          <xref rid="aff01" ref-type="aff">1</xref>
          <xref rid="aff02" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4344-4766</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Hsu</surname>
            <given-names>Wei-Chung</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff02" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8583-8459</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Pa-Chun</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff06" ref-type="aff">6</xref>
          <xref rid="aff07" ref-type="aff">7</xref>
          <xref rid="aff08" ref-type="aff">8</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6288-9218</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Liao</surname>
            <given-names>Chun-Hou</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff09" ref-type="aff">9</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9414-8660</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Chen</surname>
            <given-names>Mingchih</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff03" ref-type="aff">3</xref>
          <xref rid="aff10" ref-type="aff">10</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8278-0033</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff01">
        <label>1</label>
        <institution>Department of Otolaryngology–Head and Neck Surgery</institution>
        <institution>Cardinal Tien Hospital</institution>
        <institution>Fu Jen Catholic University</institution>
        <addr-line>New Taipei City</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff02">
        <label>2</label>
        <institution>Department of Otolaryngology–Head and Neck Surgery</institution>
        <institution>National Taiwan University Hospital and Children’s Hospital</institution>
        <addr-line>Taipei</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff03">
        <label>3</label>
        <institution>Department of Hospital Management</institution>
        <institution>Graduate Institute of Business Administration</institution>
        <institution>Fu Jen Catholic University</institution>
        <addr-line>New Taipei City</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff04">
        <label>4</label>
        <institution>Department of Education and Research</institution>
        <institution>Cardinal Tien Junior College of Healthcare and Management</institution>
        <addr-line>New Taipei City</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff05">
        <label>5</label>
        <institution>Department of Library and Information Science</institution>
        <institution>Fu Jen Catholic University</institution>
        <addr-line>New Taipei City</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff06">
        <label>6</label>
        <institution>Cathay General Hospital</institution>
        <institution>Department of Otolaryngology</institution>
        <addr-line>Taipei</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff07">
        <label>7</label>
        <institution>School of Medicine</institution>
        <institution>Fu Jen Catholic University</institution>
        <addr-line>New Taipei City</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff08">
        <label>8</label>
        <institution>Department of Medical Research</institution>
        <institution>China Medical University Hospital</institution>
        <institution>China Medical University</institution>
        <addr-line>Taichung</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff09">
        <label>9</label>
        <institution>Department of Surgery, Division of Urology</institution>
        <institution>Cardinal Tien Hospital and School of Medicine</institution>
        <institution>Fu Jen Catholic University</institution>
        <addr-line>New Taipei City</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff10">
        <label>10</label>
        <institution>Artificial Intelligence Development Center</institution>
        <institution>Fu Jen Catholic University</institution>
        <addr-line>New Taipei City</addr-line>
        <country>Taiwan</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Jeng-Wen Chen <email>086365@mail.fju.edu.tw</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>22</day>
        <month>10</month>
        <year>2025</year>
      </pub-date>
      <volume>11</volume>
      <elocation-id>e81718</elocation-id>
      <history>
        <date date-type="received">
          <day>1</day>
          <month>8</month>
          <year>2025</year>
        </date>
        <date date-type="rev-request">
          <day>25</day>
          <month>8</month>
          <year>2025</year>
        </date>
        <date date-type="rev-recd">
          <day>13</day>
          <month>9</month>
          <year>2025</year>
        </date>
        <date date-type="accepted">
          <day>1</day>
          <month>10</month>
          <year>2025</year>
        </date>
      </history>
      <copyright-statement>©Jeng-Wen Chen, Hai-Lun Tu, Chun-Hsiang Chang, Wei-Chung Hsu, Pa-Chun Wang, Chun-Hou Liao, Mingchih Chen. Originally published in JMIR Medical Education (https://mededu.jmir.org), 22.10.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Education, is properly cited. The complete bibliographic information, a link to the original publication on https://mededu.jmir.org/, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://mededu.jmir.org/2025/1/e81718" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Competency-based medical education relies heavily on high-quality narrative reflections and feedback within workplace-based assessments. However, evaluating these narratives at scale remains a significant challenge.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to develop and apply natural language processing (NLP) models to evaluate the quality of resident reflections and faculty feedback documented in Entrustable Professional Activities (EPAs) on Taiwan’s nationwide Emyway platform for otolaryngology residency training.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This 4-year cross-sectional study analyzes 300 randomly sampled EPA assessments from 2021 to 2025, covering a pilot year and 3 full implementation years. Two medical education experts independently rated the narratives based on relevance, specificity, and the presence of reflective or improvement-focused language. Narratives were categorized into 4 quality levels—effective, moderate, ineffective, or irrelevant—and then dichotomized into high quality and low quality. We compared the performance of logistic regression, support vector machine, and bidirectional encoder representations from transformers (BERT) models in classifying narrative quality. The best performing model was then applied to track quality trends over time.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The BERT model, a multilingual pretrained language model, outperformed other approaches, achieving 85% and 92% accuracy in binary classification for resident reflections and faculty feedback, respectively. The accuracy for the 4-level classification was 67% for both. Longitudinal analysis revealed significant increases in high-quality reflections (from 70.3% to 99.5%) and feedback (from 50.6% to 88.9%) over the study period.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>BERT-based NLP demonstrated moderate-to-high accuracy in evaluating the narrative quality in EPA assessments, especially in the binary classification. While not a replacement for expert review, NLP models offer a valuable tool for monitoring narrative trends and enhancing formative feedback in competency-based medical education.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>competency-based medical education</kwd>
        <kwd>entrustable professional activities</kwd>
        <kwd>otolaryngology</kwd>
        <kwd>residency</kwd>
        <kwd>workplace-based assessment</kwd>
        <kwd>reflection</kwd>
        <kwd>feedback</kwd>
        <kwd>Emyway platform</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Medical education has undergone a fundamental transformation, with competency-based medical education (CBME) emerging as a central paradigm [<xref ref-type="bibr" rid="ref1">1</xref>]. In contrast to traditional time-based models that focus on the completion of predetermined curricula over fixed durations, CBME emphasizes the direct assessment of learners’ abilities to perform core professional activities safely and effectively in authentic clinical environments [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. This outcomes-oriented approach aims to ensure that physicians are not only knowledgeable but also clinically competent, adaptable, and equipped to address the evolving complexities of patient care [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>].</p>
      <p>The field of otorhinolaryngology–head and neck surgery underscores the urgency of this educational shift, given its demand for proficiency in complex surgical procedures and nuanced clinical decision-making [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. In response, the Taiwan Society of Otorhinolaryngology–Head and Neck Surgery (TSO-HNS) launched a structured competency framework in 2020, introducing 11 Entrustable Professional Activities (EPAs) as benchmarks for assessing resident performance (TSO-HNS Entrustable Professional Activities Assessment Framework for Resident Physician Training, second edition; see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). To support the systematic implementation of these EPAs, the Emyway digital platform was adopted in 2021, enabling more structured, transparent, and objective competency evaluations [<xref ref-type="bibr" rid="ref9">9</xref>]. Central to Emyway is the integration of workplace-based assessment (WBA), which promotes continuous learning through direct observation, self-reflection, formative feedback, and performance appraisal in real-world clinical settings [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. Unlike traditional assessments, WBAs offer dynamic, individualized insights that inform both clinical decision-making and technical skill development [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
      <p>A key challenge in CBME is bridging the gap between assessment and learning. Reflection and feedback play complementary roles in this process. When aligned, feedback shapes the focus of reflection, and reflection deepens engagement with feedback, turning assessments into learning opportunities. However, prior studies show that reflections often remain descriptive, and feedback lacks specificity, limiting their combined educational value [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Evaluating the quality of both processes is therefore essential to understanding how WBAs contribute to learning. A growing body of evidence underscores the role of high-quality reflections and feedback in reinforcing core competencies and enhancing learning outcomes [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. However, the quality of these narrative components within WBAs—particularly in otolaryngology residency programs and in multilingual training environments—remains insufficiently studied.</p>
      <p>A major challenge in the implementation of CBME is managing the substantial volume of narrative data generated through WBAs [<xref ref-type="bibr" rid="ref11">11</xref>]. On digital platforms such as Emyway, thousands of EPA evaluations are recorded, rendering manual review impractical. Traditional assessment methods that rely on human interpretation are time-consuming, resource-intensive, and susceptible to variability, limiting their ability to yield consistent and meaningful insights from large datasets [<xref ref-type="bibr" rid="ref16">16</xref>]. Overcoming this challenge requires innovative strategies to ensure that narrative reflections and feedback remain relevant, specific, and actionable—supporting continuous learning and improvement in residency training [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>].</p>
      <p>This study aims to address the challenge of evaluating narrative data in CBME by applying natural language processing (NLP) to systematically assess the quality of resident reflections and faculty feedback recorded within the Emyway platform. To capture these distinct but interrelated processes at scale, we applied NLP models to evaluate reflection and feedback separately, allowing for a clearer analysis of their respective contributions to CBME. We hypothesize that NLP can provide an objective, consistent, and scalable method for evaluating the effectiveness of narrative assessments, offering valuable insights into how feedback contributes to residents’ competency development [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. By leveraging NLP, this study seeks to improve the relevance, specificity, and actionability of reflections and feedback, thereby enhancing the guidance residents receive for their professional growth [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref22">22</xref>]. Resident reflections and faculty feedback are distinct constructs: reflections involve personal self-assessment, while feedback represents external evaluation from faculty. Although different, they occur simultaneously within the same WBA encounter. This study therefore examines both while ensuring that the NLP models and evaluation rubrics for reflections and feedback were developed and analyzed independently. Ultimately, this approach aims to bridge the gap between assessment and learning, strengthen CBME implementation, and support the development of a more robust otolaryngology residency training system.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study adheres to established ethical standards for medical education research. Informed consent was actively obtained: participants were required to read the “Training-Related Data Collection and Privacy Information” and click an “I agree” button before accessing the Emyway platform. The participants did not receive any compensation for their participation. The system includes built-in data protection mechanisms to prevent confidential information from being displayed. All data were deidentified prior to analysis, with personal identifiers removed, and access was restricted to the research team through secure, password-protected servers. The study protocol was reviewed and approved by the institutional review board of Cardinal Tien Hospital (CTH-112-2-1-002).</p>
      </sec>
      <sec>
        <title>Study Design and Setting</title>
        <p>This cross-sectional study examines the quality of resident reflections and faculty feedback recorded in the Emyway platform of TSO-HNS between 2021 and 2025. Emyway is a nationwide digital platform designed to support CBME by systematically collecting workplace-based EPA assessments from otolaryngology residency programs across Taiwan [<xref ref-type="bibr" rid="ref9">9</xref>]. Basic clinical information, encounter descriptions, resident reflections, and subsequent faculty feedback and ad hoc entrustment decisions were collected within a single standardized electronic form on the Emyway platform [<xref ref-type="bibr" rid="ref9">9</xref>]. The primary objective of this study was to evaluate the narrative quality of resident reflections and faculty feedback by using NLP algorithms, with the goal of improving assessment reliability and enhancing the educational value of feedback in clinical training.</p>
      </sec>
      <sec>
        <title>Data Collection and Sample Selection</title>
        <p>We selected 300 EPA assessment entries from the Emyway national database, covering the period from 2021 to 2025. Each entry included structured fields such as the EPA title, clinical diagnosis, and narrative components authored by both residents and faculty [<xref ref-type="bibr" rid="ref9">9</xref>]. To ensure diversity and representativeness, we employed stratified random sampling across training years, resident levels, and EPA categories. To reduce potential bias related to temporal improvements in narrative quality, we used cross-validation and ensured a balanced distribution of entries across earlier and later phases of implementation. Only complete assessments containing both resident reflections and faculty feedback were included in the final analysis.</p>
      </sec>
      <sec>
        <title>Narrative Quality Assessment</title>
        <p>Two medical education experts—one a physician-educator specializing in otolaryngology residency training and the other a senior faculty developer with expertise in educational measurement and feedback assessment—independently evaluated the quality of resident reflections and faculty feedback by using a structured rubric based on the core principles of CBME. Narratives were evaluated using established rubrics developed by Solano et al [<xref ref-type="bibr" rid="ref17">17</xref>] and Ötleş et al [<xref ref-type="bibr" rid="ref18">18</xref>], which have been previously validated in surgical residency programs and were adopted in our study without modification to ensure consistency with the existing literature. The rubric assesses 3 key dimensions: relevance, specificity, and either reflection content (for resident narratives) or actionability (for faculty feedback). Relevance evaluates the alignment of the narrative with the EPA and the clinical context. Specificity measures the clarity and detail with which strengths, weaknesses, or areas for improvement were identified. Reflection content assesses the presence of self-directed learning goals in resident narratives, while actionability examines whether faculty feedback provided clear, constructive guidance to support resident development. The analysis of interrater reliability showed a fair to moderate agreement in the 4-level classification and a substantial to almost perfect agreement in the 2-level classification (Table S1 in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). In cases where the 2 expert raters had discrepancies in their ratings, a third reviewer (the corresponding author) adjudicated and made the final decision to ensure consistency and accuracy in the gold standard dataset.</p>
        <p>Based on the evaluation criteria, narratives were categorized into 4 quality levels (<xref ref-type="table" rid="table1">Table 1</xref>): effective, moderate, ineffective, and irrelevant. Effective narratives were both relevant and specific; resident reflections demonstrated meaningful insight, and faculty feedback included actionable guidance. Moderate narratives maintained relevance but demonstrated only one additional element—either specificity or reflection content for residents or actionability for faculty. Ineffective narratives were superficially related to the EPA but lacked depth, with vague language and an absence of both specificity and meaningful reflection or guidance. Irrelevant narratives were off-topic, superficial, or disconnected from the clinical context. In this study, “high quality” refers to the combined category in the 2-level classification (encompassing both effective and moderate narratives) and “low quality” refers to ineffective and irrelevant narratives, whereas “effective” denotes the highest category within the 4-level classification.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Classification of the quality levels in residents’ reflections and faculty feedback.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="100"/>
            <col width="100"/>
            <col width="100"/>
            <col width="100"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td>Characteristics according to the 4-level classification<sup>a</sup></td>
                <td colspan="5">Quality of narrative content</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Effective<sup>b</sup></td>
                <td>Moderate<sup>b</sup></td>
                <td>Moderate<sup>b</sup></td>
                <td>Ineffective<sup>c</sup></td>
                <td>Irrelevant<sup>c</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Relevance</td>
                <td>Yes</td>
                <td>Yes</td>
                <td>Yes</td>
                <td>Yes</td>
                <td>No</td>
              </tr>
              <tr valign="top">
                <td>Specificity</td>
                <td>Yes</td>
                <td>Yes</td>
                <td>No</td>
                <td>No</td>
                <td>N/A<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>Reflection content in residents’ reflections</td>
                <td>Yes</td>
                <td>No</td>
                <td>Yes</td>
                <td>No</td>
                <td>N/A</td>
              </tr>
              <tr valign="top">
                <td>Action plan in faculty feedback</td>
                <td>Yes</td>
                <td>No</td>
                <td>Yes</td>
                <td>No</td>
                <td>N/A</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>In the 4-level classification, the categories are effective (highest quality), moderate, ineffective, and irrelevant.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>The combined group of effective and moderate narratives was classified as high quality per the 2-level classification.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>Ineffective and irrelevant narratives were classified as low quality per the 2-level classification.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>N/A: not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>NLP Framework</title>
        <p>To enhance the scalability and objectivity of narrative assessment, NLP techniques were applied to analyze resident reflections and faculty feedback. Two independent NLP models were developed and trained separately for reflections and feedback, ensuring that the classification processes remained independent while allowing both dimensions to be examined within the same WBA encounter. Three supervised machine learning models were implemented for classification: logistic regression (LR) [<xref ref-type="bibr" rid="ref23">23</xref>], support vector machine (SVM) [<xref ref-type="bibr" rid="ref24">24</xref>], and bidirectional encoder representations from transformers (BERT) [<xref ref-type="bibr" rid="ref25">25</xref>], which is a state-of-the-art deep learning model for natural language understanding.</p>
      </sec>
      <sec>
        <title>Data Preprocessing and Feature Extraction</title>
        <p>For traditional machine learning models such as LR and SVM, text preprocessing included tokenization using CKIPtagger for Chinese language segmentation, followed by transformation into term frequency–inverse document frequency feature vectors. In contrast, the BERT model processed raw text inputs directly, structured as a combination of context, EPA title, diagnosis, and either reflection or feedback. This approach leveraged BERT’s ability to generate contextualized embeddings without requiring additional preprocessing.</p>
      </sec>
      <sec>
        <title>Model Training and Evaluation</title>
        <p>To evaluate model performance, the dataset was randomly divided into a training set (80%) and a validation set (20%). Both fine-grained (4-level) and binary (2-level) classification models were developed to assess the impact of classification granularity. LR and SVM models were implemented using the <italic>scikit-learn</italic> library, while the BERT model was fine-tuned using the <italic>simpletransformers</italic> library with the pretrained BERT-base-multilingual-uncased model. BERT was trained for 10 epochs with a learning rate of 2e-5. The code used for training all the models is provided in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p>
      </sec>
      <sec>
        <title>Performance Metrics and Narrative Quality Trend Analysis</title>
        <p>We evaluated model performance by using standard metrics, including accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-score. We generated confusion matrices to visualize classification outcomes and identify patterns of misclassification. The analysis aimed to assess the accuracy of distinguishing high-quality and low-quality reflections and feedback, compare the performance across different machine learning models, and explore longitudinal trends in the narrative quality by using the best performing model throughout the study period from 2021 to 2025.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overall Model Performance</title>
        <p>Across the study period, the majority of EPA assessments were complete, containing both resident reflections and faculty feedback. Specifically, 90.1% (1422/1580) were complete in the pilot year (2021-2022), 95.1% (9939/10,447) in 2022-2023, 96.7% (10,601/10,966) in 2023-2024, and 97.1% (12,139/12,497) in 2024-2025. In total, 34,101 out of 35,490 assessments (96.1%) were complete and included in the final analysis. <xref ref-type="table" rid="table2">Table 2</xref> presents the expert-assessed quality distribution of 300 randomly selected EPA entries, comprising resident reflections and faculty feedback, used for developing and validating the NLP models.</p>
        <p><xref ref-type="table" rid="table3">Table 3</xref> summarizes the prediction outcomes from the 3 models evaluated in the study. The NLP-based classification models demonstrated substantial accuracy in assessing the quality of both resident reflections and faculty feedback, with the BERT model consistently outperforming the LR and SVM models. Specifically, for resident reflections, the BERT model achieved an accuracy of 85% for the 2-level classification and 67% for the more granular 4-level classification. Performance was even stronger for faculty feedback evaluation, where the BERT model attained an accuracy of 92% in the 2-level classification and maintained a 67% accuracy for the 4-level classification. Additionally, precision, recall, and <italic>F</italic><sub>1</sub>-scores showed consistent patterns across these evaluations, supporting the robustness and reliability of the BERT model.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Distribution of expert-assessed quality of 300 randomly selected Entrustable Professional Activity entries (resident reflections and faculty feedback) for natural language processing model development and validation.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="370"/>
            <col width="0"/>
            <col width="300"/>
            <col width="0"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Classification/quality rating</td>
                <td colspan="2">Resident reflections (n=300), n (%)</td>
                <td>Faculty feedback (n=300), n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6">
                  <bold>4-level classification</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Effective</td>
                <td colspan="2">134 (44.7)</td>
                <td colspan="2">168 (56)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Moderate</td>
                <td colspan="2">86 (28.7)</td>
                <td colspan="2">28 (9.3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Ineffective</td>
                <td colspan="2">49 (16.3)</td>
                <td colspan="2">24 (8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Irrelevant</td>
                <td colspan="2">31 (10.3)</td>
                <td colspan="2">80 (26.7)</td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>2-level classification</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>High-quality</td>
                <td colspan="2">220 (73.3)</td>
                <td colspan="2">196 (65.3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Low-quality</td>
                <td colspan="2">80 (26.7)</td>
                <td colspan="2">104 (34.7)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Prediction results of the residents’ reflections and faculty feedback by the 3 models in the study.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="130"/>
            <col width="110"/>
            <col width="110"/>
            <col width="110"/>
            <col width="90"/>
            <col width="0"/>
            <col width="110"/>
            <col width="110"/>
            <col width="110"/>
            <col width="90"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Narrative content, model</td>
                <td colspan="5">4-level classification</td>
                <td colspan="4">2-level classification</td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td>Accuracy (%)</td>
                <td>Precision (%)</td>
                <td>Recall (%)</td>
                <td><italic>F</italic><sub>1</sub>-score (%)</td>
                <td colspan="2">Accuracy (%)</td>
                <td>Precision (%)</td>
                <td>Recall (%)</td>
                <td><italic>F</italic><sub>1</sub>-score (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="11">
                  <bold>Resident reflections</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LR<sup>a</sup></td>
                <td>63</td>
                <td>66</td>
                <td>63</td>
                <td>64</td>
                <td colspan="2">80</td>
                <td>83</td>
                <td>80</td>
                <td>81</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SVM<sup>b</sup></td>
                <td>60</td>
                <td>63</td>
                <td>60</td>
                <td>60</td>
                <td colspan="2">85</td>
                <td>85</td>
                <td>85</td>
                <td>85</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BERT<sup>c</sup></td>
                <td>67</td>
                <td>67</td>
                <td>67</td>
                <td>65</td>
                <td colspan="2">85</td>
                <td>85</td>
                <td>85</td>
                <td>85</td>
              </tr>
              <tr valign="top">
                <td colspan="11">
                  <bold>Faculty feedback</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LR</td>
                <td>63</td>
                <td>55</td>
                <td>63</td>
                <td>59</td>
                <td colspan="2">78</td>
                <td>78</td>
                <td>78</td>
                <td>78</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>SVM</td>
                <td>63</td>
                <td>54</td>
                <td>63</td>
                <td>54</td>
                <td colspan="2">78</td>
                <td>81</td>
                <td>78</td>
                <td>76</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>BERT</td>
                <td>67</td>
                <td>65</td>
                <td>67</td>
                <td>64</td>
                <td colspan="2">92</td>
                <td>92</td>
                <td>92</td>
                <td>92</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>LR: logistic regression.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>SVM: support vector machine.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>BERT: bidirectional encoder representations from transformers.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Confusion Matrix Analysis</title>
        <p>To further assess model performance, confusion matrices were generated (<xref rid="figure1" ref-type="fig">Figure 1</xref>). The BERT model exhibited fewer misclassifications than LR and SVM, particularly in distinguishing between effective and moderate narratives. In contrast, LR and SVM frequently misclassified effective narratives as moderate or irrelevant, reflecting their limitations in detecting subtle contextual cues. Notably, BERT’s superior classification capability was most evident in faculty feedback, where its accuracy surpassed 90%, demonstrating its potential to improve automated assessment reliability in competency-based education frameworks. To illustrate the model’s interpretability and limitations, Table S2 in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref> presents anonymized examples of correctly classified and misclassified narratives.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Confusion matrices illustrating the classification performance of 3 natural language processing models—LR, SVM, and BERT—in evaluating the quality of resident reflections (A) and faculty feedback (B). The x-axis represents predicted categories, and the y-axis represents actual expert ratings. For the 2-level classification, narratives were categorized as high quality (H) or low quality (L). For the 4-level classification, the categories are effective (E), moderate (M), ineffective (IE), and irrelevant (IR). Numbers within each cell indicate the count of narratives, while shading intensity reflects frequency (darker=higher count). Compared with LR and SVM, BERT demonstrated fewer misclassifications and stronger performance in distinguishing between adjacent categories, particularly for faculty feedback. BERT: bidirectional encoder representations from transformers; LR: logistic regression; SVM: support vector machine.</p>
          </caption>
          <graphic xlink:href="mededu_v11i1e81718_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Two-Level and Four-Level Quality Classification Outcomes in the Emyway Platform</title>
        <p><xref rid="figure2" ref-type="fig">Figure 2</xref> illustrates the longitudinal trends in the narrative quality of resident reflections and faculty feedback, as classified by the BERT model using both 2-level and 4-level rating algorithms, across 4 academic years: the pilot year (2021-2022) through 2024-2025. Detailed distributions of frequencies and percentages are presented in Table S3 of <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>.</p>
        <p>In the 2-level classification, the proportion of high-quality resident reflections increased from 70.3% to 99.5%, while high-quality faculty feedback increased from 50.6% to 88.9% over the study period. Chi-square analyses confirmed that these improvements were statistically significant (<italic>P</italic>&#60;.001 for both groups), reflecting meaningful enhancement in the quality of narrative documentation. Similarly, in the 4-level classification, the proportion of “effective” resident reflections increased from 46.9% to 82.2%, and “effective” faculty feedback increased from 39.6% to 83%. These gains were also statistically significant (<italic>P</italic>&#60;.001), suggesting a sustained and substantive improvement in narrative quality over time, likely associated with the ongoing implementation of structured EPA frameworks and digital feedback systems.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Longitudinal trends in the quality of narrative assessments from 2021 to 2025, as classified by the bidirectional encoder representations from transformers model. Panel A displays resident reflections; panel B displays faculty feedback. In each panel, the left graph shows the 2-level classification (high quality vs low quality), and the right graph shows the 4-level classification (effective, moderate, ineffective, irrelevant). The x-axis represents academic years, with 2021-2022 as the pilot year, followed by 3 full implementation years. The y-axis indicates the percentage distribution of the narratives. Over time, both resident reflections and faculty feedback showed a significant increase in the proportion of high-quality and effective narratives.</p>
          </caption>
          <graphic xlink:href="mededu_v11i1e81718_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This study demonstrates the utility of NLP, specifically the BERT algorithm, in evaluating the narrative quality within WBAs in otolaryngology residency training. The BERT model achieved high accuracy in the binary classification—85% for resident reflections and 92% for faculty feedback—supporting its potential as a scalable, objective adjunct to manual evaluation. Notably, narrative quality improved significantly over the study period, with high-quality reflections increasing from 70.3% to 99.5% and high-quality faculty feedback from 50.6% to 88.9%. These findings highlight the potential of NLP to enhance quality assurance and longitudinal monitoring in CBME.</p>
        <p>Compared to traditional manual qualitative analysis, NLP offers unique advantages [<xref ref-type="bibr" rid="ref26">26</xref>]. Although human raters can capture contextual nuance and interpret implicit meaning, their assessments are time-intensive and subject to interrater variability. In contrast, NLP enables consistent, rapid, and scalable evaluation across large datasets [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. Prior research by Akbasli et al [<xref ref-type="bibr" rid="ref29">29</xref>] has demonstrated the feasibility of applying fine-tuned language models to non-English and multilingual medical texts. Our findings further support this approach, showing that integrating structured contextual inputs such as EPA titles, clinical diagnoses, and narrative components substantially enhances model accuracy. With adequate structured contextual inputs, BERT approximates human interpretive depth while retaining the efficiency and objectivity of automation.</p>
        <p>This approach should also be interpreted through the lens of educational assessment theory. Beyond its statistical performance, the application of NLP algorithms in this study aligns closely with established educational assessment theories and feedback quality frameworks. The structured rubric used to generate the gold standard—encompassing relevance, specificity, and either reflective content or actionability—reflects the core principles found in frameworks such as the Feedback Quality Instrument [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref32">32</xref>] and the R2C2 model (relationship building, exploring reactions, exploring content, coaching for change) [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. These frameworks emphasize that effective feedback and reflection must be contextually relevant, sufficiently specific, and actionable to promote self-regulated learning and professional growth. By incorporating these dimensions into the training data, BERT’s decision-making process operationalizes these theoretical constructs, mapping narrative text to empirically validated quality indicators. In this way, the model does not merely classify text based on linguistic patterns but also embeds the pedagogical priorities of CBME and EPA assessment. This alignment ensures that automated scoring supports the same developmental goals as expert human raters, enabling the model to serve as a theoretically grounded, scalable complement to manual evaluation.</p>
        <p>However, it is important to clarify that the R2C2 model is a coaching framework designed to structure feedback conversations rather than an evaluation rubric for written comments. In this study, R2C2 was referenced as a conceptual lens to underscore the coaching potential embedded in high-quality narrative feedback and not as a scoring tool. Recent literature has emphasized its role in facilitating meaningful faculty–learner interactions in WBAs [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]. Our findings on the quality of written reflections and feedback should therefore be viewed as complementary to, rather than substitutive of, coaching frameworks such as R2C2, providing a stronger foundation for effective feedback dialogue.</p>
        <p>In addition to methodological contributions, our findings suggest practical applications for residency programs. NLP outputs could be integrated into dashboards that track reflection and feedback quality over time, enabling program directors to identify gaps and design targeted faculty development workshops. At the same time, residents could receive timely, formative, reflective prompts on the quality of their reflections. By embedding these tools into CBME frameworks, narrative data can serve not only as an assessment record but also as a resource to strengthen feedback culture and support continuous coaching.</p>
      </sec>
      <sec>
        <title>Comparison With Previous Studies</title>
        <p>The superior performance of BERT relative to traditional machine learning models such as LR and SVM is a key contribution of this study. For instance, previous work by Ötleş et al [<xref ref-type="bibr" rid="ref18">18</xref>] reported a mean accuracy of 0.64 by using SVM for the 4-level classification of surgical feedback, which improved to 0.83 when simplified to binary classification. Similarly, Solano et al [<xref ref-type="bibr" rid="ref17">17</xref>] achieved an overall accuracy of 0.83 by using NLP but noted limitations in sensitivity (0.37), suggesting challenges in detecting lower quality feedback. In contrast, our BERT-based model achieved 85% accuracy for resident reflections and 92% for faculty feedback in binary classification, with balanced precision and recall scores. These results highlight BERT’s superior ability to contextualize text and detect nuanced linguistic patterns. Unlike traditional models, BERT effectively interprets the complex, often implicit nature of reflective narratives, validating its use in educational quality assessment within clinical training contexts [<xref ref-type="bibr" rid="ref37">37</xref>]. This capacity is particularly valuable, as reflective writing in medical education is typically layered, context-sensitive, and difficult to assess using rule-based or shallow models [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>].</p>
        <p>Although the 4-level classification achieved only moderate accuracy, its outputs can still inform educational practice. Even without perfect distinction between adjacent categories, the model can highlight patterns of lower quality narratives that may warrant attention. For instance, faculty development dashboards could flag programs or individuals generating a higher proportion of ineffective or moderate entries, prompting targeted coaching or workshops. These applications position the model as a supportive tool for monitoring and guiding feedback culture, complementing human judgment rather than replacing it.</p>
        <p>Unlike prior studies that emphasized cross-sectional performance [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>], this research provides longitudinal evidence of NLP’s ability to track and support improvements in feedback quality over time. Consistent with earlier findings, the model maintained high specificity, particularly in identifying low-quality narratives—a valuable feature for faculty development and system-level monitoring. Although the 4-level classification performance remained moderate (67% accuracy), this aligns with known challenges in distinguishing subtle qualitative gradations and highlights areas for future enhancement.</p>
        <p>The sustained improvement in the reflection quality across the study period underscores the value of structured WBA systems such as those implemented through the Emyway platform. These systems provide clear expectations and guidance, promoting deeper engagement, self-awareness, and professional development [<xref ref-type="bibr" rid="ref40">40</xref>]. This observation aligns with literature indicating that structured reflection fosters clinical reasoning, self-regulated learning, and long-term growth [<xref ref-type="bibr" rid="ref41">41</xref>-<xref ref-type="bibr" rid="ref44">44</xref>].</p>
        <p>Faculty feedback quality also improved substantially, increasing in specificity, relevance, and actionability. While still trailing resident reflections in overall quality, the upward trajectory from 50.6% to 88.9% suggests growing familiarity with EPA-based frameworks and greater faculty engagement. These findings reinforce the importance of structured systems in supporting effective feedback practices. NLP tools, in this context, can function as educational dashboards—tracking feedback quality across programs and timeframes, flagging low-quality entries, and informing faculty development and institutional policy.</p>
        <p>It is important to note that reflection quality and feedback quality were not conflated in this study; rather, they were modeled separately using independent rubrics and NLP training processes. Presenting them together highlights how these complementary elements of the same assessment encounter can be studied in parallel to inform faculty development and resident learning.</p>
        <p>We selected BERT over commercial large language models such as ChatGPT for both practical and performance-based reasons. As an open-source model, BERT is accessible to academic institutions without licensing constraints, facilitating integration into resource-limited settings. Moreover, internal comparisons indicated that ChatGPT, while powerful, lacked discriminative precision in this context and frequently defaulted to mid-range classifications (<xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>). In contrast, BERT demonstrated greater reliability and accuracy, particularly when provided with structured contextual information.</p>
      </sec>
      <sec>
        <title>Generalizability</title>
        <p>Although our findings highlight the utility of BERT-based NLP within Taiwan’s structured otolaryngology training system, their generalizability to other specialties, languages, and international contexts remains uncertain. Narrative style, cultural norms, and feedback practices vary widely across training environments, potentially affecting model performance. To ensure validity in non-Chinese language settings, rubric recalibration would be needed to align evaluation criteria with local educational practices and expectations. Furthermore, although multilingual pretrained models such as BERT provide a strong foundation, language-specific fine-tuning with locally generated narrative data would be required to capture semantic nuances and ensure accurate classification. These adaptations highlight the importance of international replication and validation, which will be essential to confirm generalizability and extend the impact of NLP-assisted evaluation across medical specialties and cultural contexts.</p>
        <p>The use of open-source NLP tools such as BERT also carries important ethical and practical implications. Although these models provide scalability, accessibility, and adaptability for educational use, they raise concerns about confidentiality, data security, and potential bias. To ensure responsible application, future implementation should include secure data management, careful local fine-tuning, and ongoing evaluation of fairness so that such tools enhance rather than compromise educational integrity.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Despite encouraging results in binary classification, several limitations should be noted. First, the model’s 67% accuracy in the 4-level classification reflects the inherent difficulty of distinguishing subtle qualitative differences in narrative assessments. Overlap in language used across adjacent categories—such as moderate and ineffective—poses challenges for both human raters and machine learning models. This limitation is common in educational NLP research and underscores the need for larger, more diverse training datasets, domain-specific model fine-tuning, and potentially incorporating contextual metadata (eg, resident level or case type). Although model performance stabilized during cross-validation, suggesting that the sample was adequate for the study objectives, larger datasets could further strengthen robustness. Moreover, the limited sample size may have contributed to weaker performance in the 4-level classification. Future strategies to address this limitation include expanding the dataset as the Emyway platform accumulates more entries, exploring data augmentation and domain-adaptive pretraining, and pursuing cross-institutional collaborations to increase sample diversity. These steps would strengthen model robustness and improve its ability to support nuanced educational decision-making. Although 4-level predictions should be interpreted with caution, they can still offer valuable insights for faculty development and formative assessment when combined with human judgment.</p>
        <p>Second, as with all text-based evaluations, important nonverbal cues and dynamic interpersonal interactions are not captured. Future work could extend beyond text-based analysis by integrating audio and video data with NLP. Multimodal inputs would capture tone, pacing, and nonverbal cues, complementing narrative content and offering a more holistic view of feedback interactions. This approach could strengthen competency-based medical education by providing richer insights to guide faculty development and resident learning.</p>
        <p>Third, although improvements were observed in the narrative quality, this study did not directly measure faculty engagement or sustained educational change. Future research should examine how NLP-generated insights might be incorporated into faculty development initiatives and longitudinal assessment strategies to determine whether they enhance faculty participation and support lasting improvements in feedback and reflection quality.</p>
        <p>Finally, the possibility of a Hawthorne effect should be considered. The awareness of being evaluated may have influenced improvements in reflection and feedback quality [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. Complementary qualitative research such as interviews or focus groups with residents and faculty could elucidate underlying motivations and perceptions, providing a richer perspective on behavioral change.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study demonstrates that BERT-based NLP, when applied with structured contextual inputs, can effectively evaluate the quality of multilingual resident reflections and faculty feedback in WBAs. The model achieved moderate to high accuracy, particularly in binary classification, suggesting its utility as a scalable adjunct to human evaluation. While not a substitute for expert judgment, NLP can facilitate large-scale monitoring of narrative quality and enhance the analysis of formative feedback in CBME. The progressive improvement in the narrative quality over 4 years highlights the value of structured EPA frameworks and digital platforms such as Emyway in promoting reflective practice and faculty development. Future research should explore the generalizability of this approach across medical specialties and investigate the integration of multimodal data to further enhance assessment validity and educational outcomes.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Taiwan Society of Otorhinolaryngology–Head and Neck Surgery Entrustable Professional Activities Assessment Framework for Resident Physician Training, second edition.</p>
        <media xlink:href="mededu_v11i1e81718_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 795 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Quantified agreement results (interrater reliability) for expert scoring.</p>
        <media xlink:href="mededu_v11i1e81718_app2.pdf" xlink:title="PDF File  (Adobe PDF File), 54 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Logistic regression, support vector machine, and bidirectional encoder representations from transformers code in Google Colaboratory.</p>
        <media xlink:href="mededu_v11i1e81718_app3.pdf" xlink:title="PDF File  (Adobe PDF File), 1265 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Sample outputs from the bidirectional encoder representations from transformers model for classifying narrative quality in resident reflections and faculty feedback.</p>
        <media xlink:href="mededu_v11i1e81718_app4.pdf" xlink:title="PDF File  (Adobe PDF File), 177 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Distribution of numbers (percentages) of 4-level and 2-level quality ratings for resident reflections and faculty feedback across pilot year (2021-2022), 2022-2023, 2023-2024, and 2024-2025.</p>
        <media xlink:href="mededu_v11i1e81718_app5.pdf" xlink:title="PDF File  (Adobe PDF File), 61 KB"/>
      </supplementary-material>
      <supplementary-material id="app6">
        <label>Multimedia Appendix 6</label>
        <p>Detailed process and results for evaluating resident reflections and faculty feedback quality by using ChatGPT-4o.</p>
        <media xlink:href="mededu_v11i1e81718_app6.pdf" xlink:title="PDF File  (Adobe PDF File), 273 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BERT</term>
          <def>
            <p>bidirectional encoder representations from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CBME</term>
          <def>
            <p>competency-based medical education</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EPA</term>
          <def>
            <p>Entrustable Professional Activity</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">LR</term>
          <def>
            <p>logistic regression</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">TSO-HNS</term>
          <def>
            <p>Taiwan Society of Otorhinolaryngology–Head and Neck Surgery</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">WBA</term>
          <def>
            <p>workplace-based assessment</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors are grateful to the Taiwan Society of Otorhinolaryngology–Head and Neck Surgery and all its faculty members and resident physicians for utilizing the Joint Commission of Taiwan’s Emyway platform. The authors also thank the information technology team of Dalin Tzu Chi Hospital for their support with the platform. Additionally, the authors are grateful for the administrative assistance provided by Chiu-Ping Wang, Shu-Hwei Fan, Uan-Shr Jan, and Wan-Ning Luo in this project. They received no additional compensation for their contributions. This study was supported by the National Science and Technology Council of the Republic of China (Taiwan) under grants NSTC 109-2511-H-567-001-MY2, NSTC 110-2511-H-567-001-MY2, and NSTC 112-2410-H-567-001-MY3 and was funded, in part, by Cardinal Tien Hospital under grants CTH110AK-2220 and CTH111AK-2221. The funders had no role in the design and conduct of the study; collection, management, analysis, and interpretation of the data; preparation, review, or approval of the manuscript; and decision to submit the manuscript for publication.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The datasets used and analyzed during this study are available from the corresponding author on reasonable request.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>Conceptualization: J-WC, C-HC, W-CH, P-CW</p>
        <p>Data curation: J-WC, H-LT, C-HC</p>
        <p>Methodology/formal analysis/validation: J-WC, H-LT, W-CH, P-CW</p>
        <p>Project administration: W-CH, C-HL, MC, P-CW</p>
        <p>Funding acquisition: J-WC, C-HC</p>
        <p>Visualization: C-HL, MC, J-WC</p>
        <p>Writing – original draft: J-WC, H-LT, C-HC</p>
        <p>Writing – review &#38; editing: J-WC, H-LT, C-HC, W-CH, P-CW, C-HL, MC</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>JX</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>LE</given-names>
            </name>
            <name name-style="western">
              <surname>Gray</surname>
              <given-names>ST</given-names>
            </name>
          </person-group>
          <article-title>A needs assessment for the future of otolaryngology education</article-title>
          <source>Otolaryngol Head Neck Surg</source>
          <year>2023</year>
          <month>07</month>
          <volume>169</volume>
          <issue>1</issue>
          <fpage>192</fpage>
          <lpage>193</lpage>
          <pub-id pub-id-type="doi">10.1177/01945998221128292</pub-id>
          <pub-id pub-id-type="medline">36125895</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kovatch</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Prince</surname>
              <given-names>MEP</given-names>
            </name>
            <name name-style="western">
              <surname>Sandhu</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Weighing entrustment decisions with patient care during residency training</article-title>
          <source>Otolaryngol Head Neck Surg</source>
          <year>2018</year>
          <month>06</month>
          <volume>158</volume>
          <issue>6</issue>
          <fpage>1024</fpage>
          <lpage>1027</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29558240"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/0194599818764652</pub-id>
          <pub-id pub-id-type="medline">29558240</pub-id>
          <pub-id pub-id-type="pmcid">PMC5984141</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lucey</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Thibault</surname>
              <given-names>GE</given-names>
            </name>
            <name name-style="western">
              <surname>ten Cate</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Competency-based, time-variable education in the health professions</article-title>
          <source>Acad Med</source>
          <year>2018</year>
          <volume>93</volume>
          <issue>3S</issue>
          <fpage>S1</fpage>
          <lpage>S5</lpage>
          <pub-id pub-id-type="doi">10.1097/acm.0000000000002080</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wagner</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Fahim</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dunn</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Reid</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sonnadara</surname>
              <given-names>RR</given-names>
            </name>
          </person-group>
          <article-title>Otolaryngology residency education: a scoping review on the shift towards competency-based medical education</article-title>
          <source>Clin Otolaryngol</source>
          <year>2017</year>
          <month>06</month>
          <volume>42</volume>
          <issue>3</issue>
          <fpage>564</fpage>
          <lpage>572</lpage>
          <pub-id pub-id-type="doi">10.1111/coa.12772</pub-id>
          <pub-id pub-id-type="medline">27754613</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chiang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Implementing an entrustable professional activities programmatic assessments for nurse practitioner training in emergency care: a pilot study</article-title>
          <source>Nurse Educ Today</source>
          <year>2022</year>
          <month>08</month>
          <volume>115</volume>
          <fpage>105409</fpage>
          <pub-id pub-id-type="doi">10.1016/j.nedt.2022.105409</pub-id>
          <pub-id pub-id-type="medline">35636245</pub-id>
          <pub-id pub-id-type="pii">S0260-6917(22)00145-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chiang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tsou</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Developing an entrustable professional activity for providing health education and consultation in occupational therapy and examining its validity</article-title>
          <source>BMC Med Educ</source>
          <year>2024</year>
          <month>06</month>
          <day>28</day>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>705</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmededuc.biomedcentral.com/articles/10.1186/s12909-024-05670-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12909-024-05670-1</pub-id>
          <pub-id pub-id-type="medline">38943116</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12909-024-05670-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC11214254</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huynh</surname>
              <given-names>PP</given-names>
            </name>
            <name name-style="western">
              <surname>Malkin</surname>
              <given-names>BD</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>KH</given-names>
            </name>
          </person-group>
          <article-title>Otolaryngology resident education: beyond procedural case logs—a 10-year single institutional review</article-title>
          <source>Otolaryngol Head Neck Surg</source>
          <year>2025</year>
          <month>03</month>
          <volume>172</volume>
          <issue>3</issue>
          <fpage>1077</fpage>
          <lpage>1084</lpage>
          <pub-id pub-id-type="doi">10.1002/ohn.1082</pub-id>
          <pub-id pub-id-type="medline">39756016</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singer</surname>
              <given-names>MC</given-names>
            </name>
          </person-group>
          <article-title>The future of otolaryngology training threatened: the negative impact of residency training reforms</article-title>
          <source>Otolaryngol Head Neck Surg</source>
          <year>2010</year>
          <month>03</month>
          <volume>142</volume>
          <issue>3</issue>
          <fpage>303</fpage>
          <lpage>305</lpage>
          <pub-id pub-id-type="doi">10.1016/j.otohns.2009.12.010</pub-id>
          <pub-id pub-id-type="medline">20172370</pub-id>
          <pub-id pub-id-type="pii">S0194-5998(09)01854-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>EMYWAY workplace-based entrustable professional activities assessments in otolaryngology residency training: a nationwide experience</article-title>
          <source>Otolaryngol Head Neck Surg</source>
          <year>2025</year>
          <month>04</month>
          <volume>172</volume>
          <issue>4</issue>
          <fpage>1242</fpage>
          <lpage>1253</lpage>
          <pub-id pub-id-type="doi">10.1002/ohn.1104</pub-id>
          <pub-id pub-id-type="medline">39739526</pub-id>
          <pub-id pub-id-type="pmcid">PMC11947863</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Norcini</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Burch</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Workplace-based assessment as an educational tool: AMEE Guide No. 31</article-title>
          <source>Med Teach</source>
          <year>2007</year>
          <month>11</month>
          <volume>29</volume>
          <issue>9</issue>
          <fpage>855</fpage>
          <lpage>871</lpage>
          <pub-id pub-id-type="doi">10.1080/01421590701775453</pub-id>
          <pub-id pub-id-type="medline">18158655</pub-id>
          <pub-id pub-id-type="pii">788884784</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ahle</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Eskender</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Schuller</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Carnes</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Koehler</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Willey</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Latif</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Doyle</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wnuk</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Fryer</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Mellinger</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>George</surname>
              <given-names>BC</given-names>
            </name>
          </person-group>
          <article-title>The quality of operative performance narrative feedback: a retrospective data comparison between end of rotation evaluations and workplace-based assessments</article-title>
          <source>Ann Surg</source>
          <year>2022</year>
          <month>03</month>
          <day>01</day>
          <volume>275</volume>
          <issue>3</issue>
          <fpage>617</fpage>
          <lpage>620</lpage>
          <pub-id pub-id-type="doi">10.1097/SLA.0000000000003907</pub-id>
          <pub-id pub-id-type="medline">32511125</pub-id>
          <pub-id pub-id-type="pii">00000658-202203000-00032</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Archer</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>State of the science in health professional education: effective feedback</article-title>
          <source>Med Educ</source>
          <year>2010</year>
          <month>01</month>
          <volume>44</volume>
          <issue>1</issue>
          <fpage>101</fpage>
          <lpage>108</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1365-2923.2009.03546.x</pub-id>
          <pub-id pub-id-type="medline">20078761</pub-id>
          <pub-id pub-id-type="pii">MED3546</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Watling</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ginsburg</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Assessment, feedback and the alchemy of learning</article-title>
          <source>Med Educ</source>
          <year>2019</year>
          <month>01</month>
          <volume>53</volume>
          <issue>1</issue>
          <fpage>76</fpage>
          <lpage>85</lpage>
          <pub-id pub-id-type="doi">10.1111/medu.13645</pub-id>
          <pub-id pub-id-type="medline">30073692</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Faucett</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>McCrary</surname>
              <given-names>HC</given-names>
            </name>
            <name name-style="western">
              <surname>Barry</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Saleh</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Erman</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Ishman</surname>
              <given-names>SL</given-names>
            </name>
          </person-group>
          <article-title>High-quality feedback regarding professionalism and communication skills in otolaryngology resident education</article-title>
          <source>Otolaryngol Head Neck Surg</source>
          <year>2018</year>
          <month>01</month>
          <volume>158</volume>
          <issue>1</issue>
          <fpage>36</fpage>
          <lpage>42</lpage>
          <pub-id pub-id-type="doi">10.1177/0194599817737758</pub-id>
          <pub-id pub-id-type="medline">29065274</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fernandes</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>de Vries</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>McEwen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mann</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Phillips</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zevin</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Evaluating the quality of narrative feedback for entrustable professional activities in a surgery residency program</article-title>
          <source>Ann Surg</source>
          <year>2024</year>
          <month>12</month>
          <day>01</day>
          <volume>280</volume>
          <issue>6</issue>
          <fpage>916</fpage>
          <lpage>924</lpage>
          <pub-id pub-id-type="doi">10.1097/SLA.0000000000006308</pub-id>
          <pub-id pub-id-type="medline">38660808</pub-id>
          <pub-id pub-id-type="pii">00000658-202412000-00003</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Spadafore</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yilmaz</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Rally</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Russell</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Thoma</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Monteiro</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pardhan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Monrad</surname>
              <given-names>SU</given-names>
            </name>
            <name name-style="western">
              <surname>Woods</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Using natural language processing to evaluate the quality of supervisor narrative comments in competency-based medical education</article-title>
          <source>Acad Med</source>
          <year>2024</year>
          <month>05</month>
          <day>01</day>
          <volume>99</volume>
          <issue>5</issue>
          <fpage>534</fpage>
          <lpage>540</lpage>
          <pub-id pub-id-type="doi">10.1097/ACM.0000000000005634</pub-id>
          <pub-id pub-id-type="medline">38232079</pub-id>
          <pub-id pub-id-type="pii">00001888-202405000-00019</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Solano</surname>
              <given-names>QP</given-names>
            </name>
            <name name-style="western">
              <surname>Hayward</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chopra</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Quanstrom</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kendrick</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Abbott</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>Kunzmann</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ahle</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Schuller</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ötleş</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>George</surname>
              <given-names>BC</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing and assessment of resident feedback quality</article-title>
          <source>J Surg Educ</source>
          <year>2021</year>
          <volume>78</volume>
          <issue>6</issue>
          <fpage>e72</fpage>
          <lpage>e77</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jsurg.2021.05.012</pub-id>
          <pub-id pub-id-type="medline">34167908</pub-id>
          <pub-id pub-id-type="pii">S1931-7204(21)00153-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ötleş</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kendrick</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Solano</surname>
              <given-names>QP</given-names>
            </name>
            <name name-style="western">
              <surname>Schuller</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ahle</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Eskender</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Carnes</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>George</surname>
              <given-names>BC</given-names>
            </name>
          </person-group>
          <article-title>Using natural language processing to automatically assess feedback quality: findings from 3 surgical residencies</article-title>
          <source>Acad Med</source>
          <year>2021</year>
          <month>10</month>
          <day>01</day>
          <volume>96</volume>
          <issue>10</issue>
          <fpage>1457</fpage>
          <lpage>1460</lpage>
          <pub-id pub-id-type="doi">10.1097/ACM.0000000000004153</pub-id>
          <pub-id pub-id-type="medline">33951682</pub-id>
          <pub-id pub-id-type="pii">00001888-202110000-00030</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Burke</surname>
              <given-names>HB</given-names>
            </name>
            <name name-style="western">
              <surname>Hoang</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lopreiato</surname>
              <given-names>JO</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hemmer</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Montgomery</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gagarin</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Assessing the ability of a large language model to score free-text medical student clinical notes: quantitative study</article-title>
          <source>JMIR Med Educ</source>
          <year>2024</year>
          <month>07</month>
          <day>25</day>
          <volume>10</volume>
          <fpage>e56342</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2024/1/e56342/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/56342</pub-id>
          <pub-id pub-id-type="medline">39118469</pub-id>
          <pub-id pub-id-type="pii">v10i1e56342</pub-id>
          <pub-id pub-id-type="pmcid">PMC11327632</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Van Ostaeyen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>De Langhe</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>De Clercq</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Embo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Schellens</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Valcke</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Automating the identification of feedback quality criteria and the CanMEDS roles in written feedback comments using natural language processing</article-title>
          <source>Perspect Med Educ</source>
          <year>2023</year>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>540</fpage>
          <lpage>549</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38144670"/>
          </comment>
          <pub-id pub-id-type="doi">10.5334/pme.1056</pub-id>
          <pub-id pub-id-type="medline">38144670</pub-id>
          <pub-id pub-id-type="pmcid">PMC10742245</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dine</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shea</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Clancy</surname>
              <given-names>CB</given-names>
            </name>
            <name name-style="western">
              <surname>Heath</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Pluta</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Kogan</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Finding the needle in the haystack: can natural language processing of students' evaluations of teachers identify teaching concerns?</article-title>
          <source>J Gen Intern Med</source>
          <year>2025</year>
          <month>01</month>
          <volume>40</volume>
          <issue>1</issue>
          <fpage>119</fpage>
          <lpage>123</lpage>
          <pub-id pub-id-type="doi">10.1007/s11606-024-08990-6</pub-id>
          <pub-id pub-id-type="medline">39167336</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11606-024-08990-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC11780028</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Le</surname>
              <given-names>KDR</given-names>
            </name>
            <name name-style="western">
              <surname>Tay</surname>
              <given-names>SBP</given-names>
            </name>
            <name name-style="western">
              <surname>Choy</surname>
              <given-names>KT</given-names>
            </name>
            <name name-style="western">
              <surname>Verjans</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sasanelli</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>JCH</given-names>
            </name>
          </person-group>
          <article-title>Applications of natural language processing tools in the surgical journey</article-title>
          <source>Front Surg</source>
          <year>2024</year>
          <volume>11</volume>
          <fpage>1403540</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38826809"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fsurg.2024.1403540</pub-id>
          <pub-id pub-id-type="medline">38826809</pub-id>
          <pub-id pub-id-type="pmcid">PMC11140056</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hosmer</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Lemeshow</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sturdivant</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <source>Applied Logistic Regression</source>
          <year>2013</year>
          <publisher-loc>Hoboken, New Jersey</publisher-loc>
          <publisher-name>John Wiley &#38; Sons</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hearst</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dumais</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Osuna</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Platt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schölkopf</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Support vector machines</article-title>
          <source>IEEE Intell Syst Their Appl</source>
          <year>1998</year>
          <month>07</month>
          <day>10</day>
          <volume>13</volume>
          <issue>4</issue>
          <fpage>18</fpage>
          <lpage>28</lpage>
          <pub-id pub-id-type="doi">10.1109/5254.708428</pub-id>
          <pub-id pub-id-type="pii">S0003-2670(11)00968-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <year>2019</year>
          <conf-name>Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</conf-name>
          <conf-date>June 2-7</conf-date>
          <conf-loc>Minneapolis, Minnesota</conf-loc>
          <fpage>4171</fpage>
          <lpage>4186</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deiner</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Honcharov</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mackey</surname>
              <given-names>TK</given-names>
            </name>
            <name name-style="western">
              <surname>Porco</surname>
              <given-names>TC</given-names>
            </name>
            <name name-style="western">
              <surname>Sarkar</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>Large language models can enable inductive thematic analysis of a social media corpus in a single prompt: human validation study</article-title>
          <source>JMIR Infodemiology</source>
          <year>2024</year>
          <month>08</month>
          <day>29</day>
          <volume>4</volume>
          <fpage>e59641</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://infodemiology.jmir.org/2024/1/e59641/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/59641</pub-id>
          <pub-id pub-id-type="medline">39207842</pub-id>
          <pub-id pub-id-type="pii">v4i1e59641</pub-id>
          <pub-id pub-id-type="pmcid">PMC11393503</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jacennik</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zawadzka-Gosk</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Moreira</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Glinkowski</surname>
              <given-names>WM</given-names>
            </name>
          </person-group>
          <article-title>Evaluating patients' experiences with healthcare services: extracting domain and language-specific information from free-text narratives</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2022</year>
          <month>08</month>
          <day>17</day>
          <volume>19</volume>
          <issue>16</issue>
          <fpage>10182</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijerph191610182"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph191610182</pub-id>
          <pub-id pub-id-type="medline">36011816</pub-id>
          <pub-id pub-id-type="pii">ijerph191610182</pub-id>
          <pub-id pub-id-type="pmcid">PMC9408527</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khanbhai</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Anyadi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Symons</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Flott</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Darzi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mayer</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Applying natural language processing and machine learning techniques to patient experience feedback: a systematic review</article-title>
          <source>BMJ Health Care Inform</source>
          <year>2021</year>
          <month>03</month>
          <volume>28</volume>
          <issue>1</issue>
          <fpage>e100262</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://informatics.bmj.com/lookup/pmidlookup?view=long&#38;pmid=33653690"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjhci-2020-100262</pub-id>
          <pub-id pub-id-type="medline">33653690</pub-id>
          <pub-id pub-id-type="pii">bmjhci-2020-100262</pub-id>
          <pub-id pub-id-type="pmcid">PMC7929894</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Akbasli</surname>
              <given-names>IT</given-names>
            </name>
            <name name-style="western">
              <surname>Birbilen</surname>
              <given-names>AZ</given-names>
            </name>
            <name name-style="western">
              <surname>Teksam</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Leveraging large language models to mimic domain expert labeling in unstructured text-based electronic healthcare records in non-English languages</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2025</year>
          <month>03</month>
          <day>31</day>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>154</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-025-02871-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-025-02871-6</pub-id>
          <pub-id pub-id-type="medline">40165165</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-025-02871-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC11959812</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Amirzadeh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rasouli</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dargahi</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Assessment of validity and reliability of the feedback quality instrument</article-title>
          <source>BMC Res Notes</source>
          <year>2024</year>
          <month>08</month>
          <day>16</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>227</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcresnotes.biomedcentral.com/articles/10.1186/s13104-024-06881-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13104-024-06881-x</pub-id>
          <pub-id pub-id-type="medline">39152449</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13104-024-06881-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC11328439</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Keating</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Leech</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Congdon</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kent</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Farlie</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Molloy</surname>
              <given-names>EK</given-names>
            </name>
          </person-group>
          <article-title>Development of the Feedback Quality Instrument: a guide for health professional educators in fostering learner-centred discussions</article-title>
          <source>BMC Med Educ</source>
          <year>2021</year>
          <month>07</month>
          <day>12</day>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>382</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmededuc.biomedcentral.com/articles/10.1186/s12909-021-02722-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12909-021-02722-8</pub-id>
          <pub-id pub-id-type="medline">34253221</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12909-021-02722-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC8276464</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bok</surname>
              <given-names>HGJ</given-names>
            </name>
            <name name-style="western">
              <surname>Teunissen</surname>
              <given-names>PW</given-names>
            </name>
            <name name-style="western">
              <surname>Favier</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Rietbroek</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>Theyse</surname>
              <given-names>LFH</given-names>
            </name>
            <name name-style="western">
              <surname>Brommer</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Haarhuis</surname>
              <given-names>JCM</given-names>
            </name>
            <name name-style="western">
              <surname>van Beukelen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>van der Vleuten</surname>
              <given-names>CPM</given-names>
            </name>
            <name name-style="western">
              <surname>Jaarsma</surname>
              <given-names>DADC</given-names>
            </name>
          </person-group>
          <article-title>Programmatic assessment of competency-based workplace learning: when theory meets practice</article-title>
          <source>BMC Med Educ</source>
          <year>2013</year>
          <month>09</month>
          <day>11</day>
          <volume>13</volume>
          <fpage>123</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmededuc.biomedcentral.com/articles/10.1186/1472-6920-13-123"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1472-6920-13-123</pub-id>
          <pub-id pub-id-type="medline">24020944</pub-id>
          <pub-id pub-id-type="pii">1472-6920-13-123</pub-id>
          <pub-id pub-id-type="pmcid">PMC3851012</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sargeant</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lockyer</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Mann</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Armson</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Warren</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zetkulic</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Soklaridis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Könings</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Silver</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Holmboe</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Shearer</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Boudreau</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The R2C2 model in residency education</article-title>
          <source>Acad Med</source>
          <year>2018</year>
          <volume>93</volume>
          <issue>7</issue>
          <fpage>1055</fpage>
          <lpage>1063</lpage>
          <pub-id pub-id-type="doi">10.1097/acm.0000000000002131</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sargeant</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lockyer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mann</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Holmboe</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Silver</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Armson</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Driessen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>MacLeod</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Power</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Facilitated reflective performance feedback</article-title>
          <source>Acad Med</source>
          <year>2015</year>
          <volume>90</volume>
          <issue>12</issue>
          <fpage>1698</fpage>
          <lpage>1706</lpage>
          <pub-id pub-id-type="doi">10.1097/acm.0000000000000809</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Patocka</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cooke</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>IWY</given-names>
            </name>
            <name name-style="western">
              <surname>Ellaway</surname>
              <given-names>RH</given-names>
            </name>
          </person-group>
          <article-title>Untangling feedback: mapping the patterns behind the practice</article-title>
          <source>Med Educ</source>
          <year>2025</year>
          <month>04</month>
          <day>07</day>
          <comment>Online ahead of print</comment>
          <pub-id pub-id-type="doi">10.1111/medu.15706</pub-id>
          <pub-id pub-id-type="medline">40194907</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ramani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Armson</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hanmore</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lee-Krueger</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Könings</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Roze des Ordons</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zetkulic</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sargeant</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lockyer</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Could the R2C2 feedback and coaching model enhance feedback literacy behaviors: a qualitative study exploring learner-preceptor feedback conversations</article-title>
          <source>Perspect Med Educ</source>
          <year>2025</year>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>9</fpage>
          <lpage>19</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.5334/pme.1368"/>
          </comment>
          <pub-id pub-id-type="doi">10.5334/pme.1368</pub-id>
          <pub-id pub-id-type="medline">39831131</pub-id>
          <pub-id pub-id-type="pmcid">PMC11740720</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Babu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Boddu</surname>
              <given-names>SB</given-names>
            </name>
          </person-group>
          <article-title>BERT-based medical chatbot: enhancing healthcare communication through natural language understanding</article-title>
          <source>Explor Res Clin Soc Pharm</source>
          <year>2024</year>
          <month>03</month>
          <volume>13</volume>
          <fpage>100419</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2667-2766(24)00014-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.rcsop.2024.100419</pub-id>
          <pub-id pub-id-type="medline">38495953</pub-id>
          <pub-id pub-id-type="pii">S2667-2766(24)00014-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC10940906</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Preiksaitis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ashenburg</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Bunney</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kabeer</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Riley</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ribeira</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rose</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>The role of large language models in transforming emergency medicine: scoping review</article-title>
          <source>JMIR Med Inform</source>
          <year>2024</year>
          <month>05</month>
          <day>10</day>
          <volume>12</volume>
          <fpage>e53787</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2024/1/e53787/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/53787</pub-id>
          <pub-id pub-id-type="medline">38728687</pub-id>
          <pub-id pub-id-type="pii">v12i1e53787</pub-id>
          <pub-id pub-id-type="pmcid">PMC11127144</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Revolutionizing health care: the transformative impact of large language models in medicine</article-title>
          <source>J Med Internet Res</source>
          <year>2025</year>
          <month>01</month>
          <day>07</day>
          <volume>27</volume>
          <fpage>e59069</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2025/1/e59069/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/59069</pub-id>
          <pub-id pub-id-type="medline">39773666</pub-id>
          <pub-id pub-id-type="pii">v27i1e59069</pub-id>
          <pub-id pub-id-type="pmcid">PMC11751657</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ginsburg</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Stroud</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Brydges</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Melvin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hatala</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Dual purposes by design: exploring alignment between residents' and academic advisors' documents in a longitudinal program</article-title>
          <source>Adv Health Sci Educ Theory Pract</source>
          <year>2024</year>
          <month>11</month>
          <volume>29</volume>
          <issue>5</issue>
          <fpage>1631</fpage>
          <lpage>1647</lpage>
          <pub-id pub-id-type="doi">10.1007/s10459-024-10318-2</pub-id>
          <pub-id pub-id-type="medline">38438699</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10459-024-10318-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cheung</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bhanji</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Gofton</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Karpinski</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Richardson</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Frank</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Dudek</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Design and implementation of a national program of assessment model - integrating entrustable professional activity assessments in Canadian specialist postgraduate medical education</article-title>
          <source>Perspect Med Educ</source>
          <year>2024</year>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>44</fpage>
          <lpage>55</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38343554"/>
          </comment>
          <pub-id pub-id-type="doi">10.5334/pme.956</pub-id>
          <pub-id pub-id-type="medline">38343554</pub-id>
          <pub-id pub-id-type="pmcid">PMC10854461</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Maart</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Clinical assessment strategies for competency-based education in prosthetic dentistry</article-title>
          <source>J Dent Educ</source>
          <year>2025</year>
          <month>03</month>
          <volume>89</volume>
          <issue>3</issue>
          <fpage>375</fpage>
          <lpage>382</lpage>
          <pub-id pub-id-type="doi">10.1002/jdd.13746</pub-id>
          <pub-id pub-id-type="medline">39436275</pub-id>
          <pub-id pub-id-type="pmcid">PMC11903901</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chan</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Dowling</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tastad</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Thoma</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Integrating training, practice, and reflection within a new model for Canadian medical licensure: a concept paper prepared for the Medical Council of Canada</article-title>
          <source>Can Med Educ J</source>
          <year>2022</year>
          <month>08</month>
          <volume>13</volume>
          <issue>4</issue>
          <fpage>68</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36091730"/>
          </comment>
          <pub-id pub-id-type="doi">10.36834/cmej.73717</pub-id>
          <pub-id pub-id-type="medline">36091730</pub-id>
          <pub-id pub-id-type="pii">CMEJ-13-068</pub-id>
          <pub-id pub-id-type="pmcid">PMC9441128</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rogers</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Priddis</surname>
              <given-names>LE</given-names>
            </name>
            <name name-style="western">
              <surname>Michels</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Tieman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Van Winkle</surname>
              <given-names>LJ</given-names>
            </name>
          </person-group>
          <article-title>Applications of the reflective practice questionnaire in medical education</article-title>
          <source>BMC Med Educ</source>
          <year>2019</year>
          <month>02</month>
          <day>07</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>47</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmededuc.biomedcentral.com/articles/10.1186/s12909-019-1481-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12909-019-1481-6</pub-id>
          <pub-id pub-id-type="medline">30732611</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12909-019-1481-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC6367754</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sedgwick</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Greenwood</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Understanding the Hawthorne effect</article-title>
          <source>BMJ</source>
          <year>2015</year>
          <month>09</month>
          <day>04</day>
          <volume>351</volume>
          <fpage>h4672</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://core.ac.uk/reader/74393583?utm_source=linkout"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.h4672</pub-id>
          <pub-id pub-id-type="medline">26341898</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Demetriou</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>TO</given-names>
            </name>
            <name name-style="western">
              <surname>Hing</surname>
              <given-names>CB</given-names>
            </name>
          </person-group>
          <article-title>Hawthorne effect on surgical studies</article-title>
          <source>ANZ J Surg</source>
          <year>2019</year>
          <month>12</month>
          <volume>89</volume>
          <issue>12</issue>
          <fpage>1567</fpage>
          <lpage>1576</lpage>
          <pub-id pub-id-type="doi">10.1111/ans.15475</pub-id>
          <pub-id pub-id-type="medline">31621178</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
