<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="review-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Educ</journal-id><journal-id journal-id-type="publisher-id">mededu</journal-id><journal-id journal-id-type="index">20</journal-id><journal-title>JMIR Medical Education</journal-title><abbrev-journal-title>JMIR Med Educ</abbrev-journal-title><issn pub-type="epub">2369-3762</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v12i1e71572</article-id><article-id pub-id-type="doi">10.2196/71572</article-id><article-categories><subj-group subj-group-type="heading"><subject>Review</subject></subj-group></article-categories><title-group><article-title>Augmented Reality in Surgical Training: Systematic Review of Its Impact on Technical Performance in Surgical Trainees</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>El Ashry</surname><given-names>Mahmoud</given-names></name><degrees>MBChB</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>El Ashry</surname><given-names>Ahmed</given-names></name><degrees>MBChB, MSc</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Khalique</surname><given-names>Hamza</given-names></name><degrees>MBChB</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib 
contrib-type="author"><name name-style="western"><surname>Abdalle</surname><given-names>Yahya</given-names></name><degrees>MBChB</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yeung</surname><given-names>Thomas</given-names></name><degrees>MBBCh</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>Bristol Medical School, University of Bristol</institution><addr-line>Beacon House, Queens Road</addr-line><addr-line>Bristol</addr-line><country>United Kingdom</country></aff><aff id="aff2"><institution>ENT/Otolaryngology Department, University Hospitals of North Midlands NHS Trust</institution><addr-line>Stoke-on-Trent</addr-line><country>United Kingdom</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Stone</surname><given-names>Alicia</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Niroomand</surname><given-names>Behnaz</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Demirel</surname><given-names>Doga</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Agresta</surname><given-names>Ferdinando</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Mahmoud El Ashry, MBChB, Bristol Medical School, University of Bristol, Beacon House, Queens Road, Bristol, BS8 1QU, United Kingdom, 44 7448827361; <email>ds21911@bristol.ac.uk</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>3</day><month>6</month><year>2026</year></pub-date><volume>12</volume><elocation-id>e71572</elocation-id><history><date 
date-type="received"><day>21</day><month>01</month><year>2025</year></date><date date-type="rev-recd"><day>01</day><month>05</month><year>2026</year></date><date date-type="accepted"><day>04</day><month>05</month><year>2026</year></date></history><copyright-statement>&#x00A9; Mahmoud El Ashry, Ahmed El Ashry, Hamza Khalique, Yahya Abdalle, Thomas Yeung. Originally published in JMIR Medical Education (<ext-link ext-link-type="uri" xlink:href="https://mededu.jmir.org">https://mededu.jmir.org</ext-link>), 3.6.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Education, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://mededu.jmir.org/">https://mededu.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://mededu.jmir.org/2026/1/e71572"/><abstract><sec><title>Background</title><p>Surgical training has changed over the past decade. Augmented reality (AR) has become one of the more talked-about developments within that space. At its core, AR works by placing digital information over the real-world environment. This gives trainees guidance and spatial cues during a procedure as they perform it. What remains uncertain is whether AR moves the needle on technical skill development in trainees. 
The studies that address this directly are few, and the ones that do exist rarely speak to each other in any meaningful way. Outcome measures shift from paper to paper, the hardware studied spans a wide range of maturity, and methodological consistency is hard to find.</p></sec><sec><title>Objective</title><p>This systematic review assesses the impact of AR on the objective technical skills of surgical trainees when compared with traditional methods.</p></sec><sec sec-type="methods"><title>Methods</title><p>We searched PubMed, MEDLINE, Embase, IEEE Xplore, Scopus, and Web of Science for studies published between January 1, 2020, and September 15, 2025. From 4799 initial records, 1417 remained after deduplication. Of these, 101 underwent detailed abstract review and 29 were assessed in full text. Eleven studies met the inclusion criteria. Two reviewers (MEA and YA) independently screened all records, with a third senior reviewer (TY) resolving disagreements. We performed a narrative synthesis following SWiM (Synthesis Without Meta-Analysis) guidelines across 5 thematic domains to account for study heterogeneity.</p></sec><sec sec-type="results"><title>Results</title><p>The final analysis included 11 studies (347 participants across 7 specialties) published between 2021 and 2025. These included 9 randomized controlled trials and 2 prospective cohort studies. The studies used platforms such as the Microsoft HoloLens (1 and 2), Magic Leap One, and Vuzix M300XL. Of the 11 studies, 9 reported improvements in one or more objective technical metrics. Key findings included consistent error reduction (5/5 studies), faster learning curves (4/11 studies), and lower cognitive workload (3/11 studies). 
Notably, an &#x201C;expertise reversal&#x201D; effect was observed, where AR provided substantial benefits to novices but diminishing returns for experienced surgeons.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>AR significantly improves technical performance for surgical novices, particularly in tasks involving complex visuospatial reasoning. AR is an effective tool in surgical education. Future research should focus on multicenter trials to evaluate long-term skill retention and cost-effectiveness in clinical practice.</p></sec></abstract><kwd-group><kwd>augmented reality</kwd><kwd>surgical education</kwd><kwd>systematic review</kwd><kwd>head-mounted display</kwd><kwd>simulation</kwd><kwd>PRISMA</kwd><kwd>telestration</kwd><kwd>skill acquisition</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Surgical training has transformed over the past two decades. The erosion of the traditional apprenticeship model, driven by reduced working hours, patient safety concerns, and the expansion of minimally invasive techniques, has created an urgent need for high-fidelity, reproducible alternatives to case-based learning [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. The consequences of this shift are well documented: surgical residents are entering independent practice with fewer operative experiences than their predecessors, and the early learning curve, particularly in complex and high-stakes procedures, carries measurable risk to patients [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. 
Thus, simulation-based surgical education has emerged as a necessary complement to the operating room, and the last decade has witnessed extraordinary investment in the development and evaluation of digital training modalities [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>Virtual reality (VR) and augmented reality (AR) have particularly attracted attention among users. Both AR and VR rely on immersive technology to create a virtual surgical learning environment for students. However, they differ fundamentally in their relationship to the physical world. VR relies solely on a synthetic environment, keeping the student separate from real-world stimuli, and has demonstrated efficacy in improving laparoscopic and robotic surgical skills across several procedural domains [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. AR, by contrast, overlays computer-generated information, such as anatomical structures, navigational guides, procedural annotations, or expert telestration, directly onto the user&#x2019;s real-world field of view through optical head-mounted displays (HMDs), smart glasses, or screen-based systems [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. This technical workflow, as illustrated in <xref ref-type="fig" rid="figure1">Figure 1</xref>, relies on a closed-loop system where physical data is captured, processed, and reprojected as a digital overlay in real time. These AR modalities maintain contact with physical instruments, simulators, and task environments while simultaneously providing contextually embedded guidance. The relative merits of VR and AR in surgical training remain an active area of investigation; the two modalities address different cognitive and technical challenges and are most productively viewed as complementary rather than competitive [<xref ref-type="bibr" rid="ref11">11</xref>]. 
Mixed reality (MR), exemplified by platforms such as the Microsoft HoloLens, extends AR by enabling dynamic interaction between virtual and physical objects through spatial mapping, though the boundary between AR and MR in the surgical training literature remains inconsistently defined. While the boundary between AR and MR is often inconsistently defined in broader literature, this review strictly defines AR as the unidirectional overlay of digital data onto a physical field. We explicitly exclude bidirectional, spatially mapped &#x201C;mixed reality&#x201D; interactions to isolate the cognitive impact of the digital overlay itself.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Schematic diagram illustrating the basic principles of augmented reality as applied in a surgical context: a camera captures the physical operative environment; a computer processor generates a digital overlay (eg, anatomical structures, procedural guidance, or expert telestration); the overlay is projected onto a display visible to the surgeon, while a tracker provides positional feedback for accurate registration of virtual content to the physical world. AR: augmented reality.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mededu_v12i1e71572_fig01.png"/></fig><p>The theoretical basis for AR&#x2019;s educational value is well established. Cognitive load theory posits that learners have finite working memory capacity and that effective instruction should minimize extraneous cognitive load while maximizing germane load, the mental effort directed toward schema formation [<xref ref-type="bibr" rid="ref12">12</xref>]. Conventional verbal instruction in minimally invasive surgery is particularly demanding, requiring trainees to mentally translate auditory descriptions into spatial awareness of the operative field in real time. 
AR telestration and anatomical overlay systems bypass this cognitive translation step by providing visual guidance precisely colocated with the task being performed, theoretically reducing extraneous load and accelerating the formation of procedural schemas [<xref ref-type="bibr" rid="ref13">13</xref>]. The expertise reversal effect predicts that while AR provides essential scaffolding for novices (maximizing germane load), these same overlays may function as extraneous cognitive load for experts who have already developed robust internal schemas, potentially hindering rather than helping performance [<xref ref-type="bibr" rid="ref14">14</xref>].</p><p>Since 2020, published evidence for AR usage has expanded significantly, but despite this, existing systematic reviews have significant limitations: they frequently analyze AR, VR, and MR simultaneously or separate them with loose boundaries, and sometimes include older prototype systems. Studies also rely predominantly on subjective or process-based outcomes rather than objective technical performance metrics [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. No contemporary synthesis has focused exclusively on AR&#x2019;s impact on trainee technical performance using validated, objective outcome measures across the most recent generation of AR technology. This study aims to achieve this while also maintaining strict boundaries surrounding the inclusion of solely AR-based projects for review.</p><p>This systematic review addresses that gap. 
Using a prespecified PICO (population, intervention, comparison, outcome) framework: surgical trainees (population), AR-based training interventions (intervention), traditional surgical teaching methods (comparison), and objective measures of technical performance (outcome), we synthesize contemporary evidence from 2020 to 2025 to answer the question: does augmented reality, when used as an adjunct to or replacement for traditional surgical training, improve objective technical performance in surgical novices? By confining our scope to studies with defined AR interventions, controlled comparators, and objective outcome measures, this review provides a focused, methodologically rigorous assessment of AR&#x2019;s current standing as a surgical training modality.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design and Registration</title><p>This systematic review included a literature search and a write-up that were both carried out with respect to the PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses) 2020 statement [<xref ref-type="bibr" rid="ref17">17</xref>]. The completed PRISMA 2020 expanded checklist is provided as <xref ref-type="supplementary-material" rid="app2">Checklist 1</xref>. A narrative synthesis was performed in accordance with the Synthesis Without Meta-Analysis (SWiM) reporting guideline [<xref ref-type="bibr" rid="ref18">18</xref>]. This type of analysis was chosen as significant clinical and statistical heterogeneity existed across included studies making statistical pooling and group analysis of studies unrealistic. The approach is consistent with the methodological guidance of the <italic>Cochrane Handbook for Systematic Reviews of Interventions</italic> [<xref ref-type="bibr" rid="ref19">19</xref>]. 
Searches were additionally reported in accordance with the PRISMA-S extension for the reporting of literature searches in systematic reviews [<xref ref-type="bibr" rid="ref20">20</xref>]. The review was not prospectively registered. However, an a priori protocol specifying the research question, eligibility criteria, databases, search strategy, data extraction variables, and planned synthesis approach was developed and followed throughout.</p></sec><sec id="s2-2"><title>Eligibility Criteria</title><p>Eligible studies for analysis had to be original, peer-reviewed articles published in English, with a publication window between January 1, 2020, and September 15, 2025. Participants could be trainees at any stage of their training. This meant that studies whose participants were medical students, residents, or fellows were appropriate for inclusion.</p><p>On the intervention side, a clearly described augmented reality component was required. For the purposes of this review, AR was understood as technology that overlays digital content directly onto a person&#x2019;s view of the real world. Because AR, VR, and MR frequently appear together in the literature, the screening reviewers (MEA and YA) agreed beforehand to be meticulous and deliberate in including only studies that clearly analyzed AR independently of VR and MR. Each study also needed to include a comparator condition, whether that entailed traditional instruction, conventional operative guidance, or a freehand approach. 
At least one objective measure of technical performance or skill acquisition had to be reported (<xref ref-type="table" rid="table1">Table 1</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Inclusion and exclusion criteria for study eligibility.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Category</td><td align="left" valign="bottom">Inclusion criteria</td><td align="left" valign="bottom">Exclusion criteria</td></tr></thead><tbody><tr><td align="left" valign="top">Publication</td><td align="left" valign="top">Peer-reviewed original research; published after January 1, 2020; English language</td><td align="left" valign="top">Reviews, editorials, letters, conference abstracts; published before 2020; non-English</td></tr><tr><td align="left" valign="top">Participants</td><td align="left" valign="top">Human surgical or procedural trainees (medical students, residents, and fellows) at any training stage</td><td align="left" valign="top">Expert surgeons performing clinical procedures without a trainee component; nonmedical participants</td></tr><tr><td align="left" valign="top">Intervention</td><td align="left" valign="top">Clearly described AR<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> component overlaying digital information onto real-world view; outcomes attributable to AR isolable from VR<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup>/MR<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="top">AR inseparable from VR or MR; purely passive AR use without guidance or training function; technology validation without trainee performance outcomes</td></tr><tr><td align="left" valign="top">Comparator</td><td align="left" valign="top">Traditional instruction, verbal guidance, conventional teaching, or freehand technique</td><td align="left" valign="top">No comparator or control condition 
present</td></tr><tr><td align="left" valign="top">Outcomes</td><td align="left" valign="top">At least one objective measure of technical performance (eg, accuracy, error count, validated skill score, and procedure time)</td><td align="left" valign="top">Outcomes entirely subjective (questionnaire only); usability/feasibility data only; no measurable performance data reported</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>AR: augmented reality.</p></fn><fn id="table1fn2"><p><sup>b</sup>VR: virtual reality.</p></fn><fn id="table1fn3"><p><sup>c</sup>MR: mixed reality.</p></fn></table-wrap-foot></table-wrap><p>Several categories of studies were excluded from the outset. Work published before 2020 was not considered, nor were non-original outputs such as reviews, editorials, or conference abstracts. Studies conducted exclusively with expert surgeons performing live clinical procedures fell outside the scope unless a formal training element was present. Where AR could not be disentangled from VR or MR, or where all reported outcomes were subjective in nature, studies were similarly excluded&#x2014;as were those lacking any comparator.</p><p>The 2020 cutoff was a deliberate methodological choice rather than an arbitrary date. AR hardware and software underwent substantial development around 2018&#x2010;2019, and a good deal of earlier work was conducted using prototype or near-prototype systems that bear little resemblance to the tools in use today. Including that literature risked drawing conclusions that would not generalize meaningfully to contemporary training contexts, so it was excluded on those grounds. The 2020 cutoff marks the transition from prototype-based research to the use of enterprise-grade, high-fidelity hardware (eg, Microsoft HoloLens 2 and Magic Leap One). 
Including earlier data from low-resolution prototype systems would introduce technological bias and yield conclusions that do not generalize to contemporary surgical training environments.</p></sec><sec id="s2-3"><title>Information Sources</title><p>Six electronic databases were searched: PubMed (MEDLINE), Ovid MEDLINE, Embase, IEEE Xplore, Scopus, and Web of Science. The selection was deliberate rather than exhaustive for its own sake. AR in surgical training sits at the intersection of clinical medicine and engineering, and no single database captures that breadth adequately&#x2014;so the combination was chosen to reflect it. PubMed, Ovid MEDLINE, and Embase covered the biomedical literature, with Embase included specifically because its indexing patterns differ enough from PubMed to reduce the risk of missing relevant work, consistent with <italic>Cochrane Handbook</italic> recommendations on database selection [<xref ref-type="bibr" rid="ref19">19</xref>]. IEEE Xplore addressed the engineering and technology side of the literature, where much of the platform development and human-computer interaction research is published. Scopus and Web of Science brought broader multidisciplinary coverage across both domains.</p><p>Google Scholar was not included in the primary database search. Reasons for this included the search engine&#x2019;s nontransparent indexing algorithm, its inclusion of non&#x2013;peer-reviewed sources, and the absence of a reproducible search interface. These issues would have caused the search to fall short of the reproducibility standards required by PRISMA and the <italic>Cochrane Handbook,</italic> which we aimed to meet [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. The search across all 6 databases was executed on April 24, 2026, with results filtered to the eligibility window of January 1, 2020, through September 15, 2025. 
Reference lists of relevant systematic reviews identified during screening were then hand-searched to capture any eligible work that database searching alone might have missed.</p></sec><sec id="s2-4"><title>Search Strategy</title><p>Our search strings were drafted by combining Medical Subject Headings (MeSH) terms with free-text keywords, referring to the PICO framework. We also reflected on guidance from Chapter 4 of the <italic>Cochrane Handbook</italic> [<xref ref-type="bibr" rid="ref19">19</xref>]. There were 3 domains in particular that formed the backbone of the search strategy: the technology itself (augmented reality), the population and setting (surgical or procedural trainees), and the outcome domain (technical performance or skill acquisition). Boolean operators (AND, OR) were then used to combine terms within search strings. Truncation with wildcards was applied where database syntax allowed. The full PubMed search string is provided in <xref ref-type="other" rid="box1">Textbox 1</xref>.</p><boxed-text id="box1"><title>Full PubMed search string.</title><p>(&#x201C;Augmented Reality&#x201D;[MeSH] OR &#x201C;augmented reality&#x201D;[tiab] OR &#x201C;mixed reality&#x201D;[tiab] OR &#x201C;head-mounted display&#x201D;[tiab] OR &#x201C;HMD&#x201D;[tiab] OR &#x201C;heads-up display&#x201D;[tiab] OR &#x201C;HUD&#x201D;[tiab] OR &#x201C;holographic&#x201D;[tiab] OR &#x201C;telestration&#x201D;[tiab] OR &#x201C;HoloLens&#x201D;[tiab] OR &#x201C;Magic Leap&#x201D;[tiab] OR &#x201C;smart glasses&#x201D;[tiab] OR &#x201C;optical see-through&#x201D;[tiab] OR &#x201C;AR-assisted&#x201D;[tiab] OR &#x201C;AR-guided&#x201D;[tiab] OR &#x201C;AR-enhanced&#x201D;[tiab]) AND (&#x201C;Education, Medical&#x201D;[MeSH] OR &#x201C;Education, Medical, Graduate&#x201D;[MeSH] OR &#x201C;Clinical Competence&#x201D;[MeSH] OR &#x201C;Simulation Training&#x201D;[MeSH] OR &#x201C;Internship and Residency&#x201D;[MeSH] OR &#x201C;surgical train*&#x201D;[tiab] OR &#x201C;surgical 
educat*&#x201D;[tiab] OR &#x201C;surgical skill*&#x201D;[tiab] OR &#x201C;surgical simulat*&#x201D;[tiab] OR &#x201C;procedural train*&#x201D;[tiab] OR &#x201C;procedural skill*&#x201D;[tiab] OR &#x201C;resident*&#x201D;[tiab] OR &#x201C;novice*&#x201D;[tiab] OR &#x201C;trainee*&#x201D;[tiab] OR &#x201C;medical student*&#x201D;[tiab] OR &#x201C;laparoscopic train*&#x201D;[tiab] OR &#x201C;minimally invasive train*&#x201D;[tiab] OR &#x201C;neurosurgery train*&#x201D;[tiab]) AND (&#x201C;learning curve&#x201D;[tiab] OR &#x201C;technical performance&#x201D;[tiab] OR &#x201C;skill acquisition&#x201D;[tiab] OR &#x201C;psychomotor&#x201D;[tiab] OR &#x201C;accuracy&#x201D;[tiab] OR &#x201C;proficiency&#x201D;[tiab] OR &#x201C;competency&#x201D;[tiab] OR &#x201C;OSATS&#x201D;[tiab] OR &#x201C;GOALS&#x201D;[tiab] OR &#x201C;error rate&#x201D;[tiab] OR &#x201C;performance score&#x201D;[tiab] OR &#x201C;procedure time&#x201D;[tiab]) AND (&#x201C;2020/01/01&#x201D;[PDat]:&#x201C;2025/09/15&#x201D;[PDat])</p></boxed-text><p>This search strategy was adapted for the syntax and controlled vocabulary of each database. For IEEE Xplore, MeSH terms were replaced with IEEE Thesaurus terms if appropriate. For Scopus and Web of Science, equivalent field tags (TITLE-ABS-KEY) were used with the same conceptual terms. For Ovid MEDLINE and Embase, the Ovid MeSH explode function was used to capture all relevant subheadings. Full search strategies for all 6 databases are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, reported in line with the PRISMA-S guideline [<xref ref-type="bibr" rid="ref20">20</xref>].</p><p>A date restriction of January 1, 2020, to September 15, 2025, was applied across all databases. No language filter was applied at the search stage itself. We felt that by restricting the language at that point, we risked inadvertently suppressing relevant records before they could be assessed. 
Non-English records that were retrieved were excluded at screening. Similarly, no publication type or study design filters were applied during the initial search. This was because such filters risk excluding eligible studies that are indexed or tagged inconsistently across databases.</p></sec><sec id="s2-5"><title>Study Selection</title><p>All records retrieved from the 6 databases were imported into Rayyan (Qatar Computing Research Institute) for deduplication and screening. Rayyan automatically identified potential duplicates, which were manually verified and removed, leaving 1417 unique records for screening. Study selection was then conducted in 3 sequential stages by two independent reviewers (MEA and YA). In the first stage, titles of all 1417 unique records were screened against the prespecified eligibility criteria, reducing the pool to 101 records. In the second stage, the 101 records were reviewed through a detailed assessment of their abstracts against the full eligibility criteria to determine if they could pass on to the next stage. In the third stage, full texts of the 29 remaining records were retrieved and assessed independently for final inclusion. Records for which full text could not be obtained via institutional access or interlibrary loan were pursued via direct author contact before being counted as inaccessible. Discrepancies at all stages were resolved through discussion and consensus between the two reviewers. If consensus could not be reached, a third senior reviewer (TY) acted as adjudicator. The reasons for exclusion at each stage are documented and reported in the PRISMA flow diagram.</p></sec><sec id="s2-6"><title>Data Collection Process and Items</title><p>Data extraction was carried out independently by two reviewers (MEA and YA) using a prestandardized form built in Microsoft Excel. Before the main extraction began, the form was piloted on two studies and adjusted where wording was ambiguous or fields needed refinement. 
This was a small but useful step that avoided inconsistencies surfacing later in the process.</p><p>The form captured 7 categories of information: study identification details (first author, year, country, and journal); study design; participant characteristics, including specialty, training level, total sample size, and group allocation; details of the AR intervention, covering the specific platform and device used, what the AR content actually consisted of, and how it was delivered; comparator characteristics; the task or procedural scenario; and all reported outcome measures, including primary and secondary endpoints alongside their associated statistical results, such as means (SDs), <italic>P</italic> values, and effect sizes if reported.</p><p>Responsibility for extraction was shared equally between MEA and YA. If the two reviewers disagreed on whether a data point should be included, they discussed this together, with adjudication by a third reviewer (TY) if no consensus could be reached.</p></sec><sec id="s2-7"><title>Risk of Bias Assessment</title><p>Risk of bias was assessed independently by two reviewers (MEA and YA) using a tool selected according to the study design. Randomized controlled trials (RCTs) were assessed using the Cochrane Risk of Bias 2 (RoB 2) tool [<xref ref-type="bibr" rid="ref22">22</xref>]. Nonrandomized prospective cohort studies were assessed using the Risk of Bias in Nonrandomized Studies of Interventions (ROBINS-I) tool [<xref ref-type="bibr" rid="ref23">23</xref>]. Crossover randomized trials were assessed using the RoB 2 tool with the crossover extension, as crossover designs remain RCTs and ROBINS-I is not applicable to them. Disagreements between reviewers were resolved by discussion; unresolved disagreements were adjudicated by the third reviewer (TY). 
Risk-of-bias findings are presented narratively in the Results section and considered in the interpretation of the overall body of evidence.</p></sec><sec id="s2-8"><title>Synthesis Methods</title><p>Given the substantial clinical heterogeneity across included studies in terms of AR platforms used, participant training levels, surgical specialties, comparator conditions, and outcome measures used, statistical pooling (meta-analysis) was not appropriate and was not performed. No standardized effect sizes (eg, Cohen <italic>d</italic> and standardized mean difference) were reported by included studies in a sufficiently consistent form to enable pooling; where individual studies reported effect sizes, these are noted in the narrative synthesis. Findings were synthesized narratively following the SWiM reporting guideline [<xref ref-type="bibr" rid="ref18">18</xref>]. Studies were grouped according to five prespecified thematic outcome domains derived during the protocol stage: (1) technical accuracy and procedural performance, (2) error reduction and procedural safety, (3) learning trajectory and skill acquisition, (4) cognitive load and gaze efficiency, and (5) operational efficiency and procedure time. Within each domain, consistency and heterogeneity of findings across studies were assessed narratively, and the direction and magnitude of effects were described in relation to methodological quality.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Study Selection</title><p>The systematic search returned 4596 records across the 6 databases (Scopus: 1640; Web of Science: 1039; IEEE Xplore: 790; Embase: 638; Ovid MEDLINE: 438; PubMed: 51). A further 203 records came from reference list checking and gray literature searching, bringing the total to 4799. 
Rayyan automatically identified 3382 potential duplicates, which were manually verified and removed, leaving 1417 unique records for screening.</p><p>MEA and YA independently screened all 1417 records by title, excluding 1316. The most common reasons were: no AR component or AR not separable from VR/MR (n&#x2248;478), no objective performance outcome (n&#x2248;369), not involving surgical or procedural trainees (n&#x2248;210), not an interventional study design (n&#x2248;171), no comparator group (n&#x2248;85), and non-English language (n&#x2248;3). Disagreements were resolved by consensus. The remaining 101 records proceeded to a detailed abstract review.</p><p>Both reviewers independently assessed all 101 abstracts, excluding 72. The main reasons were technology development or validation studies with no trainee performance outcomes (n=26), no clearly defined novice or trainee population (n=18), outside scope (n=14), non-original research formats including systematic reviews and editorials (n=8), and nontrainee participants (n=6). Where the two reviewers could not reach agreement through discussion, TY adjudicated. Twenty-nine records were then retrieved for full-text assessment.</p><p>At full-text review, again conducted independently by MEA and YA, 18 articles were excluded. Reasons were: AR present in the setting but not functioning as the instructional component (n=5); usability or feasibility assessment only, with no objective performance outcomes (n=4); AR inseparable from a VR or MR environment (n=3); no comparator or control condition (n=3); anatomy identification study without any surgical skill training component (n=2); and full text inaccessible despite institutional access attempts, an interlibrary loan request, and direct contact with the authors (n=1). Disagreements were again resolved by discussion, with TY available for adjudication where needed. 
Eleven studies met all prespecified eligibility criteria and were included in the final narrative synthesis. The full selection process is shown in the PRISMA 2020 flow diagram (<xref ref-type="fig" rid="figure2">Figure 2</xref>).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>PRISMA 2020 flow diagram illustrating the study selection process. Records were identified across 6 databases and additional sources. Sequential screening at title/abstract, detailed abstract review, and full-text stages progressively narrowed the pool from 4799 records to 11 included studies. AR: augmented reality; MR: mixed reality; VR: virtual reality.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mededu_v12i1e71572_fig02.png"/></fig></sec><sec id="s3-2"><title>Study Characteristics</title><p>The 11 included studies span 2021 to 2025 and together enrolled 347 participants. Nine were RCTs&#x2014;5 parallel-group [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>], 3 crossover [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref7">7</xref>], and 1 three-arm [<xref ref-type="bibr" rid="ref11">11</xref>], with the remaining 2 being prospective cohort comparisons [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. Sample sizes ranged from 8 (Liu et al [<xref ref-type="bibr" rid="ref2">2</xref>]) to 60 (Wild et al [<xref ref-type="bibr" rid="ref7">7</xref>]). Across all studies, participants had no or very limited prior experience in the procedure being trained. 
Seven specialties featured in the included work: minimally invasive and laparoscopic surgery (n=4), open and basic surgical skills (n=2), neurosurgery (n=2), neurovascular and cerebrovascular surgery (n=1), otology (n=1), and spine surgery (n=1). Full characteristics of each included study are provided in <xref ref-type="table" rid="table2">Table 2</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Characteristics of included studies (n=11), covering 7 surgical specialties, published between 2021 and 2025, encompassing 347 participants with novice or limited prior procedural experience.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Author (year)</td><td align="left" valign="bottom">Specialty/procedure</td><td align="left" valign="bottom">n</td><td align="left" valign="bottom">Participants</td><td align="left" valign="bottom">Design</td><td align="left" valign="bottom">AR<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> platform</td><td align="left" valign="bottom">Comparator</td><td align="left" valign="bottom">Primary outcome</td></tr></thead><tbody><tr><td align="left" valign="top">Wolf et al (2021) [<xref ref-type="bibr" rid="ref1">1</xref>]</td><td align="left" valign="top">ECMO<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> cannulation</td><td align="left" valign="top">21</td><td align="left" valign="top">Medical students (Y3-Y4)</td><td align="left" valign="top">Crossover RCT<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">HoloLens 2 step-by-step AR guide</td><td align="left" valign="top">Conventional SOP<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup> (paper/video)</td><td align="left" valign="top">Error count; UEQ<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td></tr><tr><td align="left" valign="top">Liu et al (2024) [<xref ref-type="bibr" 
rid="ref2">2</xref>]</td><td align="left" valign="top">Neurosurgery: MCA<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup> aneurysm localization</td><td align="left" valign="top">8</td><td align="left" valign="top">Neurosurgery residents (PGY1-4; &#x003C;5 aneurysm cases)</td><td align="left" valign="top">Prospective cohort</td><td align="left" valign="top">Magic Leap One + Brainlab HUD<sup><xref ref-type="table-fn" rid="table2fn7">g</xref></sup> (Zeiss Kinevo 900)</td><td align="left" valign="top">2D imaging review only</td><td align="left" valign="top">Aneurysm localization deviation (mm)</td></tr><tr><td align="left" valign="top">Cizmic et al (2023) [<xref ref-type="bibr" rid="ref3">3</xref>]</td><td align="left" valign="top">Laparoscopic cholecystectomy (ex vivo &#x00D7; 10)</td><td align="left" valign="top">40</td><td align="left" valign="top">Medical students (Y3-Y6)</td><td align="left" valign="top">Parallel RCT</td><td align="left" valign="top">iSurgeon AR telestration</td><td align="left" valign="top">Verbal guidance only</td><td align="left" valign="top">Cumulative GOALS<sup><xref ref-type="table-fn" rid="table2fn8">h</xref></sup>/OSATS<sup><xref ref-type="table-fn" rid="table2fn9">i</xref></sup>; complications; CVS<sup><xref ref-type="table-fn" rid="table2fn10">j</xref></sup> achievement</td></tr><tr><td align="left" valign="top">Felinska et al (2023) [<xref ref-type="bibr" rid="ref4">4</xref>]</td><td align="left" valign="top">Laparoscopic basic tasks + ex vivo cholecystectomy</td><td align="left" valign="top">40</td><td align="left" valign="top">MIS<sup><xref ref-type="table-fn" rid="table2fn11">k</xref></sup>-naive medical students</td><td align="left" valign="top">Crossover RCT</td><td align="left" valign="top">iSurgeon + Pupil Core eye-tracking</td><td align="left" valign="top">Verbal guidance only</td><td align="left" valign="top">Gaze latency, errors, OSATS, NASA-TLX<sup><xref ref-type="table-fn" 
rid="table2fn12">l</xref></sup></td></tr><tr><td align="left" valign="top">Lopes et al (2022) [<xref ref-type="bibr" rid="ref5">5</xref>]</td><td align="left" valign="top">Basic open surgical skills: suturing (x 5 types)</td><td align="left" valign="top">20</td><td align="left" valign="top">Medical students (no prior suturing experience)</td><td align="left" valign="top">Parallel RCT (evaluator-blinded)</td><td align="left" valign="top">Vuzix M300XL smart glasses + remote telestration</td><td align="left" valign="top">Traditional on-site teaching</td><td align="left" valign="top">Independent performance time; mOSATS score</td></tr><tr><td align="left" valign="top">Van Gestel et al (2021) [<xref ref-type="bibr" rid="ref6">6</xref>]</td><td align="left" valign="top">Neurosurgery: EVD<sup><xref ref-type="table-fn" rid="table2fn13">m</xref></sup> placement (phantom)</td><td align="left" valign="top">16</td><td align="left" valign="top">Medical students (no prior EVD experience)</td><td align="left" valign="top">Parallel RCT (pre/post training)</td><td align="left" valign="top">HoloLens 1 + IR<sup><xref ref-type="table-fn" rid="table2fn14">n</xref></sup> inside-out tracking</td><td align="left" valign="top">Freehand technique</td><td align="left" valign="top">Mean target error (mm); mKS<sup><xref ref-type="table-fn" rid="table2fn15">o</xref></sup> grade</td></tr><tr><td align="left" valign="top">Wild et al (2022) [<xref ref-type="bibr" rid="ref7">7</xref>]</td><td align="left" valign="top">Laparoscopic basic skills + ex vivo cholecystectomy</td><td align="left" valign="top">60</td><td align="left" valign="top">Laparoscopic novices (medical students Y3-Y6)</td><td align="left" valign="top">Crossover RCT</td><td align="left" valign="top">iSurgeon AR telestration</td><td align="left" valign="top">Verbal guidance only</td><td align="left" valign="top">Total training time; GOALS/OSATS; complications; NASA-TLX</td></tr><tr><td align="left" valign="top">Hadida Barzilai et al 
(2025) [<xref ref-type="bibr" rid="ref8">8</xref>]</td><td align="left" valign="top">Otology: mastoidectomy drilling (3D-printed model)</td><td align="left" valign="top">21</td><td align="left" valign="top">Medical students (clinical clerkship; no prior temporal bone drilling)</td><td align="left" valign="top">Parallel RCT</td><td align="left" valign="top">HoloLens 2 + D2P QR-code registration</td><td align="left" valign="top">Anatomy review + instructional video + dissection manual</td><td align="left" valign="top">Modified Welling Scale (mWS, /25)</td></tr><tr><td align="left" valign="top">Nagayo et al (2022) [<xref ref-type="bibr" rid="ref9">9</xref>]</td><td align="left" valign="top">Open surgery: subcuticular interrupted suturing</td><td align="left" valign="top">38</td><td align="left" valign="top">Medical students (suturing novices)</td><td align="left" valign="top">Parallel RCT (evaluator-blinded)</td><td align="left" valign="top">HoloLens 2 self-training (3D expert procedure replication)</td><td align="left" valign="top">2D instructional video</td><td align="left" valign="top">Global rating; task-specific suturing scores</td></tr><tr><td align="left" valign="top">Kong et al (2025) [<xref ref-type="bibr" rid="ref10">10</xref>]</td><td align="left" valign="top">Spine surgery: pedicle screw placement (L2 lumbar model)</td><td align="left" valign="top">4 surgeons / 80 screws</td><td align="left" valign="top">1 experienced surgeon + 1 novice per group</td><td align="left" valign="top">Prospective cohort</td><td align="left" valign="top">HoloLens 2 + Vuforia 3D registration</td><td align="left" valign="top">Freehand technique</td><td align="left" valign="top">Linear deviation (mm); angular deviation; Gertzbein-Robbins accuracy</td></tr><tr><td align="left" valign="top">Dodier et al (2024) [<xref ref-type="bibr" rid="ref11">11</xref>]</td><td align="left" valign="top">Neurosurgery: intracranial aneurysm clipping (perfused phantom)</td><td align="left" 
valign="top">9 residents</td><td align="left" valign="top">Neurosurgery residents (PGY1-6; no prior clipping as lead surgeon)</td><td align="left" valign="top">3-arm RCT</td><td align="left" valign="top">HoloLens 1 holographic AR clipping simulation (SOFA<sup><xref ref-type="table-fn" rid="table2fn16">p</xref></sup>)</td><td align="left" valign="top">No interim training or video review only</td><td align="left" valign="top">Occlusion rate (Raymond-Roy class 1); clipping attempts; wrist tremor</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>AR: augmented reality.</p></fn><fn id="table2fn2"><p><sup>b</sup>ECMO: extracorporeal membrane oxygenation.</p></fn><fn id="table2fn3"><p><sup>c</sup>RCT: randomized controlled trial.</p></fn><fn id="table2fn4"><p><sup>d</sup>SOP: standard operating procedure.</p></fn><fn id="table2fn5"><p><sup>e</sup>UEQ: User Experience Questionnaire.</p></fn><fn id="table2fn6"><p><sup>f</sup>MCA: middle cerebral artery.</p></fn><fn id="table2fn7"><p><sup>g</sup>HUD: heads-up display.</p></fn><fn id="table2fn8"><p><sup>h</sup>GOALS: Global Operative Assessment of Laparoscopic Skills.</p></fn><fn id="table2fn9"><p><sup>i</sup>OSATS: Objective Structured Assessment of Technical Skills.</p></fn><fn id="table2fn10"><p><sup>j</sup>CVS: critical view of safety.</p></fn><fn id="table2fn11"><p><sup>k</sup>MIS: minimally invasive surgery.</p></fn><fn id="table2fn12"><p><sup>l</sup>NASA-TLX: NASA Task Load Index.</p></fn><fn id="table2fn13"><p><sup>m</sup>EVD: external ventricular drain.</p></fn><fn id="table2fn14"><p><sup>n</sup>IR: infrared.</p></fn><fn id="table2fn15"><p><sup>o</sup>mKS: Modified Kakarla Scale.</p></fn><fn id="table2fn16"><p><sup>p</sup>SOFA: Simulation Open Framework Architecture.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>AR Technology Platforms</title><p>The Microsoft HoloLens (versions 1 and 2) was the most commonly used AR platform, applied in 6 of the 11 included studies [<xref 
ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. The HoloLens is an optical see-through HMD capable of projecting holographic content into the user&#x2019;s visual field while preserving contact with the physical environment. Three studies used the iSurgeon telestration system, a laparoscopic screen-based AR device that projects a real-time feed of the instructor&#x2019;s hand gestures onto the operative monitor [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. The Magic Leap One combined with the Brainlab Mixed Reality Viewer and intraoperative heads-up display was used in one neurosurgical study [<xref ref-type="bibr" rid="ref2">2</xref>]. The Vuzix M300XL smart glasses were evaluated in one suturing study [<xref ref-type="bibr" rid="ref5">5</xref>]. Kong et al [<xref ref-type="bibr" rid="ref10">10</xref>] used the HoloLens 2 in combination with custom surgical guides and the Vuforia 3D registration software for spinal navigation, and Dodier et al [<xref ref-type="bibr" rid="ref11">11</xref>] used the HoloLens 1 to deliver holographic finite-element simulation of aneurysm clipping.</p></sec><sec id="s3-4"><title>Risk of Bias</title><p>Risk of bias assessment findings are summarized narratively below. 
Among the 9 RCTs, 6 were assessed as having some concerns regarding randomization or blinding processes [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]; full blinding of participants and instructors to group allocation is inherently unfeasible in AR training studies, representing a structural limitation of all trials in this field. The remaining 3 RCTs [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref8">8</xref>] were assessed as low risk across all domains. The 2 nonrandomized studies [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref10">10</xref>] were assessed using ROBINS-I and rated as moderate risk, reflecting their small sample sizes and lack of formal randomization, though both did use internal controls. Outcome assessment blinding was reported in 4 studies [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>], which goes some way toward reducing detection bias. None of the included studies reported any long-term follow-up or skill retention data&#x2014;a gap that runs consistently across the entire evidence base.</p></sec><sec id="s3-5"><title>Narrative Synthesis of Outcomes</title><sec id="s3-5-1"><title>Domain 1: Technical Accuracy and Procedural Performance</title><p>Of the 11 included studies, 6 provided direct evidence of AR improving objective technical accuracy. The most pronounced effects were observed in procedural tasks with a strong visuospatial component. 
Van Gestel et al [<xref ref-type="bibr" rid="ref6">6</xref>] demonstrated that untrained medical students using AR guidance for external ventricular drain (EVD) placement on a phantom model achieved a mean target error of 11.9 mm, compared with 19.9 mm for the untrained freehand group (<italic>P</italic>=.003). Critically, untrained AR-guided performance matched that of trained freehand performers, indicating that AR effectively compressed the procedural learning curve. The quality of EVD placement was also significantly superior in the AR group (59.4% vs 25% Modified Kakarla Scale grade 1, <italic>P</italic>=.005) [<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>Hadida Barzilai et al [<xref ref-type="bibr" rid="ref8">8</xref>] reported significantly superior overall mastoidectomy performance in the AR group (median Modified Welling Scale 19.5/25) compared with controls (12/25; <italic>P</italic>=.001), with significant advantages on 6 of 8 subscales including mastoidectomy margin definition, sinodural angle, and tegmen exposure. The AR group&#x2019;s score exceeded published novice benchmark values.</p><p>Liu et al [<xref ref-type="bibr" rid="ref2">2</xref>] demonstrated that AR-assisted visuospatial training significantly reduced aneurysm localization deviation among neurosurgical residents, from 8.1 mm at AR Test 1 to 2.7 mm at AR Test 2 (<italic>P</italic>&#x003C;.001). Crucially, this improvement was retained in the final test conducted without any AR assistance (AR group: 2.1 mm vs control: 5.9 mm; <italic>P</italic>&#x003C;.001), confirming durable skill acquisition rather than performance scaffolding alone.</p><p>Kong et al [<xref ref-type="bibr" rid="ref10">10</xref>] demonstrated AR&#x2019;s equalizing effect on novice-expert performance disparity in pedicle screw placement: overall AR accuracy 95% versus 77.5% freehand (<italic>P</italic>&#x003C;.05 for both linear and angular deviation). 
In contrast, Nagayo et al [<xref ref-type="bibr" rid="ref9">9</xref>] found no significant difference between AR and video-based self-training in suturing skill improvement (global rating: <italic>P</italic>=.54; task specific: <italic>P</italic>=.91), and Lopes et al [<xref ref-type="bibr" rid="ref5">5</xref>] similarly found no significant difference in mOSATS scores, although the telestration group performed tasks significantly faster when working independently (1393 s vs 1679 s; <italic>P</italic>=.04). These findings represent noninferiority rather than inferiority of AR.</p></sec><sec id="s3-5-2"><title>Domain 2: Error Reduction and Procedural Safety</title><p>AR was consistently associated with reduced procedural errors across all 5 studies that measured this outcome. Felinska et al [<xref ref-type="bibr" rid="ref4">4</xref>] demonstrated the most dramatic reduction, with AR-instructed trainees making a mean of 0.18 errors per task compared with 1.94 for the verbal instruction group (<italic>P</italic>&#x003C;.01; &#x03B7;p&#x00B2;=0.92), representing a tenfold reduction in error rate. Complementary eye-tracking data revealed the mechanism: AR reduced gaze latency from 2.04 to 0.21 seconds (<italic>P</italic>&#x003C;.01; &#x03B7;p&#x00B2;=0.95), confirming that AR telestration functions by directing trainees&#x2019; visual attention to operationally relevant structures more rapidly and precisely than verbal instruction [<xref ref-type="bibr" rid="ref4">4</xref>].</p><p>Wolf et al [<xref ref-type="bibr" rid="ref1">1</xref>] found that AR-based extracorporeal membrane oxygenation cannulation instructions resulted in a 66% reduction in knowledge-related errors for the more complex second procedure (18 vs 53 errors; <italic>P</italic>&#x003C;.05), while handling errors were unchanged, suggesting that AR&#x2019;s error-reducing effect is specifically mediated by improved information accessibility and cognitive offloading. 
Cizmic et al [<xref ref-type="bibr" rid="ref3">3</xref>] reported that the iSurgeon group incurred significantly fewer total complications and achieved the critical view of safety in 79.5% of procedures compared with only 41.4% in the verbal guidance group (<italic>P</italic>&#x2264;.001). Wild et al [<xref ref-type="bibr" rid="ref7">7</xref>] similarly reported a significant reduction in complication rates with AR telestration (13.3% vs 40%; <italic>P</italic>=.02).</p></sec><sec id="s3-5-3"><title>Domain 3: Learning Trajectory and Skill Acquisition</title><p>Four studies provided explicit evidence regarding AR&#x2019;s effect on the learning trajectory. Liu et al [<xref ref-type="bibr" rid="ref2">2</xref>] showed a steep decline in localization deviation across successive AR test blocks while the control groups remained relatively flat, indicating an accelerated learning curve rather than a simple one-time performance advantage. Cizmic et al [<xref ref-type="bibr" rid="ref3">3</xref>] provided longitudinal evidence across 10 cholecystectomy sessions, demonstrating that the AR telestration group maintained consistently higher GOALS (Global Operative Assessment of Laparoscopic Skills) and OSATS (Objective Structured Assessment of Technical Skills) scores from the first session onwards, with the performance gap not narrowing over time. The finding by Van Gestel et al [<xref ref-type="bibr" rid="ref6">6</xref>] that untrained AR performers matched trained freehand performers is perhaps the most striking demonstration of learning curve compression in this review. 
Dodier et al [<xref ref-type="bibr" rid="ref11">11</xref>] found that only the video-plus-AR cohort achieved a statistically significant improvement in aneurysm occlusion rate between the first and final sessions (67% to 93%; <italic>P</italic>=.046), demonstrating that AR adds value beyond video review alone for complex microsurgical skill acquisition.</p></sec><sec id="s3-5-4"><title>Domain 4: Cognitive Load and Gaze Efficiency</title><p>Three studies measured cognitive load using validated instruments. Felinska et al [<xref ref-type="bibr" rid="ref4">4</xref>] reported significantly lower NASA Task Load Index scores during basic laparoscopic tasks with AR telestration compared with verbal instruction (mean 50 [SD 21] vs mean 56 [SD 22]; <italic>P</italic>&#x003C;.01), alongside a lower objective blink rate. Wild et al [<xref ref-type="bibr" rid="ref7">7</xref>] reported that participants found AR training significantly less mentally demanding (mean 33.3 [SD 14.8] vs mean 48.9 [SD 14.3]; <italic>P</italic>&#x003C;.001) and less physically demanding (mean 35.1 [SD 13.8] vs mean 38.1 [SD 13.3]; <italic>P</italic>=.002). Liu et al [<xref ref-type="bibr" rid="ref2">2</xref>] noted that the AR group took significantly longer to complete tasks in early test phases (<italic>P</italic>=.003), attributed to the additional cognitive processing required to colocate spatial AR hologram information with physical understanding, an effect that decreased as participants became more familiar with the AR system.</p></sec><sec id="s3-5-5"><title>Domain 5: Operational Efficiency and Procedure Time</title><p>The effect of AR on procedure and training time was mixed across studies. Wild et al [<xref ref-type="bibr" rid="ref7">7</xref>] reported the most pronounced efficiency gain: total laparoscopic training time was reduced by 29.8% with AR telestration (mean 1163 [SD 275] vs mean 1658 [SD 375] seconds; <italic>P</italic>&#x003C;.001). 
Lopes et al [<xref ref-type="bibr" rid="ref5">5</xref>] reported significantly faster independent suture completion in the AR group (1393 vs 1679 seconds; <italic>P</italic>=.04). For more complex procedures, no significant differences in total operative time were observed (mean 79.6 [SD 25.7] vs mean 84.5 [SD 33.2] minutes; <italic>P</italic>=.09), suggesting that AR&#x2019;s efficiency benefits are most readily detectable in discrete, structured tasks (<xref ref-type="table" rid="table3">Table 3</xref>).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Summary of primary outcomes and key quantitative findings across included studies (n=11), covering participants enrolled in surgical and procedural training programs across 7 specialties, 2021&#x2010;2025.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Author (year)</td><td align="left" valign="bottom">Primary outcome</td><td align="left" valign="bottom">AR<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> result</td><td align="left" valign="bottom">Control result</td><td align="left" valign="bottom"><italic>P</italic> value</td><td align="left" valign="bottom">Key interpretation</td></tr></thead><tbody><tr><td align="left" valign="top">Wolf et al (2021) [<xref ref-type="bibr" rid="ref1">1</xref>]</td><td align="left" valign="top">Error count (procedure 2)</td><td align="left" valign="top">18 knowledge errors</td><td align="left" valign="top">53 knowledge errors</td><td align="left" valign="top">&#x003C;.05</td><td align="left" valign="top">66% reduction in knowledge errors; handling errors unchanged&#x2014;AR reduces cognitive errors specifically</td></tr><tr><td align="left" valign="top">Liu et al (2024) [<xref ref-type="bibr" rid="ref2">2</xref>]</td><td align="left" valign="top">Localization deviation (mm)</td><td align="left" valign="top">Mean 2.7 (SD 1.0) mm (AR test 2); 2.1 mm (final test without AR)</td><td 
align="left" valign="top">Mean 5.8 (SD 4.1) mm; 5.9 mm (final test)</td><td align="left" valign="top">.01; &#x003C;.001</td><td align="left" valign="top">AR accelerates visuospatial learning curve; improvement retained in final unassisted test</td></tr><tr><td align="left" valign="top">Cizmic et al (2023) [<xref ref-type="bibr" rid="ref3">3</xref>]</td><td align="left" valign="top">GOALS<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup>; OSATS<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup>; CVS<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="left" valign="top">GOALS 17.3; OSATS 50.8; CVS 79.5%</td><td align="left" valign="top">GOALS 16.0; OSATS 41.2; CVS 41.4%</td><td align="left" valign="top">&#x003C;.001 (all)</td><td align="left" valign="top">AR telestration maintains higher performance trajectory across 10 LCs<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup>; CVS achievement nearly doubled</td></tr><tr><td align="left" valign="top">Felinska et al (2023) [<xref ref-type="bibr" rid="ref4">4</xref>]</td><td align="left" valign="top">Error count; gaze latency; OSATS</td><td align="left" valign="top">0.18 errors; 0.21 s gaze latency</td><td align="left" valign="top">1.94 errors; 2.04 s gaze latency</td><td align="left" valign="top">&#x003C;.01 (all)</td><td align="left" valign="top">Tenfold error reduction; gaze guidance mechanism confirmed by eye-tracking; reduced NASA-TLX<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup></td></tr><tr><td align="left" valign="top">Lopes et al (2022) [<xref ref-type="bibr" rid="ref5">5</xref>]</td><td align="left" valign="top">Independent task time; mOSATS</td><td align="left" valign="top">1393 s total; mOSATS trend higher</td><td align="left" valign="top">1679 s total</td><td align="left" valign="top">.04 (time)</td><td align="left" valign="top">AR telestration produces faster independent performance; quality comparable&#x2014;viable alternative to on-site 
teaching</td></tr><tr><td align="left" valign="top">Van Gestel et al (2021) [<xref ref-type="bibr" rid="ref6">6</xref>]</td><td align="left" valign="top">Mean target error (mm); mKS<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup> grade 1</td><td align="left" valign="top">11.9 mm; 59.4% grade 1</td><td align="left" valign="top">19.9 mm; 25% grade 1</td><td align="left" valign="top">.003; .005</td><td align="left" valign="top">AR eliminates procedural learning curve for EVD<sup><xref ref-type="table-fn" rid="table3fn8">h</xref></sup> placement; untrained AR matches trained freehand</td></tr><tr><td align="left" valign="top">Wild et al (2022) [<xref ref-type="bibr" rid="ref7">7</xref>]</td><td align="left" valign="top">Training time; GOALS/OSATS; complications</td><td align="left" valign="top">1163 s; GOALS 21; OSATS 67; 13.3% complications</td><td align="left" valign="top">1658 s; GOALS 18; OSATS 61; 40% complications</td><td align="left" valign="top">&#x003C;.001; .007; .015; .020</td><td align="left" valign="top">29.8% reduction in training time; significant quality and safety improvement; reduced NASA-TLX</td></tr><tr><td align="left" valign="top">Hadida Barzilai et al (2025) [<xref ref-type="bibr" rid="ref8">8</xref>]</td><td align="left" valign="top">Modified Welling Scale (/25)</td><td align="left" valign="top">19.5/25 (median)</td><td align="left" valign="top">12/25 (median)</td><td align="left" valign="top">.001</td><td align="left" valign="top">7.5-point advantage; AR group exceeds published novice benchmarks; 6/8 subscales significant</td></tr><tr><td align="left" valign="top">Nagayo et al (2022) [<xref ref-type="bibr" rid="ref9">9</xref>]</td><td align="left" valign="top">Global rating (GR); task-specific (TS) scores</td><td align="left" valign="top">GR 16.03; TS 15.03 (posttest)</td><td align="left" valign="top">GR 15.5; TS 15.11 (posttest)</td><td align="left" valign="top">.54; .91 (NS<sup><xref ref-type="table-fn" rid="table3fn9">i</xref></sup>)</td><td 
align="left" valign="top">Noninferior to video self-training; AR rated more useful for 3D instrument motion (<italic>P</italic>=.02)</td></tr><tr><td align="left" valign="top">Kong et al (2025) [<xref ref-type="bibr" rid="ref10">10</xref>]</td><td align="left" valign="top">Linear deviation; angular deviation; Gertzbein-Robbins accuracy</td><td align="left" valign="top">Novice: 1.73 mm / 2.87 degrees; 90% accuracy</td><td align="left" valign="top">Novice: 5.25 mm / 7.15 degrees; 70% accuracy</td><td align="left" valign="top">&#x003C;.05 (all)</td><td align="left" valign="top">AR equalizes novice-expert performance gap; overall accuracy 95% vs 77.5% freehand</td></tr><tr><td align="left" valign="top">Dodier et al (2024) [<xref ref-type="bibr" rid="ref11">11</xref>]</td><td align="left" valign="top">Aneurysm occlusion rate (Raymond-Roy class 1)</td><td align="left" valign="top">67%-93% (video + AR cohort)</td><td align="left" valign="top">67%-73% (video only); stable (control)</td><td align="left" valign="top">.046</td><td align="left" valign="top">Only video + AR cohort achieved significant occlusion improvement; AR adds value beyond video alone</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>AR: augmented reality.</p></fn><fn id="table3fn2"><p><sup>b</sup>GOALS: Global Operative Assessment of Laparoscopic Skills.</p></fn><fn id="table3fn3"><p><sup>c</sup>OSATS: Objective Structured Assessment of Technical Skills.</p></fn><fn id="table3fn4"><p><sup>d</sup>CVS: critical view of safety.</p></fn><fn id="table3fn5"><p><sup>e</sup>LC: laparoscopic cholecystectomy.</p></fn><fn id="table3fn6"><p><sup>f</sup>NASA-TLX: NASA Task Load Index.</p></fn><fn id="table3fn7"><p><sup>g</sup>mKS: Modified Kakarla Scale.</p></fn><fn id="table3fn8"><p><sup>h</sup>EVD: external ventricular drain.</p></fn><fn id="table3fn9"><p><sup>i</sup>NS: not significant. 
</p></fn></table-wrap-foot></table-wrap></sec></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>This systematic review synthesizes evidence from 11 contemporary studies (2021&#x2010;2025) evaluating AR&#x2019;s impact on the objective technical performance of surgical trainees. The principal finding is that AR demonstrates a consistent, measurable positive effect on technical performance, most strongly in domains requiring visuospatial reasoning, spatial anatomical understanding, and procedural accuracy. Of the 11 studies, 9 reported at least one significant improvement in an objective technical performance metric. The 2 studies that did not demonstrate AR superiority [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref9">9</xref>] nonetheless showed noninferiority, with AR-trained groups performing equivalently to comparators on skill quality metrics while achieving time advantages in independent performance. The absence of superiority in lower-stakes basic skills tasks is not evidence of inefficacy; it may reflect a ceiling effect in tasks where traditional instruction is already adequate for novice performance.</p><p>Across the body of evidence, the most robust performance advantages were observed in tasks with a strong visuospatial or spatial navigation component: EVD placement [<xref ref-type="bibr" rid="ref6">6</xref>], aneurysm localization [<xref ref-type="bibr" rid="ref2">2</xref>], mastoidectomy drilling [<xref ref-type="bibr" rid="ref8">8</xref>], and pedicle screw placement [<xref ref-type="bibr" rid="ref10">10</xref>]. 
This pattern is theoretically coherent: AR&#x2019;s capacity to render 3D anatomical structures in the trainee&#x2019;s visual field directly addresses a fundamental cognitive challenge in procedural surgery: the mental reconstruction of volumetric anatomy from 2D imaging data.</p></sec><sec id="s4-2"><title>Interpretation and Comparison With Prior Literature</title><p>The findings of this review are consistent with, and substantially extend, the conclusions of prior systematic reviews. Abu Halimah et al [<xref ref-type="bibr" rid="ref15">15</xref>] and Xiong et al [<xref ref-type="bibr" rid="ref16">16</xref>] identified broad potential for AR in surgical skills training, but their reviews included older studies with heterogeneous definitions of AR and outcomes. By restricting our scope to post-2020 studies with objective outcomes and clear AR definitions, we provide a more precise assessment applicable to current training environments. Importantly, this review positions AR and VR as complementary rather than competitive modalities, a distinction emphasized in the literature [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref24">24</xref>].</p><p>The observed expertise reversal pattern, AR providing the greatest benefit to novices with diminishing returns at higher levels of proficiency, is consistent with predictions from both cognitive load theory [<xref ref-type="bibr" rid="ref12">12</xref>] and the expertise reversal effect [<xref ref-type="bibr" rid="ref14">14</xref>]. In Kong et al [<xref ref-type="bibr" rid="ref10">10</xref>], AR navigation essentially equalized the novice-expert performance gap. 
This pattern has direct implications for curriculum design: AR-assisted training may be most efficiently used during the early stages of procedural learning, with progressive withdrawal of AR guidance as competency develops, a strategy consistent with the scaffolding framework in educational theory [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>].</p><p>The gaze-guidance mechanism elucidated by Felinska et al [<xref ref-type="bibr" rid="ref4">4</xref>] provides the most direct experimental evidence for the cognitive mechanism underpinning AR&#x2019;s training benefit. By showing that AR telestration reduced gaze latency tenfold and aligned trainee gaze with expert gaze, this study demonstrates that AR&#x2019;s error-reducing effect is mediated by directing visual attention to operationally relevant anatomical regions more efficiently than verbal instruction.</p><p>The finding by Dodier et al [<xref ref-type="bibr" rid="ref11">11</xref>] that only the combined video-plus-AR cohort achieved a significant improvement in aneurysm occlusion rate (67% to 93%; <italic>P</italic>=.046) is particularly noteworthy. The holographic AR clipping simulation allowed residents to test different clipping strategies on the exact same patient-specific anatomy as the physical phantom, a form of deliberate practice that is impossible with traditional simulation.</p></sec><sec id="s4-3"><title>Limitations</title><p>Several limitations of the constituent studies and of this review must be acknowledged. The most fundamental limitation is heterogeneity: the 11 included studies span 7 surgical specialties, use 6 distinct AR platforms, and measure outcomes using a wide variety of instruments, precluding statistical synthesis. Sample sizes were consistently small (range: 8&#x2010;60 participants), limiting statistical power. 
Publication bias cannot be excluded.</p><p>None of the 11 included studies reported long-term follow-up of skill retention or assessed the transfer of AR-trained skills to the real operating room or clinical environment. This is perhaps the most significant gap in the current evidence base. Device-related limitations were noted across several studies, including physical discomfort associated with prolonged HMD use [<xref ref-type="bibr" rid="ref6">6</xref>] and interface familiarization time across HoloLens-based studies. The risk of bias assessment found some concerns in 6 of 11 RCTs, predominantly related to allocation concealment and blinding, which is an inherent structural limitation of AR training trials rather than a correctable methodological weakness. The review was not prospectively registered, which is acknowledged as a limitation.</p><p>One study for which full text could not be obtained despite institutional access, interlibrary loan request, and direct author contact was excluded; this represents 1 of 29 full-text articles reviewed (3.4%) and, given the consistency of findings across the 11 included studies, is unlikely to materially alter the direction of the conclusions.</p></sec><sec id="s4-4"><title>Future Research Directions</title><p>The current evidence points toward several concrete priorities for future work. Most pressing is the need for adequately powered multicenter randomized trials across the more promising AR platforms and specialty domains, using standardized outcome measures that would actually allow findings to be compared across studies&#x2014;something the current literature makes difficult. Alongside this, longitudinal studies with skill-retention assessments at 3, 6, and 12 months posttraining are needed to establish whether the performance gains associated with AR hold over time or fade once the technology is removed. 
Transfer studies examining whether AR-trained skills translate meaningfully into clinical performance are a logical next step that the field has yet to take seriously. Finally, cost-effectiveness analyses will matter enormously for any health system considering curriculum-level adoption&#x2014;the training benefit needs to be weighed against the real costs of hardware, software, and implementation, and that work has not yet been done.</p></sec><sec id="s4-5"><title>Conclusions</title><p>This review found consistent evidence that AR improves technical performance in surgical novices&#x2014;reduced procedural errors, better accuracy, faster progression along the learning curve, and lower cognitive load, particularly in tasks with high visuospatial demands. The expertise reversal pattern that emerged across multiple studies is worth taking seriously: AR appears to deliver its greatest benefit during the early, high-error phase of skill acquisition, with returns diminishing as experience accumulates. That finding has practical implications for how AR should be used&#x2014;not as a permanent scaffold, but as a targeted intervention in early training, with guidance progressively withdrawn as competency develops.</p><p>What this review cannot claim is that the evidence is mature. Sample sizes are small, platforms vary enormously, outcome measures are inconsistent, and no study has yet examined whether skills are retained or transferred to real clinical settings. AR shows genuine promise as an adjunct within structured surgical curricula&#x2014;not a replacement for expert mentorship or traditional teaching, but something that adds real value when used thoughtfully alongside them. 
Turning that promise into confident implementation guidance will require the kind of rigorous, large-scale, longitudinal work that the field has not yet produced.</p></sec></sec></body><back><ack><p>The authors thank the editorial team and peer reviewers at <italic>JMIR Medical Education</italic> for their rigorous and constructive engagement with this manuscript across multiple review rounds.</p><p>Several generative AI tools were used at specific stages of this manuscript&#x2019;s preparation. Claude (Anthropic) and ChatGPT (OpenAI) were used (1) to assist with the verification and optimization of database search string syntax during the development of the search strategy (all search terms, conceptual domains, and PICO components were defined by the human authors; AI was used to check syntactic compatibility with database field codes and Boolean operators); (2) to support calculations and structured analysis of objective outcome measures extracted from included studies (all primary data were extracted independently by human reviewers; AI assisted in organizing and cross-checking numerical results for consistency); (3) for grammar, punctuation, and language editing of the final manuscript draft; and (4) for drafting of the Acknowledgements section. No AI tool was used to independently generate, fabricate, or interpret scientific findings, formulate conclusions, conduct screening or eligibility assessment, or make any intellectual judgment regarding the inclusion or exclusion of studies. All scientific content, data synthesis, risk-of-bias assessments, interpretations, and conclusions are the sole responsibility of the human authors.</p></ack><notes><sec><title>Funding</title><p>No external funding was received for this study. 
No funder was involved in any aspect of the study design, data collection, analysis, interpretation, or the writing of the manuscript.</p></sec><sec><title>Data Availability</title><p>This systematic review analyzed data reported in the published studies cited in the reference list. No new primary data were generated. The data extraction spreadsheet and screening records are available from the corresponding author upon reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>MEA conceived the review, developed the protocol, conducted database searches, performed title/abstract and full-text screening (first reviewer), conducted data extraction and risk of bias assessment, and drafted the manuscript. YA performed independent title/abstract and full-text screening (second reviewer) and independent data extraction and risk of bias assessment. TY adjudicated screening and data extraction discrepancies. AEA and HK contributed to manuscript review and revision. All authors reviewed and approved the final manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AR</term><def><p>augmented reality</p></def></def-item><def-item><term id="abb2">EVD</term><def><p>external ventricular drain</p></def></def-item><def-item><term id="abb3">GOALS</term><def><p>Global Operative Assessment of Laparoscopic Skills</p></def></def-item><def-item><term id="abb4">HMD</term><def><p>head-mounted display</p></def></def-item><def-item><term id="abb5">MeSH</term><def><p>Medical Subject Headings</p></def></def-item><def-item><term id="abb6">MR</term><def><p>mixed reality</p></def></def-item><def-item><term id="abb7">NASA-TLX</term><def><p>NASA Task Load Index</p></def></def-item><def-item><term id="abb8">OSATS</term><def><p>Objective Structured Assessment of Technical Skills</p></def></def-item><def-item><term id="abb9">PICO</term><def><p>population, intervention, comparison, 
outcome</p></def></def-item><def-item><term id="abb10">PRISMA</term><def><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses</p></def></def-item><def-item><term id="abb11">RCT</term><def><p>randomized controlled trial</p></def></def-item><def-item><term id="abb12">RoB 2</term><def><p>Cochrane Risk of Bias 2 Tool</p></def></def-item><def-item><term id="abb13">ROBINS-I</term><def><p>Risk of Bias in Nonrandomized Studies of Interventions</p></def></def-item><def-item><term id="abb14">SWiM</term><def><p>Synthesis Without Meta-Analysis</p></def></def-item><def-item><term id="abb15">VR</term><def><p>virtual reality</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wolf</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wolfer</surname><given-names>V</given-names> </name><name name-style="western"><surname>Halbe</surname><given-names>M</given-names> </name><name name-style="western"><surname>Maisano</surname><given-names>F</given-names> </name><name name-style="western"><surname>Lohmeyer</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Meboldt</surname><given-names>M</given-names> </name></person-group><article-title>Comparing the effectiveness of augmented reality-based and conventional instructions during single ECMO cannulation training</article-title><source>Int J Comput Assist Radiol Surg</source><year>2021</year><month>07</month><volume>16</volume><issue>7</issue><fpage>1171</fpage><lpage>1180</lpage><pub-id pub-id-type="doi">10.1007/s11548-021-02408-y</pub-id><pub-id pub-id-type="medline">34023976</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>X</given-names> 
</name><name name-style="western"><surname>Xiao</surname><given-names>W</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yan</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Liang</surname><given-names>F</given-names> </name></person-group><article-title>Augmented reality technology shortens aneurysm surgery learning curve for residents</article-title><source>Comput Assist Surg (Abingdon)</source><year>2024</year><month>12</month><volume>29</volume><issue>1</issue><fpage>2311940</fpage><pub-id pub-id-type="doi">10.1080/24699322.2024.2311940</pub-id><pub-id pub-id-type="medline">38315080</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cizmic</surname><given-names>A</given-names> </name><name name-style="western"><surname>M&#x00FC;ller</surname><given-names>F</given-names> </name><name name-style="western"><surname>Wise</surname><given-names>PA</given-names> </name><etal/></person-group><article-title>Telestration with augmented reality improves the performance of the first ten ex vivo porcine laparoscopic cholecystectomies: a randomized controlled study</article-title><source>Surg Endosc</source><year>2023</year><month>10</month><volume>37</volume><issue>10</issue><fpage>7839</fpage><lpage>7848</lpage><pub-id pub-id-type="doi">10.1007/s00464-023-10360-y</pub-id><pub-id pub-id-type="medline">37612445</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Felinska</surname><given-names>EA</given-names> </name><name name-style="western"><surname>Fuchs</surname><given-names>TE</given-names> </name><name name-style="western"><surname>Kogkas</surname><given-names>A</given-names> 
</name><etal/></person-group><article-title>Telestration with augmented reality improves surgical performance through gaze guidance</article-title><source>Surg Endosc</source><year>2023</year><month>05</month><volume>37</volume><issue>5</issue><fpage>3557</fpage><lpage>3566</lpage><pub-id pub-id-type="doi">10.1007/s00464-022-09859-7</pub-id><pub-id pub-id-type="medline">36609924</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Neves Lopes</surname><given-names>V</given-names> </name><name name-style="western"><surname>Dantas</surname><given-names>I</given-names> </name><name name-style="western"><surname>Barbosa</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Barbosa</surname><given-names>J</given-names> </name></person-group><article-title>Telestration in the teaching of basic surgical skills: a randomized trial</article-title><source>J Surg Educ</source><year>2022</year><volume>79</volume><issue>4</issue><fpage>1031</fpage><lpage>1042</lpage><pub-id pub-id-type="doi">10.1016/j.jsurg.2022.02.013</pub-id><pub-id pub-id-type="medline">35331681</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Van Gestel</surname><given-names>F</given-names> </name><name name-style="western"><surname>Frantz</surname><given-names>T</given-names> </name><name name-style="western"><surname>Vannerom</surname><given-names>C</given-names> </name><etal/></person-group><article-title>The effect of augmented reality on the accuracy and learning curve of external ventricular drain placement</article-title><source>Neurosurg Focus</source><year>2021</year><month>08</month><volume>51</volume><issue>2</issue><fpage>E8</fpage><pub-id pub-id-type="doi">10.3171/2021.5.FOCUS21215</pub-id><pub-id 
pub-id-type="medline">34333479</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wild</surname><given-names>C</given-names> </name><name name-style="western"><surname>Lang</surname><given-names>F</given-names> </name><name name-style="western"><surname>Gerh&#x00E4;user</surname><given-names>AS</given-names> </name><etal/></person-group><article-title>Telestration with augmented reality for visual presentation of intraoperative target structures in minimally invasive surgery: a randomized controlled study</article-title><source>Surg Endosc</source><year>2022</year><month>10</month><volume>36</volume><issue>10</issue><fpage>7453</fpage><lpage>7461</lpage><pub-id pub-id-type="doi">10.1007/s00464-022-09158-1</pub-id><pub-id pub-id-type="medline">35266048</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hadida Barzilai</surname><given-names>D</given-names> </name><name name-style="western"><surname>Tejman-Yarden</surname><given-names>S</given-names> </name><name name-style="western"><surname>Yogev</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Augmented reality-guided mastoidectomy simulation: a randomized controlled trial assessing surgical proficiency</article-title><source>Laryngoscope</source><year>2025</year><month>02</month><volume>135</volume><issue>2</issue><fpage>894</fpage><lpage>900</lpage><pub-id pub-id-type="doi">10.1002/lary.31791</pub-id><pub-id pub-id-type="medline">39315469</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nagayo</surname><given-names>Y</given-names> </name><name 
name-style="western"><surname>Saito</surname><given-names>T</given-names> </name><name name-style="western"><surname>Oyama</surname><given-names>H</given-names> </name></person-group><article-title>Augmented reality self-training system for suturing in open surgery: a randomized controlled trial</article-title><source>Int J Surg</source><year>2022</year><month>06</month><volume>102</volume><fpage>106650</fpage><pub-id pub-id-type="doi">10.1016/j.ijsu.2022.106650</pub-id><pub-id pub-id-type="medline">35525415</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kong</surname><given-names>H</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>C</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Z</given-names> </name></person-group><article-title>Augmented reality navigation using surgical guides versus conventional techniques in pedicle screw placement</article-title><source>J Shanghai Jiaotong Univ (Sci)</source><year>2025</year><month>02</month><volume>30</volume><issue>1</issue><fpage>10</fpage><lpage>17</lpage><pub-id pub-id-type="doi">10.1007/s12204-023-2689-5</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dodier</surname><given-names>P</given-names> </name><name name-style="western"><surname>Civilla</surname><given-names>L</given-names> </name><name name-style="western"><surname>Mallouhi</surname><given-names>A</given-names> </name><etal/></person-group><article-title>An evaluation of physical and augmented patient-specific intracranial aneurysm simulators on microsurgical clipping performance and skills: a randomized controlled 
study</article-title><source>Neurosurg Focus</source><year>2024</year><month>01</month><volume>56</volume><issue>1</issue><fpage>E9</fpage><pub-id pub-id-type="doi">10.3171/2023.10.FOCUS23640</pub-id><pub-id pub-id-type="medline">38163349</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sweller</surname><given-names>J</given-names> </name></person-group><article-title>Cognitive load during problem solving: effects on learning</article-title><source>Cogn Sci</source><year>1988</year><month>04</month><volume>12</volume><issue>2</issue><fpage>257</fpage><lpage>285</lpage><pub-id pub-id-type="doi">10.1207/s15516709cog1202_4</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Barsom</surname><given-names>EZ</given-names> </name><name name-style="western"><surname>Graafland</surname><given-names>M</given-names> </name><name name-style="western"><surname>Schijven</surname><given-names>MP</given-names> </name></person-group><article-title>Systematic review on the effectiveness of augmented reality applications in medical training</article-title><source>Surg Endosc</source><year>2016</year><month>10</month><volume>30</volume><issue>10</issue><fpage>4174</fpage><lpage>4183</lpage><pub-id pub-id-type="doi">10.1007/s00464-016-4800-6</pub-id><pub-id pub-id-type="medline">26905573</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Sweller</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ayres</surname><given-names>P</given-names> </name><name name-style="western"><surname>Kalyuga</surname><given-names>S</given-names> </name></person-group><article-title>The expertise reversal 
effect</article-title><source>Cognitive Load Theory</source><year>2011</year><publisher-name>Springer</publisher-name><fpage>155</fpage><lpage>170</lpage><pub-id pub-id-type="doi">10.1007/978-1-4419-8126-4_12</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abu Halimah</surname><given-names>J</given-names> </name><name name-style="western"><surname>Mojiri</surname><given-names>ME</given-names> </name><name name-style="western"><surname>Ali</surname><given-names>AA</given-names> </name><etal/></person-group><article-title>Assessing the impact of augmented reality on surgical skills training for medical students: a systematic review</article-title><source>Cureus</source><year>2024</year><month>10</month><day>10</day><volume>16</volume><pub-id pub-id-type="doi">10.7759/cureus.71221</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xiong</surname><given-names>J</given-names> </name><name name-style="western"><surname>Dai</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>X</given-names> </name></person-group><article-title>Augmented reality for basic skills training in laparoscopic surgery: a systematic review and meta-analysis</article-title><source>Surg Endosc</source><year>2025</year><month>01</month><volume>39</volume><issue>1</issue><fpage>307</fpage><lpage>318</lpage><pub-id pub-id-type="doi">10.1007/s00464-024-11387-5</pub-id><pub-id pub-id-type="medline">39532736</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Page</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>McKenzie</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Bossuyt</surname><given-names>PM</given-names> </name><etal/></person-group><article-title>The PRISMA 2020 statement: an updated guideline for reporting systematic reviews</article-title><source>BMJ</source><year>2021</year><month>03</month><day>29</day><volume>372</volume><fpage>n71</fpage><pub-id pub-id-type="doi">10.1136/bmj.n71</pub-id><pub-id pub-id-type="medline">33782057</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Campbell</surname><given-names>M</given-names> </name><name name-style="western"><surname>McKenzie</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Sowden</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Synthesis Without Meta-Analysis (SWiM) in systematic reviews: reporting guideline</article-title><source>BMJ</source><year>2020</year><month>01</month><day>16</day><volume>368</volume><fpage>l6890</fpage><pub-id pub-id-type="doi">10.1136/bmj.l6890</pub-id><pub-id pub-id-type="medline">31948937</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Lefebvre</surname><given-names>C</given-names> </name><name name-style="western"><surname>Glanville</surname><given-names>J</given-names> </name><name name-style="western"><surname>Briscoe</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Searching for and selecting studies</article-title><source>Cochrane Handbook for Systematic Reviews of 
Interventions</source><year>2019</year><publisher-name>Cochrane</publisher-name><fpage>67</fpage><lpage>107</lpage><pub-id pub-id-type="doi">10.1002/9781119536604.ch4</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rethlefsen</surname><given-names>ML</given-names> </name><name name-style="western"><surname>Kirtley</surname><given-names>S</given-names> </name><name name-style="western"><surname>Waffenschmidt</surname><given-names>S</given-names> </name><etal/></person-group><article-title>PRISMA-S: an extension to the PRISMA statement for reporting literature searches in systematic reviews</article-title><source>Syst Rev</source><year>2021</year><month>01</month><day>26</day><volume>10</volume><issue>1</issue><fpage>39</fpage><pub-id pub-id-type="doi">10.1186/s13643-020-01542-z</pub-id><pub-id pub-id-type="medline">33499930</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Haddaway</surname><given-names>NR</given-names> </name><name name-style="western"><surname>Collins</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Coughlin</surname><given-names>D</given-names> </name><name name-style="western"><surname>Kirk</surname><given-names>S</given-names> </name></person-group><article-title>The role of Google Scholar in evidence reviews and its applicability to grey literature searching</article-title><source>PLoS ONE</source><year>2015</year><volume>10</volume><issue>9</issue><fpage>e0138237</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0138237</pub-id><pub-id pub-id-type="medline">26379270</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Sterne</surname><given-names>JAC</given-names> </name><name name-style="western"><surname>Savovi&#x0107;</surname><given-names>J</given-names> </name><name name-style="western"><surname>Page</surname><given-names>MJ</given-names> </name><etal/></person-group><article-title>RoB 2: a revised tool for assessing risk of bias in randomised trials</article-title><source>BMJ</source><year>2019</year><month>08</month><day>28</day><volume>366</volume><fpage>l4898</fpage><pub-id pub-id-type="doi">10.1136/bmj.l4898</pub-id><pub-id pub-id-type="medline">31462531</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sterne</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Hern&#x00E1;n</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Reeves</surname><given-names>BC</given-names> </name><etal/></person-group><article-title>ROBINS-I: a tool for assessing risk of bias in non-randomised studies of interventions</article-title><source>BMJ</source><year>2016</year><month>10</month><day>12</day><volume>355</volume><fpage>i4919</fpage><pub-id pub-id-type="doi">10.1136/bmj.i4919</pub-id><pub-id pub-id-type="medline">27733354</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McKnight</surname><given-names>RR</given-names> </name><name name-style="western"><surname>Pean</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Buck</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Hwang</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Hsu</surname><given-names>JR</given-names> </name><name 
name-style="western"><surname>Pierrie</surname><given-names>SN</given-names> </name></person-group><article-title>Virtual reality and augmented reality&#x2014;translating surgical training into surgical technique</article-title><source>Curr Rev Musculoskelet Med</source><year>2020</year><month>12</month><volume>13</volume><issue>6</issue><fpage>663</fpage><lpage>674</lpage><pub-id pub-id-type="doi">10.1007/s12178-020-09667-3</pub-id><pub-id pub-id-type="medline">32779019</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wood</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bruner</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Ross</surname><given-names>G</given-names> </name></person-group><article-title>The role of tutoring in problem solving</article-title><source>J Child Psychol Psychiatry</source><year>1976</year><month>04</month><volume>17</volume><issue>2</issue><fpage>89</fpage><lpage>100</lpage><pub-id pub-id-type="doi">10.1111/j.1469-7610.1976.tb00381.x</pub-id><pub-id pub-id-type="medline">932126</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Aggarwal</surname><given-names>R</given-names> </name><name name-style="western"><surname>Mytton</surname><given-names>OT</given-names> </name><name name-style="western"><surname>Derbrew</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Training and simulation for patient safety</article-title><source>Qual Saf Health Care</source><year>2010</year><volume>19</volume><issue>Suppl 2</issue><fpage>i34</fpage><lpage>i43</lpage><pub-id pub-id-type="doi">10.1136/qshc.2009.038562</pub-id><pub-id 
pub-id-type="medline">20693215</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Full electronic search strategies.</p><media xlink:href="mededu_v12i1e71572_app1.docx" xlink:title="DOCX File, 23 KB"/></supplementary-material><supplementary-material id="app2"><label>Checklist 1</label><p>PRISMA 2020 checklist.</p><media xlink:href="mededu_v12i1e71572_app2.docx" xlink:title="DOCX File, 27 KB"/></supplementary-material></app-group></back></article>