<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Med Educ</journal-id><journal-id journal-id-type="publisher-id">mededu</journal-id><journal-id journal-id-type="index">20</journal-id><journal-title>JMIR Medical Education</journal-title><abbrev-journal-title>JMIR Med Educ</abbrev-journal-title><issn pub-type="epub">2369-3762</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v11i1e72264</article-id><article-id pub-id-type="doi">10.2196/72264</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Game-Based Assessment of Cognitive Abilities and Personality Characteristics for Surgical Resident Selection: A Preliminary Validation Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Gazit</surname><given-names>Noa</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Ben-Gal</surname><given-names>Gilad</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Eliashar</surname><given-names>Ron</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" 
rid="equal-contrib1">*</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Prosthodontics, Faculty of Dental Medicine, Hebrew University of Jerusalem, Hadassah Medical Center</institution><addr-line>Kalman Ya'akov Man 1</addr-line><addr-line>Jerusalem</addr-line><country>Israel</country></aff><aff id="aff2"><institution>Department of Otolaryngology/HNS, Faculty of Medicine, Hebrew University of Jerusalem, Hadassah Medical Center</institution><addr-line>Jerusalem</addr-line><country>Israel</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Car</surname><given-names>Lorainne Tudor</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>O'Keeffe</surname><given-names>Dara A</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Doherty</surname><given-names>Eva M</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to  Noa Gazit, PhD, Department of Prosthodontics, Faculty of Dental Medicine, Hebrew University of Jerusalem, Hadassah Medical Center, Kalman Ya'akov Man 1, Jerusalem, Israel, 972 547567448; <email>gazit.noa@mail.huji.ac.il</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>15</day><month>8</month><year>2025</year></pub-date><volume>11</volume><elocation-id>e72264</elocation-id><history><date date-type="received"><day>06</day><month>02</month><year>2025</year></date><date date-type="rev-recd"><day>23</day><month>05</month><year>2025</year></date><date date-type="accepted"><day>31</day><month>05</month><year>2025</year></date></history><copyright-statement>&#x00A9; Noa Gazit, Gilad Ben-Gal, Ron Eliashar. 
Originally published in JMIR Medical Education (<ext-link ext-link-type="uri" xlink:href="https://mededu.jmir.org">https://mededu.jmir.org</ext-link>), 15.8.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Medical Education, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://mededu.jmir.org/">https://mededu.jmir.org/</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://mededu.jmir.org/2025/1/e72264"/><abstract><sec><title>Background</title><p>Assessment of nontechnical attributes is important in selecting candidates for surgical training. Currently, these assessments are typically made based on ineffective methods, which have been shown to be poorly correlated with later performance.</p></sec><sec><title>Objective</title><p>The study aimed to examine preliminary evidence regarding the use of game-based assessment (GBA) for assessing cognitive abilities and personality characteristics in candidates for surgical residencies.</p></sec><sec sec-type="methods"><title>Methods</title><p>The study had 2 phases. In the first phase, a gamified test was developed to assess competencies relevant for surgical residents. 
Three games were chosen, assessing 14 competencies: planning, problem-solving, ingenuity, goal orientation, self-reflection, endurance, analytical thinking, learning ability, flexibility, concentration, conformity, multitasking, working memory, and precision. In the second phase, we collected data from 152 medical interns and 30 expert surgeons to evaluate the test&#x2019;s feasibility, acceptability, and validity for candidate selection.</p></sec><sec sec-type="results"><title>Results</title><p>Feedback from the interns and surgeons supported the relevance of the test for selection of surgical residents. In addition, analyses of the interns&#x2019; performance data supported the appropriateness of the score calculation process and the internal structure of the test. Based on these data, the test showed good psychometric properties, including reliability (&#x03B1;=0.76) and discrimination (mean discrimination 0.39, SD 0.18). Test scores were significantly correlated with gender, video game experience, and technical aptitude test scores (all <italic>P</italic>&#x003C;.001).</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study presents an innovative GBA testing cognitive abilities and personality characteristics. Preliminary evidence supports the validity, feasibility, and acceptability of the test for the selection of surgical residents. However, evidence for test-criterion relationships, particularly the GBA&#x2019;s ability to predict future surgical performance, remains to be established. 
Future longitudinal studies are necessary to confirm its utility as a selection tool.</p></sec></abstract><kwd-group><kwd>resident selection</kwd><kwd>assessment</kwd><kwd>surgical training</kwd><kwd>cognitive abilities</kwd><kwd>personality characteristics</kwd><kwd>gamification</kwd><kwd>game-based assessment</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Selection of surgical training residents is an essential process aimed at ensuring that only the most capable candidates are chosen to undergo the rigorous training required to become qualified surgeons. Alongside technical skills, there is broad consensus that it is also crucial to assess nontechnical attributes, including cognitive abilities (eg, deductive reasoning, learning ability, and concentration) and personality characteristics (eg, decision-making, stress tolerance, and communication skills), in potential surgical residents [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. Indeed, some even consider nontechnical attributes to be more relevant for selecting surgical trainees than technical aptitude [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. In a recent study [<xref ref-type="bibr" rid="ref7">7</xref>], 19 nontechnical competencies were identified as relevant to surgeons in the 21st century (6 cognitive abilities and 13 personality characteristics).</p><p>Traditionally, surgical training programs have assessed nontechnical attributes almost exclusively through proxies such as academic achievement, curricula vitae, letters of recommendation, and unstructured interviews [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]. However, studies suggest that these methods are poorly correlated with later performance during residency [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. 
In light of such findings, some studies have examined the use of self-report measures as a potential alternative. For example, studies have explored the potential of self-report questionnaires for assessing personality, emotional intelligence, and grit. But there is as yet no consistent evidence that these methods improve the selection of surgical residents [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]; and these tools are subject to all the potential problems and biases of self-reports, from poor introspective ability to outright dishonesty [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. Hence, better ways of assessing surgical residency candidates are needed.</p><p>One promising new approach is to analyze behavior itself using simulated tasks, where examinees are exposed to controlled situations designed to elicit behaviors relevant to the assessment of specific competencies. This method is expected to have higher predictive value than either traditional methods or self-reports.</p><p>A simulation test can be conducted in the real world by evaluators or actors, or on a computer using emerging technologies such as virtual reality and gamification. Gamification refers to the incorporation of game elements into nongaming activities, and its application to personnel selection has led to the development of game-based assessments (GBAs). GBAs use gameplay behaviors to assess job-related skills, abilities, and characteristics, and they have many advantages over traditional assessments and noncomputerized simulation tests for predicting job performance [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref23">23</xref>]. First, GBAs promote a more positive assessment experience that reduces examinees&#x2019; stress levels and increases their engagement and motivation. 
Second, GBAs are based on an automated scoring system, which eliminates the bias often associated with human assessments. Finally, GBAs can collect rich high-resolution spatiotemporal data capturing examinees&#x2019; behavior throughout the test, allowing the entire solving process to be examined rather than just the final result or answer. These advantages may lead to a more reliable and valid assessment of examinees&#x2019; skills and abilities.</p><p>As GBAs are still relatively new, only a limited number of studies have examined their use in hiring and recruitment [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref26">26</xref>], and to the best of our knowledge, no study has evaluated GBAs as a tool for selecting medical residents. The current study examines the use of GBA for assessing cognitive abilities and personality characteristics identified as relevant for surgical residents in an initial phase of job analysis [<xref ref-type="bibr" rid="ref7">7</xref>]. This study is the first in a planned series of studies aimed at establishing the validity of the GBA. Here, we present preliminary evidence of its feasibility, acceptability, and validity in the context of surgical resident selection, based on feedback and behavioral data from potential candidates and expert surgeons. Further research linking the GBA scores to future surgical performance will be necessary to complete the validation process.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><p>We developed a gamified assessment test relevant for appraising the cognitive abilities and personality characteristics of potential surgical residents and examined preliminary evidence for its validity, feasibility, and acceptability. 
In accordance with the contemporary understanding of validity as a unified concept, we collected and evaluated evidence related to 4 sources of validity: content, internal structure, response process, and relationships with other variables [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>], although the evidence for relationships with other variables was limited and did not include test-criterion relationships. The evidence collected is based on both the procedures used in the development and revision of the test and the empirical data collected during the study. </p><sec id="s2-1"><title>Ethical Considerations</title><p>The study was approved by the ethics committee of the Hebrew University of Jerusalem (approval no. 13032023), and all participants provided informed consent. Participant data were stored using a unique fake identifier; the key linking these identifiers to real identities was kept in a password-protected file stored offline, ensuring that no identifying information was accessible online. Interns received US $75 for participating in the study, as well as feedback regarding their performance in both tests relative to the rest of the sample (the percentile rankings of their total scores). </p></sec><sec id="s2-2"><title>Test Development</title><sec id="s2-2-1"><title>The GBA</title><p>The GBA used in this study was developed in cooperation with Benchmark.games LTD (Hungary), a company that produces GBAs for use in organizational hiring and recruitment. Tests are tailored to the organization&#x2019;s needs, based on video games developed specifically for the assessment of various competencies (eg, analytical thinking, planning, or multitasking). 
Each test is administered on a standard computer and requires only a stable internet connection and a mouse.</p><p>The test developed for this study is based on three video games adapted to capture competencies needed by surgical residents: (1) Dotto, (2) CurioCity, and (3) MultiTask (refer to <xref ref-type="fig" rid="figure1">Figure 1</xref>). In the Dotto game, the goal is to build a structure by inserting and manipulating points and lines to reach a target while overcoming physics-based challenges. The game confronts examinees with a problem-solving situation that is not clearly defined, requiring them to discover the rules for solving the problem on their own. In CurioCity, examinees are tasked with finding their way through a maze to reach the target area. The game consists of 16 levels with varying requirements and levels of difficulty. Once again, some of the rules must be discovered by examinees, and some rules change as the game proceeds, to test the adaptability and flexibility of the examinees. Finally, in the MultiTask game, examinees are asked to perform 2 nonverbal tasks simultaneously (eg, a swing balancing task and a simple arithmetic task). The game has three levels, each using a different combination of 2 tasks. The initial versions of the games were developed by psychometricians and psychologists employed by Benchmark.games, and the games were validated using data from hundreds of employees by Benchmark.games for general personnel selection. 
For this study, all 3 games were modified based on feedback from the research team in 3 ways: levels that were insufficiently challenging for candidates with high abilities were excluded; tasks that assessed irrelevant competencies (eg, typing speed and accuracy) were replaced with tasks assessing competencies relevant for surgical trainees (eg, concentration and working memory); and, to ensure that the assessment would be objective and standardized, the instructions and demonstrations for each game were revised and improved. Instructions were provided in English and included both written instructions and video demonstrations. Furthermore, to ensure that the instructions were understood correctly, each game was preceded by a few minutes of practice. The initial version of the test was then pilot-tested with 8 medical students. Based on their feedback, changes were made in the instructions and in the test interface. The entire test takes about 45&#x2010;60 minutes to complete, with each game taking 15&#x2010;20 minutes.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Illustrations of the video game assessments selected for the test. The games are shown in the order in which they appeared in the test.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mededu_v11i1e72264_fig01.png"/></fig><p>The video games were selected to assess 14 relevant competencies: planning, problem-solving, ingenuity, goal orientation, self-reflection, endurance, analytical thinking, learning ability, flexibility, concentration, conformity, multitasking, working memory, and precision. 
Definitions of the competencies are provided in <xref ref-type="table" rid="table1">Table 1</xref>.</p><p>The competencies were drawn from a set of cognitive abilities and personality characteristics identified as relevant for selection of surgical residents in a previous phase of job analysis conducted by the research team [<xref ref-type="bibr" rid="ref7">7</xref>]. However, the GBA does not assess some of the competencies which were identified as relevant to selection for surgical training (in particular, &#x201C;soft skills&#x201D; such as interpersonal skills, teamwork, leadership, and integrity). These competencies were not assessed in the present research because they are not susceptible to measurement using computerized and automated systems. The GBA was designed such that each game would elicit specific behaviors relevant to 2 or more of the 14 competencies, with each competency assessed using information obtained from one game (except for precision, which was assessed in all 3 games).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Competencies assessed in the game-based assessment (GBA) test.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Competency</td><td align="left" valign="bottom">Description</td><td align="left" valign="bottom">Video game used to assess the competency</td></tr></thead><tbody><tr><td align="left" valign="top">Planning</td><td align="left" valign="top">Ability to plan the steps required to solve the task, and to implement the plan in order to achieve the goal.</td><td align="left" valign="top">Dotto</td></tr><tr><td align="left" valign="top">Problem-solving</td><td align="left" valign="top">Ability to work through unexpected obstacles and challenges that arise during the task.</td><td align="left" valign="top">Dotto</td></tr><tr><td align="left" valign="top">Ingenuity</td><td align="left" valign="top">Ability to test the boundaries of a problem and 
to seek unique and creative solutions.</td><td align="left" valign="top">Dotto</td></tr><tr><td align="left" valign="top">Goal orientation</td><td align="left" valign="top">Ability to translate an intention into action (ie, to stay focused on achieving the goal).</td><td align="left" valign="top">Dotto</td></tr><tr><td align="left" valign="top">Self-reflection</td><td align="left" valign="top">Ability to learn from failure and to adopt a new approach.</td><td align="left" valign="top">Dotto</td></tr><tr><td align="left" valign="top">Endurance</td><td align="left" valign="top">Ability to invest effort for an extended period of time.</td><td align="left" valign="top">Dotto</td></tr><tr><td align="left" valign="top">Analytical thinking</td><td align="left" valign="top">Ability to collect, organize, and implement the information needed to solve the problem.</td><td align="left" valign="top">CurioCity</td></tr><tr><td align="left" valign="top">Learning ability</td><td align="left" valign="top">Ability to recognize &#x201C;rules&#x201D; quickly and effectively and apply them in the relevant situation.</td><td align="left" valign="top">CurioCity</td></tr><tr><td align="left" valign="top">Flexibility</td><td align="left" valign="top">Ability to adapt to changes in the situation.</td><td align="left" valign="top">CurioCity</td></tr><tr><td align="left" valign="top">Concentration</td><td align="left" valign="top">Ability to stay focused and to maintain high performance even in monotonous repetitive tasks.</td><td align="left" valign="top">CurioCity</td></tr><tr><td align="left" valign="top">Conformity</td><td align="left" valign="top">Ability and willingness to follow rules and instructions.</td><td align="left" valign="top">CurioCity</td></tr><tr><td align="left" valign="top">Multitasking</td><td align="left" valign="top">Ability to split attention between two tasks without harming performance.</td><td align="left" valign="top">MultiTask</td></tr><tr><td align="left" 
valign="top">Working memory</td><td align="left" valign="top">Ability to store and retrieve information in short-term memory.</td><td align="left" valign="top">MultiTask</td></tr><tr><td align="left" valign="top">Precision</td><td align="left" valign="top">Ability to perform the task in an accurate manner, with few errors.</td><td align="left" valign="top">All games</td></tr></tbody></table></table-wrap></sec><sec id="s2-2-2"><title>Scoring</title><p>The gamified tasks provide the stimuli by which the program measures candidates&#x2019; behavior. In each game, all actions of examinees (eg, mouse movements and key presses) are recorded and logged. Approximately 2000 data points are recorded for each 15-minute gameplay session. These raw data are then transformed into higher-level variables that describe a set of meaningful measurements (eg, time to first response, time between actions, accuracy, number of steps, and learning curve). Then, competency scores are calculated using an aggregation (ie, linear combination) of the relevant variables, with higher weight given to variables characterized by larger variance between candidates.</p><p>The initial mapping between different variables and competencies was determined by a team of psychologists and psychometricians employed by the company following a theory-driven approach [<xref ref-type="bibr" rid="ref20">20</xref>]. This mapping was tested and improved based on empirical data from hundreds of employees, and variables that did not converge with the expected pattern were excluded from consideration. 
The mapping was then further validated based on correlations with other measures of cognitive abilities and personality (eg, Raven&#x2019;s Progressive Matrices, the Stroop test, scales of the International Personality Item Pool, and the Bar-On Emotional Quotient Inventory; refer to Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p><p>Competency scores are computed and standardized based on a norm created using a database of over 5000 observations. Scores are presented on a scale of 1&#x2010;10. For this study, we also calculated a total test score for each examinee by averaging the individual competency scores (with equal weight for each competency). To facilitate interpretation of the results, the total scores were then scaled to have a mean of 100 and an SD of 20.</p></sec></sec><sec id="s2-3"><title>Validation</title><sec id="s2-3-1"><title>Sample and Procedure</title><p>To evaluate the test&#x2019;s validity, feasibility, and acceptability, we recruited 30 experienced surgeons from 3 hospitals and 152 medical interns from 10 hospitals in Israel. The surgeons were asked to review the test and then complete a feedback questionnaire (see below). The interns were asked to complete the test, and their test data were collected and analyzed to evaluate the internal structure and psychometric characteristics of the test (discrimination, reliability, and correlations between competency scores). The interns also completed a feedback questionnaire similar to that filled in by the surgeons.</p><p>The expert surgeons were recruited using an email invitation. Email addresses of potential participants were obtained from hospital websites or from the Israeli Medical Association database. Recruitment continued until we had 30 participants. 
Surgeons who were willing to participate in the study were invited to review the gamified test and to complete the feedback questionnaire.</p><p>The interns were recruited using an invitation posted in relevant Facebook and WhatsApp groups. Recruitment continued until at least 150 participants were enrolled. Participants were invited to attend a session in which we administered the gamified assessment test and a separate technical aptitude test developed by Gazit et al [<xref ref-type="bibr" rid="ref29">29</xref>]. The technical aptitude test included 10 basic tasks performed on the Lap-X VR laparoscopic simulator [<xref ref-type="bibr" rid="ref30">30</xref>] and was designed to assess technical skills relevant for surgery such as dexterity, visuospatial perception, coordination, and arm-hand steadiness. The order of the tests varied, such that some participants started with the GBA and others with the technical aptitude test, with a short break between the two. The interns were told that each game in the GBA should take around 15&#x2010;20 minutes to complete. </p></sec><sec id="s2-3-2"><title>Questionnaire</title><p>The questionnaires filled in by the surgeons and interns were nearly identical. Participants in both samples were asked to provide four main ratings for each game: (1) its relevance for selecting candidates for surgical training (on a 5-point Likert scale, 1=not relevant, 5=extremely relevant); (2) its difficulty (also on a 5-point Likert scale, 1=very easy, 5=extremely difficult); (3) whether the time limit was sufficient (yes or no); and (4) whether the instructions were clear (yes or no). In addition, participants provided 2 ratings for the test as a whole: the relevance of the entire test and the comfort of the test platform (both on 5-point Likert scales, 1=not relevant or not comfortable, 5=extremely relevant or comfortable). 
Participants were also invited to share general comments and suggestions for improving each game and the whole test using free text. Finally, each participant provided demographic information (for interns: age, gender, dominant hand, desired training field [surgical or nonsurgical], and previous experience with video games; for the surgeons: age, gender, surgical specialty, and number of years working in the field). Previous experience with video games was reported on a 5-point scale (1=no experience, 5=very extensive experience).</p></sec><sec id="s2-3-3"><title>Analyses</title><p>Some validity evidence is encompassed in the procedures used in the development of the test described above (selection of games and tasks based on job analysis; development of the games and scoring method by psychometricians and psychologists; and calculation of scores based on a norm sample). Further evidence of validity is derived from the empirical data collected in this study. In particular, internal structure evidence, response process evidence, and relationships with other variables were obtained from analysis of the interns&#x2019; test performance data. Content evidence, feasibility, and acceptability were obtained from the feedback questionnaires completed by both the interns and surgeons.</p><p>To analyze the performance data of the interns, we first examined the distribution of the competency scores and calculated Pearson correlations between them to support computation of a composite score for each participant, representing that participant&#x2019;s total performance in the test (response process evidence of validity). We then conducted an item analysis to assess the discrimination of each competency and the reliability of the whole test, and a factor analysis to assess whether the structure of the test variables accords with what is theoretically expected (together these provide internal structure evidence for validity). 
Finally, we calculated correlations between participants&#x2019; scores in the gamified test and other variables: their demographic characteristics (age, gender, dominant hand, desired training field, and previous experience with video games) and their technical aptitude test scores (evidence of relationships with other variables).</p><p>To analyze the data from the feedback questionnaires of the interns and surgeons, we first calculated, for each sample, mean relevance and difficulty ratings for each game. We then analyzed the data on the time limits and clarity of instructions for each game, as described above, and calculated the mean relevance and comfort ratings for the whole test. Finally, we analyzed the general comments obtained from participants in the open-ended question to identify common remarks and suggestions. All statistical analyses were performed using R, version 4.2.2 (R Foundation for Statistical Computing, Vienna, Austria).</p></sec></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p>In total, 152 interns (71 females, 46%) from 10 academic hospitals in Israel and 30 expert surgeons (4 females, 13%) from 3 academic hospitals in Israel participated in the study. 
Demographic characteristics of the participants are presented in <xref ref-type="table" rid="table2">Table 2</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Demographic characteristics of study participants.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Group and characteristic</td><td align="left" valign="bottom">Values</td></tr></thead><tbody><tr><td align="left" valign="top">Interns (n=152)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Age in years, mean (SD)</td><td align="left" valign="top">28.3 (3.8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gender (female), n (%)</td><td align="left" valign="top">71 (46)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Dominant hand (left), n (%)</td><td align="left" valign="top">13 (9)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Desired training field, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Surgical training</td><td align="left" valign="top">100 (65)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Nonsurgical training</td><td align="left" valign="top">36 (24)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Not decided</td><td align="left" valign="top">17 (11)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Experience with video games, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>No experience</td><td align="left" valign="top">22 (14)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Little experience</td><td align="left" valign="top">45 (29)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Moderate experience</td><td align="left" valign="top">46 (30)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Considerable experience</td><td align="left" valign="top">20 (13)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Very extensive experience</td><td align="left" valign="top">20 (13)</td></tr><tr><td align="left" valign="top">Expert surgeons (n=30)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Age in years, mean (SD)</td><td align="left" valign="top">53.8 (8.4)</td></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gender (female), n (%)</td><td align="left" valign="top">4 (13)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Years of experience, mean (SD)</td><td align="left" valign="top">13.5 (7.9)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Surgical specialty, n (%)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>General surgery</td><td align="left" valign="top">8 (27)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Gynecology</td><td align="left" valign="top">5 (17)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Orthopedics</td><td align="left" valign="top">10 (33)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Otorhinolaryngology&#x2013;head and neck surgery</td><td align="left" valign="top">4 (13)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Urology</td><td align="left" valign="top">3 (10)</td></tr></tbody></table></table-wrap></sec><sec id="s3-2"><title>Performance Data of Interns</title><p>First, competency scores and total test scores were calculated for each of the interns. 
The means and SDs of the competency scores and total scores are presented in <xref ref-type="table" rid="table3">Table 3</xref>. The total test scores ranged from 44 to 142 (a range of 98). <xref ref-type="fig" rid="figure2">Figure 2</xref> displays the distribution of the total scores for the 152 interns (the distributions of the individual competency scores can be found in Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Descriptive statistics and item analysis of the game-based assessment (GBA) test.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Competency</td><td align="left" valign="bottom">Mean</td><td align="left" valign="bottom">SD</td><td align="left" valign="bottom">Skew</td><td align="left" valign="bottom">Competency discrimination</td><td align="left" valign="bottom">Cronbach &#x03B1; if deleted</td></tr></thead><tbody><tr><td align="left" valign="top">Planning</td><td align="left" valign="top">6.30</td><td align="left" valign="top">2.24</td><td align="left" valign="top">&#x2212;0.29</td><td align="left" valign="top">0.64</td><td align="left" valign="top">0.72</td></tr><tr><td align="left" valign="top">Problem-solving</td><td align="left" valign="top">5.75</td><td align="left" valign="top">2.51</td><td align="left" valign="top">&#x2212;0.08</td><td align="left" valign="top">0.47</td><td align="left" valign="top">0.73</td></tr><tr><td align="left" valign="top">Ingenuity</td><td align="left" valign="top">4.22</td><td align="left" valign="top">2.19</td><td align="left" valign="top">0.13</td><td align="left" valign="top">0.34</td><td align="left" valign="top">0.74</td></tr><tr><td align="left" valign="top">Goal orientation</td><td align="left" valign="top">4.79</td><td align="left" valign="top">2.38</td><td align="left" valign="top">&#x2212;0.29</td><td align="left" 
valign="top">0.21</td><td align="left" valign="top">0.76</td></tr><tr><td align="left" valign="top">Self-reflection</td><td align="left" valign="top">4.87</td><td align="left" valign="top">3.09</td><td align="left" valign="top">0.14</td><td align="left" valign="top">0.23</td><td align="left" valign="top">0.76</td></tr><tr><td align="left" valign="top">Endurance</td><td align="left" valign="top">3.77</td><td align="left" valign="top">2.50</td><td align="left" valign="top">0.31</td><td align="left" valign="top">0.06</td><td align="left" valign="top">0.77</td></tr><tr><td align="left" valign="top">Analytical thinking</td><td align="left" valign="top">7.79</td><td align="left" valign="top">1.87</td><td align="left" valign="top">&#x2212;1.13</td><td align="left" valign="top">0.46</td><td align="left" valign="top">0.74</td></tr><tr><td align="left" valign="top">Learning ability</td><td align="left" valign="top">7.23</td><td align="left" valign="top">1.97</td><td align="left" valign="top">&#x2212;0.70</td><td align="left" valign="top">0.40</td><td align="left" valign="top">0.74</td></tr><tr><td align="left" valign="top">Flexibility</td><td align="left" valign="top">6.15</td><td align="left" valign="top">2.71</td><td align="left" valign="top">&#x2212;0.25</td><td align="left" valign="top">0.36</td><td align="left" valign="top">0.74</td></tr><tr><td align="left" valign="top">Concentration</td><td align="left" valign="top">7.98</td><td align="left" valign="top">1.91</td><td align="left" valign="top">&#x2212;1.30</td><td align="left" valign="top">0.34</td><td align="left" valign="top">0.75</td></tr><tr><td align="left" valign="top">Conformity</td><td align="left" valign="top">4.63</td><td align="left" valign="top">2.33</td><td align="left" valign="top">&#x2212;0.10</td><td align="left" valign="top">0.17</td><td align="left" valign="top">0.76</td></tr><tr><td align="left" valign="top">Multitasking</td><td align="left" valign="top">7.39</td><td align="left" 
valign="top">2.38</td><td align="left" valign="top">&#x2212;1.13</td><td align="left" valign="top">0.56</td><td align="left" valign="top">0.72</td></tr><tr><td align="left" valign="top">Working memory</td><td align="left" valign="top">6.22</td><td align="left" valign="top">3.47</td><td align="left" valign="top">&#x2212;0.43</td><td align="left" valign="top">0.46</td><td align="left" valign="top">0.73</td></tr><tr><td align="left" valign="top">Precision</td><td align="left" valign="top">7.24</td><td align="left" valign="top">1.82</td><td align="left" valign="top">&#x2212;0.71</td><td align="left" valign="top">0.71</td><td align="left" valign="top">0.72</td></tr><tr><td align="left" valign="top">Total test score<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="top">100.00</td><td align="left" valign="top">20.00</td><td align="left" valign="top">&#x2212;0.55</td><td align="left" valign="top"><named-content content-type="indent">&#x2014;<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></named-content></td><td align="left" valign="top">&#x2014;</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>Cronbach &#x03B1;=0.76.</p></fn><fn id="table3fn2"><p><sup>b</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Distribution of total GBA test scores in the intern sample (n=152). GBA: game-based assessment.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="mededu_v11i1e72264_fig02.png"/></fig><p>To support the calculation of a total test score based on the competency scores, we examined the Pearson correlations between the competency scores. Most of the correlations were high (refer to Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). 
To support the internal structure of the test, an item analysis was then conducted to assess the discrimination of each competency and the reliability of the whole test (see <xref ref-type="table" rid="table3">Table 3</xref>).</p><p>The results showed good psychometric properties: the discrimination was satisfactory for all competencies (mean 0.39, SD 0.18), and the test&#x2019;s internal reliability was high (&#x03B1;=0.76). In addition, we conducted exploratory factor analysis with Promax rotation on the 14 competencies. The Kaiser&#x2013;Meyer&#x2013;Olkin measure of sampling adequacy suggested that the data were factorable (Kaiser&#x2013;Meyer&#x2013;Olkin=0.69). The factor analysis yielded a 2-factor solution, such that 7 competencies (analytical thinking, learning ability, flexibility, concentration, working memory, multitasking, and precision) loaded on one factor, and 6 competencies (planning, problem-solving, ingenuity, goal orientation, self-reflection, and endurance) loaded on the second factor. The only exception was conformity, which did not load on either of the factors. Based on our previous job analysis [<xref ref-type="bibr" rid="ref7">7</xref>], we defined the first group as cognitive abilities and the second group as personality characteristics. The correlation between the 2 factors was 0.5. Detailed results for the factor loadings can be found in Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>We next calculated correlations between the total test scores in the GBA and external variables, including participants&#x2019; demographic characteristics and their scores in the separate technical aptitude test described earlier. No significant correlations were found between age, dominant hand, or desired training field and the total GBA scores. 
However, a significant difference emerged with respect to gender, such that males (mean 104.6, SD 16.8) scored significantly higher than females (mean 94.3, SD 21.9) on the gamified test (mean difference 10.3, 95% CI 3.1-17.6, <italic>t</italic><sub>150</sub>=2.8, <italic>P</italic>=.002, Cohen <italic>d</italic>=0.52). This represents a small-to-medium effect size. In addition, we found a significant low positive correlation between the total GBA scores and reported amount of previous experience with video games (<italic>r</italic><sub>150</sub>=0.26, <italic>P</italic>&#x003C;.001). Interestingly, when we controlled for video game experience, the difference between the genders was no longer significant, suggesting that this difference is mainly due to different levels of video game experience.</p><p>Finally, we also calculated the correlation between the total GBA scores and scores in the technical aptitude test. We found a significant correlation between the 2 sets of scores (<italic>r</italic><sub>150</sub>=0.46, <italic>P</italic>&#x003C;.001). When controlling for video game experience, the correlation remained significant, though slightly reduced (semipartial <italic>r</italic><sub>152</sub>=0.38, <italic>P</italic>&#x003C;.001), suggesting that while gaming experience contributes to the association, the majority of the shared variance likely reflects underlying competencies relevant to both assessments. Supporting this interpretation, we found significant correlations between technical aptitude test scores and several nontechnical competencies measured by the GBA: planning, <italic>r</italic><sub>150</sub>=0.28; problem-solving, <italic>r</italic><sub>150</sub>=0.28; analytical thinking, <italic>r</italic><sub>150</sub>=0.27; learning ability, <italic>r</italic><sub>150</sub>=0.30; flexibility, <italic>r</italic><sub>150</sub>=0.50; and precision, <italic>r</italic><sub>150</sub>=0.30; all <italic>P</italic>&#x003C;.001. 
In the absence of these 6 competencies, the total GBA scores showed no significant correlation with the technical aptitude test (<italic>r</italic><sub>150</sub>=0.11, <italic>P</italic>=.17). These findings suggest that shared cognitive and behavioral attributes may play an important role in performance on both tests.</p></sec><sec id="s3-3"><title>Questionnaire Data</title><p><xref ref-type="table" rid="table4">Table 4</xref> presents the main results for the questionnaire data, including mean relevance and difficulty ratings for each game, and the rates at which participants judged the time limits as sufficient and the instructions as clear.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Feedback of interns and expert surgeons on the relevance,<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup> difficulty,<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup> time limit,<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup> and clarity of instructions<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup> for each game in the game-based assessment (GBA) test.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Game and group</td><td align="left" valign="bottom">Relevance rating, mean (SD)</td><td align="left" valign="bottom">Difficulty rating, mean (SD)</td><td align="left" valign="bottom">Time limit, n (%)</td><td align="left" valign="bottom">Clarity of instructions, n (%)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Dotto</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top" rowspan="2"/><td align="left" valign="top">Interns</td><td align="left" valign="top">3.5 (0.8)</td><td align="left" valign="top">4.5 (0.4)</td><td align="left" valign="top">95 (62)</td><td align="left" valign="top">94 
(61)</td></tr><tr><td align="left" valign="top">Surgeons</td><td align="left" valign="top">3.8 (0.6)</td><td align="left" valign="top">4.2 (0.7)</td><td align="left" valign="top">22 (73)</td><td align="left" valign="top">21 (70)</td></tr><tr><td align="left" valign="top" colspan="2">CurioCity</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top" rowspan="2"/><td align="left" valign="top">Interns</td><td align="left" valign="top">3.8 (0.6)</td><td align="left" valign="top">2.9 (0.8)</td><td align="left" valign="top">151 (99)</td><td align="left" valign="top">144 (94)</td></tr><tr><td align="left" valign="top">Surgeons</td><td align="left" valign="top">3.7 (0.7)</td><td align="left" valign="top">3.7 (0.6)</td><td align="left" valign="top">27 (90)</td><td align="left" valign="top">24 (80)</td></tr><tr><td align="left" valign="top" colspan="2">MultiTask</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top" rowspan="2"/><td align="left" valign="top">Interns</td><td align="left" valign="top">3.7 (0.6)</td><td align="left" valign="top">2.9 (0.7)</td><td align="left" valign="top">142 (93)</td><td align="left" valign="top">147 (96)</td></tr><tr><td align="left" valign="top">Surgeons</td><td align="left" valign="top">3.6 (0.7)</td><td align="left" valign="top">2.5 (0.5)</td><td align="left" valign="top">29 (97)</td><td align="left" valign="top">27 (90)</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>The relevance rating scale ranged from 1 to 5, with higher scores indicating greater relevance for selection of surgical residents (1=&#x201C;not relevant&#x201D;, 2=&#x201C;slightly relevant&#x201D;, 3=&#x201C;moderately relevant&#x201D;, 4=&#x201C;very relevant&#x201D;, 5=&#x201C;extremely relevant&#x201D;).</p></fn><fn 
id="table4fn2"><p><sup>b</sup>The difficulty rating scale ranged from 1 to 5, with higher scores indicating greater difficulty (1=&#x201C;very easy&#x201D;, 2=&#x201C;easy&#x201D;, 3=&#x201C;moderately difficult&#x201D;, 4=&#x201C;very difficult&#x201D;, 5=&#x201C;extremely difficult&#x201D;).</p></fn><fn id="table4fn3"><p><sup>c</sup>Participants were asked whether the time limit was sufficient for the task. The number in the table represents the number of interns and surgeons who responded &#x201C;yes.&#x201D;</p></fn><fn id="table4fn4"><p><sup>d</sup>Participants were asked whether the instructions for the task were clear. The number in the table represents the number of interns and surgeons who responded &#x201C;yes.&#x201D; The instructions were modified slightly based on the surgeons&#x2019; feedback before the test was administered to the interns.</p></fn></table-wrap-foot></table-wrap><p>Addressing the latter first, overall, both the interns and expert surgeons regarded the time limits as sufficient (the lowest time limit approval rating was 62% of the interns for the Dotto game; for CurioCity and MultiTask, all ratings were 90% or above). Both samples also considered the instructions to be generally clear (again, the lowest approval rating was by the interns for the Dotto game, at 61%; see <xref ref-type="table" rid="table4">Table 4</xref>). Before the test was administered to the interns, some of the instructions were modified slightly and improved based on feedback provided by the expert surgeons either verbally or in writing.</p><p>The difficulty ratings varied between games, with the CurioCity and MultiTask games perceived overall as being moderately difficult, and the Dotto game largely perceived as very difficult to extremely difficult. The mean difficulty rating across the games and samples was 3.5 (SD 0.7), meaning that the test as a whole was perceived as moderately to very difficult. 
All games were considered by both the expert surgeons and the interns as relevant for assessing cognitive abilities and personality characteristics in the selection of candidates for surgical training (manifested in average ratings of 3.5 or above; see <xref ref-type="table" rid="table4">Table 4</xref>). The mean relevance rating across the games and samples was 3.6 (SD 0.1). Looking at the whole-test ratings, the mean relevance ratings were relatively high (interns: mean 3.6, SD 0.7; expert surgeons: mean 3.7, SD 0.6). In addition, the test platform was perceived as comfortable to use (interns: mean 4.2, SD 0.2; expert surgeons: mean 4.0, SD 0.3).</p><p>As noted, we also analyzed participants&#x2019; written feedback (in the free-text portion of the questionnaire), as well as feedback provided orally by the expert surgeons. Some of the surgeons indicated that their relevance ratings would have been higher if the tasks in the GBA were more directly related to surgical tasks and scenarios. Some participants also suggested that the test would be more relevant if it assessed other important competencies not covered in the current version, such as interpersonal skills, teamwork, leadership, and integrity. Finally, participants also expressed concern that prior experience with video games could affect performance on the test.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Study Overview and Significance</title><p>This paper presents an innovative gamified test designed to assess cognitive abilities and personality characteristics relevant to the selection of surgical residents. While several studies have evaluated the use of GBAs in assessing applicants for employment, this is, to our knowledge, the first to evaluate their use in selecting surgical residents. 
As part of a broader program of validation research, this initial study provides preliminary evidence supporting the tool&#x2019;s feasibility, acceptability, and validity.</p></sec><sec id="s4-2"><title>Evidence for Validity</title><sec id="s4-2-1"><title>Overview</title><p>On the basis of feedback from surgeons and interns regarding the test&#x2019;s relevance, difficulty, and administration, the results of this study support the feasibility and acceptability of the test. We also present preliminary evidence concerning 4 of the 5 main components of construct validity: content, response process, internal structure, and relationships with other variables (the fifth component, consequences, could not be examined in this study) [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. In some cases, the evidence is based on procedures used in the development and adaptation of the test; in others, it is based on empirical data collected during the study.</p></sec><sec id="s4-2-2"><title>Content</title><p>In terms of content, the games used in the GBA were selected to assess relevant cognitive abilities and personality characteristics based on competencies identified in a previous job analysis [<xref ref-type="bibr" rid="ref7">7</xref>]. The games were developed and validated by psychometricians and psychologists to evaluate these specific competencies, and both the interns and surgeons participating in the study rated the games as relevant for selecting candidates for surgical training. Some of the expert surgeons indicated that their relevance ratings would have been higher if the content of the games were more directly related to surgery or medicine. This weakens somewhat the content evidence for validity. However, the literature on gamification suggests that GBAs can effectively assess relevant competencies even when the game scenario seems unrelated to the profession [<xref ref-type="bibr" rid="ref26">26</xref>]. 
Future studies should examine whether GBAs that more directly mimic job-related situations are more valid for selecting qualified candidates.</p></sec><sec id="s4-2-3"><title>Response Process Evidence</title><p>Response process evidence of validity has 2 components. The first is the elimination of sources of error associated with test administration [<xref ref-type="bibr" rid="ref28">28</xref>]. Toward this end, we provided detailed and thorough instructions for each game. The instructions were revised based on feedback provided by the expert surgeons before the test was administered to the interns. The ratings of both the expert surgeons and interns indicate that on the whole, the instructions were perceived as clear.</p><p>The second component of response process evidence is the appropriateness of the methods used to combine different performance parameters to produce a composite score. To support the calculation of a total test score based on the competency scores, we examined the correlations between the competency scores. Strong correlations were obtained, supporting the calculation of a composite performance score.</p></sec><sec id="s4-2-4"><title>Internal Structure Evidence</title><p>Internal structure, as a source of validity, relates to the statistical or psychometric characteristics of the test. The item analysis conducted on the test data of the interns showed good psychometric properties, supporting the internal structure of the test. In addition, the factor analysis yielded two groups of competencies, one reflecting cognitive abilities and the other personality characteristics. 
This result is consistent with previous classifications of these competencies [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref31">31</xref>], and therefore also in keeping with the test&#x2019;s expected internal structure.</p></sec><sec id="s4-2-5"><title>Relationships With Other Variables</title><p>This source of evidence relates to the &#x201C;degree to which these relationships are consistent with the construct underlying the proposed test score interpretation&#x201D; [<xref ref-type="bibr" rid="ref32">32</xref>]. Most commonly, this evidence is assessed based on correlations of assessment scores with a criterion measure of future workplace performance. While this type of evidence is indeed crucial for the validation of the current test, it was not available in this initial study.</p><p>Instead, the present analysis relies on a different methodology, namely, examining whether the relationships found in this study between test scores and external variables are consistent with what is known from the literature regarding the relationship between nontechnical competencies and those variables. Based on the data of interns, we calculated the correlations between participants&#x2019; performance on the gamified test and other variables.</p><p>As expected, no correlations were found with age, dominant hand, or the intern&#x2019;s desired training field. We found relatively small but statistically significant correlations with both gender and self-reported video game experience, with males and frequent gamers obtaining higher GBA scores. Notably, the gender difference was largely accounted for by differences in video game experience, suggesting that the observed gender effect is explained by greater familiarity with video games among males. 
These findings are in line with other studies showing that gamers and males may potentially have advantages over nongamers and females in the context of GBAs [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>], and they raise questions regarding the fairness of these tests. Since there is evidence that playing video games improves cognitive and mental abilities [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>], it is unclear whether the correlation between video game experience and the gamified test scores found in this study reflects a genuine positive influence of video games on gamers&#x2019; abilities, or whether it is simply an artifact of the test format that may bias the selection process. Future research should examine whether changes in instructions, allowing more practice time before the test, or changes in GBA features and measures may eliminate these advantages [<xref ref-type="bibr" rid="ref33">33</xref>]. In addition, further studies should examine whether increasing women&#x2019;s exposure to video games in general would help to minimize this gender gap. However, it is important to note that the observed gender effect was small to medium in size, and the effect of video game experience was small. Thus, while caution is warranted, these differences should not be overstated. Until further evidence is available, the use of adjusted cutoffs or gender-specific norms may help avoid exacerbating the underrepresentation of women in surgical fields.</p><p>In addition, it is important to acknowledge that the GBA examined in this study does not encompass the full range of cognitive abilities and personality characteristics relevant for selecting surgical residents. Notably, key nontechnical competencies such as interpersonal skills, teamwork, leadership, and integrity were not addressed in the current assessment. 
Furthermore, the tasks included were primarily procedural and did not involve verbal abilities. As previous research has shown that males and females may excel in different domains&#x2014;with females often demonstrating strengths in tasks that require verbal abilities [<xref ref-type="bibr" rid="ref37">37</xref>] and interpersonal skills [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]&#x2014;it is plausible that a more comprehensive assessment approach could mitigate the small gender differences observed in this study. For example, incorporating tools that evaluate verbal and interpersonal competencies might balance the overall selection outcomes. Future research should investigate whether expanding the assessment battery to include gamified situational judgment tests [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref40">40</xref>] or other instruments targeting these nontechnical domains could enhance fairness and reduce gender disparities in selection.</p><p>Moreover, we found a medium correlation between the gamified test scores and scores on a technical aptitude test performed using a virtual reality laparoscopic simulator. Since video game experience has been shown to correlate with initial performance on laparoscopic simulators [<xref ref-type="bibr" rid="ref41">41</xref>], we considered the possibility that this shared factor may contribute to the observed association, that is, that previous video game experience might positively influence performance on both assessments. However, the correlation remained significant even after controlling for video game experience, suggesting that gaming experience only partially explains the relationship between the 2 tests.</p><p>In addition to this shared factor, our findings suggest that common underlying competencies may also play a role. 
Specifically, scores on the technical aptitude test were significantly associated with nontechnical competencies measured by the GBA, such as planning, problem-solving, analytical thinking, learning ability, flexibility, and precision. These results indicate that both assessments may tap into similar cognitive processes or behavioral tendencies. This interpretation is supported by prior research demonstrating meaningful correlations between nontechnical skills and performance on laparoscopic simulators [<xref ref-type="bibr" rid="ref42">42</xref>-<xref ref-type="bibr" rid="ref44">44</xref>].</p><p>To further disentangle the effects of gaming experience from shared competencies, future research should examine whether the correlation between GBA and laparoscopic simulator performance persists among individuals with previous laparoscopic experience. Alternatively, exploring the relationship between GBA scores and performance on open surgery tasks&#x2014;which are not influenced by video game experience&#x2014;could help clarify whether the observed correlation is driven by familiarity with gaming or by genuine overlap in nontechnical competencies.</p><p>Finally, as only 21% of the variance in GBA scores is explained by the technical aptitude test, it is clear that the GBA primarily measures competencies beyond those assessed by the laparoscopic simulator. This finding supports both the convergent and divergent validity of the GBA and aligns with its intended construct interpretation [<xref ref-type="bibr" rid="ref32">32</xref>].</p></sec></sec><sec id="s4-3"><title>Implications</title><p>Nontechnical skills are important for surgeons no less, and perhaps even more, than technical skills [<xref ref-type="bibr" rid="ref7">7</xref>]. Indeed, many underlying causes of error within and outside the operating room originate from nontechnical aspects of performance [<xref ref-type="bibr" rid="ref8">8</xref>]. 
Hence, training programs recognize the importance of assessing candidates&#x2019; cognitive abilities and personality characteristics when selecting each year&#x2019;s cohort of surgical residents. Yet traditional assessment methods (academic achievement, curricula vitae, letters of recommendation, and interviews) are poorly correlated with later performance; and self-report measures, a potential alternative, are subject to bias and dishonesty.</p><p>The present study introduces an innovative solution for assessing relevant competencies: game-based assessment [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. Building on existing GBAs developed for hiring and recruitment contexts, we implemented a systematic process to develop a gamified test tailored for surgical resident selection and conducted an initial investigation into its validity. Gamified assessment tests offer numerous advantages over other assessment approaches. First, they examine the entire solving process, as opposed to traditional tests which only examine the final product, allowing for a deeper understanding of the candidate&#x2019;s competencies and work style. Compared to self-report measures, GBAs measure candidates&#x2019; actual behavior, which is harder to fake. Finally, gamified tests are based on automated scoring, thus minimizing the influence of bias in the selection process.</p><p>The present findings provide preliminary support for the feasibility, acceptability, and validity of the gamified test, suggesting that it may contribute to improving the selection of surgical residents by offering a potentially more reliable assessment of candidates&#x2019; abilities and attributes. It follows that implementing this test&#x2014;or a similar tool&#x2014;may assist program directors in identifying candidates with strong potential for success in surgical training. 
This improved selection process should, in turn, result in more capable surgical residents and surgeons, ultimately leading to better surgical outcomes and increased patient safety. Our findings may be relevant to nonsurgical training programs as well, since some of the competencies assessed in the gamified test developed in this study apply to residents in all medical fields.</p><p>The gamified test presented in this study does not assess all cognitive abilities and personality characteristics relevant for selecting surgical residents. As mentioned by the participants in this study, competencies missing in the present work include interpersonal skills, teamwork, leadership, and integrity. Future studies should examine whether other types of GBAs, such as gamified situational judgment tests [<xref ref-type="bibr" rid="ref21">21</xref>], or other assessment methods may be useful for assessing these missing competencies.</p></sec><sec id="s4-4"><title>Strengths and Limitations</title><p>This is the first study to examine the use of GBAs in selecting surgical residents, or indeed medical residents in any field. As such, one of its key strengths is the use of a systematic process to develop a novel test for assessing candidates&#x2019; cognitive abilities and personality characteristics and to evaluate its validity, feasibility, and acceptability. Another strength is the large sample of expert surgeons (30) and interns (152) from various hospitals who provided data for statistical analysis (the interns) and feedback (both samples).</p><p>The study has some limitations. First, our participants came from a single country, thereby restricting the generalizability of our findings. However, it seems unlikely that the competencies we assessed are distributed differently among candidates from other nations. In addition, since the interns in our study were volunteers, it is possible that our sample does not represent the population of candidates for surgical training. 
Future studies should aim to recruit a randomly selected and more representative sample to ensure the findings are generalizable to the broader population of surgical trainees. However, the large variance in competency and test scores observed in our sample suggests that our sample was likely sufficiently representative of candidates with different qualifications. Finally, an important limitation of this study is the absence of evidence for test-criterion relationships. While we present data supporting various sources of validity, we have not yet assessed whether the GBA scores predict future performance in surgical residency. Given the high-stakes nature of surgical selection, establishing evidence for test-criterion relationships is critical before the tool can be adopted for widespread use. Longitudinal studies that track residents&#x2019; real-world performance over time are planned to address this essential aspect.</p></sec><sec id="s4-5"><title>Conclusions</title><p>The use of GBAs holds potential for contributing to improvements in resident selection. The present study presents an innovative gamified test designed to assess cognitive abilities and personality characteristics relevant to the selection of surgical residents. Preliminary evidence supports the feasibility, acceptability, and validity of the gamified test. However, further research is needed, particularly to assess evidence for test-criterion relationships, before the tool can be fully recommended for surgical resident selection.</p></sec></sec></body><back><ack><p>This research was supported by the Israel Science Foundation (grant No. 1830/20). 
The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</p></ack><notes><sec><title>Data Availability</title><p>The datasets used and analyzed during this study are available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">GBA</term><def><p>game-based assessment</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cuschieri</surname><given-names>A</given-names> </name><name name-style="western"><surname>Francis</surname><given-names>N</given-names> </name><name name-style="western"><surname>Crosby</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hanna</surname><given-names>GB</given-names> </name></person-group><article-title>What do master surgeons think of surgical competence and revalidation?</article-title><source>Am J Surg</source><year>2001</year><month>08</month><volume>182</volume><issue>2</issue><fpage>110</fpage><lpage>116</lpage><pub-id pub-id-type="doi">10.1016/s0002-9610(01)00667-5</pub-id><pub-id pub-id-type="medline">11574079</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Baldwin</surname><given-names>PJ</given-names> </name><name name-style="western"><surname>Paisley</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Brown</surname><given-names>SP</given-names> </name></person-group><article-title>Consultant surgeons&#x2019; opinion of the skills required of basic surgical trainees</article-title><source>Br J 
Surg</source><year>1999</year><month>08</month><volume>86</volume><issue>8</issue><fpage>1078</fpage><lpage>1082</lpage><pub-id pub-id-type="doi">10.1046/j.1365-2168.1999.01169.x</pub-id><pub-id pub-id-type="medline">10460649</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dean</surname><given-names>B</given-names> </name><name name-style="western"><surname>Jones</surname><given-names>L</given-names> </name><name name-style="western"><surname>Garfjeld Roberts</surname><given-names>P</given-names> </name><name name-style="western"><surname>Rees</surname><given-names>J</given-names> </name></person-group><article-title>What is known about the attributes of a successful surgical trainer? A systematic review</article-title><source>J Surg Educ</source><year>2017</year><volume>74</volume><issue>5</issue><fpage>843</fpage><lpage>850</lpage><pub-id pub-id-type="doi">10.1016/j.jsurg.2017.01.010</pub-id><pub-id pub-id-type="medline">28392267</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gardner</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Cavanaugh</surname><given-names>KJ</given-names> </name><name name-style="western"><surname>Willis</surname><given-names>RE</given-names> </name><etal/></person-group><article-title>Great expectations? 
Future competency requirements among candidates entering surgery training</article-title><source>J Surg Educ</source><year>2020</year><volume>77</volume><issue>2</issue><fpage>267</fpage><lpage>272</lpage><pub-id pub-id-type="doi">10.1016/j.jsurg.2019.09.001</pub-id><pub-id pub-id-type="medline">31606376</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bann</surname><given-names>S</given-names> </name><name name-style="western"><surname>Darzi</surname><given-names>A</given-names> </name></person-group><article-title>Selection of individuals for training in surgery</article-title><source>Am J Surg</source><year>2005</year><month>07</month><volume>190</volume><issue>1</issue><fpage>98</fpage><lpage>102</lpage><pub-id pub-id-type="doi">10.1016/j.amjsurg.2005.04.002</pub-id><pub-id pub-id-type="medline">15972179</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Grantcharov</surname><given-names>TP</given-names> </name><name name-style="western"><surname>Reznick</surname><given-names>RK</given-names> </name></person-group><article-title>Training tomorrow&#x2019;s surgeons: what are we looking for and how can we achieve it?</article-title><source>ANZ J Surg</source><year>2009</year><month>03</month><volume>79</volume><issue>3</issue><fpage>104</fpage><lpage>107</lpage><pub-id pub-id-type="doi">10.1111/j.1445-2197.2008.04823.x</pub-id><pub-id pub-id-type="medline">19317771</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gazit</surname><given-names>N</given-names> </name><name name-style="western"><surname>Ben-Gal</surname><given-names>G</given-names> </name><name 
name-style="western"><surname>Eliashar</surname><given-names>R</given-names> </name></person-group><article-title>Using job analysis for identifying the desired competencies of 21st-century surgeons for improving trainees selection</article-title><source>J Surg Educ</source><year>2023</year><month>01</month><volume>80</volume><issue>1</issue><fpage>81</fpage><lpage>92</lpage><pub-id pub-id-type="doi">10.1016/j.jsurg.2022.08.015</pub-id><pub-id pub-id-type="medline">36175291</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yule</surname><given-names>S</given-names> </name><name name-style="western"><surname>Flin</surname><given-names>R</given-names> </name><name name-style="western"><surname>Paterson-Brown</surname><given-names>S</given-names> </name><name name-style="western"><surname>Maran</surname><given-names>N</given-names> </name></person-group><article-title>Non-technical skills for surgeons in the operating room: a review of the literature</article-title><source>Surgery</source><year>2006</year><month>02</month><volume>139</volume><issue>2</issue><fpage>140</fpage><lpage>149</lpage><pub-id pub-id-type="doi">10.1016/j.surg.2005.06.017</pub-id><pub-id pub-id-type="medline">16455321</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Flin</surname><given-names>R</given-names> </name><name name-style="western"><surname>Yule</surname><given-names>S</given-names> </name><name name-style="western"><surname>Paterson-Brown</surname><given-names>S</given-names> </name><name name-style="western"><surname>Maran</surname><given-names>N</given-names> </name><name name-style="western"><surname>Rowley</surname><given-names>D</given-names> </name><name name-style="western"><surname>Youngson</surname><given-names>G</given-names> 
</name></person-group><article-title>Teaching surgeons about non-technical skills</article-title><source>Surgeon</source><year>2007</year><month>04</month><volume>5</volume><issue>2</issue><fpage>86</fpage><lpage>89</lpage><pub-id pub-id-type="doi">10.1016/S1479-666X(07)80059-X</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schaverien</surname><given-names>MV</given-names> </name></person-group><article-title>Selection for surgical training: an evidence-based review</article-title><source>J Surg Educ</source><year>2016</year><volume>73</volume><issue>4</issue><fpage>721</fpage><lpage>729</lpage><pub-id pub-id-type="doi">10.1016/j.jsurg.2016.02.007</pub-id><pub-id pub-id-type="medline">27133583</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lipman</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Colbert</surname><given-names>CY</given-names> </name><name name-style="western"><surname>Ashton</surname><given-names>R</given-names> </name><etal/></person-group><article-title>A systematic review of metrics utilized in the selection and prediction of future performance of residents in the United States</article-title><source>J Grad Med Educ</source><year>2023</year><month>12</month><volume>15</volume><issue>6</issue><fpage>652</fpage><lpage>668</lpage><pub-id pub-id-type="doi">10.4300/JGME-D-22-00955.1</pub-id><pub-id pub-id-type="medline">38045930</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bowe</surname><given-names>SN</given-names> </name><name name-style="western"><surname>Laury</surname><given-names>AM</given-names> </name><name 
name-style="western"><surname>Gray</surname><given-names>ST</given-names> </name></person-group><article-title>Associations between otolaryngology applicant characteristics and future performance in residency or practice: a systematic review</article-title><source>Otolaryngol Head Neck Surg</source><year>2017</year><month>06</month><volume>156</volume><issue>6</issue><fpage>1011</fpage><lpage>1017</lpage><pub-id pub-id-type="doi">10.1177/0194599817698430</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Harfmann</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Zirwas</surname><given-names>MJ</given-names> </name></person-group><article-title>Can performance in medical school predict performance in residency? A compilation and review of correlative studies</article-title><source>J Am Acad Dermatol</source><year>2011</year><month>11</month><volume>65</volume><issue>5</issue><fpage>1010</fpage><lpage>1022</lpage><pub-id pub-id-type="doi">10.1016/j.jaad.2010.07.034</pub-id><pub-id pub-id-type="medline">21612841</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kenny</surname><given-names>S</given-names> </name><name name-style="western"><surname>McInnes</surname><given-names>M</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>V</given-names> </name></person-group><article-title>Associations between residency selection strategies and doctor performance: a meta-analysis</article-title><source>Med Educ</source><year>2013</year><month>08</month><volume>47</volume><issue>8</issue><fpage>790</fpage><lpage>800</lpage><pub-id pub-id-type="doi">10.1111/medu.12234</pub-id><pub-id pub-id-type="medline">23837425</pub-id></nlm-citation></ref><ref 
id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Oldfield</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Beasley</surname><given-names>SW</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>J</given-names> </name><name name-style="western"><surname>Anthony</surname><given-names>A</given-names> </name><name name-style="western"><surname>Watt</surname><given-names>A</given-names> </name></person-group><article-title>Correlation of selection scores with subsequent assessment scores during surgical training</article-title><source>ANZ J Surg</source><year>2013</year><month>06</month><volume>83</volume><issue>6</issue><fpage>412</fpage><lpage>416</lpage><pub-id pub-id-type="doi">10.1111/ans.12176</pub-id><pub-id pub-id-type="medline">23647783</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stephenson-Famy</surname><given-names>A</given-names> </name><name name-style="western"><surname>Houmard</surname><given-names>BS</given-names> </name><name name-style="western"><surname>Oberoi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Manyak</surname><given-names>A</given-names> </name><name name-style="western"><surname>Chiang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>S</given-names> </name></person-group><article-title>Use of the interview in resident candidate selection: a review of the literature</article-title><source>J Grad Med Educ</source><year>2015</year><month>12</month><volume>7</volume><issue>4</issue><fpage>539</fpage><lpage>548</lpage><pub-id pub-id-type="doi">10.4300/JGME-D-14-00236.1</pub-id><pub-id pub-id-type="medline">26692964</pub-id></nlm-citation></ref><ref 
id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gardner</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Dunkin</surname><given-names>BJ</given-names> </name></person-group><article-title>Evaluation of validity evidence for personality, emotional intelligence, and situational judgment tests to identify successful residents</article-title><source>JAMA Surg</source><year>2018</year><month>05</month><day>1</day><volume>153</volume><issue>5</issue><fpage>409</fpage><lpage>416</lpage><pub-id pub-id-type="doi">10.1001/jamasurg.2017.5013</pub-id><pub-id pub-id-type="medline">29282462</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Niessen</surname><given-names>ASM</given-names> </name><name name-style="western"><surname>Meijer</surname><given-names>RR</given-names> </name><name name-style="western"><surname>Tendeiro</surname><given-names>JN</given-names> </name></person-group><article-title>Measuring non-cognitive predictors in high-stakes contexts: the effect of self-presentation on self-report instruments used in admission to higher education</article-title><source>Pers Individ Dif</source><year>2017</year><month>02</month><volume>106</volume><fpage>183</fpage><lpage>189</lpage><pub-id pub-id-type="doi">10.1016/j.paid.2016.11.014</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Griffin</surname><given-names>B</given-names> </name><name name-style="western"><surname>Wilson</surname><given-names>IG</given-names> </name></person-group><article-title>Faking good: self-enhancement in medical school applicants</article-title><source>Med 
Educ</source><year>2012</year><month>05</month><volume>46</volume><issue>5</issue><fpage>485</fpage><lpage>490</lpage><pub-id pub-id-type="doi">10.1111/j.1365-2923.2011.04208.x</pub-id><pub-id pub-id-type="medline">22515756</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Landers</surname><given-names>RN</given-names> </name><name name-style="western"><surname>Sanchez</surname><given-names>DR</given-names> </name></person-group><article-title>Game&#x2010;based, gamified, and gamefully designed assessments for employee selection: definitions, distinctions, design, and validation</article-title><source>Int J Selection Assessment</source><year>2022</year><month>03</month><volume>30</volume><issue>1</issue><fpage>1</fpage><lpage>13</lpage><pub-id pub-id-type="doi">10.1111/ijsa.12376</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Georgiou</surname><given-names>K</given-names> </name><name name-style="western"><surname>Gouras</surname><given-names>A</given-names> </name><name name-style="western"><surname>Nikolaou</surname><given-names>I</given-names> </name></person-group><article-title>Gamification in employee selection: the development of a gamified assessment</article-title><source>Int J Selection Assessment</source><year>2019</year><month>06</month><volume>27</volume><issue>2</issue><fpage>91</fpage><lpage>103</lpage><pub-id pub-id-type="doi">10.1111/ijsa.12240</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gomez</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Ruip&#x00E9;rez-Valiente</surname><given-names>JA</given-names> </name><name 
name-style="western"><surname>Clemente</surname><given-names>FJG</given-names> </name></person-group><article-title>A systematic literature review of game-based assessment studies: trends and challenges</article-title><source>IEEE Trans Learning Technol</source><year>2023</year><volume>16</volume><issue>4</issue><fpage>500</fpage><lpage>515</lpage><pub-id pub-id-type="doi">10.1109/TLT.2022.3226661</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ramos-Villagrasa</surname><given-names>PJ</given-names> </name><name name-style="western"><surname>Fern&#x00E1;ndez-del-R&#x00ED;o</surname><given-names>E</given-names> </name><name name-style="western"><surname>Castro</surname><given-names>&#x00C1;</given-names> </name></person-group><article-title>Game-related assessments for personnel selection: a systematic review</article-title><source>Front Psychol</source><year>2022</year><volume>13</volume><fpage>952002</fpage><pub-id pub-id-type="doi">10.3389/fpsyg.2022.952002</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Simons</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wohlgenannt</surname><given-names>I</given-names> </name><name name-style="western"><surname>Zelt</surname><given-names>S</given-names> </name><name name-style="western"><surname>Weinmann</surname><given-names>M</given-names> </name><name name-style="western"><surname>Schneider</surname><given-names>J</given-names> </name><name name-style="western"><surname>vom Brocke</surname><given-names>J</given-names> </name></person-group><article-title>Intelligence at play: game-based assessment using a virtual-reality application</article-title><source>Virtual 
Real</source><year>2023</year><month>09</month><volume>27</volume><issue>3</issue><fpage>1827</fpage><lpage>1843</lpage><pub-id pub-id-type="doi">10.1007/s10055-023-00752-9</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wiernik</surname><given-names>BM</given-names> </name><name name-style="western"><surname>Raghavan</surname><given-names>M</given-names> </name><name name-style="western"><surname>Caretta</surname><given-names>TR</given-names> </name><name name-style="western"><surname>Coovert</surname><given-names>MD</given-names> </name></person-group><article-title>Developing and validating a serious game&#x2010;based assessment for cyber occupations in the US Air Force</article-title><source>Int J Selection Assessment</source><year>2022</year><month>03</month><volume>30</volume><issue>1</issue><fpage>27</fpage><lpage>47</lpage><pub-id pub-id-type="doi">10.1111/ijsa.12378</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Landers</surname><given-names>RN</given-names> </name><name name-style="western"><surname>Armstrong</surname><given-names>MB</given-names> </name><name name-style="western"><surname>Collmus</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Mujcic</surname><given-names>S</given-names> </name><name name-style="western"><surname>Blaik</surname><given-names>J</given-names> </name></person-group><article-title>Theory-driven game-based assessment of general cognitive ability: design theory, measurement, prediction of performance, and test fairness</article-title><source>J Appl Psychol</source><year>2022</year><month>10</month><volume>107</volume><issue>10</issue><fpage>1655</fpage><lpage>1677</lpage><pub-id pub-id-type="doi">10.1037/apl0000954</pub-id><pub-id 
pub-id-type="medline">34672652</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cook</surname><given-names>DA</given-names> </name><name name-style="western"><surname>Hatala</surname><given-names>R</given-names> </name></person-group><article-title>Validation of educational assessments: a primer for simulation and beyond</article-title><source>Adv Simul</source><year>2016</year><month>01</month><volume>1</volume><issue>1</issue><fpage>1</fpage><lpage>12</lpage><pub-id pub-id-type="doi">10.1186/s41077-016-0033-y</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Downing</surname><given-names>SM</given-names> </name></person-group><article-title>Validity: on meaningful interpretation of assessment data</article-title><source>Med Educ</source><year>2003</year><month>09</month><volume>37</volume><issue>9</issue><fpage>830</fpage><lpage>837</lpage><pub-id pub-id-type="doi">10.1046/j.1365-2923.2003.01594.x</pub-id><pub-id pub-id-type="medline">14506816</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gazit</surname><given-names>N</given-names> </name><name name-style="western"><surname>Ben-Gal</surname><given-names>G</given-names> </name><name name-style="western"><surname>Eliashar</surname><given-names>R</given-names> </name></person-group><article-title>Development and validation of an objective virtual reality tool for assessing technical aptitude among potential candidates for surgical training</article-title><source>BMC Med Educ</source><year>2024</year><month>03</month><day>14</day><volume>24</volume><issue>1</issue><fpage>286</fpage><pub-id 
pub-id-type="doi">10.1186/s12909-024-05228-1</pub-id><pub-id pub-id-type="medline">38486166</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kawaguchi</surname><given-names>K</given-names> </name><name name-style="western"><surname>Egi</surname><given-names>H</given-names> </name><name name-style="western"><surname>Hattori</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sawada</surname><given-names>H</given-names> </name><name name-style="western"><surname>Suzuki</surname><given-names>T</given-names> </name><name name-style="western"><surname>Ohdan</surname><given-names>H</given-names> </name></person-group><article-title>Validation of a novel basic virtual reality simulator, the LAP-X, for training basic laparoscopic skills</article-title><source>Minim Invasive Ther Allied Technol</source><year>2014</year><month>10</month><volume>23</volume><issue>5</issue><fpage>287</fpage><lpage>293</lpage><pub-id pub-id-type="doi">10.3109/13645706.2014.903853</pub-id><pub-id pub-id-type="medline">24773373</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Patterson</surname><given-names>F</given-names> </name><name name-style="western"><surname>Ferguson</surname><given-names>E</given-names> </name><name name-style="western"><surname>Thomas</surname><given-names>S</given-names> </name></person-group><article-title>Using job analysis to identify core and specific competencies: implications for selection and recruitment</article-title><source>Med Educ</source><year>2008</year><month>12</month><volume>42</volume><issue>12</issue><fpage>1195</fpage><lpage>1204</lpage><pub-id pub-id-type="doi">10.1111/j.1365-2923.2008.03174.x</pub-id><pub-id 
pub-id-type="medline">19120950</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="book"><person-group person-group-type="author"><collab>American Educational Research Association</collab><collab>American Psychological Association</collab><collab>National Council on Measurement in Education</collab></person-group><source>Standards for Educational and Psychological Testing</source><year>2014</year><publisher-name>American Educational Research Association</publisher-name></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>YJ</given-names> </name><name name-style="western"><surname>Shute</surname><given-names>VJ</given-names> </name></person-group><article-title>The interplay of game elements with psychometric qualities, learning, and enjoyment in game-based assessment</article-title><source>Computers &#x0026; Education</source><year>2015</year><month>09</month><volume>87</volume><fpage>340</fpage><lpage>356</lpage><pub-id pub-id-type="doi">10.1016/j.compedu.2015.07.009</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ventura</surname><given-names>M</given-names> </name><name name-style="western"><surname>Shute</surname><given-names>V</given-names> </name></person-group><article-title>The validity of a game-based assessment of persistence</article-title><source>Comput Human Behav</source><year>2013</year><month>11</month><volume>29</volume><issue>6</issue><fpage>2568</fpage><lpage>2572</lpage><pub-id pub-id-type="doi">10.1016/j.chb.2013.06.033</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Granic</surname><given-names>I</given-names> 
</name><name name-style="western"><surname>Lobel</surname><given-names>A</given-names> </name><name name-style="western"><surname>Engels</surname><given-names>RCME</given-names> </name></person-group><article-title>The benefits of playing video games</article-title><source>Am Psychol</source><year>2014</year><month>01</month><volume>69</volume><issue>1</issue><fpage>66</fpage><lpage>78</lpage><pub-id pub-id-type="doi">10.1037/a0034857</pub-id><pub-id pub-id-type="medline">24295515</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Reynaldo</surname><given-names>C</given-names> </name><name name-style="western"><surname>Christian</surname><given-names>R</given-names> </name><name name-style="western"><surname>Hosea</surname><given-names>H</given-names> </name><name name-style="western"><surname>Gunawan</surname><given-names>AAS</given-names> </name></person-group><article-title>Using video games to improve capabilities in decision making and cognitive skill: a literature review</article-title><source>Procedia Comput Sci</source><year>2021</year><volume>179</volume><fpage>211</fpage><lpage>221</lpage><pub-id pub-id-type="doi">10.1016/j.procs.2020.12.027</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kheloui</surname><given-names>S</given-names> </name><name name-style="western"><surname>Jacmin-Park</surname><given-names>S</given-names> </name><name name-style="western"><surname>Larocque</surname><given-names>O</given-names> </name><etal/></person-group><article-title>Sex/gender differences in cognitive abilities</article-title><source>Neurosci Biobehav Rev</source><year>2023</year><month>09</month><volume>152</volume><fpage>105333</fpage><pub-id pub-id-type="doi">10.1016/j.neubiorev.2023.105333</pub-id><pub-id 
pub-id-type="medline">37517542</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sugawara</surname><given-names>A</given-names> </name><name name-style="western"><surname>Ishikawa</surname><given-names>K</given-names> </name><name name-style="western"><surname>Motoya</surname><given-names>R</given-names> </name><name name-style="western"><surname>Kobayashi</surname><given-names>G</given-names> </name><name name-style="western"><surname>Moroi</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Fukushima</surname><given-names>T</given-names> </name></person-group><article-title>Characteristics and gender differences in the medical interview skills of Japanese medical students</article-title><source>Intern Med</source><year>2017</year><volume>56</volume><issue>12</issue><fpage>1507</fpage><lpage>1513</lpage><pub-id pub-id-type="doi">10.2169/internalmedicine.56.8135</pub-id><pub-id pub-id-type="medline">28626175</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Graf</surname><given-names>J</given-names> </name><name name-style="western"><surname>Smolka</surname><given-names>R</given-names> </name><name name-style="western"><surname>Simoes</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Communication skills of medical students during the OSCE: gender-specific differences in a longitudinal trend study</article-title><source>BMC Med Educ</source><year>2017</year><month>12</month><volume>17</volume><issue>1</issue><fpage>1</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.1186/s12909-017-0913-4</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Gardner</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Costa</surname><given-names>P</given-names> </name></person-group><article-title>Predicting surgical resident performance with situational judgment tests</article-title><source>Acad Med</source><year>2024</year><month>08</month><day>1</day><volume>99</volume><issue>8</issue><fpage>884</fpage><lpage>888</lpage><pub-id pub-id-type="doi">10.1097/ACM.0000000000005680</pub-id><pub-id pub-id-type="medline">38412475</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lynch</surname><given-names>J</given-names> </name><name name-style="western"><surname>Aughwane</surname><given-names>P</given-names> </name><name name-style="western"><surname>Hammond</surname><given-names>TM</given-names> </name></person-group><article-title>Video games and surgical ability: a literature review</article-title><source>J Surg Educ</source><year>2010</year><volume>67</volume><issue>3</issue><fpage>184</fpage><lpage>189</lpage><pub-id pub-id-type="doi">10.1016/j.jsurg.2010.02.010</pub-id><pub-id pub-id-type="medline">20630431</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kengen</surname><given-names>B</given-names> </name><name name-style="western"><surname>IJgosse</surname><given-names>WM</given-names> </name><name name-style="western"><surname>van Goor</surname><given-names>H</given-names> </name><name name-style="western"><surname>Luursema</surname><given-names>JM</given-names> </name></person-group><article-title>Fast or safe? 
The role of impulsiveness in laparoscopic simulator performance</article-title><source>Am J Surg</source><year>2020</year><month>10</month><volume>220</volume><issue>4</issue><fpage>914</fpage><lpage>919</lpage><pub-id pub-id-type="doi">10.1016/j.amjsurg.2020.02.056</pub-id><pub-id pub-id-type="medline">32145917</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wetzel</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Black</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Hanna</surname><given-names>GB</given-names> </name><etal/></person-group><article-title>The effects of stress and coping on surgical performance during simulations</article-title><source>Ann Surg</source><year>2010</year><month>01</month><volume>251</volume><issue>1</issue><fpage>171</fpage><lpage>176</lpage><pub-id pub-id-type="doi">10.1097/SLA.0b013e3181b3b2be</pub-id><pub-id pub-id-type="medline">20032721</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rosendal</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Sloth</surname><given-names>SB</given-names> </name><name name-style="western"><surname>R&#x00F6;lfing</surname><given-names>JD</given-names> </name><name name-style="western"><surname>Bie</surname><given-names>M</given-names> </name><name name-style="western"><surname>Jensen</surname><given-names>RD</given-names> </name></person-group><article-title>Technical, non-technical, or both? 
A scoping review of skills in simulation-based surgical training</article-title><source>J Surg Educ</source><year>2023</year><month>05</month><volume>80</volume><issue>5</issue><fpage>731</fpage><lpage>749</lpage><pub-id pub-id-type="doi">10.1016/j.jsurg.2023.02.011</pub-id><pub-id pub-id-type="medline">36906398</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Supplementary material.</p><media xlink:href="mededu_v11i1e72264_app1.pdf" xlink:title="PDF File, 339 KB"/></supplementary-material></app-group></back></article>