<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v10i1e91260</article-id><article-id pub-id-type="doi">10.2196/91260</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>An Augmented Reality Audio-Motor Training Game for Improving Speech-in-Noise Perception: Single-Arm Pilot Feasibility Study</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Koh</surname><given-names>Pooseung</given-names></name><degrees>BS, MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Choi</surname><given-names>Inyong</given-names></name><degrees>BS, MS, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lee</surname><given-names>Hyo-jeong</given-names></name><degrees>MM, MD, PhD</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Kim</surname><given-names>Sungyoung</given-names></name><degrees>BEng, MM, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Graduate School of Culture Technology, Korea Advanced Institute of Science and Technology</institution><addr-line>Yuseong-gu Daehakro 291</addr-line><addr-line>Daejeon</addr-line><addr-line>Daejeon</addr-line><country>Republic of Korea</country></aff><aff id="aff2"><institution>Department of Communication Sciences and Disorders, University of Iowa</institution><addr-line>Iowa City</addr-line><addr-line>IA</addr-line><country>United States</country></aff><aff id="aff3"><institution>Laboratory of Hearing, Balance and Integrated Neuroscience, College of Medicine, Hallym University</institution><addr-line>Anyang</addr-line><addr-line>Gyeonggi-do</addr-line><country>Republic of Korea</country></aff><aff id="aff4"><institution>Department of Otorhinolaryngology-Head and Neck Surgery, College of Medicine, Hallym University</institution><addr-line>Chuncheon</addr-line><addr-line>Gangwon-do</addr-line><country>Republic of Korea</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Steenstra</surname><given-names>Ivan</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Park</surname><given-names>Jeongmi</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Costa</surname><given-names>Jose
Ferrer</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Sungyoung Kim, BEng, MM, PhD, Graduate School of Culture Technology, Korea Advanced Institute of Science and Technology, Yuseong-gu Daehakro 291, Daejeon, Daejeon, 34141, Republic of Korea, 82 01027537697; <email>sungyoung.kim@kaist.ac.kr</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>14</day><month>5</month><year>2026</year></pub-date><volume>10</volume><elocation-id>e91260</elocation-id><history><date date-type="received"><day>12</day><month>01</month><year>2026</year></date><date date-type="rev-recd"><day>16</day><month>03</month><year>2026</year></date><date date-type="accepted"><day>23</day><month>03</month><year>2026</year></date></history><copyright-statement>&#x00A9; Pooseung Koh, Inyong Choi, Hyo-jeong Lee, Sungyoung Kim. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 14.5.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2026/1/e91260"/><abstract><sec><title>Background</title><p>Difficulty understanding speech in noisy environments is a primary challenge of hearing impairment, inadequately addressed by hearing aids alone. While auditory training can enhance selective attention and speech perception, current digital programs face poor user adherence and lack realistic 3D spatial audio.</p></sec><sec><title>Objective</title><p>This pilot study evaluated the feasibility, usability, and preliminary efficacy of ARIA (Augmented Reality Immersive Auditory training), a handheld mobile intervention that provides gamified at-home auditory training to middle-aged adults via earbud-delivered spatial audio.</p></sec><sec sec-type="methods"><title>Methods</title><p>In this single-arm, pre-post&#x2013;follow-up pilot study, 11 adults (mean age 53.0, SD 3.0 y) with functional hearing not requiring amplification completed a 4-week at-home training program using ARIA on provided devices (iPhone 14 Pro, AirPods Pro 2). Speech-in-noise perception was assessed via the Korean Matrix Sentence Test at baseline, 4 weeks, and 8 weeks at 3 signal-to-noise ratios (SNRs; 0 dB, &#x2212;6 dB, and &#x2212;9 dB). Feasibility, usability (System Usability Scale), user experience (Player Experience of Need Satisfaction), in-game performance, and qualitative feedback were collected.</p></sec><sec sec-type="results"><title>Results</title><p>Protocol completion was 100% (11/11), demonstrating technical feasibility.
Exploratory efficacy analyses revealed statistically significant speech-in-noise improvements posttraining across all conditions (0 dB: <italic>t</italic><sub>10</sub>=3.43, <italic>P</italic>=.02; &#x2212;6 dB: <italic>t</italic><sub>10</sub>=5.34, <italic>P</italic>&#x003C;.001; &#x2212;9 dB: <italic>t</italic><sub>10</sub>=4.34, <italic>P</italic>=.004). Gains were maintained at the 8-week follow-up. In-game localization improvements correlated significantly with speech perception gains at &#x2212;6 dB SNR (&#x03C1;=0.639; <italic>P</italic>=.03) and &#x2212;9 dB SNR (&#x03C1;=0.612; <italic>P</italic>=.045). User experience showed mixed results: the mean System Usability Scale score was 70.2 (SD 19.6; range 47.5&#x2010;92.5), reflecting substantial individual differences in usability perception. While 72% (n=8) reported difficulties with the augmented reality (AR) environmental setup, 63% (n=7) reported genuine mastery-driven engagement with core gameplay. Thematic analysis revealed a dissociation between peripheral usability challenges (setup friction, &#x201C;homework&#x201D; characterization due to protocol structure) and successful engagement with the training paradigm itself.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This pilot demonstrated the feasibility of AR-based audio-motor training for at-home delivery and revealed encouraging preliminary efficacy signals, warranting progression to controlled efficacy trials. Formative findings identified specific usability refinements needed for broader implementation, particularly streamlining AR setup while preserving the core gameplay elements that successfully fostered competence and engagement. These insights provide clear guidance for platform optimization and randomized controlled trial design.</p></sec></abstract><kwd-group><kwd>augmented reality</kwd><kwd>auditory training</kwd><kwd>selective auditory attention</kwd><kwd>gamification</kwd><kwd>mobile health</kwd><kwd>mHealth</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Growing Challenge of Hearing Loss and the Need for Auditory Training</title><p>Age-related hearing loss affects 466 million people worldwide, a number projected to reach 630 million by 2030, and represents the fourth leading cause of disability, as well as one of the largest modifiable risk factors for dementia [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref5">5</xref>]. A primary challenge for individuals with hearing impairment is understanding speech in noisy environments, which demands a complex interplay between sensory input and cognitive processing [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. While hearing aids restore audibility, they inadequately address the cognitive demands of listening in noise [<xref ref-type="bibr" rid="ref8">8</xref>], highlighting a critical need for auditory training interventions that enhance selective auditory attention&#x2014;the ability to focus on target sounds amidst distractors, exemplified by the &#x201C;cocktail party effect&#x201D; [<xref ref-type="bibr" rid="ref9">9</xref>].
Neuroscientific research using functional magnetic resonance imaging and electroencephalography confirms that specific neural networks dedicated to selective attention are critical for both sound localization and speech segregation, and that targeted training can improve these functions in hearing-impaired individuals [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. Systematic reviews demonstrate that auditory training paradigms can enhance auditory and cognitive skills, including speech perception, working memory, attention, and processing speed [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref17">17</xref>]. However, traditional programs face significant barriers including poor user adherence and limited accessibility [<xref ref-type="bibr" rid="ref18">18</xref>]. Digital platforms like LACE and Amptify have improved accessibility through at-home delivery [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. However, these programs lack spatial audio cues, a dimension fundamental to everyday listening. Real-world listening is inherently 3D, with the brain leveraging binaural cues to localize and segregate sound sources, primarily interaural time differences (microsecond delays between ears) and interaural level differences (intensity differences between ears) [<xref ref-type="bibr" rid="ref21">21</xref>].</p></sec><sec id="s1-2"><title>Audio-Motor Training</title><p>Beyond acoustic properties, the listener&#x2019;s state of engagement critically modulates auditory processing and learning [<xref ref-type="bibr" rid="ref22">22</xref>]. Research demonstrates that &#x201C;active listening&#x201D;&#x2014;the cognitive engagement with sound&#x2014;can enhance auditory performance, with studies showing that active listening postures improve 3D sound localization and that embodied training activities can benefit related perceptual skills [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. This engagement often manifests through spontaneous head movements, which dynamically alter interaural time difference and interaural level difference cues to improve localization and resolve spatial ambiguities [<xref ref-type="bibr" rid="ref25">25</xref>]. Studies in virtual reality (VR) environments suggest that listeners increase head movements when facing challenging listening conditions, reflecting an instinctive learning strategy [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. Building on this, research by Valzolgher et al [<xref ref-type="bibr" rid="ref27">27</xref>] supports the use of active, sensorimotor training to improve spatial auditory performance. While the evidence for immediate transfer to speech-in-noise remains an area of ongoing investigation, a closed-loop audio-motor game requiring continuous sensorimotor prediction and correction has been shown to yield a 25% improvement in speech-in-noise perception [<xref ref-type="bibr" rid="ref28">28</xref>]. This suggests that such paradigms may achieve generalized learning that transfers beyond trained stimuli, potentially addressing the specificity limits common in traditional training [<xref ref-type="bibr" rid="ref28">28</xref>].
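</p><p>To make the contribution of head movement to these binaural cues concrete, consider a spherical-head approximation (the Woodworth model). The short Python sketch below is purely illustrative&#x2014;it is not part of the ARIA implementation or any cited study&#x2014;and shows how turning the head toward a source fixed 30&#x00B0; to the right progressively collapses the interaural time difference toward zero, the kind of cue change listeners exploit when reorienting:</p><preformat>import math

HEAD_RADIUS_M = 0.0875     # average adult head radius (illustrative assumption)
SPEED_OF_SOUND_M_S = 343.0

def woodworth_itd(azimuth_deg):
    """Interaural time difference (s) for a spherical head, valid for 0-90 deg."""
    theta = math.radians(azimuth_deg)
    return (HEAD_RADIUS_M / SPEED_OF_SOUND_M_S) * (theta + math.sin(theta))

# A source fixed 30 deg to the listener's right; rotating the head toward it
# shrinks the source-relative azimuth and, with it, the ITD.
for head_yaw_deg in (0, 10, 20, 30):
    relative_azimuth = 30 - head_yaw_deg
    itd_us = woodworth_itd(relative_azimuth) * 1e6
    print(f"head yaw {head_yaw_deg:2d} deg: ITD {itd_us:6.1f} microseconds")</preformat><p>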
Therefore, our system design integrates active listening tasks, natural head movements via 6-degree-of-freedom tracking, and goal-directed audio-motor interaction to explore whether these mechanisms can support robust and generalizable auditory learning.</p></sec><sec id="s1-3"><title>Augmented Reality and Gamification</title><p>Translating embodied audio-motor training from the laboratory to daily life requires technology that is both environmentally aware and accessible. Modern augmented reality (AR) frameworks are a promising modality for this challenge, having become increasingly accessible through advances in mobile hardware and software [<xref ref-type="bibr" rid="ref29">29</xref>]. While previous work has explored audio-motor feedback on tablets or in VR [<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref30">30</xref>], AR provides the critical advantage of integrating virtual elements into the user&#x2019;s real environment rather than replacing it. We developed ARIA (Augmented Reality Immersive Auditory training), a handheld mobile intervention that enables in situ training, where the system acoustically adapts to the participant&#x2019;s actual room geometry. By tracking smartphone position and using earbud-embedded gyroscopes for head rotation, ARIA delivers spatial audio that facilitates natural, embodied training within users&#x2019; physical spaces. While serious games show mixed results, well-designed gamification incorporating clear goals, immediate feedback, and structured progression can address adherence challenges [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. Following these principles, our system was refined from an earlier prototype that informed the initial design [<xref ref-type="bibr" rid="ref33">33</xref>], synthesizing evidence-based audio-motor training with AR&#x2019;s environmental mapping to create an engaging, acoustically realistic training framework.</p></sec><sec id="s1-4"><title>Study Objectives</title><p>The primary aims of this pilot study were to (1) evaluate the feasibility and acceptability of delivering AR-based auditory training to a foundational cohort of middle-aged adults (aged 50&#x2010;65 y) to establish platform viability, (2) assess usability through standardized scales and qualitative feedback, and (3) conduct exploratory analyses of speech-in-noise outcomes to inform future efficacy trials.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>This study was a single-arm, pre-post&#x2013;follow-up pilot feasibility trial conducted over a period of 8 weeks, evaluating an at-home training protocol for ARIA. All participants provided written informed consent.</p></sec><sec id="s2-2"><title>Participants and Recruitment</title><p>The study recruited middle-aged adults aged 50 to 65 years, an at-risk demographic for age-related sensorineural hearing loss, via university and company email lists supplemented by snowball sampling. Inclusion criteria were as follows: (1) aged 50 to 65 years, (2) self-reported functional hearing not requiring amplification, (3) ability to commit to an 8-week protocol, and (4) Korean fluency. Exclusion criteria were as follows: (1) diagnosed hearing impairment requiring hearing aids, (2) history of ear surgery or chronic ear disease, (3) neurological conditions affecting auditory or cognitive processing, and (4) inability to complete touchscreen-based tasks. 
Following screening, 11 participants (8 males and 3 females) enrolled. This cohort was specifically selected to evaluate the technical feasibility of the ARIA interface in a relatively high-functioning group before extending the intervention to clinical populations with advanced hearing loss.</p></sec><sec id="s2-3"><title>Ethical Considerations</title><p>This study was reviewed and approved by the Institutional Review Board of the Korea Advanced Institute of Science and Technology (KAISTIRB-2025&#x2010;32). All participants provided written informed consent prior to enrollment. Participants were informed of the study&#x2019;s purpose, procedures, potential risks, and their right to withdraw at any time without penalty. Personal data were anonymized using participant identification codes, and all data were stored on password-protected, encrypted servers accessible only to the research team. Participants received financial compensation (up to &#x20A9;300,000, approximately US $216) contingent on protocol adherence.</p></sec><sec id="s2-4"><title>The Intervention Delivery Method</title><p>The ARIA intervention was delivered as a handheld mobile app running on iOS devices. To ensure standardization across participants, all were provided with an iPhone 14 Pro and Apple AirPods Pro 2 earbuds. The app was developed using <italic>Unity</italic> 2022.3 LTS with ARKit 4.0 for spatial tracking and used the <italic>Wwise</italic> audio engine for 3D sound rendering [<xref ref-type="bibr" rid="ref34">34</xref>,<xref ref-type="bibr" rid="ref35">35</xref>].</p><sec id="s2-4-1"><title>Hardware Configuration</title><p>The iPhone 14 Pro&#x2019;s Light Detection and Ranging sensor and ARKit capabilities enabled real-time environmental mapping and 6-degree-of-freedom position tracking. The AirPods Pro 2 provided spatial audio rendering with dynamic head tracking via embedded gyroscopes, updating audio presentation based on head orientation. This configuration allowed participants to experience naturalistic spatial cues while moving freely within their physical environment.</p></sec><sec id="s2-4-2"><title>Gameplay and Training Protocol</title><p>Each game session follows a structured workflow (<xref ref-type="fig" rid="figure1">Figure 1</xref>), beginning with participants reporting their current state, modeled after the Affective Digital Sliders (hours of sleep, fatigue 1&#x2010;100, and mood valence 1&#x2010;100) [<xref ref-type="bibr" rid="ref36">36</xref>].</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>App flow diagram from the user&#x2019;s perspective. Augmented Reality Immersive Auditory training (ARIA) session flow depicting setup, gameplay loop, and session end from the user&#x2019;s perspective. AR: augmented reality; UI: user interface.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e91260_fig01.png"/></fig><p>Before gameplay, participants scan their physical environment with the mobile device and designate a play area. The ARIA system analyzes the detected surfaces and applies acoustic reflectors to enable first-order early reflections through the <italic>Wwise</italic> audio engine. This environmental calibration ensures that the spatial audio matches the geometric properties of the user&#x2019;s actual space, creating an in situ training experience. 
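</p><p>To illustrate the acoustic principle behind this calibration, a first-order early reflection can be modeled with the image-source method: mirroring the source across a detected planar surface yields a virtual source whose distance to the listener determines the arrival time of that reflection. The following Python sketch is a minimal geometric illustration under this assumption; it does not reproduce the <italic>Wwise</italic> rendering pipeline, and the wall and positions are hypothetical:</p><preformat>import numpy as np

SPEED_OF_SOUND_M_S = 343.0

def image_source(source, plane_point, plane_normal):
    """Mirror a source position across a planar reflector (first-order image source)."""
    n = plane_normal / np.linalg.norm(plane_normal)
    return source - 2.0 * np.dot(source - plane_point, n) * n

# Hypothetical wall at x = 0, as if detected by the room scan.
src = np.array([1.5, 1.2, 0.0])
listener = np.array([2.0, 1.6, 0.0])
img = image_source(src, np.array([0.0, 0.0, 0.0]), np.array([1.0, 0.0, 0.0]))

# Path lengths translate into arrival times of the direct sound and reflection.
direct_ms = np.linalg.norm(src - listener) / SPEED_OF_SOUND_M_S * 1e3
reflected_ms = np.linalg.norm(img - listener) / SPEED_OF_SOUND_M_S * 1e3
print(f"direct {direct_ms:.2f} ms, first-order reflection {reflected_ms:.2f} ms")</preformat><p>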
A video demonstration of all session phases, including environment setup, active listening, response interface, and feedback visualization, is provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>Following setup, participants engaged in 36 training trials. During each trial, participants localized and discriminated 2 concurrent sound sources: 1 &#x201C;hostile&#x201D; and 1 &#x201C;friendly,&#x201D; consisting of distinct filtered noise bursts presented against continuous nonspatialized multitalker speech babble. For 20 seconds, players actively explored the soundscape by physically moving and rotating their heads to optimize spatial cues. Following this listening period, a 2D representation of the play area appeared on-screen, where players identified both the location and type of each sound source (<xref ref-type="fig" rid="figure2">Figure 2</xref>A: Listening &#x0026; Response UI). After each response, participants received immediate feedback via a user interface display and AR spatial visualization showing correct source locations for errors. Including environment setup and condition reporting (approximately 3 min) and active gameplay (approximately 20 min, depending on individual response speed), the total session duration was approximately 20 to 25 minutes.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Representative interface screenshots illustrating key phases of the Augmented Reality Immersive Auditory training (ARIA) system. Screenshots were captured on the study-provided hardware (iPhone 14 Pro) and are illustrative of the app interface rather than being taken from participant sessions. UI: user interface.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e91260_fig02.png"/></fig><p>Participants completed a 4-week protocol consisting of 4 training sessions and 1 evaluation session per week. Each session contained 36 trials. Training sessions provided immediate visual and auditory feedback to reinforce correct performance and narrative engagement. In contrast, weekly evaluation sessions withheld feedback to assess skill consolidation. To ensure the training was appropriately challenging, ARIA used an adaptive difficulty progression with 6 levels that varied systematically in acoustic complexity: changing the stimuli from stationary (fixed in 1 location) to moving (following a set path), decreasing the signal-to-noise ratio (SNR), and narrowing the correct answer threshold. All participants began at level 1, and subsequent levels were unlocked only after achieving 75% or more accuracy on the current level, allowing participants to advance at their own pace.
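</p><p>To make this gating rule concrete, the Python sketch below encodes the level parameters from Table 1 together with the 75% unlock criterion; the identifiers are ours for illustration and are not taken from the ARIA codebase:</p><preformat>from dataclasses import dataclass

@dataclass(frozen=True)
class Level:
    snr_db: int          # signal-to-noise ratio of the target stimuli
    moving: bool         # stationary vs moving sound sources
    threshold_m: float   # radius counted as a correct localization

# Level parameters as listed in Table 1.
LEVELS = [
    Level(0, False, 1.0),   Level(0, True, 1.0),
    Level(-3, False, 1.0),  Level(-3, True, 1.0),
    Level(-6, False, 0.75), Level(-6, True, 0.75),
]

UNLOCK_ACCURACY = 0.75  # 75% or more accuracy unlocks the next level

def next_level(current, session_accuracy):
    """Advance one level only when the unlock criterion is met."""
    if session_accuracy &gt;= UNLOCK_ACCURACY and current + 1 &lt; len(LEVELS):
        return current + 1
    return current</preformat><p>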
The details of the level progression are outlined in <xref ref-type="table" rid="table1">Table 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Augmented Reality Immersive Auditory training (ARIA) level difficulty parameters used in the 4-week at-home training protocol with middle-aged adults (N=11).</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Level</td><td align="left" valign="bottom">SNR<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> (dB)</td><td align="left" valign="bottom">Auditory stimuli</td><td align="left" valign="bottom">Correct threshold (m)</td></tr></thead><tbody><tr><td align="left" valign="top">1</td><td align="left" valign="top">0</td><td align="left" valign="top">Stationary</td><td align="left" valign="top">1.0</td></tr><tr><td align="left" valign="top">2</td><td align="left" valign="top">0</td><td align="left" valign="top">Moving</td><td align="left" valign="top">1.0</td></tr><tr><td align="left" valign="top">3</td><td align="left" valign="top">&#x2212;3</td><td align="left" valign="top">Stationary</td><td align="left" valign="top">1.0</td></tr><tr><td align="left" valign="top">4</td><td align="left" valign="top">&#x2212;3</td><td align="left" valign="top">Moving</td><td align="left" valign="top">1.0</td></tr><tr><td align="left" valign="top">5</td><td align="left" valign="top">&#x2212;6</td><td align="left" valign="top">Stationary</td><td align="left" valign="top">0.75</td></tr><tr><td align="left" valign="top">6</td><td align="left" valign="top">&#x2212;6</td><td align="left" valign="top">Moving</td><td align="left" valign="top">0.75</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>SNR: signal-to-noise ratio.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-4-3"><title>Deployment Context</title><p>The prototype developed for this study is not yet publicly available and was manually installed by the research team on all study devices. All participants were provided with identical devices (iPhone 14 Pro and Apple AirPods Pro 2) with preconfigured account credentials, eliminating the need for participant-managed login or setup. In-game performance data were uploaded automatically after each session to a laboratory-managed cloud database using anonymized participant identifiers, with access restricted to the research team. No personally identifiable information was stored alongside performance data. The system remained stable throughout the study period, with no instances of failed data uploads, mislogged sessions, or participant-reported technical failures. No modifications were made to the app or training protocol during the study period.</p></sec></sec><sec id="s2-5"><title>Study Procedure</title><p>The study was conducted over 8 weeks and consisted of a baseline assessment, a 4-week training period, and a follow-up retention assessment. At baseline (week 0), participants attended an in-person session where they received study devices (iPhone 14 Pro and AirPods Pro 2) and completed a 30-minute tutorial with research staff guidance. Baseline speech-in-noise perception was assessed using the Korean Matrix Sentence Test (KMST) at 3 fixed SNR conditions (0 dB, &#x2212;6 dB, and &#x2212;9 dB) delivered diotically through Sennheiser HD 650 headphones at each participant&#x2019;s most comfortable listening level in a sound-treated room [<xref ref-type="bibr" rid="ref37">37</xref>]. 
Hearing thresholds were screened using the mobile Mimi Hearing Test app, which has previously been found to produce results comparable to standard audiograms [<xref ref-type="bibr" rid="ref38">38</xref>]. Additionally, to account for potential baseline differences in auditory processing skills, participants&#x2019; musical experience was quantified using the Goldsmiths Musical Sophistication Index [<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref40">40</xref>].</p><p>During the 4-week training period (weeks 1&#x2010;4), participants completed training and evaluation sessions at home on self-selected schedules. The research team sent scheduled reminders about remaining sessions and current progress. Participants returned for posttraining assessment at week 4, where the KMST was readministered using identical procedures, and participants completed the System Usability Scale (SUS) and Player Experience of Need Satisfaction (PENS) questionnaire, followed by a 20- to 30-minute semistructured exit interview [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. A final follow-up at week 8 consisted of a KMST reassessment to evaluate retention of training effects.</p></sec><sec id="s2-6"><title>Outcome Measures</title><p>Speech-in-noise perception was measured using the KMST, with the percentage of correctly identified words at 3 SNRs (0 dB, &#x2212;6 dB, and &#x2212;9 dB) as outcomes. The ARIA system logged trial-by-trial performance, including localization accuracy (identifying sound source position), overall accuracy (both source position and discrimination), response time, and distance error. These metrics were analyzed separately for training sessions and evaluation sessions. User experience was evaluated through the SUS (0&#x2010;100 scale) and PENS, which assessed game engagement through Competence, Autonomy, and Presence subscales (1&#x2010;7 scale), with the Relatedness subscale omitted as ARIA lacks social features and nonplayable characters. Semistructured exit interviews were conducted to explore participants&#x2019; experiences with the AR technology, their perceived benefits and challenges, and their suggestions for improvement (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). Interview data were analyzed using reflexive thematic analysis. The lead author (PK) performed initial line-by-line inductive coding, followed by a process of collaborative refinement with the research team (SYK, HJL, and IYC). This peer debriefing allowed for a multiperspective reading of the data to establish the credibility and dependability of the thematic structure. All themes remained grounded in specific participant excerpts to maintain confirmability, while transferability was supported through the detailed reporting of our recruitment context and demographics [<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>].</p><p>All analyses were conducted using R version 4.3.1 (R Foundation for Statistical Computing) with an &#x03B1; level of .05. KMST improvements were assessed using paired 2-tailed <italic>t</italic> tests with Bonferroni correction for multiple comparisons across 3 SNR conditions. Effect sizes were calculated using Cohen <italic>d</italic>. Spearman correlations examined relationships between in-game performance improvements and KMST gains. 
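</p><p>Although all analyses were run in R, the core testing procedure can be illustrated with an equivalent Python sketch using <italic>scipy</italic>; the score vectors below are hypothetical placeholders, not the study data:</p><preformat>import numpy as np
from scipy import stats

def paired_tests(pre, post, n_comparisons=3):
    """Paired t test with Bonferroni correction and Cohen d for paired data."""
    t, p = stats.ttest_rel(post, pre)
    p_bonferroni = min(p * n_comparisons, 1.0)  # 3 SNR conditions tested
    diff = post - pre
    cohen_d = diff.mean() / diff.std(ddof=1)
    return t, p_bonferroni, cohen_d

# Hypothetical word-recognition scores (%) for one SNR condition (N=11).
pre = np.array([70.0, 68, 75, 72, 66, 74, 71, 69, 73, 67, 70])
post = np.array([78.0, 70, 80, 79, 72, 81, 74, 75, 80, 70, 76])
print(paired_tests(pre, post))

# Spearman rank correlation between in-game gains and KMST gains.
localization_gain = np.array([0.10, 0.02, 0.15, 0.12, 0.05, 0.14,
                              0.04, 0.08, 0.13, 0.03, 0.07])
rho, p_rho = stats.spearmanr(localization_gain, post - pre)</preformat><p>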
Linear mixed-effects models assessed session-by-session learning trajectories for in-game metrics during evaluation sessions.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Participant Characteristics and Feasibility</title><p>A total of 11 participants (8 males and 3 females) with a mean age of 53.0 (SD 3.0) years completed the 8-week study protocol. Demographic details, including education level, living conditions, and usage of smartphones and video games, are summarized in <xref ref-type="table" rid="table2">Table 2</xref>. All participants who enrolled completed all phases of the study, resulting in a retention rate of 100%. Initial performance on the KMST at baseline showed a mean score of 92.5% (SD 2.3) at 0 dB SNR, 72.9% (SD 8.7) at &#x2212;6 dB SNR, and 46.6% (SD 15.2) at &#x2212;9 dB SNR. These 3 conditions represent graded levels of difficulty for the same speech-in-noise perception outcome, with lower SNRs imposing progressively greater perceptual and cognitive demands. Baseline scores confirmed that the more challenging conditions provided sufficient room for improvement while the near-ceiling performance at 0 dB SNR reflected participants&#x2019; functional hearing status.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Participant demographics and characteristics (N=11).</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Demographics and characteristics</td><td align="left" valign="bottom">Value, n (%)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Age (years)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>50&#x2010;60<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="top">11 (100)</td></tr><tr><td align="left" valign="top" colspan="2">Sex</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Male</td><td align="left" valign="top">8 (72.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Female</td><td align="left" valign="top">3 (27.3)</td></tr><tr><td align="left" valign="top" colspan="2">Education level</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>High school graduate</td><td align="left" valign="top">1 (9.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Bachelor&#x2019;s degree</td><td align="left" valign="top">5 (45.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Graduate degree</td><td align="left" valign="top">5 (45.5)</td></tr><tr><td align="left" valign="top" colspan="2">Household type</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Single person</td><td align="left" valign="top">1 (9.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>With spouse</td><td align="left" valign="top">5 (45.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>With spouse and children</td><td align="left"
valign="top">5 (45.5)</td></tr><tr><td align="left" valign="top" colspan="2">Musical training</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content> None</td><td align="left" valign="top">10 (90.9)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Musically trained</td><td align="left" valign="top">1 (9.1)</td></tr><tr><td align="left" valign="top" colspan="2">Daily smartphone usage (hours)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x003C;1</td><td align="left" valign="top">3 (27.3)</td></tr><tr><td align="char" char="." valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1&#x2010;3</td><td align="left" valign="top">5 (45.5)</td></tr><tr><td align="char" char="." valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>3&#x2010;5</td><td align="left" valign="top">3 (27.3)</td></tr><tr><td align="left" valign="top" colspan="2">Game experience<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Nongamer</td><td align="left" valign="top">5 (45.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Casual gamer</td><td align="left" valign="top">4 (36.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Frequent gamer</td><td align="left" valign="top">2 (18.2)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>mean 53.0 (SD 3) years.</p></fn><fn id="table2fn2"><p><sup>b</sup>Gaming experience was categorized based on self-reported frequency: "nongamer" indicates no play, "casual gamer" indicates monthly or biweekly play, and "frequent gamer" indicates weekly or daily play.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Speech-in-Noise Perception (KMST) Improvements</title><p>To evaluate the preliminary efficacy of the ARIA training program, exploratory analyses of speech-in-noise perception across all tested conditions were conducted. Paired 2-tailed <italic>t</italic> tests with Bonferroni correction yielded significant gains from pretraining to posttraining at 0 dB SNR (<italic>t</italic><sub>10</sub>=3.43; <italic>P</italic>=.02; <italic>d</italic>=1.20),&#x2212;6 dB SNR (<italic>t</italic><sub>10</sub>=5.34; <italic>P</italic>&#x003C;.001; <italic>d</italic>=1.14), and &#x2212;9 dB SNR (<italic>t</italic><sub>10</sub>=4.34; <italic>P</italic>=.004; <italic>d</italic>=0.58; <xref ref-type="fig" rid="figure3">Figure 3</xref>). Improvements were largest at the more challenging listening conditions, where baseline performance left greater room for change. All effect sizes met or exceeded the Cohen convention for medium effect (<italic>d</italic>&#x003E;0.5); however, given the single-arm design and small sample size, these should be interpreted as preliminary efficacy signals. Individual participant analysis revealed consistent improvement patterns: 9 of 11 (81.8%) participants improved at 0 dB SNR, all participants improved at &#x2212;6 dB SNR, and 10 of 11 (90.9%) participants improved at &#x2212;9 dB SNR. 
All participants improved in at least 2 conditions. Follow-up assessment at 8 weeks showed no significant decline from posttraining scores at either &#x2212;6 dB (<italic>P</italic>=.05) or &#x2212;9 dB (<italic>P</italic>=.77), suggesting short-term retention. However, repeated exposure to the same test and the absence of a control group limit the interpretation.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Speech-in-noise perception across study phases (N=11). Korean Matrix Sentence Test (KMST) scores at pretraining, posttraining, and 8-week follow-up, faceted by signal-to-noise ratio (SNR) conditions (at 0 dB, &#x2212;6 dB, and &#x2212;9 dB), representing graded levels of listening difficulty. Box plots display the median (IQR) and individual participant scores. Higher scores indicate better performance.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e91260_fig03.png"/></fig></sec><sec id="s3-3"><title>In-Game Learning Trajectories</title><p>Participants demonstrated significant learning within the ARIA game during weekly evaluation sessions. Level-matched analysis comparing the first and last evaluation sessions revealed robust improvements in foundational spatial hearing component skills: localization accuracy improved by 12.4% (<italic>t</italic><sub>10</sub>=3.85; <italic>P</italic>=.003; <italic>d</italic>=1.16) and distance error decreased by 20.1% (<italic>t</italic><sub>10</sub>=6.50; <italic>P</italic>&#x003C;.001; <italic>d</italic>=1.96; <xref ref-type="fig" rid="figure4">Figure 4</xref>). However, overall accuracy&#x2014;requiring simultaneous correct identification of both sound type and location&#x2014;showed only a nonsignificant trend (6.9% gain; <italic>t</italic><sub>10</sub>=1.58; <italic>P</italic>=.15; <italic>d</italic>=0.48). This dissociation suggests that while component spatial skills improved reliably, integrating these skills under the attentional demands of the full task remained more challenging. Linear mixed-effects models across all 4 evaluation sessions confirmed these patterns, with significant session-by-session improvements for localization (&#x03B2;=.038; <italic>P</italic>=.02) and distance (&#x03B2;=&#x2212;.067; <italic>P</italic>&#x003C;.001) but not overall accuracy (&#x03B2;=.020; <italic>P</italic>=.21). Presession self-report measures of sleep, fatigue, and valence did not show significant associations with in-game performance or learning trajectories, with individual response patterns suggesting limited variability across sessions.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>In-game skill improvement during evaluation sessions. Mean performance on key in-game metrics across the 4 weekly evaluation sessions, where no performance feedback was provided. (A) Overall accuracy (proportion of trials with correct sound identification and position). (B) Localization accuracy (proportion of correctly identified source positions). (C) Distance error between response and actual source position (meters; lower values indicate better performance).
Error bars represent the SE of the mean.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e91260_fig04.png"/></fig></sec><sec id="s3-4"><title>Transfer Effects: Skill-Specific Correlations</title><p>To explore potential mechanisms of improvement, correlations between in-game skill development and KMST gains were analyzed. Total training time showed a moderate positive correlation with KMST improvement at &#x2212;6 dB SNR (&#x03C1;=0.547; <italic>P</italic>=.08), which approached but did not reach statistical significance.</p><p>Component skill analysis revealed stronger relationships for localization accuracy. Localization improvements during training correlated significantly with KMST gains at &#x2212;6 dB SNR (&#x03C1;=0.639; <italic>P</italic>=.03) and &#x2212;9 dB SNR (&#x03C1;=0.612; <italic>P</italic>=.045). Distance error reduction showed a weaker relationship with KMST gains at &#x2212;6 dB (&#x03C1;=0.547; <italic>P</italic>=.08). These skill-specific correlations suggest a potential association between trained spatial hearing abilities and speech-in-noise performance, though the direction and underlying mechanisms of this relationship require confirmation in controlled trials.</p></sec><sec id="s3-5"><title>Usability and User Experience: System Usability and Engagement</title><p>The acceptability and usability of the AR-based intervention were assessed, as these factors are critical for determining real-world viability. The ARIA app demonstrated acceptable usability for the target population. The mean SUS score was 70.2 (SD 19.6), just above the commonly cited industry benchmark of approximately 68 for &#x201C;average&#x201D; usability [<xref ref-type="bibr" rid="ref45">45</xref>]. More notable than the mean, however, was the wide range (47.5&#x2010;92.5), indicating substantial individual differences in usability perception that warranted further investigation through qualitative analysis. The PENS scale indicated that the core gameplay loop was engaging, with participants reporting high levels of Competence (mean 5.2, SD 0.89; range 3&#x2010;7) but more moderate levels of Autonomy (mean 4.5, SD 1.62; range 2&#x2010;5) and Presence (mean 4.8, SD 1.08; range 3&#x2010;6; <xref ref-type="table" rid="table3">Table 3</xref>). This pattern of high competence with moderate autonomy may reflect the structured training paradigm, where progression is prescribed rather than self-directed, and suggests that introducing greater player agency in session structure or difficulty selection could improve engagement.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Usability and player experience scores (N=11).</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Measure</td><td align="left" valign="bottom">Scale range</td><td align="left" valign="bottom">Mean (SD)</td><td align="left" valign="bottom">Median (range)</td></tr></thead><tbody><tr><td align="left" valign="top">Usability</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>System Usability Scale (SUS)</td><td align="char" char="." valign="top">0&#x2010;100</td><td align="char" char="." valign="top">70.2 (19.6)</td><td align="char" char="."
valign="top">72.5 (42.5&#x2010;95)</td></tr><tr><td align="left" valign="top">Player experience</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PENS: competence</td><td align="char" char="." valign="top">1&#x2010;7</td><td align="char" char="." valign="top">5.2 (0.89)</td><td align="char" char="." valign="top">5.3 (5&#x2010;7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PENS: autonomy</td><td align="char" char="." valign="top">1&#x2010;7</td><td align="char" char="." valign="top">4.5 (1.62)</td><td align="char" char="." valign="top">5.0 (2&#x2010;5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PENS: presence</td><td align="char" char="." valign="top">1&#x2010;7</td><td align="char" char="." valign="top">4.8 (1.09)</td><td align="char" char="." valign="top">4.9 (3&#x2010;6)</td></tr></tbody></table></table-wrap></sec><sec id="s3-6"><title>Qualitative Findings</title><sec id="s3-6-1"><title>Overview</title><p>To understand the drivers of the observed usability variance and to identify specific barriers to long-term adoption, a thematic analysis of exit interviews was conducted. Five themes emerged that contextualize the quantitative usability findings and provide actionable insights for intervention refinement.</p><p>Thematic analysis of the 11 participant interviews revealed 5 major themes regarding their experience with the AR Auditory Training Game. Four of these themes were highly dominant: (1) usability of AR and game setup, (2) attention and cognitive load, (3) the training experience as &#x201C;homework,&#x201D; and (4) enjoyment and motivation through mastery. A fifth theme, desire for multiplayer and social features, was also represented, mentioned by 45% (5/11) of participants. Interview durations were, on average, 23.1 (SD 2.1) minutes long.</p></sec><sec id="s3-6-2"><title>Theme 1: Usability of AR and Game Setup</title><p>A highly dominant theme was the challenge participants faced with the game&#x2019;s initial AR setup. Eight (72%) participants reported difficulties, primarily with the room scanning and space definition process, which they found to be inconsistent, time-consuming, and a recurring source of friction:</p><disp-quote><p>The game itself once I had it setup was very simple and I got the hang of it after a couple of trials. But setting up the area to play? The part where I had to scan the room was very confusing, I did not know when to stop or if I did it correctly or not. After a while I got used to it, but I am still unsure if I did it well.</p><attrib>P03</attrib></disp-quote><disp-quote><p>When setting the location for the play area before playing...I realized that the degree markers were slightly different each time. Like the day before what was 0 degrees differed even though I set it at the same location. It was not consistent which made it confusing.</p><attrib>P10</attrib></disp-quote><p>Several participants suggested that the process could be streamlined by allowing the game to save a previously scanned space, thereby removing the need to rescan for every session:</p><disp-quote><p>The scanning itself I did not have too much difficulty getting used to [...] 
But honestly, it was annoying to set the same space for each session. I played at home and in the same location for all my sessions and a feature to save the space would be really helpful and save time.</p><attrib>P08</attrib></disp-quote></sec><sec id="s3-6-3"><title>Theme 2: Attention and Cognitive Load</title><p>All 11 participants noted that the game demanded a significant level of sustained attention and concentration. This was seen as the core cognitive skill required for success. The task became particularly demanding in higher-difficulty stages, where distinguishing between multiple, similar sounds added to the cognitive load:</p><disp-quote><p>Listening to 2 sounds at once was a bit difficult...when I&#x2019;m listening to one sound, it&#x2019;s clear, but with two, I have to listen to this side and that side, and it gets confusing. Sometimes I even forgot which one was the hostile or friendly one as I was moving.</p><attrib>P02</attrib></disp-quote><disp-quote><p>You really need to be focused. When you&#x2019;re just a little distracted by something else, you miss it. Especially as the difficulty level is high, there&#x2019;s almost no difference [between the sounds]...You have to be completely focused to catch the subtle characteristics.</p><attrib>P06</attrib></disp-quote><p>This cognitive demand was also tied to physical and mental fatigue, with many noting that performance dropped when they were tired or when their concentration waned:</p><disp-quote><p>The training while it was simple really demanded all my attention and on the days I had a tough day at work or slept too little. My performance dropped noticeably.</p><attrib>P05</attrib></disp-quote></sec><sec id="s3-6-4"><title>Theme 3: The Training Experience as &#x201C;Homework&#x201D;</title><p>The sentiment that the training felt like an obligation or &#x201C;homework&#x201D; was expressed by 9 (81%) of the 11 participants. This feeling was driven by the structured nature of the research, the required frequency of play, and the lengthy sessions, with a few exceeding 25 minutes, including the setup and condition report:</p><disp-quote><p>To be honest, the feeling of it being a chore was stronger [...] Especially as I had to find time in my own life routine[...] During the weekdays I was only able to find time to do it after my work so I mostly did the training sessions at night.</p><attrib>P02</attrib></disp-quote><p>The required duration of each session was a specific point of feedback related to this theme.</p><disp-quote><p>Thirty six trials felt a bit long. My concentration would drop after about 15 to 20 trials and my performance really went down until like trial 30 when I realized I was almost done. I think having some way to pause the game so I could take a short break before continuing again would have helped a lot.</p><attrib>P09</attrib></disp-quote></sec><sec id="s3-6-5"><title>Theme 4: Enjoyment and Motivation Through Mastery</title><p>Despite the challenges, 7 (63%) participants found motivation and enjoyment in the process of improving and mastering the game. The positive feedback loop of successfully identifying a sound and seeing their accuracy scores increase was a primary driver of engagement.
This sense of accomplishment was most potent when overcoming difficult levels:</p><disp-quote><p>To pinpoint the exact moment where I found the game fun is difficult as the game itself was pretty simple and repetitive, but what I remember most is gradually getting more and more correct as I played&#x2026; I could feel that my own skills were improving and which was the most satisfying feeling while playing the game.</p><attrib>P01</attrib></disp-quote><disp-quote><p>The most memorable moment was when I went to the higher levels. Because I was struggling at the earlier levels, it was intimidating to start the next level but even though I went to level 4 and I was still able to find them I knew I got a lot better which was rewarding.</p><attrib>P05</attrib></disp-quote><p>The evaluation sessions, while sometimes frustrating, also served as a catalyst for motivation:</p><disp-quote><p>While the first evaluation session was very difficult especially when I had to go through the levels that I did not try yet. The evaluation actually motivated me more. It made me want to get better quickly to challenge the next level and do better at the next evaluation session.</p><attrib>P07</attrib></disp-quote></sec><sec id="s3-6-6"><title>Theme 5: Desire for Multiplayer and Social Features</title><p>Several participants (5/11, 45%) suggested that adding multiplayer or social components would make the game more engaging and fun. The primary suggestion was to introduce a competitive element, such as leaderboards or head-to-head challenges, to foster motivation:</p><disp-quote><p>Because I did the training at home, my family asked what I was doing and were interested in the game [...] If I could have played with them in some sort of competition maybe comparing scores or how fast we could find the sounds it would make it easier.</p><attrib>P02</attrib></disp-quote><disp-quote><p>If there was a system where you could see other people&#x2019;s scores online or what level people are at, that would provide motivation...a little bit of a competitive spirit would kick in.</p><attrib>P03</attrib></disp-quote><p>Cooperative gameplay was also mentioned, with 1 participant envisioning working together with family to complete the tasks:</p><disp-quote><p>My kid loves playing games...I thought it would have been nice to play with him&#x2026; just being able to spend time with him while doing something together, something simple like this game would be a good experience...It would be fun to find [the sounds] together.</p><attrib>P08</attrib></disp-quote></sec></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Results</title><sec id="s4-1-1"><title>Overview</title><p>This formative pilot study evaluated the feasibility, acceptability, and preliminary efficacy of ARIA, a novel AR-based audio-motor training system for auditory training in middle-aged adults. Three primary aims were assessed. First, the study achieved 100% protocol completion over 8 weeks, demonstrating technical feasibility for at-home delivery; however, this retention rate was likely supported by performance-contingent financial compensation and may not generalize to nonincentivized contexts. Second, usability was acceptable (mean SUS=70.2, SD 19.6) but revealed substantial individual variability and critical barriers, particularly in AR environment setup. 
Third, exploratory efficacy analyses revealed preliminary signals of speech-in-noise improvement across all tested SNR conditions, with component skill improvements in localization correlating with KMST gains. Given the single-arm design and small sample, these findings warrant confirmation in controlled trials rather than interpretation as definitive intervention effects.</p></sec><sec id="s4-1-2"><title>Critical Usability Barriers and Design Solutions</title><p>Thematic analysis revealed a stark dissociation between technical feasibility and user experience. Despite achieving 100% retention, 72% (n=8) of participants identified the AR environmental setup as confusing and time-consuming. This friction reflects a combination of current limitations in handheld AR environmental mapping technology and interface-level design issues within ARIA. Environmental mapping challenges, such as inconsistent scan results from day to day and misalignment of visual cues on the physical scene, are partially inherent to current consumer-grade mobile hardware. These represent a critical constraint that future development must address through interface-level innovations. Interface-level solutions, such as saved environments, real-time scanning feedback, and adjustable visual anchors, are immediately actionable and can help mitigate these hardware-level inconsistencies. This barrier aligns with broader literature documenting technology acceptance challenges among middle-aged and older adults using AR or VR systems [<xref ref-type="bibr" rid="ref46">46</xref>]. The wide variance in the SUS scores (range 47.5&#x2010;92.5, SD 19.6) may partially reflect these setup difficulties, with participants who reported greater setup friction tending to give lower usability ratings. Participants&#x2019; suggestions converged on clear solutions: allowing the app to save previously scanned spaces to eliminate redundant scanning, providing clearer real-time feedback during scanning, adding adjustable visual indicators, and offering an audible comparison of how the scan changes the rendered acoustics. Given that even this relatively young and technologically comfortable sample experienced substantial setup difficulties, these barriers may be more pronounced for older adult populations (aged &#x2265;65 y) with less technology familiarity. However, because these barriers are identifiable, they represent addressable implementation constraints rather than fundamental limitations of the AR-based approach. Resolving these issues prior to deployment with older or clinical populations will be essential for scaling beyond controlled research settings.</p></sec><sec id="s4-1-3"><title>Engagement Dynamics and &#x201C;Homework&#x201D; Perception</title><p>Despite setup frustrations, the core training paradigm demonstrated genuine engagement potential. PENS Competence scores averaged 5.2/7 with SD of 0.89, and 63% (n=7) of participants reported intrinsic motivation through mastery experiences, which aligns with research showing that middle-aged and older adults are primarily motivated by meaningful learning and tangible skill development in serious games [<xref ref-type="bibr" rid="ref47">47</xref>].</p><p>However, 81% characterized the experience as &#x201C;homework.&#x201D; Qualitative analysis suggests this perception was primarily driven by the mandatory training regimen and fixed scheduling required by the research protocol rather than the game design itself.
Participants reported difficulty integrating the required frequency into their daily routines, often resorting to completing sessions at night after work when cognitive resources were already depleted. While some users noted that the session length and the 36 consecutive trials felt demanding, this burden was substantially compounded by the external obligation to maintain the protocol&#x2019;s required frequency. Notably, participants who described the training as homework simultaneously reported genuine enjoyment through mastery: the evaluation &#x201C;motivated me more...made me want to get better quickly&#x201D; [P07]. This suggests the core gameplay successfully fostered competence motivation even within a compliance-driven context, and real-world deployment with self-paced scheduling may reduce this perceived burden.</p></sec><sec id="s4-1-4"><title>Evidence for Skill Acquisition and Transfer</title><p>Participants described the training as cognitively demanding, emphasizing that sustained attention was critical for successful performance. This is consistent with our design intent: the dual-task requirement engaged participants in the effortful selective attention characteristic of real-world listening environments.</p><p>During training, participants demonstrated learning in foundational spatial hearing component skills such as localization, though integrated performance, which required simultaneous accuracy on both the discrimination and localization subtasks, showed more modest gains. Qualitative feedback identified a specific constraint: at higher difficulty levels, sound discrimination became prohibitively difficult, with users describing &#x201C;accurately localiz[ing] sounds but having to guess&#x201D; on source identity. This suggests that acoustic discriminability at high noise levels, rather than dual-task learning capacity, may have limited integrated performance. Future iterations should refine sound selection criteria or provide additional acoustic differentiation at higher difficulty levels.</p><p>Localization improvements during training correlated significantly with KMST gains at &#x2212;6 dB and &#x2212;9 dB. This exploratory, skill-specific association between trained spatial hearing abilities and speech-in-noise performance is consistent with audio-motor training frameworks proposing that spatial hearing training may enhance speech-in-noise perception through strengthened selective attention mechanisms [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref28">28</xref>], although it warrants confirmation in controlled trials. However, without targeted cognitive assessments, it is not possible to establish whether improvements reflect enhanced spatial attention, domain-general cognitive gains, or other mechanisms. Future studies should incorporate comprehensive cognitive assessments including spatial working memory, auditory selective attention, and executive function to clarify transfer pathways.</p></sec><sec id="s4-1-5"><title>An Unexpected Benefit: Hearing Health Awareness</title><p>Beyond the intended training effects, 1 participant reported that gameplay prompted them to seek a clinical hearing assessment after realizing they were missing sounds that family members easily detected&#x2014;a phenomenon they would have previously dismissed. 
While based on a single case, this health-seeking behavior illustrates a potentially valuable secondary function of performance-based interventions: heightening awareness of one&#x2019;s own capabilities through direct, experiential feedback rather than abstract self-evaluation.</p><p>This observation aligns with health belief model research identifying experiential &#x201C;cues to action&#x201D; as powerful predictors of health-seeking behaviors [<xref ref-type="bibr" rid="ref48">48</xref>]. Interestingly, this heightened awareness was not accompanied by higher subjective ratings of real-world improvement, as participants felt that &#x201C;daily life lacked the challenging noisy conditions of the game.&#x201D; Rather than indicating transfer failure, this reveals an opportunity: ARIA successfully raised hearing capability awareness, but users lacked the context to recognize real-world applications of trained skills. Future versions could incorporate educational content explicitly connecting in-game performance to everyday listening scenarios, potentially serving dual purposes as both a training tool and a hearing health screening platform.</p></sec></sec><sec id="s4-2"><title>Limitations</title><p>The principal limitations of this study are the absence of a control group and the small sample size (N=11), which together preclude definitive causal attribution. Observed KMST improvements may reflect practice effects, maturation, or placebo effects rather than training-specific benefits. While the short-term retention of gains at 8 weeks and the skill-specific correlations suggest a preliminary efficacy signal, only a randomized controlled trial with an active control can establish causality.</p><p>The assessment battery&#x2019;s scope presents another key constraint. Our reliance solely on the KMST prevents conclusions about the cognitive breadth of training effects. Without targeted cognitive assessments, it is not possible to distinguish whether improvements reflect enhanced spatial selective attention&#x2014;our proposed mechanism&#x2014;or more general cognitive gains. Furthermore, the training occurred in varied home environments, where factors such as room geometry, lighting, and surface reflectivity were not modeled analytically, potentially influencing both user experience and performance outcomes.</p><p>Additionally, the financial compensation provided (up to &#x20A9;300,000, approximately US $216) may have influenced the 100% adherence rate and the reported &#x201C;homework&#x201D; sentiment. These results may not generalize to nonincentivized, real-world deployment contexts. Finally, sample homogeneity restricts generalizability. Participants were highly educated, technologically proficient, middle-aged adults (mean age 53.0, SD 3.0 y) with functional hearing. Findings may not generalize to older adult populations (aged &#x2265;65 y) who may present with more advanced presbycusis, greater cognitive decline, and different technology adoption patterns. The current sample served as a feasibility testbed, but testing with older adult populations remains an essential next step. Moreover, participants had functional hearing not requiring amplification at baseline, whereas individuals with diagnosed hearing loss may show different training responses, engagement patterns, and usability challenges.</p></sec><sec id="s4-3"><title>Conclusions</title><p>This study demonstrated the feasibility of delivering a gamified, AR-based audio-motor training paradigm to middle-aged adults in their home environments. 
Beyond feasibility, several findings carry broader implications. The dissociation between successful core engagement and peripheral usability friction suggests that, for AR-delivered health interventions more generally, streamlining environmental setup is as critical as refining the intervention content itself. The exploratory correlations between spatial skill development and speech-in-noise gains provide a preliminary rationale for investigating audio-motor training as an approach to auditory intervention and potential rehabilitation. This hypothesis warrants further testing in controlled trials involving diverse clinical populations. Finally, the observed potential for performance-based training to promote hearing health awareness suggests a dual function for gamified auditory interventions&#x2014;as both training tools and experiential screening platforms. These findings provide a foundation for refining the ARIA platform and designing a larger randomized controlled trial.</p></sec></sec></body><back><ack><p>The authors thank Audiokinetic for providing a complimentary license for their Wwise audio engine and associated commercial plugins, which facilitated the development of the Augmented Reality Immersive Auditory training (ARIA) app. Generative artificial intelligence (AI) tools were used in two aspects of this work: (1) the cover image visualization was created using generative AI with source imagery derived from the ARIA app, and (2) AI-assisted grammatical editing was used during the draft revision process. All AI-generated or AI-assisted content was reviewed, verified, and approved by the authors. No generative AI was used for data analysis, interpretation of results, or the generation of scientific content.</p></ack><notes><sec><title>Funding</title><p>This research was supported by the National Research Foundation of Korea under a Young Scientist Research Grant (Project RS-2024-00354651).</p></sec><sec><title>Data Availability</title><p>In-game performance data are publicly available in JSON format on GitHub [<xref ref-type="bibr" rid="ref49">49</xref>]. 
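</p><p>For readers exploring these logs, the following minimal sketch illustrates one way the per-session JSON files might be loaded for secondary analysis. The directory name matches the repository cited in [<xref ref-type="bibr" rid="ref49">49</xref>], but the traversal pattern is an assumption rather than a documented layout; the repository&#x2019;s own documentation should be treated as authoritative.</p><preformat>
# Illustrative sketch (Python 3): load the public ARIA in-game performance logs.
# Assumes the GitHub repository [49] has been cloned into the working directory;
# the file layout below is an assumption, not a documented schema.
import json
from pathlib import Path

repo = Path("JMIRFormativeResearchARAuditoryTrainingData")
records = []
for path in sorted(repo.rglob("*.json")):  # recursively collect the JSON logs
    with path.open(encoding="utf-8") as f:
        records.append(json.load(f))

print(f"Loaded {len(records)} JSON log files")
</preformat><p>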
Behavioral outcome data (speech-in-noise scores, questionnaire responses) are available upon reasonable request from the corresponding author.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: SK, PK, HL, IC</p><p>Data curation: PK</p><p>Formal analysis: PK</p><p>Funding acquisition: SK, HL, IC</p><p>Investigation: SK, PK, HL, IC</p><p>Methodology: SK, PK, HL, IC</p><p>Project administration: SK</p><p>Resources: SK, PK, HL</p><p>Software: PK</p><p>Supervision: SK, HL, IC</p><p>Validation: SK, PK, HL, IC</p><p>Visualization: PK</p><p>Writing&#x2014;original draft: PK</p><p>Writing&#x2014;review and editing: SK, PK</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AR</term><def><p>augmented reality</p></def></def-item><def-item><term id="abb2">ARIA</term><def><p>Augmented Reality Immersive Auditory training</p></def></def-item><def-item><term id="abb3">KMST</term><def><p>Korean Matrix Sentence Test</p></def></def-item><def-item><term id="abb4">PENS</term><def><p>Player Experience of Need Satisfaction</p></def></def-item><def-item><term id="abb5">SNR</term><def><p>signal-to-noise ratio</p></def></def-item><def-item><term id="abb6">SUS</term><def><p>System Usability Scale</p></def></def-item><def-item><term id="abb7">VR</term><def><p>virtual reality</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wilson</surname><given-names>BS</given-names> </name><name name-style="western"><surname>Tucci</surname><given-names>DL</given-names> </name></person-group><article-title>Addressing the global burden of hearing loss</article-title><source>Lancet</source><year>2021</year><month>03</month><day>13</day><volume>397</volume><issue>10278</issue><fpage>945</fpage><lpage>947</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(21)00522-5</pub-id><pub-id pub-id-type="medline">33714376</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="report"><article-title>Addressing the rising prevalence of hearing loss</article-title><year>2018</year><access-date>2025-12-23</access-date><publisher-name>World Health Organization</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/publications/i/item/addressing-the-rising-prevalence-of-hearing-loss">https://www.who.int/publications/i/item/addressing-the-rising-prevalence-of-hearing-loss</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ciorba</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bianchini</surname><given-names>C</given-names> </name><name name-style="western"><surname>Pelucchi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Pastore</surname><given-names>A</given-names> </name></person-group><article-title>The impact of hearing loss on the quality of life of elderly adults</article-title><source>Clin Interv Aging</source><year>2012</year><volume>7</volume><fpage>159</fpage><lpage>163</lpage><pub-id pub-id-type="doi">10.2147/CIA.S26059</pub-id><pub-id pub-id-type="medline">22791988</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Livingston</surname><given-names>G</given-names> </name><name name-style="western"><surname>Sommerlad</surname><given-names>A</given-names> </name><name name-style="western"><surname>Orgeta</surname><given-names>V</given-names> </name><etal/></person-group><article-title>Dementia prevention, intervention, and care</article-title><source>Lancet</source><year>2017</year><month>12</month><day>16</day><volume>390</volume><issue>10113</issue><fpage>2673</fpage><lpage>2734</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(17)31363-6</pub-id><pub-id pub-id-type="medline">28735855</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dixon</surname><given-names>PR</given-names> </name><name name-style="western"><surname>Feeny</surname><given-names>D</given-names> </name><name name-style="western"><surname>Tomlinson</surname><given-names>G</given-names> </name><name name-style="western"><surname>Cushing</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Krahn</surname><given-names>MD</given-names> </name></person-group><article-title>Health-related quality of life changes associated with hearing loss</article-title><source>JAMA Otolaryngol Head Neck Surg</source><year>2020</year><month>07</month><day>1</day><volume>146</volume><issue>7</issue><fpage>630</fpage><lpage>638</lpage><pub-id pub-id-type="doi">10.1001/jamaoto.2020.0674</pub-id><pub-id pub-id-type="medline">32407468</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schoof</surname><given-names>T</given-names> </name><name name-style="western"><surname>Rosen</surname><given-names>S</given-names> </name></person-group><article-title>The role of auditory and cognitive factors in understanding speech in noise by normal-hearing older listeners</article-title><source>Front Aging Neurosci</source><year>2014</year><volume>6</volume><fpage>307</fpage><pub-id pub-id-type="doi">10.3389/fnagi.2014.00307</pub-id><pub-id pub-id-type="medline">25429266</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shinn-Cunningham</surname><given-names>BG</given-names> </name><name name-style="western"><surname>Best</surname><given-names>V</given-names> </name></person-group><article-title>Selective attention in normal and impaired hearing</article-title><source>Trends Amplif</source><year>2008</year><month>12</month><volume>12</volume><issue>4</issue><fpage>283</fpage><lpage>299</lpage><pub-id pub-id-type="doi">10.1177/1084713808325306</pub-id><pub-id pub-id-type="medline">18974202</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fu</surname><given-names>QJ</given-names> </name><name name-style="western"><surname>Galvin</surname><given-names>JJ</given-names> <suffix>III</suffix></name></person-group><article-title>Perceptual learning and auditory training in cochlear implant recipients</article-title><source>Trends 
Amplif</source><year>2007</year><month>09</month><volume>11</volume><issue>3</issue><fpage>193</fpage><lpage>205</lpage><pub-id pub-id-type="doi">10.1177/1084713807301379</pub-id><pub-id pub-id-type="medline">17709574</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Arons</surname><given-names>B</given-names> </name></person-group><article-title>A review of the cocktail party effect</article-title><source>J Am Voice I/O soc</source><year>1992</year><access-date>2026-04-23</access-date><volume>12</volume><issue>7</issue><fpage>35</fpage><lpage>50</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://ocw.mit.edu/courses/mas-632-conversational-computer-systems-fall-2008/51618a76e39f92ff8340b72e68dfdff2_arons_cocktail.pdf">https://ocw.mit.edu/courses/mas-632-conversational-computer-systems-fall-2008/51618a76e39f92ff8340b72e68dfdff2_arons_cocktail.pdf</ext-link></comment></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stevens</surname><given-names>C</given-names> </name><name name-style="western"><surname>Fanning</surname><given-names>J</given-names> </name><name name-style="western"><surname>Coch</surname><given-names>D</given-names> </name><name name-style="western"><surname>Sanders</surname><given-names>L</given-names> </name><name name-style="western"><surname>Neville</surname><given-names>H</given-names> </name></person-group><article-title>Neural mechanisms of selective auditory attention are enhanced by computerized training: electrophysiological evidence from language-impaired and typically developing children</article-title><source>Brain Res</source><year>2008</year><month>04</month><day>18</day><volume>1205</volume><fpage>55</fpage><lpage>69</lpage><pub-id pub-id-type="doi">10.1016/j.brainres.2007.10.108</pub-id><pub-id pub-id-type="medline">18353284</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Klatt</surname><given-names>LI</given-names> </name><name name-style="western"><surname>Getzmann</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wascher</surname><given-names>E</given-names> </name><name name-style="western"><surname>Schneider</surname><given-names>D</given-names> </name></person-group><article-title>The contribution of selective spatial attention to sound detection and sound localization: evidence from event-related potentials and lateralized alpha oscillations</article-title><source>Biol Psychol</source><year>2018</year><month>10</month><volume>138</volume><fpage>133</fpage><lpage>145</lpage><pub-id pub-id-type="doi">10.1016/j.biopsycho.2018.08.019</pub-id><pub-id pub-id-type="medline">30165081</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>S</given-names> </name><name name-style="western"><surname>Emory</surname><given-names>C</given-names> </name><name name-style="western"><surname>Choi</surname><given-names>I</given-names> </name></person-group><article-title>Neurofeedback training of auditory selective attention enhances speech-in-noise perception</article-title><source>Front Hum 
Neurosci</source><year>2021</year><volume>15</volume><fpage>676992</fpage><pub-id pub-id-type="doi">10.3389/fnhum.2021.676992</pub-id><pub-id pub-id-type="medline">34239430</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>O&#x2019;Sullivan</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Power</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Mesgarani</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Attentional selection in a cocktail party environment can be decoded from single-trial EEG</article-title><source>Cereb Cortex</source><year>2015</year><month>07</month><volume>25</volume><issue>7</issue><fpage>1697</fpage><lpage>1706</lpage><pub-id pub-id-type="doi">10.1093/cercor/bht355</pub-id><pub-id pub-id-type="medline">24429136</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dai</surname><given-names>L</given-names> </name><name name-style="western"><surname>Best</surname><given-names>V</given-names> </name><name name-style="western"><surname>Shinn-Cunningham</surname><given-names>BG</given-names> </name></person-group><article-title>Sensorineural hearing loss degrades behavioral and physiological measures of human spatial selective auditory attention</article-title><source>Proc Natl Acad Sci U S A</source><year>2018</year><month>04</month><day>3</day><volume>115</volume><issue>14</issue><fpage>E3286</fpage><lpage>E3295</lpage><pub-id pub-id-type="doi">10.1073/pnas.1721226115</pub-id><pub-id pub-id-type="medline">29555752</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stropahl</surname><given-names>M</given-names> </name><name name-style="western"><surname>Besser</surname><given-names>J</given-names> </name><name name-style="western"><surname>Launer</surname><given-names>S</given-names> </name></person-group><article-title>Auditory training supports auditory rehabilitation: a state-of-the-art review</article-title><source>Ear Hear</source><year>2020</year><volume>41</volume><issue>4</issue><fpage>697</fpage><lpage>704</lpage><pub-id pub-id-type="doi">10.1097/AUD.0000000000000806</pub-id><pub-id pub-id-type="medline">31613823</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Henshaw</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ferguson</surname><given-names>MA</given-names> </name></person-group><article-title>Efficacy of individual computer-based auditory training for people with hearing loss: a systematic review of the evidence</article-title><source>PLoS ONE</source><year>2013</year><volume>8</volume><issue>5</issue><fpage>e62836</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0062836</pub-id><pub-id pub-id-type="medline">23675431</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cambridge</surname><given-names>G</given-names> </name><name name-style="western"><surname>Taylor</surname><given-names>T</given-names> 
</name><name name-style="western"><surname>Arnott</surname><given-names>W</given-names> </name><name name-style="western"><surname>Wilson</surname><given-names>WJ</given-names> </name></person-group><article-title>Auditory training for adults with cochlear implants: a systematic review</article-title><source>Int J Audiol</source><year>2022</year><month>11</month><volume>61</volume><issue>11</issue><fpage>896</fpage><lpage>904</lpage><pub-id pub-id-type="doi">10.1080/14992027.2021.2014075</pub-id><pub-id pub-id-type="medline">35080191</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sweetow</surname><given-names>RW</given-names> </name><name name-style="western"><surname>Sabes</surname><given-names>JH</given-names> </name></person-group><article-title>Auditory training and challenges associated with participation and compliance</article-title><source>J Am Acad Audiol</source><year>2010</year><month>10</month><volume>21</volume><issue>9</issue><fpage>586</fpage><lpage>593</lpage><pub-id pub-id-type="doi">10.3766/jaaa.21.9.4</pub-id><pub-id pub-id-type="medline">21241646</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lai</surname><given-names>CYY</given-names> </name><name name-style="western"><surname>Ng</surname><given-names>PS</given-names> </name><name name-style="western"><surname>Chan</surname><given-names>AHD</given-names> </name><name name-style="western"><surname>Wong</surname><given-names>FCK</given-names> </name></person-group><article-title>Effects of auditory training in older adults</article-title><source>J Speech Lang Hear Res</source><year>2023</year><month>10</month><day>4</day><volume>66</volume><issue>10</issue><fpage>4137</fpage><lpage>4149</lpage><pub-id pub-id-type="doi">10.1044/2023_JSLHR-22-00621</pub-id><pub-id pub-id-type="medline">37656601</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tye-Murray</surname><given-names>N</given-names> </name><name name-style="western"><surname>Spehar</surname><given-names>B</given-names> </name><name name-style="western"><surname>Mauze</surname><given-names>E</given-names> </name><name name-style="western"><surname>Cardinal</surname><given-names>C</given-names> </name></person-group><article-title>Hearing health care digital therapeutics: patient satisfaction evidence</article-title><source>Am J Audiol</source><year>2022</year><month>09</month><day>21</day><volume>31</volume><issue>3S</issue><fpage>905</fpage><lpage>913</lpage><pub-id pub-id-type="doi">10.1044/2022_AJA-21-00236</pub-id><pub-id pub-id-type="medline">36037482</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Faller</surname><given-names>C</given-names> </name><name name-style="western"><surname>Merimaa</surname><given-names>J</given-names> </name></person-group><article-title>Source localization in complex listening situations: selection of binaural cues based on interaural coherence</article-title><source>J Acoust Soc Am</source><year>2004</year><month>11</month><volume>116</volume><issue>5</issue><fpage>3075</fpage><lpage>3089</lpage><pub-id 
pub-id-type="doi">10.1121/1.1791872</pub-id><pub-id pub-id-type="medline">15603153</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Higgins</surname><given-names>NC</given-names> </name><name name-style="western"><surname>McLaughlin</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Rinne</surname><given-names>T</given-names> </name><name name-style="western"><surname>Stecker</surname><given-names>GC</given-names> </name></person-group><article-title>Evidence for cue-independent spatial representation in the human auditory cortex during active listening</article-title><source>Proc Natl Acad Sci U S A</source><year>2017</year><month>09</month><day>5</day><volume>114</volume><issue>36</issue><fpage>E7602</fpage><lpage>E7611</lpage><pub-id pub-id-type="doi">10.1073/pnas.1707522114</pub-id><pub-id pub-id-type="medline">28827357</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gaveau</surname><given-names>V</given-names> </name><name name-style="western"><surname>Coudert</surname><given-names>A</given-names> </name><name name-style="western"><surname>Salemme</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Benefits of active listening during 3D sound localization</article-title><source>Exp Brain Res</source><year>2022</year><month>11</month><volume>240</volume><issue>11</issue><fpage>2817</fpage><lpage>2833</lpage><pub-id pub-id-type="doi">10.1007/s00221-022-06456-x</pub-id><pub-id pub-id-type="medline">36071210</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Baills</surname><given-names>F</given-names> </name><name name-style="western"><surname>Prieto</surname><given-names>P</given-names> </name></person-group><article-title>Training with embodied musical activities has positive effects on unfamiliar language imitation skills</article-title><access-date>2026-04-23</access-date><conf-name>Proceedings of Speech Prosody, 2022</conf-name><conf-date>May 23-26, 2022</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.isca-archive.org/speechprosody_2022/zhang22b_speechprosody.html#">https://www.isca-archive.org/speechprosody_2022/zhang22b_speechprosody.html#</ext-link></comment></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gessa</surname><given-names>E</given-names> </name><name name-style="western"><surname>Giovanelli</surname><given-names>E</given-names> </name><name name-style="western"><surname>Spinella</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Spontaneous head-movements improve sound localization in aging adults with hearing loss</article-title><source>Front Hum Neurosci</source><year>2022</year><volume>16</volume><fpage>1026056</fpage><pub-id pub-id-type="doi">10.3389/fnhum.2022.1026056</pub-id><pub-id pub-id-type="medline">36310849</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Steadman</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>C</given-names> </name><name name-style="western"><surname>Lestang</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Goodman</surname><given-names>DFM</given-names> </name><name name-style="western"><surname>Picinali</surname><given-names>L</given-names> </name></person-group><article-title>Short-term effects of sound localization training in virtual reality</article-title><source>Sci Rep</source><year>2019</year><month>12</month><day>4</day><volume>9</volume><issue>1</issue><fpage>18284</fpage><pub-id pub-id-type="doi">10.1038/s41598-019-54811-w</pub-id><pub-id pub-id-type="medline">31798004</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Valzolgher</surname><given-names>C</given-names> </name><name name-style="western"><surname>Capra</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sum</surname><given-names>K</given-names> </name><name name-style="western"><surname>Finos</surname><given-names>L</given-names> </name><name name-style="western"><surname>Pavani</surname><given-names>F</given-names> </name><name name-style="western"><surname>Picinali</surname><given-names>L</given-names> </name></person-group><article-title>Spatial hearing training in virtual reality with simulated asymmetric hearing loss</article-title><source>Sci Rep</source><year>2024</year><month>01</month><day>30</day><volume>14</volume><issue>1</issue><fpage>2469</fpage><pub-id pub-id-type="doi">10.1038/s41598-024-51892-0</pub-id><pub-id pub-id-type="medline">38291126</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Whitton</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Hancock</surname><given-names>KE</given-names> </name><name name-style="western"><surname>Shannon</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Polley</surname><given-names>DB</given-names> </name></person-group><article-title>Audiomotor perceptual training enhances speech intelligibility in background noise</article-title><source>Curr Biol</source><year>2017</year><month>11</month><day>6</day><volume>27</volume><issue>21</issue><fpage>3237</fpage><lpage>3247</lpage><pub-id pub-id-type="doi">10.1016/j.cub.2017.09.014</pub-id><pub-id pub-id-type="medline">29056453</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cao</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lam</surname><given-names>KY</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>LH</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Hui</surname><given-names>P</given-names> </name><name name-style="western"><surname>Su</surname><given-names>X</given-names> </name></person-group><article-title>Mobile augmented reality: user interfaces, frameworks, and intelligence</article-title><source>ACM Comput 
Surv</source><year>2023</year><month>09</month><day>30</day><volume>55</volume><issue>9</issue><fpage>1</fpage><lpage>36</lpage><pub-id pub-id-type="doi">10.1145/3557999</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gilberto</surname><given-names>LG</given-names> </name><name name-style="western"><surname>Bermejo</surname><given-names>FR</given-names> </name><name name-style="western"><surname>Tommasini</surname><given-names>FC</given-names> </name><name name-style="western"><surname>Garc&#x00ED;a Bauza</surname><given-names>C</given-names> </name></person-group><article-title>Virtual reality audio game for entertainment and sound localization training</article-title><source>ACM Trans Appl Percept</source><year>2025</year><month>01</month><day>31</day><volume>22</volume><issue>1</issue><fpage>1</fpage><lpage>24</lpage><pub-id pub-id-type="doi">10.1145/3676557</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Connolly</surname><given-names>TM</given-names> </name><name name-style="western"><surname>Boyle</surname><given-names>EA</given-names> </name><name name-style="western"><surname>MacArthur</surname><given-names>E</given-names> </name><name name-style="western"><surname>Hainey</surname><given-names>T</given-names> </name><name name-style="western"><surname>Boyle</surname><given-names>JM</given-names> </name></person-group><article-title>A systematic literature review of empirical evidence on computer games and serious games</article-title><source>Comput Educ</source><year>2012</year><month>09</month><volume>59</volume><issue>2</issue><fpage>661</fpage><lpage>686</lpage><pub-id pub-id-type="doi">10.1016/j.compedu.2012.03.004</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dama&#x0161;evi&#x010D;ius</surname><given-names>R</given-names> </name><name name-style="western"><surname>Maskeli&#x016B;nas</surname><given-names>R</given-names> </name><name name-style="western"><surname>Bla&#x017E;auskas</surname><given-names>T</given-names> </name></person-group><article-title>Serious games and gamification in healthcare: a meta-review</article-title><source>Information</source><year>2023</year><volume>14</volume><issue>2</issue><fpage>105</fpage><pub-id pub-id-type="doi">10.3390/info14020105</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Pooseung</surname><given-names>K</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>S</given-names> </name></person-group><article-title>Designing a augmented reality auditory training game for in-situ training and diagnostic tool for the hearing impaired</article-title><access-date>2026-04-23</access-date><conf-name>AES 6th International Conference on Audio for Games</conf-name><conf-date>Apr 27-29, 2024</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://pure.kaist.ac.kr/en/publications/designing-a-augmented-reality-auditory-training-game-for-in-situ-/">https://pure.kaist.ac.kr/en/publications/designing-a-augmented-reality-auditory-training-game-for-in-situ-/</ext-link></comment></nlm-citation></ref><ref 
id="ref34"><label>34</label><nlm-citation citation-type="web"><source>Unity Technologies</source><year>2022</year><access-date>2026-04-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://unity.com/">https://unity.com/</ext-link></comment></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="web"><article-title>Wwise</article-title><source>Audiokinetic inc</source><year>2023</year><access-date>2026-04-23</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.audiokinetic.com/products/wwise/">https://www.audiokinetic.com/products/wwise/</ext-link></comment></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Betella</surname><given-names>A</given-names> </name><name name-style="western"><surname>Verschure</surname><given-names>P</given-names> </name></person-group><article-title>The Affective Slider: A digital self-assessment scale for the measurement of human emotions</article-title><source>PLOS ONE</source><year>2016</year><volume>11</volume><issue>2</issue><fpage>e0148037</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0148037</pub-id><pub-id pub-id-type="medline">26849361</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>KH</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>JH</given-names> </name></person-group><article-title>Evaluation of the Korean Matrix Sentence Test: verification of the list equivalence and the effect of word position</article-title><source>Audiol Speech Res</source><year>2018</year><month>04</month><volume>14</volume><issue>2</issue><fpage>100</fpage><lpage>107</lpage><pub-id pub-id-type="doi">10.21848/asr.2018.14.2.100</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vercammen</surname><given-names>C</given-names> </name><name name-style="western"><surname>Strelcyk</surname><given-names>O</given-names> </name></person-group><article-title>Development and validation of a self-administered online hearing test</article-title><source>Trends Hear</source><year>2025</year><volume>29</volume><fpage>23312165251317923</fpage><pub-id pub-id-type="doi">10.1177/23312165251317923</pub-id><pub-id pub-id-type="medline">40101250</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>M&#x00FC;llensiefen</surname><given-names>D</given-names> </name><name name-style="western"><surname>Gingras</surname><given-names>B</given-names> </name><name name-style="western"><surname>Musil</surname><given-names>J</given-names> </name><name name-style="western"><surname>Stewart</surname><given-names>L</given-names> </name></person-group><article-title>The musicality of non-musicians: an index for assessing musical sophistication in the general population</article-title><source>PLOS ONE</source><year>2014</year><volume>9</volume><issue>2</issue><fpage>e89642</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0089642</pub-id><pub-id pub-id-type="medline">24586929</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Parbery-Clark</surname><given-names>A</given-names> </name><name name-style="western"><surname>Skoe</surname><given-names>E</given-names> </name><name name-style="western"><surname>Lam</surname><given-names>C</given-names> </name><name name-style="western"><surname>Kraus</surname><given-names>N</given-names> </name></person-group><article-title>Musician enhancement for speech-in-noise</article-title><source>Ear Hear</source><year>2009</year><month>12</month><volume>30</volume><issue>6</issue><fpage>653</fpage><lpage>661</lpage><pub-id pub-id-type="doi">10.1097/AUD.0b013e3181b412e9</pub-id><pub-id pub-id-type="medline">19734788</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lewis</surname><given-names>JR</given-names> </name></person-group><article-title>The System Usability Scale: Past, present, and future</article-title><source>Int J Hum Comput Interact</source><year>2018</year><month>07</month><day>3</day><volume>34</volume><issue>7</issue><fpage>577</fpage><lpage>590</lpage><pub-id pub-id-type="doi">10.1080/10447318.2018.1455307</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ryan</surname><given-names>RM</given-names> </name><name name-style="western"><surname>Rigby</surname><given-names>CS</given-names> </name><name name-style="western"><surname>Przybylski</surname><given-names>A</given-names> </name></person-group><article-title>The motivational pull of video games: a self-determination theory approach</article-title><source>Motiv Emot</source><year>2006</year><month>12</month><day>12</day><volume>30</volume><issue>4</issue><fpage>344</fpage><lpage>360</lpage><pub-id pub-id-type="doi">10.1007/s11031-006-9051-8</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Braun</surname><given-names>V</given-names> </name><name name-style="western"><surname>Clarke</surname><given-names>V</given-names> </name></person-group><article-title>Using thematic analysis in psychology</article-title><source>Qual Res Psychol</source><year>2006</year><month>01</month><volume>3</volume><issue>2</issue><fpage>77</fpage><lpage>101</lpage><pub-id pub-id-type="doi">10.1191/1478088706qp063oa</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Braun</surname><given-names>V</given-names> </name><name name-style="western"><surname>Clarke</surname><given-names>V</given-names> </name></person-group><article-title>Toward good practice in thematic analysis: avoiding common problems and be(com)ing a knowing researcher</article-title><source>Int J Transgend Health</source><year>2023</year><volume>24</volume><issue>1</issue><fpage>1</fpage><lpage>6</lpage><pub-id pub-id-type="doi">10.1080/26895269.2022.2129597</pub-id><pub-id pub-id-type="medline">36713144</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bangor</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kortum</surname><given-names>PT</given-names> 
</name><name name-style="western"><surname>Miller</surname><given-names>JT</given-names> </name></person-group><article-title>An empirical evaluation of the system usability scale</article-title><source>Int J Hum Comput Interact</source><year>2008</year><month>07</month><day>29</day><volume>24</volume><issue>6</issue><fpage>574</fpage><lpage>594</lpage><pub-id pub-id-type="doi">10.1080/10447310802205776</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>LN</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Hwang</surname><given-names>WJ</given-names> </name></person-group><article-title>Potential of augmented reality and virtual reality technologies to promote wellbeing in older adults</article-title><source>Appl Sci (Basel)</source><year>2019</year><volume>9</volume><issue>17</issue><fpage>3556</fpage><pub-id pub-id-type="doi">10.3390/app9173556</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cardona</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Lopez</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Vela</surname><given-names>FLG</given-names> </name><name name-style="western"><surname>Moreira</surname><given-names>F</given-names> </name></person-group><article-title>Meaningful learning: motivations of older adults in serious games</article-title><source>Univers Access Inf Soc</source><year>2023</year><month>03</month><day>14</day><volume>23</volume><issue>4</issue><fpage>1</fpage><lpage>16</lpage><pub-id pub-id-type="doi">10.1007/s10209-023-00987-y</pub-id><pub-id pub-id-type="medline">37361677</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Janz</surname><given-names>NK</given-names> </name><name name-style="western"><surname>Becker</surname><given-names>MH</given-names> </name></person-group><article-title>The health belief model: a decade later</article-title><source>Health Educ Q</source><year>1984</year><volume>11</volume><issue>1</issue><fpage>1</fpage><lpage>47</lpage><pub-id pub-id-type="doi">10.1177/109019818401100101</pub-id><pub-id pub-id-type="medline">6392204</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="web"><article-title>JMIRFormativeResearchARAuditoryTrainingData</article-title><source>GitHub</source><access-date>2026-05-07</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/seank0h/JMIRFormativeResearchARAuditoryTrainingData/">https://github.com/seank0h/JMIRFormativeResearchARAuditoryTrainingData/</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Gameplay demonstration video.</p><media xlink:href="formative_v10i1e91260_app1.docx" xlink:title="DOCX File, 15 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Interview guide questions.</p><media xlink:href="formative_v10i1e91260_app2.docx" xlink:title="DOCX File, 18 KB"/></supplementary-material></app-group></back></article>