<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i1e86069</article-id>
      <article-id pub-id-type="pmid">41636824</article-id>
      <article-id pub-id-type="doi">10.2196/86069</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Prediction of Aspiration Risk by Using Vocal Biomarkers: Machine Learning Development and Validation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Sarvestan</surname>
            <given-names>Javad</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Mahmoud</surname>
            <given-names>Mahmoud Badee Rokaya</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Varghese</surname>
            <given-names>Cyril</given-names>
          </name>
          <degrees>MD, MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Division of Pulmonary and Department of Critical Care Medicine</institution>
            <institution>Mayo Clinic in Arizona</institution>
            <addr-line>5777 East Mayo Blvd</addr-line>
            <addr-line>Phoenix, AZ, 85054</addr-line>
            <country>United States</country>
            <phone>1 480 301 8244</phone>
            <email>varghese.cyril@mayo.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0004-8415-8258</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Jianwei</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6419-2038</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Charney</surname>
            <given-names>Sara</given-names>
          </name>
          <degrees>MS, CCC-SLP</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0755-9280</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Abdalla</surname>
            <given-names>Abdelmohaymin A</given-names>
          </name>
          <degrees>MBBS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4566-9325</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Reeves</surname>
            <given-names>Elizabeth</given-names>
          </name>
          <degrees>MS, CCC-SLP</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0001-8376-772X</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Holyfield</surname>
            <given-names>Stacy</given-names>
          </name>
          <degrees>MS, CCC-SLP</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0008-2211-999X</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Brown</surname>
            <given-names>Adam E</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9457-7374</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Higgins</surname>
            <given-names>Michelle K</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4179-9942</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Stearns</surname>
            <given-names>Hunter</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0006-4287-9860</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>Liss</surname>
            <given-names>Julie</given-names>
          </name>
          <degrees>PhD, CCC-SLP</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8782-2901</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author">
          <name name-style="western">
            <surname>Zhang</surname>
            <given-names>Nan</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0985-4227</ext-link>
        </contrib>
        <contrib id="contrib12" contrib-type="author">
          <name name-style="western">
            <surname>Orbelo</surname>
            <given-names>Diana</given-names>
          </name>
          <degrees>PhD, CCC-SLP</degrees>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3035-2077</ext-link>
        </contrib>
        <contrib id="contrib13" contrib-type="author">
          <name name-style="western">
            <surname>Pittelko</surname>
            <given-names>Rebecca L</given-names>
          </name>
          <degrees>MS, CCC-SLP</degrees>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0003-2639-9566</ext-link>
        </contrib>
        <contrib id="contrib14" contrib-type="author">
          <name name-style="western">
            <surname>Rigelman</surname>
            <given-names>Lindsay</given-names>
          </name>
          <degrees>MA, CCC-SLP</degrees>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0001-1472-7273</ext-link>
        </contrib>
        <contrib id="contrib15" contrib-type="author">
          <name name-style="western">
            <surname>Ortega</surname>
            <given-names>Victor</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff8" ref-type="aff">8</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6361-7372</ext-link>
        </contrib>
        <contrib id="contrib16" contrib-type="author">
          <name name-style="western">
            <surname>Lott</surname>
            <given-names>David G</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6541-9687</ext-link>
        </contrib>
        <contrib id="contrib17" contrib-type="author">
          <name name-style="western">
            <surname>Berisha</surname>
            <given-names>Visar</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8804-8874</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Division of Pulmonary and Department of Critical Care Medicine</institution>
        <institution>Mayo Clinic in Arizona</institution>
        <addr-line>Phoenix, AZ</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>College of Health Solutions</institution>
        <institution>College of Engineering</institution>
        <institution>Arizona State University</institution>
        <addr-line>Tempe, AZ</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Division of Laryngology and Department of Otolaryngology-Head &amp; Neck Surgery</institution>
        <institution>Mayo Clinic in Arizona</institution>
        <addr-line>Phoenix, AZ</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Mayo Clinic Alix School of Medicine</institution>
        <institution>Mayo Clinic in Arizona</institution>
        <addr-line>Phoenix, AZ</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>College of Health Solutions</institution>
        <institution>Arizona State University</institution>
        <addr-line>Tempe, AZ</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Department of Quantitative Health Sciences</institution>
        <institution>Mayo Clinic in Arizona</institution>
        <addr-line>Phoenix, AZ</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>Division of Laryngology and Department of Otolaryngology-Head &amp; Neck Surgery</institution>
        <institution>Mayo Clinic</institution>
        <addr-line>Rochester, MN</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff8">
        <label>8</label>
        <institution>Division of Pulmonary</institution>
        <institution>Mayo Clinic in Arizona</institution>
        <addr-line>Phoenix, AZ</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Cyril Varghese <email>varghese.cyril@mayo.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2026</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>4</day>
        <month>3</month>
        <year>2026</year>
      </pub-date>
      <volume>10</volume>
      <elocation-id>e86069</elocation-id>
      <history>
        <date date-type="received">
          <day>28</day>
          <month>10</month>
          <year>2025</year>
        </date>
        <date date-type="rev-request">
          <day>1</day>
          <month>12</month>
          <year>2025</year>
        </date>
        <date date-type="accepted">
          <day>3</day>
          <month>2</month>
          <year>2026</year>
        </date>
      </history>
      <copyright-statement>©Cyril Varghese, Jianwei Zhang, Sara Charney, Abdelmohaymin A Abdalla, Elizabeth Reeves, Stacy Holyfield, Adam E Brown, Michelle K Higgins, Hunter Stearns, Julie Liss, Nan Zhang, Diana Orbelo, Rebecca L Pittelko, Lindsay Rigelman, Victor Ortega, David G Lott, Visar Berisha. Originally published in JMIR Formative Research (https://formative.jmir.org), 04.03.2026.</copyright-statement>
      <copyright-year>2026</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2026/1/e86069" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Aspiration causes or aggravates a variety of respiratory diseases. Subjective bedside evaluations of aspiration are limited by poor interrater and intrarater reliability, while gold standard diagnostic tests for aspiration, such as video fluoroscopic swallow study and fiberoptic endoscopic evaluation of swallowing, are cumbersome or invasive and health care resource-intensive.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to develop and validate a novel machine learning (ML) algorithm that can analyze simple vowel phonations to aid in predicting aspiration risk.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Phonations of [i] recorded during routine nasal endoscopy from 163 unique patients were retrospectively analyzed for acoustic features, including pitch, jitter, shimmer, harmonics-to-noise ratio, and others. Supervised ML was performed on the vowel phonations of those at high risk for aspiration versus those at low risk for aspiration. Ground truth of aspiration risk classification for model development was established using a video fluoroscopic swallow study. The performance of the ML model was tested on an independent, external cohort of patient voice samples. The performance of trained speech-language pathologists in categorizing high- versus low-risk aspirators by listening to phonations was compared against that of the ML model.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The mean ML risk score for those with the ground truth of high versus low aspiration risk was 0.530 (SD 0.310) vs 0.243 (SD 0.249), which was a significant difference (0.287, 95% CI 0.192-0.381; <italic>P</italic>&lt;.001). In the development cohort, the model showed an area under the curve for the receiver operator characteristic of 0.76 (0.67-0.84) with a specificity of 0.76 and an <italic>F</italic><sub>1</sub>-score of 0.63. The performance of the model in an external testing cohort was comparable, with an area under the curve of 0.70 (0.52-0.88), a specificity of 0.81, and an <italic>F</italic><sub>1</sub>-score of 0.67. The ML model had accuracy, sensitivity, specificity, and negative and positive predictive values comparable to those of trained speech-language pathologists in classifying aspiration risk by evaluating vowel phonations.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Otolaryngology (ear, nose, and throat) patients at high risk for aspiration have quantifiable voice characteristics that significantly differ from those who are at a low risk for aspiration, as detected by an ML model trained to analyze sustained phonation and tested on an independent cohort.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>aspiration</kwd>
        <kwd>voice</kwd>
        <kwd>speech</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>ARDS</kwd>
        <kwd>acute respiratory distress syndrome</kwd>
        <kwd>lung transplant</kwd>
        <kwd>ILD</kwd>
        <kwd>interstitial lung disease</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Abnormalities of the oropharynx or larynx can cause clinically relevant aspiration of oropharyngeal or gastrointestinal contents into the lungs [<xref ref-type="bibr" rid="ref1">1</xref>]. Such abnormalities include tumor bulk, resection or radiation injury, pathological or age-related deterioration of nerves or muscles of the upper airways, esophageal abnormalities, and altered sensorium. Acute aspiration can cause significant injuries leading to pneumonias and acute respiratory distress syndrome [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Chronic aspiration of gastrointestinal contents can aggravate airway and parenchymal lung diseases, such as chronic lipoid pneumonia, bronchiectasis, obliterative bronchiolitis, refractory asthma, and pulmonary fibrosis [<xref ref-type="bibr" rid="ref3">3</xref>], and is a major risk factor for transplanted lung rejection [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>].</p>
      <p>Aspiration is common but is often undetected and usually only suspected after significant pulmonary damage has occurred. Despite its high prevalence in ambulatory and hospitalized patients [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref8">8</xref>], proactively detecting aspiration risk to prevent downstream sequelae is challenging. The most widely used hospital-based screening method to rule out anterograde aspiration (aspiration that occurs during swallowing) is nursing- or speech-language pathologist (SLP)–administered bedside swallowing evaluations (BSEs). BSEs vary across institutions but often include listening for a wet cough or wet voice quality after patients swallow varying amounts of liquid. Due to interrater and intrarater reliability issues inherent with these subjective assessments [<xref ref-type="bibr" rid="ref9">9</xref>], the sensitivity for BSEs ranges from 27% to 85%, with specificities ranging from 50% to 80% [<xref ref-type="bibr" rid="ref10">10</xref>], and poor predictive values [<xref ref-type="bibr" rid="ref11">11</xref>]. BSE results inform referral decisions for gold standard confirmatory testing, including a video fluoroscopic swallow study (VFSS) [<xref ref-type="bibr" rid="ref12">12</xref>] and fiberoptic endoscopic evaluation of swallowing (FEES) [<xref ref-type="bibr" rid="ref13">13</xref>]. The VFSS exposes patients to radiation, requires coordinated participation, and can be challenging if patients are acutely ill, delirious, or have mobility issues, for example, in intensive care units. FEES is an invasive and uncomfortable evaluation where a scope is inserted through the nose and oropharynx to visualize the larynx during swallowing. Both VFSS and FEES are resource-intensive, requiring specialized equipment and the expertise of radiologists, laryngologists, and SLPs for administration and proper interpretation, limiting wide-scale deployment as screening tests.
Consequently, there is an unmet need for an objective bedside screening test that is easily administered and provides a valid indication of aspiration risk to support decisions regarding referral for VFSS or FEES.</p>
      <p>Aspiration involves contact of gastrointestinal or oropharyngeal contents with the vocal folds as they move through the airway and into the lungs. While perceptual-acoustic studies of postprandial phonation have shown some limited evidence of acute aspiration immediately after a swallow [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref16">16</xref>], chronic exposure to gastrointestinal or oropharyngeal contents is likely to degrade the mucosal surfaces in ways that manifest as changes to the vocal folds’ vibratory and acoustic characteristics. Exposure to gastrointestinal contents can induce histopathological changes to the vibratory margins of the vocal folds, leading to changes in vocal quality [<xref ref-type="bibr" rid="ref17">17</xref>]. We therefore anticipate that aspirators will exhibit changes in vocal quality relative to non-aspirators either due to underlying pathophysiology or as a consequence of chronic aspiration. In this study, we evaluated voice samples available from Mayo Clinic’s Otolaryngology clinical practices to develop, validate, and externally test a machine learning (ML) algorithm that analyzes voice to predict aspiration risk. We hypothesize that increased risk of aspiration may be associated with changes in human voice quality that can be detected by ML approaches for the objective prediction of aspiration risk.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Voice Data Collection</title>
        <p>Voice recordings collected during laryngoscopy exams at Mayo Clinic Arizona (MCA) Otolaryngology clinics were extracted. All voice samples had been originally recorded during routine clinical workflow, using the lapel microphone (of the Pentax Laryngeal Strobe system) clipped near the clavicle. Endoscopy and VFSS databases from January 2020 to July 2021 were curated to identify patients with VFSS, an endoscopy exam, and voice recordings, yielding 332 consecutive patients with both VFSS and voice recordings that were within 1 year of each other. Notably, since this is a retrospective study, voice sample collections were not specifically protocolled as would be done in a prospective study. Three SLPs (15 years combined experience) screened and manually excluded (Audacity 3.1.3) recordings with background noise, overlapping sounds (eg, multiple speakers and machine sounds), or low volume. The samples were discarded if SLPs were unable to reliably tag and label the individual samples, per the protocol below. Robust recordings were analyzed without computerized preprocessing to avoid artifacts from being introduced into the signal analysis pipeline. These samples were exported as .wav files, and PRAAT (version 6.2.15) was used to tag the samples with specific labels, including sustained vowel phonation, cued speech, reading, and spontaneous speech. To maximize the number of participants used for training, only [i] vowel phonations were analyzed, as these were most consistently collected across all patients during routine clinical practice. Focusing on a single vowel type also reduced variability arising from differences in speech elicitation, dialect, or other idiosyncratic speech characteristics. Reducing variability ensures that observed differences in acoustic features reflect true underlying vocal characteristics rather than differences in speaking style or task conditions.
Of 772 voice recording sessions from 332 patients, 283 recordings (with each recording containing one to several sustained [i] phonations) from 163 patients were analyzed. An average of 3 clips of the [i] vowel phonations lasting a minimum of 0.5 seconds from the central portion of each recording per patient were used to develop the ML model. These clips were treated as repeated measurements from the same patient rather than independent samples. The patient was defined as the fundamental unit of analysis for both model training and evaluation. All vowel clips from a given patient were always kept together and assigned to the same fold during cross-validation. No clips from the same patient were ever split across training, validation, or test folds.</p>
      </sec>
      <sec>
        <title>Clinical Data Collection</title>
        <p>Clinical data extracted included aspiration risk factors, such as upper airway involvement (vocal fold disease, head and/or neck surgery, radiation exposures), esophageal diseases (strictures, impaired motility, GERD), neurological compromise (stroke, neuromuscular, neurodegenerative, or peripheral nerve disease), BMI, and obstructive sleep apnea.</p>
        <p>The Rosenbek Penetration-Aspiration Scale (PAS) [<xref ref-type="bibr" rid="ref18">18</xref>], a well-validated and widely used clinical standard [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>], was used to classify patients’ VFSS exams as high-risk for aspiration (PAS scores of 6-8, materials into the lower airways); moderate-risk (PAS 3-5, materials in laryngeal vestibule and/or on vocal folds without being ejected); and low-risk (PAS 1-2, no penetration or ejected out of airway). PAS measurements are collected as part of routine clinical SLP practice at Mayo Clinic, as is the case in most major medical centers. The moderate aspiration risk group was not included in the test set, as the goal of testing is to observe if this model developed on retrospective voice data can discriminate between high- and low-risk aspirators. Notably, sustained [i] phonations were recorded separately from the VFSS on different days. As such, the [i] phonations performed by patients were not influenced by simultaneous VFSS testing.</p>
      </sec>
      <sec>
        <title>Data Preprocessing, ML, and Statistical Analysis</title>
        <p>All recordings were processed using a standardized pipeline prior to feature extraction to reduce sensitivity to recording conditions. Audio was converted to mono and resampled to 16 kHz (16-bit pulse-code modulation). A band-pass filter (70-8,000 Hz; 4th-order Butterworth) was applied to reduce low-frequency handling noise and high-frequency hiss. The retained phonation segment was amplitude-normalized to a fixed level (eg, peak normalization to −1 dBFS [decibels relative to full scale] or root mean square normalization to −20 dBFS). Traditional acoustic features were computed using short-time analysis with 25 ms Hamming windows and a 10 ms hop, and summary statistics were aggregated across frames to obtain per-recording feature values.</p>
        <p>In our initial model development and training, traditional voice features including pitch, jitter, shimmer, harmonics-to-noise ratio (HNR), cepstral peak prominence (CPP), and relative average perturbation (RAP) were extracted from tagged /i/ samples using audio processing toolboxes PRAAT (GNU General Public License Version 3, 29 June 2007) and Collaborative Voice Analysis Repository for Speech Technologies (COVAREP) [<xref ref-type="bibr" rid="ref21">21</xref>].</p>
        <p>Acoustic features were aggregated at the participant level (by averaging repeated vowel clips) to ensure independence of observations. Group differences between high- and low-risk aspirators were evaluated using 2-sample parametric 2-sided <italic>t</italic> tests. Although the 2-sided <italic>t</italic> test assumes approximate normality, it is generally robust to moderate departures from this assumption when applied to participant-level summaries. Feature distributions were therefore examined for gross irregularities (eg, extreme skewness or outliers), and no violations severe enough to invalidate mean-based comparisons were observed; accordingly, 2-sided <italic>t</italic> tests were used as a univariate screening analysis.</p>
        <p>Since multiple acoustic features were evaluated, false discovery rate–adjusted q-values were computed using the Benjamini–Hochberg procedure to account for multiplicity. Raw <italic>P</italic> values are retained in the main text for comparability with prior studies, while the complete set of false discovery rate-adjusted q-values is reported in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> to guide interpretation under multiple testing.</p>
        <p>Comparisons of demographics and clinical characteristics between the groups were performed with the Fisher exact test for categorical variables and the Kruskal-Wallis rank sum test for continuous variables. These analyses were performed with the arsenal package (R version 4.2.2; R Foundation for Statistical Computing). A threshold of .05 was chosen as the cut-off criterion for statistical significance.</p>
        <p>Neural additive models (NAMs) [<xref ref-type="bibr" rid="ref22">22</xref>] were used to classify the voice data and analyze the [i] phonation data of 163 patients. The following dependencies were used for ML coding in Python 3: PyTorch, NumPy, and SciPy. For the NAM, we included 33 features that are well described to have validity in characterizing pathological voice states [<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>].</p>
        <p>The patient was defined as the unit of analysis. Five-fold patient-level cross-validation was used for model development and internal validation. All modeling steps, including feature selection, oversampling, hyperparameter tuning, and calibration, were performed exclusively within the training data of each fold, with no information from validation folds used during model fitting or selection.</p>
        <p>Recursive feature addition (RFA) [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>] was nested within the cross-validation framework to avoid optimistic selection bias. Within each training fold, features were added sequentially based on improvement in cross-validated area under the receiver operator characteristic curve (AUC). Feature addition was terminated when performance reached a plateau. To ensure robustness and stability, the final feature subset was defined as the intersection of selected features across cross-validation folds, and validation performance was evaluated using only the feature subset determined from the corresponding training fold.</p>
        <p>Hyperparameter tuning for the NAM, including the learning rate and number of training epochs, was conducted using a grid search within each training fold, with performance assessed via an inner validation split.</p>
        <p>Class imbalance was addressed during training using random oversampling of the minority class to achieve full class balance, applied only to the training portion of each fold. No oversampling was applied to validation data.</p>
        <p>Model calibration was evaluated to assess the reliability of predicted risks. Because uncalibrated NAM outputs exhibited overconfident probability estimates, post-hoc logistic recalibration was applied. Calibration intercept and slope were estimated using predictions from the training folds and then applied to the corresponding held-out validation folds to obtain out-of-sample calibrated probabilities. Calibration performance was summarized using the calibration intercept, calibration slope, and the Brier score.</p>
        <p>Model development, internal validation, feature selection, and calibration procedures were conducted and reported in accordance with the TRIPOD-AI (Transparent Reporting of a multivariable Prediction model for Individual Prognosis or Diagnosis-Artificial Intelligence) guidelines for multivariable prediction models using artificial intelligence.</p>
      </sec>
      <sec>
        <title>Independent Model Testing</title>
        <p>The development NAM model was trained on voice data from MCA, while independent validation of the model was performed with voice data of patients from another distinct Mayo Clinic site: Mayo Clinic in Rochester. The clinicians, recordings, equipment, rooms, geographic location, and patient demographics of the external testing cohort (Mayo Clinic in Rochester) were independent of the training cohort (MCA). These differences naturally introduced a domain shift, meaning that the distribution of test samples differed from that of the training samples. Evaluating the model under this shift allowed us to assess its out-of-distribution generalizability, ensuring that its predictions reflect aspiration-related signals rather than site-specific confounders. The selection, extraction, tagging, and processing of these externally collected voice samples used the same methodology as described previously for the development cohort. The recording systems in both sites were similar (ie, Pentax systems). ML risk score for each patient in the testing cohort was calculated in a blinded fashion by the model by analyzing [i] phonation clips in the context of the patient’s age and sex. The gold standard designation (ie, aspiration risk category based on VFSS) or clinical information was not known to the investigators running the ML code. Statistical analyses, including sensitivity, specificity, and receiver operator characteristics (ROC), were calculated to assess the model’s performance on both the testing and development cohorts.</p>
      </sec>
      <sec>
        <title>Comparing Human Raters With ML Model</title>
        <p>Overall, 4 SLPs from the 2 medical centers (40 years of combined experience), blinded to medical history and aspiration risk, classified patients as being at high or low risk for aspiration based on perceptual judgments of the [i] phonation clips. This is identical to the information that was provided to the ML model in testing. Notably, the moderate aspiration risk group was not included in the test set, as the goal of testing was to determine whether this model, developed on retrospective voice data, can discriminate between high- and low-risk aspirators. The interrater reliability for the human raters was assessed using pairwise Cohen κ. The sensitivity, specificity, positive and negative predictive values, and accuracy of the human raters and the ML model for predicting high- versus low-risk aspirators were calculated.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was performed in accordance with the Declaration of Helsinki. This human study was approved by the Mayo Clinic Institutional Review Board (approval 21-008975). As part of routine clinical care at Mayo Clinic, patients can opt out of having their retrospective clinical data used for research purposes. Patients were not paid for this retrospective study. No individual patient’s identifiable information can be found in any part of this manuscript or any of its appendices, ensuring patient privacy. The workflow for voice data curation, ML model development, and validation is shown in <xref rid="figure1" ref-type="fig">Figure 1</xref>.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>A graphical representation summarizing the workflow for this study, from voice sample collection and phonation tagging to subsequent machine learning model development based on gold standard testing (video fluoroscopic swallow study–determined aspiration risk). Subsequently, independent model evaluation was performed wherein the aspiration risk category based on the video fluoroscopic swallow study was unknown to the model. Contained within the green box is a schematic of the neural additive model [<xref ref-type="bibr" rid="ref23">23</xref>] trained to estimate risk of aspiration using 33 voice features, in the full cohort (N=163), while adjusting for age and sex, the main confounders between the groups. Σ: summation of all subfeature network outputs and learnable offset β; σ: sigmoid function, σ(x)=1/(1+e<sup>–x</sup>); β: learnable offset of the neural additive model. ML: machine learning; VFSS: video fluoroscopic swallow study.</p>
          </caption>
          <graphic xlink:href="formative_v10i1e86069_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Feature Analysis</title>
        <p>We initially analyzed retrospective samples from 87 patients (19 high-risk aspirators, 10 medium-risk aspirators, and 58 low-risk aspirators) with an average of approximately 2 voice files per patient. Nonparametric analysis of variance of demographic and clinically relevant risks of aspiration in the pilot data revealed that only age and sex were different among the groups. On age and sex-matching prior to analysis, 17 high-risk aspirators were found to be age- and sex-matched to 17 low-risk aspirators in our exploratory cohort. Of the traditional voice features that were explored, high-risk aspirators were found to have higher jitter and shimmer but lower harmonics-to-noise ratio than low-risk aspirators (<xref rid="figure2" ref-type="fig">Figure 2</xref>).</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Pilot data of 17 age- and sex-matched aspirators with 17 nonaspirators reveal that aspirators had a higher mean jitter (mean difference 0.0085, 95% CI 0.000756-0.0161), shimmer (mean difference 0.0266, 95% CI 0.00228-0.0506), and lower harmonics-to-noise ratio (mean difference 3.1, 95% CI 0.603-5.6). Means represented by black diamonds. Shimmer is the cycle-to-cycle variability in amplitude, while jitter is the cycle-to-cycle variability in frequency. HNR: harmonics-to-noise ratio.</p>
          </caption>
          <graphic xlink:href="formative_v10i1e86069_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Development of ML Model and Internal Cross-Validation</title>
        <p>Our primary ML model development and training cohort evaluated 163 patients within the 3 groups (47 high-risk aspirators, 17 moderate-risk aspirators, and 99 low-risk aspirators) that significantly differed by sex, age, and BMI (with high-risk aspirators having a lower BMI than low-risk aspirators). Additional differences included more reported solid and liquid dysphagia symptoms and a higher frequency of structural changes to head and neck anatomy in the high-risk group than in the low-risk group, as expected in an otolaryngology population (<xref ref-type="table" rid="table1">Table 1</xref>).</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Baseline characteristics of the training cohort.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="210"/>
            <col width="200"/>
            <col width="200"/>
            <col width="230"/>
            <col width="0"/>
            <col width="130"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Characteristics</td>
                <td>High aspiration risk (n=47)</td>
                <td>Low aspiration risk (n=99)</td>
                <td>Moderate aspiration risk (n=17)</td>
                <td colspan="2"><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6">
                  <bold>Sex, n (%)</bold>
                </td>
                <td>&lt;.001<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Male</td>
                <td>39 (83)</td>
                <td>45 (45.5)</td>
                <td>9 (52.9)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Female</td>
                <td>8 (17)</td>
                <td>54 (54.5)</td>
                <td>8 (47.1)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Age at scope examination</bold>
                </td>
                <td>&lt;.001<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Mean (SD)</td>
                <td>72.3 (9.9)</td>
                <td>62.9 (12.6)</td>
                <td>75.1 (9)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Median (IQR)</td>
                <td>73 (67-78)</td>
                <td>61 (55-72)</td>
                <td>77 (65-82)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Range</td>
                <td>40-88</td>
                <td>31-90</td>
                <td>62-89</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>BMI (kg/m<sup>2</sup>)</bold>
                </td>
                <td>.009<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Mean (SD)</td>
                <td>24.1 (3.2)</td>
                <td>27.0 (5.3)</td>
                <td>26.4 (7.7)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Median (IQR)</td>
                <td>23.8 (22-26.1)</td>
                <td>26.8 (23.4-29.7)</td>
                <td>25.3 (21.3-29.4)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Range</td>
                <td>18.7-31.2</td>
                <td>17-44.2</td>
                <td>16-48.9</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Dysphagia, n (%)</bold>
                </td>
                <td>.06<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No</td>
                <td>4 (8.5)</td>
                <td>25 (25.3)</td>
                <td>2 (11.8)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Yes</td>
                <td>40 (85.1)</td>
                <td>72 (72.7)</td>
                <td>14 (82.4)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>NA<sup>c</sup></td>
                <td>3 (6.4)</td>
                <td>2 (2)</td>
                <td>1 (5.9)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Esophageal disease group, n (%)</bold>
                </td>
                <td>.55<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Clinical GERD<sup>d</sup></td>
                <td>23 (48.9)</td>
                <td>46 (46.5)</td>
                <td>5 (29.4)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Other esophageal disease</td>
                <td>3 (6.4)</td>
                <td>6 (6.1)</td>
                <td>0 (0)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Multiple esophageal diseases</td>
                <td>2 (4.3)</td>
                <td>3 (3)</td>
                <td>1 (5.9)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>GERD on impedance or manometry studies</td>
                <td>1 (2.1)</td>
                <td>0 (0)</td>
                <td>0 (0)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No esophageal disease</td>
                <td>9 (19.1)</td>
                <td>24 (24.2)</td>
                <td>4 (23.5)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>NA</td>
                <td>9 (19.1)</td>
                <td>20 (20.2)</td>
                <td>7 (41.2)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>OSA<sup>e</sup>, n (%)</bold>
                </td>
                <td>.87<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Compliant with CPAP<sup>f</sup></td>
                <td>8 (17)</td>
                <td>19 (19.2)</td>
                <td>1 (5.9)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not compliant with CPAP</td>
                <td>2 (4.3)</td>
                <td>5 (5.1)</td>
                <td>1 (5.9)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Compliance not reported</td>
                <td>2 (4.3)</td>
                <td>4 (4)</td>
                <td>0 (0)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No OSA</td>
                <td>35 (74.5)</td>
                <td>71 (71.7)</td>
                <td>15 (88.2)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Neurological illness, n (%)</bold>
                </td>
                <td>.14<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CVA<sup>g</sup> without any deficit</td>
                <td>1 (2.1)</td>
                <td>4 (4)</td>
                <td>0 (0)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>ALS<sup>h</sup></td>
                <td>0 (0)</td>
                <td>1 (1)</td>
                <td>0 (0)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Neuromuscular diseases</td>
                <td>3 (6.4)</td>
                <td>0 (0)</td>
                <td>0 (0)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CVA with dysphagia only</td>
                <td>1 (2.1)</td>
                <td>0 (0)</td>
                <td>0 (0)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>CVA with residual neuro deficit</td>
                <td>1 (2.1)</td>
                <td>0 (0)</td>
                <td>0 (0)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>None</td>
                <td>41 (87.2)</td>
                <td>94 (94.9)</td>
                <td>17 (100)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Vocal fold disease, n (%)</bold>
                </td>
                <td>.34<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Yes</td>
                <td>23 (48.9)</td>
                <td>35 (35.4)</td>
                <td>8 (47.1)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>No</td>
                <td>22 (46.8)</td>
                <td>62 (62.6)</td>
                <td>9 (52.9)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>NA</td>
                <td>2 (4.3)</td>
                <td>2 (2)</td>
                <td>0 (0)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Head and neck anatomical disease, n (%)</bold>
                </td>
                <td>.01<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Surgery</td>
                <td>6 (12.8)</td>
                <td>13 (13.1)</td>
                <td>2 (11.8)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Cancer</td>
                <td>1 (2.1)</td>
                <td>1 (1)</td>
                <td>0 (0)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Radiation</td>
                <td>0 (0)</td>
                <td>2 (2)</td>
                <td>0 (0)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Multiple head and neck anatomical diseases</td>
                <td>25 (53.2)</td>
                <td>22 (22.2)</td>
                <td>5 (29.4)</td>
                <td colspan="2"/>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>None</td>
                <td>15 (31.9)</td>
                <td>61 (61.6)</td>
                <td>10 (58.8)</td>
                <td colspan="2"/>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Fisher exact test for count data.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>Kruskal-Wallis rank sum test.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>NA: not reported by patient or not found in chart review.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>GERD: gastroesophageal reflux disease.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>OSA: obstructive sleep apnea.</p>
            </fn>
            <fn id="table1fn6">
              <p><sup>f</sup>CPAP: continuous positive airway pressure.</p>
            </fn>
            <fn id="table1fn7">
              <p><sup>g</sup>CVA: cerebrovascular accident.</p>
            </fn>
            <fn id="table1fn8">
              <p><sup>h</sup>ALS: amyotrophic lateral sclerosis.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Baseline characteristics for the age- and sex-matched cohort of 36 high-risk aspirators and 36 low-risk aspirators (N=72) showed that BMI and incidence of head and neck disease were different between the groups (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>).</p>
        <p>Because age and sex influence voice, and were different between the groups, they were controlled for in the NAM (<xref rid="figure1" ref-type="fig">Figure 1</xref>). The model used 33 extracted features to differentiate between aspirators and controls (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). Each subnetwork within the model processed a single extracted voice feature along with the corresponding sample sex and age as inputs (<xref rid="figure1" ref-type="fig">Figure 1</xref>). The subnetwork output was a single scalar reflecting the adjustments made for age and sex within the feature. The whole NAM output for aspiration risk was a scalar ranging from 0 to 1. High- and low-risk aspirators were clearly distinguished: high-risk aspirator risk score of 0.530 (SD 0.310) versus low-risk aspirator risk score of 0.243 (SD 0.249); mean risk difference between groups was 0.287 (95% CI 0.192-0.381; <italic>P</italic>&lt;.001; <xref rid="figure3" ref-type="fig">Figure 3</xref>A). Moderate-risk aspirators had a risk score between high-risk aspirators and low-risk aspirators, although this difference when compared to the other groups was not significant. RFA showed an elbow point at 7 features (<xref rid="figure3" ref-type="fig">Figure 3</xref>B), revealing that the most significant voice features adding to the model’s discriminability were the average and SD of the fundamental frequency (F0_mean; importance=0.45 and F0_std; 0.29), the maximum fundamental frequency during phonation (Max Pitch; 0.57), the SD of the quasi-open quotient that measures the proportion of the glottal cycle when the glottis is open (QOQ_std; 0.04), the average of the harmonic richness factor (HRF_mean; 0.18), the average of the CPP (CPP_mean; 0.07), and the SD of the CPP (CPP_std; 0.04). Feature importance was quantified as the average absolute weights of the corresponding feature-specific subnetworks in the NAM, reflecting each feature’s relative contribution to the model’s overall discriminative performance at a global level.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>(A) High-risk aspirator neural additive model risk score mean 0.530 (SD 0.310) versus low-risk aspirator risk score of mean 0.243 (SD 0.249); mean risk difference between groups 0.287 (95% CI 0.192-0.381; <italic>P</italic>&lt;.001). Means represented by black diamonds. Moderate-risk aspirators’ score fell between high- and low-risk aspirators. (B) The recursive feature addition method was used to identify the minimal set of features required to effectively distinguish between high- and low-risk aspirators. This method initiates with a single feature and iteratively adds the voice feature that enhances the performance of the existing set. The key metric used to assess the performance of the feature set is the mean risk score difference. CPP: cepstral peak prominence; HRF: harmonic richness factor; NAM: neural additive model; RFA: recursive feature addition.</p>
          </caption>
          <graphic xlink:href="formative_v10i1e86069_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>A description of these 7 features and the remaining features among the 33 analyzed in the NAM is provided in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>. After cross-validated recalibration, the model demonstrated good calibration, with a calibration intercept of –0.06 and a calibration slope of 0.89, indicating minimal global bias and substantially reduced overconfidence. The Brier score improved to 0.189.</p>
      </sec>
      <sec>
        <title>Estimation of Model Performance</title>
        <p>Decision curve analysis performed on the model showed that using the ML risk score for prediction provided more net benefit than the binary strategies of “Treat All” or “Treat None.” This was true over a wide range of threshold probabilities (<xref rid="figure4" ref-type="fig">Figure 4</xref>A). Additionally, a bootstrapped bias-corrected calibration analysis showed good agreement between predicted and observed probabilities (<xref rid="figure4" ref-type="fig">Figure 4</xref>B).</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>(A) Machine learning risk score to predict aspirators consistently provided higher net benefit than both the “Treat All” and “Treat None” strategies across a wide range of threshold probabilities (approximately 0 to 0.75). (B) Bootstrap bias-corrected calibration curve closely followed the ideal 45° reference line across the central range of predicted risks.</p>
          </caption>
          <graphic xlink:href="formative_v10i1e86069_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>External Testing of the ML Model</title>
        <p>The external cohort contained 19 high- and 16 low-risk aspirators based on VFSS, which totaled 24% of the sample size of high- and low-risk aspirators in the development cohort. The mean ML risk score output for the high-risk aspirators was significantly higher than that for the low-risk aspirators (mean 0.469, SD 0.327 vs mean 0.245, SD 0.265; <italic>P</italic>=.047; <xref rid="figure5" ref-type="fig">Figure 5</xref>A). The optimal ML risk score cutoff to distinguish high- from low-risk aspirators by analyzing the training cohort was 0.35 based on the Youden index (which is calculated as sensitivity+specificity–1 and determines the optimal cutoff for balancing sensitivity and specificity). The demographics between the training and testing cohorts were different (<xref ref-type="table" rid="table3">Table 3</xref>). The AUC for the ROC for the testing cohort was 0.697 (0.517-0.878) compared to the AUC of 0.755 (0.666-0.843) for the training cohort (<xref rid="figure5" ref-type="fig">Figure 5</xref>B). The precision, recall, <italic>F</italic><sub>1</sub>-score, and specificity of the model for the testing cohort were 0.79, 0.58, 0.67, and 0.81, respectively (<xref ref-type="table" rid="table2">Table 2</xref>). There was not a significant drop in AUC despite the demographic differences between the training and testing cohorts (<xref ref-type="table" rid="table3">Table 3</xref>). For example, the testing cohort had a higher proportion of female high-risk aspirators (20% vs 5.5%), and a lower proportion of male low-risk aspirators (8.6% vs 30.8%; <italic>P</italic>=.005). The testing cohort was also older (mean age 70.3, SD 13.4 years vs mean 65.9, SD 12.6 years; <italic>P</italic>=.03).</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>(A) Box plot of machine learning risk score for the development versus the external testing cohort. (B) Receiver operating characteristic curve for the development versus the external testing cohort. AUC: area under the curve; ML: machine learning.</p>
          </caption>
          <graphic xlink:href="formative_v10i1e86069_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Analyses evaluating the performance of the model on the development versus external testing cohort, where <italic>F</italic><sub>1</sub>-score=2×precision×recall/(precision+recall).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Cohorts</td>
                <td>Precision</td>
                <td>Recall</td>
                <td><italic>F</italic><sub>1</sub>-score</td>
                <td>Specificity</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Development</td>
                <td>0.58</td>
                <td>0.70</td>
                <td>0.63</td>
                <td>0.76</td>
              </tr>
              <tr valign="top">
                <td>External testing</td>
                <td>0.79</td>
                <td>0.58</td>
                <td>0.67</td>
                <td>0.81</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Illustration of demographic differences between the development and testing cohorts, especially in factors like sex that significantly influence voice.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="310"/>
            <col width="330"/>
            <col width="330"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Aspiration risk</td>
                <td>Female</td>
                <td>Male</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>High, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Development cohort</td>
                <td>8 (5.5)</td>
                <td>39 (26.7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Testing cohort</td>
                <td>7 (20)</td>
                <td>12 (34.3)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Low, n (%)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Development cohort</td>
                <td>54 (37)</td>
                <td>45 (30.8)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Testing cohort</td>
                <td>13 (37.1)</td>
                <td>3 (8.6)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Human Raters vs ML Model</title>
        <p>Other than 2 raters who had very good agreement (pairwise Cohen κ of 0.8), the rest had fair agreement (Cohen κ range 0.34-0.59; <xref ref-type="table" rid="table4">Table 4</xref>). In classifying patients as a high- or low-risk aspirator by analyzing [i] phonations, the ML model had comparable accuracy to human raters’ range (69% vs 46%-60%), sensitivity (58% vs 32%-47%), PPV (79% vs 50%-78%), and NPV (62% vs 41%-54%; <xref ref-type="table" rid="table5">Table 5</xref>). The ML model also had comparable specificity to human raters (81% vs 44%-88%). No statistically significant differences were found while comparing the ML model’s performance metrics to the performance of human raters using the McNemar or DeLong test.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Pairwise interrater reliability (Cohen κ coefficient) between human speech language pathologist raters. In parentheses, the interrater reliability between raters is shown for their predictions of high-risk versus low-risk aspirators.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="260"/>
            <col width="250"/>
            <col width="230"/>
            <col width="260"/>
            <thead>
              <tr valign="bottom">
                <td>SLP<sup>a</sup> raters</td>
                <td>Rater 1</td>
                <td>Rater 2</td>
                <td>Rater 3</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Rater 2</td>
                <td>0.590 (0.774/0.200)</td>
                <td>—<sup>b</sup></td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Rater 3</td>
                <td>0.508 (0.573/0.412)</td>
                <td>0.397 (0.573/0.2)</td>
                <td>—</td>
              </tr>
              <tr valign="top">
                <td>Rater 4</td>
                <td>0.801 (0.650/1)</td>
                <td>0.505 (0.650/0.2)</td>
                <td>0.343 (0.249/0.412)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>SLP: speech language pathologist.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>Not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Blinded human (raters 1-4) and machine (rater 5) raters’ ability to predict aspiration risk by listening to phonations against the ground truth (based on video fluoroscopic swallow study).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="170"/>
            <col width="170"/>
            <col width="150"/>
            <col width="140"/>
            <col width="220"/>
            <col width="0"/>
            <thead>
              <tr valign="bottom">
                <td>Reader</td>
                <td>Sensitivity, % (95% CI)</td>
                <td>Specificity, % (95% CI)</td>
                <td>PPV<sup>a</sup>, % (95% CI)</td>
                <td>NPV<sup>b</sup>, % (95% CI)</td>
                <td colspan="2">Overall accuracy, % (95% CI)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Rater 1</td>
                <td>36.8 (17.2-61.4)</td>
                <td>75 (47.4-91.7)</td>
                <td>63.6 (31.6-87.6)</td>
                <td>50 (31.4-68.6)</td>
                <td>54.3 (36.9-70.8)</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>Rater 2</td>
                <td>36.8 (17.2-61.4)</td>
                <td>87.5 (60.4-97.8)</td>
                <td>77.8 (40.2-96.1)</td>
                <td>53.8 (33.7-72.9)</td>
                <td>60 (42.2-75.6)</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>Rater 3</td>
                <td>47.4 (25.2-70.5)</td>
                <td>43.8 (20.8-69.4)</td>
                <td>50 (29-71)</td>
                <td>41.2 (19.4-66.5)</td>
                <td>45.7 (29.2-63.1)</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>Rater 4</td>
                <td>31.6 (13.6-56.5)</td>
                <td>75 (47.4-91.7)</td>
                <td>60 (27.4-86.3)</td>
                <td>48 (28.3-68.2)</td>
                <td>51.4 (34.3-68.3)</td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>Rater 5 (machine)</td>
                <td>57.9 (34-78.9)</td>
                <td>81.2 (53.7-95)</td>
                <td>78.6 (48.8-94.3)</td>
                <td>61.9 (38.7-81)</td>
                <td>68.6 (50.6-82.6)</td>
                <td>
                  <break/>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>PPV: positive predictive value.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>NPV: negative predictive value.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Study Strengths</title>
        <p>While acutely or chronically having ingested materials enter the larynx alters the quality of voice, human judgment of voice changes [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>] or even validated tools for perceptual assessments are unreliable screening methods to detect aspiration risk [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. There is a need for tools to objectively quantify aspiration-related voice changes in patients [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. In a study of 93 patients, RAP combined with HNR increased the sensitivity of detecting aspiration in the 5 voice features analyzed [<xref ref-type="bibr" rid="ref14">14</xref>]. A similar study of 165 patients demonstrated that of 8 voice features evaluated, RAP was the most distinguishing measure between aspirators and non-aspirators [<xref ref-type="bibr" rid="ref15">15</xref>]. Recently, a study of 198 patients used ML to detect aspiration by analyzing postprandial voice [<xref ref-type="bibr" rid="ref16">16</xref>]. However, all these studies were single-center studies without an external testing cohort. The focus of these studies was on the effects of anterograde aspiration on voice as ingested materials contact the vocal folds in real time. However, current bedside swallow evaluations also screen for aspiration by making the patient swallow. There are no available tests to objectively detect aspiration risk without having the patient swallow liquids or solids. This can be a limitation for patients who are frail, immobile, and experiencing delirium, who are already at considerable risk for aspiration. An objective screening test for aspiration risk that only requires simple phonation could be useful in these scenarios. Such a method could facilitate frequent longitudinal testing, refining referral for confirmatory testing with VFSS or FEES.</p>
        <p>While our model’s performance based on metrics like AUC, <italic>F</italic><sub>1</sub>-score, and decision curve analysis showed reasonable performance (<xref rid="figure4" ref-type="fig">Figures 4</xref>-<xref rid="figure5" ref-type="fig">5</xref>), unlike other studies in this space, we also tested the performance of our model in an independent external testing cohort. Previous work has shown that external testing of voice models is critical, as it can be difficult to train clinical speech models that generalize [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref34">34</xref>]. Our testing cohort was from a geographically distinct clinic where retrospective voice samples were collected from patients who were demographically different from our training cohort. Despite these differences, the model’s performance did not change significantly (<xref ref-type="table" rid="table2">Tables 2</xref>-<xref ref-type="table" rid="table3">3</xref> and <xref rid="figure5" ref-type="fig">Figure 5</xref>), suggesting that the model is not fragile. Fragile models that are overfitted in development cohorts tend to perform poorly in external cohorts and therefore can seldom be used effectively in clinical practice. Finally, we explored whether the ML tool added value as a preliminary test to predict aspiration risk by comparing its performance to that of SLPs. We found that the interrater reliability among our SLP raters was only “fair,” similar to reports in the literature for other perceptual evaluations.</p>
        <p>There is an inherent tradeoff between purely data-driven model development and approaches that incorporate domain expertise. While deep learning models typically require very large datasets to automatically learn relevant features, simpler supervised ML methods can achieve strong performance with smaller sample sizes when domain experts guide the feature selection and validation process [<xref ref-type="bibr" rid="ref35">35</xref>]. One of the benefits of NAM used in this evaluation is the sequential way in which features can be considered and confounding variables controlled, thereby improving the explainability of the model [<xref ref-type="bibr" rid="ref22">22</xref>]. While simpler models like logistic regression can classify by accounting for basic covariates such as age and sex, they are limited in capturing nonlinear relationships and feature-specific interactions. The NAM’s feature-specific networks capture complex, nonlinear interactions while controlling for relevant patient characteristics.</p>
      </sec>
      <sec>
        <title>Study Limitations and Future Directions</title>
        <p>This study has several limitations. The relationship between voice and aspiration is likely a complex bidirectional relationship that cannot be fully elucidated with a retrospective analysis. Our population of ear, nose, and throat (ENT) patients has other reasons related or unrelated to aspiration for their voice to be altered (eg, radiation-induced vocal fold scarring). We controlled for age and sex (predictors known to covary with voice) using our NAM. However, other clinical factors like BMI and anatomical head and neck disease are known to covary with aspiration (the response variable) either as a cause or an effect and therefore cannot be easily controlled by generalized regression models in a retrospective analysis. Also, we focused on anterograde aspiration as confirmed by VFSS. It is possible in our cohorts that participants experienced retrograde aspiration, wherein micro-aspiration of gastrointestinal contents occurs typically during sleep in the context of gastroesophageal reflux disease [<xref ref-type="bibr" rid="ref3">3</xref>]. Because it can occur in the absence of any structural or functional abnormalities, VFSS or FEES are not sensitive diagnostics. This may have resulted in false positive high-risk errors in both SLP and ML classifications. The inclusion of indirect (reflux symptoms or esophageal impedance testing) and direct indicators of retrograde aspiration (eg, pepsin and bile in bronchoalveolar lavage) in our model may further refine model performance beyond the 0.70 AUC range. Additional fine-tuning may be achieved by including measures from connected speech and increasing the diversity of clinical settings. While constrained by the retrospective voice data, this formative study motivates a prospective multi-site voice collection trial to better characterize voice and its relationship to aspiration.</p>
        <p>The elbow point in the cross-validation analysis (<xref rid="figure3" ref-type="fig">Figure 3</xref>B) was attained at 7 features (relevant features highlighted in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). These features and interactions were the most predictive of aspiration risk, but only in the specific sample we evaluated, namely ENT patients, while performing the vowel phonation task. We expect that as we expand our databases to include more diverse patient samples from different clinical settings, other voice features may emerge as significant predictors. Our categorization of “high” and “low” aspiration risk was based on VFSS at one point in time. It could be claimed that this is not reflective of the patient’s true longitudinal aspiration status. This is why we were careful to label the categories in terms of “risk” rather than absolute diagnoses. Those categorized as high aspiration “risk” were widely separated from the low aspiration “risk” (PAS 6-8 vs PAS 1-2), and crossovers between these categories in our ENT practice, although possible, are unlikely. Since PAS scores were obtained from retrospective clinical data and not from a prospective research protocol, interrater reliability of the PAS within our clinical ENT practice cannot be easily calculated. Nevertheless, since the PAS is a well-established and widely used clinical method of estimating aspiration risk, we used this scoring system to define the labels for our cohorts. The performance of the model was only tested for binary discrimination of high-risk versus low-risk aspirators. Aspirators in the moderate group were not tested due to a paucity of available retrospective data. However, we recognize that this is an important clinical group that needs to be the focus of future prospective research. It should be noted that SLPs do not make decisions about aspiration risk based on sustained phonations alone. However, this study highlights the potential benefit of ML to complement informed clinical decision-making by experts. Finally, we do not claim that this ML tool can be used as a confirmatory diagnostic test like VFSS or FEES. Rather, the goal of this study was to develop and validate a tool that can potentially serve as an easily deployable screening test used by bedside nurses and SLPs to more objectively screen for anterograde aspiration risk.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Otolaryngology (ear, nose, and throat) patients at high risk for aspiration have quantifiable voice characteristics that significantly differ from those who are at a low risk for aspiration, as detected by an ML model trained to analyze sustained phonation and tested on an independent cohort. This study uses ML techniques to quantify the quality of voice to estimate aspiration risk without performing a simultaneous swallow evaluation. Future research could include collecting voice samples (including connected speech) from a variety of clinical settings, including intensive care units, hospital wards, and ambulatory clinics, to facilitate model fine-tuning so that it is more generalizable.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Raw <italic>P</italic> values from univariate 2-sided <italic>t</italic> tests comparing acoustic features between high-risk and low-risk aspirators, along with corresponding Benjamini-Hochberg false discovery rate (FDR)-adjusted q-values. Q-values are provided to assess the robustness of univariate screening results under multiple testing and were not used to modify primary analyses or figures.</p>
        <media xlink:href="formative_v10i1e86069_app1.docx" xlink:title="DOCX File , 15 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Baseline characteristics of training cohort when high-risk and low-risk aspirators were age and sex-matched.</p>
        <media xlink:href="formative_v10i1e86069_app2.docx" xlink:title="DOCX File , 17 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>List of 33 voice features that were used for machine learning organized in clinically meaningful domains. The seven features that contributed most to the model’s discriminability are bolded. Importance score is the average absolute weights of the corresponding feature-specific subnetworks in the neural additive model, reflecting each feature’s relative contribution to the model’s overall discriminative performance.</p>
        <media xlink:href="formative_v10i1e86069_app3.docx" xlink:title="DOCX File , 17 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC</term>
          <def>
            <p>area under the curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CPP</term>
          <def>
            <p>cepstral peak prominence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">ENT</term>
          <def>
            <p>ear, nose, and throat</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">FEES</term>
          <def>
            <p>fiberoptic endoscopic evaluation of swallowing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">HNR</term>
          <def>
            <p>harmonic-to-noise ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">MCA</term>
          <def>
            <p>Mayo Clinic Arizona</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">NAM</term>
          <def>
            <p>neural additive model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">PAS</term>
          <def>
            <p>Penetration Aspiration Scale</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">RAP</term>
          <def>
            <p>relative average perturbation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">SLP</term>
          <def>
            <p>speech language pathologist</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">TRIPOD-AI</term>
          <def>
            <p>Transparent Reporting of a multivariable Prediction model for Individual Prognosis Or Diagnosis-Artificial Intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">VFSS</term>
          <def>
            <p>video fluoroscopic swallow study</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors are grateful to Lanyu Mi, MS (Department of Quantitative Health Sciences, MCA), who assisted our primary biostatistician, Nan Zhang, MS.</p>
    </ack>
    <notes>
      <title>Funding</title>
      <p>This work was made possible by Clinical and Translational Science Award (CTSA) Grant Number KL2 TR002379 from the National Center for Advancing Translational Sciences (NCATS), for a 3-year funding period from July 2023 to June 2026.</p>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>Conceptualization, supervision, funding acquisition, project administration, resources, investigation, methodology, formal analysis, visualization, validation, data curation, writing – original draft, writing – review and editing: CV</p>
        <p>Software, methodology, formal analysis, visualization, validation, data curation, writing – review and editing: JZ</p>
        <p>Investigation, methodology, formal analysis, visualization, validation, data curation, writing – review and editing: SC</p>
        <p>Methodology, formal analysis, visualization, validation, data curation, writing – review and editing: AAA, ER</p>
        <p>Methodology, visualization, validation, data curation, writing – review and editing: SH, AB, HS</p>
        <p>Investigation, methodology, formal analysis, visualization, validation, data curation, writing – review and editing: MKH</p>
        <p>Investigation, supervision, methodology, formal analysis, visualization, validation, data curation, writing – review and editing: JL</p>
        <p>Software, formal analysis, visualization, validation, data curation, writing – review and editing: NZ</p>
        <p>Validation, data curation, writing – review and editing: DO, RLP, LR</p>
        <p>Investigation, supervision, methodology, formal analysis, writing – review and editing: VO, DL</p>
        <p>Conceptualization, supervision, resources, investigation, methodology, formal analysis, visualization, validation, writing – review and editing: VB</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bersten</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Edibam</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hunt</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Moran</surname>
              <given-names>J</given-names>
            </name>
            <collab>Australian New Zealand Intensive Care Society Clinical Trials Group</collab>
          </person-group>
          <article-title>Incidence and mortality of acute lung injury and the acute respiratory distress syndrome in three Australian States</article-title>
          <source>Am J Respir Crit Care Med</source>
          <year>2002</year>
          <volume>165</volume>
          <issue>4</issue>
          <fpage>443</fpage>
          <lpage>448</lpage>
          <pub-id pub-id-type="doi">10.1164/ajrccm.165.4.2101124</pub-id>
          <pub-id pub-id-type="medline">11850334</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eworuke</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Major</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Gilbert McClain</surname>
              <given-names>LI</given-names>
            </name>
          </person-group>
          <article-title>National incidence rates for acute respiratory distress syndrome (ARDS) and ARDS cause-specific factors in the United States (2006-2014)</article-title>
          <source>J Crit Care</source>
          <year>2018</year>
          <volume>47</volume>
          <fpage>192</fpage>
          <lpage>197</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jcrc.2018.07.002</pub-id>
          <pub-id pub-id-type="medline">30015289</pub-id>
          <pub-id pub-id-type="pii">S0883-9441(18)30049-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ryu</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <article-title>Reflux-aspiration in chronic lung disease</article-title>
          <source>Ann Am Thorac Soc</source>
          <year>2020</year>
          <volume>17</volume>
          <issue>2</issue>
          <fpage>155</fpage>
          <lpage>164</lpage>
          <pub-id pub-id-type="doi">10.1513/AnnalsATS.201906-427CME</pub-id>
          <pub-id pub-id-type="medline">31697575</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fisichella</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Jalilvand</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The role of impaired esophageal and gastric motility in end-stage lung diseases and after lung transplantation</article-title>
          <source>J Surg Res</source>
          <year>2014</year>
          <volume>186</volume>
          <issue>1</issue>
          <fpage>201</fpage>
          <lpage>206</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jss.2013.09.023</pub-id>
          <pub-id pub-id-type="medline">24139634</pub-id>
          <pub-id pub-id-type="pii">S0022-4804(13)00898-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hartwig</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>RD</given-names>
            </name>
          </person-group>
          <article-title>Gastroesophageal reflux disease-induced aspiration injury following lung transplantation</article-title>
          <source>Curr Opin Organ Transplant</source>
          <year>2012</year>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>474</fpage>
          <lpage>478</lpage>
          <pub-id pub-id-type="doi">10.1097/MOT.0b013e328357f84f</pub-id>
          <pub-id pub-id-type="medline">22941322</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marik</surname>
              <given-names>PE</given-names>
            </name>
            <name name-style="western">
              <surname>Kaplan</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Aspiration pneumonia and dysphagia in the elderly</article-title>
          <source>Chest</source>
          <year>2003</year>
          <volume>124</volume>
          <issue>1</issue>
          <fpage>328</fpage>
          <lpage>336</lpage>
          <pub-id pub-id-type="doi">10.1378/chest.124.1.328</pub-id>
          <pub-id pub-id-type="medline">12853541</pub-id>
          <pub-id pub-id-type="pii">S0012-3692(15)36028-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Metheny</surname>
              <given-names>NA</given-names>
            </name>
            <name name-style="western">
              <surname>Clouse</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>YH</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Oliver</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Kollef</surname>
              <given-names>MH</given-names>
            </name>
          </person-group>
          <article-title>Tracheobronchial aspiration of gastric contents in critically ill tube-fed patients: frequency, outcomes, and risk factors</article-title>
          <source>Crit Care Med</source>
          <year>2006</year>
          <volume>34</volume>
          <issue>4</issue>
          <fpage>1007</fpage>
          <lpage>1015</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/16484901"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/01.CCM.0000206106.65220.59</pub-id>
          <pub-id pub-id-type="medline">16484901</pub-id>
          <pub-id pub-id-type="pmcid">PMC2396145</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>DeLegge</surname>
              <given-names>MH</given-names>
            </name>
          </person-group>
          <article-title>Aspiration pneumonia: incidence, mortality, and at-risk populations</article-title>
          <source>JPEN J Parenter Enteral Nutr</source>
          <year>2002</year>
          <volume>26</volume>
          <issue>6 Suppl</issue>
          <fpage>S19</fpage>
          <lpage>S24; discussion S24</lpage>
          <pub-id pub-id-type="doi">10.1177/014860710202600604</pub-id>
          <pub-id pub-id-type="medline">12405619</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McCullough</surname>
              <given-names>GH</given-names>
            </name>
            <name name-style="western">
              <surname>Wertz</surname>
              <given-names>RT</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenbek</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Mills</surname>
              <given-names>RH</given-names>
            </name>
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>KB</given-names>
            </name>
            <name name-style="western">
              <surname>Ashford</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>Inter- and intrajudge reliability of a clinical examination of swallowing in adults</article-title>
          <source>Dysphagia</source>
          <year>2000</year>
          <volume>15</volume>
          <issue>2</issue>
          <fpage>58</fpage>
          <lpage>67</lpage>
          <pub-id pub-id-type="doi">10.1007/s004550010002</pub-id>
          <pub-id pub-id-type="medline">10758187</pub-id>
          <pub-id pub-id-type="pii">10.1007/s004550010002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bours</surname>
              <given-names>GJJW</given-names>
            </name>
            <name name-style="western">
              <surname>Speyer</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lemmens</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Limburg</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>de Wit</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Bedside screening tests vs. videofluoroscopy or fibreoptic endoscopic evaluation of swallowing to detect dysphagia in patients with neurological disorders: systematic review</article-title>
          <source>J Adv Nurs</source>
          <year>2009</year>
          <volume>65</volume>
          <issue>3</issue>
          <fpage>477</fpage>
          <lpage>493</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1365-2648.2008.04915.x</pub-id>
          <pub-id pub-id-type="medline">19222645</pub-id>
          <pub-id pub-id-type="pii">JAN4915</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Virvidaki</surname>
              <given-names>IE</given-names>
            </name>
            <name name-style="western">
              <surname>Nasios</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kosmidou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Giannopoulos</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Milionis</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Swallowing and aspiration risk: a critical review of non instrumental bedside screening tests</article-title>
          <source>J Clin Neurol</source>
          <year>2018</year>
          <volume>14</volume>
          <issue>3</issue>
          <fpage>265</fpage>
          <lpage>274</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29504298"/>
          </comment>
          <pub-id pub-id-type="doi">10.3988/jcn.2018.14.3.265</pub-id>
          <pub-id pub-id-type="medline">29504298</pub-id>
          <pub-id pub-id-type="pii">14.e12</pub-id>
          <pub-id pub-id-type="pmcid">PMC6031981</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Martin-Harris</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>The videofluorographic swallowing study</article-title>
          <source>Phys Med Rehabil Clin N Am</source>
          <year>2008</year>
          <volume>19</volume>
          <issue>4</issue>
          <fpage>769</fpage>
          <lpage>785, viii</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/18940640"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.pmr.2008.06.004</pub-id>
          <pub-id pub-id-type="medline">18940640</pub-id>
          <pub-id pub-id-type="pii">S1047-9651(08)00059-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC2586156</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hiss</surname>
              <given-names>SG</given-names>
            </name>
            <name name-style="western">
              <surname>Postma</surname>
              <given-names>GN</given-names>
            </name>
          </person-group>
          <article-title>Fiberoptic endoscopic evaluation of swallowing</article-title>
          <source>Laryngoscope</source>
          <year>2003</year>
          <volume>113</volume>
          <issue>8</issue>
          <fpage>1386</fpage>
          <lpage>1393</lpage>
          <pub-id pub-id-type="doi">10.1097/00005537-200308000-00023</pub-id>
          <pub-id pub-id-type="medline">12897564</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ryu</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>KH</given-names>
            </name>
          </person-group>
          <article-title>Prediction of laryngeal aspiration using voice analysis</article-title>
          <source>Am J Phys Med Rehabil</source>
          <year>2004</year>
          <volume>83</volume>
          <issue>10</issue>
          <fpage>753</fpage>
          <lpage>757</lpage>
          <pub-id pub-id-type="doi">10.1097/01.phm.0000140798.97706.a5</pub-id>
          <pub-id pub-id-type="medline">15385783</pub-id>
          <pub-id pub-id-type="pii">00002060-200410000-00002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>YA</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jee</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Jo</surname>
              <given-names>CW</given-names>
            </name>
            <name name-style="western">
              <surname>Koo</surname>
              <given-names>BS</given-names>
            </name>
          </person-group>
          <article-title>Detection of voice changes due to aspiration via acoustic voice analysis</article-title>
          <source>Auris Nasus Larynx</source>
          <year>2018</year>
          <volume>45</volume>
          <issue>4</issue>
          <fpage>801</fpage>
          <lpage>806</lpage>
          <pub-id pub-id-type="doi">10.1016/j.anl.2017.10.007</pub-id>
          <pub-id pub-id-type="medline">29097046</pub-id>
          <pub-id pub-id-type="pii">S0385-8146(17)30785-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J-M</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>M-S</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>S-Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ryu</surname>
              <given-names>JS</given-names>
            </name>
          </person-group>
          <article-title>Prediction of dysphagia aspiration through machine learning-based analysis of patients' postprandial voices</article-title>
          <source>J Neuroeng Rehabil</source>
          <year>2024</year>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>43</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jneuroengrehab.biomedcentral.com/articles/10.1186/s12984-024-01329-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12984-024-01329-6</pub-id>
          <pub-id pub-id-type="medline">38555417</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12984-024-01329-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC10981344</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lechien</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Saussez</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Harmegnies</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Finck</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Burns</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>Laryngopharyngeal reflux and voice disorders: a multifactorial model of etiology and pathophysiology</article-title>
          <source>J Voice</source>
          <year>2017</year>
          <volume>31</volume>
          <issue>6</issue>
          <fpage>733</fpage>
          <lpage>752</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://hdl.handle.net/2268/314333"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jvoice.2017.03.015</pub-id>
          <pub-id pub-id-type="medline">28438489</pub-id>
          <pub-id pub-id-type="pii">S0892-1997(17)30043-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rosenbek</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Robbins</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Roecker</surname>
              <given-names>EB</given-names>
            </name>
            <name name-style="western">
              <surname>Coyle</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Wood</surname>
              <given-names>JL</given-names>
            </name>
          </person-group>
          <article-title>A penetration-aspiration scale</article-title>
          <source>Dysphagia</source>
          <year>1996</year>
          <volume>11</volume>
          <issue>2</issue>
          <fpage>93</fpage>
          <lpage>98</lpage>
          <pub-id pub-id-type="doi">10.1007/BF00417897</pub-id>
          <pub-id pub-id-type="medline">8721066</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alkhuwaiter</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Davidson</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hopkins-Rossabi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Martin-Harris</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Scoring the penetration-aspiration scale (PAS) in two conditions: a reliability study</article-title>
          <source>Dysphagia</source>
          <year>2022</year>
          <volume>37</volume>
          <issue>2</issue>
          <fpage>407</fpage>
          <lpage>416</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33880656"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s00455-021-10292-6</pub-id>
          <pub-id pub-id-type="medline">33880656</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00455-021-10292-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC8528890</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Steele</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Grace-Martin</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Reflections on clinical and statistical use of the penetration-aspiration scale</article-title>
          <source>Dysphagia</source>
          <year>2017</year>
          <volume>32</volume>
          <issue>5</issue>
          <fpage>601</fpage>
          <lpage>616</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/28534064"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s00455-017-9809-z</pub-id>
          <pub-id pub-id-type="medline">28534064</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00455-017-9809-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC5608795</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Degottex</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kane</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Drugman</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Raitio</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Scherer</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>COVAREP — a collaborative voice analysis repository for speech technologies</article-title>
          <year>2014</year>
          <conf-name>Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)</conf-name>
          <conf-date>May 4–9, 2014</conf-date>
          <conf-loc>Florence, Italy</conf-loc>
          <publisher-name>IEEE</publisher-name>
          <pub-id pub-id-type="doi">10.1109/icassp.2014.6853739</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Melnick</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Frosst</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lengerich</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Caruana</surname>
              <given-names>R</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Neural additive models: interpretable machine learning with neural nets</article-title>
          <year>2021</year>
          <conf-name>Proceedings of the 35th International Conference on Neural Information Processing Systems (NIPS '21)</conf-name>
          <conf-date>December 6–14, 2021</conf-date>
          <conf-loc>Virtual (Online)</conf-loc>
          <fpage>4699</fpage>
          <lpage>4711</lpage>
          <pub-id pub-id-type="doi">10.48550/arXiv.2004.13912</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kadiri</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Alku</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Analysis and detection of pathological voice using glottal source features</article-title>
          <source>IEEE J Sel Top Signal Process</source>
          <year>2020</year>
          <volume>14</volume>
          <issue>2</issue>
          <fpage>367</fpage>
          <lpage>379</lpage>
          <pub-id pub-id-type="doi">10.1109/jstsp.2019.2957988</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Park</surname>
              <given-names>HY</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Im</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Post-stroke respiratory complications using machine learning with voice features from mobile devices</article-title>
          <source>Sci Rep</source>
          <year>2022</year>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>16682</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-022-20348-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-022-20348-8</pub-id>
          <pub-id pub-id-type="medline">36202829</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-022-20348-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC9537337</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Scherer</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Stratou</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gratch</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Morency</surname>
              <given-names>LP</given-names>
            </name>
          </person-group>
          <article-title>Investigating voice quality as a speaker-independent indicator of depression and PTSD</article-title>
          <year>2013</year>
          <conf-name>Proceedings of Interspeech 2013</conf-name>
          <conf-date>August 25–29, 2013</conf-date>
          <conf-loc>Lyon, France</conf-loc>
          <pub-id pub-id-type="doi">10.21437/interspeech.2013-240</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hamed</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Dara</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kremer</surname>
              <given-names>SC</given-names>
            </name>
          </person-group>
          <article-title>Network intrusion detection system based on recursive feature addition and bigram technique</article-title>
          <source>Comput Secur</source>
          <year>2018</year>
          <volume>73</volume>
          <fpage>137</fpage>
          <lpage>155</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cose.2017.10.011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Sung</surname>
              <given-names>AH</given-names>
            </name>
          </person-group>
          <article-title>Recursive feature addition for gene selection</article-title>
          <year>2006</year>
          <conf-name>Proceedings of the 2006 International Joint Conference on Neural Networks</conf-name>
          <conf-date>July 16-21, 2006</conf-date>
          <conf-loc>Sheraton Vancouver Wall Centre Hotel, Vancouver, BC, Canada</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ijcnn.2006.246851</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Groves-Wright</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Boyce</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kelchner</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Perception of wet vocal quality in identifying penetration/aspiration during swallowing</article-title>
          <source>J Speech Lang Hear Res</source>
          <year>2010</year>
          <volume>53</volume>
          <issue>3</issue>
          <fpage>620</fpage>
          <lpage>632</lpage>
          <pub-id pub-id-type="doi">10.1044/1092-4388(2009/08-0246)</pub-id>
          <pub-id pub-id-type="medline">20029051</pub-id>
          <pub-id pub-id-type="pii">1092-4388_2009_08-0246</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Warms</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>"Wet Voice" as a predictor of penetration and aspiration in oropharyngeal dysphagia</article-title>
          <source>Dysphagia</source>
          <year>2000</year>
          <volume>15</volume>
          <issue>2</issue>
          <fpage>84</fpage>
          <lpage>88</lpage>
          <pub-id pub-id-type="doi">10.1007/s004550010005</pub-id>
          <pub-id pub-id-type="medline">10758190</pub-id>
          <pub-id pub-id-type="pii">10.1007/s004550010005</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Waito</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bailey</surname>
              <given-names>GL</given-names>
            </name>
            <name name-style="western">
              <surname>Molfenter</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Zoratto</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Steele</surname>
              <given-names>CM</given-names>
            </name>
          </person-group>
          <article-title>Voice-quality abnormalities as a sign of dysphagia: validation against acoustic and videofluoroscopic data</article-title>
          <source>Dysphagia</source>
          <year>2011</year>
          <volume>26</volume>
          <issue>2</issue>
          <fpage>125</fpage>
          <lpage>134</lpage>
          <pub-id pub-id-type="doi">10.1007/s00455-010-9282-4</pub-id>
          <pub-id pub-id-type="medline">20454806</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hirano</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>Clinical Examination of Voice</source>
          <year>1981</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Springer-Verlag</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Berisha</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Krantsevich</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hahn</surname>
              <given-names>PR</given-names>
            </name>
            <name name-style="western">
              <surname>Hahn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Dasarathy</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Turaga</surname>
              <given-names>P</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Digital medicine and the curse of dimensionality</article-title>
          <source>NPJ Digit Med</source>
          <year>2021</year>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>153</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-021-00521-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-021-00521-5</pub-id>
          <pub-id pub-id-type="medline">34711924</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-021-00521-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC8553745</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Berisha</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Liss</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Responsible development of clinical speech AI: bridging the gap between clinical research and technology</article-title>
          <source>NPJ Digit Med</source>
          <year>2024</year>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>208</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-024-01199-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-024-01199-1</pub-id>
          <pub-id pub-id-type="medline">39122889</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-024-01199-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC11316053</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Siegert</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Cummins</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Benway</surname>
              <given-names>NR</given-names>
            </name>
            <name name-style="western">
              <surname>Liss</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A tutorial on clinical speech AI development: from data collection to model validation</article-title>
          <source>arXiv</source>
          <year>2024</year>
          <month>10</month>
          <day>29</day>
          <fpage>1</fpage>
          <lpage>76</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2410.21640"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2410.21640</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stegmann</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Hahn</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Duncan</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rutkove</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Liss</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shefner</surname>
              <given-names>JM</given-names>
            </name>
            <collab>et al</collab>
          </person-group>
          <article-title>Estimation of forced vital capacity using speech acoustics in patients with ALS</article-title>
          <source>Amyotroph Lateral Scler Frontotemporal Degener</source>
          <year>2021</year>
          <volume>22</volume>
          <issue>sup1</issue>
          <fpage>14</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.tandfonline.com/doi/10.1080/21678421.2020.1866013?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:crossref.org&amp;rfr_dat=cr_pub%20%200pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/21678421.2020.1866013</pub-id>
          <pub-id pub-id-type="medline">34348537</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
