<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v9i1e65555</article-id><article-id pub-id-type="doi">10.2196/65555</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Acoustic and Natural Language Markers for Bipolar Disorder: A Pilot, mHealth Cross-Sectional Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Crocamo</surname><given-names>Cristina</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Cioni</surname><given-names>Riccardo Matteo</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Canestro</surname><given-names>Aurelia</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Nasti</surname><given-names>Christian</given-names></name><degrees>MD</degrees><xref ref-type="aff" 
rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Palpella</surname><given-names>Dario</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Piacenti</surname><given-names>Susanna</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Bartoccetti</surname><given-names>Alessandra</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Re</surname><given-names>Martina</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Simonetti</surname><given-names>Valentina</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Barattieri di San Pietro</surname><given-names>Chiara</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Bulgheroni</surname><given-names>Maria</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Bartoli</surname><given-names>Francesco</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Carr&#x00E0;</surname><given-names>Giuseppe</given-names></name><degrees>MD, MSc, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>School of Medicine and Surgery, University of 
Milano-Bicocca</institution><addr-line>via Cadore 48</addr-line><addr-line>Monza</addr-line><country>Italy</country></aff><aff id="aff2"><institution>Ab.Acus</institution><addr-line>Milan</addr-line><country>Italy</country></aff><aff id="aff3"><institution>Laboratory of Neurolinguistics and Experimental Pragmatics (NEP), University School for Advanced Studies IUSS</institution><addr-line>Pavia</addr-line><country>Italy</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Mavragani</surname><given-names>Amaryllis</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Meyer</surname><given-names>Denny</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Martin</surname><given-names>Vincent</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Cristina Crocamo, PhD, School of Medicine and Surgery, University of Milano-Bicocca, via Cadore 48, Monza, 20900, Italy, 39 0264488483; <email>cristina.crocamo@unimib.it</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>16</day><month>4</month><year>2025</year></pub-date><volume>9</volume><elocation-id>e65555</elocation-id><history><date date-type="received"><day>20</day><month>08</month><year>2024</year></date><date date-type="rev-recd"><day>29</day><month>01</month><year>2025</year></date><date date-type="accepted"><day>12</day><month>02</month><year>2025</year></date></history><copyright-statement>&#x00A9; Cristina Crocamo, Riccardo Matteo Cioni, Aurelia Canestro, Christian Nasti, Dario Palpella, Susanna Piacenti, Alessandra Bartoccetti, Martina Re, Valentina Simonetti, Chiara Barattieri di San Pietro, Maria Bulgheroni, Francesco Bartoli, Giuseppe Carr&#x00E0;. 
Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 16.4.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2025/1/e65555"/><abstract><sec><title>Background</title><p>Monitoring symptoms of bipolar disorder (BD) is a challenge faced by mental health services. Speech patterns are crucial in assessing the current experiences, emotions, and thought patterns of people with BD. 
Natural language processing (NLP) and acoustic signal processing may support ongoing BD assessment within a mobile health (mHealth) framework.</p></sec><sec><title>Objective</title><p>Using both acoustic and NLP-based features from the speech of people with BD, we built an app-based tool and tested its feasibility and performance to remotely assess the individual clinical status.</p></sec><sec sec-type="methods"><title>Methods</title><p>We carried out a pilot, observational study, sampling adults diagnosed with BD from the caseload of the Nord Milano Mental Health Trust (Italy) to explore the relationship between selected speech features and symptom severity and to test their potential to remotely assess mental health status. Symptom severity assessment was based on clinician ratings, using the Young Mania Rating Scale (YMRS) and Montgomery-&#x00C5;sberg Depression Rating Scale (MADRS) for manic and depressive symptoms, respectively. Leveraging a digital health tool embedded in a mobile app, which records and processes speech, participants self-administered verbal performance tasks. Both NLP-based and acoustic features were extracted, testing associations with mood states and exploiting machine learning approaches based on random forest models.</p></sec><sec sec-type="results"><title>Results</title><p>We included 32 subjects (mean [SD] age 49.6 [14.3] years; 50% [16/32] females) with a MADRS median (IQR) score of 13 (21) and a YMRS median (IQR) score of 5 (16). Participants freely managed the digital environment of the app, without perceiving it as intrusive and reporting an acceptable system usability level (average score 73.5, SD 19.7). Small-to-moderate correlations between speech features and symptom severity were uncovered, with sex-based differences in predictive capability. Higher latency time (<italic>&#x03C1;</italic>=0.152), increased silences (<italic>&#x03C1;</italic>=0.416), and vocal perturbations correlated with depressive symptomatology. 
Pressure of speech based on the mean intraword time (<italic>&#x03C1;</italic>=&#x2013;0.343) and lower voice instability based on jitter-related parameters (<italic>&#x03C1;</italic> ranging from &#x2013;0.19 to &#x2013;0.27) were detected for manic symptoms. However, a higher contribution of NLP-based and conversational features, rather than acoustic features, was uncovered, especially for predictive models for depressive symptom severity (NLP-based: <italic>R</italic><sup>2</sup>=0.25, mean squared error [MSE]=110.07, mean absolute error [MAE]=8.17; acoustics: <italic>R</italic><sup>2</sup>=0.11, MSE=133.75, MAE=8.86; combined: <italic>R</italic><sup>2</sup>=0.16; MSE=118.53, MAE=8.68).</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Remotely collected speech patterns, including both linguistic and acoustic features, are associated with symptom severity levels and may help differentiate clinical conditions in individuals with BD during their mood state assessments. In the future, multimodal, smartphone-integrated digital ecological momentary assessments could serve as a powerful tool for clinical purposes, remotely complementing standard, in-person mental health evaluations.</p></sec></abstract><kwd-group><kwd>digital mental health</kwd><kwd>remote assessment</kwd><kwd>mHealth</kwd><kwd>speech</kwd><kwd>NLP</kwd><kwd>natural language processing</kwd><kwd>acoustic</kwd><kwd>symptom severity</kwd><kwd>machine learning</kwd><kwd>markers</kwd><kwd>mental health</kwd><kwd>bipolar disorders</kwd><kwd>app</kwd><kwd>applications</kwd><kwd>multimodal</kwd><kwd>mobile health</kwd><kwd>voice</kwd><kwd>vocal</kwd><kwd>bipolar</kwd><kwd>verbal</kwd><kwd>emotion</kwd><kwd>emotional</kwd><kwd>psychiatry</kwd><kwd>psychiatric</kwd><kwd>mental illness</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Bipolar disorder (BD) is a lifelong, episodic illness characterized by mood recurrences, including manic or 
hypomanic, depressive, and mixed episodes [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. The burden associated with BD, affecting families, carers, and mental health care systems, is heavy [<xref ref-type="bibr" rid="ref4">4</xref>]. Community services often struggle in delivering regular monitoring of BD treatment needs, resulting in relapses that seem difficult to predict [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>Language disturbances are among the core symptoms of acute episodes in BD, since speech patterns are modulated by the emotional and neurophysiological status [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. Therefore, language may play a key role in the assessment of an individual&#x2019;s current experiences, emotions, thought patterns, and symptoms. While content analysis may reveal grandiosity associated with elevated mood, impulsivity, or changes in goal-directed activities, natural language may provide insights into mood fluctuations, cognitive processes, and behavioral patterns [<xref ref-type="bibr" rid="ref9">9</xref>]. In particular, changes in the rate of speech are likely to indicate mood oscillations, including pressure of speech and increased verbosity during manic episodes [<xref ref-type="bibr" rid="ref10">10</xref>] and poverty of speech and increased pause times during depressive episodes [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. Clinicians are trained to recognize variations in language and voice, along with gestures and facial expressions, implicitly assessing both coherence and organization of speech and natural language features. 
However, this process is inevitably vulnerable to inconsistencies and biases.</p><p>Recent research in mental health and computer science has put forward computational approaches for speech analysis across a variety of mental disorders, proposing automated methods to assess and monitor the individual&#x2019;s mental state through speech patterns [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref18">18</xref>]. Promising techniques in speech acoustic signal processing [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref21">21</xref>], using mobile health (mHealth) technology, can bridge subjective and objective components across various stages, such as prediction of illness onset, diagnostic processes, assessment of severity, and forecast of treatment outcomes [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]. Indeed, natural language processing (NLP) techniques, exploring language resources (eg, lexical choices, syntax, and semantics) both qualitatively and quantitatively (eg, topic modeling, clustering, and classification), may produce deeper insights across different clinical conditions [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. For example, observable linguistic traits (eg, increased use of both first-person pronouns and negative emotion expressions) can be identified among people with BD [<xref ref-type="bibr" rid="ref23">23</xref>]. However, although linguistic features are informative, they are context-dependent and inferred according to word transcriptions [<xref ref-type="bibr" rid="ref27">27</xref>]. 
Thus, speech analyses combining acoustic-dependent features (eg, speech prosody and voice quality) with NLP-based measures appear more promising in terms of model predictions, possibly providing a more accurate mental health assessment [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>].</p><p>Indeed, research has shown that acoustic features are markers of emotional states in BD [<xref ref-type="bibr" rid="ref29">29</xref>], and that quantifiable speech differences can predict the scores of scales such as the Young Mania Rating Scale (YMRS) and the Montgomery-&#x00C5;sberg Depression Rating Scale (MADRS) [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. On the other hand, recent evidence has shown how smartphone-based voice data [<xref ref-type="bibr" rid="ref30">30</xref>] can enhance BD monitoring in real time, detecting possible mood changes [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. Thus, speech-based systems embedded in smartphones might be useful tools for complementary, continuous assessments of BD clinical states. We therefore built an app-based tool, jointly using acoustic and NLP-based features from the speech of people with BD who delivered a narrative, and carried out a pilot study aimed at testing its feasibility and performance to remotely assess the individual clinical status. Continuous, uninterrupted spoken accounts, as supplied by individuals, provided the unique opportunity to combine communication style information from an in-depth set of acoustic features and NLP-based scores as potential digital markers of symptom severity in speech. 
We rigorously chose to test the tool&#x2019;s performance against standard psychometric assessments of mania and depression in order to explore its potential for remote, complementary assessments.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><p>The report of this study adheres to the STROBE (Strengthening the Reporting of Observational Studies in Epidemiology) statement (checklist presented in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) [<xref ref-type="bibr" rid="ref33">33</xref>].</p><sec id="s2-1"><title>Study Design and Sampling Strategies</title><p>We conducted a pilot, cross-sectional study involving adult participants (aged 18 years or older) from the caseload of the Nord Milano Mental Health Trust (Italy). The Trust includes 2 psychiatric intensive care units, with a total of 27 beds, and also provides community mental health care for the same 280,000 inhabitants of the northern area of the Metropolitan City of Milan through 4 community mental health teams with multidisciplinary staff. The relevant catchment area comprises highly urbanized, both deprived and affluent, districts.</p><p>Inclusion criteria comprised a diagnosis of BD and the willingness to participate in the study. People with physical impairments affecting their acoustic capabilities were excluded. Based on inclusion and exclusion criteria, eligible individuals were identified among individuals consecutively admitted to the Trust. Then, they were approached by the research team, explaining the purpose of the study and, if any, potential risks.</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>Recruitment efforts were carried out in accordance with ethical guidelines to ensure the well-being and safety of all participants. Study participants signed a written informed consent and were not compensated for their involvement. 
The study received ethical approval (protocol number 172&#x2010;17032023) from the local ethical committee. To maintain participant privacy and confidentiality, all study data were pseudonymized prior to analysis. No individual participants are identifiable in any images included in this manuscript or Multimedia Appendices.</p></sec><sec id="s2-3"><title>Procedures</title><p>Acoustic data were retrieved by asking participants to self-administer verbal performance tasks through a mobile app on their smartphones (SPEAKapp; [<xref ref-type="bibr" rid="ref34">34</xref>]). Clinical testing and app usage took place on the same day in the study setting (inpatient and outpatient services). Then, the System Usability Scale (SUS), a short 10-item questionnaire based on a 5-point Likert scale, was administered to assess the usability [<xref ref-type="bibr" rid="ref35">35</xref>] of the app.</p><p>Verbal performance in terms of prose recall was based on the Babcock test [<xref ref-type="bibr" rid="ref36">36</xref>], for which participants were asked to listen to a short story characterized by graphic and intense contents (eg, a death in a car crash) and then to repeat what she or he remembered from this narrative. This enabled to capture speech timing patterns based on sustained speech samples.</p><p>The app gathered participants&#x2019; verbal production by using the smartphone-integrated microphone, recording and processing participants&#x2019; speech by leveraging Google Speech-To-Text APIs [<xref ref-type="bibr" rid="ref37">37</xref>] and Python libraries (eg, Parselmouth for the Praat software [<xref ref-type="bibr" rid="ref38">38</xref>]). Recordings involved the use of one audio channel based on the participant&#x2019;s voice in a controlled environment with minimal acoustic conditions. Both the raw audio data and the transcribed text content were processed to extract acoustic and NLP-based features from speech outputs. 
NLP and acoustic signal models were embedded in the backend part of the mobile app.</p></sec><sec id="s2-4"><title>Measures</title><p>Consistent with recent evidence, we assumed speech as verbal behavior, the spoken output of the mental system underlying the language [<xref ref-type="bibr" rid="ref39">39</xref>]. Through speech recognition, acoustic and linguistic features were extracted. Then, based on both NLP and acoustic features, we considered a multidimensional framework in order to generate appropriate discriminative information for the potential use of speech patterns as digital markers in BD [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. A full description of selected features is provided in Table S1 in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p></sec><sec id="s2-5"><title>NLP-Based, Semantic, and Conversational Indices</title><p>NLP-based scores were computed according to distributional semantic models, encompassing vectorial representations for the meaning of words in a multi-dimensional space.</p><p>Standard linguistic scores included both the number of words, indicative of poverty of speech, and the number of words produced that matched the story text. On the other hand, novel NLP-based scores integrated mean intraword time, estimating the average time taken to articulate or pronounce subsequent words, as an indicator of processing speed, as well as word mover&#x2019;s distance (WMD), capturing both lexical overlap and semantic similarity. In particular, WMD was estimated as the minimum cumulative distance between words required to exactly match the point cloud of the text of the full correct story (ie, the content distance between the full correct story and the story narrative produced by the participant), thus incorporating the semantic similarity between individual word pairs into the word distance metric [<xref ref-type="bibr" rid="ref40">40</xref>]. 
In addition, latency time was calculated as a novel NLP-based score, taking into account the delay between the initiation of a spoken utterance or action and the production of its intended outcome or response when starting the task (ie, the first word).</p><p>Additional objective information was extracted from speech data. These quantitative measures included (1) speech duration, (2) speaking time (ie, phonation), (3) silence, (4) ratios of speaking time to speech duration as well as of silence to speaking time, and (5) speech rate.</p></sec><sec id="s2-6"><title>Acoustic Indices From Vocal Signals (Prosodic Cues Indices)</title><p>Measures for prosodic cues (acoustic indices quantifying how people talk during conversations) were based on the signal&#x2019;s frequency and energy or amplitude. These were assumed to contribute to conveying paralinguistic meaning [<xref ref-type="bibr" rid="ref41">41</xref>]. Based on nontextual data, acoustic components of speech were defined as the key phonetic elements, that is, objectively and reproducibly quantified speech sounds [<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. Fundamental frequency (F0) was measured by the frequency of phonation [<xref ref-type="bibr" rid="ref43">43</xref>]. The short-term instability of the vibration of the vocal cords during phonation (ie, jitter-related indices) was also extracted (Table S1 in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>). Higher jitter values indicated speech patterns likely characterized by irregularities or hesitations, thus mirroring potential underlying psychological distress or emotional instability. 
Furthermore, microperturbations of the ampleness of the signal (ie, how variable acoustic peaks refer to the period-to-period variability of the signal peak-to-peak amplitude) were identified as small fluctuations in the intensity of vocal sound waves by shimmer-related measures, with higher values indicating greater variability or instability, while lower ones suggesting more stable vocal intensity (ie, smoother and more regular speech production).</p><p>Since both periodic and nonperiodic sound waves may characterize the voice, the mean harmonics-to-noise ratio was used to measure the relationship between harmonic and nonharmonic voice elements. Noisier, more raucous voices (ie, not smooth or clear) were expected to show lower harmonics-to-noise ratios, indicating vocal cord tension or irritation, possibly suggesting emotional distress.</p></sec><sec id="s2-7"><title>Psychometric Measures</title><p>Diagnosis of BD was confirmed by the Structured Clinical Interview for <italic>DSM-5</italic> (<italic>Diagnostic and Statistical Manual of Mental Disorders, Fifth Edition</italic>; SCID-5). Based on clinician-rated assessments, depressive symptom severity was measured by the MADRS [<xref ref-type="bibr" rid="ref44">44</xref>], while YMRS was used to assess manic symptoms [<xref ref-type="bibr" rid="ref45">45</xref>]. Scores ranged from 0 to 60 for both MADRS [<xref ref-type="bibr" rid="ref44">44</xref>] and YMRS [<xref ref-type="bibr" rid="ref45">45</xref>]. In addition, cutoffs for severe mood symptoms were either a YMRS score &#x2265;20 [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref47">47</xref>] or a MADRS score &#x2265;19 [<xref ref-type="bibr" rid="ref48">48</xref>].</p></sec><sec id="s2-8"><title>Statistical Analyses</title><p>First, we summarized participants&#x2019; characteristics, providing standard statistics for continuous and categorical variables. For both MADRS and YMRS, continuous scores were used. 
However, a supplementary analysis was performed based on clinically meaningful thresholds for symptom severity. A bivariate analysis was then carried out to measure the strength of the potential association between speech indices and psychometric measures. Features&#x2019; summary statistics were plotted, and correlation coefficients (Pearson and Spearman, according to assumptions on data distribution, eg, normality) were estimated. Color gradient heat plots were also generated for data visualization. Taking into account potential sex differences in speech acoustic indices [<xref ref-type="bibr" rid="ref49">49</xref>-<xref ref-type="bibr" rid="ref51">51</xref>], subgroup analyses were performed. Statistical significance was set at <italic>P</italic>&#x003C;.05.</p><p>Second, based on state-of-the-art algorithms, NLP and acoustic features extracted from natural language and audio streams (Table S1 in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>) were used to train machine-learning models to detect depressive and manic states by means of scores from MADRS and YMRS. Data were randomly split using a 5-fold nested cross-validation approach for training and testing in order to provide an unbiased evaluation of the model&#x2019;s performance. In particular, random forest (RF) models, with the potential to handle both linear and nonlinear relationships between features and the target variable, were implemented. The supervised learning algorithm, with no assumptions about the distribution of the target variable, was based on the ensemble learning method of different decision trees, whose predictions were aggregated using the scikit-learn library in Python. Exploiting the bagging techniques, building multiple decision trees, RF contributed to minimizing overfitting issues by randomizing the feature selection during each tree split. 
This was assumed to reduce sensitivity to noise and to make decision trees less correlated through the use of a unique subset of the initial data for every base model. Moreover, we deemed features scaling unnecessary due to both the properties of the RF model and the performance metrics of comparisons. Relevant models were trained to test final performance by metrics (ie, mean squared error [MSE], mean absolute error [MAE], and <italic>R</italic>-squared [<italic>R</italic><sup>2</sup>]). These tested overall performance, even controlling for sex. Shapley Additive Explanations values, showing features&#x2019; impact, were plotted. Data were analyzed using Stata release 18 and Python (version 3.10.9).</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Sample Characteristics</title><p>We included 32 subjects with BD (mean age 49.6, SD 14.3 years; 50% [16/32] females). The mean (SD) age at onset was 24.4 (10) years. As a whole, participants experienced more manic (median 4, IQR 8) than depressive episodes (median 2, IQR 5). About 40% (12/32) of participants reported a previous mood episode within 1 year before study enrollment. The MADRS median (IQR) score was 13 (21), while the YMRS median (IQR) score was 5 (16). Considering the app usage, participants reported high SUS scores on average (mean 73.5, SD 19.7). 
Demographic and clinical details are fully provided in <xref ref-type="table" rid="table1">Table 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Sample<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> characteristics.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Characteristics</td><td align="left" valign="bottom">BD<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> (N=32)</td></tr></thead><tbody><tr><td align="left" valign="top">Sex, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Female</td><td align="left" valign="top">16 (50)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Male</td><td align="left" valign="top">16 (50)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">Age (</named-content>years), mean (SD)</td><td align="left" valign="top">49.6 (14.3)</td></tr><tr><td align="left" valign="top">Marital status, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>In a relationship</td><td align="left" valign="top">12 (37)</td></tr><tr><td align="left" valign="top">Family situation, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Living alone</td><td align="left" valign="top">11 (34)</td></tr><tr><td align="left" valign="top">Education, n (%)<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Elementary</td><td align="left" 
valign="top">1 (3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Middle</td><td align="left" valign="top">12 (37)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>High</td><td align="left" valign="top">13 (41)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>University or superior</td><td align="left" valign="top">5 (16)</td></tr><tr><td align="left" valign="top">Employment, n (%)<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Employed</td><td align="left" valign="top">13 (41)</td></tr><tr><td align="left" valign="top">Setting, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Outpatient</td><td align="left" valign="top">13 (41)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Inpatient</td><td align="left" valign="top">19 (59)</td></tr><tr><td align="left" valign="top">Polarity of first episode, n (%)<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Depressive</td><td align="left" valign="top">12 (38)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hypomaniac or maniac</td><td align="left" valign="top">13 (41)</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Unknown</td><td align="left" valign="top">1 (3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">Age of onset<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup> (</named-content>years), mean (SD)</td><td align="left" valign="top">24.4 (10)</td></tr><tr><td align="left" valign="top">Family history, n (%)<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="top">11 (34)</td></tr><tr><td align="left" valign="top">Hospitalizations, median (IQR)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Lifetime</td><td align="left" valign="top">3 (7.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>12 months</td><td align="left" valign="top">1 (2)</td></tr><tr><td align="left" valign="top">Suicide attempts (lifetime), n (%)</td><td align="left" valign="top">10 (31)</td></tr><tr><td align="left" valign="top">Alcohol use disorder (lifetime), n (%)</td><td align="left" valign="top">3 (9)</td></tr><tr><td align="left" valign="top">Substance use disorder (lifetime), n (%)</td><td align="left" valign="top">6 (19)</td></tr><tr><td align="left" valign="top">Medication, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>FGA<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup></td><td align="left" valign="top">6 (19)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>SGA<sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup></td><td align="left" valign="top">28 (87)</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Mood stabilizer</td><td align="left" valign="top">26 (81)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Antidepressant</td><td align="left" valign="top">8 (25)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Benzodiazepine</td><td align="left" valign="top">16 (50)</td></tr><tr><td align="left" valign="top">Psychometric assessment</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Depressive symptoms (MADRS<sup><xref ref-type="table-fn" rid="table1fn6">f</xref></sup>), median (IQR)</td><td align="left" valign="top">13 (21)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MADRS &#x003C;19, n (%)</td><td align="left" valign="top">17 (53)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>MADRS &#x2265;19, n (%)</td><td align="left" valign="top">15 (47)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Manic symptoms (YMRS<sup><xref ref-type="table-fn" rid="table1fn7">g</xref></sup>), median (IQR)</td><td align="left" valign="top">5 (16)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>YMRS &#x003C;20, n (%)</td><td align="left" valign="top">26 (81)</td></tr><tr><td align="left" 
valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>YMRS &#x2265;20, n (%)</td><td align="left" valign="top">6 (19)</td></tr><tr><td align="left" valign="top">SUS<sup><xref ref-type="table-fn" rid="table1fn8">h</xref></sup> score, mean (SD)</td><td align="left" valign="top">73.5 (19.7)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>The sample is for a pilot, cross-sectional study in Italy.</p></fn><fn id="table1fn2"><p><sup>b</sup>BD: bipolar disorder.</p></fn><fn id="table1fn3"><p><sup>c</sup>Missing values: education (1), employment (2), age of onset (10), polarity of first episode (6), family history (10), alcohol use disorder (2), substance use disorder (1), FGA (2), SGA (1), mood stabilizer (2), antidepressant (3), benzodiazepine (4).</p></fn><fn id="table1fn4"><p><sup>d</sup>FGA: first-generation antipsychotics.</p></fn><fn id="table1fn5"><p><sup>e</sup>SGA: second-generation antipsychotics.</p></fn><fn id="table1fn6"><p><sup>f</sup>MADRS: Montgomery-&#x00C5;sberg Depression Rating Scale.</p></fn><fn id="table1fn7"><p><sup>g</sup>YMRS: Young Mania Rating Scale.</p></fn><fn id="table1fn8"><p><sup>h</sup>SUS: System Usability Scale (range 0&#x2010;100).</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Associations Between Symptom Severity and Speech Features</title><p>For descriptive purposes, NLP-based, conversational, and acoustic features are summarized in Figures S1A-S1D and S2A-S2D in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> by depressive and manic symptom severity, respectively.</p><p>In particular, grouping data into 2 categories (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>), statistically significant differences by depressive symptoms&#x2019; severity were found for many NLP-based and conversational-like 
measures, including word number, phonation (also as percentage over the speech duration), and mean intraword time. Correlation analyses, based on Spearman nonparametric analysis of symptom severity continuous scores, are displayed in <xref ref-type="fig" rid="figure1">Figures 1A-C</xref> and <xref ref-type="fig" rid="figure2">2A-C</xref>. These showed that both the total number of words and the length of phonation, as well as the related percentage out of segment duration, were negatively correlated (coefficients=&#x2212;0.35, &#x2212;0.32, and &#x2212;0.42) to depressive symptoms (<xref ref-type="fig" rid="figure1">Figure 1A</xref>). Consistent results were observed for the ratio between silence and phonation (coefficient=0.42), as well as for mean intraword time, which was positively correlated to depressive (coefficient=0.53) and negatively to manic (coefficient=&#x2212;0.34) symptoms. Among items for depressive symptoms assessment, this correlation was particularly clear between acoustic features and suicidal thoughts (coefficients ranging from 0.18 to 0.51). In addition, latency time also showed a moderate, though obviously opposite, correlation with manic and depressive symptoms, respectively (coefficients=&#x2212;0.28 and 0.15).</p><p>Subgroup analyses for NLP-based and conversational features revealed more pronounced relationships in females (<xref ref-type="fig" rid="figure1">Figure 1C</xref>) as compared with males (<xref ref-type="fig" rid="figure1">Figure 1B</xref>), showing a high correlation between depressive symptoms and mean intraword time (coefficient=0.75), phonation percentage (coefficient=&#x2212;0.56), and, consequently, the silence-phonation ratio (coefficient=0.56). 
Similarly, latency time was negatively correlated to manic symptoms among females (coefficient=&#x2212;0.60).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Correlation heatmap of NLP-based, semantic and conversational features in people with bipolar disorder. (A) Overall sample; (B) Male subgroup; (C) Female subgroup. MADRS: Montgomery-&#x00C5;sberg Depression Rating Scale, YMRS: Young Mania Rating Scale.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e65555_fig01.png"/></fig><p>On the other hand, a small positive correlation was uncovered between depressive symptoms and higher values of instability in speech patterns (jitter-related indices, with coefficients ranging from 0.10 to 0.16). In contrast, small-to-moderate negative correlations were observed between manic symptoms and lower values of instability (jitter-related indices, with coefficients ranging from &#x2212;0.19 to &#x2212;0.27). Small estimates were found for F0, respectively (coefficient=0.16 and &#x2212;0.18; <xref ref-type="fig" rid="figure2">Figure 2A</xref>). Except for shimmer_apq11 (manic symptoms coefficient=&#x2212;0.22), we did not find any substantial relationship between shimmer-related indices (describing stable and unstable vocal intensity and speech production) and symptomatology.</p><p>Subgroup analyses suggested a role for sex also in influencing acoustic features. In particular, we found deeper connections in males as compared with females, especially in terms of F0 and jitter-related indices (<xref ref-type="fig" rid="figure2">Figure 2B and C</xref>).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Correlation heatmap of acoustic features in people with bipolar disorder. (A) Overall sample; (B) Male subgroup; (C) Female subgroup. 
MADRS: Montgomery-&#x00C5;sberg Depression Rating Scale, YMRS: Young Mania Rating Scale.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e65555_fig02.png"/></fig></sec><sec id="s3-3"><title>Predictive Models From Speech Features</title><p>Considering depressive symptoms, performance metrics showed a contribution of NLP-based and conversational features higher than what was attributable to acoustic ones (<xref ref-type="table" rid="table2">Table 2</xref>). In particular, mean intraword time, silence-phonation ratio, ppq5 jitter (ie, perturbations in F0), WMD, and percentage of phonation over duration all ranked high in terms of relative importance.</p><p>Including sex into the analysis, a differential contribution of various features (NLP-based and conversational vs acoustics) to the predictive models for depressive (<xref ref-type="fig" rid="figure3">Figure 3A</xref>) and manic (<xref ref-type="fig" rid="figure3">Figure 3B</xref>) symptoms can be found. However, as for manic symptoms, although a relative contribution of different NLP-based and acoustic (eg, F0 SD) features was recorded, we could not find any reliable estimates for the relevant model, even including sex. 
<xref ref-type="table" rid="table2">Table 2</xref> shows detailed estimated performance metrics for testing for the trained RF regressors, even controlling for sex.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Performance estimates for random forest regression models in people with bipolar disorder.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Performance<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="bottom" colspan="2">Depressive symptoms</td><td align="left" valign="bottom" colspan="2">Manic symptoms</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Unadjusted</td><td align="left" valign="top">Adjusted<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="top">Unadjusted</td><td align="left" valign="top">Adjusted<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">NLP<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>R</italic><sup>2</sup> average</td><td align="left" valign="top">0.26</td><td align="left" valign="top">0.25</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 1</td><td align="left" valign="top">0.10</td><td align="left" valign="top">&#x2212;0.55</td><td align="left" valign="top">&#x2212;0.54</td><td align="left" 
valign="top">0.18</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 2</td><td align="left" valign="top">0.48</td><td align="left" valign="top">0.53</td><td align="left" valign="top">&#x2212;0.13</td><td align="left" valign="top">0.02</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 3</td><td align="left" valign="top">0.06</td><td align="left" valign="top">0.37</td><td align="left" valign="top">0.25</td><td align="left" valign="top">0.12</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 4</td><td align="left" valign="top">0.54</td><td align="left" valign="top">0.64</td><td align="left" valign="top">0.23</td><td align="left" valign="top">&#x2212;0.42</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 5</td><td align="left" valign="top">0.13</td><td align="left" valign="top">0.26</td><td align="left" valign="top">0.01</td><td align="left" valign="top">147.98</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Mean squared error average</td><td align="left" valign="top">105.46</td><td align="left" valign="top">110.07</td><td align="left" valign="top">153.78</td><td align="left" valign="top">147.98</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 1</td><td align="left" valign="top">136.73</td><td align="left" valign="top">259.26</td><td align="left" valign="top">223.06</td><td align="left" valign="top">92.05</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 2</td><td align="left" valign="top">33.02</td><td align="left" valign="top">46.49</td><td align="left" valign="top">121.79</td><td align="left" valign="top">156.74</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 3</td><td align="left" valign="top">137.64</td><td align="left" valign="top">104.25</td><td align="left" valign="top">135.66</td><td align="left" valign="top">121.60</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 4</td><td align="left" valign="top">79.32</td><td align="left" valign="top">33.00</td><td align="left" valign="top">134.85</td><td align="left" valign="top">167.15</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 5</td><td align="left" valign="top">140.61</td><td align="left" valign="top">107.35</td><td align="left" valign="top">153.32</td><td align="left" valign="top">202.35</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Mean absolute 
error average</td><td align="left" valign="top">8.08</td><td align="left" valign="top">8.17</td><td align="left" valign="top">10.58</td><td align="left" valign="top">10.13</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 1</td><td align="left" valign="top">9.58</td><td align="left" valign="top">13.64</td><td align="left" valign="top">12.47</td><td align="left" valign="top">7.79</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 2</td><td align="left" valign="top">3.36</td><td align="left" valign="top">5.57</td><td align="left" valign="top">9.28</td><td align="left" valign="top">10.90</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 3</td><td align="left" valign="top">10.31</td><td align="left" valign="top">8.56</td><td align="left" valign="top">10.40</td><td align="left" valign="top">9.26</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 4</td><td align="left" valign="top">7.59</td><td align="left" valign="top">4.34</td><td align="left" valign="top">9.82</td><td align="left" valign="top">9.71</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 5</td><td align="left" valign="top">9.26</td><td align="left" valign="top">8.74</td><td align="left" 
valign="top">10.96</td><td align="left" valign="top">13.00</td></tr><tr><td align="left" valign="top">Acoustics</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>R</italic><sup>2</sup> average</td><td align="left" valign="top"><named-content content-type="indent">&#x2014;</named-content></td><td align="left" valign="top">0.11</td><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x2014;</td><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 1</td><td align="left" valign="top">0.29</td><td align="left" valign="top">&#x2013;0.22</td><td align="left" valign="top">0.002</td><td align="left" valign="top">&#x2013;0.22</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 2</td><td align="left" valign="top">&#x2013;0.83</td><td align="left" valign="top">&#x2013;0.10</td><td align="left" valign="top">&#x2013;0.02</td><td align="left" valign="top">&#x2013;0.15</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 3</td><td align="left" valign="top">&#x2013;0.59</td><td align="left" valign="top">0.03</td><td align="left" valign="top">&#x2013;0.14</td><td align="left" 
valign="top">&#x2013;0.14</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 4</td><td align="left" valign="top">0.23</td><td align="left" valign="top">0.18</td><td align="left" valign="top">&#x2013;0.38</td><td align="left" valign="top">&#x2013;0.44</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 5</td><td align="left" valign="top">0.36</td><td align="left" valign="top">0.64</td><td align="left" valign="top">&#x2013;0.28</td><td align="left" valign="top">0.01</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Mean squared error average</td><td align="left" valign="top">161.64</td><td align="left" valign="top">133.75</td><td align="left" valign="top">162.86</td><td align="left" valign="top">163.51</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 1</td><td align="left" valign="top">47.97</td><td align="left" valign="top">222.18</td><td align="left" valign="top">68.9</td><td align="left" valign="top">125.14</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 2</td><td align="left" valign="top">333.00</td><td align="left" valign="top">200.40</td><td align="left" valign="top">160.47</td><td align="left" valign="top">122.80</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 3</td><td align="left" valign="top">202.17</td><td align="left" valign="top">85.04</td><td align="left" valign="top">185.63</td><td align="left" valign="top">175.34</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 4</td><td align="left" valign="top">128.54</td><td align="left" valign="top">148.52</td><td align="left" valign="top">272.25</td><td align="left" valign="top">225.23</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 5</td><td align="left" valign="top">96.49</td><td align="left" valign="top">12.62</td><td align="left" valign="top">127.06</td><td align="left" valign="top">170.30</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Mean absolute error average</td><td align="left" valign="top">10.02</td><td align="left" valign="top">8.86</td><td align="left" valign="top">10.35</td><td align="left" valign="top">10.73</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 1</td><td align="left" valign="top">5.27</td><td align="left" valign="top">11.76</td><td align="left" valign="top">7.09</td><td align="left" valign="top">9.77</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 2</td><td align="left" valign="top">16.43</td><td align="left" valign="top">13.8</td><td align="left" valign="top">10.94</td><td align="left" valign="top">8.82</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 3</td><td align="left" valign="top">11.77</td><td align="left" valign="top">6.00</td><td align="left" valign="top">12.48</td><td align="left" valign="top">11.70</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 4</td><td align="left" valign="top">9.69</td><td align="left" valign="top">10.13</td><td align="left" valign="top">14.05</td><td align="left" valign="top">12.34</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 5</td><td align="left" valign="top">6.95</td><td align="left" valign="top">2.62</td><td align="left" valign="top">7.21</td><td align="left" valign="top">11.01</td></tr><tr><td align="left" valign="top" colspan="5">Combined</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>R</italic><sup>2</sup> average</td><td align="left" valign="top">0.05</td><td align="left" valign="top">0.16</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 1</td><td 
align="left" valign="top">0.32</td><td align="left" valign="top">0.60</td><td align="left" valign="top">&#x2013;0.56</td><td align="left" valign="top">&#x2013;0.13</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 2</td><td align="left" valign="top">&#x2013;0.09</td><td align="left" valign="top">0.11</td><td align="left" valign="top">0.24</td><td align="left" valign="top">0.07</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 3</td><td align="left" valign="top">&#x2013;0.29</td><td align="left" valign="top">0.07</td><td align="left" valign="top">0.08</td><td align="left" valign="top">&#x2013;0.54</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 4</td><td align="left" valign="top">0.10</td><td align="left" valign="top">0.04</td><td align="left" valign="top">0.06</td><td align="left" valign="top">0.18</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 5</td><td align="left" valign="top">0.22</td><td align="left" valign="top">0.22</td><td align="left" valign="top">&#x2013;0.41</td><td align="left" valign="top">0.14</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Mean squared error average</td><td align="left" valign="top">120.90</td><td align="left" valign="top">118.53</td><td align="left" valign="top">135.94</td><td align="left" 
valign="top">140.03</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 1</td><td align="left" valign="top">87.51</td><td align="left" valign="top">34.13</td><td align="left" valign="top">158.54</td><td align="left" valign="top">183.39</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 2</td><td align="left" valign="top">183.67</td><td align="left" valign="top">111.11</td><td align="left" valign="top">60.67</td><td align="left" valign="top">122.32</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 3</td><td align="left" valign="top">184.71</td><td align="left" valign="top">164.45</td><td align="left" valign="top">192.81</td><td align="left" valign="top">126.43</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 4</td><td align="left" valign="top">47.71</td><td align="left" valign="top">148.83</td><td align="left" valign="top">178.84</td><td align="left" valign="top">112.36</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 5</td><td align="left" valign="top">100.91</td><td align="left" valign="top">134.10</td><td align="left" valign="top">88.83</td><td align="left" valign="top">155.68</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Mean absolute error average</td><td align="left" valign="top">8.65</td><td align="left" valign="top">8.68</td><td align="left" valign="top">9.61</td><td align="left" valign="top">10.00</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 1</td><td align="left" valign="top">6.69</td><td align="left" valign="top">4.37</td><td align="left" valign="top">11.21</td><td align="left" valign="top">11.50</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 2</td><td align="left" valign="top">11.49</td><td align="left" valign="top">7.33</td><td align="left" valign="top">6.95</td><td align="left" valign="top">8.86</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 3</td><td align="left" valign="top">11.04</td><td align="left" valign="top">11.29</td><td align="left" valign="top">11.40</td><td align="left" valign="top">10.30</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 4</td><td align="left" valign="top">5.57</td><td align="left" valign="top">10.15</td><td align="left" valign="top">11.19</td><td align="left" valign="top">8.26</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fold 5</td><td 
align="left" valign="top">8.46</td><td align="left" valign="top">10.26</td><td align="left" valign="top">7.28</td><td align="left" valign="top">11.08</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>Metrics for testing based on a nested cross-validation approach (pilot, cross-sectional study, N=32). Range for symptom scores: 0&#x2010;60.</p></fn><fn id="table2fn2"><p><sup>b</sup>Including sex.</p></fn><fn id="table2fn3"><p><sup>c</sup>NLP: natural language processing.</p></fn><fn id="table2fn4"><p><sup>d</sup>Not available.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Individual features contribution to depressive and manic symptoms predictions in sex-adjusted models among people with bipolar disorder.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e65555_fig03.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Main Findings</title><p>This study aimed at piloting the simultaneous use of speech acoustics, as well as natural language features, to glean insights into BD depressive and manic symptoms. Our findings corroborate evidence on the relationships between symptom severity and speech features, supporting the potential predictive role for clinical purposes of digital mental health applications, embedded in an mHealth integrated system.</p><p>First, the speech of participants with BD showed that vocal perturbations (eg, higher instability and hesitations considering voice quality), latency time, and increased silences and pauses over time speaking all correlated with depressive symptoms. Consistently, increased depressive symptoms resulted in NLP-based features such as a smaller number of words and longer mean intraword time, with lower pressure of speech. In our exploratory study, this relationship was particularly clear among females. 
This effect was corroborated by the predictive model, showing a contribution of NLP-based and conversational features higher than for acoustic ones. This finding aligns with prior evidence, advocating that text-based features contribute more to model accuracy than audio parameters [<xref ref-type="bibr" rid="ref18">18</xref>]. However, the latter component (ie, fundamental frequency, jitter- and shimmer-related indices) also deserves careful assessment, since our findings show that these indices might have an impact, at least among males, in predicting future episodes. Indeed, recent evidence from healthy populations sheds light on sex differences in speech markers (eg, prosodic features) with different acoustic cues conveying various emotions [<xref ref-type="bibr" rid="ref50">50</xref>]. A combination of inherent biological dissimilarities, socialization processes, influences of the social environment, and cultural expectations might contribute to these differences in both expression and perception of related emotional prosody [<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref53">53</xref>]. Moreover, individuals may modulate their speech to align with the dominant pitch range within a specific linguistic community [<xref ref-type="bibr" rid="ref54">54</xref>], and similar modulation may occur in conversational dialogues versus monologues and in spontaneous versus elicited speech. Thus, this criterion should be taken into account when designing apps with speech recognition and processing tasks for people with BD [<xref ref-type="bibr" rid="ref31">31</xref>].</p><p>Second, voice instability and hesitations, as well as mean intraword time, were negatively correlated with manic symptoms. However, the interpretation of the relationship between manic features and vocal abnormalities is not straightforward. Mixed findings emerged on the relationships between speech features and manic symptoms, preventing us from supporting our original hypothesis. 
One plausible explanation may stem from the sample characteristics. Indeed, our participants were more likely to report depressive symptoms, and just a few had severe manic features.</p><p>However, the overall moderate correlations between speech markers and symptom severity were consistent with previous work that used speech smartphone data to discriminate between different mood states [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. It has been argued that speech features may be useful to detect a trait [<xref ref-type="bibr" rid="ref55">55</xref>] rather than a state [<xref ref-type="bibr" rid="ref56">56</xref>] in BD. However, alterations in voice perturbations have been observed when assessing vocal markers of suicidal ideation [<xref ref-type="bibr" rid="ref57">57</xref>], and this makes further research for vocal features reasonable, at least for depressive conditions.</p></sec><sec id="s4-2"><title>Smartphone-Based Applications</title><p>Consistent with previous research on smartphone-based applications designed to record and analyze speech patterns in real time, our findings emphasize the feasibility of a simple, yet clinically useful, application of digital technology [<xref ref-type="bibr" rid="ref13">13</xref>]. In particular, we developed the frontend of the app as a basic digital environment, freely managed by participants on their own smartphones. 
Participants reported a high level of engagement with the tool, showing an acceptable system usability level as assessed by SUS [<xref ref-type="bibr" rid="ref35">35</xref>], without perceiving intrusiveness of the recording of both elicited and spontaneous conversations.</p><p>Comparisons of the vocal performance of people with BD with unaffected relatives and healthy controls have shown a clear speech &#x201C;fingerprint&#x201D; of the clinical condition [<xref ref-type="bibr" rid="ref58">58</xref>], suggesting the utility of multilevel inputs [<xref ref-type="bibr" rid="ref59">59</xref>]. However, there is also the need for a wider understanding of fluctuations in symptom severity and mood states in this population [<xref ref-type="bibr" rid="ref60">60</xref>]. The major strength of our study consists in the usefulness of different speech data (eg, linguistic, conversational, acoustics) to differentially identify symptoms of BD. Thus, for relapse prevention purposes, future research should possibly explore systems combining smartphone-based generated objective acoustics data with additional information, such as from facial expressions and gestures [<xref ref-type="bibr" rid="ref61">61</xref>]. This would ultimately improve BD state prediction, even considering classification tasks [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref62">62</xref>-<xref ref-type="bibr" rid="ref64">64</xref>].</p></sec><sec id="s4-3"><title>Clinical Implications: Interdisciplinary Perspective</title><p>This pilot study represents a step forward in the identification and utilization of digital biomarkers for BD from natural language and audio streams, with implications for personalized mental health care and early intervention strategies. Our approach holds promise for complementary, remote assessments enhancing depressive and partly manic states prediction by exploiting participants&#x2019; speech. 
This would have significant implications, especially considering BD fluctuating symptomatology. Nonetheless, to leverage live speech recordings as a predictive tool, repeated assessments are needed to identify individuals at risk of transitioning to depressive and manic states.</p><p>Despite promising findings from automated assessments, mental health care heavily relies on participant interviews, yet with often subjective reports, cognitive limitations, and stigma [<xref ref-type="bibr" rid="ref18">18</xref>]. Integrated systems, aiming at taking advantage of candidate digital markers from speech recognition, would possibly boost a care approach in which digital technology enhances, but does not replace, existing models from clinical assessment [<xref ref-type="bibr" rid="ref30">30</xref>]. Indeed, automated assessment does not inherently lead to adherence and engagement of individuals with BD [<xref ref-type="bibr" rid="ref65">65</xref>].</p><p>Finally, clinical, hypothesis-driven research on BD should not be dismissed, since algorithms may not be considered a black-box replacement for traditional data modeling, but they rather integrate with other systems, embedding a substantial clinical validation [<xref ref-type="bibr" rid="ref66">66</xref>,<xref ref-type="bibr" rid="ref67">67</xref>].</p></sec><sec id="s4-4"><title>Limitations and Future Directions</title><p>We should acknowledge some limitations of this study. Analyzing speech and natural language in individuals with BD implies a challenge due to the nature of the disorder and to ethical considerations.</p><p>First, properties of chosen machine-learning models may hamper identification of unknown patterns based on values that fall outside the training set. Effective NLP and supervised learning models may require high-quality, annotated datasets. 
While exploratory in nature, the study&#x2019;s limited sample size may have constrained the model&#x2019;s statistical power and the ability to capture the full complexity of the underlying data distribution, thereby hindering meaningful subgroup comparisons. Our preliminary findings should be replicated and extended in a larger, more diverse sample of people with BD to mitigate the risks associated with overfitting. Furthermore, future research should address classification approaches based on severity thresholds for both MADRS and YMRS. Accordingly, there is potential for alternative modeling approaches for regression tasks (eg, splines) that might be implemented in the future. While still considering the number of predictors, these may possibly enable a better understanding of the nature of the existing relationships and nonlinear patterns.</p><p>Consistently, the lack of standardized (linguistic and acoustic) markers represents a barrier when studying relationships with mood states. Indeed, the model may still learn to overfit to irrelevant or noisy features the data may contain, especially if they are informative in the training set by chance.</p><p>Furthermore, the speaker&#x2019;s identity may show a possible confounding role in a between-subject design. Therefore, studies with a longitudinal design (ie, within-subjects) should be recommended, deploying Ecological Momentary Assessment approaches [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref68">68</xref>]. In addition, speech patterns may generate misinterpretations if individual cultural and linguistic factors are not accounted for [<xref ref-type="bibr" rid="ref69">69</xref>]. Similarly, speech during manic episodes may exhibit circumstantiality or tangentiality, where individuals provide excessive details or veer off-topic. Rapid speech, tangential thinking, or unconventional language use pose challenges for automatic speech recognition systems. 
Analyzing such complex speech patterns requires a deep evaluation of language and context, achieving appropriate understanding of an individual&#x2019;s usual way of communicating in order to distinguish changes associated with BD episodes.</p><p>Furthermore, in our study, speech features were averaged over relevant duration, thus constraining the role of temporal variations across related measures in predicting symptom severity. Future research should endeavor to integrate dynamic aspects of speech on mood states transitioning.</p><p>Finally, other clinical variables, not investigated in our sample, are likely to influence the individual&#x2019;s speech. For instance, it should be noted that anxiety and anxious distress, often co-occurring with bipolar depression [<xref ref-type="bibr" rid="ref70">70</xref>], may significantly influence speech features [<xref ref-type="bibr" rid="ref71">71</xref>], as well as medication prescribed [<xref ref-type="bibr" rid="ref72">72</xref>-<xref ref-type="bibr" rid="ref74">74</xref>] and drug or alcohol comorbid conditions [<xref ref-type="bibr" rid="ref75">75</xref>].</p></sec><sec id="s4-5"><title>Conclusions</title><p>Speech patterns, underlying both linguistic and acoustic features, are able to yield quantifiable differences, thus embodying digital markers of symptom severity. Multimodal, smartphone-integrated digital assessments could serve as powerful tools for clinical purposes to remotely complement standard mental health evaluations, potentially contributing to distinguish clinical conditions in people with BD. 
Feasibility of similar systems seems promising, though issues related to privacy, intrusiveness, and clinical therapeutic relationships should be carefully considered.</p></sec></sec></body><back><ack><p>This research was supported by the FSE REACT-EU Competitive Research Grant Axis-IV DM 1062/2021: &#x201C;Natural Language Processing in Digital Mental Health.&#x201D; The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</p></ack><notes><sec><title>Data Availability</title><p>The datasets including audio data streams supporting the conclusions of this article are not publicly available as the original source has not granted permission to share that information but may be available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>CC, FB, and GC handled conceptualization. RMC, AC, CN, DP, SP, AB, and MR performed investigation. CC, FB, VS, CB, and MB contributed to methodology. VS and MB assisted with software. CC conducted formal analysis. GC performed supervision. CC contributed to writing &#x2013; original draft. 
CC, RMC, AC, CN, DP, SP, AB, MR, VS, CB, MB, FB, and GC contributed to writing &#x2013; review and editing.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">BD</term><def><p>bipolar disorder</p></def></def-item><def-item><term id="abb2">DSM-5</term><def><p>Diagnostic and Statistical Manual of Mental Disorders, Fifth Edition</p></def></def-item><def-item><term id="abb3">MADRS</term><def><p>Montgomery-&#x00C5;sberg Depression Rating Scale</p></def></def-item><def-item><term id="abb4">MAE</term><def><p>mean absolute error</p></def></def-item><def-item><term id="abb5">mHealth</term><def><p>mobile health</p></def></def-item><def-item><term id="abb6">MSE</term><def><p>mean squared error</p></def></def-item><def-item><term id="abb7">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term id="abb8">RF</term><def><p>random forest</p></def></def-item><def-item><term id="abb9">SCID-5</term><def><p>Structured Clinical Interview for DSM-5</p></def></def-item><def-item><term id="abb10">STROBE</term><def><p>Strengthening the Reporting of Observational Studies in Epidemiology</p></def></def-item><def-item><term id="abb11">WMD</term><def><p>word mover&#x2019;s distance</p></def></def-item><def-item><term id="abb12">YMRS</term><def><p>Young Mania Rating Scale</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bartoli</surname><given-names>F</given-names> </name><name name-style="western"><surname>Crocamo</surname><given-names>C</given-names> </name><name name-style="western"><surname>Carr&#x00E0;</surname><given-names>G</given-names> </name></person-group><article-title>Clinical correlates of DSM-5 mixed features in bipolar disorder: a meta-analysis</article-title><source>J Affect 
Disord</source><year>2020</year><month>11</month><day>1</day><volume>276</volume><fpage>234</fpage><lpage>240</lpage><pub-id pub-id-type="doi">10.1016/j.jad.2020.07.035</pub-id><pub-id pub-id-type="medline">32697704</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Grande</surname><given-names>I</given-names> </name><name name-style="western"><surname>Berk</surname><given-names>M</given-names> </name><name name-style="western"><surname>Birmaher</surname><given-names>B</given-names> </name><name name-style="western"><surname>Vieta</surname><given-names>E</given-names> </name></person-group><article-title>Bipolar disorder</article-title><source>Lancet</source><year>2016</year><month>04</month><day>9</day><volume>387</volume><issue>10027</issue><fpage>1561</fpage><lpage>1572</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(15)00241-X</pub-id><pub-id pub-id-type="medline">26388529</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McIntyre</surname><given-names>RS</given-names> </name><name name-style="western"><surname>Berk</surname><given-names>M</given-names> </name><name name-style="western"><surname>Brietzke</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Bipolar disorders</article-title><source>Lancet</source><year>2020</year><month>12</month><day>5</day><volume>396</volume><issue>10265</issue><fpage>1841</fpage><lpage>1856</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(20)31544-0</pub-id><pub-id pub-id-type="medline">33278937</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Karambelas</surname><given-names>GJ</given-names> </name><name 
name-style="western"><surname>Filia</surname><given-names>K</given-names> </name><name name-style="western"><surname>Byrne</surname><given-names>LK</given-names> </name><name name-style="western"><surname>Allott</surname><given-names>KA</given-names> </name><name name-style="western"><surname>Jayasinghe</surname><given-names>A</given-names> </name><name name-style="western"><surname>Cotton</surname><given-names>SM</given-names> </name></person-group><article-title>A systematic review comparing caregiver burden and psychological functioning in caregivers of individuals with schizophrenia spectrum disorders and bipolar disorders</article-title><source>BMC Psychiatry</source><year>2022</year><month>06</month><day>23</day><volume>22</volume><issue>1</issue><fpage>422</fpage><pub-id pub-id-type="doi">10.1186/s12888-022-04069-w</pub-id><pub-id pub-id-type="medline">35733174</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fajutrao</surname><given-names>L</given-names> </name><name name-style="western"><surname>Locklear</surname><given-names>J</given-names> </name><name name-style="western"><surname>Priaulx</surname><given-names>J</given-names> </name><name name-style="western"><surname>Heyes</surname><given-names>A</given-names> </name></person-group><article-title>A systematic review of the evidence of the burden of bipolar disorder in Europe</article-title><source>Clin Pract Epidemiol Ment Health</source><year>2009</year><month>01</month><day>23</day><volume>5</volume><fpage>3</fpage><pub-id pub-id-type="doi">10.1186/1745-0179-5-3</pub-id><pub-id pub-id-type="medline">19166608</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ogilvie</surname><given-names>AD</given-names> </name><name 
name-style="western"><surname>Morant</surname><given-names>N</given-names> </name><name name-style="western"><surname>Goodwin</surname><given-names>GM</given-names> </name></person-group><article-title>The burden on informal caregivers of people with bipolar disorder</article-title><source>Bipolar Disord</source><year>2005</year><volume>7 Suppl 1</volume><issue>Suppl 1</issue><fpage>25</fpage><lpage>32</lpage><pub-id pub-id-type="doi">10.1111/j.1399-5618.2005.00191.x</pub-id><pub-id pub-id-type="medline">15762866</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Goodwin</surname><given-names>FK</given-names> </name><name name-style="western"><surname>Jamison</surname><given-names>KR</given-names> </name></person-group><source>Manic-Depressive Illness: Bipolar Disorders and Recurrent Depression</source><year>2007</year><publisher-name>Oxford University Press, USA</publisher-name><pub-id pub-id-type="other">9780195135794</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Karam</surname><given-names>ZN</given-names> </name><name name-style="western"><surname>Provost</surname><given-names>EM</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Ecologically valid long-term mood monitoring of individuals with bipolar disorder using speech</article-title><source>Proc IEEE Int Conf Acoust Speech Signal Process</source><year>2014</year><month>05</month><volume>2014</volume><fpage>4858</fpage><lpage>4862</lpage><pub-id pub-id-type="doi">10.1109/ICASSP.2014.6854525</pub-id><pub-id pub-id-type="medline">27630535</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Harvey</surname><given-names>D</given-names> </name><name name-style="western"><surname>Lobban</surname><given-names>F</given-names> </name><name name-style="western"><surname>Rayson</surname><given-names>P</given-names> </name><name name-style="western"><surname>Warner</surname><given-names>A</given-names> </name><name name-style="western"><surname>Jones</surname><given-names>S</given-names> </name></person-group><article-title>Natural language processing methods and bipolar disorder: scoping review</article-title><source>JMIR Ment Health</source><year>2022</year><month>04</month><day>22</day><volume>9</volume><issue>4</issue><fpage>e35928</fpage><pub-id pub-id-type="doi">10.2196/35928</pub-id><pub-id pub-id-type="medline">35451984</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Birnbaum</surname><given-names>ML</given-names> </name><name name-style="western"><surname>Abrami</surname><given-names>A</given-names> </name><name name-style="western"><surname>Heisig</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Acoustic and facial features from clinical interviews for machine learning-based psychiatric diagnosis: algorithm development</article-title><source>JMIR Ment Health</source><year>2022</year><month>01</month><day>24</day><volume>9</volume><issue>1</issue><fpage>e24699</fpage><pub-id pub-id-type="doi">10.2196/24699</pub-id><pub-id pub-id-type="medline">35072648</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gideon</surname><given-names>J</given-names> </name><name name-style="western"><surname>Provost</surname><given-names>EM</given-names> </name><name 
name-style="western"><surname>McInnis</surname><given-names>M</given-names> </name></person-group><article-title>Mood state prediction from speech of varying acoustic quality for individuals with bipolar disorder</article-title><source>Proc IEEE Int Conf Acoust Speech Signal Process</source><year>2016</year><month>03</month><volume>2016</volume><fpage>2359</fpage><lpage>2363</lpage><pub-id pub-id-type="doi">10.1109/ICASSP.2016.7472099</pub-id><pub-id pub-id-type="medline">27570493</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guidi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Schoentgen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bertschy</surname><given-names>G</given-names> </name><name name-style="western"><surname>Gentili</surname><given-names>C</given-names> </name><name name-style="western"><surname>Scilingo</surname><given-names>EP</given-names> </name><name name-style="western"><surname>Vanello</surname><given-names>N</given-names> </name></person-group><article-title>Features of vocal frequency contour and speech rhythm in bipolar disorder</article-title><source>Biomed Signal Process Control</source><year>2017</year><month>08</month><volume>37</volume><fpage>23</fpage><lpage>31</lpage><pub-id pub-id-type="doi">10.1016/j.bspc.2017.01.017</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Maxhuni</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mu&#x00F1;oz-Mel&#x00E9;ndez</surname><given-names>A</given-names> </name><name name-style="western"><surname>Osmani</surname><given-names>V</given-names> </name><name name-style="western"><surname>Perez</surname><given-names>H</given-names> </name><name 
name-style="western"><surname>Mayora</surname><given-names>O</given-names> </name><name name-style="western"><surname>Morales</surname><given-names>EF</given-names> </name></person-group><article-title>Classification of bipolar disorder episodes based on analysis of voice and motor activity of patients</article-title><source>Pervasive Mob Comput</source><year>2016</year><month>09</month><volume>31</volume><fpage>50</fpage><lpage>66</lpage><pub-id pub-id-type="doi">10.1016/j.pmcj.2016.01.008</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Matton</surname><given-names>K</given-names> </name><name name-style="western"><surname>McInnis</surname><given-names>MG</given-names> </name><name name-style="western"><surname>Provost</surname><given-names>EM</given-names> </name></person-group><article-title>Into the wild: transitioning from recognizing mood in clinical interactions to personal conversations for individuals with bipolar disorder</article-title><year>2019</year><conf-name>Interspeech 2019</conf-name><conf-date>Sep 15-19, 2019</conf-date><conf-loc>Graz, Austria</conf-loc><fpage>1438</fpage><lpage>1442</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://www.isca-archive.org/interspeech_2019">https://www.isca-archive.org/interspeech_2019</ext-link></comment><pub-id pub-id-type="doi">10.21437/Interspeech.2019-2698</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Arevian</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Bone</surname><given-names>D</given-names> </name><name name-style="western"><surname>Malandrakis</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Clinical state tracking in serious mental illness through computational 
analysis of speech</article-title><source>PLoS ONE</source><year>2020</year><volume>15</volume><issue>1</issue><fpage>e0225695</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0225695</pub-id><pub-id pub-id-type="medline">31940347</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Girard</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Vail</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Liebenthal</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Computational analysis of spoken language in acute psychosis and mania</article-title><source>Schizophr Res</source><year>2022</year><month>07</month><volume>245</volume><fpage>97</fpage><lpage>115</lpage><pub-id pub-id-type="doi">10.1016/j.schres.2021.06.040</pub-id><pub-id pub-id-type="medline">34456131</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Low</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Bentley</surname><given-names>KH</given-names> </name><name name-style="western"><surname>Ghosh</surname><given-names>SS</given-names> </name></person-group><article-title>Automated assessment of psychiatric disorders using speech: a systematic review</article-title><source>Laryngoscope Investig Otolaryngol</source><year>2020</year><month>02</month><volume>5</volume><issue>1</issue><fpage>96</fpage><lpage>116</lpage><pub-id pub-id-type="doi">10.1002/lio2.354</pub-id><pub-id pub-id-type="medline">32128436</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Malgaroli</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Hull</surname><given-names>TD</given-names> </name><name name-style="western"><surname>Zech</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Althoff</surname><given-names>T</given-names> </name></person-group><article-title>Natural language processing for mental health interventions: a systematic review and research framework</article-title><source>Transl Psychiatry</source><year>2023</year><month>10</month><day>6</day><volume>13</volume><issue>1</issue><fpage>309</fpage><pub-id pub-id-type="doi">10.1038/s41398-023-02592-2</pub-id><pub-id pub-id-type="medline">37798296</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cummins</surname><given-names>N</given-names> </name><name name-style="western"><surname>Baird</surname><given-names>A</given-names> </name><name name-style="western"><surname>Schuller</surname><given-names>BW</given-names> </name></person-group><article-title>Speech analysis for health: current state-of-the-art and the increasing impact of deep learning</article-title><source>Methods</source><year>2018</year><month>12</month><day>1</day><volume>151</volume><fpage>41</fpage><lpage>54</lpage><pub-id pub-id-type="doi">10.1016/j.ymeth.2018.07.007</pub-id><pub-id pub-id-type="medline">30099083</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Faurholt-Jepsen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Busk</surname><given-names>J</given-names> </name><name name-style="western"><surname>Frost</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Voice analysis as an objective state marker in bipolar disorder</article-title><source>Transl 
Psychiatry</source><year>2016</year><month>07</month><day>19</day><volume>6</volume><issue>7</issue><fpage>e856</fpage><pub-id pub-id-type="doi">10.1038/tp.2016.123</pub-id><pub-id pub-id-type="medline">27434490</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Faurholt-Jepsen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Rohani</surname><given-names>DA</given-names> </name><name name-style="western"><surname>Busk</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Discriminating between patients with unipolar disorder, bipolar disorder, and healthy control individuals based on voice features collected from naturalistic smartphone calls</article-title><source>Acta Psychiatr Scand</source><year>2022</year><month>03</month><volume>145</volume><issue>3</issue><fpage>255</fpage><lpage>267</lpage><pub-id pub-id-type="doi">10.1111/acps.13391</pub-id><pub-id pub-id-type="medline">34923626</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Daus</surname><given-names>H</given-names> </name><name name-style="western"><surname>Bloecher</surname><given-names>T</given-names> </name><name name-style="western"><surname>Egeler</surname><given-names>R</given-names> </name><name name-style="western"><surname>De Klerk</surname><given-names>R</given-names> </name><name name-style="western"><surname>Stork</surname><given-names>W</given-names> </name><name name-style="western"><surname>Backenstrass</surname><given-names>M</given-names> </name></person-group><article-title>Development of an emotion-sensitive mHealth approach for mood-state recognition in bipolar disorder</article-title><source>JMIR Ment 
Health</source><year>2020</year><month>07</month><day>3</day><volume>7</volume><issue>7</issue><fpage>e14267</fpage><pub-id pub-id-type="doi">10.2196/14267</pub-id><pub-id pub-id-type="medline">32618577</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dikaios</surname><given-names>K</given-names> </name><name name-style="western"><surname>Rempel</surname><given-names>S</given-names> </name><name name-style="western"><surname>Dumpala</surname><given-names>SH</given-names> </name><name name-style="western"><surname>Oore</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kiefte</surname><given-names>M</given-names> </name><name name-style="western"><surname>Uher</surname><given-names>R</given-names> </name></person-group><article-title>Applications of speech analysis in psychiatry</article-title><source>Harv Rev Psychiatry</source><year>2023</year><volume>31</volume><issue>1</issue><fpage>1</fpage><lpage>13</lpage><pub-id pub-id-type="doi">10.1097/HRP.0000000000000356</pub-id><pub-id pub-id-type="medline">36608078</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dunster</surname><given-names>GP</given-names> </name><name name-style="western"><surname>Swendsen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Merikangas</surname><given-names>KR</given-names> </name></person-group><article-title>Real-time mobile monitoring of bipolar disorder: a review of evidence and future directions</article-title><source>Neuropsychopharmacology</source><year>2021</year><month>01</month><volume>46</volume><issue>1</issue><fpage>197</fpage><lpage>208</lpage><pub-id pub-id-type="doi">10.1038/s41386-020-00830-5</pub-id><pub-id 
pub-id-type="medline">32919408</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Marzano</surname><given-names>L</given-names> </name><name name-style="western"><surname>Bardill</surname><given-names>A</given-names> </name><name name-style="western"><surname>Fields</surname><given-names>B</given-names> </name><etal/></person-group><article-title>The application of mHealth to mental health: opportunities and challenges</article-title><source>Lancet Psychiatry</source><year>2015</year><month>10</month><volume>2</volume><issue>10</issue><fpage>942</fpage><lpage>948</lpage><pub-id pub-id-type="doi">10.1016/S2215-0366(15)00268-0</pub-id><pub-id pub-id-type="medline">26462228</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Le Glaz</surname><given-names>A</given-names> </name><name name-style="western"><surname>Haralambous</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Kim-Dufor</surname><given-names>DH</given-names> </name><etal/></person-group><article-title>Machine learning and natural language processing in mental health: systematic review</article-title><source>J Med Internet Res</source><year>2021</year><month>05</month><day>4</day><volume>23</volume><issue>5</issue><fpage>e15708</fpage><pub-id pub-id-type="doi">10.2196/15708</pub-id><pub-id pub-id-type="medline">33944788</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Farr&#x00FA;s</surname><given-names>M</given-names> </name><name name-style="western"><surname>Codina-Filb&#x00E0;</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Escudero</surname><given-names>J</given-names> </name></person-group><article-title>Acoustic and prosodic information for home monitoring of bipolar disorder</article-title><source>Health Informatics J</source><year>2021</year><volume>27</volume><issue>1</issue><fpage>1460458220972755</fpage><pub-id pub-id-type="doi">10.1177/1460458220972755</pub-id><pub-id pub-id-type="medline">33438502</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Gong</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Poellabauer</surname><given-names>C</given-names> </name></person-group><article-title>Topic modeling based multi-modal depression detection</article-title><year>2017</year><month>10</month><day>23</day><conf-name>MM &#x2019;17</conf-name><conf-date>Oct 23-27, 2017</conf-date><conf-loc>Mountain View California USA</conf-loc><fpage>2017</fpage><comment><ext-link ext-link-type="uri" xlink:href="https://dl.acm.org/doi/proceedings/10.1145/3133944">https://dl.acm.org/doi/proceedings/10.1145/3133944</ext-link></comment><pub-id pub-id-type="doi">10.1145/3133944.3133945</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Muaremi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Gravenhorst</surname><given-names>F</given-names> </name><name name-style="western"><surname>Gr&#x00FC;nerbl</surname><given-names>A</given-names> </name><name name-style="western"><surname>Arnrich</surname><given-names>B</given-names> </name><name name-style="western"><surname>Tr&#x00F6;ster</surname><given-names>G</given-names> </name></person-group><article-title>Assessing bipolar episodes using speech cues derived from phone calls</article-title><source>Lect Notes Inst 
Comput Sci Soc Inform Telecommun Eng</source><year>2014</year><fpage>103</fpage><lpage>114</lpage><pub-id pub-id-type="doi">10.1007/978-3-319-11564-1_11</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bond</surname><given-names>RR</given-names> </name><name name-style="western"><surname>Mulvenna</surname><given-names>MD</given-names> </name><name name-style="western"><surname>Potts</surname><given-names>C</given-names> </name><name name-style="western"><surname>O&#x2019;Neill</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ennis</surname><given-names>E</given-names> </name><name name-style="western"><surname>Torous</surname><given-names>J</given-names> </name></person-group><article-title>Digital transformation of mental health services</article-title><source>Npj Ment Health Res</source><year>2023</year><month>08</month><day>22</day><volume>2</volume><issue>1</issue><fpage>13</fpage><pub-id pub-id-type="doi">10.1038/s44184-023-00033-y</pub-id><pub-id pub-id-type="medline">38609479</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Flanagan</surname><given-names>O</given-names> </name><name name-style="western"><surname>Chan</surname><given-names>A</given-names> </name><name name-style="western"><surname>Roop</surname><given-names>P</given-names> </name><name name-style="western"><surname>Sundram</surname><given-names>F</given-names> </name></person-group><article-title>Using acoustic speech patterns from smartphones to investigate mood disorders: scoping review</article-title><source>JMIR Mhealth Uhealth</source><year>2021</year><month>09</month><day>17</day><volume>9</volume><issue>9</issue><fpage>e24352</fpage><pub-id pub-id-type="doi">10.2196/24352</pub-id><pub-id 
pub-id-type="medline">34533465</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de Oliveira</surname><given-names>L</given-names> </name><name name-style="western"><surname>Portugal</surname><given-names>LCL</given-names> </name><name name-style="western"><surname>Pereira</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Predicting bipolar disorder risk factors in distressed young adults from patterns of brain activation to reward: a machine learning approach</article-title><source>Biol Psychiatry Cogn Neurosci Neuroimaging</source><year>2019</year><month>08</month><volume>4</volume><issue>8</issue><fpage>726</fpage><lpage>733</lpage><pub-id pub-id-type="doi">10.1016/j.bpsc.2019.04.005</pub-id><pub-id pub-id-type="medline">31201147</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>von Elm</surname><given-names>E</given-names> </name><name name-style="western"><surname>Altman</surname><given-names>DG</given-names> </name><name name-style="western"><surname>Egger</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Strengthening the reporting of observational studies in epidemiology (STROBE) statement: guidelines for reporting observational studies</article-title><source>BMJ</source><year>2007</year><month>10</month><day>20</day><volume>335</volume><issue>7624</issue><fpage>806</fpage><lpage>808</lpage><pub-id pub-id-type="doi">10.1136/bmj.39335.541782.AD</pub-id><pub-id pub-id-type="medline">17947786</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="web"><source>Ab.acus Srl</source><access-date>2024-03-19</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.ab-acus.eu/index.php/portfolio-items/speakapp/">https://www.ab-acus.eu/index.php/portfolio-items/speakapp/</ext-link></comment></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hyzy</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bond</surname><given-names>R</given-names> </name><name name-style="western"><surname>Mulvenna</surname><given-names>M</given-names> </name><etal/></person-group><article-title>System Usability Scale benchmarking for digital health apps: meta-analysis</article-title><source>JMIR Mhealth Uhealth</source><year>2022</year><month>08</month><day>18</day><volume>10</volume><issue>8</issue><fpage>e37290</fpage><pub-id pub-id-type="doi">10.2196/37290</pub-id><pub-id pub-id-type="medline">35980732</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><article-title>Italian standardization and classification of neuropsychological tests. 
The Italian Group on the Neuropsychological Study of Aging</article-title><source>Ital J Neurol Sci</source><year>1987</year><month>12</month><volume>Suppl 8</volume><fpage>1</fpage><lpage>120</lpage><pub-id pub-id-type="medline">3330072</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="web"><article-title>Google Speech-to-Text APIs</article-title><source>Google Cloud</source><access-date>2025-04-09</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cloud.google.com/speech-to-text">https://cloud.google.com/speech-to-text</ext-link></comment></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jadoul</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Thompson</surname><given-names>B</given-names> </name><name name-style="western"><surname>de Boer</surname><given-names>B</given-names> </name></person-group><article-title>Introducing Parselmouth: a Python interface to Praat</article-title><source>J Phon</source><year>2018</year><month>11</month><volume>71</volume><fpage>1</fpage><lpage>15</lpage><pub-id pub-id-type="doi">10.1016/j.wocn.2018.07.001</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de Boer</surname><given-names>JN</given-names> </name><name name-style="western"><surname>Brederoo</surname><given-names>SG</given-names> </name><name name-style="western"><surname>Voppel</surname><given-names>AE</given-names> </name><name name-style="western"><surname>Sommer</surname><given-names>IEC</given-names> </name></person-group><article-title>Anomalies in language as a biomarker for schizophrenia</article-title><source>Curr Opin 
Psychiatry</source><year>2020</year><month>05</month><volume>33</volume><issue>3</issue><fpage>212</fpage><lpage>218</lpage><pub-id pub-id-type="doi">10.1097/YCO.0000000000000595</pub-id><pub-id pub-id-type="medline">32049766</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Kusner</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Kolkin</surname><given-names>N</given-names> </name><name name-style="western"><surname>Weinberger</surname><given-names>K</given-names> </name></person-group><article-title>From word embeddings to document distances</article-title><year>2015</year><conf-name>Proceedings of the 32nd International Conference on Machine Learning, PMLR</conf-name><conf-date>Jul 5-7, 2015</conf-date><conf-loc>Lille, France</conf-loc><fpage>957</fpage><lpage>966</lpage></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nadeu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Prieto</surname><given-names>P</given-names> </name></person-group><article-title>Pitch range, gestural information, and perceived politeness in Catalan</article-title><source>J Pragmat</source><year>2011</year><month>02</month><volume>43</volume><issue>3</issue><fpage>841</fpage><lpage>854</lpage><pub-id pub-id-type="doi">10.1016/j.pragma.2010.09.015</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Smith</surname><given-names>SW</given-names> </name></person-group><source>Digital Signal Processing: A Practical Guide for Engineers and 
Scientists</source><year>2002</year><publisher-name>California Technical Publishing</publisher-name><pub-id pub-id-type="doi">10.1016/B978-0-7506-7444-7.X5036-5</pub-id><pub-id pub-id-type="other">978-0-7506-7444-7</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ververidis</surname><given-names>D</given-names> </name><name name-style="western"><surname>Kotropoulos</surname><given-names>C</given-names> </name></person-group><article-title>Emotional speech recognition: resources, features, and methods</article-title><source>Speech Commun</source><year>2006</year><month>09</month><volume>48</volume><issue>9</issue><fpage>1162</fpage><lpage>1181</lpage><pub-id pub-id-type="doi">10.1016/j.specom.2006.04.003</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Montgomery</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Asberg</surname><given-names>M</given-names> </name></person-group><article-title>A new depression scale designed to be sensitive to change</article-title><source>Br J Psychiatry</source><year>1979</year><month>04</month><volume>134</volume><fpage>382</fpage><lpage>389</lpage><pub-id pub-id-type="doi">10.1192/bjp.134.4.382</pub-id><pub-id pub-id-type="medline">444788</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Young</surname><given-names>RC</given-names> </name><name name-style="western"><surname>Biggs</surname><given-names>JT</given-names> </name><name name-style="western"><surname>Ziegler</surname><given-names>VE</given-names> </name><name name-style="western"><surname>Meyer</surname><given-names>DA</given-names> 
</name></person-group><article-title>A rating scale for mania: reliability, validity and sensitivity</article-title><source>Br J Psychiatry</source><year>1978</year><month>11</month><volume>133</volume><fpage>429</fpage><lpage>435</lpage><pub-id pub-id-type="doi">10.1192/bjp.133.5.429</pub-id><pub-id pub-id-type="medline">728692</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lukasiewicz</surname><given-names>M</given-names> </name><name name-style="western"><surname>Gerard</surname><given-names>S</given-names> </name><name name-style="western"><surname>Besnard</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Young Mania Rating Scale: how to interpret the numbers? Determination of a severity threshold and of the minimal clinically significant difference in the EMBLEM cohort</article-title><source>Int J Methods Psychiatr Res</source><year>2013</year><month>03</month><volume>22</volume><issue>1</issue><fpage>46</fpage><lpage>58</lpage><pub-id pub-id-type="doi">10.1002/mpr.1379</pub-id><pub-id pub-id-type="medline">23526724</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Samara</surname><given-names>MT</given-names> </name><name name-style="western"><surname>Levine</surname><given-names>SZ</given-names> </name><name name-style="western"><surname>Leucht</surname><given-names>S</given-names> </name></person-group><article-title>Linkage of Young Mania Rating Scale to Clinical Global Impression Scale to enhance utility in clinical practice and research trials</article-title><source>Pharmacopsychiatry</source><year>2023</year><month>01</month><volume>56</volume><issue>1</issue><fpage>18</fpage><lpage>24</lpage><pub-id pub-id-type="doi">10.1055/a-1841-6672</pub-id><pub-id 
pub-id-type="medline">35896419</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thase</surname><given-names>ME</given-names> </name><name name-style="western"><surname>Harrington</surname><given-names>A</given-names> </name><name name-style="western"><surname>Calabrese</surname><given-names>J</given-names> </name><name name-style="western"><surname>Montgomery</surname><given-names>S</given-names> </name><name name-style="western"><surname>Niu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Patel</surname><given-names>MD</given-names> </name></person-group><article-title>Evaluation of MADRS severity thresholds in patients with bipolar depression</article-title><source>J Affect Disord</source><year>2021</year><month>05</month><day>1</day><volume>286</volume><fpage>58</fpage><lpage>63</lpage><pub-id pub-id-type="doi">10.1016/j.jad.2021.02.043</pub-id><pub-id pub-id-type="medline">33677183</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Eichhorn</surname><given-names>JT</given-names> </name><name name-style="western"><surname>Kent</surname><given-names>RD</given-names> </name><name name-style="western"><surname>Austin</surname><given-names>D</given-names> </name><name name-style="western"><surname>Vorperian</surname><given-names>HK</given-names> </name></person-group><article-title>Effects of aging on vocal fundamental frequency and vowel formants in men and women</article-title><source>J Voice</source><year>2018</year><month>09</month><volume>32</volume><issue>5</issue><fpage>644</fpage><pub-id pub-id-type="doi">10.1016/j.jvoice.2017.08.003</pub-id><pub-id pub-id-type="medline">28864082</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ert&#x00FC;rk</surname><given-names>A</given-names> </name><name name-style="western"><surname>G&#x00FC;rses</surname><given-names>E</given-names> </name><name name-style="western"><surname>Kulak Kay&#x0131;kc&#x0131;</surname><given-names>ME</given-names> </name></person-group><article-title>Sex related differences in the perception and production of emotional prosody in adults</article-title><source>Psychol Res</source><year>2024</year><month>03</month><volume>88</volume><issue>2</issue><fpage>449</fpage><lpage>457</lpage><pub-id pub-id-type="doi">10.1007/s00426-023-01865-1</pub-id><pub-id pub-id-type="medline">37542581</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mendoza</surname><given-names>E</given-names> </name><name name-style="western"><surname>Valencia</surname><given-names>N</given-names> </name><name name-style="western"><surname>Mu&#x00F1;oz</surname><given-names>J</given-names> </name><name name-style="western"><surname>Trujillo</surname><given-names>H</given-names> </name></person-group><article-title>Differences in voice quality between men and women: use of the long-term average spectrum (LTAS)</article-title><source>J Voice</source><year>1996</year><month>03</month><volume>10</volume><issue>1</issue><fpage>59</fpage><lpage>66</lpage><pub-id pub-id-type="doi">10.1016/s0892-1997(96)80019-1</pub-id><pub-id pub-id-type="medline">8653179</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chaplin</surname><given-names>TM</given-names> </name></person-group><article-title>Gender and emotion expression: a developmental contextual perspective</article-title><source>Emot 
Rev</source><year>2015</year><month>01</month><volume>7</volume><issue>1</issue><fpage>14</fpage><lpage>21</lpage><pub-id pub-id-type="doi">10.1177/1754073914544408</pub-id><pub-id pub-id-type="medline">26089983</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Ding</surname><given-names>H</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name></person-group><article-title>Gender differences in identifying facial, prosodic, and semantic emotions show category- and channel-specific effects mediated by encoder&#x2019;s gender</article-title><source>J Speech Lang Hear Res</source><year>2021</year><month>08</month><day>9</day><volume>64</volume><issue>8</issue><fpage>2941</fpage><lpage>2955</lpage><pub-id pub-id-type="doi">10.1044/2021_JSLHR-20-00553</pub-id><pub-id pub-id-type="medline">34310173</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Aung</surname><given-names>T</given-names> </name><name name-style="western"><surname>Puts</surname><given-names>D</given-names> </name></person-group><article-title>Voice pitch: a window into the communication of social power</article-title><source>Curr Opin Psychol</source><year>2020</year><month>06</month><volume>33</volume><fpage>154</fpage><lpage>161</lpage><pub-id pub-id-type="doi">10.1016/j.copsyc.2019.07.028</pub-id><pub-id pub-id-type="medline">31445439</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Pan</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Gui</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Analysis on speech signal features of manic patients</article-title><source>J Psychiatr Res</source><year>2018</year><month>03</month><volume>98</volume><fpage>59</fpage><lpage>63</lpage><pub-id pub-id-type="doi">10.1016/j.jpsychires.2017.12.012</pub-id><pub-id pub-id-type="medline">29291581</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guidi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Salvi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ottaviano</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Smartphone application for the analysis of prosodic features in running speech with a focus on bipolar disorders: system performance evaluation and case study</article-title><source>Sensors (Basel)</source><year>2015</year><month>11</month><day>6</day><volume>15</volume><issue>11</issue><fpage>28070</fpage><lpage>28087</lpage><pub-id pub-id-type="doi">10.3390/s151128070</pub-id><pub-id pub-id-type="medline">26561811</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ozdas</surname><given-names>A</given-names> </name><name name-style="western"><surname>Shiavi</surname><given-names>RG</given-names> </name><name name-style="western"><surname>Silverman</surname><given-names>SE</given-names> </name><name name-style="western"><surname>Silverman</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Wilkes</surname><given-names>DM</given-names> </name></person-group><article-title>Investigation of vocal 
jitter and glottal flow spectrum as possible cues for depression and near-term suicidal risk</article-title><source>IEEE Trans Biomed Eng</source><year>2004</year><month>09</month><volume>51</volume><issue>9</issue><fpage>1530</fpage><lpage>1540</lpage><pub-id pub-id-type="doi">10.1109/TBME.2004.827544</pub-id><pub-id pub-id-type="medline">15376501</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Faurholt-Jepsen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Rohani</surname><given-names>DA</given-names> </name><name name-style="western"><surname>Busk</surname><given-names>J</given-names> </name><name name-style="western"><surname>Vinberg</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bardram</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Kessing</surname><given-names>LV</given-names> </name></person-group><article-title>Voice analyses using smartphone-based data in patients with bipolar disorder, unaffected relatives and healthy control individuals, and during different affective states</article-title><source>Int J Bipolar Disord</source><year>2021</year><month>12</month><day>1</day><volume>9</volume><issue>1</issue><fpage>38</fpage><pub-id pub-id-type="doi">10.1186/s40345-021-00243-3</pub-id><pub-id pub-id-type="medline">34850296</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Torous</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bucci</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bell</surname><given-names>IH</given-names> </name><etal/></person-group><article-title>The growing field of digital psychiatry: current evidence and the future of 
apps, social media, chatbots, and virtual reality</article-title><source>World Psychiatry</source><year>2021</year><month>10</month><volume>20</volume><issue>3</issue><fpage>318</fpage><lpage>335</lpage><pub-id pub-id-type="doi">10.1002/wps.20883</pub-id><pub-id pub-id-type="medline">34505369</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Balcombe</surname><given-names>L</given-names> </name><name name-style="western"><surname>De Leo</surname><given-names>D</given-names> </name></person-group><article-title>Digital mental health challenges and the horizon ahead for solutions</article-title><source>JMIR Ment Health</source><year>2021</year><month>03</month><day>29</day><volume>8</volume><issue>3</issue><fpage>e26811</fpage><pub-id pub-id-type="doi">10.2196/26811</pub-id><pub-id pub-id-type="medline">33779570</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Soenksen</surname><given-names>LR</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zeng</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Integrated multimodal artificial intelligence framework for healthcare applications</article-title><source>NPJ Digit Med</source><year>2022</year><month>09</month><day>20</day><volume>5</volume><issue>1</issue><fpage>149</fpage><pub-id pub-id-type="doi">10.1038/s41746-022-00689-4</pub-id><pub-id pub-id-type="medline">36127417</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gr&#x00FC;nerbl</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Muaremi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Osmani</surname><given-names>V</given-names> </name><etal/></person-group><article-title>Smartphone-based recognition of states and state changes in bipolar disorder patients</article-title><source>IEEE J Biomed Health Inform</source><year>2015</year><month>01</month><volume>19</volume><issue>1</issue><fpage>140</fpage><lpage>148</lpage><pub-id pub-id-type="doi">10.1109/JBHI.2014.2343154</pub-id><pub-id pub-id-type="medline">25073181</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Osmani</surname><given-names>V</given-names> </name></person-group><article-title>Smartphones in mental health: detecting depressive and manic episodes</article-title><source>IEEE Pervasive Comput</source><year>2015</year><volume>14</volume><issue>3</issue><fpage>10</fpage><lpage>13</lpage><pub-id pub-id-type="doi">10.1109/MPRV.2015.54</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cohen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Richter</surname><given-names>V</given-names> </name><name name-style="western"><surname>Neumann</surname><given-names>M</given-names> </name><etal/></person-group><article-title>A multimodal dialog approach to mental state characterization in clinically depressed, anxious, and suicidal populations</article-title><source>Front Psychol</source><year>2023</year><volume>14</volume><fpage>1135469</fpage><pub-id pub-id-type="doi">10.3389/fpsyg.2023.1135469</pub-id><pub-id pub-id-type="medline">37767217</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Or</surname><given-names>F</given-names> </name><name name-style="western"><surname>Torous</surname><given-names>J</given-names> </name><name name-style="western"><surname>Onnela</surname><given-names>JP</given-names> </name></person-group><article-title>High potential but limited evidence: using voice data from smartphones to monitor and diagnose mood disorders</article-title><source>Psychiatr Rehabil J</source><year>2017</year><month>09</month><volume>40</volume><issue>3</issue><fpage>320</fpage><lpage>324</lpage><pub-id pub-id-type="doi">10.1037/prj0000279</pub-id><pub-id pub-id-type="medline">28891659</pub-id></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Garcia-Ceja</surname><given-names>E</given-names> </name><name name-style="western"><surname>Riegler</surname><given-names>M</given-names> </name><name name-style="western"><surname>Nordgreen</surname><given-names>T</given-names> </name><name name-style="western"><surname>Jakobsen</surname><given-names>P</given-names> </name><name name-style="western"><surname>Oedegaard</surname><given-names>KJ</given-names> </name><name name-style="western"><surname>T&#x00F8;rresen</surname><given-names>J</given-names> </name></person-group><article-title>Mental health monitoring with multimodal sensing and machine learning: a survey</article-title><source>Pervasive Mob Comput</source><year>2018</year><month>12</month><volume>51</volume><fpage>1</fpage><lpage>26</lpage><pub-id pub-id-type="doi">10.1016/j.pmcj.2018.09.003</pub-id></nlm-citation></ref><ref id="ref67"><label>67</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McCoy</surname><given-names>LG</given-names> </name><name name-style="western"><surname>Brenna</surname><given-names>CTA</given-names> </name><name 
name-style="western"><surname>Chen</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Vold</surname><given-names>K</given-names> </name><name name-style="western"><surname>Das</surname><given-names>S</given-names> </name></person-group><article-title>Believing in black boxes: machine learning for healthcare does not need explainability to be evidence-based</article-title><source>J Clin Epidemiol</source><year>2022</year><month>02</month><volume>142</volume><fpage>252</fpage><lpage>257</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2021.11.001</pub-id><pub-id pub-id-type="medline">34748907</pub-id></nlm-citation></ref><ref id="ref68"><label>68</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yerushalmi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sixsmith</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pollock Star</surname><given-names>A</given-names> </name><name name-style="western"><surname>King</surname><given-names>DB</given-names> </name><name name-style="western"><surname>O&#x2019;Rourke</surname><given-names>N</given-names> </name></person-group><article-title>Ecological momentary assessment of bipolar disorder symptoms and partner affect: longitudinal pilot study</article-title><source>JMIR Form Res</source><year>2021</year><month>09</month><day>2</day><volume>5</volume><issue>9</issue><fpage>e30472</fpage><pub-id pub-id-type="doi">10.2196/30472</pub-id><pub-id pub-id-type="medline">34473069</pub-id></nlm-citation></ref><ref id="ref69"><label>69</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Clark</surname><given-names>EL</given-names> </name><name name-style="western"><surname>Easton</surname><given-names>C</given-names> </name><name name-style="western"><surname>Verdon</surname><given-names>S</given-names> 
</name></person-group><article-title>The impact of linguistic bias upon speech-language pathologists&#x2019; attitudes towards non-standard dialects of English</article-title><source>Clin Linguist Phon</source><year>2021</year><month>06</month><day>3</day><volume>35</volume><issue>6</issue><fpage>542</fpage><lpage>559</lpage><pub-id pub-id-type="doi">10.1080/02699206.2020.1803405</pub-id><pub-id pub-id-type="medline">32781853</pub-id></nlm-citation></ref><ref id="ref70"><label>70</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bartoli</surname><given-names>F</given-names> </name><name name-style="western"><surname>Bachi</surname><given-names>B</given-names> </name><name name-style="western"><surname>Callovini</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Anxious distress in people with major depressive episodes: a cross-sectional analysis of clinical correlates</article-title><source>CNS Spectr</source><year>2024</year><month>02</month><volume>29</volume><issue>1</issue><fpage>49</fpage><lpage>53</lpage><pub-id pub-id-type="doi">10.1017/S1092852923002377</pub-id><pub-id pub-id-type="medline">37489522</pub-id></nlm-citation></ref><ref id="ref71"><label>71</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Malgaroli</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hull</surname><given-names>TD</given-names> </name><name name-style="western"><surname>Calderon</surname><given-names>A</given-names> </name><name name-style="western"><surname>Simon</surname><given-names>NM</given-names> </name></person-group><article-title>Linguistic markers of anxiety and depression in somatic symptom and related disorders: observational study of a digital intervention</article-title><source>J Affect 
Disord</source><year>2024</year><month>05</month><day>1</day><volume>352</volume><fpage>133</fpage><lpage>137</lpage><pub-id pub-id-type="doi">10.1016/j.jad.2024.02.012</pub-id><pub-id pub-id-type="medline">38336165</pub-id></nlm-citation></ref><ref id="ref72"><label>72</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bartoli</surname><given-names>F</given-names> </name><name name-style="western"><surname>Crocamo</surname><given-names>C</given-names> </name><name name-style="western"><surname>Clerici</surname><given-names>M</given-names> </name><name name-style="western"><surname>Carr&#x00E0;</surname><given-names>G</given-names> </name></person-group><article-title>Allopurinol as add-on treatment for mania symptoms in bipolar disorder: systematic review and meta-analysis of randomised controlled trials</article-title><source>Br J Psychiatry</source><year>2017</year><month>01</month><volume>210</volume><issue>1</issue><fpage>10</fpage><lpage>15</lpage><pub-id pub-id-type="doi">10.1192/bjp.bp.115.180281</pub-id><pub-id pub-id-type="medline">27856422</pub-id></nlm-citation></ref><ref id="ref73"><label>73</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bartoli</surname><given-names>F</given-names> </name><name name-style="western"><surname>Cavaleri</surname><given-names>D</given-names> </name><name name-style="western"><surname>Bachi</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Repurposed drugs as adjunctive treatments for mania and bipolar depression: a meta-review and critical appraisal of meta-analyses of randomized placebo-controlled trials</article-title><source>J Psychiatr Res</source><year>2021</year><month>11</month><volume>143</volume><fpage>230</fpage><lpage>238</lpage><pub-id pub-id-type="doi">10.1016/j.jpsychires.2021.09.018</pub-id><pub-id 
pub-id-type="medline">34509090</pub-id></nlm-citation></ref><ref id="ref74"><label>74</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bartoli</surname><given-names>F</given-names> </name><name name-style="western"><surname>Cavaleri</surname><given-names>D</given-names> </name><name name-style="western"><surname>Nasti</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Long-acting injectable antipsychotics for the treatment of bipolar disorder: evidence from mirror-image studies</article-title><source>Ther Adv Psychopharmacol</source><year>2023</year><volume>13</volume><fpage>20451253231163682</fpage><pub-id pub-id-type="doi">10.1177/20451253231163682</pub-id><pub-id pub-id-type="medline">36994116</pub-id></nlm-citation></ref><ref id="ref75"><label>75</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Carr&#x00E0;</surname><given-names>G</given-names> </name><name name-style="western"><surname>Scioli</surname><given-names>R</given-names> </name><name name-style="western"><surname>Monti</surname><given-names>MC</given-names> </name><name name-style="western"><surname>Marinoni</surname><given-names>A</given-names> </name></person-group><article-title>Severity profiles of substance-abusing patients in Italian community addiction facilities: influence of psychiatric concurrent disorders</article-title><source>Eur Addict Res</source><year>2006</year><volume>12</volume><issue>2</issue><fpage>96</fpage><lpage>101</lpage><pub-id pub-id-type="doi">10.1159/000090429</pub-id><pub-id pub-id-type="medline">16543745</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Checklist.</p><media xlink:href="formative_v9i1e65555_app1.doc" xlink:title="DOC File, 87 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia 
Appendix 2</label><p>Features.</p><media xlink:href="formative_v9i1e65555_app2.docx" xlink:title="DOCX File, 16 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Supplementary analyses.</p><media xlink:href="formative_v9i1e65555_app3.docx" xlink:title="DOCX File, 329 KB"/></supplementary-material></app-group></back></article>