<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v10i1e84744</article-id><article-id pub-id-type="doi">10.2196/84744</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Prediction of Clinically Significant Depressive Symptoms at 2-Year Follow-Up in Older Adults: Machine Learning Study Using the English Longitudinal Study of Ageing</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Khorram</surname><given-names>Bahar</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Nilforooshan</surname><given-names>Ramin</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Barnaghi</surname><given-names>Payam</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref><xref ref-type="aff" 
rid="aff5">5</xref><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kouchaki</surname><given-names>Samaneh</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff4">4</xref></contrib></contrib-group><aff id="aff1"><institution>Centre for Vision, Speech and Signal Processing (CVSSP), Department of Electrical and Electronic Engineering, Faculty of Engineering and Physical Sciences, University of Surrey</institution><addr-line>Stag Hill Campus</addr-line><addr-line>Guildford</addr-line><country>United Kingdom</country></aff><aff id="aff2"><institution>Surrey and Borders Partnership NHS Foundation Trust</institution><addr-line>Leatherhead</addr-line><country>United Kingdom</country></aff><aff id="aff3"><institution>School of Psychology, University of Surrey</institution><addr-line>Guildford</addr-line><country>United Kingdom</country></aff><aff id="aff4"><institution>UK Dementia Research Institute, Care Research and Technology Centre</institution><addr-line>London</addr-line><country>United Kingdom</country></aff><aff id="aff5"><institution>Department of Brain Sciences, Imperial College London</institution><addr-line>London</addr-line><country>United Kingdom</country></aff><aff id="aff6"><institution>Department of Data Research, Innovation and Virtual Environments, Great Ormond Street Hospital NHS Foundation Trust</institution><addr-line>London</addr-line><country>United Kingdom</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Sarvestan</surname><given-names>Javad</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Chen</surname><given-names>Chun-Yuan</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Zhu</surname><given-names>Ningzhe</given-names></name></contrib><contrib 
contrib-type="reviewer"><name name-style="western"><surname>Tutsoy</surname><given-names>Onder</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Bahar Khorram, MSc, Centre for Vision, Speech and Signal Processing (CVSSP), Department of Electrical and Electronic Engineering, Faculty of Engineering and Physical Sciences, University of Surrey, Stag Hill Campus, Guildford, GU2 7XH, United Kingdom, +44 (0)1483 683435; <email>bk00531@surrey.ac.uk</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>2</day><month>7</month><year>2026</year></pub-date><volume>10</volume><elocation-id>e84744</elocation-id><history><date date-type="received"><day>24</day><month>09</month><year>2025</year></date><date date-type="rev-recd"><day>30</day><month>03</month><year>2026</year></date><date date-type="accepted"><day>31</day><month>03</month><year>2026</year></date></history><copyright-statement>&#x00A9; Bahar Khorram, Ramin Nilforooshan, Payam Barnaghi, Samaneh Kouchaki. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 2.7.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2026/1/e84744"/><abstract><sec><title>Background</title><p>Depression in older adults is often underdiagnosed due to atypical symptom presentation and generational stigma, leading to delayed intervention. Early identification of individuals at risk of developing elevated depressive symptoms is therefore critical, but traditional approaches show limited predictive accuracy. To date, no study has applied machine learning (ML) models to predict clinically significant depressive symptoms at 2-year follow-up in older adults in the United Kingdom using data from the English Longitudinal Study of Ageing (ELSA). Moreover, the impact of encoding strategies for categorical health care variables has not been examined.</p></sec><sec><title>Objective</title><p>This study aimed to develop and evaluate ML models to predict the clinically significant depressive symptoms at 2-year follow-up in older adults using ELSA data. We further compared ordinal and one-hot encoding strategies across different ML architectures and identified key predictors of depressive symptoms at follow-up.</p></sec><sec sec-type="methods"><title>Methods</title><p>Data were drawn from 4 consecutive waves of ELSA, including participants aged &#x2265;50 years without significant depressive symptoms at the baseline wave (waves 6&#x2010;9). Clinically significant depressive symptoms were defined as 8-item Center for Epidemiologic Studies Depression Scale (CES-D 8) scores of &#x2265;4 at the subsequent wave (waves 7&#x2010;10). Over 120 features spanning sociodemographic, psychological, and health-related domains were analyzed. 
Eight ML models were applied, including tree-based ensembles, deep learning architectures for tabular data, distance-based methods, probabilistic methods, and linear methods. Model performance was assessed using the area under the receiver operating characteristic curve (AUROC) and <italic>F</italic><sub>1</sub>-score. Model interpretability was examined using Shapley additive explanations (SHAP). Sensitivity analyses assessed the robustness of results across alternative CES-D 8 thresholds (&#x2265;3, &#x2265;4, and &#x2265;5) and encoding strategies.</p></sec><sec sec-type="results"><title>Results</title><p>Across waves, the best-performing models achieved mean AUROC scores of 0.72&#x2010;0.73, with a peak of 0.75 in the highest-performing wave. Ordinal encoding consistently outperformed one-hot encoding across all ML models, yielding improvements in AUROCs and <italic>F</italic><sub>1</sub>-scores, with the greatest increase in tree-based methods. SHAP consistently identified loneliness, sleep disturbances, and low social engagement as strong predictors of elevated depressive symptoms at follow-up. Sensitivity analyses across CES-D 8 thresholds demonstrated robust feature importance, with AUROCs ranging from 0.67 to 0.82. Traditional ML models (random forest, extreme gradient boosting, and support vector machines) generally achieved higher performance than the deep learning models for this task.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Our findings demonstrate the feasibility of predicting clinically significant depressive symptoms at 2-year follow-up in UK older adults, with moderate accuracy. Ordinal encoding demonstrates superior performance for health care datasets with inherently ordered categorical features. The identification of consistent risk factors highlights opportunities for developing targeted clinical screening tools and preventive interventions. 
This study provides new evidence on depressive symptom prediction in the UK context, leveraging longitudinal data from ELSA, and contributes to advancing digital mental health research for aging populations.</p></sec></abstract><kwd-group><kwd>depressive symptoms</kwd><kwd>aged</kwd><kwd>machine learning</kwd><kwd>predictive analytics</kwd><kwd>English Longitudinal Study of Ageing</kwd><kwd>depression screening</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Depression is one of the most widespread mental health disorders, characterized by low mood, loss of interest in activities, reduced energy, and cognitive difficulties persisting for at least 2 weeks. With 280 million people affected worldwide, it is a major contributor to global disability and disease burden [<xref ref-type="bibr" rid="ref1">1</xref>]. In the United Kingdom, depression is among the most prevalent mental disorders [<xref ref-type="bibr" rid="ref2">2</xref>]. Older adults are particularly vulnerable due to age-related health decline, cognitive impairment, reduced social interactions, and changes in family dynamics [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. They also have the highest suicide rates of any age group [<xref ref-type="bibr" rid="ref5">5</xref>].</p><p>Demographic shifts have exacerbated this issue. Over the past 40 years in the United Kingdom, the population aged 50 years and older has increased by 47%, and those aged 65 years and older by 52% [<xref ref-type="bibr" rid="ref6">6</xref>]. Within this significant increase, depression often remains undiagnosed, leading to a reduced quality of life and increased mortality rates [<xref ref-type="bibr" rid="ref7">7</xref>]. 
Therefore, early identification of individuals at risk for developing elevated depressive symptoms is critical for timely intervention, but this requires tools tailored to the risk factors most relevant to older adults.</p><p>The risk profile for depression in older adults differs from that of younger populations. While emotional symptoms such as stress are highlighted in younger cohorts, older adults with depression often experience cognitive difficulties, physical complaints, and reduced interest in activities [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. Factors such as loss of independence, poor sleep, chronic diseases, and functional limitations are associated with depression in older adults but do not necessarily predict depressive symptoms at follow-up [<xref ref-type="bibr" rid="ref10">10</xref>].</p><p>Machine learning&#x2019;s (ML&#x2019;s) ability to analyze complex, multidimensional data and uncover subtle, nonlinear interactions has made it a powerful tool for predicting depressive symptoms. Prior studies have applied ML in a variety of contexts, ranging from social media monitoring to structured cohort studies. 
For instance, social media&#x2013;based approaches have detected depressive language and emotional distress from user-generated content on platforms such as Twitter (X Corp), Facebook (Meta Platforms), and Reddit (Reddit, Inc), though their applicability to older adults is limited due to lower engagement with these platforms [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref13">13</xref>].</p><p>Beyond social media, many studies have focused on predicting depressive symptoms through questionnaires and interviews across different cohorts, including military cohorts [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>], student populations [<xref ref-type="bibr" rid="ref16">16</xref>], and occupational cohorts [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. For example, in occupational cohorts, elastic net models have identified key predictors such as gender, anxiety disorders, and adverse life events [<xref ref-type="bibr" rid="ref18">18</xref>]. Notable longitudinal studies have examined the risk of depression in older adults from China and Canada, incorporating variables such as emotional instability, low life satisfaction, perceived health, and nutrition risk, achieving areas under the receiver operating characteristic curves (AUROCs) ranging from 0.62 to 0.79 [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. Overall, these studies show the feasibility of predicting depressive symptoms in older adults; despite this, differences in cohorts, predictors, and modeling approaches limit the generalizability of these findings to UK older adult populations.</p><p>Recent ML approaches for depressive symptoms prediction in older adults have yielded promising results; however, several methodological limitations remain. 
First, most studies have been conducted outside the United Kingdom in populations with different health care systems and cultural contexts [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>], limiting their applicability to UK clinical settings. Second, while tree-based methods have shown strong performance [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref20">20</xref>], few studies have systematically compared results across diverse algorithmic paradigms (eg, deep learning vs traditional ML), making it unclear which approaches are optimal. Third, categorical feature encoding strategies, which are particularly important for handling ordinal variables such as self-rated health and mobility limitations, have not been systematically evaluated, with most studies relying on one-hot encoding. Finally, deep learning architectures specifically designed for tabular data (eg, TabTransformer and TabNet) remain largely unexplored for depressive symptoms prediction in older adults, despite their potential advantages for handling mixed feature types. The English Longitudinal Study of Ageing (ELSA) offers comprehensive longitudinal data that can address these gaps. Existing ELSA-based research has examined associations between depression and factors such as frailty, biomarkers, and social conditions [<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref23">23</xref>], yet no study has systematically applied ML approaches to predict future depressive symptoms at follow-up.</p><p>Compared with conventional statistical methods that rely on linearity and predefined interaction structures, ML approaches can capture nonlinear, complex interactions among heterogeneous features in high-dimensional data. 
We applied 8 ML models across diverse learning paradigms, including tree-based ensembles, deep learning architectures specifically designed for tabular data, distance-based methods, probabilistic methods, and linear methods. This systematic model selection ensures our findings on encoding strategies are algorithmically robust and generalizable rather than constrained to specific model structures. The objective of this study was to apply these diverse models to predict clinically significant depressive symptoms at 2-year follow-up in older adults using data from ELSA and to identify key risk factors contributing to depressive symptoms at follow-up. The 2-year prediction window was chosen to align with the biennial wave structure of ELSA. This timeframe also provides sufficient time for preventive interventions while minimizing uncertainty from longer-term changes in health status and life circumstances that complicate prediction accuracy.</p><p>This study makes 3 key contributions. First, this study provides a systematic ML-based approach to predicting depressive symptoms at follow-up among older adults in the United Kingdom using ELSA data, addressing a gap in UK-specific risk prediction models. Second, through a comprehensive comparison of 8 diverse ML algorithms (tree-based, deep learning, distance-based, probabilistic, and linear models), we demonstrate that ordinal encoding consistently outperforms one-hot encoding for health care datasets with naturally ordered categorical features, with average improvements of 0.013 in AUROC and 0.037 in <italic>F</italic><sub>1</sub>-score. 
Third, we integrate multithreshold sensitivity analyses of the 8-item Center for Epidemiologic Studies Depression Scale (CES-D 8 score &#x2265;3, &#x2265;4, and &#x2265;5) with individual-level Shapley additive explanations (SHAP) interpretability to identify robust risk factors that remain stable across depression severity levels, enabling targeted clinical interventions.</p><p>The best-performing models achieved AUROC values between 0.72 and 0.75 for 2-year depressive symptoms at follow-up. In this setting, traditional ML approaches generally achieved higher performance than the deep learning models in the ELSA dataset. SHAP analysis highlighted loneliness, sleep disturbances, and low social engagement as robust predictors across all depression thresholds.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design and Participants</title><p>ELSA is a continuous cohort study that collects detailed information on people in England aged 50 years or older. These data, which include lifestyle, health, psychological, and sociodemographic factors, were collected using computer-assisted interviews and self-reported questionnaires at 2-year intervals, or &#x201C;waves&#x201D; [<xref ref-type="bibr" rid="ref24">24</xref>]. ELSA enables longitudinal tracking of health, economic, and social circumstances, with periodic refreshment of the panel by recruiting new participants aged 50 years and older during specific waves (3, 4, 6, 7, 9, and 10) to maintain representativeness. A large-scale longitudinal study like ELSA serves as a solid basis for investigating the underlying reasons for depression in older adults.</p><p>For this study, data from waves 6 (2012&#x2010;2013), 7 (2014&#x2010;2015), 8 (2016&#x2010;2017), and 9 (2018&#x2010;2019) were analyzed to predict depressive symptoms at follow-up in waves 7, 8, 9, and 10. 
Four of the most recent waves were chosen to ensure that the results accurately reflect contemporary aging and mental health patterns while providing sufficient longitudinal data to support robust analysis. Each wave pair was modeled independently rather than using cumulative training across all waves. This approach allowed us to examine whether predictive performance and risk factor associations remained consistent across different time periods while maximizing sample size for each prediction task. Restricting analyses to participants observed across all waves would substantially reduce the number of eligible cases due to attrition and missingness. To ensure a focus on incident depressive symptoms at follow-up rather than ongoing symptoms and eliminate any interfering factors, participants with severe psychiatric conditions, such as schizophrenia, psychosis, and bipolar disorder, as well as those expressing significant emotional or mood instability, were excluded. These exclusions were based on self-reported doctor-diagnosed conditions and relevant baseline ELSA mental health items.</p><p>The ELSA dataset demonstrates robust psychometric reliability across multiple domains, including sociodemographic, physical health, psychological, lifestyle, and social engagement. In the core member cohort, sample sizes ranged from 5362 to 6891 participants across waves 6&#x2010;9 [<xref ref-type="bibr" rid="ref25">25</xref>], providing sufficient statistical power for reliable model training. The cohort shows demographic diversity across gender, age, marital status, and cultural background as summarized for wave 6 in <xref ref-type="table" rid="table1">Table 1</xref>. It also provides insight into sample distribution based on the depressive symptom status of the participants before analyzing model performance. 
Tables S1, S2, and S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> present the corresponding characteristics for waves 7, 8, and 9. Because of the high percentage of missing data for the education-level variable in the waves analyzed, this variable was removed from the tables.</p><p>Depressive symptoms were assessed using the 8-item Center for Epidemiologic Studies Depression Scale (CES-D 8). This scale measures depression and demonstrates psychometric properties comparable to the original 20-item version. The CES-D 8 assesses both cognitive/affective symptoms (eg, felt depressed or was happy) and somatic symptoms (eg, restless sleep). A summary score (possible range: 0&#x2010;8) was calculated by adding responses, and this score was used to define the target variable for supervised model training.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Sociodemographic characteristics, including gender, age group, marital status, and cultural background of English Longitudinal Study of Ageing (ELSA) participants at wave 6 (2012&#x2010;2013; n=8799), divided by depressive symptom status. 
Clinically significant depressive symptoms were defined using the 8-item Center for Epidemiologic Studies Depression Scale (CES-D 8) with a cutoff score of &#x2265;4.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Variable</td><td align="left" valign="bottom">All participants (n=8799)</td><td align="left" valign="bottom">Nondepressed (n=6842)</td><td align="left" valign="bottom">Depressed (n=1957)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4">Sex, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Male</td><td align="left" valign="top">3987 (45)</td><td align="left" valign="top">3312 (48)</td><td align="left" valign="top">675 (34)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Female</td><td align="left" valign="top">4812 (55)</td><td align="left" valign="top">3530 (52)</td><td align="left" valign="top">1282 (66)</td></tr><tr><td align="left" valign="top" colspan="4">Age (years), n (%)</td></tr><tr><td align="left" valign="top">&#x2003;&#x2265;65</td><td align="left" valign="top">4851 (55)</td><td align="left" valign="top">3682 (54)</td><td align="left" valign="top">1169 (60)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x003C;65</td><td align="left" valign="top">3948 (45)</td><td align="left" valign="top">3160 (46)</td><td align="left" valign="top">788 (40)</td></tr><tr><td align="left" valign="top" colspan="4">Marital status, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Single (never married)</td><td align="left" valign="top">525 (6)</td><td align="left" valign="top">414 (6.1)</td><td align="left" valign="top">111 (5.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Married/civil partnership</td><td align="left" 
valign="top">5973 (68)</td><td align="left" valign="top">4893 (71.5)</td><td align="left" valign="top">1080 (55.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Previously married (separated/divorced/widowed)</td><td align="left" valign="top">2299 (26)</td><td align="left" valign="top">1533 (22.4)</td><td align="left" valign="top">766 (39.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Not available</td><td align="left" valign="top">2 (&#x223C;0)</td><td align="left" valign="top">2 (&#x223C;0)</td><td align="left" valign="top">0 (0)</td></tr><tr><td align="left" valign="top" colspan="4">Cultural background, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;English</td><td align="left" valign="top">6871 (78.1)</td><td align="left" valign="top">5358 (78.3)</td><td align="left" valign="top">1513 (77.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Irish/Scottish/Welsh</td><td align="left" valign="top">443 (5)</td><td align="left" valign="top">352 (5.1)</td><td align="left" valign="top">91 (4.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other European</td><td align="left" valign="top">44 (0.5)</td><td align="left" valign="top">30 (0.4)</td><td align="left" valign="top">14 (0.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Other cultural backgrounds</td><td align="left" valign="top">172 (2)</td><td align="left" valign="top">138 (2)</td><td align="left" valign="top">34 (1.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Not available</td><td align="left" valign="top">1269 
(14.4)</td><td align="left" valign="top">964 (14.1)</td><td align="left" valign="top">305 (15.6)</td></tr></tbody></table></table-wrap><p>For the binary classification task, let <inline-formula><mml:math id="ieqn1"><mml:msub><mml:mrow><mml:mtext>y</mml:mtext></mml:mrow><mml:mrow><mml:mtext>i</mml:mtext></mml:mrow></mml:msub><mml:mtext>&#x2208;{0, 1}</mml:mtext></mml:math></inline-formula> denote the depressive symptom status for participant <inline-formula><mml:math id="ieqn2"><mml:mtext>i</mml:mtext></mml:math></inline-formula>, where <inline-formula><mml:math id="ieqn3"><mml:msub><mml:mrow><mml:mtext>y</mml:mtext></mml:mrow><mml:mrow><mml:mtext>i</mml:mtext></mml:mrow></mml:msub><mml:mtext>=1</mml:mtext></mml:math></inline-formula> if <inline-formula><mml:math id="ieqn4"><mml:mtext>CES</mml:mtext><mml:mtext>_</mml:mtext><mml:mtext>D</mml:mtext><mml:mtext>_8</mml:mtext><mml:mtext>&#x2265;4 </mml:mtext></mml:math></inline-formula> (Class 1, depression) and <inline-formula><mml:math id="ieqn5"><mml:msub><mml:mrow><mml:mtext>y</mml:mtext></mml:mrow><mml:mrow><mml:mtext>i</mml:mtext></mml:mrow></mml:msub><mml:mtext>=0</mml:mtext></mml:math></inline-formula> if <inline-formula><mml:math id="ieqn6"><mml:mtext>CES</mml:mtext><mml:mtext>_</mml:mtext><mml:mtext>D</mml:mtext><mml:mtext>_8</mml:mtext><mml:mtext>&#x2264;3</mml:mtext></mml:math></inline-formula> (Class 0, no depression) [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref26">26</xref>]. This threshold has been validated in ELSA and older adult populations, reflecting clinically meaningful levels of depressive symptoms. The CES-D 8 demonstrates strong internal consistency (Cronbach &#x03B1;=0.90&#x2010;0.92 across ELSA waves) and longitudinal measurement invariance, ensuring reliable depression classification across waves [<xref ref-type="bibr" rid="ref26">26</xref>].</p><p>This criterion was applied at each wave to examine the emergence of new depression cases over the following 2 years. 
For each wave pair, this baseline exclusion criterion was used independently. Therefore, individuals who were identified as depressed at the baseline of a particular wave were excluded from that related analysis, even if they were not classified as depressed at earlier waves, ensuring that the learning task focused on incident depressive symptoms at follow-up rather than persistent or recurrent symptoms.</p><p>Participants with a missing response on any of the 8 CES-D items were excluded from the analysis, as a complete CES-D score was required to determine depressive symptom status, thereby minimizing label noise arising from missing symptom data. To assess the robustness of the depression labels, sensitivity analyses were conducted using alternative CES-D 8 thresholds (&#x2265;3 and &#x2265;5), which showed consistent performance patterns and stable feature importance rankings across models, supporting the reliability of the outcome definition.</p></sec><sec id="s2-2"><title>Data Preprocessing and Feature Engineering</title><p>The ELSA dataset provides a rich set of variables collected across multiple domains and waves. For this analysis, data from 4 consecutive waves were used to predict the incidence of depression within the subsequent 2 years. A number of psychological variables were included in the analysis, including the Life Satisfaction Scale [<xref ref-type="bibr" rid="ref27">27</xref>], which measures overall life contentment; the Quality of Life Questionnaire, which assesses autonomy, control, and self-realization; and the Loneliness Scale [<xref ref-type="bibr" rid="ref28">28</xref>], which measures feelings of loneliness and perceived social isolation. In addition to psychological measures, nonpsychological variables were incorporated to provide a broader view of participants&#x2019; lives, spanning socioeconomic status, environmental exposures, health status, lifestyle behaviors, and cognitive function. 
To have a depressive symptom prediction model that is accurate and consistent, careful selection and efficient preprocessing of variables are important. In addition to excluding participants with missing CES-D variables, missing predictor values were imputed. Mean imputation was used for continuous variables and mode imputation for categorical variables, applied separately within each wave.</p><p>To address the high dimensionality of the ELSA dataset, which contains over 6100 variables per wave, we applied systematic feature reduction and selection approaches. We developed a consistent feature set that included over 120 variables from all waves to develop depressive symptom prediction models. First, a variance threshold of 0.01 was applied to remove features with low variability, as they provide limited discriminative information. Features were subsequently selected based on well-established associations with depressive symptoms reported in prior epidemiological and clinical studies, supported by domain knowledge and clinical expertise. Features with known associations with mental health, such as sleep quality (heslpf), and self-rated health (hehelf), were prioritized, whereas administrative variables, technical survey metadata, and variables with limited relevance to mental health were excluded. While the total number of available features varied slightly between waves due to differences in data collection, we ensured consistency by prioritizing common features across waves. Additionally, wave-specific variables were incorporated, such as sleep-related variables available in waves 6 and 8, fruit and vegetable intake in waves 6&#x2010;8, cognitive measures in waves 7&#x2010;9, and sexual activity variables in wave 6. These variables were included only where available, resulting in slightly different feature sets across wave pairs. 
A complete list of all features is provided in Tables S4 and S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. While predictive modeling was performed across all waves, feature importance and clustering analyses were conducted on one representative wave.</p></sec><sec id="s2-3"><title>ML Methods</title><p>To maintain a balance between interpretability, computational efficiency, and predictive accuracy among traditional and state-of-the-art models, the following ML algorithms were used for the prediction of depression.</p><list list-type="bullet"><list-item><p>Logistic regression (LR): a baseline method for binary classification that models linear relationships between predictors and the outcome. This approach is valued for its simplicity and interpretable coefficients.</p></list-item><list-item><p>Random forest (RF): an ensemble method that combines multiple decision trees. It improves classification performance by using bootstrap aggregating (bagging) and randomization in feature selection during tree construction to reduce overfitting and enhance robustness.</p></list-item><list-item><p>Support vector machine (SVM): a supervised ML method that identifies an optimal hyperplane to separate classes in high-dimensional space with minimal error. Its capabilities make it a widely used option for both classification and regression tasks.</p></list-item><list-item><p>Extreme gradient boosting (XGBoost): an ensemble gradient-boosting framework that iteratively refines predictions by correcting the residuals of previous models. It then aggregates results from all models to make the final prediction, making it highly effective as a predictive model. 
Given the nature of our dataset and XGBoost&#x2019;s ability to handle missing data and identify complex interactions between features, it is a strong choice for this study.</p></list-item><list-item><p>K-nearest neighbors (KNN): a nonparametric algorithm that classifies each sample according to the most frequent label among its k nearest neighbors in the feature space. The idea behind the approach is that samples of the same class are usually grouped near each other in the feature space. Feature normalization was applied to optimize the distance metric.</p></list-item><list-item><p>TabNet: a deep learning architecture designed for tabular data. TabNet enables sparse feature selection while maintaining interpretability by using a sequential attention mechanism. All these features make it an appropriate choice for tasks involving the classification of tabular data, such as depressive symptom prediction [<xref ref-type="bibr" rid="ref29">29</xref>].</p></list-item><list-item><p>TabTransformer: an attention-based model designed to emphasize categorical features. Initially, categorical features are mapped into contextual embeddings. After being concatenated with continuous features, they are subsequently processed through a multilayer perceptron for classification or regression tasks [<xref ref-type="bibr" rid="ref30">30</xref>]. This approach leverages the rich categorical features of the dataset.</p></list-item><list-item><p>Multinomial naive Bayes (MNB): a probabilistic classifier that assumes feature independence. Three approaches were tested: (1) using standard naive Bayes for the entire dataset, (2) applying MNB to the entire dataset, and (3) combining standard naive Bayes for numerical features and MNB for categorical features. The second strategy outperformed the other methods, indicating its effectiveness for this study. Continuous variables were discretized into 10 categories to adapt them for MNB. 
For instance, age ranging from 50 years to 90+ years was divided into 10 equal-width bins (50&#x2010;54, 54&#x2010;58, etc).</p></list-item></list><p>For each pair of waves, participants who were present in both the baseline wave and the target wave were identified. The matched participants were divided into training, validation, and test sets (70%, 15%, 15%). Internal validation was used for hyperparameter tuning. To ensure robustness, this procedure was performed 5 times using different random seeds. For each run, a new train-validation-test split was generated, and model performance was evaluated on the corresponding held-out test set. The classification threshold was selected as the value that maximized the <italic>F</italic><sub>1</sub>-score (<inline-formula><mml:math id="ieqn7"><mml:mtext>F</mml:mtext><mml:mtext>1= </mml:mtext><mml:mfrac><mml:mrow><mml:mtext>2</mml:mtext><mml:mtext>PR</mml:mtext></mml:mrow><mml:mrow><mml:mtext>(</mml:mtext><mml:mtext>P</mml:mtext><mml:mtext>+</mml:mtext><mml:mtext>R</mml:mtext><mml:mtext>)</mml:mtext></mml:mrow></mml:mfrac></mml:math></inline-formula>, where P denotes precision and R denotes recall) on the validation set. This approach was adopted to ensure a balanced trade-off between precision and recall, which is particularly important given the class imbalance in the depression dataset.</p><p>Additional hyperparameters were optimized using cross-validation and included RF (n_estimators=300, max_depth=7, min_samples_split=5, min_samples_leaf=2, class_weight=&#x201C;balanced_subsample&#x201D;), XGBoost (n_estimators=200, max_depth=5, learning_rate=0.1), SVM (C=1.0, kernel=&#x201C;rbf&#x201D;), LR (C=1.0, penalty=&#x201C;l2&#x201D;), and KNN (n_neighbors=5). TabNet and TabTransformer used learning_rate=0.001 and batch_size=128. MNB used &#x03B1;=1.0.</p></sec><sec id="s2-4"><title>Feature Importance</title><p>To evaluate the importance of the features for depressive symptom prediction in our study, SHAP was used for XGBoost and RF. 
SHAP offered a clear way to understand how individual variables affect model predictions. Applying SHAP to our best-performing models highlighted the most significant features that played a crucial role in predictions. This approach enhanced model interpretability and facilitated a deeper understanding of the complex multidimensional risk factors associated with depression in older adults.</p><p>To investigate feature contribution variation at the individual level, SHAP force plots were generated for 2 representative cases, one participant who developed clinically significant depressive symptoms in the subsequent wave and one who remained nondepressed in both waves. To highlight distinct risk profiles, the cases were chosen based on high model confidence (predicted probabilities &#x003E;0.80 for depressed cases and &#x003C;0.10 for nondepressed cases).</p></sec><sec id="s2-5"><title>Sensitivity Analysis</title><p>To evaluate the robustness of results to key methodological choices, we performed sensitivity analyses examining CES-D 8 threshold selection and categorical feature encoding strategies.</p><sec id="s2-5-1"><title>Threshold Selection</title><p>To assess the influence of CES-D 8 threshold selection on depressive symptom prediction, we conducted sensitivity analyses using different depression cutoffs (&#x2265;3, &#x2265;4, and &#x2265;5 symptoms) for both XGBoost and RF models. Increasing the CES-D 8 threshold makes the outcome definition stricter, shifting from a larger group with milder symptoms to a smaller group with more severe depressive symptomatology.</p><p>For each threshold, depressive symptom status at wave 7 (outcome) was redefined while preserving identical inclusion criteria, feature engineering, and model hyperparameters. Both models were retrained and evaluated on data having a depression label corresponding to each threshold. 
Feature importance was assessed using SHAP values, and the consistency of highly ranked predictors across thresholds was examined to evaluate the robustness of key risk factors under increasingly conservative definitions of depression.</p></sec><sec id="s2-5-2"><title>Categorical Encoding</title><p>To validate our categorical encoding strategy, we performed a sensitivity analysis comparing 2 encoding approaches for categorical variables with a natural ordering. For a categorical feature <inline-formula><mml:math id="ieqn8"><mml:msub><mml:mrow><mml:mtext>x</mml:mtext></mml:mrow><mml:mrow><mml:mtext>j</mml:mtext></mml:mrow></mml:msub><mml:mtext> </mml:mtext></mml:math></inline-formula> with <inline-formula><mml:math id="ieqn9"><mml:mtext>k</mml:mtext><mml:mtext> </mml:mtext></mml:math></inline-formula> ordered levels, ordinal encoding maps categories to integer values, <inline-formula><mml:math id="ieqn10"><mml:msub><mml:mrow><mml:mtext>x</mml:mtext></mml:mrow><mml:mrow><mml:mtext>j</mml:mtext></mml:mrow></mml:msub><mml:mtext>&#x2208;{1,2,&#x2026;,</mml:mtext><mml:mtext>k</mml:mtext><mml:mtext>}</mml:mtext></mml:math></inline-formula>, preserving their natural sequence (eg, self-rated health: excellent=1, very good=2, good=3, fair=4, and poor=5). 
By contrast, one-hot encoding represents <inline-formula><mml:math id="ieqn11"><mml:msub><mml:mrow><mml:mtext>x</mml:mtext></mml:mrow><mml:mrow><mml:mtext>j</mml:mtext></mml:mrow></mml:msub></mml:math></inline-formula> as a binary indicator vector <inline-formula><mml:math id="ieqn12"><mml:msub><mml:mrow><mml:mtext>e</mml:mtext></mml:mrow><mml:mrow><mml:mtext>j</mml:mtext></mml:mrow></mml:msub><mml:mtext>=[</mml:mtext><mml:msub><mml:mrow><mml:mtext>e</mml:mtext></mml:mrow><mml:mrow><mml:mtext>j</mml:mtext><mml:mtext>1</mml:mtext></mml:mrow></mml:msub><mml:mtext>,</mml:mtext><mml:msub><mml:mrow><mml:mtext>e</mml:mtext></mml:mrow><mml:mrow><mml:mtext>j</mml:mtext><mml:mtext>2</mml:mtext></mml:mrow></mml:msub><mml:mtext>,&#x2026;,</mml:mtext><mml:msub><mml:mrow><mml:mtext>e</mml:mtext></mml:mrow><mml:mrow><mml:mtext>jk</mml:mtext></mml:mrow></mml:msub><mml:mtext>]&#x2208;{0,1</mml:mtext><mml:msup><mml:mrow><mml:mtext>}</mml:mtext></mml:mrow><mml:mrow><mml:mtext>k</mml:mtext></mml:mrow></mml:msup></mml:math></inline-formula>, where <inline-formula><mml:math id="ieqn13"><mml:msub><mml:mrow><mml:mtext>e</mml:mtext></mml:mrow><mml:mrow><mml:mtext>ji</mml:mtext></mml:mrow></mml:msub><mml:mtext>=1 </mml:mtext></mml:math></inline-formula> if <inline-formula><mml:math id="ieqn14"><mml:msub><mml:mrow><mml:mtext>x</mml:mtext></mml:mrow><mml:mrow><mml:mtext>j</mml:mtext></mml:mrow></mml:msub><mml:mtext> </mml:mtext></mml:math></inline-formula> belongs to the <inline-formula><mml:math id="ieqn15"><mml:mtext>i</mml:mtext></mml:math></inline-formula><sup>th</sup> category and <inline-formula><mml:math id="ieqn16"><mml:mtext>0 </mml:mtext></mml:math></inline-formula> otherwise. Ordinal variables such as self-rated health and mobility limitations were encoded as sequential integers under the ordinal approach, whereas one-hot encoding transformed these variables into binary indicators. 
This comparison was performed across all 6 ML algorithms using identical train-test splits, resampling strategies, and hyperparameters. Model performance was evaluated using AUROC and <italic>F</italic><sub>1</sub>-score for the positive class (depression).</p></sec></sec><sec id="s2-6"><title>Clustering</title><p>Different subset groupings and preprocessing strategies were examined to identify clustering patterns associated with depression. Three-group and 6-group feature sets were chosen for clustering analysis. The 3-group configuration divided features into 3 main categories, including demographic, physical health, and social engagement categories aligned with known risk factors. The 6-group approach organized these into demographic, physical health, social engagement, psychological, lifestyle, and cognition features to examine whether more detailed subgroups would reveal additional depression-related patterns. These configurations were chosen to balance interpretability with sufficient detail while ensuring adequate samples within each cluster. The 3-group approach was chosen because it produced more visually distinct groupings in t-distributed stochastic neighbor embedding (t-SNE) visualizations compared to the 6-group approach, which showed overlapping patterns. This suggests that aggregating features into broader categories captures more distinct patterns and provides clearer clinical interpretability.</p><p>Two approaches were used to represent the features. To enhance cluster separation, the first strategy applied ordinal encoding to categorical features and discretization to numerical features. In the second method, the TabTransformer model was used to encode categorical features into dense vectors, which were then combined with continuous features. K-Means clustering was applied iteratively in both strategies to determine the optimal number of clusters based on the Silhouette score, and t-SNE was used to visualize the results. 
It facilitated the qualitative evaluation of the methods to identify patterns related to depression. Findings from these analyses are discussed in the Results section.</p></sec><sec id="s2-7"><title>Ethical Considerations</title><p>This study used the publicly available, deidentified ELSA dataset. Ethical approval for each wave of ELSA was obtained from the South Central&#x2013;Berkshire Research Ethics Committee (formerly the NRES Committee South Central&#x2013;Berkshire), and all participants provided informed consent at the time of data collection. No new participant data were collected by the authors. For questions regarding the dataset, contact ELSA. No artificial intelligence tools were used for data analysis or interpretation in this study.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p>This section presents the results of applying multiple ML models to 4 independent ELSA wave pairs to predict clinically significant depressive symptoms at 2-year follow-up in older adults. Across waves, the best-performing models had AUROC values between 0.72 and 0.75. Notably, traditional ML techniques&#x2014;including RF, XGBoost, SVM, and MNB&#x2014;generally achieved higher performance than the deep learning models designed for tabular data. Moreover, ordinal encoding consistently improved model performance for every algorithm compared to one-hot encoding. Robustness was assessed through sensitivity analyses examining alternative CES-D 8 thresholds and encoding strategies, which demonstrated stable performance patterns. 
Finally, feature importance analyses identified social engagement, sleep quality, and self-rated health as reliable and influential predictors of depressive symptoms at follow-up.</p></sec><sec id="s3-2"><title>Comparative Model Performance Across Waves</title><p>Models&#x2019; performance in predicting depressive symptoms varied across the 4 waves, and no single model consistently outperformed others in every wave. <xref ref-type="table" rid="table2">Table 2</xref> reports wave-specific analytic sample sizes and incident case rates, which provide essential context for interpreting model performance in the presence of class imbalance. <xref ref-type="table" rid="table3">Table 3</xref> presents the mean performance metrics across waves, focusing on clinically relevant Class 1. AUROC values in <xref ref-type="table" rid="table3">Table 3</xref> are reported with 95% CIs to quantify uncertainty in model discrimination across waves. Detailed wave-specific results, as well as the mean performance metrics for Class 0, are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Wave-specific analytic sample size and incident depressive symptoms at follow-up.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Outcome wave</td><td align="left" valign="bottom">Baseline sample size (n)</td><td align="left" valign="bottom">Incident cases n (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Wave 7</td><td align="left" valign="top">5735</td><td align="left" valign="top">764 (13.3)</td></tr><tr><td align="left" valign="top">Wave 8</td><td align="left" valign="top">5128</td><td align="left" valign="top">828 (16.1)</td></tr><tr><td align="left" valign="top">Wave 9</td><td align="left" valign="top">4582</td><td align="left" valign="top">821 (17.9)</td></tr><tr><td align="left" valign="top">Wave 10</td><td 
align="left" valign="top">3144</td><td align="left" valign="top">650 (20.7)</td></tr></tbody></table></table-wrap><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Comparative performance metrics of machine learning models for predicting depressive symptoms at follow-up among older adults (aged &#x2265;50 years) using English Longitudinal Study of Ageing (ELSA), waves 6&#x2010;10. Clinically significant depressive symptoms were defined using 8-item Center for Epidemiologic Studies Depression Scale (CES-D 8) with a cutoff score &#x2265;4.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">Precision (C1)<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup>, mean (SD)</td><td align="left" valign="bottom">Recall (C1), mean (SD)</td><td align="left" valign="bottom">Specificity (C0)<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup>, mean (SD)</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub> (C1), mean (SD)</td><td align="left" valign="bottom">Accuracy, mean (SD)</td><td align="left" valign="bottom">Macro <italic>F</italic><sub>1</sub>, mean (SD)</td><td align="left" valign="bottom">AUROC<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup> (95% CI)</td></tr></thead><tbody><tr><td align="left" valign="top">RF<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></td><td align="left" valign="top">0.32 (0.015)</td><td align="left" valign="top">0.52 (0.022)</td><td align="left" valign="top">0.80 (0.010)</td><td align="left" valign="top">0.40 (0.018)</td><td align="left" valign="top">0.76 (0.009)</td><td align="left" valign="top">0.62 (0.012)</td><td align="left" valign="top">0.72 (0.70&#x2010;0.74)</td></tr><tr><td align="left" valign="top">TabNet</td><td align="left" valign="top">0.27 (0.017)</td><td align="left" valign="top">0.45 (0.025)</td><td align="left" valign="top">0.78 (0.011)</td><td 
align="left" valign="top">0.34 (0.020)</td><td align="left" valign="top">0.73 (0.010)</td><td align="left" valign="top">0.58 (0.013)</td><td align="left" valign="top">0.67 (0.65&#x2010;0.69)</td></tr><tr><td align="left" valign="top">XGBoost<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td><td align="left" valign="top">0.30 (0.014)</td><td align="left" valign="top">0.57 (0.023)</td><td align="left" valign="top">0.75 (0.012)</td><td align="left" valign="top">0.39 (0.017)</td><td align="left" valign="top">0.72 (0.010)</td><td align="left" valign="top">0.60 (0.011)</td><td align="left" valign="top">0.72 (0.70&#x2010;0.74)</td></tr><tr><td align="left" valign="top">KNN<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup></td><td align="left" valign="top">0.26 (0.018)</td><td align="left" valign="top">0.28 (0.020)</td><td align="left" valign="top">0.85 (0.008)</td><td align="left" valign="top">0.26 (0.016)</td><td align="left" valign="top">0.76 (0.007)</td><td align="left" valign="top">0.56 (0.012)</td><td align="left" valign="top">0.60 (0.58&#x2010;0.62)</td></tr><tr><td align="left" valign="top">LR<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup></td><td align="left" valign="top">0.30 (0.016)</td><td align="left" valign="top">0.44 (0.021)</td><td align="left" valign="top">0.82 (0.009)</td><td align="left" valign="top">0.35 (0.018)</td><td align="left" valign="top">0.76 (0.008)</td><td align="left" valign="top">0.60 (0.011)</td><td align="left" valign="top">0.71 (0.69&#x2010;0.73)</td></tr><tr><td align="left" valign="top">SVM<sup><xref ref-type="table-fn" rid="table3fn8">h</xref></sup></td><td align="left" valign="top">0.30 (0.015)</td><td align="left" valign="top">0.57 (0.022)</td><td align="left" valign="top">0.76 (0.011)</td><td align="left" valign="top">0.40 (0.017)</td><td align="left" valign="top">0.73 (0.009)</td><td align="left" valign="top">0.61 (0.012)</td><td align="left" valign="top">0.72 
(0.70&#x2010;0.74)</td></tr><tr><td align="left" valign="top">MNB<sup><xref ref-type="table-fn" rid="table3fn9">i</xref></sup></td><td align="left" valign="top">0.32 (0.014)</td><td align="left" valign="top">0.56 (0.021)</td><td align="left" valign="top">0.78 (0.010)</td><td align="left" valign="top">0.40 (0.016)</td><td align="left" valign="top">0.75 (0.009)</td><td align="left" valign="top">0.62 (0.011)</td><td align="left" valign="top">0.73 (0.71&#x2010;0.75)</td></tr><tr><td align="left" valign="top">TabTransformer</td><td align="left" valign="top">0.29 (0.016)</td><td align="left" valign="top">0.60 (0.024)</td><td align="left" valign="top">0.73 (0.012)</td><td align="left" valign="top">0.39 (0.018)</td><td align="left" valign="top">0.71 (0.010)</td><td align="left" valign="top">0.60 (0.012)</td><td align="left" valign="top">0.71 (0.69&#x2010;0.73)</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>C1: Class 1, indicates depressive symptoms at follow-up.</p></fn><fn id="table3fn2"><p><sup>b</sup>C0: Class 0, indicates no depression.</p></fn><fn id="table3fn3"><p><sup>c</sup>AUROC: area under the receiver operating characteristic curve.</p></fn><fn id="table3fn4"><p><sup>d</sup>RF: random forest.</p></fn><fn id="table3fn5"><p><sup>e</sup>XGBoost: extreme gradient boosting.</p></fn><fn id="table3fn6"><p><sup>f</sup>KNN: k-nearest neighbors.</p></fn><fn id="table3fn7"><p><sup>g</sup>LR: logistic regression.</p></fn><fn id="table3fn8"><p><sup>h</sup>SVM: support vector machine.</p></fn><fn id="table3fn9"><p><sup>i</sup>MNB: multinomial naive Bayes.</p></fn></table-wrap-foot></table-wrap><p>Overall, RF, XGBoost, MNB, and SVM were the best-performing models. These models achieved the highest balanced results, with mean AUROC scores between 0.72 and 0.73 and mean macro-average <italic>F</italic><sub>1</sub>-scores ranging from 0.60 to 0.62. 
Despite their effectiveness in detecting participants without depression (<italic>F</italic><sub>1</sub>-score=0.82&#x2010;0.85), their performance in identifying depressive symptoms at follow-up (<italic>F</italic><sub>1</sub>-score=0.40) was more limited, highlighting challenges in predicting the minority class. <xref ref-type="table" rid="table4">Table 4</xref> presents confusion matrices and predictive values for RF and XGBoost models trained on wave 6 predictors and evaluated at wave 7.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Confusion matrices and predictive values for random forest (RF) and extreme gradient boosting (XGBoost) models predicting incident depressive symptoms at wave 7 using wave 6 predictors, evaluated on the held-out test set (n=1147; 12.4% incident cases).</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">TN<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="bottom">FP<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></td><td align="left" valign="bottom">FN<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup></td><td align="left" valign="bottom">TP<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></td><td align="left" valign="bottom">PPV<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></td><td align="left" valign="bottom">NPV<sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">RF<sup><xref ref-type="table-fn" rid="table4fn7">g</xref></sup></td><td align="left" valign="top">848</td><td align="left" valign="top">157</td><td align="left" valign="top">68</td><td align="left" valign="top">74</td><td align="left" valign="top">0.32</td><td align="left" valign="top">0.93</td></tr><tr><td align="left" valign="top">XGBoost<sup><xref ref-type="table-fn" 
rid="table4fn8">h</xref></sup></td><td align="left" valign="top">816</td><td align="left" valign="top">189</td><td align="left" valign="top">61</td><td align="left" valign="top">81</td><td align="left" valign="top">0.30</td><td align="left" valign="top">0.93</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>TN: true negative.</p></fn><fn id="table4fn2"><p><sup>b</sup>FP: false positive.</p></fn><fn id="table4fn3"><p><sup>c</sup>FN: false negative.</p></fn><fn id="table4fn4"><p><sup>d</sup>TP: true positive.</p></fn><fn id="table4fn5"><p><sup>e</sup>PPV: positive predictive value.</p></fn><fn id="table4fn6"><p><sup>f</sup>NPV: negative predictive value.</p></fn><fn id="table4fn7"><p><sup>g</sup>RF: random forest.</p></fn><fn id="table4fn8"><p><sup>h</sup>XGBoost: extreme gradient boosting.</p></fn></table-wrap-foot></table-wrap><p>TabNet and TabTransformer yielded mixed results. Although TabNet achieved competitive metrics for Class 0, its macro-average <italic>F</italic><sub>1</sub>-scores and overall performance were impacted by its frequently low recall for Class 1. TabTransformer showed moderate accuracy with higher variability across waves, though its Class 1 <italic>F</italic><sub>1</sub>-scores remained lower than those of RF and XGBoost.</p><p>MNB emerged as a relatively balanced model, with acceptable <italic>F</italic><sub>1</sub>-scores for both Class 0 and Class 1. As shown in <xref ref-type="fig" rid="figure1">Figure 1</xref>, its AUROC scores demonstrated consistent differentiation between the 2 classes, which makes it a viable choice for dealing with imbalanced datasets. 
KNN and LR were the weakest for Class 1, with <italic>F</italic><sub>1</sub>-scores consistently below 0.35 and recall values often dropping below 0.40.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Receiver operating characteristic (ROC) curves and corresponding area under the receiver operating characteristic curve (AUROC) scores for machine learning models predicting depressive symptoms at follow-up in the English Longitudinal Study of Ageing (ELSA), waves 6&#x2010;10 (older adults aged &#x2265;50 years). Panels show prediction from (A) wave 6&#x2192;7, (B) wave 7&#x2192;8, (C) wave 8&#x2192;9, and (D) wave 9&#x2192;10. Clinically significant depressive symptoms were defined using 8-item Center for Epidemiologic Studies Depression Scale (CES-D 8) score &#x2265;4. AUC: area under the curve; KNN: k-nearest neighbors; ROC: receiver operating characteristic; SVM: support vector machine; XGBoost: extreme gradient boosting.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e84744_fig01.png"/></fig></sec><sec id="s3-3"><title>Feature Importance Analysis</title><p>As shown in <xref ref-type="fig" rid="figure2">Figure 2</xref>, SHAP values computed on the held-out test sets for the top-performing XGBoost and RF models highlighted several key predictors of depression. Common factors such as negative social interactions (NSoc), future outlook (FuEn), motivation for social and cultural activities (MoActVar), and overall sleep quality (heslpf) scored highly in both models, suggesting their importance in predicting depressive symptoms. Other features that are common across algorithms, such as self-reported general health (Hehelf) and frequency of internet use (scint), further emphasize their significance. These overlaps demonstrate the robustness and generalizability of these features in capturing multidimensional risk factors for depression. 
Some predictors overlap conceptually with CES-D items and may partly reflect symptom persistence; therefore, SHAP values should not be interpreted as evidence of independent causal effects. The SHAP analysis offers an interpretable framework for understanding model behavior and emphasizing the critical role of these common predictors in identifying individuals at risk for depression. Refer to <xref ref-type="table" rid="table5">Table 5</xref> for a detailed breakdown of these features.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Shapley additive explanations (SHAP) feature importance for the top 20 predictors of depressive symptoms at follow-up using (A) extreme gradient boosting (XGBoost) and (B) random forest (RF) models among older adults (aged &#x2265;50 years) in the English Longitudinal Study of Ageing (ELSA), wave 6. Each point represents an individual prediction. The x-axis shows SHAP values, which reveal each feature&#x2019;s contribution to the model&#x2019;s predicted probability of depressive symptoms at follow-up. Higher absolute SHAP values indicate greater predictive importance. 
The color gradient represents normalized feature values (blue=low and red=high).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e84744_fig02.png"/></fig><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Descriptions of top predictive features for depressive symptoms at follow-up identified through Shapley additive explanations (SHAP) feature importance analysis using random forest (RF) and extreme gradient boosting (XGBoost) models in the English Longitudinal Study of Ageing (ELSA), wave 6.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Features<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup></td><td align="left" valign="bottom">Feature description</td></tr></thead><tbody><tr><td align="left" valign="top">heslpf</td><td align="left" valign="top">Sleep: rating sleep quality overall</td></tr><tr><td align="left" valign="top">indager</td><td align="left" valign="top">Definitive age variable collapsed at 90+ to avoid disclosure</td></tr><tr><td align="left" valign="top">NSoc</td><td align="left" valign="top">Frequency of negative social interactions and feelings of loneliness</td></tr><tr><td align="left" valign="top">FuEn</td><td align="left" valign="top">Frequency of positive future outlook and energy levels</td></tr><tr><td align="left" valign="top">NCon</td><td align="left" valign="top">Frequency of feelings of lack of control and social exclusion</td></tr><tr><td align="left" valign="top">BarEnj</td><td align="left" valign="top">Barriers preventing enjoyment: health, age, money, family responsibilities</td></tr><tr><td align="left" valign="top">AgeEx</td><td align="left" valign="top">Expectation of living to advanced ages (80 or 90+ y)</td></tr><tr><td align="left" valign="top">indsex</td><td align="left" valign="top">Definitive sex variable: priority disex, dhsex</td></tr><tr><td align="left" 
valign="top">Helim</td><td align="left" valign="top">Whether long-standing illness is limiting</td></tr><tr><td align="left" valign="top">HePaa</td><td align="left" valign="top">Severity of pain most of the time</td></tr><tr><td align="left" valign="top">hemobch</td><td align="left" valign="top">Mobility: difficulty getting up from a chair after sitting long periods</td></tr><tr><td align="left" valign="top">TVh</td><td align="left" valign="top">Total hours of TV watching during a week</td></tr><tr><td align="left" valign="top">heslpc</td><td align="left" valign="top">Sleep: number of days with trouble staying asleep in last month</td></tr><tr><td align="left" valign="top">MoActVar</td><td align="left" valign="top">Desire to engage more in cultural and social activities</td></tr><tr><td align="left" valign="top">ExHLim</td><td align="left" valign="top">Expectation (%) that health will limit ability to work before age 65</td></tr><tr><td align="left" valign="top">LTCEx</td><td align="left" valign="top">Expectation of needing to pay for long-term care</td></tr><tr><td align="left" valign="top">Hehelf</td><td align="left" valign="top">Self-reported general health</td></tr><tr><td align="left" valign="top">HeLWk</td><td align="left" valign="top">Self-reported health problem/disability that limits paid work</td></tr><tr><td align="left" valign="top">HePain</td><td align="left" valign="top">Whether often troubled with pain</td></tr><tr><td align="left" valign="top">hemobli</td><td align="left" valign="top">Mobility: difficulty lifting/carrying over 10 pounds</td></tr><tr><td align="left" valign="top">hemobwa</td><td align="left" valign="top">Mobility: difficulty walking 100 yards</td></tr><tr><td align="left" valign="top">scint</td><td align="left" valign="top">Frequency of internet or email use</td></tr><tr><td align="left" valign="top">PSoc</td><td align="left" valign="top">Frequency of positive social experiences and enjoyment</td></tr><tr><td align="left" 
valign="top">HeActb</td><td align="left" valign="top">Frequency of engaging in moderate sports/activities</td></tr><tr><td align="left" valign="top">Alch</td><td align="left" valign="top">Weighted alcohol consumption over the last 7 days</td></tr><tr><td align="left" valign="top">TotSav</td><td align="left" valign="top">Total financial wealth across all savings and assets</td></tr><tr><td align="left" valign="top">sclifeb</td><td align="left" valign="top">Agreement with the statement that life conditions are excellent</td></tr><tr><td align="left" valign="top">DiMar</td><td align="left" valign="top">Marital status of the respondent</td></tr><tr><td align="left" valign="top">Eyesight</td><td align="left" valign="top">Level of visual acuity</td></tr><tr><td align="left" valign="top">JobSat</td><td align="left" valign="top">Level of job satisfaction</td></tr><tr><td align="left" valign="top">LitMon</td><td align="left" valign="top">Level of financial limitations</td></tr><tr><td align="left" valign="top">DhWork</td><td align="left" valign="top">Whether in paid employment (last week)</td></tr><tr><td align="left" valign="top">LiSat</td><td align="left" valign="top">Level of life satisfaction and meaning</td></tr><tr><td align="left" valign="top">PosSpou</td><td align="left" valign="top">Level of positive spousal support</td></tr><tr><td align="left" valign="top">heslpb</td><td align="left" valign="top">Sleep: frequency of waking up several times at night</td></tr><tr><td align="left" valign="top">hemobcs</td><td align="left" valign="top">Mobility: difficulty climbing several flights of stairs</td></tr><tr><td align="left" valign="top">CaTNo</td><td align="left" valign="top">Number of activities respondent received help with in the last month</td></tr><tr><td align="left" valign="top">heslpa</td><td align="left" valign="top">Sleep: how often respondent has difficulty falling asleep</td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>Features are 
derived from self-completion questionnaires and computer-assisted personal interviewing, including demographic, physical health, and psychosocial domains. Feature names correspond to original English Longitudinal Study of Ageing variable codes.</p></fn></table-wrap-foot></table-wrap><p>Distinct risk profiles at the individual level are illustrated by SHAP force plots (<xref ref-type="fig" rid="figure3">Figures 3</xref> and <xref ref-type="fig" rid="figure4">4</xref>). The participant who developed clinically significant depressive symptoms (predicted risk: 85.7%) exhibited convergence of multiple risk factors: mobility limitations (hemobch and hemobli), pain severity (HePaa), poor self-rated health (Hehelf), low future outlook and energy (FuEn), reduced desire to engage in activities (MoActVar), and digital exclusion (scint). In this case, red features pushed the prediction toward depression, whereas blue features pushed the prediction away from depression. Arrow width indicates the strength of each feature&#x2019;s contribution. The base value represents the average model prediction across participants, and the final prediction for this individual was f(x)=1.60 in log-odds space. Low negative social interactions (NSoc) contributed in the protective direction, but the overall prediction remained high because multiple risk factors contributed more strongly toward depression. In contrast, the nondepressed participant (predicted risk: 7.8%) demonstrated strong protective factors, including low negative social interactions (NSoc), positive future outlook and energy (FuEn), good sleep quality (heslpf), adequate job recognition (JobRec), and diverse activity engagement (MoActVar). For this participant, the final prediction was f(x)=-2.48 in log-odds space, indicating very low depression risk, with minimal risk variables present. 
These plots illustrate how features contribute differently across individuals based on their specific risk profiles.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Shapley additive explanations (SHAP) force plot demonstrating individual-level feature contributions for a participant who developed clinically significant depressive symptoms at wave 7 (predicted risk: 85.7%).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e84744_fig03.png"/></fig><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Shapley additive explanations (SHAP) force plot demonstrating individual-level feature contributions for a participant who remained nondepressed at wave 7 (predicted risk: 7.8%).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e84744_fig04.png"/></fig></sec><sec id="s3-4"><title>Sensitivity Analyses</title><p>The results of the sensitivity analyses regarding CES-D 8 threshold selection and categorical encoding strategies are presented below.</p><sec id="s3-4-1"><title>Threshold Selection</title><p>The area under the curve (AUC) score, which measures the model&#x2019;s capability to discriminate between participants who will and will not develop depression over 2 years, increased consistently with threshold severity: from 0.67 at threshold &#x2265;3 (mild depression, 31.6% prevalence) to 0.75&#x2010;0.76 at threshold &#x2265;4 (moderate, 14.2% prevalence) to 0.81&#x2010;0.82 at threshold &#x2265;5 (severe, 8.0% prevalence; <xref ref-type="table" rid="table6">Table 6</xref>). This increase is based on the clinical reality that severe depression shows more significant and distinct symptoms, making it easier to distinguish from nondepressed individuals. 
In comparison, lower thresholds (&#x2265;3) capture subclinical presentations that overlap with nonclinical states, creating ambiguity and making discrimination more challenging.</p><table-wrap id="t6" position="float"><label>Table 6.</label><caption><p>Comparative performance metrics of extreme gradient boosting (XGBoost) and random forest (RF) for predicting depressive symptoms at follow-up using different 8-item Center for Epidemiologic Studies Depression Scale (CES-D 8) thresholds (&#x2265;3, &#x2265;4, and &#x2265;5 symptoms) among older adults (aged &#x2265;50 years) using the English Longitudinal Study of Ageing (ELSA), wave 6.</p></caption><table id="table6" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Model and threshold</td><td align="left" valign="bottom">Cases (n)<sup><xref ref-type="table-fn" rid="table6fn1">a</xref></sup></td><td align="left" valign="bottom">Prevalence (%)<sup><xref ref-type="table-fn" rid="table6fn2">b</xref></sup></td><td align="left" valign="bottom">AUC<sup><xref ref-type="table-fn" rid="table6fn3">c</xref></sup><sup>,</sup><sup><xref ref-type="table-fn" rid="table6fn4">d</xref></sup></td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score</td></tr></thead><tbody><tr><td align="left" valign="bottom" colspan="5">XGBoost<sup><xref ref-type="table-fn" rid="table6fn5">e</xref></sup></td></tr><tr><td align="char" char="." valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x2265;3</td><td align="left" valign="top">1359</td><td align="left" valign="top">31.6</td><td align="left" valign="top">0.672</td><td align="left" valign="top">0.507</td></tr><tr><td align="char" char="." 
valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x2265;4</td><td align="left" valign="top">883</td><td align="left" valign="top">14.2</td><td align="left" valign="top">0.751</td><td align="left" valign="top">0.373</td></tr><tr><td align="char" char="." valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x2265;5</td><td align="left" valign="top">569</td><td align="left" valign="top">8.0</td><td align="left" valign="top">0.815</td><td align="left" valign="top">0.315</td></tr><tr><td align="left" valign="top" colspan="5">Random forest</td></tr><tr><td align="char" char="." valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x2265;3</td><td align="left" valign="top">1359</td><td align="left" valign="top">31.6</td><td align="left" valign="top">0.669</td><td align="left" valign="top">0.495</td></tr><tr><td align="char" char="." valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x2265;4</td><td align="left" valign="top">883</td><td align="left" valign="top">14.2</td><td align="left" valign="top">0.756</td><td align="left" valign="top">0.373</td></tr><tr><td align="char" char="." 
valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x2265;5</td><td align="left" valign="top">569</td><td align="left" valign="top">8.0</td><td align="left" valign="top">0.813</td><td align="left" valign="top">0.311</td></tr></tbody></table><table-wrap-foot><fn id="table6fn1"><p><sup>a</sup>n indicates the number of participants who developed clinically significant depressive symptoms at wave 7 among those nondepressed at wave 6.</p></fn><fn id="table6fn2"><p><sup>b</sup>Prevalence calculated as percentage of eligible participants meeting depression criteria at each threshold.</p></fn><fn id="table6fn3"><p><sup>c</sup>AUC: area under the curve.</p></fn><fn id="table6fn4"><p><sup>d</sup>AUC and <italic>F</italic><sub>1</sub>-score show discriminative ability and balanced classification performance, respectively. Both models show that the area under the receiver operating characteristic curve increases with threshold severity, while the <italic>F</italic><sub>1</sub>-score decreases due to prevalence-related trade-offs.</p></fn><fn id="table6fn5"><p><sup>e</sup>XGBoost: extreme gradient boosting.</p></fn></table-wrap-foot></table-wrap><p>As shown in <xref ref-type="table" rid="table6">Table 6</xref>, the <italic>F</italic><sub>1</sub>-score showed an inverse pattern, decreasing with higher thresholds. This decrease occurs mainly because of imbalanced classification tasks in which lower prevalence at stricter thresholds makes it challenging to maintain both high precision and high recall simultaneously.</p><p>Both models demonstrated nearly identical performance at each threshold, validating the consistency of these patterns. 
Feature importance also appeared to have stable patterns across all thresholds, with 3 main risk factors (self-rated health, future outlook/energy, and negative social interactions) appearing among the top 20 features for both models at every threshold.</p></sec><sec id="s3-4-2"><title>Categorical Encoding</title><p><xref ref-type="table" rid="table7">Table 7</xref> presents the sensitivity analysis comparing ordinal and one-hot encoding across all 6 ML algorithms. Ordinal encoding achieved superior or equivalent performance in all algorithms. For tree-based models, ordinal encoding increased the AUC score by 0.03 for XGBoost and 0.02 for RF. The distance-based method KNN also benefited from ordinal encoding, showing a 0.02 AUC score increase. The linear models, LR and SVM, achieved equivalent AUC scores regardless of the chosen encoding strategy.</p><p>The increase in <italic>F</italic><sub>1</sub>-score for depressive symptoms prediction was more pronounced than the increase in AUC score. XGBoost showed the largest improvement (0.09), followed by RF and SVM. For MNB, ordinal encoding also achieved superior performance, with increases of 0.01 in AUC score and 0.03 in <italic>F</italic><sub>1</sub>-score. 
On average across all models, ordinal encoding improved the AUC score by 0.013 and the <italic>F</italic><sub>1</sub>-score by 0.037, validating our feature engineering approach.</p><table-wrap id="t7" position="float"><label>Table 7.</label><caption><p>Comparative performance metrics of 6 machine learning algorithms for predicting depressive symptoms at follow-up using different encoding strategies (ordinal and one-hot encoding) among older adults (aged &#x2265;50 years) using the English Longitudinal Study of Ageing (ELSA), wave 6.</p></caption><table id="table7" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Model and encoding</td><td align="left" valign="bottom">AUC<sup><xref ref-type="table-fn" rid="table7fn1">a</xref></sup></td><td align="left" valign="bottom">AUROC<sup><xref ref-type="table-fn" rid="table7fn2">b</xref></sup> &#x0394;<sup><xref ref-type="table-fn" rid="table7fn3">c</xref></sup></td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub> &#x0394;<sup><xref ref-type="table-fn" rid="table7fn4">d</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top" colspan="3">RF<sup><xref ref-type="table-fn" rid="table7fn5">e</xref></sup></td><td align="left" valign="top">0.02</td><td align="left" valign="top"/><td align="left" valign="top">0.06</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ordinal</td><td align="left" valign="top">0.75</td><td align="left" valign="top"/><td align="left" valign="top">0.38</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>One-Hot</td><td align="left" valign="top">0.73</td><td align="left" valign="top"/><td align="left" valign="top">0.32</td><td align="left" 
valign="top"/></tr><tr><td align="left" valign="top" colspan="3">XGBoost<sup><xref ref-type="table-fn" rid="table7fn6">f</xref></sup></td><td align="left" valign="top">0.03</td><td align="left" valign="top"/><td align="left" valign="top">0.09</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ordinal</td><td align="left" valign="top">0.75</td><td align="left" valign="top"/><td align="left" valign="top">0.33</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>One-Hot</td><td align="left" valign="top">0.72</td><td align="left" valign="top"/><td align="left" valign="top">0.24</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="3">KNN<sup><xref ref-type="table-fn" rid="table7fn7">g</xref></sup></td><td align="left" valign="top">0.02</td><td align="left" valign="top"/><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ordinal</td><td align="left" valign="top">0.61</td><td align="left" valign="top"/><td align="left" valign="top">0.21</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>One-Hot</td><td align="left" valign="top">0.59</td><td align="left" valign="top"/><td align="left" valign="top">0.21</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="3">LR<sup><xref ref-type="table-fn" rid="table7fn8">h</xref></sup></td><td align="left" valign="top">0</td><td align="left" valign="top"/><td align="left" valign="top">0</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ordinal</td><td 
align="left" valign="top">0.72</td><td align="left" valign="top"/><td align="left" valign="top">0.33</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>One-Hot</td><td align="left" valign="top">0.72</td><td align="left" valign="top"/><td align="left" valign="top">0.33</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="3">SVM<sup><xref ref-type="table-fn" rid="table7fn9">i</xref></sup></td><td align="left" valign="top">0</td><td align="left" valign="top"/><td align="left" valign="top">0.04</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ordinal</td><td align="left" valign="top">0.75</td><td align="left" valign="top"/><td align="left" valign="top">0.39</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>One-Hot</td><td align="left" valign="top">0.75</td><td align="left" valign="top"/><td align="left" valign="top">0.35</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="3">MNB<sup><xref ref-type="table-fn" rid="table7fn10">j</xref></sup></td><td align="left" valign="top">0.01</td><td align="left" valign="top"/><td align="left" valign="top">0.03</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Ordinal</td><td align="left" valign="top">0.75</td><td align="left" valign="top"/><td align="left" valign="top">0.40</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>One-Hot</td><td align="left" valign="top">0.74</td><td align="left" valign="top"/><td align="left" 
valign="top">0.37</td><td align="left" valign="top"/></tr></tbody></table><table-wrap-foot><fn id="table7fn1"><p><sup>a</sup>AUC: area under the curve.</p></fn><fn id="table7fn2"><p><sup>b</sup>AUROC: area under the receiver operating characteristic curve.</p></fn><fn id="table7fn3"><p><sup>c</sup>AUROC &#x0394; indicates the difference in AUC score (ordinal &#x2013; one-hot).</p></fn><fn id="table7fn4"><p><sup>d</sup><italic>F</italic><sub>1</sub> &#x0394; shows the difference in <italic>F</italic><sub>1</sub>-score (ordinal &#x2013; one-hot).</p></fn><fn id="table7fn5"><p><sup>e</sup>RF: random forest.</p></fn><fn id="table7fn6"><p><sup>f</sup>XGBoost: extreme gradient boosting.</p></fn><fn id="table7fn7"><p><sup>g</sup>KNN: k-nearest neighbors.</p></fn><fn id="table7fn8"><p><sup>h</sup>LR: logistic regression.</p></fn><fn id="table7fn9"><p><sup>i</sup>SVM: support vector machine.</p></fn><fn id="table7fn10"><p><sup>j</sup>MNB: multinomial naive Bayes.</p></fn></table-wrap-foot></table-wrap></sec></sec><sec id="s3-5"><title>Clustering Analysis and Feature Representation</title><p>Clustering analysis was applied to explore if depression-related patterns could be identified beyond supervised performance. Features were grouped into 3 subsets, including demographic features, social engagement features, and physical health features, based on domain knowledge. Two clustering approaches were evaluated: (1) discretizing continuous data and applying ordinal encoding to categorical features, and (2) using TabTransformer embeddings for feature representation. TabTransformer embeddings were used to investigate whether a learned feature representation could better capture nonlinear relationships than simple encoding.</p><p>Across both representations and all 3 subsets, the silhouette scores were consistently negative, indicating challenges in forming clear clusters. 
Accordingly, t-SNE visualizations were used for qualitative exploration rather than to demonstrate cluster separability. While qualitative inspection of the t-SNE visualizations (<xref ref-type="fig" rid="figure5">Figure 5</xref>) suggested informative exploratory patterns related to depression scores, these visual structures should be interpreted cautiously, as t-SNE can produce apparent groupings even when substantial overlap exists in the original feature space. Patterns were most visible in social engagement features, less visible in demographic features, and minimal in physical health features.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>t-distributed stochastic neighbor embedding (t-SNE) visualizations and K-Means clustering results for older adults (aged &#x2265;50 years) in the English Longitudinal Study of Ageing (ELSA), wave 6, using 2 different feature-representation strategies: (A) discretized and ordinally encoded features combining numerical and categorical variables and (B) TabTransformer embeddings integrating categorical and continuous features through attention-based representation learning. Each panel displays 3 feature subsets&#x2014;demographic, physical health, and social engagement&#x2014;used to explore latent groupings related to depression risk. Color bars show depression severity scores (8-item Center for Epidemiologic Studies Depression Scale [CES-D 8], right) and cluster labels (left, k=3).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e84744_fig05.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>Using data from the nationally representative ELSA study, we developed ML models to predict clinically significant depressive symptoms at 2-year follow-up among adults aged 50 years and above. 
Our goal was to identify individuals at higher risk for developing elevated depressive symptoms and provide a foundation for potential early interventions.</p><p>Although statistical models are highly practical for hypothesis testing, they rely on linear assumptions and predefined interaction structures, which may limit their capability to capture the complex relationships underlying depressive symptoms at follow-up in older adults. In contrast, ML and deep learning approaches can model nonlinear interactions among psychological, social, and health-related domains, enabling a more comprehensive representation of depression risk factors. Applying explainability methods such as SHAP allows us to retain interpretability, despite models&#x2019; complexity.</p><p>In our study, traditional ML models, such as RF, SVM, and XGBoost, demonstrated better performance than deep learning techniques such as TabNet and TabTransformer. These results may be explained by the nature of our dataset, which contains mostly ordinal categorical features. Traditional ML models effectively handle ordinal data through straightforward encoding techniques that preserve the inherent order and relationships within the features. In contrast, TabNet and TabTransformer use complex embedding strategies designed to capture complex feature interactions. However, they may not fully leverage the ordinal characteristics of the data, leading to weaker performance. Furthermore, simpler models such as KNN and LR showed an insufficient ability to recognize complex patterns in the data, leading to lower predictive accuracy. This suggests that while simpler algorithms can perform well in some cases, their effectiveness may decline significantly when applied to complex, multidimensional phenomena like depression.</p><p>Although MNB has a probabilistic nature and assumes feature independence, it still yielded competitive results. 
This suggests that in certain contexts, when applying appropriate preprocessing steps, probabilistic models are capable of capturing depression risk factors. In our study, to meet the model&#x2019;s requirements, numerical variables were discretized into categorical bins. These findings reveal why understanding a dataset&#x2019;s characteristics is important for selecting the best model. Choosing a model that is inherently aligned with the dataset&#x2019;s characteristics can improve both performance and interpretability. Selecting the right model is even more important when dealing with datasets that contain a mix of categorical (nominal and ordinal) and numerical variables.</p><p>Compared with earlier studies, our models achieved AUROC values comparable to or higher than those reported in prior longitudinal work, as RF and XGBoost both reached an AUROC of 0.72 [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. However, direct comparisons of predictive performance should be interpreted cautiously, considering the differences in cohorts, target definitions, and predictor sets. Zheng et al [<xref ref-type="bibr" rid="ref20">20</xref>] reported an AUROC of 0.673, while Su et al [<xref ref-type="bibr" rid="ref19">19</xref>] achieved an AUROC of 0.629 using LR with lasso regularization. In our study, we aimed to identify independent predictors of future depressive symptoms by excluding baseline depressive symptom status from the models to minimize data leakage and preserve predictor independence. In contrast, Song et al [<xref ref-type="bibr" rid="ref7">7</xref>] incorporated baseline psychological scales as predictors and reported a higher AUROC of 0.791. Although this reflects stronger predictive performance, including baseline symptom measures makes interpretation more challenging, as predictions may partly capture symptom persistence rather than future risk. 
In our study, participants with clinically significant depressive symptoms at baseline were excluded, which minimizes the risk of directly capturing symptom persistence.</p><p>Our feature importance analysis identified significant predictors of depressive symptoms at follow-up, including negative social interactions, sleep quality, positive social and spousal support, self-rated health, and life satisfaction. These findings align with prior research. For example, Zheng et al [<xref ref-type="bibr" rid="ref20">20</xref>] highlighted sex, self-rated health status, physical pain, and marital status as key predictors, consistent with our results, although occupation and eyesight were not emphasized in our analysis. Further supporting the role of functional health and social engagement in predicting depressive symptoms, Su et al [<xref ref-type="bibr" rid="ref19">19</xref>] had emphasized the significance of self-rated health, activities of daily living, and marital status. Similarly, Song et al [<xref ref-type="bibr" rid="ref7">7</xref>] pointed out that perceived health, social support, and life satisfaction are key indicators, emphasizing that depression spans physical, social, and emotional areas. Interestingly, they also found that nutritional risk and emotional instability are significant predictors, factors that we did not focus on in our analysis. These differences may arise from variations in study populations, data collection methods, or feature engineering approaches.</p><p>The SHAP force plots provide individual-level validation of the aggregate feature importance findings. Key predictors observed across both XGBoost and RF models, including negative social interactions, future outlook and energy, and activity motivation, emerge as the main factors influencing individual predictions in both cases examined. 
In the depressed participant, poor self-rated health, low future outlook, reduced activity motivation, and digital exclusion collectively push the prediction toward depression, while in the nondepressed participant, low negative social interactions, positive future outlook, good sleep quality, and varied activity engagement provide strong protective effects. This consistency between population-level feature importance and individual-level explanations improves confidence in the clinical relevance of these predictors and implies that the identified risk factors operate meaningfully at both aggregate and individual levels.</p><p>The sensitivity analysis demonstrated strong consistency between XGBoost and RF across all thresholds, suggesting that the results reflect a true clinical pattern rather than model-specific artifacts. It is important to consider that CES-D 8 is an abbreviated version of the original 20-item scale. Therefore, a one-point difference in threshold implies a substantial change in depression severity. This explains the notable performance variation observed: the AUC score increases as the threshold becomes higher, mainly due to the improved ability of the model to detect well-defined cases with obvious symptom profiles. Conversely, <italic>F</italic><sub>1</sub>-scores decrease with severity, since this metric is heavily influenced by prevalence. As the threshold increases, the degree of class imbalance in the dataset increases, which makes it more challenging to maintain both high precision and recall. The main risk factors (self-rated health, psychological well-being, and negative social interactions) remained stable across thresholds in both models, implying these predictors are important regardless of the threshold chosen.</p><p>In addition to threshold selection, we conducted a sensitivity analysis to examine categorical encoding strategies. 
The consistent outperformance of ordinal encoding over one-hot encoding supports the suitability of our feature engineering strategy. Ordinal encoding preserves the natural ordering of many ELSA variables, such as self-rated health, mobility limitations, and pain severity, while one-hot encoding discards it. This preservation was especially important for tree-based models, such as XGBoost and RF, and distance-based methods, such as KNN, which directly benefit from preserved ordinal relationships. The improved <italic>F</italic><sub>1</sub>-scores with ordinal encoding lead to better identification of at-risk individuals for early intervention.</p><p>In order to address high dimensionality and missing data, some recent works have integrated dimensionality reduction into deep learning architectures. Tutsoy and Sumbul [<xref ref-type="bibr" rid="ref31">31</xref>], for instance, suggested dimensionality reduction based on correlation and target similarity for the diagnosis of thyroid cancer. This method reduced 39-dimensional biomarker data to 5&#x2010;10 features while handling up to 88% random missingness and achieving 83% testing accuracy. However, these methods have limitations when applied to epidemiological cohorts with diverse feature types and are best suited to continuous biomarker data. We focused on encoding-aware preprocessing for longitudinal cohort data such as ELSA because categorical variables need to be encoded before correlation analysis: correlation measures might not be directly comparable across categorical and continuous data types, and standard correlation does not respect ordinal structure.</p><p>Building on the insights from feature importance analysis, clustering provided an additional layer of understanding. Despite negative silhouette scores across all subsets, highlighting the inherent difficulty of clustering high-dimensional data, t-SNE visualizations revealed depression-related patterns. 
Among the subsets, social engagement features exhibited the clearest groupings, emphasizing the critical role of social factors such as social support, which were identified as key predictors in our feature importance analysis. While physical health features showed less clear clustering, reflecting their more indirect relationship with depression severity, demographic features showed moderate separation, matching predictors such as self-rated health and marital status.</p><p>Our study has several limitations that should be acknowledged. The use of self-reported questionnaires may introduce reporting bias, as participants may underreport or overreport symptoms and behaviors, potentially affecting the reliability of some predictors. Additionally, depressive symptoms in this study were assessed using the CES-D 8 scale, a widely used screening tool with strong psychometric properties; however, it is not a clinical diagnostic tool and may lead to misclassification, particularly among individuals with subthreshold symptoms. Some baseline predictors, such as sleep quality and physical functioning, may overlap conceptually with CES-D items, which could result in the models partly capturing symptom persistence rather than entirely new symptom emergence. To minimize direct information leakage, participants exceeding the CES-D threshold at baseline were excluded, and all predictors were measured prior to the follow-up outcome. Excluding individuals with severe psychiatric conditions may also limit the ability to generalize the results to high-risk populations. Finally, negative silhouette scores indicate that depression risk in older adults may vary gradually rather than forming clear, distinct groups. Apparent groupings observed in t-SNE visualizations should therefore be interpreted cautiously.</p><p>There are several sources of uncertainty in model predictions. 
Sampling uncertainty arises from missingness associated with older age, lower education, and baseline depression, potentially violating missing-at-random assumptions. Methodological uncertainty results from modeling choices such as threshold selection, hyperparameter tuning, and encoding. Feature-related uncertainty relates to both feature reduction (6100 to 120 variables) and unmeasured confounders such as genetic vulnerability or medication use. The 2-year prediction interval introduces temporal uncertainty, as major life events may occur between baseline and follow-up assessments.</p><p>To assess the real-world application of the model, further studies should validate the predictive model&#x2019;s performance using a clinical dataset such as general practitioner records. The predictive analysis in this study relied on single-wave data to forecast depressive symptoms at the subsequent wave, which may not fully capture longer-term temporal dynamics of depression; incorporating information from multiple previous waves could provide a more comprehensive understanding of depression trajectories.</p><p>Despite these limitations, our findings demonstrate that it is feasible to use ML algorithms with the ELSA dataset to predict depressive symptoms at 2-year follow-up in the aging population. The results suggest that incorporating a wide range of features from different domains can enhance the model&#x2019;s prediction capability. Notably, traditional ML models, such as SVM, RF, MNB, and XGBoost, performed better than deep learning techniques, emphasizing the value of aligning model selection and preprocessing with the properties of the dataset. 
These findings contribute to a deeper understanding of depression risk and support the development of early intervention strategies and scalable risk stratification approaches for aging populations.</p></sec></body><back><ack><p>ChatGPT by OpenAI was used during manuscript preparation to support language editing, clarity, grammar, and phrasing. The authors reviewed and verified all artificial intelligence&#x2013;assisted text and remain fully responsible for the accuracy, originality, and integrity of the manuscript, including all references and citations.</p></ack><notes><sec><title>Funding</title><p>PB and RN would like to acknowledge the UKRI Engineering and Physical Sciences Research Council (EPSRC) and the National Institute for Health and Care Research (NIHR) Protect/Resilient Project (grant number: EP/W031892/1). PB would like to acknowledge funding via the Royal Academy of Engineering and Great Ormond Street Hospital (grant number: RCSRF2324-18-69). PB, SK, and RN would like to acknowledge funding via the UK Dementia Research Institute (UK DRI-7002) through UK DRI Ltd, principally funded by the Medical Research Council and the Wellcome Trust (228269/Z/23/Z).</p></sec><sec><title>Data Availability</title><p>The data used in this study are from the English Longitudinal Study of Ageing (ELSA), waves 6&#x2010;10. These data are publicly available to registered users through the UK Data Service [<xref ref-type="bibr" rid="ref32">32</xref>]. Access requires institutional affiliation and approval. 
The code used for the analysis is available from the corresponding author upon reasonable request via email.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: BK, SK</p><p>Methodology: BK, SK</p><p>Data curation and preprocessing: BK</p><p>Formal analysis: BK</p><p>Supervision: SK, RN</p><p>Interpretation of findings: BK, SK</p><p>Writing &#x2013; original draft: BK</p><p>Writing &#x2013; review &#x0026; editing: SK, PB, RN</p><p>Approval of final manuscript: All authors</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AUC</term><def><p>area under the curve</p></def></def-item><def-item><term id="abb2">AUROC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb3">CES-D</term><def><p>Center for Epidemiologic Studies Depression Scale</p></def></def-item><def-item><term id="abb4">CES-D 8</term><def><p>8-item Center for Epidemiologic Studies Depression Scale</p></def></def-item><def-item><term id="abb5">ELSA</term><def><p>English Longitudinal Study of Ageing</p></def></def-item><def-item><term id="abb6">KNN</term><def><p>k-nearest neighbors</p></def></def-item><def-item><term id="abb7">LR</term><def><p>logistic regression</p></def></def-item><def-item><term id="abb8">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb9">MNB</term><def><p>multinomial naive Bayes</p></def></def-item><def-item><term id="abb10">RF</term><def><p>random forest</p></def></def-item><def-item><term id="abb11">SHAP</term><def><p>Shapley additive explanations</p></def></def-item><def-item><term id="abb12">SVM</term><def><p>support vector machine</p></def></def-item><def-item><term id="abb13">t-SNE</term><def><p>t-distributed stochastic neighbor embedding</p></def></def-item><def-item><term id="abb14">XGBoost</term><def><p>extreme gradient 
boosting</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>Depression</article-title><source>World Health Organization</source><year>2023</year><access-date>2024-07-17</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/news-room/fact-sheets/detail/depression">https://www.who.int/news-room/fact-sheets/detail/depression</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="report"><person-group person-group-type="author"><name name-style="western"><surname>McManus</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bebbington</surname><given-names>P</given-names> </name><name name-style="western"><surname>Jenkins</surname><given-names>R</given-names> </name><name name-style="western"><surname>Brugha</surname><given-names>T</given-names> </name></person-group><article-title>Mental health and wellbeing in England: adult psychiatric morbidity survey 2014</article-title><year>2016</year><access-date>2026-06-15</access-date><publisher-name>NHS Digital</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://digital.nhs.uk/data-and-information/publications/statistical/adult-psychiatric-morbidity-survey/adult-psychiatric-morbidity-survey-survey-of-mental-health-and-wellbeing-england-2014">https://digital.nhs.uk/data-and-information/publications/statistical/adult-psychiatric-morbidity-survey/adult-psychiatric-morbidity-survey-survey-of-mental-health-and-wellbeing-england-2014</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vink</surname><given-names>D</given-names> </name><name name-style="western"><surname>Aartsen</surname><given-names>MJ</given-names> </name><name 
name-style="western"><surname>Schoevers</surname><given-names>RA</given-names> </name></person-group><article-title>Risk factors for anxiety and depression in the elderly: a review</article-title><source>J Affect Disord</source><year>2008</year><month>02</month><volume>106</volume><issue>1-2</issue><fpage>29</fpage><lpage>44</lpage><pub-id pub-id-type="doi">10.1016/j.jad.2007.06.005</pub-id><pub-id pub-id-type="medline">17707515</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nguyen</surname><given-names>D</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lukens</surname><given-names>E</given-names> </name><name name-style="western"><surname>Mui</surname><given-names>A</given-names> </name></person-group><article-title>Suicidal thoughts and attempts among Chinese and Indian older adults: findings from WHO-SAGE</article-title><source>Gerontologist</source><year>2016</year><month>11</month><volume>56</volume><issue>Suppl_3</issue><fpage>466</fpage><lpage>467</lpage><pub-id pub-id-type="doi">10.1093/geront/gnw162.1870</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Santos</surname><given-names>M</given-names> </name><name name-style="western"><surname>Giusti</surname><given-names>BB</given-names> </name><name name-style="western"><surname>Yamamoto</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Ciosak</surname><given-names>SI</given-names> </name><name name-style="western"><surname>Szylit</surname><given-names>R</given-names> </name></person-group><article-title>Suicide in the elderly: an epidemiologic study</article-title><source>Rev Esc Enferm 
USP</source><year>2021</year><volume>55</volume><fpage>e03694</fpage><pub-id pub-id-type="doi">10.1590/S1980-220X2019026603694</pub-id><pub-id pub-id-type="medline">34076149</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="web"><article-title>The state of ageing 2023-24</article-title><source>Centre for Ageing Better</source><year>2023</year><access-date>2024-09-24</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://ageing-better.org.uk/our-ageing-population-state-ageing-2023-4">https://ageing-better.org.uk/our-ageing-population-state-ageing-2023-4</ext-link></comment></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Song</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Qian</surname><given-names>L</given-names> </name><name name-style="western"><surname>Sui</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Prediction of depression onset risk among middle-aged and elderly adults using machine learning and Canadian longitudinal study on aging cohort</article-title><source>J Affect Disord</source><year>2023</year><month>10</month><day>15</day><volume>339</volume><fpage>52</fpage><lpage>57</lpage><pub-id pub-id-type="doi">10.1016/j.jad.2023.06.031</pub-id><pub-id pub-id-type="medline">37380110</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fiske</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wetherell</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Gatz</surname><given-names>M</given-names> </name></person-group><article-title>Depression in older adults</article-title><source>Annu Rev Clin 
Psychol</source><year>2009</year><volume>5</volume><issue>1</issue><fpage>363</fpage><lpage>389</lpage><pub-id pub-id-type="doi">10.1146/annurev.clinpsy.032408.153621</pub-id><pub-id pub-id-type="medline">19327033</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Prevalence of depression in older adults: a systematic review and meta-analysis</article-title><source>Psychiatry Res</source><year>2022</year><month>05</month><volume>311</volume><fpage>114511</fpage><pub-id pub-id-type="doi">10.1016/j.psychres.2022.114511</pub-id><pub-id pub-id-type="medline">35316691</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Leite</surname><given-names>T da S</given-names> </name><name name-style="western"><surname>Fett</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Stoppiglia</surname><given-names>LF</given-names> </name><etal/></person-group><article-title>Prevalence and factors associated with depression in the elderly: a cross-sectional study</article-title><source>Medicina (Ribeir&#x00E3;o Preto)</source><year>2020</year><volume>53</volume><issue>3</issue><fpage>205</fpage><lpage>214</lpage><pub-id pub-id-type="doi">10.11606/issn.2176-7262.v53i3p205-214</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zogan</surname><given-names>H</given-names> </name><name 
name-style="western"><surname>Razzak</surname><given-names>I</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Jameel</surname><given-names>S</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>G</given-names> </name></person-group><article-title>Explainable depression detection with multi-aspect features using a hybrid deep learning model on social media</article-title><source>World Wide Web</source><year>2022</year><volume>25</volume><issue>1</issue><fpage>281</fpage><lpage>304</lpage><pub-id pub-id-type="doi">10.1007/s11280-021-00992-2</pub-id><pub-id pub-id-type="medline">35106059</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Orabi</surname><given-names>AH</given-names> </name><name name-style="western"><surname>Buddhitha</surname><given-names>P</given-names> </name><name name-style="western"><surname>Orabi</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Inkpen</surname><given-names>D</given-names> </name></person-group><article-title>Deep learning for depression detection of Twitter users</article-title><access-date>2026-06-12</access-date><conf-name>Proceedings of the Fifth Workshop on Computational Linguistics and Clinical Psychology: From Keyboard to Clinic</conf-name><conf-date>Jun 5, 2018</conf-date><conf-loc>New Orleans, LA</conf-loc><fpage>88</fpage><lpage>97</lpage><comment><ext-link ext-link-type="uri" xlink:href="http://aclweb.org/anthology/W18-06">http://aclweb.org/anthology/W18-06</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/W18-0609</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chiong</surname><given-names>R</given-names> 
</name><name name-style="western"><surname>Budhi</surname><given-names>GS</given-names> </name><name name-style="western"><surname>Dhakal</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chiong</surname><given-names>F</given-names> </name></person-group><article-title>A textual-based featuring approach for depression detection using machine learning classifiers and social media texts</article-title><source>Comput Biol Med</source><year>2021</year><month>08</month><volume>135</volume><fpage>104499</fpage><pub-id pub-id-type="doi">10.1016/j.compbiomed.2021.104499</pub-id><pub-id pub-id-type="medline">34174760</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sampson</surname><given-names>L</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>T</given-names> </name><name name-style="western"><surname>Gradus</surname><given-names>JL</given-names> </name><etal/></person-group><article-title>A machine learning approach to predicting new-onset depression in a military population</article-title><source>Psychiatr Res Clin Pract</source><year>2021</year><volume>3</volume><issue>3</issue><fpage>115</fpage><lpage>122</lpage><pub-id pub-id-type="doi">10.1176/appi.prcp.20200031</pub-id><pub-id pub-id-type="medline">34734165</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Orak</surname><given-names>U</given-names> </name><name name-style="western"><surname>Kayaalp</surname><given-names>A</given-names> </name><name name-style="western"><surname>Walker</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Breault</surname><given-names>K</given-names> </name></person-group><article-title>Resilience and depression in military service: 
evidence from the national longitudinal study of adolescent to adult health (add health)</article-title><source>Mil Med</source><year>2022</year><month>10</month><day>29</day><volume>187</volume><issue>11-12</issue><fpage>1441</fpage><lpage>1448</lpage><pub-id pub-id-type="doi">10.1093/milmed/usab364</pub-id><pub-id pub-id-type="medline">34458920</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sawangarreerak</surname><given-names>S</given-names> </name><name name-style="western"><surname>Thanathamathee</surname><given-names>P</given-names> </name></person-group><article-title>Random forest with sampling techniques for handling imbalanced prediction of university student depression</article-title><source>Information</source><year>2020</year><volume>11</volume><issue>11</issue><fpage>519</fpage><pub-id pub-id-type="doi">10.3390/info11110519</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Narkbunnum</surname><given-names>W</given-names> </name><name name-style="western"><surname>Wisaeng</surname><given-names>K</given-names> </name></person-group><article-title>Prediction of depression for undergraduate students based on imbalanced data by using data mining techniques</article-title><source>ASI</source><year>2022</year><volume>5</volume><issue>6</issue><fpage>120</fpage><pub-id pub-id-type="doi">10.3390/asi5060120</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Librenza-Garcia</surname><given-names>D</given-names> </name><name name-style="western"><surname>Passos</surname><given-names>IC</given-names> </name><name name-style="western"><surname>Feiten</surname><given-names>JG</given-names> 
</name><etal/></person-group><article-title>Prediction of depression cases, incidence, and chronicity in a large occupational cohort using machine learning techniques: an analysis of the ELSA-Brasil study</article-title><source>Psychol Med</source><year>2021</year><month>12</month><volume>51</volume><issue>16</issue><fpage>2895</fpage><lpage>2903</lpage><pub-id pub-id-type="doi">10.1017/S0033291720001579</pub-id><pub-id pub-id-type="medline">32493535</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Su</surname><given-names>D</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>X</given-names> </name><name name-style="western"><surname>He</surname><given-names>K</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Y</given-names> </name></person-group><article-title>Use of machine learning approach to predict depression in the elderly in China: a longitudinal study</article-title><source>J Affect Disord</source><year>2021</year><month>03</month><day>1</day><volume>282</volume><fpage>289</fpage><lpage>298</lpage><pub-id pub-id-type="doi">10.1016/j.jad.2020.12.160</pub-id><pub-id pub-id-type="medline">33418381</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zheng</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>T</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>F</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>Y</given-names> 
</name></person-group><article-title>Using machine learning to predict the probability of incident 2-year depression in older adults with chronic diseases: a retrospective cohort study</article-title><source>BMC Psychiatry</source><year>2024</year><month>12</month><day>2</day><volume>24</volume><issue>1</issue><fpage>870</fpage><pub-id pub-id-type="doi">10.1186/s12888-024-06299-6</pub-id><pub-id pub-id-type="medline">39623372</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Veronese</surname><given-names>N</given-names> </name><name name-style="western"><surname>Solmi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Maggi</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Frailty and incident depression in community-dwelling older people: results from the ELSA study</article-title><source>Int J Geriatr Psychiatry</source><year>2017</year><month>12</month><volume>32</volume><issue>12</issue><fpage>e141</fpage><lpage>e149</lpage><pub-id pub-id-type="doi">10.1002/gps.4673</pub-id><pub-id pub-id-type="medline">28195361</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Souza-Teodoro</surname><given-names>LH</given-names> </name><name name-style="western"><surname>de Oliveira</surname><given-names>C</given-names> </name><name name-style="western"><surname>Walters</surname><given-names>K</given-names> </name><name name-style="western"><surname>Carvalho</surname><given-names>LA</given-names> </name></person-group><article-title>Higher serum dehydroepiandrosterone sulfate protects against the onset of depression in the elderly: Findings from the English Longitudinal Study of Aging 
(ELSA)</article-title><source>Psychoneuroendocrinology</source><year>2016</year><month>02</month><volume>64</volume><fpage>40</fpage><lpage>46</lpage><pub-id pub-id-type="doi">10.1016/j.psyneuen.2015.11.005</pub-id><pub-id pub-id-type="medline">26600009</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tsimpida</surname><given-names>D</given-names> </name><name name-style="western"><surname>Kontopantelis</surname><given-names>E</given-names> </name><name name-style="western"><surname>Ashcroft</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Panagioti</surname><given-names>M</given-names> </name></person-group><article-title>The dynamic relationship between hearing loss, quality of life, socioeconomic position and depression and the impact of hearing aids: answers from the English Longitudinal Study of Ageing (ELSA)</article-title><source>Soc Psychiatry Psychiatr Epidemiol</source><year>2022</year><month>02</month><volume>57</volume><issue>2</issue><fpage>353</fpage><lpage>362</lpage><pub-id pub-id-type="doi">10.1007/s00127-021-02155-0</pub-id><pub-id pub-id-type="medline">34383085</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Steptoe</surname><given-names>A</given-names> </name><name name-style="western"><surname>Breeze</surname><given-names>E</given-names> </name><name name-style="western"><surname>Banks</surname><given-names>J</given-names> </name><name name-style="western"><surname>Nazroo</surname><given-names>J</given-names> </name></person-group><article-title>Cohort profile: the English longitudinal study of ageing</article-title><source>Int J Epidemiol</source><year>2013</year><month>12</month><volume>42</volume><issue>6</issue><fpage>1640</fpage><lpage>1648</lpage><pub-id 
pub-id-type="doi">10.1093/ije/dys168</pub-id><pub-id pub-id-type="medline">23143611</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schlechter</surname><given-names>P</given-names> </name><name name-style="western"><surname>Ford</surname><given-names>TJ</given-names> </name><name name-style="western"><surname>Neufeld</surname><given-names>SAS</given-names> </name></person-group><article-title>The eight-item center for epidemiological studies depression scale in the English longitudinal study of aging: longitudinal and gender invariance, sum score models, and external associations</article-title><source>Assessment</source><year>2023</year><month>10</month><volume>30</volume><issue>7</issue><fpage>2146</fpage><lpage>2161</lpage><pub-id pub-id-type="doi">10.1177/10731911221138930</pub-id><pub-id pub-id-type="medline">36511122</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Poole</surname><given-names>L</given-names> </name><name name-style="western"><surname>Steptoe</surname><given-names>A</given-names> </name></person-group><article-title>Depressive symptoms predict incident chronic disease burden 10&#x202F;years later: findings from the English longitudinal study of ageing (ELSA)</article-title><source>J Psychosom Res</source><year>2018</year><month>10</month><volume>113</volume><fpage>30</fpage><lpage>36</lpage><pub-id pub-id-type="doi">10.1016/j.jpsychores.2018.07.009</pub-id><pub-id pub-id-type="medline">30190045</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Diener</surname><given-names>E</given-names> </name><name 
name-style="western"><surname>Emmons</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Larsen</surname><given-names>RJ</given-names> </name><name name-style="western"><surname>Griffin</surname><given-names>S</given-names> </name></person-group><article-title>The satisfaction with life scale</article-title><source>J Pers Assess</source><year>1985</year><month>02</month><volume>49</volume><issue>1</issue><fpage>71</fpage><lpage>75</lpage><pub-id pub-id-type="doi">10.1207/s15327752jpa4901_13</pub-id><pub-id pub-id-type="medline">16367493</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hughes</surname><given-names>ME</given-names> </name><name name-style="western"><surname>Waite</surname><given-names>LJ</given-names> </name><name name-style="western"><surname>Hawkley</surname><given-names>LC</given-names> </name><name name-style="western"><surname>Cacioppo</surname><given-names>JT</given-names> </name></person-group><article-title>A short scale for measuring loneliness in large surveys: results from two population-based studies</article-title><source>Res Aging</source><year>2004</year><volume>26</volume><issue>6</issue><fpage>655</fpage><lpage>672</lpage><pub-id pub-id-type="doi">10.1177/0164027504268574</pub-id><pub-id pub-id-type="medline">18504506</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Arik</surname><given-names>S&#x00D6;</given-names> </name><name name-style="western"><surname>Pfister</surname><given-names>T</given-names> </name></person-group><article-title>TabNet: attentive interpretable tabular learning</article-title><source>Proc AAAI Conf Artif Intell</source><year>2021</year><volume>35</volume><issue>8</issue><fpage>6679</fpage><lpage>6687</lpage><pub-id 
pub-id-type="doi">10.1609/aaai.v35i8.16826</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Khetan</surname><given-names>A</given-names> </name><name name-style="western"><surname>Cvitkovic</surname><given-names>M</given-names> </name><name name-style="western"><surname>Karnin</surname><given-names>Z</given-names> </name></person-group><article-title>TabTransformer: tabular data modeling using contextual embeddings</article-title><source>arXiv</source><comment>Preprint posted online on Dec 11, 2020</comment><pub-id pub-id-type="doi">10.48550/arXiv.2012.06678</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tutsoy</surname><given-names>O</given-names> </name><name name-style="western"><surname>Sumbul</surname><given-names>HE</given-names> </name></person-group><article-title>A novel deep machine learning algorithm with dimensionality and size reduction approaches for feature elimination: thyroid cancer diagnoses with randomly missing data</article-title><source>Brief Bioinform</source><year>2024</year><month>05</month><day>23</day><volume>25</volume><issue>4</issue><fpage>bbae344</fpage><pub-id pub-id-type="doi">10.1093/bib/bbae344</pub-id><pub-id pub-id-type="medline">39007597</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="web"><article-title>English longitudinal study of ageing</article-title><source>UK Data Service</source><year>1998</year><access-date>2026-06-19</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://beta.ukdataservice.ac.uk/datacatalogue/series/series?id=200011">https://beta.ukdataservice.ac.uk/datacatalogue/series/series?id=200011</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Supplementary tables of baseline sociodemographic characteristics and model performance metrics by depression status across English Longitudinal Study of Ageing (ELSA) waves 7-9.</p><media xlink:href="formative_v10i1e84744_app1.docx" xlink:title="DOCX File, 73 KB"/></supplementary-material></app-group></back></article>