<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v9i1e57874</article-id><article-id pub-id-type="doi">10.2196/57874</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>AI Machine Learning&#x2013;Based Diabetes Prediction in Older Adults in South Korea: Cross-Sectional Analysis</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Lee</surname><given-names>Hocheol</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><name name-style="western"><surname>Park</surname><given-names>Myung-Bae</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Won</surname><given-names>Young-Joo</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib></contrib-group><aff id="aff1"><institution>Department of Health Administration, College of Software and Digital Healthcare Convergence, Yonsei University</institution><addr-line>Changjogwan, 
Yonseidae-gil 1</addr-line><addr-line>Wonju</addr-line><country>Republic of Korea</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Mavragani</surname><given-names>Amaryllis</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Kunar</surname><given-names>Bijay Mihir</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Drissi</surname><given-names>Hamed Bazrafshan</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Nalepa</surname><given-names>Jakub</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Young-Joo Won, PhD, Department of Health Administration, College of Software and Digital Healthcare Convergence, Yonsei University, Changjogwan, Yonseidae-gil 1, Wonju, 26493, Republic of Korea, +82 (0) 33-760-2257; <email>youngwon@yonsei.ac.kr</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>21</day><month>1</month><year>2025</year></pub-date><volume>9</volume><elocation-id>e57874</elocation-id><history><date date-type="received"><day>28</day><month>02</month><year>2024</year></date><date date-type="rev-recd"><day>09</day><month>12</month><year>2024</year></date><date date-type="accepted"><day>09</day><month>12</month><year>2024</year></date></history><copyright-statement>&#x00A9;Hocheol Lee, Myung-Bae Park, Young-Joo Won. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 21.1.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2025/1/e57874"/><abstract><sec><title>Background</title><p>Diabetes is prevalent in older adults, and machine learning algorithms could help predict diabetes in this population.</p></sec><sec><title>Objective</title><p>This study determined diabetes risk factors among older adults aged &#x2265;60 years using machine learning algorithms and selected an optimized prediction model.</p></sec><sec sec-type="methods"><title>Methods</title><p>This cross-sectional study was conducted on 3084 older adults aged &#x2265;60 years in Seoul from January to November 2023. Data were collected using a mobile app (Gosufit) that measured depression, stress, anxiety, basal metabolic rate, oxygen saturation, heart rate, and average daily step count. Health coordinators recorded data on diabetes, hypertension, hyperlipidemia, chronic obstructive pulmonary disease, percent body fat, and percent muscle. The presence of diabetes was the target variable, with various health indicators as predictors. 
Machine learning algorithms, including random forest, gradient boosting model, light gradient boosting model, extreme gradient boosting model, and k-nearest neighbors, were employed for analysis. The dataset was split into 70% training and 30% testing sets. Model performance was evaluated using accuracy, precision, recall, F1 score, and area under the curve (AUC). Shapley additive explanations (SHAPs) were used for model interpretability.</p></sec><sec sec-type="results"><title>Results</title><p>Significant predictors of diabetes included hypertension (<italic>&#x03C7;</italic>&#x00B2;<sub>1</sub>=197.294; <italic>P</italic>&#x003C;.001), hyperlipidemia (<italic>&#x03C7;</italic>&#x00B2;<sub>1</sub>=47.671; <italic>P</italic>&#x003C;.001), age (mean: diabetes group 72.66 years vs nondiabetes group 71.81 years), stress (mean: diabetes group 42.68 vs nondiabetes group 41.47; <italic>t</italic><sub>3082</sub>=&#x2212;2.858; <italic>P</italic>=.004), and heart rate (mean: diabetes group 75.05 beats/min vs nondiabetes group 73.14 beats/min; <italic>t</italic><sub>3082</sub>=&#x2212;7.948; <italic>P</italic>&#x003C;.001). The extreme gradient boosting model (XGBM) demonstrated the best performance, with an accuracy of 84.88%, precision of 77.92%, recall of 66.91%, F1 score of 72.00, and AUC of 0.7957. The SHAP analysis of the top-performing XGBM revealed key predictors for diabetes: hypertension, age, percent body fat, heart rate, hyperlipidemia, basal metabolic rate, stress, and oxygen saturation. Hypertension strongly increased diabetes risk, while advanced age and elevated stress levels also showed significant associations. Hyperlipidemia and higher heart rates further heightened diabetes probability. 
These results highlight the importance and directional impact of specific features in predicting diabetes, providing valuable insights for risk stratification and targeted interventions.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study focused on modifiable risk factors, providing crucial data for establishing a system for the automated collection of health information and lifelog data from older adults using digital devices at service facilities.</p></sec></abstract><kwd-group><kwd>diabetes</kwd><kwd>prediction model</kwd><kwd>super-aging population</kwd><kwd>extreme gradient boosting model</kwd><kwd>geriatrics</kwd><kwd>older adults</kwd><kwd>aging</kwd><kwd>artificial intelligence</kwd><kwd>machine learning</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>With advancements in medical technology contributing to longer life expectancies, the world is witnessing a rapid acceleration in population aging. The United Nations projects that the proportion of older adults in the global population will increase from 10% in 2022 to 16% by 2050 [<xref ref-type="bibr" rid="ref1">1</xref>]. This demographic shift significantly drives the increased prevalence of noncommunicable diseases such as diabetes, hypertension, hyperlipidemia, renal failure, arthritis, and Alzheimer disease, which burden the primary health care system [<xref ref-type="bibr" rid="ref2">2</xref>]. Specifically, the aging population presents challenges in geriatric care within primary health care systems, including shortages of caregiving personnel, financial constraints, and psychological stresses associated with family caregiving.</p><p>Among the Organisation for Economic Co-operation and Development (OECD) countries, South Korea is expected to become the first super-aged society&#x2014;a society where the older adult population accounts for more than 20% of the total population [<xref ref-type="bibr" rid="ref3">3</xref>]. 
The rising older adult population and concurrent increase in chronic diseases represent a critical public health issue in Korea. Notably, diabetes is a serious issue, with 39.2% of older adults experiencing diabetes [<xref ref-type="bibr" rid="ref4">4</xref>]. Diabetes management is crucial, as inadequate control can lead to severe complications, including hypertension and hyperlipidemia [<xref ref-type="bibr" rid="ref5">5</xref>]. However, the diabetes management rate among the Korean older adult population stands at a mere 30.3%, significantly lower than the awareness rates (84%) and treatment rates (74.8%) [<xref ref-type="bibr" rid="ref6">6</xref>]. Older adults are particularly vulnerable to diabetes due to aging and consequent physiological changes, as well as lifestyle modifications resulting from physical decline and other medical conditions [<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>The early prevention of diabetes is imperative in older adults. Previous studies have shown that early intervention in older adults can reduce the risk of complications such as cardiovascular diseases, renal failure, and vision impairment [<xref ref-type="bibr" rid="ref8">8</xref>]. Yet, clinical interventions such as fasting blood glucose tests are often needed for diabetes management, and these may not be feasible due to older adults&#x2019; physical frailty. Thus, health education and behavioral interventions, not clinical treatments and diagnostics, are vital for preventing diabetes and other chronic conditions in this population [<xref ref-type="bibr" rid="ref9">9</xref>]. Research indicates that the factors affecting diabetes in older adults differ from those in the general population [<xref ref-type="bibr" rid="ref10">10</xref>]. While obesity is a risk factor in individuals in their 40s, being underweight elevates the risk for various chronic conditions, including diabetes, in older adults [<xref ref-type="bibr" rid="ref11">11</xref>]. 
The differences in diabetes risk factors for older adults highlight the importance of specific research on the older adult population.</p><p>With the global advancement of computer technology, machine learning and deep learning have been used across various fields. Machine learning algorithms can analyze complex and large datasets and identify patterns and risk factors that might not be apparent through traditional statistical methods. In medicine, the potential of machine learning has been demonstrated through an increase in research on disease prediction, personalized medicine, and personalized public health services in clinical and public health care. Particularly, machine learning algorithms can incorporate a broader array of factors and use a wide range of data types to produce generalizable results compared to conventional statistical approaches [<xref ref-type="bibr" rid="ref12">12</xref>].</p><p>However, research using machine learning algorithms to predict diabetes in the Korean older adult population remains sparse. Studies targeting older adults are particularly lacking, partly because the mobility constraints of the older population make it difficult to access physical and mental health data, as well as daily lifelog data. Thus, a system for the automated collection of health information and lifelog data from older adults must be established using digital devices at service facilities they frequent [<xref ref-type="bibr" rid="ref10">10</xref>]. This study determines diabetes risk factors among older adults aged &#x2265;60 years by using machine learning algorithms and selects an optimized model. We hypothesize that older adults are affected by some risk factors that are shared with younger generations and by others that are distinct, and we aim to provide evidence to facilitate policy-making. 
The findings could serve as a model for other countries with similar demographic changes and health care challenges.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Data Collection</title><p>This cross-sectional study was conducted on older adults aged &#x2265;60 years, and the survey was conducted from January to November 2023. Participants aged &#x2265;60 years living in Seoul, South Korea, were recruited among individuals who had participated in the Mind Care Provider Project. Participants in the project voluntarily provided data after being recruited through public institution promotions aimed at users interested in using health measurement services. To collect data from the participants, a mobile app called Gosufit was developed, which was installed on participants&#x2019; digital devices [<xref ref-type="bibr" rid="ref13">13</xref>].</p><p>First, the app measured indicators such as depression, stress, anxiety, basal metabolic rate (BMR), oxygen saturation, heart rate, and average daily step count, and the data were stored on a server. Second, a health coordinator (registered nurse) measured and recorded data on diabetes, hypertension, hyperlipidemia, chronic obstructive pulmonary disease (COPD), percent body fat, and percent muscle. In total, 3674 older adults aged &#x2265;60 years participated in the survey from January 1 to November 30, 2023. After excluding 590 participants due to nonresponse, dropout, or missing data, 3084 participants were included in the final analysis. Nonresponse and dropout occurred when participants did not wish to continue or when data transmission was interrupted due to issues with the app during the survey.</p></sec><sec id="s2-2"><title>Instruments</title><p>The target variable was the presence of diabetes. Those diagnosed with diabetes by a physician were coded as 1, and those who had not been diagnosed were coded as 0. 
The predictor variables included hypertension, posttraumatic stress disorder (PTSD), stress, anxiety, depression, BMR, oxygen saturation, average daily step count, hyperlipidemia, COPD, percent body fat, and percent muscle. Hypertension, hyperlipidemia, and COPD were coded as 1 for a physician diagnosis or 0 otherwise. PTSD, stress, and anxiety and depression were assessed using the 5-item PTSD checklist (PCL-5), the short version of the Geriatric Depression Scale (SGDS), and the Hospital Anxiety and Depression Scale (HADS), respectively, with 100-point&#x2013;based scoring. Oxygen saturation was determined using a digital oximeter per 100% saturation. The average daily step count was measured via the app.</p></sec><sec id="s2-3"><title>Statistical Analysis</title><p>The Boruta-based feature selection method (FSM) was used for feature selection, which is a wrapper-based FSM that uses the random forest classification algorithm [<xref ref-type="bibr" rid="ref14">14</xref>]. The entire dataset was randomly divided into a 70% training set and a 30% testing set using a stratified sampling procedure [<xref ref-type="bibr" rid="ref15">15</xref>].</p></sec><sec id="s2-4"><title>Machine Learning Algorithm</title><p>The random forest algorithm is an ensemble learning method used for classification and regression. It constructed multiple decision trees during training and output the class or mean prediction (for regression) of individual trees. Random forests corrected overfitting to the training set that is often seen in decision trees. The gradient boosting model (GBM) is a machine learning technique used for regression and classification problems. It generated a predictive model as an ensemble of weak prediction models (typically decision trees). Like other boosting methods, it built the model in a stage-wise fashion and optimized any differentiable loss function to generalize the model. 
The light gradient boosting model (LGBM) is a gradient-boosting framework that uses a tree-based learning algorithm. It was designed for distributed and efficient operation with a faster training speed, higher efficiency, lower memory use, and better accuracy. The LGBM was capable of processing large-scale data with numerous features and data points while maintaining performance.</p><p>The extreme gradient boosting model (XGBM) is an algorithm for tree-based ensemble learning that uses regularization to address the slow speed and overfitting issues of gradient boosting models. It featured built-in cross-validation and the automatic handling of missing values. The k-nearest neighbors (KNN) model is a simple, versatile, and easy-to-implement supervised machine learning algorithm used for classification and regression. It classified data points based on how their neighbors were classified, stored all available cases, and classified new cases based on a majority vote of their k neighbors. Each case was assigned to the most common class among its k-nearest neighbors.</p></sec><sec id="s2-5"><title>Performance Evaluation Criteria</title><p>The performance of the machine learning models was evaluated based on accuracy, precision, recall, F1 score, and area under the curve (AUC). The confusion matrix was used to evaluate the classification models by dividing predictions into positive or negative categories based on the match between actual and predicted classes.</p><p>Accuracy represented the proportion of correctly classified data among the total predictions for the diabetes risk group. It measured how accurate the predictions for the diabetes risk group were. 
The mathematical equation for accuracy is as follows:</p><disp-formula id="equWL1"><mml:math id="eqn1"><mml:mi>A</mml:mi><mml:mi>c</mml:mi><mml:mi>c</mml:mi><mml:mi>u</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>y</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:mi>%</mml:mi></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:mfenced separators="|"><mml:mrow><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow></mml:mfenced><mml:mo>&#x00D7;</mml:mo><mml:mn>100</mml:mn></mml:math></disp-formula><p>Precision represented the proportion of actual positive samples among the cases that were predicted as positive by the machine learning model. In other words, it indicated the proportion of samples that were truly positive for diabetes among those predicted to have diabetes by the model. 
The mathematical equation for precision is as follows:</p><disp-formula id="equWL2"><mml:math id="eqn2"><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:mi>%</mml:mi></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:mfenced separators="|"><mml:mrow><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow></mml:mfenced><mml:mo>&#x00D7;</mml:mo><mml:mn>100</mml:mn></mml:math></disp-formula><p>Recall measured the proportion of those predicted by the machine learning model to have diabetes within the actual diabetes group. It provided the percentage of cases predicted to be at risk for diabetes from the entire diabetes risk group. 
The mathematical equation is as follows:</p><disp-formula id="equWL3"><mml:math id="eqn3"><mml:mi>R</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>l</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:mi>%</mml:mi></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:mfenced separators="|"><mml:mrow><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow></mml:mfenced><mml:mo>&#x00D7;</mml:mo><mml:mn>100</mml:mn></mml:math></disp-formula><p>The F1 score was the harmonic mean of precision and recall. The mathematical equation is as follows:</p><disp-formula id="equWL4"><mml:math id="eqn4"><mml:mi>F</mml:mi><mml:mn>1</mml:mn><mml:mi> </mml:mi><mml:mi>s</mml:mi><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:mi>%</mml:mi></mml:mrow></mml:mfenced><mml:mo>=</mml:mo><mml:mfenced separators="|"><mml:mrow><mml:mfrac><mml:mrow><mml:mn>2</mml:mn><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:mn>2</mml:mn><mml:msub><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow></mml:mfenced><mml:mo>&#x00D7;</mml:mo><mml:mn>100</mml:mn></mml:math></disp-formula><p>The AUC referred to the area under the receiver operating characteristic curve, which was used to evaluate the performance 
of binary classification models, and an AUC close to 1 indicated better model performance. The AUC equation is as follows:</p><disp-formula id="equWL5"><mml:math id="eqn5"><mml:mi>A</mml:mi><mml:mi>U</mml:mi><mml:mi>C</mml:mi><mml:mo>=</mml:mo><mml:mi> </mml:mi><mml:mrow><mml:msubsup><mml:mo stretchy="false">&#x222B;</mml:mo><mml:mrow><mml:mi>X</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msubsup><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mi>R</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:msup><mml:mrow><mml:mi>F</mml:mi><mml:mi>P</mml:mi><mml:mi>R</mml:mi></mml:mrow><mml:mrow><mml:mo>-</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msup><mml:mfenced separators="|"><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:mfenced></mml:mrow></mml:mfenced><mml:mi>d</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:mrow></mml:math></disp-formula></sec><sec id="s2-6"><title>Model Interpretability</title><p>Shapley additive explanations (SHAPs) interpret the prediction outcomes of machine learning models. SHAPs were introduced by Lundberg and Lee [<xref ref-type="bibr" rid="ref16">16</xref>] in 2017 and were designed based on a game theory concept known as Shapley values. These values supported prediction interpretation, enabling the assessment of the relative importance among features. Additionally, they helped understand the characteristics with the greatest influence on the model&#x2019;s predictions, assisting in model improvement or decision-making processes. 
The mathematical equation is as follows:</p><p><inline-formula><mml:math id="ieqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>&#x2205;</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>v</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mi>M</mml:mi><mml:mo>!</mml:mo></mml:mrow></mml:mfrac><mml:munder><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>S</mml:mi><mml:mo>&#x2286;</mml:mo><mml:mi>M</mml:mi><mml:mrow><mml:mo>/</mml:mo></mml:mrow><mml:mrow><mml:mo>{</mml:mo><mml:mi>k</mml:mi><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:munder><mml:mrow><mml:mo>|</mml:mo><mml:mi>S</mml:mi><mml:mo>|</mml:mo></mml:mrow><mml:mo>!</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>M</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mrow><mml:mo>|</mml:mo><mml:mi>S</mml:mi><mml:mo>|</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>!</mml:mo><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mi>v</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>S</mml:mi><mml:mo>&#x222A;</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mi>k</mml:mi><mml:mo>}</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>v</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>S</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p></sec><sec id="s2-7"><title>Ethical Considerations</title><p>Ethical approval for this study was obtained from Yonsei University Mirae Institutional Review Board (No. 1041849&#x2010;202401-SB-021-01), including a supplementary application for expanded data collection. All procedures and data management were conducted following the General Data Protection Regulation and ethical principles outlined in the Helsinki Declaration. 
Informed consent was obtained from all study participants regarding data collection and the analysis of the data. The questionnaires were submitted entirely anonymously. No form of compensation was provided to the participants.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Participant Information</title><p>In total, 3084 individuals participated in this study (<xref ref-type="table" rid="table1">Table 1</xref>). The study population comprised 895 (29%) individuals with diabetes and 2189 (71%) individuals without. In addition, 1760 (57.1%) had hypertension, and diabetes prevalence significantly differed according to hypertension (<italic>&#x03C7;</italic><sup>2</sup><sub>1</sub>=197.294; <italic>P</italic>&#x003C;.001). In total, 1803 (58.5%) participants had hyperlipidemia, and diabetes prevalence significantly differed according to hyperlipidemia (<italic>&#x03C7;</italic><sup>2</sup><sub>1</sub>=47.671; <italic>P</italic>&#x003C;.001). The mean age was 72.66 years in the diabetes group and 71.81 years in the nondiabetes group. The mean stress score was 42.68 in the diabetes group and 41.47 in the nondiabetes group and significantly differed between the two groups (<italic>t</italic><sub>3082</sub>=&#x2212;2.858; <italic>P</italic>=.004). 
The mean heart rate was 75.05 beats/min in the diabetes group and 73.14 beats/min in the nondiabetes group and significantly differed between the two groups (<italic>t</italic><sub>3082</sub>=&#x2212;7.948; <italic>P</italic>&#x003C;.001).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Respondent characteristics.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Risk factor</td><td align="left" valign="bottom">Diabetes (n=895)</td><td align="left" valign="bottom">Nondiabetes (n=2189)</td><td align="left" valign="bottom">Chi-square or <italic>t</italic> test (<italic>df</italic>)</td><td align="left" valign="bottom"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top">Hypertension, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">197.294<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> (1)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>No</td><td align="left" valign="top">209 (23.4)</td><td align="left" valign="top">1115 (50.9)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yes</td><td align="left" valign="top">686 (76.6)</td><td align="left" valign="top">1074 (49.1)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">Hyperlipidemia, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">47.671<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> (1)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>No</td><td align="left" valign="top">286 (32)</td><td align="left" valign="top">995 (45.5)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yes</td><td align="left" valign="top">609 (68)</td><td align="left" valign="top">1194 (54.5)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">COPD<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup>, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">7.764<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> (1)</td><td align="left" valign="top">.005</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>No</td><td align="left" valign="top">856 (95.6)</td><td align="left" valign="top">2135 (97.5)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Yes</td><td align="left" valign="top">39 (4.4)</td><td align="left" valign="top">54 (2.5)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">Sex, n (%)</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">3.472<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> (1)</td><td align="left" valign="top">.06</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Male</td><td align="left" valign="top">60 (6.7)</td><td align="left" valign="top">191 (8.7)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Female</td><td align="left" valign="top">835 (93.3)</td><td align="left" valign="top">1998 (91.3)</td><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top">Age (years), mean (SD)</td><td align="left" valign="top">72.66 (6.31)</td><td align="left" valign="top">71.81 (6.32)</td><td align="left" valign="top">&#x2212;3.395<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> (3082)</td><td align="left" valign="top">.001</td></tr><tr><td align="left" valign="top">PTSD<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup>, mean (SD)</td><td align="left" valign="top">14.45 (10.02)</td><td align="left" valign="top">14.21 (9.87)</td><td align="left" valign="top">&#x2212;0.614<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> (3082)</td><td align="left" valign="top">.5</td></tr><tr><td align="left" valign="top">Stress, mean (SD)</td><td align="left" valign="top">42.68 (9.83)</td><td align="left" valign="top">41.47 (10.97)</td><td align="left" valign="top">&#x2212;2.858<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> (3082)</td><td align="left" valign="top">.004</td></tr><tr><td align="left" valign="top">Anxiety, mean (SD)</td><td align="left" valign="top">15.58 (12.09)</td><td align="left" valign="top">15.67 (12.38)</td><td align="left" valign="top">0.179<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> (3082)</td><td align="left" valign="top">.86</td></tr><tr><td align="left" valign="top">Depression, mean (SD)</td><td align="left" valign="top">19.39 (12.48)</td><td align="left" valign="top">19.42 (12.92)</td><td align="left" valign="top">0.063<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> (3082)</td><td align="left" valign="top">.95</td></tr><tr><td align="left" valign="top">Percent body fat, mean (SD)</td><td align="left" valign="top">31.00 (8.74)</td><td align="left" valign="top">30.75 
(8.66)</td><td align="left" valign="top">&#x2212;0.727<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> (3082)</td><td align="left" valign="top">.47</td></tr><tr><td align="left" valign="top">Percent muscle, mean (SD)</td><td align="left" valign="top">35.73 (5.30)</td><td align="left" valign="top">35.95 (5.35)</td><td align="left" valign="top">&#x2212;0.465<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> (3082)</td><td align="left" valign="top">.31</td></tr><tr><td align="left" valign="top">BMR<sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup>, mean (SD)</td><td align="left" valign="top">1192.54 (159.05)</td><td align="left" valign="top">1187.71 (272.54)</td><td align="left" valign="top">&#x2212;0.496<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> (3082)</td><td align="left" valign="top">.62</td></tr><tr><td align="left" valign="top">Oxygen saturation (%), mean (SD)</td><td align="left" valign="top">97.12 (3.48)</td><td align="left" valign="top">97.28 (2.44)</td><td align="left" valign="top">1.455<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> (3082)</td><td align="left" valign="top">.15</td></tr><tr><td align="left" valign="top">Heart rate (beats/min), mean (SD)</td><td align="left" valign="top">75.05 (6.29)</td><td align="left" valign="top">73.14 (5.97)</td><td align="left" valign="top">&#x2212;7.948<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> (3082)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">Daily step count, mean (SD)</td><td align="left" valign="top">11789.99 (18061.89)</td><td align="left" valign="top">13710.84 (34980.68)</td><td align="left" valign="top">1.560<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> (3082)</td><td align="left" valign="top">.12</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>Chi-square.</p></fn><fn id="table1fn2"><p><sup>b</sup>1-tailed <italic>t</italic> 
test.</p></fn><fn id="table1fn3"><p><sup>c</sup>COPD: chronic obstructive pulmonary disease.</p></fn><fn id="table1fn4"><p><sup>d</sup>PTSD: posttraumatic stress disorder.</p></fn><fn id="table1fn5"><p><sup>e</sup>BMR: basal metabolic rate.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Risk Factor Selection Using Boruta</title><p>The importance of features was measured using the Boruta-based FSM (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Hypertension, age, percent body fat, heart rate, hyperlipidemia, BMR, stress, and oxygen saturation were identified as important features. These features were included in the machine learning model for diabetes prediction.</p></sec><sec id="s3-3"><title>Performance Comparison of Machine Learning Models</title><p>The performances of the 5 machine learning models used in the study were compared based on accuracy, precision, recall, F1 score, and AUC. Model performance was the highest for the XGBM, followed by the LGBM, random forest model, GBM, and KNN model (<xref ref-type="table" rid="table2">Table 2</xref>, <xref ref-type="fig" rid="figure1">Figure 1</xref>). 
The XGBM had an accuracy of 84.88%, precision of 77.92%, recall of 66.91%, F1 score of 72.00, and AUC of 0.7957, showing a high performance for its prediction.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Performance of machine learning methods.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Ranking</td><td align="left" valign="bottom">Models</td><td align="left" valign="bottom">Accuracy (%)</td><td align="left" valign="bottom">Precision (%)</td><td align="left" valign="bottom">Recall (%)</td><td align="left" valign="bottom">F1 score</td><td align="left" valign="bottom">AUC<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">1</td><td align="left" valign="top">Extreme gradient boosting model</td><td align="left" valign="top">84.88</td><td align="left" valign="top">77.92</td><td align="left" valign="top">66.91</td><td align="left" valign="top">72.00</td><td align="left" valign="top">0.7957</td></tr><tr><td align="left" valign="top">2</td><td align="left" valign="top">Light gradient boosting model</td><td align="left" valign="top">84.77</td><td align="left" valign="top">78.57</td><td align="left" valign="top">65.42</td><td align="left" valign="top">71.39</td><td align="left" valign="top">0.7906</td></tr><tr><td align="left" valign="top">3</td><td align="left" valign="top">Random forest model</td><td align="left" valign="top">81.53</td><td align="left" valign="top">78.82</td><td align="left" valign="top">49.81</td><td align="left" valign="top">61.04</td><td align="left" valign="top">0.7216</td></tr><tr><td align="left" valign="top">4</td><td align="left" valign="top">Gradient boosting model</td><td align="left" valign="top">77.32</td><td align="left" valign="top">77.57</td><td align="left" valign="top">30.85</td><td align="left" valign="top">44.14</td><td align="left" valign="top">0.6360</td></tr><tr><td align="left" 
valign="top">5</td><td align="left" valign="top">K-nearest neighbors model</td><td align="left" valign="top">66.95</td><td align="left" valign="top">42.06</td><td align="left" valign="top">36.43</td><td align="left" valign="top">39.04</td><td align="left" valign="top">0.5794</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>AUC: area under the curve.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Receiver operating characteristic curves and precision-recall curves of 4 predictive models. AUC: area under the curve; LGBM: light gradient boosting model; ROC: receiver operating characteristic; XGBM: extreme gradient boosting model.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e57874_fig01.png"/></fig></sec><sec id="s3-4"><title>Interpretable Risk Factors of Diabetes</title><p>SHAP analysis was performed for the XGBM&#x2014;the best-performing diabetes prediction model. The features selected through the FSM (hypertension, age, percent body fat, heart rate, hyperlipidemia, BMR, stress, and oxygen saturation) were included in the SHAP analysis. Red SHAP values indicated a higher impact on diabetes prediction, and blue SHAP values suggested a greater influence on nondiabetes outcomes (<xref ref-type="fig" rid="figure2">Figure 2</xref>). The findings showed positive SHAP values for hypertension, indicating a stronger prediction of having diabetes, while not having hypertension was strongly linked to not having diabetes. Age presented mixed SHAP values, but notably, higher ages correlated with increased diabetes prediction. Similarly, higher heart rates and the presence of hyperlipidemia were associated with increased diabetes risk. Additionally, elevated stress levels were linked to a higher probability of diabetes. The SHAP analysis revealed a mixed impact of age on diabetes prediction. 
Particularly, as age increased, the likelihood of a diabetes prediction also increased. Additionally, a higher heart rate and the presence of hyperlipidemia were also linked to a higher diabetes probability. Moreover, an increase in stress levels elevated the risk of developing diabetes.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Importance of risk factors based on Shapley additive explanation values. SHAP: Shapley additive explanation.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e57874_fig02.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Results</title><p>This study analyzed the performance of 5 machine learning&#x2013;based algorithms in predicting diabetes risk in older Korean adults. The XGBM performed the best, supporting previous research that shows XGBM as the superior model for diabetes prediction with an AUC of 84% [<xref ref-type="bibr" rid="ref17">17</xref>], which was also similar to this study. In contrast, other studies have assessed the predictive accuracy of decision trees, naive Bayes algorithms, and random forest algorithms for diabetes risk factors, finding the random forest algorithm to be the top performer with 94% accuracy and precision [<xref ref-type="bibr" rid="ref18">18</xref>], which was identical to our predictive algorithm. Systematic reviews of machine learning approaches also mention that support vector machines, artificial neural networks, and decision trees are frequently used prediction classification models [<xref ref-type="bibr" rid="ref19">19</xref>]. 
However, these models were excluded from this study due to the anticipated challenges in managing nonlinear patterns with linear approaches [<xref ref-type="bibr" rid="ref20">20</xref>].</p><p>In this study, hypertension, hyperlipidemia, old age, heart rate, and stress were identified as diabetes risk factors, which was similar to previous reports. Existing studies establish a close link between hypertension and diabetes, with a stronger correlation observed in older adults [<xref ref-type="bibr" rid="ref21">21</xref>], and this was also seen in our results. A study analyzing diabetes risk factors using large-scale health care data from Kuwait demonstrated that logistic regression had the highest accuracy at 80.7% and that hypertension, obesity, and sex were also strongly associated with diabetes risk [<xref ref-type="bibr" rid="ref22">22</xref>]. Further, an increase in heart rate was predicted to elevate diabetes risk. A study of 30,000 participants reported that diabetes risk increased with every 10 beats per minute rise in pulse rate [<xref ref-type="bibr" rid="ref23">23</xref>], which was consistent with the findings of this study. Hyperlipidemia was also identified as a diabetes risk factor. Hyperlipidemia, hypertension, and diabetes are 3 major chronic conditions that are interrelated as mutual risk factors, and this study confirmed that hyperlipidemia is a diabetes risk factor [<xref ref-type="bibr" rid="ref24">24</xref>].</p><p>This study predicted diabetes in older adults, and certain factors differed from previously known diabetes risk factors. In the existing literature, diabetes risk is elevated with increasing obesity [<xref ref-type="bibr" rid="ref25">25</xref>], but in this study, the SHAP value for percent body fat was not clear, with no significant differences in diabetes risk according to percent body fat in the independent <italic>t</italic> test. Thus, the severity of obesity was not a significant risk factor for diabetes. 
On the contrary, diabetes risk increased with decreasing body weight. An array of study results has been reported regarding this issue. One such report suggested that weight loss positively impacts diabetes in older adults but also complicates the treatment for optimal blood glucose regulation in patients with type 2 diabetes [<xref ref-type="bibr" rid="ref26">26</xref>]. Here, weight loss in older adults entails loss of muscles and bone density, which may have a detrimental impact on diabetes in the long term [<xref ref-type="bibr" rid="ref27">27</xref>].</p><p>Certain predictor variables identified as significant diabetes risk factors in previous studies were not included in this study. Particularly, family history is a known risk factor for diabetes [<xref ref-type="bibr" rid="ref28">28</xref>]. One important factor of this study was that it focused on modifiable risk factors, rather than family history, which is an unmodifiable risk factor. Particularly, we examined other risk factors besides unmodifiable risk factors, such as family history, age, and sex, as being able to provide interventions for these risk factors is essential. The Korean government should emphasize preventive health education in primary health care facilities, senior welfare centers, community centers, and public health centers to address modifiable diabetes risk factors in the older adult population, such as hypertension, heart rate, hyperlipidemia, and stress. Additionally, while education for older adults is crucial, prevention is more important. Thus, health education should be provided for those in their 40s to prepare for healthy older adulthood.</p></sec><sec id="s4-2"><title>Limitations</title><p>This study had a few limitations. First, the study population comprised older adults aged &#x2265;60 years who reside in Seoul, Korea. Thus, it did not represent the entire 60-and-over population in the country. 
Further, while existing studies set the age criterion for older adults as 65 years and older, we set the age to 60 years and older because the retirement age in Korea is 60 years. Second, we could not obtain personally identifiable information such as personal income and education level due to legal regulations. Thus, even though these factors may predict diabetes, this study was limited to other characteristics. Lastly, this study randomly split the dataset into training and testing subsets (70% and 30%). This split was not stratified, which means the distribution of target variables in the subsets may not perfectly match the original dataset&#x2019;s distribution. This could potentially impact the generalizability of the model&#x2019;s performance to other datasets. Future studies could consider employing a stratified splitting method to ensure a balanced representation of target variables across subsets.</p></sec><sec id="s4-3"><title>Conclusions</title><p>This study analyzed machine learning algorithms for diabetes prediction in older adults in Korea. Hypertension, hyperlipidemia, and stress were identified as modifiable diabetes risk factors. Additionally, body fat percentage did not significantly predict diabetes in older adults, presumably because body fat loss is closely linked to muscle strength and bone mass loss. The findings suggest that targeted interventions focusing on managing hypertension, hyperlipidemia, and stress can significantly reduce diabetes risk in this population.</p><p>Furthermore, these diabetes predictors in older adults could be mitigated by promoting healthier lifestyle choices and behaviors, such as regular physical activity, balanced nutrition, and stress management techniques. 
The government should implement comprehensive health education programs across various facilities, including primary health care facilities and welfare centers, to raise awareness about these modifiable risk factors.</p><p>Moreover, educational interventions should be initiated at a younger age&#x2014;particularly for individuals in their 40s&#x2014;to foster proactive health management and prevent the onset of diabetes in older adulthood. By adopting a preventive approach and addressing modifiable risk factors early, we can enhance the overall health and quality of life for the aging population in Korea. It is necessary to develop personalized modeling that predicts major chronic diseases such as diabetes, hypertension, hyperlipidemia, and obesity through the advancement of prediction algorithms. This will provide a foundation for creating personalized health promotion education and programs. Future research should focus on refining machine learning&#x2013;based models by incorporating diverse datasets and longitudinal data to improve generalizability and predictive performance. 
Additionally, exploring the integration of behavioral and environmental factors into machine learning algorithms may further enhance the accuracy and applicability of these models in real-world settings.</p></sec></sec></body><back><ack><p>This research was supported by the Basic Science Research Program through the National Research Foundation of Korea funded by the Ministry of Education (NRF-2021R1C1C2005464).</p></ack><notes><sec><title>Data Availability</title><p>The datasets generated and analyzed during this study are available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AUC</term><def><p>area under the curve</p></def></def-item><def-item><term id="abb2">BMR</term><def><p>basal metabolic rate</p></def></def-item><def-item><term id="abb3">COPD</term><def><p>chronic obstructive pulmonary disease</p></def></def-item><def-item><term id="abb4">FSM</term><def><p>feature selection method</p></def></def-item><def-item><term id="abb5">GBM</term><def><p>gradient boosting model</p></def></def-item><def-item><term id="abb6">HADS</term><def><p>Hospital Anxiety and Depression Scale</p></def></def-item><def-item><term id="abb7">KNN</term><def><p>k-nearest neighbors</p></def></def-item><def-item><term id="abb8">LGBM</term><def><p>light gradient boosting model</p></def></def-item><def-item><term id="abb9">OECD</term><def><p>Organisation for Economic Co-operation and Development</p></def></def-item><def-item><term id="abb10">PCL-5</term><def><p>5-item PTSD checklist</p></def></def-item><def-item><term id="abb11">SGDS</term><def><p>short version of the Geriatric Depression Scale</p></def></def-item><def-item><term id="abb12">SHAP</term><def><p>Shapley additive explanation</p></def></def-item><def-item><term id="abb13">XGBM</term><def><p>extreme gradient boosting 
model</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>World population prospects 2022: summary of results</article-title><source>United Nations</source><year>2022</year><access-date>2024-12-31</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.un.org/development/desa/pd/sites/www.un.org.development.desa.pd/files/wpp2022_summary_of_results.pdf">https://www.un.org/development/desa/pd/sites/www.un.org.development.desa.pd/files/wpp2022_summary_of_results.pdf</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>C</given-names> </name><name name-style="western"><surname>Ding</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>J</given-names> </name></person-group><article-title>Digital health for aging populations</article-title><source>Nat Med</source><year>2023</year><month>07</month><volume>29</volume><issue>7</issue><fpage>1623</fpage><lpage>1630</lpage><pub-id pub-id-type="doi">10.1038/s41591-023-02391-8</pub-id><pub-id pub-id-type="medline">37464029</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>KW</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>OS</given-names> </name></person-group><article-title>Super aging in South Korea unstoppable but mitigatable: a sub-national scale population projection for best policy planning</article-title><source>Spat Demogr</source><year>2020</year><month>07</month><volume>8</volume><issue>2</issue><fpage>155</fpage><lpage>173</lpage><pub-id pub-id-type="doi">10.1007/s40980-020-00061-8</pub-id><pub-id 
pub-id-type="medline">34222615</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kwon</surname><given-names>HS</given-names> </name></person-group><article-title>Prevalence and treatment status of diabetes mellitus in Korea</article-title><source>J Korean Med Assoc</source><year>2023</year><month>07</month><volume>66</volume><issue>7</issue><fpage>404</fpage><lpage>407</lpage><pub-id pub-id-type="doi">10.5124/jkma.2023.66.7.404</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de Boer</surname><given-names>IH</given-names> </name><name name-style="western"><surname>Bangalore</surname><given-names>S</given-names> </name><name name-style="western"><surname>Benetos</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Diabetes and hypertension: a position statement by the American Diabetes Association</article-title><source>Diabetes Care</source><year>2017</year><month>09</month><volume>40</volume><issue>9</issue><fpage>1273</fpage><lpage>1284</lpage><pub-id pub-id-type="doi">10.2337/dci17-0026</pub-id><pub-id pub-id-type="medline">28830958</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ha</surname><given-names>KH</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>KA</given-names> </name><name name-style="western"><surname>Han</surname><given-names>KD</given-names> </name><name name-style="western"><surname>Moon</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>DJ</given-names> </name></person-group><article-title>Diabetes screening in South Korea: a new estimate of the number 
needed to screen to detect diabetes</article-title><source>Korean J Intern Med</source><year>2023</year><month>01</month><volume>38</volume><issue>1</issue><fpage>93</fpage><lpage>100</lpage><pub-id pub-id-type="doi">10.3904/kjim.2022.283</pub-id><pub-id pub-id-type="medline">36420563</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Flack</surname><given-names>KD</given-names> </name><name name-style="western"><surname>Davy</surname><given-names>KP</given-names> </name><name name-style="western"><surname>Hulver</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Winett</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Frisard</surname><given-names>MI</given-names> </name><name name-style="western"><surname>Davy</surname><given-names>BM</given-names> </name></person-group><article-title>Aging, resistance training, and diabetes prevention</article-title><source>J Aging Res</source><year>2010</year><month>12</month><day>15</day><volume>2011</volume><fpage>127315</fpage><pub-id pub-id-type="doi">10.4061/2011/127315</pub-id><pub-id pub-id-type="medline">21197110</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rooney</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Rawlings</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Pankow</surname><given-names>JS</given-names> </name><etal/></person-group><article-title>Risk of progression to diabetes among older adults with prediabetes</article-title><source>JAMA Intern Med</source><year>2021</year><month>04</month><day>1</day><volume>181</volume><issue>4</issue><fpage>511</fpage><lpage>519</lpage><pub-id 
pub-id-type="doi">10.1001/jamainternmed.2020.8774</pub-id><pub-id pub-id-type="medline">33555311</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kramer</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Vanderwood</surname><given-names>KK</given-names> </name><name name-style="western"><surname>Arena</surname><given-names>VC</given-names> </name><etal/></person-group><article-title>Evaluation of a diabetes prevention program lifestyle intervention in older adults: a randomized controlled study in three senior/community centers of varying socioeconomic status</article-title><source>Diabetes Educ</source><year>2018</year><month>04</month><volume>44</volume><issue>2</issue><fpage>118</fpage><lpage>129</lpage><pub-id pub-id-type="doi">10.1177/0145721718759982</pub-id><pub-id pub-id-type="medline">29514568</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Caprani</surname><given-names>N</given-names> </name><name name-style="western"><surname>Gurrin</surname><given-names>C</given-names> </name><name name-style="western"><surname>O&#x2019;Connor</surname><given-names>N</given-names> </name></person-group><article-title>I like to log: a questionnaire study towards accessible lifelogging for older users</article-title><conf-name>12th International ACM SIGACCESS Conference on Computers and Accessibility</conf-name><conf-date>Oct 25-27, 2010</conf-date><conf-loc>Orlando, FL</conf-loc><fpage>263</fpage><lpage>264</lpage><pub-id pub-id-type="doi">10.1145/1878803.1878862</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wilson</surname><given-names>PWF</given-names> 
</name><name name-style="western"><surname>Kannel</surname><given-names>WB</given-names> </name></person-group><article-title>Obesity, diabetes, and risk of cardiovascular disease in the elderly</article-title><source>Am J Geriatr Cardiol</source><year>2002</year><volume>11</volume><issue>2</issue><fpage>119</fpage><lpage>123</lpage><pub-id pub-id-type="doi">10.1111/j.1076-7460.2002.00998.x</pub-id><pub-id pub-id-type="medline">11872970</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Henglin</surname><given-names>M</given-names> </name><name name-style="western"><surname>Stein</surname><given-names>G</given-names> </name><name name-style="western"><surname>Hushcha</surname><given-names>PV</given-names> </name><name name-style="western"><surname>Snoek</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wiltschko</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>S</given-names> </name></person-group><article-title>Machine learning approaches in cardiovascular imaging</article-title><source>Circ Cardiovasc Imaging</source><year>2017</year><month>10</month><volume>10</volume><issue>10</issue><fpage>e005614</fpage><pub-id pub-id-type="doi">10.1161/CIRCIMAGING.117.005614</pub-id><pub-id pub-id-type="medline">28956772</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="web"><article-title>GoSuFit</article-title><source>Google Play</source><access-date>2024-12-31</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://play.google.com/store/apps/details?id=com.healthbridge.hfact20&#x0026;pcampaignid=web_share&#x0026;pli=1">https://play.google.com/store/apps/details?id=com.healthbridge.hfact20&#x0026;pcampaignid=web_share&#x0026;pli=1</ext-link></comment></nlm-citation></ref><ref 
id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pudjihartono</surname><given-names>N</given-names> </name><name name-style="western"><surname>Fadason</surname><given-names>T</given-names> </name><name name-style="western"><surname>Kempa-Liehr</surname><given-names>AW</given-names> </name><name name-style="western"><surname>O&#x2019;Sullivan</surname><given-names>JM</given-names> </name></person-group><article-title>A review of feature selection methods for machine learning-based disease risk prediction</article-title><source>Front Bioinform</source><year>2022</year><volume>2</volume><fpage>927312</fpage><pub-id pub-id-type="doi">10.3389/fbinf.2022.927312</pub-id><pub-id pub-id-type="medline">36304293</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>May</surname><given-names>RJ</given-names> </name><name name-style="western"><surname>Maier</surname><given-names>HR</given-names> </name><name name-style="western"><surname>Dandy</surname><given-names>GC</given-names> </name></person-group><article-title>Data splitting for artificial neural networks using SOM-based stratified sampling</article-title><source>Neural Netw</source><year>2010</year><month>03</month><volume>23</volume><issue>2</issue><fpage>283</fpage><lpage>294</lpage><pub-id pub-id-type="doi">10.1016/j.neunet.2009.11.009</pub-id><pub-id pub-id-type="medline">19959327</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Lundberg</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>SI</given-names> </name></person-group><article-title>A unified approach to interpreting model 
predictions</article-title><access-date>2024-12-31</access-date><conf-name>31st International Conference on Neural Information Processing Systems</conf-name><conf-date>Dec 4-9, 2017</conf-date><conf-loc>Long Beach, CA</conf-loc><fpage>4768</fpage><lpage>4777</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://dl.acm.org/doi/10.5555/3295222.3295230">https://dl.acm.org/doi/10.5555/3295222.3295230</ext-link></comment></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lai</surname><given-names>H</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Keshavjee</surname><given-names>K</given-names> </name><name name-style="western"><surname>Guergachi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>X</given-names> </name></person-group><article-title>Predictive models for diabetes mellitus using machine learning techniques</article-title><source>BMC Endocr Disord</source><year>2019</year><month>10</month><day>15</day><volume>19</volume><issue>1</issue><fpage>101</fpage><pub-id pub-id-type="doi">10.1186/s12902-019-0436-6</pub-id><pub-id pub-id-type="medline">31615566</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yuvaraj</surname><given-names>N</given-names> </name><name name-style="western"><surname>SriPreethaa</surname><given-names>KR</given-names> </name></person-group><article-title>Diabetes prediction in healthcare systems using machine learning algorithms on Hadoop cluster</article-title><source>Cluster Comput</source><year>2019</year><month>01</month><volume>22</volume><issue>S1</issue><fpage>1</fpage><lpage>9</lpage><pub-id 
pub-id-type="doi">10.1007/s10586-017-1532-x</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kavakiotis</surname><given-names>I</given-names> </name><name name-style="western"><surname>Tsave</surname><given-names>O</given-names> </name><name name-style="western"><surname>Salifoglou</surname><given-names>A</given-names> </name><name name-style="western"><surname>Maglaveras</surname><given-names>N</given-names> </name><name name-style="western"><surname>Vlahavas</surname><given-names>I</given-names> </name><name name-style="western"><surname>Chouvarda</surname><given-names>I</given-names> </name></person-group><article-title>Machine learning and data mining methods in diabetes research</article-title><source>Comput Struct Biotechnol J</source><year>2017</year><volume>15</volume><fpage>104</fpage><lpage>116</lpage><pub-id pub-id-type="doi">10.1016/j.csbj.2016.12.005</pub-id><pub-id pub-id-type="medline">28138367</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Tang</surname><given-names>Y</given-names> </name></person-group><article-title>Deep learning using linear support vector machines</article-title><source>arXiv</source><comment>Preprint posted online on Feb 21, 2015</comment><pub-id pub-id-type="doi">10.48550/arXiv.1306.0239</pub-id><pub-id pub-id-type="other">arXiv:1306.0239v4</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ye</surname><given-names>C</given-names> </name><name name-style="western"><surname>Fu</surname><given-names>T</given-names> </name><name name-style="western"><surname>Hao</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Prediction of incident hypertension within the next year: prospective 
study using statewide electronic health records and machine learning</article-title><source>J Med Internet Res</source><year>2018</year><month>01</month><day>30</day><volume>20</volume><issue>1</issue><fpage>e22</fpage><pub-id pub-id-type="doi">10.2196/jmir.9268</pub-id><pub-id pub-id-type="medline">29382633</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Farran</surname><given-names>B</given-names> </name><name name-style="western"><surname>Channanath</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Behbehani</surname><given-names>K</given-names> </name><name name-style="western"><surname>Thanaraj</surname><given-names>TA</given-names> </name></person-group><article-title>Predictive models to assess risk of type 2 diabetes, hypertension and comorbidity: machine-learning algorithms and validation using national health data from Kuwait&#x2014;a cohort study</article-title><source>BMJ Open</source><year>2013</year><month>05</month><day>14</day><volume>3</volume><issue>5</issue><fpage>e002457</fpage><pub-id pub-id-type="doi">10.1136/bmjopen-2012-002457</pub-id><pub-id pub-id-type="medline">23676796</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>DH</given-names> </name><name name-style="western"><surname>de Rezende</surname><given-names>LFM</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>FB</given-names> </name><name name-style="western"><surname>Jeon</surname><given-names>JY</given-names> </name><name name-style="western"><surname>Giovannucci</surname><given-names>EL</given-names> </name></person-group><article-title>Resting heart rate and risk of type 2 diabetes: a prospective cohort study and 
meta-analysis</article-title><source>Diabetes Metab Res Rev</source><year>2019</year><month>02</month><volume>35</volume><issue>2</issue><fpage>e3095</fpage><pub-id pub-id-type="doi">10.1002/dmrr.3095</pub-id><pub-id pub-id-type="medline">30378246</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sullivan</surname><given-names>PW</given-names> </name><name name-style="western"><surname>Ghushchyan</surname><given-names>VH</given-names> </name><name name-style="western"><surname>Ben-Joseph</surname><given-names>R</given-names> </name></person-group><article-title>The impact of obesity on diabetes, hyperlipidemia and hypertension in the United States</article-title><source>Qual Life Res</source><year>2008</year><month>10</month><volume>17</volume><issue>8</issue><fpage>1063</fpage><lpage>1071</lpage><pub-id pub-id-type="doi">10.1007/s11136-008-9385-7</pub-id><pub-id pub-id-type="medline">18777200</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Du</surname><given-names>J</given-names> </name><name name-style="western"><surname>Yang</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zeng</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Ye</surname><given-names>C</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>S</given-names> </name></person-group><article-title>Visualization obesity risk prediction system based on machine learning</article-title><source>Sci Rep</source><year>2024</year><month>09</month><day>28</day><volume>14</volume><issue>1</issue><fpage>22424</fpage><pub-id pub-id-type="doi">10.1038/s41598-024-73826-6</pub-id><pub-id 
pub-id-type="medline">39342032</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kyrou</surname><given-names>I</given-names> </name><name name-style="western"><surname>Tsigos</surname><given-names>C</given-names> </name></person-group><article-title>Obesity in the elderly diabetic patient: is weight loss beneficial? No</article-title><source>Diabetes Care</source><year>2009</year><month>11</month><volume>32</volume><issue>Suppl 2</issue><fpage>S403</fpage><lpage>S409</lpage><pub-id pub-id-type="doi">10.2337/dc09-S348</pub-id><pub-id pub-id-type="medline">19875589</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Newman</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Visser</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Weight change and the conservation of lean mass in old age: the Health, Aging and Body Composition Study</article-title><source>Am J Clin Nutr</source><year>2005</year><month>10</month><volume>82</volume><issue>4</issue><fpage>872</fpage><lpage>878</lpage><pub-id pub-id-type="doi">10.1093/ajcn/82.4.872</pub-id><pub-id pub-id-type="medline">16210719</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Harrison</surname><given-names>TA</given-names> </name><name name-style="western"><surname>Hindorff</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Family history of diabetes as a potential public health 
tool</article-title><source>Am J Prev Med</source><year>2003</year><month>02</month><volume>24</volume><issue>2</issue><fpage>152</fpage><lpage>159</lpage><pub-id pub-id-type="doi">10.1016/s0749-3797(02)00588-3</pub-id><pub-id pub-id-type="medline">12568821</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Risk factors selection using the Boruta-based feature selection method.</p><media xlink:href="formative_v9i1e57874_app1.jpeg" xlink:title="JPEG File, 127 KB"/></supplementary-material></app-group></back></article>