<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v10i1e92820</article-id><article-id pub-id-type="doi">10.2196/92820</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Improving Models to Predict Care Utilization Using Machine Learning: Retrospective Observational Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Kitchen</surname><given-names>Christopher</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Zhang</surname><given-names>Talan</given-names></name><degrees>MS, MPH</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Lemke</surname><given-names>Klaus</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" 
rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Pandya</surname><given-names>Chintan</given-names></name><degrees>MBBS, MPH, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kharrazi</surname><given-names>Hadi</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Weiner</surname><given-names>Jonathan P</given-names></name><degrees>DrPH</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Health Policy and Management, Bloomberg School of Public Health, Johns Hopkins University</institution><addr-line>2024 E Monument Street</addr-line><addr-line>Baltimore</addr-line><addr-line>MD</addr-line><country>United States</country></aff><aff id="aff2"><institution>Center for Biomedical Informatics and Data Science, School of Medicine, Johns Hopkins University</institution><addr-line>Baltimore</addr-line><addr-line>MD</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Steenstra</surname><given-names>Ivan</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Tibble</surname><given-names>Holly</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Shao</surname><given-names>Lex</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Christopher Kitchen, MS, Department of Health Policy and Management, Bloomberg School of Public Health, Johns Hopkins University, 2024 E Monument Street, Baltimore, MD, United States, 1 3015310011; 
<email>ckitchen@jhu.edu</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>26</day><month>6</month><year>2026</year></pub-date><volume>10</volume><elocation-id>e92820</elocation-id><history><date date-type="received"><day>03</day><month>02</month><year>2026</year></date><date date-type="rev-recd"><day>28</day><month>05</month><year>2026</year></date><date date-type="accepted"><day>28</day><month>05</month><year>2026</year></date></history><copyright-statement>&#x00A9; Christopher Kitchen, Talan Zhang, Klaus Lemke, Chintan Pandya, Hadi Kharrazi, Jonathan P Weiner. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 26.6.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2026/1/e92820"/><abstract><sec><title>Background</title><p>The use of artificial intelligence and machine learning (ML) tools is now common in the advancement of health care services and clinical risk estimation. Legacy systems make use of highly informative feature sets developed from years of clinical expertise and research to estimate different outcomes, but only recently have they been tested against novel statistical approaches. One such system, the Johns Hopkins Adjusted Clinical Group (ACG) System, is a long-standing and widely used approach to categorizing clinical risk factors, and it is amenable to ML techniques.</p></sec><sec><title>Objective</title><p>This study aims to test the ACG System using a contrasted area under the receiver operating characteristic curve (AUROC) and <italic>F</italic><sub>1</sub> classification optimization strategy and compare its performance against traditional logistic regression methods. Assuming that selected ML algorithms can be tuned to enhance overall measures of performance, this would strengthen arguments for incorporating them into ACG-related workflows.</p></sec><sec sec-type="methods"><title>Methods</title><p>Using a retrospective observational design, prospective year estimates of all-cause hospitalization and elevated total cost were modeled using a cross-validation framework. Patients with elevated costs were identified as those falling above the 95th percentile of total amounts billed, including pharmacy costs. 
Hyperparameter settings for XGBoost (Extreme Gradient Boosting), random forest, and elastic net were determined using average cross-validated performances for <italic>F</italic><sub>1</sub> and AUROC in a grid search aimed at maximizing either statistic. Additional iterated cross-validation was used to compare point-estimated average AUROC and <italic>F</italic><sub>1</sub>-scores between models, further decomposed by sensitivity, positive predictive value, and <italic>F</italic>-beta statistics.</p></sec><sec sec-type="results"><title>Results</title><p>There were 350,463 patients selected in 2019 from the Johns Hopkins Health System. Model features identified by the ACG System for predicting prospective year hospitalization and total cost were included in these analyses. Findings suggest small but statistically significant improvements in cross-validated AUROC and <italic>F</italic><sub>1</sub>-scores over logistic regression, using either optimization strategy and XGBoost. Logistic models achieved average AUROC values of 0.886 and 0.841 for cost and hospitalization, respectively, whereas XGBoost achieved 0.891 and 0.849, respectively. <italic>F</italic><sub>1</sub> optimization yielded similar findings, with logistic models achieving 0.367 and 0.341 on average for cost and hospitalization, respectively, whereas XGBoost exceeded the logistic value for cost but not for hospitalization (0.411 and 0.328, respectively).</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The clinical implications of these findings and the effect of class imbalance on model calibration are explored, along with the limitations of these data and approach. The core finding is that logistic regression remains very well-suited to these tasks, especially in situations where the efficiency or interpretability of models is critical. Under conditions of imbalance, regressions tended to yield high-precision estimates for the outnumbered class. 
Nevertheless, the findings also underscore a diversity of suitable models depending on clinical use cases, each having its own tradeoffs for evaluating performance. As such, health systems must clearly identify the needs and expectations of a model before calibrating one for use.</p></sec></abstract><kwd-group><kwd>medical informatics</kwd><kwd>risk stratification</kwd><kwd>clinical decision support</kwd><kwd>machine learning</kwd><kwd>public health informatics</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Artificial intelligence (AI) and machine learning (ML) have been applied to risk stratification and predictive modeling within health care settings for several decades [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Some of the earliest examples include rule-based systems and conditional probability approaches to aid decision-making [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. Today, health risks are frequently calculated through statistical modeling that accounts for demographic information, clinical comorbidities, laboratory findings, pharmacy records, and other important clinical or health care delivery characteristics. Health insurance claims represent a rich source of information for this purpose [<xref ref-type="bibr" rid="ref4">4</xref>]. Recent high-profile advances in AI and ML have promulgated the narrative that newer, more complicated approaches are superior at modeling risk compared to regression techniques. ML tools are known to have certain advantages over regression, in part because they rely on fewer assumptions and might incorporate nonlinear associations with outcomes [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. 
Additionally, many ML approaches allow hyperparameter tuning to optimize metrics for specific use cases, increasing sensitivity without compromising precision. Although research is still ongoing, many recent comparative analyses suggest that advanced ML may not always be superior or at least that improvements to model performance have been modest compared to traditional approaches [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref12">12</xref>].</p><p>In the real-world context of health care delivery, it is critical to consider the operational benefits and costs of using different quantitative risk models. Specific operational issues must be understood and assessed when attempting to evaluate statistical models [<xref ref-type="bibr" rid="ref13">13</xref>]. Prediction of mortality and risk of hospitalization, for example, is challenging because these tend to constitute imbalanced class problems. Estimating health-related costs, as an overall measure of utilization and patient risk, tends to result in extremely skewed response distributions, making continuous outcomes a similarly nuanced concern [<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. Many real-world tasks might, therefore, be better understood as supervised anomaly detection and outlier estimation tasks, not just ones involving typical classification and regression performance estimates [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. Clinicians care more about precision and residuals among high-risk individuals than among those least at risk.</p><p>This concern has been expressed by health care researchers and might be remedied through combinations of parameter tuning and cohort sampling techniques [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. 
The generalizability of such approaches is problematic, however, leading to faulty conclusions about model performance, often due to sample bias [<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref22">22</xref>]. Researchers also frequently report sensitivity, specificity, and C-statistics but fail to acknowledge practical limitations of these evaluation metrics or perform analyses with cost weighting applied from the clinical context. This may mask some critical differences between models, especially when applied to costly clinical decision support programs [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref24">24</xref>]. This criticism is especially important for imbalanced data and has been raised as a justification for relying on the increasingly popular <italic>F</italic>-beta statistic: the weighted harmonic average of precision and recall.</p><p>To demonstrate the potential for added predictive value in ML over traditional techniques, we selected a high-performance risk model for clinical risk estimation, the Johns Hopkins Adjusted Clinical Group (ACG) System. The ACG System incorporates clinical risk groupings for multiple diagnostic, care utilization, pharmacy, and demographic characteristics into a sequence of well-validated binary features. These features then form a basis for multiple predictive models published with the software, predicting expected costs, probability of hospitalization, and readmissions with multiple calibrations available for different age groups and lines of business [<xref ref-type="bibr" rid="ref25">25</xref>]. 
Because these models are derived from extensive research and clinical expertise, we consider the ACG System&#x2019;s modeling features to be among the most robust and detailed commercially available tools.</p></sec><sec id="s1-2"><title>Objectives</title><p>The aim of this work is to compare the relative performance of regression and decision tree ensemble methods using the ACG System&#x2019;s sets of predictive features, evaluating 2 clinical outcomes: all-cause inpatient hospitalization and elevated cost. Though these outcomes specifically correspond to patient care utilization, they are also informative as general indicators of clinical risk [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. We explore different optimization strategies and performance statistics to interpret a diversity of findings, addressing the common practice of locally calibrating models for user data. Such recalibration is burdensome to health systems, and ML approaches to estimation might be hard to adopt if local calibration also requires an intensive optimization search for each setting.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Participants and Setting</title><p>Patients from the Johns Hopkins Health Plans (JHHP) for the years 2019 to 2020 were considered for this retrospective cohort design. JHHP is a managed care organization that is part of the Johns Hopkins Health System. JHHP fully insures care for patients receiving treatment at any location (not just from the Johns Hopkins Health System). Our data were drawn from three lines of business: (1) the Employee Health Program, (2) Medicare Advantage, and (3) the Priority Partners Medicaid contracting health plan. Patients were selected if they had at least 1 month of enrollment in a single line of business during both the concurrent year (2019) and the prospective year of analysis (2020). 
A total of 350,463 such patients were identified.</p><p>Organization and quality of this analysis relied on the STROBE (Strengthening the Reporting of Observational Studies in Epidemiology) guidelines for reporting, which were completed where appropriate for the aims of this work (<xref ref-type="supplementary-material" rid="app2">Checklist 1</xref>).</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>This work has been reviewed by the institutional review board at Johns Hopkins Bloomberg School of Public Health, approved as exempt (IRB00005784), and determined to be exempt from requiring patient consent. All data have been deidentified for secondary use in observational research, and patients could not be contacted for consent to participate as a result.</p></sec><sec id="s2-3"><title>Variable Definitions</title><p>Health claims were aggregated into an annual summary for each patient-year using the ACG System (version 13.0), published by Johns Hopkins Healthcare Solutions. The patient summary file is used to understand the characteristics of a cohort, such as the number of patients with a hospital admission, the number of chronic conditions, and patient demographics.</p><p>No patients were found to have missing data among the ACG System variables used for modeling as part of this research. Binary features are generally regarded as absent when not explicitly coded for in claims. For example, a claim identifying a 50-year-old woman with 2 concurrent-year emergency room visits would have a positive value for modeling features acg_4554, female, and edpat_2.</p><p>The analytical files consisting of ACG predictive features (model markers) were also developed for each annualized outcome: all-cause inpatient hospitalization and elevated total health care cost, defined as above the 95th percentile for billed amounts for each patient. 
Additional binary features included for these outcome-specific models are multiple ranges in age, concurrent year cost ranges associated with care, counts of concurrent year emergency room visits, hospitalizations, and outpatient encounters, clustered Resource Utilization Band, diagnosis-based morbidity risk (eg, expanded diagnosis cluster and adjusted clinical group), and pharmacy-based morbidity risk group. Specific description labels and the data dictionary for each feature can be found in the ACG System manual and are restricted to subscribers of these tools. There were a total of 268 features included in the ACG prospective year hospitalization models (all ages) and 251 for prospective year cost (all ages).</p></sec><sec id="s2-4"><title>Model Training and Selection</title><p>For all model types, concurrent-year observations (ie, 2019 annualized markers) are used to predict the likelihood of all-cause hospitalization or high cost in 1 year. Three ML approaches were explored in a cross-validated hyperparameter tuning framework, along with 3 regression-based methods commonly used in our risk stratification tasks. ML techniques included elastic net, random forest, and Extreme Gradient Boosting (XGBoost). Algorithms were selected on the basis of interpretability, scalability, and ability to handle high-dimensional data (ie, hundreds of features) [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. 
Added parsimony is often a stated reason for using several techniques (eg, random forest, elastic net, and regularized models), but improved prediction is also frequently observed with reduced model variance [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>].</p><p>The 3 non-ML regression models include logistic regression, least absolute shrinkage and selection operator (LASSO) regression, and a &#x201C;reduced&#x201D; multivariate regression consisting of just the set of inputs selected through LASSO and recalibrated without a regularization term. LASSO is a regularization technique that shrinks highly collinear coefficients to zero, dropping them from the model and weighing the remaining independent effects. All models were fitted using the R programming language (version 4.0.2; R Foundation for Statistical Computing) with relevant packages for each model type, including &#x201C;glmnet,&#x201D; &#x201C;xgboost,&#x201D; and &#x201C;randomForest,&#x201D; with additional model evaluation tasks performed using &#x201C;pROC&#x201D; [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref32">32</xref>].</p></sec><sec id="s2-5"><title>Evaluation Framework or Parameter Settings</title><p>Evaluation metrics commonly used in ML tasks include the area under the receiver operating characteristic curve (AUROC) and point-estimated precision and recall. A criticism of using AUROC in imbalanced data is that it tends to overestimate performance for models with poor sensitivity. This is due to the high count of negative cases, which arbitrarily reduces the false positive rate and subsequently inflates both specificity and AUROC [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. 
Researchers have also made use of <italic>F</italic><sub>1</sub>-scores to focus on the calibration of errors for just positive cases in imbalanced data [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. The <italic>F</italic><sub>1</sub> is part of a family of <italic>F</italic>-beta scores that weigh precision and recall differentially, mimicking the cost-benefit exchange for dissimilar use cases. The <italic>F</italic><sub>1</sub> fixes this balance where precision (positive predictive value) and recall (sensitivity) are weighted equally, and it represents the harmonic average of the two.</p><p>Highly disparate model performances were possible through the tuning of hyperparameters. Settings were evaluated such that 2 optimization strategies were distinguished, maximizing either AUROC or <italic>F</italic><sub>1</sub>. Parameters yielding the best average result in a 5-fold cross-validation for each setting were assigned through a small-scale grid search (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p></sec><sec id="s2-6"><title>Statistical Analyses</title><p>Cohort characteristics were explored with stratification by concurrent-year line of business and Patient Need Group (PNG), illustrating features including age, sex, number of chronic conditions, number of active ingredients, total cost, pharmacy cost, and utilization of certain points-of-care. PNG is an ACG classification of patients into 11 expected health care need groups using an index of comorbidity and patient care utilization [<xref ref-type="bibr" rid="ref33">33</xref>].</p><p>Model performances are evaluated using 20-iteration 5-fold cross-validation, with conditions for optimized AUROC or <italic>F</italic><sub>1</sub>. Average estimated performance is further characterized by respective 95% CIs for AUROC, <italic>F</italic><sub>1</sub>, sensitivity, and positive predictive value (PPV). 
Wherever a cross-validated point estimate lies outside the 95% CI of a comparison model, we consider the average performance to be significantly different.</p><p>Due to the volume of models in this cross-validated framework, it was not possible to inspect all features for meaningful associations with outcomes or differences between model types. Furthermore, decision tree ensembles do not have a method for identifying discrete effects, akin to log odds or null hypothesis testing. Instead, we interpret features by variable importance across all applicable model types. This approach involves transforming the absolute value of coefficients from regressions and the average entropy reduction for decision tree models to a scale between 0 and 1. To document model parsimony, the number of features used for each model is presented with the main findings, and the average importance of the top 20 attributes for prospective-year hospitalization and elevated total cost is explored as a secondary analysis.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Patient Characteristics</title><p>The selected cohort is a large, diverse sample that generally reflects insured patients from the Washington-Baltimore metropolitan area between 2019 and 2020 (<xref ref-type="table" rid="table1">Table 1</xref>). The average patient age was 24.1 (SD 20) years, with most of the sample belonging to the Priority Partners line of business (285,817/350,463, 81.6%; <xref ref-type="table" rid="table1">Table 1</xref>). The sample was majority female (193,984/350,463, 55.4%), and 65.2% (228,341/350,463) had a low-need or low-complexity PNG designation (Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Nearly half of the patients did not have a recorded race or ethnicity designation (47.6%). 
This met our expectations, however, as documentation of race or ethnicity is known to be sparse for many administrative claims databases [<xref ref-type="bibr" rid="ref34">34</xref>].</p><p>Of 350,463 patients, 164,536 (46.9%) had 1 or more ACG-defined chronic conditions, and 240,528 (68.6%) took 1 or more active ingredients as part of a medication regimen. Across all patients, 6.3% (22,075/350,463) had a hospitalization in 2019. These hospitalizations tended to occur among those in the multimorbidity, high-complexity, and frailty PNGs; 48.3% (6911/14,321) and 47.2% (468/992) had concurrent-year hospitalizations, respectively, while 41.6% (5959/14,321) and 49.7% (493/992) were among the 95th percentile of patients with elevated concurrent-year health care costs.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Patient characteristics by line of business.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom" colspan="3">Line of business</td><td align="left" valign="bottom">Total sample</td></tr><tr><td align="left" valign="top">Variable</td><td align="left" valign="top">Employee health plan</td><td align="left" valign="top">Medicare advantage</td><td align="left" valign="top">Priority partners</td><td align="left" valign="top"/></tr></thead><tbody><tr><td align="left" valign="top">Total patients, N (%)</td><td align="left" valign="top">48,630 (100)</td><td align="left" valign="top">16,016 (100)</td><td align="left" valign="top">285,817 (100)</td><td align="left" valign="top">350,463 (100)</td></tr><tr><td align="left" valign="top">Average age (SD), y</td><td align="left" valign="top">33.5 (18.3)</td><td align="left" valign="top">69.8 (8.9)</td><td align="left" valign="top">20.0 (16.8)</td><td align="left" valign="top">24.1 (20.0)</td></tr><tr><td align="left" valign="top" colspan="5">Age, y, n (%)</td></tr><tr><td align="left" 
valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>0&#x2010;17</td><td align="left" valign="top">10,942 (22.5)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">159,757 (55.9)</td><td align="left" valign="top">170,699 (48.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>18&#x2010;34</td><td align="left" valign="top">14,589 (30.0)</td><td align="left" valign="top">80 (0.5)</td><td align="left" valign="top">66,140 (23.1)</td><td align="left" valign="top">80,809 (23.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>35&#x2010;64</td><td align="left" valign="top">21,850 (44.9)</td><td align="left" valign="top">2883 (18.0)</td><td align="left" valign="top">59,920 (21.0)</td><td align="left" valign="top">84,653 (24.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>&#x2265;65</td><td align="left" valign="top">1249 (2.6)</td><td align="left" valign="top">13,053 (81.5)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">14,302 (4.1)</td></tr><tr><td align="left" valign="top">Female, n (%)</td><td align="left" valign="top">28,083 (57.7)</td><td align="left" valign="top">9363 (58.5)</td><td align="left" valign="top">156,538 (54.8)</td><td align="left" valign="top">193,984 (55.4)</td></tr><tr><td align="left" valign="top">Male, n (%)</td><td align="left" valign="top">20,547 (42.3)</td><td align="left" valign="top">6653 (41.5)</td><td align="left" valign="top">129,279 (45.2)</td><td align="left" valign="top">156,479 (44.6)</td></tr><tr><td align="left" valign="top">Count of patients with 1 or more chronic conditions, n (%)</td><td align="left" valign="top">24,977 (51.4)</td><td align="left" valign="top">14,813 (92.5)</td><td align="left" 
valign="top">124,746 (43.6)</td><td align="left" valign="top">164,536 (46.9)</td></tr><tr><td align="left" valign="top">Count of patients with 1 or more active ingredients, n (%)</td><td align="left" valign="top">29,706 (61.1)</td><td align="left" valign="top">14,878 (92.9)</td><td align="left" valign="top">195,944 (68.6)</td><td align="left" valign="top">240,528 (68.6)</td></tr><tr><td align="left" valign="top">Cost &#x003E;95th percentile, n (%)</td><td align="left" valign="top">4105 (8.4)</td><td align="left" valign="top">2340 (14.6)</td><td align="left" valign="top">11,079 (3.9)</td><td align="left" valign="top">17,524 (5.0)</td></tr><tr><td align="left" valign="top">Count of patients with 1 or more inpatient visit, n (%)</td><td align="left" valign="top">2103 (4.3)</td><td align="left" valign="top">1790 (11.2)</td><td align="left" valign="top">18,182 (6.4)</td><td align="left" valign="top">22,075 (6.3)</td></tr></tbody></table></table-wrap></sec><sec id="s3-2"><title>Model Evaluation</title><p><xref ref-type="table" rid="table2">Table 2</xref> details the average cross-validated model performance with respect to each optimization objective and outcome. When <italic>F</italic><sub>1</sub> is maximized, XGBoost was found to have the highest average performance for identifying patients above the 95th percentile in prospective-year total health care cost (<italic>F</italic><sub>1</sub>-score=0.411, 95% CI 0.409&#x2010;0.412), followed by random forest (<italic>F</italic><sub>1</sub>-score=0.401, 95% CI 0.400&#x2010;0.403) and logistic regression (<italic>F</italic><sub>1</sub>-score=0.367, 95% CI 0.366&#x2010;0.369). Both XGBoost and random forest models were significantly different in <italic>F</italic><sub>1</sub> performance compared to their respective logistic regression counterparts (Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). 
The improvement in <italic>F</italic><sub>1</sub> over logistic regression was attributed to greater average sensitivity (<xref ref-type="fig" rid="figure1">Figure 1</xref>). The XGBoost model correctly identified 29.8% of patients in the prospective-year 95th percentile of total cost, compared with 24.8% for logistic regression. The same was not true for prospective-year hospitalized patients, where logistic regression was identified as the best-performing model for <italic>F</italic><sub>1</sub> optimization (<italic>F</italic><sub>1</sub>-score=0.341, 95% CI 0.339&#x2010;0.342), followed by the remaining regression-based models, although roughly equal sensitivity was noted across all models. Average PPV was consistently and significantly lower than that of regressions for both XGBoost and random forest on their respective tasks (Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Cross-validated model performance by outcome (rows) and classification metric (columns), assuming a threshold of <italic>P</italic>(<italic>x</italic>)&#x003E;.5. Horizontal dashed lines depict average performance for each trace. 
AUC: area under the curve; LASSO: least absolute shrinkage and selection operator; PPV: positive predictive value; XGBoost: Extreme Gradient Boosting.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e92820_fig01.png"/></fig><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Ranked models by outcome, optimization, and cross-validated average model performance for binary outcomes.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Optimization, outcome, and rank</td><td align="left" valign="bottom">Model</td><td align="left" valign="bottom">AUROC<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom">Sensitivity</td><td align="left" valign="bottom">PPV<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top" colspan="6"><italic>F</italic><sub>1</sub>-score</td></tr><tr><td align="left" valign="top" colspan="6"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>95th percentile cost</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1</td><td align="left" valign="top">XGBoost<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">0.886<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">0.411<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">0.298</td><td align="left" valign="top">0.659</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2</td><td align="left" valign="top">Random forest</td><td align="left" valign="top">0.851</td><td align="left" valign="top">0.401</td><td align="left" valign="top">0.436<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">0.372</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>3</td><td align="left" valign="top">Logistic regression</td><td align="left" valign="top">0.886<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">0.367</td><td align="left" valign="top">0.249</td><td align="left" valign="top">0.701</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>4</td><td align="left" valign="top">Reduced logistic regression</td><td align="left" valign="top">0.885</td><td align="left" valign="top">0.366</td><td align="left" valign="top">0.248</td><td align="left" valign="top">0.699</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>5</td><td align="left" valign="top">Elastic net</td><td align="left" valign="top">0.886<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">0.356</td><td align="left" valign="top">0.239</td><td align="left" valign="top">0.703</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>6</td><td align="left" valign="top">LASSO<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">0.885</td><td align="left" valign="top">0.351</td><td align="left" valign="top">0.234</td><td align="left" valign="top">0.707<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td></tr><tr><td align="left" valign="top" colspan="6"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hospitalization</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1</td><td align="left" valign="top">Logistic regression</td><td align="left" valign="top">0.841<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">0.341<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">0.233</td><td align="left" valign="top">0.631<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2</td><td align="left" valign="top">Reduced logistic regression</td><td align="left" valign="top">0.841<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">0.340</td><td align="left" valign="top">0.233</td><td align="left" valign="top">0.630</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>3</td><td align="left" valign="top">Elastic net</td><td align="left" valign="top">0.840</td><td align="left" 
valign="top">0.339</td><td align="left" valign="top">0.232</td><td align="left" valign="top">0.630</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>4</td><td align="left" valign="top">LASSO</td><td align="left" valign="top">0.840</td><td align="left" valign="top">0.339</td><td align="left" valign="top">0.232</td><td align="left" valign="top">0.630</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>5</td><td align="left" valign="top">XGBoost</td><td align="left" valign="top">0.834</td><td align="left" valign="top">0.328</td><td align="left" valign="top">0.227</td><td align="left" valign="top">0.596</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>6</td><td align="left" valign="top">Random forest</td><td align="left" valign="top">0.811</td><td align="left" valign="top">0.299</td><td align="left" valign="top">0.276<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">0.327</td></tr><tr><td align="left" valign="top" colspan="6">AUC<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup></td></tr><tr><td align="left" valign="top" colspan="6"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>95th percentile cost</td></tr><tr><td align="char" char="." valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1</td><td align="left" valign="top">XGBoost</td><td align="char" char="." 
valign="top">0.891<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="char" char="." valign="top">0.403<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="char" char="." valign="top">0.282</td><td align="char" char="." valign="top">0.706</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2</td><td align="left" valign="top">Logistic regression</td><td align="left" valign="top">0.886</td><td align="left" valign="top">0.367</td><td align="left" valign="top">0.249</td><td align="left" valign="top">0.701</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>3</td><td align="left" valign="top">Elastic net</td><td align="left" valign="top">0.886</td><td align="left" valign="top">0.354</td><td align="left" valign="top">0.237</td><td align="left" valign="top">0.705</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>4</td><td align="left" valign="top">Reduced logistic regression</td><td align="left" valign="top">0.885</td><td align="left" valign="top">0.366</td><td align="left" valign="top">0.248</td><td align="left" valign="top">0.699</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>5</td><td align="left" valign="top">LASSO</td><td align="left" valign="top">0.885</td><td align="left" valign="top">0.351</td><td align="left" valign="top">0.234</td><td align="left" valign="top">0.707<sup><xref 
ref-type="table-fn" rid="table2fn4">d</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>6</td><td align="left" valign="top">Random forest</td><td align="left" valign="top">0.850</td><td align="left" valign="top">0.272</td><td align="left" valign="top">0.754<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">0.166</td></tr><tr><td align="left" valign="top" colspan="6"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hospitalization</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1</td><td align="left" valign="top">XGBoost</td><td align="left" valign="top">0.849<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">0.321</td><td align="left" valign="top">0.209</td><td align="left" valign="top">0.689<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2</td><td align="left" valign="top">Elastic net</td><td align="left" valign="top">0.842</td><td align="left" valign="top">0.280</td><td align="left" valign="top">0.181</td><td align="left" valign="top">0.626</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>3</td><td align="left" valign="top">Logistic regression</td><td align="left" valign="top">0.841</td><td align="left" 
valign="top">0.341<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">0.233</td><td align="left" valign="top">0.631</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>4</td><td align="left" valign="top">Reduced logistic regression</td><td align="left" valign="top">0.840</td><td align="left" valign="top">0.340</td><td align="left" valign="top">0.233</td><td align="left" valign="top">0.630</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>5</td><td align="left" valign="top">LASSO</td><td align="left" valign="top">0.840</td><td align="left" valign="top">0.339</td><td align="left" valign="top">0.232</td><td align="left" valign="top">0.630</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>6</td><td align="left" valign="top">Random forest</td><td align="left" valign="top">0.817</td><td align="left" valign="top">0.288</td><td align="left" valign="top">0.532<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">0.198</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>AUROC: area under receiver operating characteristic.</p></fn><fn id="table2fn2"><p><sup>b</sup>PPV: positive predictive value.</p></fn><fn id="table2fn3"><p><sup>c</sup>XGBoost: Extreme Gradient Boosting.</p></fn><fn id="table2fn4"><p><sup>d</sup>Performance estimates for candidate models are based on the appropriate evaluation metric.</p></fn><fn id="table2fn5"><p><sup>e</sup>LASSO: least absolute shrinkage and selection 
operator.</p></fn><fn id="table2fn6"><p><sup>f</sup>AUC: area under the curve.</p></fn></table-wrap-foot></table-wrap><p>When parameters are fit to maximize AUROC, XGBoost is best suited for identifying prospective-year patients above the 95th percentile of cost (AUROC=0.891, 95% CI 0.891&#x2010;0.892) and prospective-year hospitalization (AUROC=0.849, 95% CI 0.848&#x2010;0.850). For both outcomes, the cross-validated AUROC of XGBoost lies outside the 95% CI of the corresponding logistic regression for that task (95th percentile cost: 0.885&#x2010;0.886; hospitalization: 0.841&#x2010;0.842), indicating a significant difference. Furthermore, the average PPV of XGBoost is significantly better than that of the regressions for the hospitalization task (PPV=0.689, 95% CI 0.687&#x2010;0.692). AUROC-optimized point estimates of <italic>F</italic><sub>1</sub> were again only significantly better for XGBoost in the prediction of high cost, not hospitalization.</p><p>On average, 132 of 268 features were retained by the <italic>F</italic><sub>1</sub>-optimized elastic net model predicting hospitalization, 268 by the random forest, and 251 by XGBoost (<xref ref-type="table" rid="table3">Table 3</xref>). For <italic>F</italic><sub>1</sub>-optimized models predicting the 95th percentile of cost, the corresponding counts were 191, 251, and 247 of 251, respectively, suggesting there is a higher proportion of informative features for estimating cost than for estimating hospitalization. 
This was consistent with observed AUROC-optimized selected features, though more were generally retained on average.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Feature selection summary, reflecting the average integer count of selected features by model type, optimization, and outcome across cross-validations.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Optimization and model type</td><td align="left" valign="bottom">Hospitalization, n (%)</td><td align="left" valign="bottom">95th percentile cost, n (%)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="3"><italic>F</italic><sub>1</sub>-score</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Logistic regression</td><td align="left" valign="top">268 (100)</td><td align="left" valign="top">251 (100)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Elastic net</td><td align="left" valign="top">132 (49.3)</td><td align="left" valign="top">191 (76.1)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LASSO<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="top">123 (45.9)</td><td align="left" valign="top">131 (52.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Random forest</td><td align="left" valign="top">268 (100)</td><td align="left" valign="top">251 (100)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Reduced logistic regression</td><td align="char" char="." valign="top">123 (45.9)</td><td align="char" char="." 
valign="top">131 (52.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top">251 (93.7)</td><td align="left" valign="top">247 (98.4)</td></tr><tr><td align="left" valign="top" colspan="3">AUC<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Logistic regression</td><td align="left" valign="top">268 (100)</td><td align="left" valign="top">251 (100)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Elastic net</td><td align="left" valign="top">268 (100)</td><td align="left" valign="top">164 (65.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LASSO</td><td align="left" valign="top">123 (45.9)</td><td align="left" valign="top">131 (52.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Random forest</td><td align="left" valign="top">268 (100)</td><td align="left" valign="top">251 (100)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Reduced logistic regression</td><td align="left" valign="top">123 (45.9)</td><td align="left" valign="top">131 (52.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>XGBoost</td><td align="left" valign="top">254 (94.8)</td><td align="left" valign="top">237 (94.4)</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>LASSO: least absolute shrinkage and selection operator.</p></fn><fn id="table3fn2"><p><sup>b</sup>XGBoost: Extreme Gradient Boosting.</p></fn><fn id="table3fn3"><p><sup>c</sup>AUC: area 
under the curve.</p></fn></table-wrap-foot></table-wrap><p><xref ref-type="fig" rid="figure2">Figure 2</xref> illustrates the average variable importance and 95% CI for the top 20 ranked attributes for each task, across the performances of all 4 constituent models: logistic regression, elastic net, random forest, and XGBoost (left panel). Variable naming within the ACG System is detailed as part of an appended table and briefly described in Table S4 of <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Features ranking high in average importance, relative to all other features of the same model, are listed in descending order, and the average importance for each individual model is provided on the right panel of <xref ref-type="fig" rid="figure2">Figure 2</xref>. A great deal of variability is noted across the 4 models in terms of the order and degree of contribution for the top-ranked variables. For example, medications for endocrine disorders affecting growth (rxmg_ENDx060) are the most important feature for determining prospective-year elevated cost (95th percentile), but only in the elastic net models.</p><p>Features most strongly associated with elevated prospective-year total cost (at or above the 95th percentile) include pregnancy and maternity conditions (acg_preg), HIV or AIDS (edc_INF04), and concurrent-year total cost in the 98th to 99th percentile (tt_cost_99). For both <italic>F</italic><sub>1</sub>- and AUROC-optimized models, the importance of pregnancy conditions is ranked first only for logistic regression, plausibly overstating its importance.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Average rescaled variable importance of the top 20 ranked features across all models for each optimization (left) and for each separate technique (right). The top 20 features were identified first through either optimization setting, meaning not all of the listed features were among the top-ranked. 
As a result, point traces may not appear for some features in the left panel, but all associated values are plotted on the right panel. AUC: area under the curve; XGBoost: Extreme Gradient Boosting.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e92820_fig02.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Summary of Findings</title><p>ML techniques were found to significantly improve model sensitivity for the selected tasks. The <italic>F</italic><sub>1</sub>-score for elevated prospective-year total cost was robustly improved by 12.0% over logistic regression by relying on an optimized XGBoost, but AUROC was improved only by 0.6%. For prospective-year hospitalization, these improvements were 0% and 1.0%, respectively. The performance estimates were consistent with those observed in prior research. When AUROC was optimized, it reached 0.849 and 0.891 for hospitalization and cost, respectively. Using the ACG score as a pretrained model, we have previously seen an AUROC of 0.761 for prospective-year hospitalization and 0.840 for elevated cost, within a cohort of 12,820 patients aged from 21 to 64 years from the same health system [<xref ref-type="bibr" rid="ref35">35</xref>].</p><p>These findings may seem as though ML made little difference over regression, but these techniques also elevated the average point-estimated sensitivity by 0.033 (13.3%) and <italic>F</italic><sub>1</sub>-score by 0.036 (9.8%), which calls into question the reliability of only inspecting the receiver operating characteristic. This finding meant better sensitivity without losing precision, relative to a regression model. 
Better sensitivity means more opportunities for detection and screening for poor health outcomes and better patient support.</p><p>Our results also illustrate how the use of ML optimization may only modestly improve precision and specificity over logistic regression. Whether enhanced sensitivity is worth the investment of time and resources for model training depends on the associated costs of each type of error; thus, the rationale for choosing one model over another does not boil down to a single metric. Regression offers clear advantages that may be more valuable to researchers, care administrators, and clinicians. The ability to dissect individual effects, such as shifting the odds of a predicted outcome, is powerful and grants care analysts meaningful insights into systematic bias that might be present in any model. This remains challenging, if not impossible, to achieve with most ML and advanced AI techniques.</p></sec><sec id="s4-2"><title>Significance</title><p>Several important points follow from these findings. For real-world tasks, it is generally not the case that there is a single best-performing model. Multiple acceptable solutions are considered, each with different strengths and weaknesses. A model with highly interpretable features (ie, regression) may be preferred over one that has better performance. Proper evaluation of model performance depends on understanding real-world costs of intervention. When these models serve as a decision support tool, the balance of precision and recall can only be properly calibrated after accounting for the cost of care and harm to patients.</p><p>Evaluating model performance is further nuanced when an outcome is severely imbalanced or skewed, as is often the case in real-world data. Our findings show that AUROC alone does not indicate which model should be used or what types of errors might be effectively reduced. 
The flexibility of ML approaches to fit specific use cases is generally their main advantage over the rigid statistical estimation found in regression. This is critical when an intervention is already in mind, and health systems are looking for decision support in administering care.</p><p>Consider a hypothetical program to screen patients with cardiovascular disease for unplanned hospital readmissions. From a denominator of 2000 patients with cardiovascular disease with hospital discharges annually, the program can afford to screen about 500. A statistical model that is calibrated to be overly precise might miss readmissions among those with less than high risk. Readmission is a rare event, but because the task requires identification of roughly 25% of the sample, there is a premium on model sensitivity over precision. In other words, the harm of a false negative (an undetected readmission) outweighs that of a false positive (the cost of an unnecessary screening), so the selection of an appropriate model needs to account for this.</p><p>The <italic>F</italic>-beta statistic aids selection by weighting these preferences without prior knowledge of monetary or efficiency costs. The <italic>F</italic><sub>2</sub> statistic weights sensitivity twice as heavily as precision, for example, and permits users to index average performance to suit their needs. Returning to our results, we can see how random forest models are suddenly much more appealing for tasks that require greater sensitivity without necessarily diminishing precision, as we would expect from simply shifting the classification threshold (Table S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p><p>Lastly, several ML techniques are well suited to feature selection, but there are sometimes disagreements on selected features and their relative importance. Our results show that elastic net is a powerful example of automated feature selection and maintains reasonable overall performance. 
We did not find that any ML technique is necessarily preferable over regression for interpretability, however. Some methods make it easier to understand which features are important for prediction, but several highly esteemed methods in the literature continue to be a black box, especially deep learning networks [<xref ref-type="bibr" rid="ref36">36</xref>]. In practice, we see that it is often best to fit multiple types of models, with some minimum specification of performance in mind, and then select candidate models based on these practical constraints: performance, interpretability, and parsimony.</p></sec><sec id="s4-3"><title>Limitations</title><p>Our work is also limited in a variety of ways. The selected cohort may not be suitable for generalizable inference at a larger scale, as it primarily comprised Medicaid beneficiaries in the state of Maryland. In our prior validation efforts with earlier years of these data, we concluded that the rates of disease conditions and care utilization are not substantially different. We know social needs are more prevalent for this sample, and consequently, there may be a general elevation in health care utilization across most points of care, including hospitalization. Additionally, the timeframe overlaps with the beginning of the COVID-19 pandemic. This adds noise to our prospective year outcomes, and it is known that overall health care utilization was reduced during the early weeks and months especially, although rates of all-cause hospitalization were also likely more reflective of unplanned admissions for this year [<xref ref-type="bibr" rid="ref37">37</xref>]. Finally, we did not perform a fully comprehensive grid search due to limitations in compute resources and the timeliness of providing results. Instead, a handful of impactful hyperparameters were varied for each technique across a broad range of values. 
Added performance is still possible with better refinement of these settings, as further supported in this work by the summary of CIs (Tables S6 and S7 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p><p>Due to these limitations, the selected models require additional work to demonstrate their suitability for real-world use cases. This is especially true in the context of claims adjudication, where substantial financial harm could result from poorly calibrated models or those that rest on erroneous assumptions, such as normality and homoscedasticity of residuals. The aim of this work is first to demonstrate the added predictive value of a cross-section of techniques, not to exhaustively search all parameter settings as part of our grid search or to prescribe clinical use cases. A more expansive approach, using more current, nationally representative data, is needed to validate these findings. The resulting cost and fairness of using these tools in a diversity of conditions also need to be assessed to ensure safety in implementation.</p></sec><sec id="s4-4"><title>Conclusions</title><p>Risk estimation can be improved somewhat by ML techniques over logistic regression, but there are several practical limitations. Unlike regression, ML techniques require parameter tuning and have more opaque variable interactions, making them far less interpretable. They may add parsimony to prediction efforts through feature selection, but often at the cost of performance. Conversely, the ability to tune parameters enables ML models to better tailor a response to fit specific use cases, for example, when greater precision or sensitivity is required, especially in imbalanced or skewed data. Overall improvement in precision and recall (separately or as the <italic>F</italic><sub>1</sub>-score statistic) suggests decision tree ensembles are better suited to predicting imbalanced outcomes than logistic regression. 
This finding must be tempered by the observation that the gain in performance was also modest overall.</p></sec></sec></body><back><ack><p>This project was supported by substantial effort from the technical team at the Center for Population Health Information Technology at the Johns Hopkins Bloomberg School of Public Health. The authors thank Tom Richards, MSc, for his work in structuring and cleaning the data used as part of this analysis. The authors of this work were responsible for all facets of this manuscript, and no generative artificial intelligence was used in the preparation of this work or its analytical findings. There was no use of generative AI in the production, drafting, or editing of this work. No portion or content of this work was authored by, aided by, or is a product of generative artificial intelligence.</p></ack><notes><sec><title>Funding</title><p>This work was supported by funding through Johns Hopkins University (JHU) and Johns Hopkins HealthCare Solutions. JHU holds the copyright to the Adjusted Clinical Groups (ACG) System and receives royalties from the global distribution of the ACG System. This revenue supports a portion of the authors&#x2019; salaries. The authors are members of a group of researchers who develop and maintain the ACG System with support from JHU.</p></sec><sec><title>Data Availability</title><p>The data underlying this paper were extracted from clinical claims from the Johns Hopkins HealthCare Solutions System. As part of our data use agreement and institutional review board approval, we are prohibited from sharing any data used in this study. 
The Adjusted Clinical Groups System, used as part of this research, is available to researchers and clinical stakeholders.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: CK (lead), KL (equal), CP, JPW</p><p>Data curation: CK, KL, CP</p><p>Formal analysis: CK (lead), KL</p><p>Funding acquisition: JPW, HK</p><p>Investigation: CK (lead), KL</p><p>Methodology: CK (lead), KL, CP, TZ</p><p>Project administration: KL, CP</p><p>Resources: CK</p><p>Supervision: CP, JPW</p><p>Validation: TZ, KL, JPW</p><p>Visualization: CK</p><p>Writing&#x2014;original draft: CK (lead), CP, KL, HK</p><p>Writing&#x2014;review and editing: CK (lead), CP, TZ, HK</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ACG</term><def><p>Adjusted Clinical Group</p></def></def-item><def-item><term id="abb2">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb3">AUROC</term><def><p>area under the receiver operating characteristic curve</p></def></def-item><def-item><term id="abb4">JHHP</term><def><p>Johns Hopkins Health Plans</p></def></def-item><def-item><term id="abb5">LASSO</term><def><p>least absolute shrinkage and selection operator</p></def></def-item><def-item><term id="abb6">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb7">PNG</term><def><p>patient need group</p></def></def-item><def-item><term id="abb8">PPV</term><def><p>positive predictive value</p></def></def-item><def-item><term id="abb9">STROBE</term><def><p>Strengthening the Reporting of Observational Studies in Epidemiology</p></def></def-item><def-item><term id="abb10">XGBoost</term><def><p>Extreme Gradient Boosting</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name
name-style="western"><surname>Kaul</surname><given-names>V</given-names> </name><name name-style="western"><surname>Enslin</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gross</surname><given-names>SA</given-names> </name></person-group><article-title>History of artificial intelligence in medicine</article-title><source>Gastrointest Endosc</source><year>2020</year><month>10</month><volume>92</volume><issue>4</issue><fpage>807</fpage><lpage>812</lpage><pub-id pub-id-type="doi">10.1016/j.gie.2020.06.040</pub-id><pub-id pub-id-type="medline">32565184</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sutton</surname><given-names>RT</given-names> </name><name name-style="western"><surname>Pincock</surname><given-names>D</given-names> </name><name name-style="western"><surname>Baumgart</surname><given-names>DC</given-names> </name><name name-style="western"><surname>Sadowski</surname><given-names>DC</given-names> </name><name name-style="western"><surname>Fedorak</surname><given-names>RN</given-names> </name><name name-style="western"><surname>Kroeker</surname><given-names>KI</given-names> </name></person-group><article-title>An overview of clinical decision support systems: benefits, risks, and strategies for success</article-title><source>NPJ Digit Med</source><year>2020</year><volume>3</volume><fpage>17</fpage><pub-id pub-id-type="doi">10.1038/s41746-020-0221-y</pub-id><pub-id pub-id-type="medline">32047862</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Adams</surname><given-names>ID</given-names> </name><name name-style="western"><surname>Chan</surname><given-names>M</given-names> </name><name name-style="western"><surname>Clifford</surname><given-names>PC</given-names> 
</name><etal/></person-group><article-title>Computer aided diagnosis of acute abdominal pain: a multicentre study</article-title><source>Br Med J (Clin Res Ed)</source><year>1986</year><month>09</month><day>27</day><volume>293</volume><issue>6550</issue><fpage>800</fpage><lpage>804</lpage><pub-id pub-id-type="doi">10.1136/bmj.293.6550.800</pub-id><pub-id pub-id-type="medline">3094664</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Davenport</surname><given-names>T</given-names> </name><name name-style="western"><surname>Kalakota</surname><given-names>R</given-names> </name></person-group><article-title>The potential for artificial intelligence in healthcare</article-title><source>Future Healthc J</source><year>2019</year><month>06</month><volume>6</volume><issue>2</issue><fpage>94</fpage><lpage>98</lpage><pub-id pub-id-type="doi">10.7861/futurehosp.6-2-94</pub-id><pub-id pub-id-type="medline">31363513</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Byrne</surname><given-names>DW</given-names> </name></person-group><source>Artificial intelligence for improved patient outcomes</source><year>2022</year><publisher-name>Wolters Kluwer</publisher-name><pub-id pub-id-type="other">9781975197957</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Park</surname><given-names>S</given-names> </name><name name-style="western"><surname>Basu</surname><given-names>A</given-names> </name></person-group><article-title>Alternative evaluation metrics for risk adjustment methods</article-title><source>Health 
Econ</source><year>2018</year><month>06</month><volume>27</volume><issue>6</issue><fpage>984</fpage><lpage>1010</lpage><pub-id pub-id-type="doi">10.1002/hec.3657</pub-id><pub-id pub-id-type="medline">29577489</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ribeiro</surname><given-names>RP</given-names> </name><name name-style="western"><surname>Moniz</surname><given-names>N</given-names> </name></person-group><article-title>Imbalanced regression and extreme value prediction</article-title><source>Mach Learn</source><year>2020</year><month>09</month><volume>109</volume><issue>9-10</issue><fpage>1803</fpage><lpage>1835</lpage><pub-id pub-id-type="doi">10.1007/s10994-020-05900-9</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Buchner</surname><given-names>F</given-names> </name><name name-style="western"><surname>Wasem</surname><given-names>J</given-names> </name><name name-style="western"><surname>Schillo</surname><given-names>S</given-names> </name></person-group><article-title>Regression trees identify relevant interactions: can this improve the predictive performance of risk adjustment?</article-title><source>Health Econ</source><year>2017</year><month>01</month><volume>26</volume><issue>1</issue><fpage>74</fpage><lpage>85</lpage><pub-id pub-id-type="doi">10.1002/hec.3277</pub-id><pub-id pub-id-type="medline">26498581</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Christodoulou</surname><given-names>E</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>J</given-names> </name><name name-style="western"><surname>Collins</surname><given-names>GS</given-names> 
</name><name name-style="western"><surname>Steyerberg</surname><given-names>EW</given-names> </name><name name-style="western"><surname>Verbakel</surname><given-names>JY</given-names> </name><name name-style="western"><surname>Van Calster</surname><given-names>B</given-names> </name></person-group><article-title>A systematic review shows no performance benefit of machine learning over logistic regression for clinical prediction models</article-title><source>J Clin Epidemiol</source><year>2019</year><month>06</month><volume>110</volume><fpage>12</fpage><lpage>22</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2019.02.004</pub-id><pub-id pub-id-type="medline">30763612</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Roy</surname><given-names>J</given-names> </name><name name-style="western"><surname>Stewart</surname><given-names>WF</given-names> </name></person-group><article-title>Prediction modeling using EHR data: challenges, strategies, and a comparison of machine learning approaches</article-title><source>Med Care</source><year>2010</year><month>06</month><volume>48</volume><issue>6 Suppl</issue><fpage>S106</fpage><lpage>S113</lpage><pub-id pub-id-type="doi">10.1097/MLR.0b013e3181de9e17</pub-id><pub-id pub-id-type="medline">20473190</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Olza</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mill&#x00E1;n</surname><given-names>E</given-names> </name><name name-style="western"><surname>Rodr&#x00ED;guez-&#x00C1;lvarez</surname><given-names>MX</given-names> </name></person-group><article-title>Development and validation of predictive models for unplanned 
hospitalization in the Basque Country: analyzing the variability of non-deterministic algorithms</article-title><source>BMC Med Inform Decis Mak</source><year>2023</year><month>08</month><day>5</day><volume>23</volume><issue>1</issue><fpage>152</fpage><pub-id pub-id-type="doi">10.1186/s12911-023-02226-z</pub-id><pub-id pub-id-type="medline">37543596</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="web"><article-title>Dr. Jonathan Weiner and beyond the AI Buzz Informatics Grand Rounds 3/9/23</article-title><source>Johns Hopkins Medicine YouTube page</source><year>2023</year><month>03</month><day>14</day><access-date>2026-06-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.youtube.com/watch?v=rDij3K3N1Hk">https://www.youtube.com/watch?v=rDij3K3N1Hk</ext-link></comment></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hern&#x00E1;ndez Guillamet</surname><given-names>G</given-names> </name><name name-style="western"><surname>Morancho Pallaruelo</surname><given-names>AN</given-names> </name><name name-style="western"><surname>Mir&#x00F3; Mezquita</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Machine learning model for predicting mortality risk in patients with complex chronic conditions: retrospective analysis</article-title><source>Online J Public Health Inform</source><year>2023</year><volume>15</volume><fpage>e52782</fpage><pub-id pub-id-type="doi">10.2196/52782</pub-id><pub-id pub-id-type="medline">38223690</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kan</surname><given-names>HJ</given-names> </name><name name-style="western"><surname>Kharrazi</surname><given-names>H</given-names> </name><name 
name-style="western"><surname>Chang</surname><given-names>HY</given-names> </name><name name-style="western"><surname>Bodycombe</surname><given-names>D</given-names> </name><name name-style="western"><surname>Lemke</surname><given-names>K</given-names> </name><name name-style="western"><surname>Weiner</surname><given-names>JP</given-names> </name></person-group><article-title>Exploring the use of machine learning for risk adjustment: a comparison of standard and penalized linear regression models in predicting health care costs in older adults</article-title><source>PLoS One</source><year>2019</year><volume>14</volume><issue>3</issue><fpage>e0213258</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0213258</pub-id><pub-id pub-id-type="medline">30840682</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Kong</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kowalczyk</surname><given-names>W</given-names> </name><name name-style="western"><surname>Menzel</surname><given-names>S</given-names> </name><name name-style="western"><surname>B&#x00E4;ck</surname><given-names>T</given-names> </name><etal/></person-group><person-group person-group-type="editor"><name name-style="western"><surname>B&#x00E4;ck</surname><given-names>T</given-names> </name></person-group><article-title>Improving imbalanced classification by anomaly detection</article-title><source>Parallel Problem Solving from Nature &#x2013; PPSN XVI PPSN 2020 Lecture Notes in Computer Science</source><year>2020</year><publisher-name>Springer</publisher-name><pub-id pub-id-type="doi">10.1007/978-3-030-58112-1_35</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lin</surname><given-names>EJD</given-names> </name><name 
name-style="western"><surname>Hefner</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Zeng</surname><given-names>X</given-names> </name><etal/></person-group><article-title>A deep learning model for pediatric patient risk stratification</article-title><source>Am J Manag Care</source><year>2019</year><month>10</month><day>1</day><volume>25</volume><issue>10</issue><fpage>e310</fpage><lpage>e315</lpage><pub-id pub-id-type="medline">31622071</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="web"><article-title>Classification vs. prediction</article-title><source>Statistical thinking</source><access-date>2026-06-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.fharrell.com/post/classification/">https://www.fharrell.com/post/classification/</ext-link></comment></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Riley</surname><given-names>RD</given-names> </name><name name-style="western"><surname>Ensor</surname><given-names>J</given-names> </name><name name-style="western"><surname>Snell</surname><given-names>KIE</given-names> </name><etal/></person-group><article-title>Calculating the sample size required for developing a clinical prediction model</article-title><source>BMJ</source><year>2020</year><month>03</month><day>18</day><volume>368</volume><fpage>m441</fpage><pub-id pub-id-type="doi">10.1136/bmj.m441</pub-id><pub-id pub-id-type="medline">32188600</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Provost</surname><given-names>F</given-names> </name></person-group><article-title>Machine learning from imbalanced data sets 101 [Abstract]</article-title><access-date>2026-06-06</access-date><conf-name>AAAI&#x2019;2000 
Workshop on Learning from Imbalanced Data Sets</conf-name><conf-date>Jul 30, 2000</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://archive.nyu.edu/bitstream/2451/27763/2/CPP-02-00.pdf">https://archive.nyu.edu/bitstream/2451/27763/2/CPP-02-00.pdf</ext-link></comment></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kleppe</surname><given-names>A</given-names> </name></person-group><article-title>Area under the curve may hide poor generalisation to external datasets</article-title><source>ESMO Open</source><year>2022</year><month>04</month><volume>7</volume><issue>2</issue><fpage>100429</fpage><pub-id pub-id-type="doi">10.1016/j.esmoop.2022.100429</pub-id><pub-id pub-id-type="medline">35397433</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Saito</surname><given-names>T</given-names> </name><name name-style="western"><surname>Rehmsmeier</surname><given-names>M</given-names> </name></person-group><article-title>The precision-recall plot is more informative than the ROC plot when evaluating binary classifiers on imbalanced datasets</article-title><source>PLoS One</source><year>2015</year><volume>10</volume><issue>3</issue><fpage>e0118432</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0118432</pub-id><pub-id pub-id-type="medline">25738806</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Miao</surname><given-names>J</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>W</given-names> </name></person-group><article-title>Precision&#x2013;recall curve (PRC) classification trees</article-title><source>Evol 
Intel</source><year>2022</year><volume>15</volume><issue>3</issue><fpage>1545</fpage><lpage>1569</lpage><pub-id pub-id-type="doi">10.1007/s12065-021-00565-2</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Maltenfort</surname><given-names>MG</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Forrest</surname><given-names>CB</given-names> </name></person-group><article-title>Prediction of 30-day pediatric unplanned hospitalizations using the Johns Hopkins Adjusted Clinical Groups risk adjustment system</article-title><source>PLoS One</source><year>2019</year><volume>14</volume><issue>8</issue><fpage>e0221233</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0221233</pub-id><pub-id pub-id-type="medline">31415648</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tan</surname><given-names>JK</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Cheng</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Using the Johns Hopkins ACG Case-Mix System for population segmentation in a hospital-based adult patient population in Singapore</article-title><source>BMJ Open</source><year>2023</year><month>03</month><day>30</day><volume>13</volume><issue>3</issue><fpage>e062786</fpage><pub-id pub-id-type="doi">10.1136/bmjopen-2022-062786</pub-id><pub-id pub-id-type="medline">36997258</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="web"><article-title>ACG system v13.0 system documentation</article-title><source>Johns Hopkins 
Medicine</source><access-date>2026-06-11</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.hopkinsacg.org/document/acg-system-v13-0-system-documentation/">https://www.hopkinsacg.org/document/acg-system-v13-0-system-documentation/</ext-link></comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Morid</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Kawamoto</surname><given-names>K</given-names> </name><name name-style="western"><surname>Ault</surname><given-names>T</given-names> </name><name name-style="western"><surname>Dorius</surname><given-names>J</given-names> </name><name name-style="western"><surname>Abdelrahman</surname><given-names>S</given-names> </name></person-group><article-title>Supervised learning methods for predicting healthcare costs: systematic literature review and empirical evaluation</article-title><source>AMIA Annu Symp Proc</source><year>2018</year><volume>2017</volume><fpage>1312</fpage><lpage>1321</lpage><pub-id pub-id-type="medline">29854200</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kulkarni</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Pannu</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Koval</surname><given-names>AV</given-names> </name><etal/></person-group><article-title>A brief analysis of key machine learning methods for predicting Medicare payments related to physical therapy practices in the United States</article-title><source>Information</source><year>2021</year><volume>12</volume><issue>2</issue><fpage>57</fpage><pub-id pub-id-type="doi">10.3390/info12020057</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Friedman</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hastie</surname><given-names>T</given-names> </name><name name-style="western"><surname>Tibshirani</surname><given-names>R</given-names> </name></person-group><article-title>Regularization paths for generalized linear models via coordinate descent</article-title><source>J Stat Softw</source><year>2010</year><volume>33</volume><issue>1</issue><fpage>1</fpage><lpage>22</lpage><pub-id pub-id-type="doi">10.18637/jss.v033.i01</pub-id><pub-id pub-id-type="medline">20808728</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tay</surname><given-names>JK</given-names> </name><name name-style="western"><surname>Narasimhan</surname><given-names>B</given-names> </name><name name-style="western"><surname>Hastie</surname><given-names>T</given-names> </name></person-group><article-title>Elastic net regularization paths for all generalized linear models</article-title><source>J Stat Softw</source><year>2023</year><volume>106</volume><pub-id pub-id-type="doi">10.18637/jss.v106.i01</pub-id><pub-id pub-id-type="medline">37138589</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liaw</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wiener</surname><given-names>M</given-names> </name></person-group><article-title>Classification and regression by randomForest</article-title><source>R News</source><year>2002</year><access-date>2026-06-06</access-date><volume>2</volume><issue>3</issue><fpage>18</fpage><lpage>22</lpage><comment><ext-link ext-link-type="uri" 
xlink:href="https://journal.r-project.org/articles/RN-2002-022/RN-2002-022.pdf">https://journal.r-project.org/articles/RN-2002-022/RN-2002-022.pdf</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>T</given-names> </name><name name-style="western"><surname>He</surname><given-names>T</given-names> </name><name name-style="western"><surname>Benesty</surname><given-names>M</given-names> </name><etal/></person-group><article-title>xgboost: extreme gradient boosting</article-title><source>The Comprehensive R Archive Network (CRAN)</source><access-date>2026-06-06</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://CRAN.R-project.org/package=xgboost">https://CRAN.R-project.org/package=xgboost</ext-link></comment></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Robin</surname><given-names>X</given-names> </name><name name-style="western"><surname>Turck</surname><given-names>N</given-names> </name><name name-style="western"><surname>Hainard</surname><given-names>A</given-names> </name><etal/></person-group><article-title>pROC: an open-source package for R and S+ to analyze and compare ROC curves</article-title><source>BMC Bioinformatics</source><year>2011</year><month>03</month><day>17</day><volume>12</volume><fpage>77</fpage><pub-id pub-id-type="doi">10.1186/1471-2105-12-77</pub-id><pub-id pub-id-type="medline">21414208</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lemke</surname><given-names>KW</given-names> </name><name name-style="western"><surname>Forrest</surname><given-names>CB</given-names> </name><name 
name-style="western"><surname>Leff</surname><given-names>BA</given-names> </name><etal/></person-group><article-title>Patterns of morbidity across the lifespan: a population segmentation framework for classifying health care needs for all ages</article-title><source>Med Care</source><year>2024</year><month>11</month><day>1</day><volume>62</volume><issue>11</issue><fpage>732</fpage><lpage>740</lpage><pub-id pub-id-type="doi">10.1097/MLR.0000000000001898</pub-id><pub-id pub-id-type="medline">37962403</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nead</surname><given-names>KT</given-names> </name><name name-style="western"><surname>Hinkston</surname><given-names>CL</given-names> </name><name name-style="western"><surname>Wehner</surname><given-names>MR</given-names> </name></person-group><article-title>Cautions when using race and ethnicity in administrative claims data sets</article-title><source>JAMA Health Forum</source><year>2022</year><month>07</month><day>1</day><volume>3</volume><issue>7</issue><fpage>e221812</fpage><pub-id pub-id-type="doi">10.1001/jamahealthforum.2022.1812</pub-id><pub-id pub-id-type="medline">36218996</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kitchen</surname><given-names>C</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>HY</given-names> </name><name name-style="western"><surname>Weiner</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Kharrazi</surname><given-names>H</given-names> </name></person-group><article-title>Assessing the added value of vital signs extracted from electronic health records in healthcare risk adjustment models</article-title><source>Risk Manag Healthc 
Policy</source><year>2022</year><volume>15</volume><fpage>1671</fpage><lpage>1682</lpage><pub-id pub-id-type="doi">10.2147/RMHP.S356080</pub-id><pub-id pub-id-type="medline">36092549</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rao</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Jain</surname><given-names>R</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>M</given-names> </name><name name-style="western"><surname>Garg</surname><given-names>R</given-names> </name></person-group><article-title>Predictive interpretable analytics models for forecasting healthcare costs using open healthcare data</article-title><source>Healthc Anal</source><year>2024</year><month>12</month><volume>6</volume><fpage>100351</fpage><pub-id pub-id-type="doi">10.1016/j.health.2024.100351</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moynihan</surname><given-names>R</given-names> </name><name name-style="western"><surname>Sanders</surname><given-names>S</given-names> </name><name name-style="western"><surname>Michaleff</surname><given-names>ZA</given-names> </name><etal/></person-group><article-title>Impact of COVID-19 pandemic on utilisation of healthcare services: a systematic review</article-title><source>BMJ Open</source><year>2021</year><month>03</month><day>16</day><volume>11</volume><issue>3</issue><fpage>e045343</fpage><pub-id pub-id-type="doi">10.1136/bmjopen-2020-045343</pub-id><pub-id pub-id-type="medline">33727273</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Inventories of model parameter settings, patient characteristics by patient need group, full CIs around cross-validated 
performance, and supporting descriptions of variables depicted in <xref ref-type="fig" rid="figure2">Figure 2</xref>.</p><media xlink:href="formative_v10i1e92820_app1.docx" xlink:title="DOCX File, 52 KB"/></supplementary-material><supplementary-material id="app2"><label>Checklist 1</label><p>STROBE checklist.</p><media xlink:href="formative_v10i1e92820_app2.docx" xlink:title="DOCX File, 19 KB"/></supplementary-material></app-group></back></article>