<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v7i1e41725</article-id>
      <article-id pub-id-type="pmid">37234042</article-id>
      <article-id pub-id-type="doi">10.2196/41725</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Machine Learning and Causal Approaches to Predict Readmissions and Its Economic Consequences Among Canadian Patients With Heart Disease: Retrospective Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Marotta</surname>
            <given-names>Nicola</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Gartner</surname>
            <given-names>Daniel</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Rajkumar</surname>
            <given-names>Ethan</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Chemistry, Faculty of Science</institution>
            <institution>The University of British Columbia</institution>
            <addr-line>2036 Main Mall</addr-line>
            <addr-line>Vancouver, BC</addr-line>
            <country>Canada</country>
            <phone>1 (604) 822 3266</phone>
            <email>er12da@student.ubc.ca</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0507-0676</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Nguyen</surname>
            <given-names>Kevin</given-names>
          </name>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2118-5377</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Radic</surname>
            <given-names>Sandra</given-names>
          </name>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5081-158X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Paa</surname>
            <given-names>Jubelle</given-names>
          </name>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0809-0189</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Geng</surname>
            <given-names>Qiyang</given-names>
          </name>
          <degrees>BASc</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0001-3231-6777</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Chemistry, Faculty of Science</institution>
        <institution>The University of British Columbia</institution>
        <addr-line>Vancouver, BC</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Computer Science, Faculty of Science</institution>
        <institution>The University of British Columbia</institution>
        <addr-line>Vancouver, BC</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>School of Biomedical Engineering, Faculty of Applied Sciences</institution>
        <institution>The University of British Columbia</institution>
        <addr-line>Vancouver, BC</addr-line>
        <country>Canada</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Ethan Rajkumar <email>er12da@student.ubc.ca</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>26</day>
        <month>5</month>
        <year>2023</year>
      </pub-date>
      <volume>7</volume>
      <elocation-id>e41725</elocation-id>
      <history>
        <date date-type="received">
          <day>6</day>
          <month>8</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>22</day>
          <month>11</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>21</day>
          <month>3</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>7</day>
          <month>4</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Ethan Rajkumar, Kevin Nguyen, Sandra Radic, Jubelle Paa, Qiyang Geng. Originally published in JMIR Formative Research (https://formative.jmir.org), 26.05.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2023/1/e41725" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Unplanned patient readmissions within 30 days of discharge pose a substantial challenge in Canadian health care economics. To address this issue, risk stratification, machine learning, and linear regression paradigms have been proposed as potential predictive solutions. Ensemble machine learning methods, such as stacked ensemble models with boosted tree algorithms, have shown promise for early risk identification in specific patient groups.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to implement an ensemble model with submodels for structured data, compare metrics, evaluate the impact of optimized data manipulation with principal component analysis on shorter readmissions, and quantitatively verify the causal relationship between expected length of stay (ELOS) and resource intensity weight (RIW) value for a comprehensive economic perspective.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This retrospective study used Python 3.9 and streamlined libraries to analyze data obtained from the Discharge Abstract Database covering 2016 to 2021. The study used 2 sub–data sets, clinical and geographical data sets, to predict patient readmission and analyze its economic implications, respectively. A stacking classifier ensemble model was used after principal component analysis to predict patient readmission. Linear regression was performed to determine the relationship between RIW and ELOS.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The ensemble model achieved precision and slightly higher recall (0.49 and 0.68), indicating a higher instance of false positives. The model was able to predict cases better than other models in the literature. Per the ensemble model, readmitted women and men aged 40 to 44 and 35 to 39 years, respectively, were more likely to use resources. The regression tables verified the causality of the model and confirmed the trend that patient readmission is much more costly than continued hospital stay without discharge for both the patient and health care system.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study validates the use of hybrid ensemble models for predicting economic cost models in health care with the goal of reducing the bureaucratic and utility costs associated with hospital readmissions. The availability of robust and efficient predictive models, as demonstrated in this study, can help hospitals focus more on patient care while maintaining low economic costs. This study predicts the relationship between ELOS and RIW, which can indirectly impact patient outcomes by reducing administrative tasks and physicians’ burden, thereby reducing the cost burdens placed on patients. It is recommended that changes to the general ensemble model and linear regressions be made to analyze new numerical data for predicting hospital costs. Ultimately, the proposed work hopes to emphasize the advantages of implementing hybrid ensemble models in forecasting health care economic cost models, empowering hospitals to prioritize patient care while simultaneously decreasing administrative and bureaucratic expenses.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>patient readmission</kwd>
        <kwd>health care economics</kwd>
        <kwd>ensemble</kwd>
        <kwd>prediction model</kwd>
        <kwd>classification</kwd>
        <kwd>linear regression resource intensity value</kwd>
        <kwd>hospital</kwd>
        <kwd>health care</kwd>
        <kwd>principal component analysis</kwd>
        <kwd>PCA</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>An open problem that has arisen in Canadian health care economics is the detrimental cost caused by unplanned patient readmissions in hospitals. North American hospitals have defined patient readmissions as the admittance of patients within 30 days after discharge [<xref ref-type="bibr" rid="ref1">1</xref>]. In Canada, 1 in 11 patients experience readmittance, resulting in expenses of &#62;2.3 billion Canadian dollars per year [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Consequently, this enormous expense exemplifies the bidirectional consequences of patient readmission by placing strain on individualized patient care while creating additional expenses for hospitals [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Furthermore, the COVID-19 pandemic has exacerbated many inequities that revolved around patient readmission owing to inflation. For example, patients with lower income residing in less wealthy neighborhoods were at a higher risk of being readmitted after treatment [<xref ref-type="bibr" rid="ref3">3</xref>]. Reducing these high readmission rates would prove useful in improving patient outcomes while alleviating financial concerns, for patients and hospitals alike [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>].</p>
        <p>One of the ways to help reduce patient readmissions is to adopt a preventive approach [<xref ref-type="bibr" rid="ref6">6</xref>]. Risk stratification provides a standardized criterion for assigning a risk status to patients for direct care and to improve overall health outcomes. Machine learning (ML) paradigms have been used to guide clinicians in their efforts to enhance diagnosis and risk stratification [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. Using ML, clinicians can be guided to make accurate diagnoses, improve patient outcomes, and even identify patients at risk of developing certain conditions that can be translatable to readmission and its economic cost. A study by Baruah [<xref ref-type="bibr" rid="ref8">8</xref>] adopted a detailed approach by analyzing electronic health records using a word convolutional neural network using a “Bag-of-Words.” Although using discharge summaries can allow for the personalization of patient prediction, a work-around for the number of resources required to train a high-throughput model such as word convolutional neural network is of high concern [<xref ref-type="bibr" rid="ref8">8</xref>]. Furthermore, Baruah’s [<xref ref-type="bibr" rid="ref8">8</xref>] model was limited in addressing the high class imbalance in shorter time frame readmission tasks in contrast to longer time frame readmission tasks [<xref ref-type="bibr" rid="ref8">8</xref>]. Solving this short time frame readmission problem can allow for a faster prevention of unplanned patient readmission [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>].</p>
        <p>Although deep learning models were used for risk stratification in health care, they had limited success because of the large amount of data required for training [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. In addition, incorporating comorbidities and their time periods in models could lead to the confounding of other variables [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref12">12</xref>]. However, Ben-Assuli et al [<xref ref-type="bibr" rid="ref13">13</xref>] found that using multiple time periods and ensemble ML methods on large-scale data enabled early risk identification in specific patient groups [<xref ref-type="bibr" rid="ref13">13</xref>]. Stacked ensemble models, including those with boosted tree algorithms, demonstrated strong performance in predicting unplanned patient readmissions by reducing bias from individual models and sensitivities to rare classes [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. These models also offered better interpretability for health care workers and nonexperts in ML, thanks to their transparent results [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p>After determining whether the patients will be readmitted within the next few days, the economic consequences to both the hospital and the patient will be estimated [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. This involves finding the causal relationship between patients’ expected length of stay (ELOS) and their resource use, which are both continuous variables for determining the economic aftermath of hospital readmission [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. However, if given a time period, linear regressions may prove useful in predicting and comparing the trends behind the relationships between variables such as ELOS and readmission in real time [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>].</p>
      </sec>
      <sec>
        <title>Goal of This Study</title>
        <p>The objectives of the proposed work were 3-fold. The first, main goal of the project was to implement an ensemble model with individual submodels on the structured data and compare the resulting metrics to metrics resulting from other models that have also explored patient readmission in a heart-disease context. The second goal was to determine the contribution of optimized data manipulation through principal component analysis (PCA) to solving the problem of shorter time frame readmissions. The study also aimed to verify the causal relationship between the ELOS and resource intensity weight (RIW) value. Providing an understanding of this relationship in a quantitative and causal manner can allow for an in-depth economic perspective, as opposed to only readmittance within 30 days.</p>
        <p>Ultimately, the economic and predictive aspects of this model are intended to provide a view on resource allocation for health institutes to better predict readmittance and improve patient-clinician outcomes [<xref ref-type="bibr" rid="ref5">5</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Resources Used</title>
        <sec>
          <title>Population Study</title>
          <p>The study used a systematic methodology with Python 3.9 and streamlined libraries to analyze the data obtained from the Discharge Abstract Database (DAD) covering 2016 to 2021 [<xref ref-type="bibr" rid="ref16">16</xref>]. Access to the database was facilitated through the Abacus Data Network, a collaborative effort between several universities [<xref ref-type="bibr" rid="ref17">17</xref>]. The study used 2 sub–data sets, clinical and geographical data sets, to predict patient readmission and analyze economic implications, respectively. The comprehensive documentation provided by Statistics Canada allowed for a robust analysis of the data set. The workflow, illustrated in <xref rid="figure1" ref-type="fig">Figure 1</xref>, shows the process of data analysis and visualization using the matplotlib and seaborn libraries.</p>
          <fig id="figure1" position="float">
            <label>Figure 1</label>
            <caption>
              <p>Study workflow: data collection (blue), data preparation and machine learning implementation (orange), and outputs (green). DAD: Discharge Abstract Database; PCA: principal component analysis.</p>
            </caption>
            <graphic xlink:href="formative_v7i1e41725_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>Design</title>
          <p>Similar to the study by Baruah [<xref ref-type="bibr" rid="ref8">8</xref>], during clinical and geographical preprocessing, individuals were screened for specific criteria. Using the International Classification of Diseases, 10th revision (ICD-10) and major complication or comorbidity (MCC) codes similar to the models by Baruah [<xref ref-type="bibr" rid="ref8">8</xref>] and Liu et al [<xref ref-type="bibr" rid="ref18">18</xref>], the examination of adult patients and exclusion of individuals aged &#60;18 years were performed to prevent any confounding variables “spilling” onto both models. The ICD-10 PCA codes for the diseases included I092, I098, I099, I100, I101, I11, I13, I500, I501, I509, I516, I518, I519, I520, I521, and I528 [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. As for MCC codes, only code 5 corresponded to cardiovascular diseases [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>]. Factors that were not considered were clinical gestation of delivery (“GES_AGRP”) along with weight group (“WGT_GRP”), as they were only a direct consequence of the age group that was eliminated [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>].</p>
        </sec>
        <sec>
          <title>Clinical Data Set</title>
          <sec>
            <title>Clinical Preprocessing</title>
            <p><xref rid="figure2" ref-type="fig">Figure 2</xref> shows the manipulation done and models trained on the clinical data set of DAD. Isolating for a group of patients who share similar clinical characteristics or medical conditions can be useful for identifying trends and patterns in patient care and outcomes, as well as for conducting research on specific medical conditions such as heart disease.</p>
            <p>A clinical preprocessing step was performed to isolate for specific criteria and remove any potential confounding variables. Arbitrary admission and discharge dates were chosen based on previous calculations to avoid errors or inconsistencies in the data set. To ensure that the minimum number of relative admission dates was ≥0, dates were shifted to a minimum of January 5 of the corresponding data set year. This adjustment enabled the creation of the “LTORET30Days” columns. For feature selection and dimensionality reduction, PCA was used, as it was a common methodology used for high-dimensionality data sets.</p>
            <fig id="figure2" position="float">
              <label>Figure 2</label>
              <caption>
                <p>Clinical workflow: data collection (blue), data preparation and machine learning implementation (orange), implementations (purple), and outputs (green). LGBM: LightGBM; PCA: principal component analysis.</p>
              </caption>
              <graphic xlink:href="formative_v7i1e41725_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
          <sec>
            <title>PCA Process</title>
            <p>According to the PCA criterion, the components to use were described by the minimum number of features required to obtain a cumulative variance of at least 80% [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. The aim was to reduce the dimensionality of the feature space while retaining as much of the original variance as possible [<xref ref-type="bibr" rid="ref19">19</xref>-<xref ref-type="bibr" rid="ref21">21</xref>]. After obtaining an encoded vector in the form of an array, the data were run through several ensemble algorithms. The ensemble algorithm consisted of several submodels, including random forest classifiers, XGBoost (XGB), and LightGBM (LGBM). Each subclassifier’s output was stacked, allowing for a logistic regression to learn the weighted distribution of the subclassifiers to ensure high predictive accuracy. After dimensionality reduction and splitting into training and testing data sets, the final sample size was n=83,083 for nonreadmitted patients and n=10,271 for readmitted patients.</p>
          </sec>
          <sec>
            <title>Submodels: LGBM and XGB</title>
            <p>LGBM and XGB presented a relative advantage with regard to efficient computation and high accuracy on a wide range of data sets, including those with high dimensionality and categorical features [<xref ref-type="bibr" rid="ref22">22</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]. Both methods required sequential decision tree generation via error combination or level-wise tree growth. Having dimensionality-reduced data would have decreased the maximum function, <italic>δ<sub>loss</sub></italic>, for LGBM, allowing for lower error and lower changes in <italic>∇<sub>prediction</sub></italic> [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. Similarly, it was extrapolated that a higher maximum depth for XGB would be achieved, as the number of features was lower [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. An in-depth analysis of LGBM and XGB can be found in Figures S1-S11 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          </sec>
          <sec>
            <title>Random Forest</title>
            <p>Random forest was chosen to improve the interpretability of the model when used in conjunction with PCA [<xref ref-type="bibr" rid="ref21">21</xref>]. As the data set had a large number of features, random forest’s computational cost was high. However, after performing dimensionality reduction using PCA, the computational cost of random forest was substantially reduced, making it a practical option for large data sets [<xref ref-type="bibr" rid="ref21">21</xref>]. During the testing phase, the random forest classifier predicted the final decision of a new data point, denoted by <italic>C</italic><sub>rf</sub><sup><italic>B</italic></sup>(<italic>x</italic>), by aggregating the prediction results of all decision trees using a majority vote. The classifier selected the class with the highest number of votes as the final prediction, resulting in an accurate and interpretable model. The algorithm design for random forest is formulated in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          </sec>
          <sec>
            <title>Ensemble Models: Logistic Regression</title>
            <p>The ensemble model used in this study was a stacking classifier model with a metamodel (final estimator), which was a logistic regression model [<xref ref-type="bibr" rid="ref26">26</xref>]. The metamodel took the outputs of the base models as inputs and optimally combined their predictions to ensure high predictive performance [<xref ref-type="bibr" rid="ref26">26</xref>]. The ensemble model consisted of 4 base models and was defined, trained, and tested using scikit-learn's ensemble module, which was the default. This produced an optimal workflow, which is presented in <xref rid="figure2" ref-type="fig">Figure 2</xref>. Detailed formalisms are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          </sec>
          <sec>
            <title>Hyperparameter Tuning</title>
            <p>To optimize the performance of each base model, hyperparameter tuning was done using a range of values for each parameter [<xref ref-type="bibr" rid="ref27">27</xref>]. The models were evaluated based on their <italic>F</italic><sub>1</sub>-score or recall, and scikit-learn's GridSearchCV and RandomizedSearchCV were used to fine-tune the parameters [<xref ref-type="bibr" rid="ref27">27</xref>].</p>
            <p>In addition, a custom function was used to optimize the final estimator of the stacking model, specifically for the logistic regression component [<xref ref-type="bibr" rid="ref21">21</xref>]. <xref ref-type="table" rid="table1">Table 1</xref> lists the parameters used for all the models. By tuning the hyperparameters of the base models and customizing the final estimator for the stacking model, we aimed to improve the overall performance and accuracy of the ML model [<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref30">30</xref>].</p>
            <table-wrap position="float" id="table1">
              <label>Table 1</label>
              <caption>
                <p>Tuned parameters organized according to submodels and estimators.</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="350"/>
                <col width="650"/>
                <thead>
                  <tr valign="top">
                    <td>Model</td>
                    <td>Parameters</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td>XGB<sup>a</sup></td>
                    <td>max_depth, n_estimators, and learning_rate</td>
                  </tr>
                  <tr valign="top">
                    <td>Random forest</td>
                    <td>bootstrap and max_depth</td>
                  </tr>
                  <tr valign="top">
                    <td>LGBM<sup>b</sup></td>
                    <td>learning_rate, n_estimators, num_leaves, min_child_samples, subsample, max_depth, colsample_bytree, reg_alpha, reg_lambda, and min_data_in_leaf</td>
                  </tr>
                  <tr valign="top">
                    <td>Logistic regression (stacking ensemble)</td>
                    <td>solver, penalty, and C</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table1fn1">
                  <p><sup>a</sup>XGB: XGBoost.</p>
                </fn>
                <fn id="table1fn2">
                  <p><sup>b</sup>LGBM: LightGBM.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
          </sec>
        </sec>
      </sec>
      <sec>
        <title>Evaluation of the Ensemble Model Outcomes</title>
        <sec>
          <title>Evaluation Metrics</title>
          <p>Statistical analysis was performed to ensure that the model was robust and valid for improving patient outcomes. Three evaluation metrics were used to evaluate the robustness of the model.</p>
          <list list-type="order">
            <list-item>
              <p>Precision is the ratio between the true positive observations and the total predicted positive observations (true positives plus false positives) obtained from the confusion matrix [<xref ref-type="bibr" rid="ref31">31</xref>]. In other words, it provides the proportion of retrieved items that are relevant. This was a crucial quantity, especially given that there was high class imbalance:</p>
              <p>
                <graphic xlink:href="formative_v7i1e41725_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
              </p>
            </list-item>
            <list-item>
              <p>Recall is the ratio between the number of true positives and the sum of the number of true positives and number of false negatives [<xref ref-type="bibr" rid="ref31">31</xref>]. The recall score provides the proportion of relevant items that are retrieved [<xref ref-type="bibr" rid="ref31">31</xref>]. The recall score was useful in determining the model validity regardless of class imbalance owing to the measurement of false negatives:</p>
              <p>
                <graphic xlink:href="formative_v7i1e41725_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
              </p>
            </list-item>
            <list-item>
              <p>Balancing the 2 quantities required the use of <italic>F</italic><sub>1</sub>-score, which serves as the harmonic mean of the precision score and recall score [<xref ref-type="bibr" rid="ref31">31</xref>]:</p>
              <p>
                <graphic xlink:href="formative_v7i1e41725_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
              </p>
            </list-item>
          </list>
          <p>All the scores for the hyperparameter-tuned data were plotted on a bar graph to ensure a clear presentation of the data [<xref ref-type="bibr" rid="ref31">31</xref>].</p>
        </sec>
        <sec>
          <title>Geographical Data Set</title>
          <sec>
            <title>Feature Selections</title>
            <p>To determine the relationship between ELOS and RIW, 2 continuous variables that have been shown to be positively correlated with improved patient outcomes, a linear regression analysis was conducted [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref34">34</xref>]. RIW is a weighted measure of the anticipated use of resources associated with various demographic, diagnostic, and surgical procedure characteristics of an individual [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>]. <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> discusses the requirements for the calculation and formulation of RIW [<xref ref-type="bibr" rid="ref35">35</xref>]. Therefore, linear regression analysis has the potential to provide a quantifiable measure of the correlation between these variables, thereby meeting the third objective of the research paper, which is to conduct an in-depth economic analysis [<xref ref-type="bibr" rid="ref29">29</xref>].</p>
            <p>To ensure that the results were not biased by confounding factors, the linear regression analyses were conducted separately for each age group, gender, and readmission column class [<xref ref-type="bibr" rid="ref33">33</xref>]. This approach ensured that any potential effects of these variables were taken into account. <xref rid="figure3" ref-type="fig">Figure 3</xref> demonstrates the approach used for the geographical data sets.</p>
            <fig id="figure3" position="float">
              <label>Figure 3</label>
              <caption>
                <p>Geographic workflow: data collection (blue), data preparation and regression implementation (orange), and outputs (green). MCC: major complication or comorbidity.</p>
              </caption>
              <graphic xlink:href="formative_v7i1e41725_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
          <sec>
            <title>Main and Controlled Geographic Data Set Variables</title>
            <p>After the data were isolated for individuals aged &#62;18 years and the MCC codes, the Python pandas library was used to condition the data set on covariates. The entire data set was then placed into specific clusters based on this condition. First, individuals were clustered according to whether they had the same patient readmission column value, and then they were split by gender. Afterward, each data point was separated into age clusters. There were 2 gender data clusters for each of the 2 readmitted clusters and 15 age clusters for each of the 4 resulting clusters, resulting in 60 linear regressions being performed. To clarify, the main independent variable was ELOS, and the dependent variable was RIW. The data were split to verify the hypothesis that there was indeed an economic benefit to extending a patient’s length of stay rather than readmitting the patient.</p>
          </sec>
        </sec>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was exempt from research ethics review, as it was a secondary analysis of research data. As data were received directly from acute care facilities or from their respective health or regional authority or ministry or department of health, facilities in all provinces and territories except Quebec were required to report. The authors do not claim any right to the data, as they are the property of Statistics Canada along with the Abacus Student Network [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>The results of the main study are presented in this section. The results for the PCA, feature selection stages, and more data can be found in section B in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
      <sec>
        <title>Classification Reports</title>
        <p>The evaluation metrics for the ensemble model were presented using classification reports (<xref ref-type="table" rid="table2">Table 2</xref>). In this context, class 0 represented the model’s performance for the negative class (ie, patients who did not return within 30 days), and class 1 represented the model’s performance for the positive class (ie, patients who did return within 30 days). The support column indicated how many examples of each class there were in the test set.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Classification reports for different models.<sup>a</sup></p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="330"/>
            <col width="210"/>
            <col width="210"/>
            <col width="220"/>
            <thead>
              <tr valign="bottom">
                <td colspan="2">Model type and class</td>
                <td>Precision</td>
                <td>Recall</td>
                <td><italic>F</italic><sub>1</sub>-score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>XGBoost</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0<sup>b</sup></td>
                <td>0.92</td>
                <td>0.99</td>
                <td>0.95</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1<sup>c</sup></td>
                <td>0.79</td>
                <td>0.31</td>
                <td>0.44</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Random forest</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0</td>
                <td>0.93</td>
                <td>0.97</td>
                <td>0.95</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1</td>
                <td>0.65</td>
                <td>0.39</td>
                <td>0.48</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>LightGBM</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0</td>
                <td>0.96</td>
                <td>0.91</td>
                <td>0.93</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1</td>
                <td>0.49</td>
                <td>0.68</td>
                <td>0.57</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Ensemble model<sup>d</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>0</td>
                <td>0.96</td>
                <td>0.91</td>
                <td>0.93</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>1</td>
                <td>0.49</td>
                <td>0.68</td>
                <td>0.57</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>All of these models have been hyperparameter tuned.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>For all models, class 0 contains n=16,592.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>For all models, class 1 contains n=2079.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>Tuned submodels and tuned ensemble models.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Correlation Between Inpatient RIW and ELOS</title>
        <p>A least squares linear regression model was fitted to the ELOS and RIW value columns of a geographical data set, and a summary of the best-fitted lines was obtained (<xref ref-type="table" rid="table3">Tables 3</xref>-<xref ref-type="table" rid="table5">5</xref>). The corresponding plots (Figures S6 and S7 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) and tables (<xref ref-type="table" rid="table6">Tables 6</xref>-<xref ref-type="table" rid="table8">8</xref>) produced by the least squares linear regression were also obtained, and the data were stratified by readmission status, age group, and gender. The coefficient of determination (<italic>R</italic><sup>2</sup>) was included, and it took a value between 0 and 1, providing a sense of how correlated the 2 variables were, with a value of 1 indicating perfect correlation. Note that all age groups had a <italic>P</italic>&#60;.001. The threshold was chosen as the 2 variables were expected to be highly correlated, and the root mean square error was used to measure the distance between predicted and actual values.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Regression lines fitted for women who were readmitted within 30 days, separated by age groups.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="210"/>
            <col width="130"/>
            <col width="130"/>
            <col width="120"/>
            <col width="140"/>
            <col width="120"/>
            <thead>
              <tr valign="bottom">
                <td>Age group (years)</td>
                <td>Slope (expected length of stay)</td>
                <td>Intercept</td>
                <td><italic>R</italic><sup>2</sup> adjusted</td>
                <td>RMSE<sup>a</sup></td>
                <td><italic>F</italic> statistics</td>
                <td>Sample size, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>18-24</td>
                <td>0.196661</td>
                <td>–0.038496</td>
                <td>0.469363</td>
                <td>1.222708</td>
                <td>67.339641</td>
                <td>76</td>
              </tr>
              <tr valign="top">
                <td>25-29</td>
                <td>0.236855</td>
                <td>–0.032451</td>
                <td>0.482341</td>
                <td>2.067290</td>
                <td>128.653235</td>
                <td>138</td>
              </tr>
              <tr valign="top">
                <td>30-34</td>
                <td>0.195582</td>
                <td>0.009696</td>
                <td>0.308630</td>
                <td>2.507248</td>
                <td>69.299619</td>
                <td>154</td>
              </tr>
              <tr valign="top">
                <td>35-39</td>
                <td>0.455326</td>
                <td>–1.431780</td>
                <td>0.587717</td>
                <td>3.356237</td>
                <td>213.402169</td>
                <td>150</td>
              </tr>
              <tr valign="top">
                <td>40-44</td>
                <td>0.776407</td>
                <td>–3.014587</td>
                <td>0.573208</td>
                <td>6.474491</td>
                <td>284.385704</td>
                <td>212</td>
              </tr>
              <tr valign="top">
                <td>45-49</td>
                <td>0.355912</td>
                <td>–0.681188</td>
                <td>0.626838</td>
                <td>2.286141</td>
                <td>572.132763</td>
                <td>341</td>
              </tr>
              <tr valign="top">
                <td>50-54</td>
                <td>0.338375</td>
                <td>–0.350636</td>
                <td>0.656287</td>
                <td>1.246735</td>
                <td>990.071951</td>
                <td>519</td>
              </tr>
              <tr valign="top">
                <td>55-59</td>
                <td>0.269042</td>
                <td>0.021663</td>
                <td>0.500513</td>
                <td>1.631754</td>
                <td>776.589838</td>
                <td>775</td>
              </tr>
              <tr valign="top">
                <td>60-64</td>
                <td>0.266451</td>
                <td>0.023355</td>
                <td>0.401983</td>
                <td>1.507652</td>
                <td>755.872164</td>
                <td>1124</td>
              </tr>
              <tr valign="top">
                <td>65-69</td>
                <td>0.361558</td>
                <td>–0.433117</td>
                <td>0.562777</td>
                <td>1.813054</td>
                <td>1821.045541</td>
                <td>1415</td>
              </tr>
              <tr valign="top">
                <td>70-74</td>
                <td>0.346215</td>
                <td>–0.373987</td>
                <td>0.455203</td>
                <td>2.111974</td>
                <td>1393.857933</td>
                <td>1668</td>
              </tr>
              <tr valign="top">
                <td>75-79</td>
                <td>0.280143</td>
                <td>–0.088512</td>
                <td>0.499735</td>
                <td>1.388378</td>
                <td>1795.095118</td>
                <td>1797</td>
              </tr>
              <tr valign="top">
                <td>&#62;80</td>
                <td>0.280403</td>
                <td>–0.140523</td>
                <td>0.321200</td>
                <td>1.600127</td>
                <td>1975.140727</td>
                <td>4173</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>RMSE: root mean squared error.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>The 4 types of submodels.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="300"/>
            <col width="350"/>
            <col width="350"/>
            <thead>
              <tr valign="bottom">
                <td>Ensemble model</td>
                <td>Tuned submodels (Y<sup>a</sup> or N<sup>b</sup>)</td>
                <td>Tuned LR<sup>c</sup> (Y or N)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>N</td>
                <td>N</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>N</td>
                <td>Y</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Y</td>
                <td>N</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>Y</td>
                <td>Y</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>Y: yes.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>N: no.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>LR: logistic regression.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Comparison of existing literature values.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="270"/>
            <col width="330"/>
            <col width="400"/>
            <thead>
              <tr valign="top">
                <td>Author name or literature values</td>
                <td>Description of model</td>
                <td>Comparison to current literature values with precision, recall, and <italic>F</italic><sub>1</sub>-score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Sharma et al [<xref ref-type="bibr" rid="ref36">36</xref>]: “Predicting 30-Day Readmissions in Patients With Heart Failure Using Administrative Data: A Machine Learning Approach”</td>
                <td>Sharma et al’s [<xref ref-type="bibr" rid="ref36">36</xref>] implementation of XGBoost created a precision-recall curve. Their precision and recall balance for class 1 was significantly lower than that of the ensemble model. However, the ensemble model proposed in this work allows for high balance.</td>
                <td>Sharma et al [<xref ref-type="bibr" rid="ref36">36</xref>] used a precision-recall curve to evaluate the performance of their model. The bias-variance trade-off was observed to be high during the analysis. The primary evaluation metric used in the study was the AUC<sup>a</sup>, which was not used for the proposed work here.</td>
              </tr>
              <tr valign="top">
                <td>Jamei et al [<xref ref-type="bibr" rid="ref37">37</xref>]: “Predicting All-Cause Risk of 30 Day Hospital Readmissions Using Artificial Neural Networks (ANN)”</td>
                <td>Jamei et al [<xref ref-type="bibr" rid="ref37">37</xref>] predicted patient readmission using a neural network. Their precision and recall balance was skewed, as the precision for their models was low, yet the recall was high. This results in high variance but low bias.</td>
                <td>The following scores were given for the 2-layer neural network of Jamei et al [<xref ref-type="bibr" rid="ref37">37</xref>], with the number of features being high: precision=23%, recall=59%, and <italic>F</italic><sub>1</sub>-score=16.5%. This indicates that the proposed model in this work has a significant advantage compared with an ANN<sup>b</sup>.</td>
              </tr>
              <tr valign="top">
                <td>Ho et al [<xref ref-type="bibr" rid="ref38">38</xref>]: “Predicting Readmission at Early Hospitalization Using Electronic Health Data: A Customized Model Development”</td>
                <td>Ho et al [<xref ref-type="bibr" rid="ref38">38</xref>] predicted readmission within a 24-month period. The model they used was an XGBoost model that had access to specific laboratory data in addition to the variables addressed in our work.</td>
                <td>The following scores were present in the readmission stage: recall score of 80% and a precision score of 76%. However, these scores may be higher overall due to the presence of more personalized data such as specific laboratory results for each patient. Furthermore, Ho et al [<xref ref-type="bibr" rid="ref38">38</xref>] do not seem to stratify based on specific diseases, which could result in bias affecting this score.</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>AUC: area under the curve.</p>
            </fn>
            <fn id="table5fn2">
              <p><sup>b</sup>ANN: artificial neural networks.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Regression lines fitted for men who were not readmitted within 30 days, separated by age groups.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="210"/>
            <col width="130"/>
            <col width="130"/>
            <col width="120"/>
            <col width="140"/>
            <col width="120"/>
            <thead>
              <tr valign="bottom">
                <td>Age group (years)</td>
                <td>Slope (expected length of stay)</td>
                <td>Intercept</td>
                <td><italic>R</italic><sup>2</sup> adjusted</td>
                <td>RMSE<sup>a</sup></td>
                <td><italic>F</italic> statistics</td>
                <td>Sample size, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>18-24</td>
                <td>0.133004</td>
                <td>0.603021</td>
                <td>0.301397</td>
                <td>1.165577</td>
                <td>228.362523</td>
                <td>528</td>
              </tr>
              <tr valign="top">
                <td>25-29</td>
                <td>0.332606</td>
                <td>–0.276038</td>
                <td>0.614197</td>
                <td>1.609255</td>
                <td>718.990166</td>
                <td>452</td>
              </tr>
              <tr valign="top">
                <td>30-34</td>
                <td>0.492425</td>
                <td>–1.069907</td>
                <td>0.697588</td>
                <td>2.279228</td>
                <td>1521.145371</td>
                <td>660</td>
              </tr>
              <tr valign="top">
                <td>35-39</td>
                <td>0.447525</td>
                <td>–0.844292</td>
                <td>0.653325</td>
                <td>2.407347</td>
                <td>1902.504340</td>
                <td>1010</td>
              </tr>
              <tr valign="top">
                <td>40-44</td>
                <td>0.466519</td>
                <td>–0.840117</td>
                <td>0.647550</td>
                <td>2.075903</td>
                <td>3118.868054</td>
                <td>1698</td>
              </tr>
              <tr valign="top">
                <td>45-49</td>
                <td>0.380460</td>
                <td>–0.308439</td>
                <td>0.583264</td>
                <td>1.770296</td>
                <td>3957.666828</td>
                <td>2828</td>
              </tr>
              <tr valign="top">
                <td>50-54</td>
                <td>0.420954</td>
                <td>–0.557944</td>
                <td>0.626193</td>
                <td>2.074092</td>
                <td>7922.913006</td>
                <td>4730</td>
              </tr>
              <tr valign="top">
                <td>55-59</td>
                <td>0.410799</td>
                <td>–0.440937</td>
                <td>0.570193</td>
                <td>2.223704</td>
                <td>9108.272364</td>
                <td>6866</td>
              </tr>
              <tr valign="top">
                <td>60-64</td>
                <td>0.421378</td>
                <td>–0.502756</td>
                <td>0.471093</td>
                <td>2.769979</td>
                <td>7587.014726</td>
                <td>8518</td>
              </tr>
              <tr valign="top">
                <td>65-69</td>
                <td>0.341375</td>
                <td>–0.053999</td>
                <td>0.514460</td>
                <td>2.144153</td>
                <td>10,030.832915</td>
                <td>9467</td>
              </tr>
              <tr valign="top">
                <td>70-74</td>
                <td>0.327909</td>
                <td>–0.015719</td>
                <td>0.476918</td>
                <td>1.992737</td>
                <td>8977.140732</td>
                <td>9846</td>
              </tr>
              <tr valign="top">
                <td>75-79</td>
                <td>0.331579</td>
                <td>–0.114296</td>
                <td>0.447201</td>
                <td>2.177262</td>
                <td>7031.801936</td>
                <td>8692</td>
              </tr>
              <tr valign="top">
                <td>&#62;80</td>
                <td>0.296201</td>
                <td>–0.082061</td>
                <td>0.269552</td>
                <td>2.414906</td>
                <td>6006.483269</td>
                <td>16,275</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table6fn1">
              <p><sup>a</sup>RMSE: root mean squared error.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table7">
          <label>Table 7</label>
          <caption>
            <p>Regression lines fitted for men who were readmitted within 30 days, separated by age groups.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="210"/>
            <col width="130"/>
            <col width="130"/>
            <col width="120"/>
            <col width="140"/>
            <col width="120"/>
            <thead>
              <tr valign="bottom">
                <td>Age group (years)</td>
                <td>Slope (expected length of stay)</td>
                <td>Intercept</td>
                <td><italic>R</italic><sup>2</sup> adjusted</td>
                <td>RMSE<sup>a</sup></td>
                <td><italic>F</italic> statistics</td>
                <td>Sample size, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>18-24</td>
                <td>0.13304</td>
                <td>–0.123251</td>
                <td>0.659408</td>
                <td>1.936411</td>
                <td>159.756983</td>
                <td>83</td>
              </tr>
              <tr valign="top">
                <td>25-29</td>
                <td>0.434780</td>
                <td>–1.444296</td>
                <td>0.563547</td>
                <td>3.009955</td>
                <td>123.663857</td>
                <td>96</td>
              </tr>
              <tr valign="top">
                <td>30-34</td>
                <td>0.205049</td>
                <td>0.338259</td>
                <td>0.414037</td>
                <td>1.963718</td>
                <td>109.108782</td>
                <td>154</td>
              </tr>
              <tr valign="top">
                <td>35-39</td>
                <td>0.503076</td>
                <td>–1.434408</td>
                <td>0.597649</td>
                <td>3.516315</td>
                <td>387.201489</td>
                <td>261</td>
              </tr>
              <tr valign="top">
                <td>40-44</td>
                <td>0.319638</td>
                <td>–0.426751</td>
                <td>0.633530</td>
                <td>1.556234</td>
                <td>708.053550</td>
                <td>410</td>
              </tr>
              <tr valign="top">
                <td>45-49</td>
                <td>0.321520</td>
                <td>–0.251261</td>
                <td>0.543536</td>
                <td>2.238206</td>
                <td>883.346712</td>
                <td>742</td>
              </tr>
              <tr valign="top">
                <td>50-54</td>
                <td>0.305131</td>
                <td>–0.157782</td>
                <td>0.546973</td>
                <td>1.329511</td>
                <td>1546.437115</td>
                <td>1281</td>
              </tr>
              <tr valign="top">
                <td>55-59</td>
                <td>0.336327</td>
                <td>–0.291609</td>
                <td>0.516734</td>
                <td>1.844321</td>
                <td>1963.077772</td>
                <td>1836</td>
              </tr>
              <tr valign="top">
                <td>60-64</td>
                <td>0.387830</td>
                <td>–0.541003</td>
                <td>0.524773</td>
                <td>1.978364</td>
                <td>2742.875413</td>
                <td>2484</td>
              </tr>
              <tr valign="top">
                <td>65-69</td>
                <td>0.356209</td>
                <td>–0.334169</td>
                <td>0.526643</td>
                <td>1.919789</td>
                <td>3103.957220</td>
                <td>2790</td>
              </tr>
              <tr valign="top">
                <td>70-74</td>
                <td>0.315150</td>
                <td>–0.190539</td>
                <td>0.503276</td>
                <td>1.686930</td>
                <td>2989.908128</td>
                <td>2951</td>
              </tr>
              <tr valign="top">
                <td>75-79</td>
                <td>0.330981</td>
                <td>–0.242378</td>
                <td>0.515653</td>
                <td>1.935420</td>
                <td>2699.848355</td>
                <td>2536</td>
              </tr>
              <tr valign="top">
                <td>&#62;80</td>
                <td>0.320212</td>
                <td>–0.272116</td>
                <td>0.388353</td>
                <td>1.864184</td>
                <td>2708.979941</td>
                <td>4266</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table7fn1">
              <p><sup>a</sup>RMSE: root mean squared error.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table8">
          <label>Table 8</label>
          <caption>
            <p>Regression lines fitted for women who were not readmitted within 30 days, separated by age groups.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="210"/>
            <col width="130"/>
            <col width="130"/>
            <col width="120"/>
            <col width="140"/>
            <col width="120"/>
            <thead>
              <tr valign="bottom">
                <td>Age group (years)</td>
                <td>Slope (expected length of stay)</td>
                <td>Intercept</td>
                <td><italic>R</italic><sup>2</sup> adjusted</td>
                <td>RMSE<sup>a</sup></td>
                <td><italic>F</italic> statistics</td>
                <td>Sample size, n</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>18-24</td>
                <td>0.290987</td>
                <td>–0.168320</td>
                <td>0.650458</td>
                <td>1.837020</td>
                <td>733.188372</td>
                <td>306</td>
              </tr>
              <tr valign="top">
                <td>25-29</td>
                <td>0.340779</td>
                <td>–0.378737</td>
                <td>0.570807</td>
                <td>2.726389</td>
                <td>589.170872</td>
                <td>445</td>
              </tr>
              <tr valign="top">
                <td>30-34</td>
                <td>0.324827</td>
                <td>–0.284349</td>
                <td>0.584901</td>
                <td>2.048568</td>
                <td>789.076632</td>
                <td>562</td>
              </tr>
              <tr valign="top">
                <td>35-39</td>
                <td>0.368889</td>
                <td>–0.515399</td>
                <td>0.631981</td>
                <td>1.725110</td>
                <td>1102.474089</td>
                <td>644</td>
              </tr>
              <tr valign="top">
                <td>40-44</td>
                <td>0.253023</td>
                <td>0.147074</td>
                <td>0.531883</td>
                <td>1.569319</td>
                <td>1070.319261</td>
                <td>944</td>
              </tr>
              <tr valign="top">
                <td>45-49</td>
                <td>0.324630</td>
                <td>–0.232875</td>
                <td>0.627709</td>
                <td>1.493320</td>
                <td>2394.218647</td>
                <td>1422</td>
              </tr>
              <tr valign="top">
                <td>50-54</td>
                <td>0.301186</td>
                <td>–0.073365</td>
                <td>0.478817</td>
                <td>1.655429</td>
                <td>2019.326558</td>
                <td>2200</td>
              </tr>
              <tr valign="top">
                <td>55-59</td>
                <td>0.389959</td>
                <td>–0.484006</td>
                <td>0.576303</td>
                <td>1.886088</td>
                <td>4468.179114</td>
                <td>3287</td>
              </tr>
              <tr valign="top">
                <td>60-64</td>
                <td>0.339517</td>
                <td>–0.190579</td>
                <td>0.422514</td>
                <td>2.111166</td>
                <td>3013.638447</td>
                <td>4121</td>
              </tr>
              <tr valign="top">
                <td>65-69</td>
                <td>0.297055</td>
                <td>–0.029449</td>
                <td>0.504601</td>
                <td>1.616991</td>
                <td>5405.580063</td>
                <td>5309</td>
              </tr>
              <tr valign="top">
                <td>70-74</td>
                <td>0.333896</td>
                <td>–0.262753</td>
                <td>0.482080</td>
                <td>2.016495</td>
                <td>5622.958698</td>
                <td>6043</td>
              </tr>
              <tr valign="top">
                <td>75-79</td>
                <td>0.348289</td>
                <td>–0.358721</td>
                <td>0.439302</td>
                <td>2.057815</td>
                <td>5139.697471</td>
                <td>6562</td>
              </tr>
              <tr valign="top">
                <td>&gt;80</td>
                <td>0.266803</td>
                <td>–0.034627</td>
                <td>0.179332</td>
                <td>2.489782</td>
                <td>4115.368718</td>
                <td>18,835</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table8fn1">
              <p><sup>a</sup>RMSE: root mean squared error.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>The proposed work aimed to use ensemble models and linear regressions for predicting patient readmissions and analyzing their economic consequences [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]. The results of this study demonstrate the potential of these models to accurately predict readmissions with a balanced degree of recall and precision, which could help health care providers identify patients who are at risk of readmission and take proactive measures to prevent it.</p>
      <sec>
        <title>Notes About the Study</title>
        <p>Although the study used cutting-edge algorithms for classification and regression, there are several critical notes that must be considered [<xref ref-type="bibr" rid="ref39">39</xref>]. The primary evaluation metrics for the models were recall and <italic>F</italic><sub>1</sub>-scores, with a slight preference for false positives over false negatives to decrease the likelihood of unplanned readmissions [<xref ref-type="bibr" rid="ref40">40</xref>]. However, it is crucial to note that this approach may not be suitable for all health care scenarios and should be evaluated on a case-by-case basis [<xref ref-type="bibr" rid="ref41">41</xref>].</p>
        <p>Another crucial consideration is the computational cost associated with clinical and geographical data [<xref ref-type="bibr" rid="ref41">41</xref>]. Although the analysis for this study only took 2 to 3 hours, it is essential to consider the computational requirements for more substantial studies, particularly those with larger data sets or more complex models [<xref ref-type="bibr" rid="ref42">42</xref>]. The computational cost may impact the feasibility of the study, and efficient models may be necessary to ensure valid and reliable results [<xref ref-type="bibr" rid="ref42">42</xref>].</p>
        <p>In addition, some features in the geographical data, such as the case mix group diagnosis type, could not be split in the geographical data sets because of their high computational cost. This could lead to omitted variable bias and negatively affect the models’ accuracy [<xref ref-type="bibr" rid="ref43">43</xref>]. As the impact of not splitting these features was not taken into account in this study, future research should carefully evaluate the potential impact of not splitting features and consider alternatives to reduce the computational cost [<xref ref-type="bibr" rid="ref43">43</xref>].</p>
      </sec>
      <sec>
        <title>Clinical Data Set Result Analysis</title>
        <p>In this section, the clinical data set results are analyzed and compared with those of other existing models in the literature.</p>
        <sec>
          <title>The Effect of PCA on the Study and the Bias-Variance Trade-off</title>
          <p>The use of PCA offered several advantages. The selection of the components that describe the minimum number of features required to achieve a cumulative variance of at least 80% proved to be effective in preventing overfitting [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref44">44</xref>-<xref ref-type="bibr" rid="ref46">46</xref>]. The data set had high dimensionality and a substantial number of data points, which would have led to high bias and low variance without the use of PCA [<xref ref-type="bibr" rid="ref39">39</xref>]. This, in turn, would have resulted in a lower precision rate than recall rate. However, PCA prevented this issue by reducing the number of features in the model and substantially increasing computational efficiency [<xref ref-type="bibr" rid="ref47">47</xref>].</p>
          <p>Moreover, PCA eliminated the potential for collinearity, which can create unstable and unreliable estimates of the model parameters [<xref ref-type="bibr" rid="ref39">39</xref>]. Collinearity makes it difficult to determine the unique contribution of each variable to the outcome [<xref ref-type="bibr" rid="ref41">41</xref>]. Upon computing the covariance matrix and performing an eigenvector decomposition, the resulting eigenvectors were orthogonal to each other, thereby eliminating the presence of collinearity.</p>
          <p>Furthermore, the implementation of PCA in conjunction with stacked classifiers enabled a higher interpretability of the models [<xref ref-type="bibr" rid="ref42">42</xref>]. Stacked models can be challenging to interpret in high-dimensional data, as the layers can contribute to a high level of complexity [<xref ref-type="bibr" rid="ref43">43</xref>]. Moreover, the curse of dimensionality and collinearity can make it difficult for models to isolate specific features, thereby decreasing transparency [<xref ref-type="bibr" rid="ref43">43</xref>]. However, the addition of PCA allowed for a more comprehensive and explained model, as reflected in the submodel and ensemble model analyses in the subsequent sections.</p>
        </sec>
        <sec>
          <title>Submodel Analyses</title>
          <p>This study found that although the hyperparameter-tuned XGB model outperformed its base model, it was still less accurate than the other individual submodels. This result is consistent with a previous study conducted in Alberta that also found that XGB models did not provide substantial information on patient readmissions [<xref ref-type="bibr" rid="ref36">36</xref>]. However, the tuned XGB model performed better than its base model and had a higher precision and recall score, indicating a better balance between precision and recall for both classes relative to the default XGB model.</p>
          <p>By contrast, both the tuned random forest and LGBM models (<xref ref-type="table" rid="table6">Tables 6</xref> and <xref ref-type="table" rid="table7">7</xref>, respectively) demonstrated superior performance compared with their base models (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) in predicting patient readmission for class 1, as evidenced by their higher <italic>F</italic><sub>1</sub>-score and precision. The recall for class 1 was lower for the tuned random forest model, whereas it was higher for the tuned LGBM model. LGBM was shown to balance a slightly higher recall rate and precision rate than its other decision tree counterparts, allowing it to provide substantial information regarding the use of this model.</p>
        </sec>
        <sec>
          <title>Final Estimator Analysis</title>
          <p>The ensemble model was created to minimize and offset the bias and variance among the models under discussion [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. The 4 types of ensemble models and their classification reports are listed in <xref ref-type="table" rid="table4">Table 4</xref> and Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, respectively.</p>
          <p>Upon analyzing the data, it was observed that the default model configuration, which consisted of default submodels and a default final estimator logistic regression, exhibited high precision (0.92) and recall (0.98) for nonreadmitted patients (class 0). However, its ability to predict readmissions (class 1) was comparatively weaker, as evidenced by the lower <italic>F</italic><sub>1</sub>-score (0.46), precision (0.69), and recall (0.35) for class 1.</p>
          <p>The second configuration, which used default submodels with a tuned logistic regression final estimator, demonstrated an improvement in the <italic>F</italic><sub>1</sub>-score (0.56) for class 1. Nonetheless, its precision (0.47) and recall (0.68) for class 1 remained lower than those for class 0.</p>
          <p>The third configuration, which used tuned submodels with a default final estimator logistic regression, yielded high precision (0.92) and recall (0.99) for class 0. However, its performance in predicting readmissions (class 1) was weaker, with a precision of 0.77 and recall of 0.30, leading to an <italic>F</italic><sub>1</sub>-score of 0.43.</p>
          <p>The fourth configuration, in which both submodels and final estimator logistic regression were tuned, resulted in the highest <italic>F</italic><sub>1</sub>-score (0.57) for class 1, indicating a better performance in predicting patient readmissions. Nevertheless, its precision (0.49) and recall (0.68) for class 1 remained lower than those for class 0.</p>
          <p>The overall tuned ensemble model, when compared with the submodels, is identical to the LGBM model in that, although recall is favored, the balance between precision and recall for class 1, compared with the other models, is useful in preventing too many false positives from occurring.</p>
        </sec>
        <sec>
          <title>Comparison of Tuned Ensemble Models With Literature Value Predictions</title>
          <p>The results of this study are not comparable with Baruah’s [<xref ref-type="bibr" rid="ref8">8</xref>] values because of the presence of unstructured data types, which cannot serve as a useful comparison to ordered data sets such as the DAD. However, other studies have used the DAD or other similar structured data before. The existing literature review comparisons with 30-day short-term studies are presented in <xref ref-type="table" rid="table5">Table 5</xref>.</p>
          <p>Note that this list is not exhaustive and that there may be other studies that potentially use stacking classifier models and show better results. The comparison with other studies shows that the model has the potential to be viable and robust, but more tuning and comparison between submodels need to be performed.</p>
        </sec>
        <sec>
          <title>Limitations of the Clinical Data Set Analysis</title>
          <p>One notable limitation of the clinical data set used in this study was the high class imbalance problem. Specifically, there were considerably more training points for class 0 than for class 1, with n=83,083 for class 0 and n=10,271 for class 1. This issue could have led to the trained model being more prone to producing false negatives than to producing false positives, as it was more familiar with class 0 instances and thus had a tendency to classify more instances as class 0 [<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>]. Consequently, this limitation could have negatively impacted the overall performance and accuracy of the model, as well as the reliability of the predictions it produced [<xref ref-type="bibr" rid="ref50">50</xref>].</p>
          <p>Another limitation of the data set was the encoding of the data, which could have influenced the interpretability and accuracy of the model. Specifically, if the model interpreted the encoded data as ordinal, it could have altered the ordinality of the classifier, thereby influencing the classification results. This limitation could have impacted the ability of the model to identify the most relevant features for predicting patient readmission, reducing its interpretability [<xref ref-type="bibr" rid="ref49">49</xref>]. Moreover, this limitation could have adversely impacted the accuracy of the model, as the model may have learned from the encoded data instead of the underlying features, resulting in a less accurate prediction of patient readmission [<xref ref-type="bibr" rid="ref50">50</xref>].</p>
          <p>Finally, the data set’s lack of information about the specific principal component that contributed to the accurate prediction of the patient data set was another limitation. This limitation could have constrained the model’s ability to explain how the variables were associated with patient readmission, resulting in a lack of transparency in the model’s predictions and reduced ability to elucidate the rationale behind its decision-making process. As such, identifying the principal components that contribute to the accurate prediction of the patient data set is critical to improving the interpretability and reliability of the model.</p>
        </sec>
      </sec>
      <sec>
        <title>Geographical Data Set Result Analysis</title>
        <sec>
          <title>Causality of the Linear Regression Model</title>
          <p>The study results suggested that the model could potentially establish a causal relationship (albeit with a proper regression type) between ELOS and RIW. The anticipated hypothesis was well supported by the tables presented earlier, indicating the importance of the model. The analysis involved an explicit model of a continuous outcome (RIW) that was affected by a measured continuous variable (ELOS), and the results showed a notable impact. This finding encourages the establishment of causality in the relationship between ELOS and RIW.</p>
        </sec>
        <sec>
          <title>ELOS Effects on RIW and Fit of the Linear Regression</title>
          <p>The relationship between ELOS and RIW was investigated through a linear regression analysis, which produced the coefficient (slope) from the ELOS variables. The study findings indicated that more resources were expended and more time was spent among women aged 40 to 44 years who were readmitted than among those who were not readmitted. In addition, more resources were expended for men aged 35 to 39 years (<xref ref-type="table" rid="table7">Table 7</xref>) who were readmitted than for their nonreadmitted counterparts. Surprisingly, most of the slopes associated with ELOS are uniform in nature and are approximately the same across ages. However, a comparison between the results also suggested that ELOS had a significant effect on RIW owing to the low <italic>P</italic> values.</p>
          <p>The <italic>F</italic> test of overall significance was used to ascertain that the model was better suited than a model with no independent variables [<xref ref-type="bibr" rid="ref51">51</xref>,<xref ref-type="bibr" rid="ref52">52</xref>]. All the models had <italic>F</italic> statistic values significantly greater than their critical <italic>F</italic> values, which suggested that the linear regression model was a relatively accurate estimate of the relationship between ELOS and RIW.</p>
          <p>However, the root mean squared error and <italic>R</italic><sup>2</sup> values suggested otherwise. There was a high degree of error compared with the slope. The low <italic>R</italic><sup>2</sup> values across all the studies implied that linear regression was not a good fit, which could imply that further data clustering into groups was necessary or that further manipulation of the data to perform a different regression was needed. These results were reasonable, considering that the function was not 1-to-1, as demonstrated by the graphs in Figures S6 and S7 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
        </sec>
      </sec>
      <sec>
        <title>Future Directions</title>
        <p>Many fundamental aspects of both the ensemble model and linear regression remain unexplored.</p>
        <p>Therefore, the suggested future implementations for the ensemble model are as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>Including unstructured data (such as clinical data and text notes) in analysis by a deep neural network and performing logistic regression on all the models to give individuality to a specific patient [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
          </list-item>
          <list-item>
            <p>Using deep learning neural networks as a final estimator for the ensemble model and outputting evaluation metrics [<xref ref-type="bibr" rid="ref53">53</xref>].</p>
          </list-item>
          <list-item>
            <p>Adding more submodels and optimizing for computational resources such as space, time, and memory [<xref ref-type="bibr" rid="ref53">53</xref>].</p>
          </list-item>
        </list>
        <p>The suggested improvements for the linear regression are as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>An instrumental variable that measures the relationship between ELOS and a selection decision variable should be implemented. The instrumental variables should only be involved in the selection decision process. Afterward, the relationship between RIW and the selection decision variable should be measured to ensure low omitted variable biases [<xref ref-type="bibr" rid="ref54">54</xref>].</p>
          </list-item>
          <list-item>
            <p>Logistic regression (logistic by the coefficients) should be performed to ensure that root mean squared error is minimized and a more accurate relationship between the ELOS and RIW can be derived [<xref ref-type="bibr" rid="ref55">55</xref>].</p>
          </list-item>
        </list>
        <p>These applications can allow for a more in-depth analysis and provide a multifaceted perspective in the fields of ML, econometrics, and health care interventions.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>The study’s implications are the validation of hybrid ensemble models and an attempt to develop economic cost prediction models. The availability of robust and efficient predictive models, such as the one presented in this study, can enable hospitals to focus more on patients and less on the utility and bureaucratic costs associated with their readmission. As demonstrated by the evaluation metrics, the ensemble model plays a critical role in ensuring more precise results overall. By implementing a crowdsourcing approach, the model can also estimate the resources required to control future epidemics in an easier, time-sensitive manner while maintaining low economic costs. This is particularly relevant in countries with decentralized, universal, publicly funded health care systems, such as Canada, where high inflation on medical equipment, technologies, and maintenance has been observed in the aftermath of the COVID-19 pandemic.</p>
        <p>Predicting the relationship between ELOS and RIW can also indirectly predict patient outcomes by reducing bureaucratic and utility costs, thereby reducing the cost burden placed on patients to implement administrative tasks and on physicians to ensure their execution. The ensemble model also considers the specific disease type, and the encoding process has resulted in the classification data being ordinal in nature, which takes into account patient utility in addition to risk stratification.</p>
        <p>The linear regression has considered the differences in continuous variables while also allowing for a clear difference in the clustered groups. Further exploration of the cost-benefit economic model can enable hospitals to ensure more cost-free, patient-friendly outcomes. It is recommended that after making several changes to the general ensemble model and the linear regressions, they be used to analyze new and incoming numerical hospital cost data.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>In-depth descriptions and mathematical formalisms for all of the submodels and ensemble models, cumulative variance and principal component analysis results and confusion matrices of all of the previous models and graphs for the linear regression, all the codes, and references.</p>
        <media xlink:href="formative_v7i1e41725_app1.docx" xlink:title="DOCX File, 1057 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">DAD</term>
          <def>
            <p>Discharge Abstract Database</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ELOS</term>
          <def>
            <p>expected length of stay</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">ICD-10</term>
          <def>
            <p>International Classification of Diseases, 10th revision</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">LGBM</term>
          <def>
            <p>LightGBM</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">MCC</term>
          <def>
            <p>major complication or comorbidity</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">PCA</term>
          <def>
            <p>principal component analysis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">RIW</term>
          <def>
            <p>resource intensity weight</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">XGB</term>
          <def>
            <p>XGBoost</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors express their profound gratitude to Adrian Stanley from JMIR for his unwavering support and to Benjamin D Fedoruk from STEM Fellowship for his invaluable assistance in ideation and manuscript preparation. This research would not have been possible without the generous support of the sponsors from the 2022 Inter-University Big Data Challenge, including JMIR Publications, Roche, Statistical Analysis System Institute Inc, Canadian Science Publishing, Digital Science, and Overleaf, whose contributions enabled the authors to conduct this groundbreaking research.</p>
      <p>This manuscript received first place in the STEM Fellowship Big Data Challenge Inter-University Innovation Award, which was sponsored by JMIR Publications. JMIR Publications provided APF support for the publication of this paper.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>All codes have been made available by the authors in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The data set used in this study was obtained from Statistics Canada and is subject to copyright owned by Statistics Canada.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>ER assumed leadership in the administration, drafting, and computational efforts for this manuscript. KN and QG made equal contributions to the programming and algorithm development, demonstrating their expertise and commitment to this project. SR and JP contributed equally to the drafting of the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goldfield</surname>
              <given-names>NI</given-names>
            </name>
            <name name-style="western">
              <surname>McCullough</surname>
              <given-names>EC</given-names>
            </name>
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Eastman</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rawlins</surname>
              <given-names>LK</given-names>
            </name>
            <name name-style="western">
              <surname>Averill</surname>
              <given-names>RF</given-names>
            </name>
          </person-group>
          <article-title>Identifying potentially preventable readmissions</article-title>
          <source>Health Care Financ Rev</source>
          <year>2008</year>
          <volume>30</volume>
          <issue>1</issue>
          <fpage>75</fpage>
          <lpage>91</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/19040175"/>
          </comment>
          <pub-id pub-id-type="medline">19040175</pub-id>
          <pub-id pub-id-type="pmcid">PMC4195042</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Samsky</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Ambrosy</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Youngson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Kaul</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hernandez</surname>
              <given-names>AF</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>ED</given-names>
            </name>
            <name name-style="western">
              <surname>McAlister</surname>
              <given-names>FA</given-names>
            </name>
          </person-group>
          <article-title>Trends in readmissions and length of stay for patients hospitalized with heart failure in Canada and the United States</article-title>
          <source>JAMA Cardiol</source>
          <year>2019</year>
          <month>05</month>
          <day>01</day>
          <volume>4</volume>
          <issue>5</issue>
          <fpage>444</fpage>
          <lpage>53</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30969316"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamacardio.2019.0766</pub-id>
          <pub-id pub-id-type="medline">30969316</pub-id>
          <pub-id pub-id-type="pii">2730290</pub-id>
          <pub-id pub-id-type="pmcid">PMC6537806</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brahmania</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wiskar</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Walley</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Rush</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Lower household income is associated with an increased risk of hospital readmission in patients with decompensated cirrhosis</article-title>
          <source>J Gastroenterol Hepatol</source>
          <year>2021</year>
          <month>04</month>
          <day>14</day>
          <volume>36</volume>
          <issue>4</issue>
          <fpage>1088</fpage>
          <lpage>94</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32562577"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/jgh.15153</pub-id>
          <pub-id pub-id-type="medline">32562577</pub-id>
          <pub-id pub-id-type="pmcid">PMC8063220</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hellsten</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Yue</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Sutherland</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Improving hospital quality through payment reforms: a policy impact analysis in British Columbia</article-title>
          <source>Healthc Manage Forum</source>
          <year>2016</year>
          <month>01</month>
          <day>08</day>
          <volume>29</volume>
          <issue>1</issue>
          <fpage>33</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1177/0840470415614054</pub-id>
          <pub-id pub-id-type="medline">26656392</pub-id>
          <pub-id pub-id-type="pii">0840470415614054</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cropley</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The relationship-based care model: evaluation of the impact on patient satisfaction, length of stay, and readmission rates</article-title>
          <source>J Nurs Adm</source>
          <year>2012</year>
          <month>06</month>
          <volume>42</volume>
          <issue>6</issue>
          <fpage>333</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1097/NNA.0b013e31825738ed</pub-id>
          <pub-id pub-id-type="medline">22617699</pub-id>
          <pub-id pub-id-type="pii">00005110-201206000-00009</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Talwar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chatterjee</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Aparasu</surname>
              <given-names>RR</given-names>
            </name>
          </person-group>
          <article-title>Application of machine learning in predicting hospital readmissions: a scoping review of the literature</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2021</year>
          <month>05</month>
          <day>06</day>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>96</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-021-01284-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12874-021-01284-z</pub-id>
          <pub-id pub-id-type="medline">33952192</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12874-021-01284-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC8101040</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Avidan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Abdallah</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Kronzer</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Predicting hospital readmission via cost-sensitive deep learning</article-title>
          <source>IEEE/ACM Trans Comput Biol Bioinf</source>
          <year>2018</year>
          <month>11</month>
          <day>01</day>
          <volume>15</volume>
          <issue>6</issue>
          <fpage>1968</fpage>
          <lpage>78</lpage>
          <pub-id pub-id-type="doi">10.1109/tcbb.2018.2827029</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baruah</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Predicting Hospital Readmission using Unstructured Clinical Note Data. Thesis</article-title>
          <source>Brown University</source>
          <year>2020</year>
          <access-date>2023-05-09</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cs.brown.edu/research/pubs/theses/ugrad/2020/baruah.prakrit.pdf">https://cs.brown.edu/research/pubs/theses/ugrad/2020/baruah.prakrit.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kripalani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Theobald</surname>
              <given-names>CN</given-names>
            </name>
            <name name-style="western">
              <surname>Anctil</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Vasilevskis</surname>
              <given-names>EE</given-names>
            </name>
          </person-group>
          <article-title>Reducing hospital readmission rates: current strategies and future directions</article-title>
          <source>Annu Rev Med</source>
          <year>2014</year>
          <month>01</month>
          <day>14</day>
          <volume>65</volume>
          <issue>1</issue>
          <fpage>471</fpage>
          <lpage>85</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/24160939"/>
          </comment>
          <pub-id pub-id-type="doi">10.1146/annurev-med-022613-090415</pub-id>
          <pub-id pub-id-type="medline">24160939</pub-id>
          <pub-id pub-id-type="pmcid">PMC4104507</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ikemura</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bellin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Yagi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Billett</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Saada</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Simone</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Stahl</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Szymanski</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Goldstein</surname>
              <given-names>DY</given-names>
            </name>
            <name name-style="western">
              <surname>Reyes Gil</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Using automated machine learning to predict the mortality of patients with COVID-19: prediction model development study</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>02</month>
          <day>26</day>
          <volume>23</volume>
          <issue>2</issue>
          <fpage>e23458</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/2/e23458/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/23458</pub-id>
          <pub-id pub-id-type="medline">33539308</pub-id>
          <pub-id pub-id-type="pii">v23i2e23458</pub-id>
          <pub-id pub-id-type="pmcid">PMC7919846</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ko</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Ko</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Seo</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Jaimes</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>An artificial intelligence model to predict the mortality of COVID-19 patients at hospital admission time using routine blood samples: development and validation of an ensemble model</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>12</month>
          <day>23</day>
          <volume>22</volume>
          <issue>12</issue>
          <fpage>e25442</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/12/e25442/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/25442</pub-id>
          <pub-id pub-id-type="medline">33301414</pub-id>
          <pub-id pub-id-type="pii">v22i12e25442</pub-id>
          <pub-id pub-id-type="pmcid">PMC7759509</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kansagara</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Englander</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Salanitro</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kagen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Theobald</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Freeman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kripalani</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Risk prediction models for hospital readmission: a systematic review</article-title>
          <source>JAMA</source>
          <year>2011</year>
          <month>10</month>
          <day>19</day>
          <volume>306</volume>
          <issue>15</issue>
          <fpage>1688</fpage>
          <lpage>98</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/22009101"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jama.2011.1515</pub-id>
          <pub-id pub-id-type="medline">22009101</pub-id>
          <pub-id pub-id-type="pii">306/15/1688</pub-id>
          <pub-id pub-id-type="pmcid">PMC3603349</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ben-Assuli</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Padman</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Trajectories of repeated readmissions of chronic disease patients: risk stratification, profiling, and prediction</article-title>
          <source>MIS Q</source>
          <year>2020</year>
          <month>01</month>
          <day>01</day>
          <volume>44</volume>
          <issue>1</issue>
          <fpage>201</fpage>
          <lpage>26</lpage>
          <pub-id pub-id-type="doi">10.25300/MISQ/2020/15101</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hansen</surname>
              <given-names>KN</given-names>
            </name>
            <name name-style="western">
              <surname>Morbitzer</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Waldron</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Amerine</surname>
              <given-names>LB</given-names>
            </name>
          </person-group>
          <article-title>Development of novel formulas to determine hospital and pharmacy opportunities to reduce extended length of stay</article-title>
          <source>J Pharm Technol</source>
          <year>2016</year>
          <month>11</month>
          <day>14</day>
          <volume>33</volume>
          <issue>1</issue>
          <fpage>15</fpage>
          <lpage>22</lpage>
          <pub-id pub-id-type="doi">10.1177/8755122516677081</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Barrett</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Way</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>McDonald</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Parfrey</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Hospital utilization, efficiency and access to care during and shortly after restructuring acute care in Newfoundland and Labrador</article-title>
          <source>J Health Serv Res Policy</source>
          <year>2005</year>
          <month>10</month>
          <volume>10 Suppl 2</volume>
          <fpage>S2:31</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1258/135581905774424537</pub-id>
          <pub-id pub-id-type="medline">16259699</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <article-title>Discharge abstract database metadata (DAD)</article-title>
          <source>Canadian Institute for Health Information</source>
          <access-date>2023-03-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cihi.ca/en/discharge-abstract-database-metadata-dad">https://www.cihi.ca/en/discharge-abstract-database-metadata-dad</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="web">
          <article-title>Abacus data network</article-title>
          <source>re3data.org</source>
          <access-date>2023-03-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://doi.org/10.17616/R3692H">http://doi.org/10.17616/R3692H</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Bedrick</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hersh</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Implementation of a cohort retrieval system for clinical data repositories using the observational medical outcomes partnership common data model: proof-of-concept system validation</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>10</month>
          <day>06</day>
          <volume>8</volume>
          <issue>10</issue>
          <fpage>e17376</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/10/e17376/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17376</pub-id>
          <pub-id pub-id-type="medline">33021486</pub-id>
          <pub-id pub-id-type="pii">v8i10e17376</pub-id>
          <pub-id pub-id-type="pmcid">PMC7576539</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nidoi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Muttamba</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Walusimbi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Imoko</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Lochoro</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ictho</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mugenyi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sekibira</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Turyahabwe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Byaruhanga</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Putoto</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Villa</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Raviglione</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Kirenga</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Impact of socio-economic factors on tuberculosis treatment outcomes in north-eastern Uganda: a mixed methods study</article-title>
          <source>BMC Public Health</source>
          <year>2021</year>
          <month>11</month>
          <day>26</day>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>2167</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-021-12056-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12889-021-12056-1</pub-id>
          <pub-id pub-id-type="medline">34836521</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12889-021-12056-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC8620143</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Ni</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Mougin</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Investigation of variation in gene expression profiling of human blood by extended principle component analysis</article-title>
          <source>PLoS One</source>
          <year>2011</year>
          <month>10</month>
          <day>27</day>
          <volume>6</volume>
          <issue>10</issue>
          <fpage>e26905</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0026905"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0026905</pub-id>
          <pub-id pub-id-type="medline">22046403</pub-id>
          <pub-id pub-id-type="pii">PONE-D-11-09034</pub-id>
          <pub-id pub-id-type="pmcid">PMC3203156</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Towards fine-scale population stratification modeling based on kernel principal component analysis and random forest</article-title>
          <source>Genes Genomics</source>
          <year>2021</year>
          <month>10</month>
          <day>07</day>
          <volume>43</volume>
          <issue>10</issue>
          <fpage>1143</fpage>
          <lpage>55</lpage>
          <pub-id pub-id-type="doi">10.1007/s13258-021-01057-4</pub-id>
          <pub-id pub-id-type="medline">34097252</pub-id>
          <pub-id pub-id-type="pii">10.1007/s13258-021-01057-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chiu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Applying an improved stacking ensemble model to predict the mortality of ICU patients with heart failure</article-title>
          <source>J Clin Med</source>
          <year>2022</year>
          <month>10</month>
          <day>31</day>
          <volume>11</volume>
          <issue>21</issue>
          <fpage>6460</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=jcm11216460"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/jcm11216460</pub-id>
          <pub-id pub-id-type="medline">36362686</pub-id>
          <pub-id pub-id-type="pii">jcm11216460</pub-id>
          <pub-id pub-id-type="pmcid">PMC9659015</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Akbar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Sunyoto</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Arief</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Caesarendra</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Improvement of decision tree classifier accuracy for healthcare insurance fraud prediction by using Extreme Gradient Boosting algorithm</article-title>
          <source>Proceedings of the International Conference on Informatics, Multimedia, Cyber and Information System (ICIMCIS)</source>
          <year>2020</year>
          <conf-name>International Conference on Informatics, Multimedia, Cyber and Information System (ICIMCIS)</conf-name>
          <conf-date>Nov 19-20, 2020</conf-date>
          <conf-loc>Jakarta, Indonesia</conf-loc>
          <pub-id pub-id-type="doi">10.1109/icimcis51567.2020.9354286</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kadiyala</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Applications of python to evaluate the performance of decision tree-based boosting algorithms</article-title>
          <source>Environ Prog Sustainable Energy</source>
          <year>2018</year>
          <month>03</month>
          <day>01</day>
          <volume>37</volume>
          <issue>2</issue>
          <fpage>618</fpage>
          <lpage>23</lpage>
          <pub-id pub-id-type="doi">10.1002/ep.12888</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Nonlinear associations between medical expenditure, perceived medical attitude, and sociodemographics, and older adults' self-rated health in China: applying the extreme gradient boosting model</article-title>
          <source>Healthcare (Basel)</source>
          <year>2021</year>
          <month>12</month>
          <day>26</day>
          <volume>10</volume>
          <issue>1</issue>
          <fpage>39</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=healthcare10010039"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/healthcare10010039</pub-id>
          <pub-id pub-id-type="medline">35052203</pub-id>
          <pub-id pub-id-type="pii">healthcare10010039</pub-id>
          <pub-id pub-id-type="pmcid">PMC8775788</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>El-Rashidy</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>El-Sappagh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Abuhmed</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Abdelrazek</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>El-Bakry</surname>
              <given-names>HM</given-names>
            </name>
          </person-group>
          <article-title>Intensive care unit mortality prediction: an improved patient-specific stacking ensemble model</article-title>
          <source>IEEE Access</source>
          <year>2020</year>
          <volume>8</volume>
          <fpage>133541</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2020.3010556</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pfob</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sidey-Gibbons</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Machine learning in medicine: a practical introduction to techniques for data pre-processing, hyperparameter tuning, and model comparison</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2022</year>
          <month>11</month>
          <day>01</day>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>282</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-022-01758-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12874-022-01758-8</pub-id>
          <pub-id pub-id-type="medline">36319956</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12874-022-01758-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC9624048</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Owen</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <source>Hyperparameter Tuning with Python: Boost Your Machine Learning Model's Performance Via Hyperparameter Tuning</source>
          <year>2022</year>
          <publisher-loc>Birmingham</publisher-loc>
          <publisher-name>Packt Publishing</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pink</surname>
              <given-names>GH</given-names>
            </name>
            <name name-style="western">
              <surname>Bolley</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Physicians in health care management: 4. Case mix groups and resource intensity weights: physicians and hospital funding</article-title>
          <source>CMAJ</source>
          <year>1994</year>
          <month>4</month>
          <day>15</day>
          <volume>150</volume>
          <issue>8</issue>
          <fpage>1255</fpage>
          <lpage>61</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Koo</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Elsamadicy</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>I-H</given-names>
            </name>
            <name name-style="western">
              <surname>David</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Freedman</surname>
              <given-names>IG</given-names>
            </name>
            <name name-style="western">
              <surname>Isaac</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Sujijantarat</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Cord</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hebert</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bahrassa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Malhotra</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Matouk</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Predictors of extended length of stay following treatment of unruptured adult cerebral aneurysms: a study of the National Inpatient Sample</article-title>
          <source>Neurosurgery</source>
          <year>2020</year>
          <month>12</month>
          <volume>67</volume>
          <issue>Supplement_1</issue>
          <pub-id pub-id-type="doi">10.1093/neuros/nyaa447_147</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Handelman</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Kok</surname>
              <given-names>HK</given-names>
            </name>
            <name name-style="western">
              <surname>Chandra</surname>
              <given-names>RV</given-names>
            </name>
            <name name-style="western">
              <surname>Razavi</surname>
              <given-names>AH</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Brooks</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Asadi</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Peering into the black box of artificial intelligence: evaluation metrics of machine learning methods</article-title>
          <source>AJR Am J Roentgenol</source>
          <year>2019</year>
          <month>01</month>
          <volume>212</volume>
          <issue>1</issue>
          <fpage>38</fpage>
          <lpage>43</lpage>
          <pub-id pub-id-type="doi">10.2214/AJR.18.20224</pub-id>
          <pub-id pub-id-type="medline">30332290</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bach</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Causality in medicine</article-title>
          <source>C R Biol</source>
          <year>2019</year>
          <month>03</month>
          <volume>342</volume>
          <issue>3-4</issue>
          <fpage>55</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1631-0691(19)30034-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.crvi.2019.03.001</pub-id>
          <pub-id pub-id-type="medline">30981720</pub-id>
          <pub-id pub-id-type="pii">S1631-0691(19)30034-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Auker-Howlett</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <source>Evidence Evaluation and the Epistemology of Causality in Medicine</source>
          <year>2020</year>
          <publisher-loc>Canterbury, England</publisher-loc>
          <publisher-name>University of Kent</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>van Oostveen</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gouma</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bakker</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ubbink</surname>
              <given-names>DT</given-names>
            </name>
          </person-group>
          <article-title>Quantifying the demand for hospital care services: a time and motion study</article-title>
          <source>BMC Health Serv Res</source>
          <year>2015</year>
          <month>01</month>
          <day>22</day>
          <volume>15</volume>
          <issue>1</issue>
          <fpage>15</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmchealthservres.biomedcentral.com/articles/10.1186/s12913-014-0674-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12913-014-0674-2</pub-id>
          <pub-id pub-id-type="medline">25608889</pub-id>
          <pub-id pub-id-type="pii">s12913-014-0674-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC4311505</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Spithoff</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Stockdale</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rowe</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>McPhail</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Persaud</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>The commercialization of patient data in Canada: ethics, privacy and policy</article-title>
          <source>CMAJ</source>
          <year>2022</year>
          <month>01</month>
          <day>24</day>
          <volume>194</volume>
          <issue>3</issue>
          <fpage>E95</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cmaj.ca/cgi/pmidlookup?view=long&#38;pmid=35074837"/>
          </comment>
          <pub-id pub-id-type="doi">10.1503/cmaj.210455</pub-id>
          <pub-id pub-id-type="medline">35074837</pub-id>
          <pub-id pub-id-type="pii">194/3/E95</pub-id>
          <pub-id pub-id-type="pmcid">PMC8900757</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Kulkarni</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>McAlister</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Eurich</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Keshwani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Simpson</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Voaklander</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Samanani</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Predicting 30-day readmissions in patients with heart failure using administrative data: a machine learning approach</article-title>
          <source>J Card Fail</source>
          <year>2022</year>
          <month>05</month>
          <volume>28</volume>
          <issue>5</issue>
          <fpage>710</fpage>
          <lpage>22</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1071-9164(21)00499-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.cardfail.2021.12.004</pub-id>
          <pub-id pub-id-type="medline">34936894</pub-id>
          <pub-id pub-id-type="pii">S1071-9164(21)00499-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jamei</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Nisnevich</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Wetchler</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Sudat</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Predicting all-cause risk of 30-day hospital readmission using artificial neural networks</article-title>
          <source>PLoS One</source>
          <year>2017</year>
          <month>7</month>
          <day>14</day>
          <volume>12</volume>
          <issue>7</issue>
          <fpage>e0181173</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0181173"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0181173</pub-id>
          <pub-id pub-id-type="medline">28708848</pub-id>
          <pub-id pub-id-type="pii">PONE-D-16-39789</pub-id>
          <pub-id pub-id-type="pmcid">PMC5510858</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin Ho</surname>
              <given-names>ET</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>IE</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>PY</given-names>
            </name>
            <name name-style="western">
              <surname>Chong</surname>
              <given-names>HF</given-names>
            </name>
          </person-group>
          <article-title>Predicting readmission at early hospitalization using electronic health data: a customized model development</article-title>
          <source>Int J Integr Care</source>
          <year>2017</year>
          <month>10</month>
          <day>17</day>
          <volume>17</volume>
          <issue>5</issue>
          <fpage>A506</fpage>
          <pub-id pub-id-type="doi">10.5334/ijic.3826</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xing</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Macq</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Improvement of Bragg peak shift estimation using dimensionality reduction techniques and predictive linear modeling</article-title>
          <source>Proceedings of the 13th International Symposium on Medical Information Processing and Analysis</source>
          <year>2017</year>
          <conf-name>13th International Symposium on Medical Information Processing and Analysis</conf-name>
          <conf-date>Oct 5-7, 2017</conf-date>
          <conf-loc>San Andres Island, Colombia</conf-loc>
          <pub-id pub-id-type="doi">10.1117/12.2285608</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hong</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bratvold</surname>
              <given-names>RB</given-names>
            </name>
            <name name-style="western">
              <surname>Nævdal</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Robust production optimization with capacitance-resistance model as precursor</article-title>
          <source>Comput Geosci</source>
          <year>2017</year>
          <month>6</month>
          <day>24</day>
          <volume>21</volume>
          <issue>5-6</issue>
          <fpage>1423</fpage>
          <lpage>42</lpage>
          <pub-id pub-id-type="doi">10.1007/s10596-017-9666-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Dynamic Stacking ensemble monitoring model of dam displacement based on the feature selection with PCA-RF</article-title>
          <source>J Civil Struct Health Monit</source>
          <year>2022</year>
          <month>03</month>
          <day>24</day>
          <volume>12</volume>
          <issue>3</issue>
          <fpage>557</fpage>
          <lpage>78</lpage>
          <pub-id pub-id-type="doi">10.1007/s13349-022-00557-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Indrasiri</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Rupapara</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Rustam</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ashraf</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Malicious traffic detection in IoT and local networks using stacked ensemble classifier</article-title>
          <source>Comput Mater Continua</source>
          <year>2021</year>
          <month>11</month>
          <volume>71</volume>
          <issue>1</issue>
          <fpage>489</fpage>
          <lpage>515</lpage>
          <pub-id pub-id-type="doi">10.32604/cmc.2022.019636</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zulfiker</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kabir</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Biswas</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Chakraborty</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Predicting insomnia using multilayer stacked ensemble model</article-title>
          <source>Advances in Computing and Data Sciences</source>
          <year>2021</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Allam</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nagy</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Thoma</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Krauthammer</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Neural networks versus Logistic regression for 30 days all-cause readmission prediction</article-title>
          <source>Sci Rep</source>
          <year>2019</year>
          <month>06</month>
          <day>26</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>9277</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-019-45685-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-019-45685-z</pub-id>
          <pub-id pub-id-type="medline">31243311</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-019-45685-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC6595068</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lone</surname>
              <given-names>NI</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Salisbury</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Donaghy</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ramsay</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Rattray</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Walsh</surname>
              <given-names>TS</given-names>
            </name>
          </person-group>
          <article-title>Predicting risk of unplanned hospital readmission in survivors of critical illness: a population-level cohort study</article-title>
          <source>Thorax</source>
          <year>2019</year>
          <month>11</month>
          <day>05</day>
          <volume>74</volume>
          <issue>11</issue>
          <fpage>1046</fpage>
          <lpage>54</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://hdl.handle.net/20.500.11820/233e60c9-321d-467b-a6fd-2dc0785cde2e"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/thoraxjnl-2017-210822</pub-id>
          <pub-id pub-id-type="medline">29622692</pub-id>
          <pub-id pub-id-type="pii">thoraxjnl-2017-210822</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Altman</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Krzywinski</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The curse(s) of dimensionality</article-title>
          <source>Nat Methods</source>
          <year>2018</year>
          <month>05</month>
          <day>31</day>
          <volume>15</volume>
          <issue>6</issue>
          <fpage>399</fpage>
          <lpage>400</lpage>
          <pub-id pub-id-type="doi">10.1038/s41592-018-0019-x</pub-id>
          <pub-id pub-id-type="medline">29855577</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41592-018-0019-x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tiwari</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chugh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Ensemble framework for cardiovascular disease prediction</article-title>
          <source>Comput Biol Med</source>
          <year>2022</year>
          <month>07</month>
          <volume>146</volume>
          <fpage>105624</fpage>
          <pub-id pub-id-type="doi">10.1016/j.compbiomed.2022.105624</pub-id>
          <pub-id pub-id-type="medline">35598355</pub-id>
          <pub-id pub-id-type="pii">S0010-4825(22)00416-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Artetxe</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Graña</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Beristain</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ríos</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Balanced training of a hybrid ensemble method for imbalanced datasets: a case of emergency department readmission prediction</article-title>
          <source>Neural Comput Appl</source>
          <year>2017</year>
          <month>10</month>
          <day>14</day>
          <volume>32</volume>
          <issue>10</issue>
          <fpage>5735</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.1007/s00521-017-3242-y</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Du</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Joint imbalanced classification and feature selection for hospital readmissions</article-title>
          <source>Knowl Based Syst</source>
          <year>2020</year>
          <month>07</month>
          <volume>200</volume>
          <fpage>106020</fpage>
          <pub-id pub-id-type="doi">10.1016/j.knosys.2020.106020</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bukhari</surname>
              <given-names>AA</given-names>
            </name>
          </person-group>
          <source>30-days All-cause Prediction Model for Readmissions for Heart Failure Patients: A Comparative Study of Machine Learning Approaches</source>
          <year>2019</year>
          <month>12</month>
          <publisher-loc>Boston, Massachusetts</publisher-loc>
          <publisher-name>Northeastern University</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hubbard</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Munoz</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Decker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Holcomb</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Schreiber</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Bulger</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Brasel</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Fox</surname>
              <given-names>EE</given-names>
            </name>
            <name name-style="western">
              <surname>del Junco</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wade</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Rahbar</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Cotton</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Phelan</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Myers</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Alarcon</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Muskat</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>MJ</given-names>
            </name>
            <collab>PROMMTT Study Group</collab>
          </person-group>
          <article-title>Time-dependent prediction and evaluation of variable importance using superlearning in high-dimensional clinical data</article-title>
          <source>J Trauma Acute Care Surg</source>
          <year>2013</year>
          <month>07</month>
          <volume>75</volume>
          <issue>1 Suppl 1</issue>
          <fpage>S53</fpage>
          <lpage>60</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23778512"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/TA.0b013e3182914553</pub-id>
          <pub-id pub-id-type="medline">23778512</pub-id>
          <pub-id pub-id-type="pii">01586154-201307001-00009</pub-id>
          <pub-id pub-id-type="pmcid">PMC3744063</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sureiman</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Mangera</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>F-test of overall significance in regression analysis simplified</article-title>
          <source>J Pract Cardiovasc Sci</source>
          <year>2020</year>
          <volume>6</volume>
          <fpage>116</fpage>
          <lpage>22</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rai</surname>
              <given-names>HM</given-names>
            </name>
            <name name-style="western">
              <surname>Chatterjee</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Hybrid CNN-LSTM deep learning model and ensemble technique for automatic detection of myocardial infarction using big ECG data</article-title>
          <source>Appl Intell</source>
          <year>2021</year>
          <month>08</month>
          <day>11</day>
          <volume>52</volume>
          <issue>5</issue>
          <fpage>5366</fpage>
          <lpage>84</lpage>
          <pub-id pub-id-type="doi">10.1007/s10489-021-02696-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dunn</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Health insurance and the demand for medical care: instrumental variable estimates using health insurer claims data</article-title>
          <source>J Health Econ</source>
          <year>2016</year>
          <month>07</month>
          <volume>48</volume>
          <fpage>74</fpage>
          <lpage>88</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jhealeco.2016.03.001</pub-id>
          <pub-id pub-id-type="medline">27107371</pub-id>
          <pub-id pub-id-type="pii">S0167-6296(16)30001-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Revisiting the relationship between nurse staffing and quality of care in nursing homes: an instrumental variables approach</article-title>
          <source>J Health Econ</source>
          <year>2014</year>
          <month>09</month>
          <volume>37</volume>
          <fpage>13</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jhealeco.2014.04.007</pub-id>
          <pub-id pub-id-type="medline">24887707</pub-id>
          <pub-id pub-id-type="pii">S0167-6296(14)00062-9</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
