<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v7i1e42832</article-id>
      <article-id pub-id-type="pmid">37014694</article-id>
      <article-id pub-id-type="doi">10.2196/42832</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Predicting Measles Outbreaks in the United States: Evaluation of Machine Learning Approaches</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Allam</surname>
            <given-names>Ayman</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Nagavally</surname>
            <given-names>Sneha</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Ru</surname>
            <given-names>Boshu</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Merck &#38; Co, Inc</institution>
            <addr-line>770 Sumneytown Pike</addr-line>
            <addr-line>Main Stop: WP37A</addr-line>
            <addr-line>West Point, PA, 19486</addr-line>
            <country>United States</country>
            <phone>1 2156524301</phone>
            <email>boshu.ru@merck.com</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9620-1306</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Kujawski</surname>
            <given-names>Stephanie</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7915-8553</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Lee Afanador</surname>
            <given-names>Nelson</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5858-3235</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Baumgartner</surname>
            <given-names>Richard</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3330-8477</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Pawaskar</surname>
            <given-names>Manjiri</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8009-805X</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Das</surname>
            <given-names>Amar</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3556-0844</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Merck &#38; Co, Inc</institution>
        <addr-line>West Point, PA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Merck &#38; Co, Inc</institution>
        <addr-line>Rahway, NJ</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Boshu Ru <email>boshu.ru@merck.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>4</day>
        <month>4</month>
        <year>2023</year>
      </pub-date>
      <volume>7</volume>
      <elocation-id>e42832</elocation-id>
      <history>
        <date date-type="received">
          <day>20</day>
          <month>9</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>5</day>
          <month>1</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>24</day>
          <month>1</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>7</day>
          <month>2</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Boshu Ru, Stephanie Kujawski, Nelson Lee Afanador, Richard Baumgartner, Manjiri Pawaskar, Amar Das. Originally published in JMIR Formative Research (https://formative.jmir.org), 04.04.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2023/1/e42832" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Measles, a highly contagious viral infection, is resurging in the United States, driven by international importation and declining domestic vaccination coverage. Despite this resurgence, measles outbreaks are still rare events that are difficult to predict. Improved methods to predict outbreaks at the county level would facilitate the optimal allocation of public health resources.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We aimed to validate and compare extreme gradient boosting (XGBoost) and logistic regression, 2 supervised learning approaches, to predict the US counties most likely to experience measles cases. We also aimed to assess the performance of hybrid versions of these models that incorporated additional predictors generated by 2 clustering algorithms, hierarchical density-based spatial clustering of applications with noise (HDBSCAN) and unsupervised random forest (uRF).</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We constructed a supervised machine learning model based on XGBoost and unsupervised models based on HDBSCAN and uRF. The unsupervised models were used to investigate clustering patterns among counties with measles outbreaks; these clustering data were also incorporated into hybrid XGBoost models as additional input variables. The machine learning models were then compared to logistic regression models with and without input from the unsupervised models.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Both HDBSCAN and uRF identified clusters that included a high percentage of counties with measles outbreaks. XGBoost and XGBoost hybrid models outperformed logistic regression and logistic regression hybrid models, with the area under the receiver operating characteristic curve values of 0.920-0.926 versus 0.900-0.908, the area under the precision-recall curve values of 0.522-0.532 versus 0.485-0.513, and <italic>F</italic><sub>2</sub> scores of 0.595-0.601 versus 0.385-0.426. Logistic regression or logistic regression hybrid models had higher sensitivity than XGBoost or XGBoost hybrid models (0.837-0.857 vs 0.704-0.735) but a lower positive predictive value (0.122-0.141 vs 0.340-0.367) and specificity (0.793-0.821 vs 0.952-0.958). The hybrid versions of the logistic regression and XGBoost models had slightly higher areas under the precision-recall curve, specificity, and positive predictive values than the respective models that did not include any unsupervised features.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>XGBoost provided more accurate predictions of measles cases at the county level compared with logistic regression. The threshold of prediction in this model can be adjusted to align with each county’s resources, priorities, and risk for measles. While clustering pattern data from unsupervised machine learning approaches improved some aspects of model performance in this imbalanced data set, the optimal approach for the integration of such approaches with supervised machine learning models requires further investigation.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>measles</kwd>
        <kwd>measles outbreaks</kwd>
        <kwd>measles epidemiology</kwd>
        <kwd>machine learning</kwd>
        <kwd>epidemiology</kwd>
        <kwd>hybrid machine learning</kwd>
        <kwd>infectious disease modeling</kwd>
        <kwd>infectious disease outbreak prediction</kwd>
        <kwd>unsupervised machine learning</kwd>
        <kwd>supervised machine learning</kwd>
        <kwd>infectious disease</kwd>
        <kwd>model</kwd>
        <kwd>predict</kwd>
        <kwd>outbreak</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Measles is a highly contagious viral infection that can cause serious acute illness, complications including pneumonia and encephalitis, and death [<xref ref-type="bibr" rid="ref1">1</xref>]. A population immunity of ~95% by 5 years of age is required to disrupt transmission [<xref ref-type="bibr" rid="ref2">2</xref>]. A vaccination program initiated in the 1960s led to the formal elimination of measles in the United States in 2000 [<xref ref-type="bibr" rid="ref3">3</xref>]. However, measles has recently resurged in the United States, with notable peaks occurring in 2014 (n=667 cases), 2018 (n=375), and 2019 (n=1282) [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref8">8</xref>].</p>
      <p>Despite this resurgence, measles outbreaks are still rare events that are difficult to predict. Known correlates of measles exposure and transmission include international importations, high population density, and low vaccination coverage [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. These factors vary substantially between and within states and can be used to help predict the likelihood and impact of measles outbreaks [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. However, few prior studies have used quantitative approaches to estimate the risk of measles outbreaks at the county level. One recent model used a multiplicative risk function of 4 factors—measles, mumps, and rubella vaccination coverage; county population; the volume of international air travel; and the incidence of measles at the origin points of incoming international flights—to predict 20 high-risk counties, of which 17 had at least 1 measles case in 2019, accounting for ~55% of 2019 measles cases [<xref ref-type="bibr" rid="ref9">9</xref>]. However, the model used only 4 predictors and was not validated using outbreak data from other years, meaning that its accuracy was not independently assessed. Measles prediction models could be further improved by incorporating additional county-level predictors of measles outbreak risk. For example, socioeconomic and demographic variables such as race or ethnicity, education, income, urbanicity, and health insurance coverage have been shown to correlate with measles vaccination coverage, while factors such as household composition may affect measles transmission rates [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref19">19</xref>].</p>
      <p>The identification and modeling of additional measles risk predictors may require unbiased algorithmic approaches [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. However, traditional statistical approaches, such as logistic regression, may be limited by incorrect assumptions about linearly independent predictor variables (ie, the predictors for neighboring counties may not be independent but rather multicollinear) and the low incidence of measles in the United States, which creates a data imbalance where the outcome of interest is a very rare event.</p>
      <p>Machine learning (ML) methods provide several potential solutions to the above limitations. Decision tree–based ML approaches such as the extreme gradient boosting (XGBoost) classification model are inherently neutral to multicollinearity; the training process chooses the most informative predictor at any given decision or prediction split point, rather than using all provided predictors as in logistic regression. Many ML algorithms also permit adjustments to the balance between majority and minority class instances in the training data set; this regularization of the model, also referred to as cost-sensitive training, allows the classification models to learn more information from rare observations and avoid overfitting on the majority negative class [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
      <p>Hybrid ML approaches that combine complementary models have been reported to have higher accuracy or a better interpretation of results than standalone models [<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>]. Combining supervised models such as XGBoost and logistic regression with unsupervised learning may help to overcome the challenges of predicting measles cases, based on the assumption that unsupervised learning processes will extract patterns from data that can be used as a new set of features that are less prone to biases introduced by multicollinearity and imbalanced data [<xref ref-type="bibr" rid="ref26">26</xref>].</p>
      <p>The objective of this study is to validate and compare XGBoost and logistic regression, 2 supervised learning approaches that are commonly used on tabular data, to predict the US counties most likely to experience measles cases. We compared these models with hybrid ML approaches that extended the XGBoost and logistic regression models to include additional predictors generated by 2 clustering algorithms, hierarchical density-based spatial clustering of applications with noise (HDBSCAN) and unsupervised random forest (uRF).</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Design</title>
        <p>We used supervised (XGBoost and logistic regression) and unsupervised (HDBSCAN and uRF) ML analyses, as well as hybrid approaches that combined XGBoost and logistic regression with HDBSCAN, uRF, or both (<xref rid="figure1" ref-type="fig">Figure 1</xref>). All supervised and hybrid models were trained on input predictor variable data from 2014 and 2018 (training data set), with the cost-sensitive training option enabled. Predictor and outcome data from 2019 (testing data set) were used to evaluate all models.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Study overview. HDBSCAN: hierarchical density-based spatial clustering of applications with noise; LR: logistic regression; PCoA: principal coordinate analysis; uRF: unsupervised random forest; XGBoost: extreme gradient boosting.</p>
          </caption>
          <graphic xlink:href="formative_v7i1e42832_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data</title>
        <p>The outcome of interest was the occurrence of ≥1 measles case at the county level. We performed a targeted search of published literature, state and local health department websites, and news articles to identify measles cases. We were able to identify information for 2895 counties in 2014, 2850 counties in 2018, and 2951 counties in 2019 and validate the county-level counts against published state-level counts [<xref ref-type="bibr" rid="ref27">27</xref>]. Each county-year pair was considered 1 data point. Counties for which we could not validate measles case counts for each year were removed from the data set.</p>
        <p>Variables relating to known and hypothesized predictors of measles outbreaks, based on the literature [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref14">14</xref>-<xref ref-type="bibr" rid="ref16">16</xref>], were obtained from publicly available data sources at the county level (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref42">42</xref>]. State- or metropolitan statistical area–level data were used as a proxy when county-level data were unavailable. Data were extracted from 1 year before the outcome year when possible, or else the closest possible prior year. Variables included sociodemographic data, population statistics, measles vaccination and exemption policies, health care access, and international air travel volume and origin countries (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <p>We aggregated international air travel volume for each county and measles outbreak incidence at the origin of travel into a single score measuring the risk of exposure to measles via international air travel. The identification of trips from measles outbreak countries was based on the initial origin and final destination of travel using the same ticket [<xref ref-type="bibr" rid="ref28">28</xref>]. The exposure scale was modeled using spatial diffusion, whereby international air travel passenger volumes to all US airports were proportionally distributed by population size to the county where the airport was located, the nearest neighbor counties, and the next-nearest neighbor counties, weighted by measles incidence at the travel origin and the county population [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
        <p>The main models were run using all predictor variables, with a sensitivity analysis to account for multicollinearity. Multicollinearity between predictors was detected by the variance inflation factor and correlation matrices [<xref ref-type="bibr" rid="ref43">43</xref>]. We hypothesized that eliminating predictor variables that were highly correlated would improve model performance and thus removed 10 predictor variables that were highly correlated to create a reduced version of the data set. The full list with summary statistics for each year and footnotes indicating variables removed in the reduced version is provided in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>With the exception of the air travel data, all data were extracted from publicly available published literature, state and local health department websites, and news articles. All data were aggregated and deidentified, and, therefore, this study was exempt from institutional review board approval.</p>
      </sec>
      <sec>
        <title>Models</title>
        <p>XGBoost is a gradient-boosting decision tree algorithm that is commonly used for classification and regression problems. The algorithm iteratively fits relatively simple models (typically small decision trees) to weighted versions of the training data. At each iteration, higher weights are assigned to data points that were misclassified by the model in the previous iteration; these are more likely to be from the minority class. Correctly predicting the minority class is thus rewarded more at each iteration. We magnified the weights assigned to data points in each iteration by the number of measles cases in the county + 1. This enabled the iterative training process to focus more on reducing classification errors for data points with more measles cases.</p>
        <p>In standard logistic regression, classifying an event as a false positive (FP) or false negative (FN) carries the same penalty in the model. To address the challenge of imbalanced data, in which one of the dependent values occurs infrequently, we developed a weighted logistic regression approach that penalized the model more for an FN result. The weights were based on a cost-sensitive measure derived from the ratio between counties in the training data set with and without measles cases.</p>
        <p>HDBSCAN is a density-based clustering algorithm that automatically optimizes cluster numbers and has the ability to work with noisy data [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]. We built an HDBSCAN model that maps each county-year observation into clusters using all predictor variables. A score measuring the algorithm’s confidence in assigning each observation to a cluster was also calculated. To investigate whether clustering results were informative for predicting measles cases, we compared the percentage of county-year pairs reporting measles outbreaks across the clusters. UMAP software was used to visualize the clusters by projecting them from multidimensional space onto a 2-dimensional surface [<xref ref-type="bibr" rid="ref46">46</xref>].</p>
        <p>uRF combines many weak learners (individual decision trees) as a vehicle for variance and bias reduction [<xref ref-type="bibr" rid="ref47">47</xref>,<xref ref-type="bibr" rid="ref48">48</xref>]. Methods such as multidimensional scaling combined with hierarchical clustering are used to create a lower-dimensional representation of the observations. In this study, we fitted an uRF model to obtain the proximity matrix for each county-year’s predictor variable data in the training data set and then applied the model to project proximity matrices for the testing data set. Each county-year observation was then represented in 3 principal coordinates (PCoA.1-3), which we applied to the training and testing data sets to determine whether there were clustering patterns among counties reporting measles cases.</p>
        <p>We also created 3 XGBoost and 3 logistic regression hybrid models that used outputs from HDBSCAN and uRF as additional features for making predictions. XGBoost and logistic regression with HDBSCAN models added cluster membership and confidence of clustering as new features; XGBoost and logistic regression with uRF models added PCoA.1-3; and XGBoost and logistic regression with HDBSCAN+uRF used both sets of new features. Data for 2014 and 2018 (5745 county-year pairs in total) were used as the training data set, and data for 2019 (2951 counties) were used as the testing data set.</p>
      </sec>
      <sec>
        <title>Evaluation</title>
        <p>The models were compared using evaluation metrics derived from the proportions of true positive (TP), FP, true negative (TN), and FN predictions. Sensitivity was defined as TP / (TP + FN), specificity as TN / (TN + FP), positive predictive value (PPV) as TP / (TP + FP), and the <italic>F</italic><sub>2</sub> score as (5 × PPV × sensitivity) / (4 × PPV + sensitivity). Given the highly infectious nature of measles, and thus the importance of sensitivity, we selected <italic>F</italic><sub>2</sub> over the more common <italic>F</italic><sub>1</sub> score, defined as (2 × PPV × sensitivity) / (PPV + sensitivity), to prioritize sensitivity over PPV.</p>
        <p>The predicted class (positive or negative) of our models was determined at the threshold of 0.20 (eg, Y<sub>prob</sub>&#62;0.20 → Y<sub>pred</sub>=1), which is smaller than the most commonly used value (0.50) due to data imbalance; adopting a lower threshold was expected to identify more counties vulnerable to measles outbreaks. Model prediction power was also measured using the area under the receiver operating characteristic curve (AUROC) and the area under the precision-recall curve (AUPRC), as suggested by previous studies on imbalanced data [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref49">49</xref>]. The AUROC values were calculated from plots of sensitivity against the FP rate across prediction thresholds and the AUPRC values from plots of PPV against sensitivity across prediction thresholds, with a perfect predictive model having an AUPRC and an AUROC of 1.0 and a coin-flip having an AUROC of 0.5 [<xref ref-type="bibr" rid="ref49">49</xref>]. There is no fixed AUPRC value for random models; the baseline performance is commonly recognized as the percentage of positive class members, which was 3.1% for this study (proportion of US counties having ≥1 measles case in 2019) [<xref ref-type="bibr" rid="ref49">49</xref>].</p>
        <p>Data preprocessing and logistic regression modeling were conducted using SAS Studio release 3.8 (Basic Edition; SAS Institute, Inc). Python (version 3.6; distributed by Anaconda, Inc) with the Pandas, NumPy, Scikit-learn, HDBSCAN, XGBoost, Matplotlib, and UMAP libraries and R (version 3.6.3; The R Foundation) with the STATS package were used to build the XGBoost, HDBSCAN, and uRF models.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Measles Cases</title>
        <p>We were able to identify counties for 635/667 (95.2%) of Centers for Disease Control and Prevention–reported US measles cases in 2014, 366/375 (97.6%) of 2018 cases, and 1247/1287 (96.9%) of 2019 cases. In 2014, 81 of the 3143 (2.6%) counties in the United States had ≥1 measles case, while 64 (2.0%) had ≥1 measles case in 2018 and 98 (3.1%) in 2019.</p>
      </sec>
      <sec>
        <title>Unsupervised Machine Learning</title>
        <p>The HDBSCAN model identified 4 clusters in the training data sets using all predictor variables (<xref rid="figure2" ref-type="fig">Figure 2</xref>A). The number of counties in clusters A and D with ≥1 measles case was 73/294 (24.8%) and 72/5936 (1.2%), respectively, while no counties with measles cases were found in clusters B or C. When applying the HDBSCAN clustering model to the testing data set, the measles cases also appeared only in clusters A and D, with frequencies of 58/207 (28%) and 40/2911 (1.4%), respectively.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Unsupervised learning results. (A) HDBSCAN-identified clusters, color-coded by cluster size and percentage of counties reporting measles cases. (B) Visualization of counties with and without measles cases by 2 of 3 uRF-generated principal coordinates. HDBSCAN: hierarchical density-based spatial clustering of applications with noise; PCoA: principal coordinate analysis; uRF: unsupervised random forest.</p>
          </caption>
          <graphic xlink:href="formative_v7i1e42832_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>The first and second PCoA derived by uRF for each county in the training and testing data sets were plotted using all predictor variables (<xref rid="figure2" ref-type="fig">Figure 2</xref>B). The observed clustering effects of counties with measles cases in the training and testing data sets were between 0.3 and 0.5 in the axis of PCoA.1 and between −0.05 and 1.5 for PCoA.2. These ranges are meaningful in that they reflect a similar projection of dissimilarities in both the training and testing data sets.</p>
      </sec>
      <sec>
        <title>Evaluation of Prediction Models</title>
        <p>The performance of all models at a prediction threshold of 0.20 is summarized in <xref ref-type="table" rid="table1">Table 1</xref>. The XGBoost and XGBoost hybrid models achieved higher AUROC and AUPRC scores than the logistic regression and logistic regression hybrid models (AUROC 0.920-0.926 vs 0.900-0.908; AUPRC 0.522-0.532 vs 0.485-0.513). All AUPRC values were considered high when compared with the low percentage of US counties reporting ≥1 measles case in 2019 (3.1%). At the threshold of 0.20, the hybrid models of XGBoost with HDBSCAN and uRF and XGBoost with uRF achieved the highest PPVs (0.367). Logistic regression with HDBSCAN and uRF features and logistic regression with uRF features produced the highest sensitivity (0.857), but the corresponding PPVs (0.141 and 0.139, respectively) were lower than those of the XGBoost and XGBoost hybrid models (0.340-0.367). XGBoost and XGBoost hybrid models had higher specificity (0.952-0.958) and <italic>F</italic><sub>2</sub> (0.595-0.601) than logistic regression and logistic regression hybrid models (0.793-0.821 and 0.385-0.426, respectively). For both XGBoost and logistic regression, the overall differences in performance measures between the original and hybrid versions of the same model were relatively small. The performance of all the models at a range of prediction thresholds between 0.0 and 1.0 is depicted in <xref rid="figure3" ref-type="fig">Figure 3</xref>.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Performance of models predicting US counties with ≥1 measles case in 2019.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="330"/>
            <col width="0"/>
            <col width="100"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="120"/>
            <col width="0"/>
            <col width="70"/>
            <col width="0"/>
            <col width="110"/>
            <col width="0"/>
            <col width="130"/>
            <thead>
              <tr valign="bottom">
                <td colspan="3">Model</td>
                <td colspan="2">PPV<sup>a</sup></td>
                <td colspan="2">Sensitivity</td>
                <td colspan="2">Specificity</td>
                <td colspan="2">
                  <italic>F</italic>
                  <sub>2</sub>
                  <sup>b</sup>
                </td>
                <td colspan="2">AUROC<sup>c</sup></td>
                <td>AUPRC<sup>d</sup></td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="14">
                  <bold>All variables</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>XGBoost<sup>e</sup></td>
                <td colspan="2">0.348</td>
                <td colspan="2">0.735</td>
                <td colspan="2">0.953</td>
                <td colspan="2">0.601</td>
                <td colspan="2">0.926</td>
                <td colspan="2">0.522</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>XGBoost with HDBSCAN<sup>f</sup></td>
                <td colspan="2">0.340</td>
                <td colspan="2">0.724</td>
                <td colspan="2">0.952</td>
                <td colspan="2">0.591</td>
                <td colspan="2">0.924</td>
                <td colspan="2">0.525</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>XGBoost with uRF<sup>g</sup></td>
                <td colspan="2">0.367</td>
                <td colspan="2">0.704</td>
                <td colspan="2">0.958</td>
                <td colspan="2">0.595</td>
                <td colspan="2">0.920</td>
                <td colspan="2">0.524</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>XGBoost with HDBSCAN+uRF</td>
                <td colspan="2">0.367</td>
                <td colspan="2">0.704</td>
                <td colspan="2">0.958</td>
                <td colspan="2">0.595</td>
                <td colspan="2">0.922</td>
                <td colspan="2">0.532</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LR<sup>h</sup></td>
                <td colspan="2">0.122</td>
                <td colspan="2">0.837</td>
                <td colspan="2">0.793</td>
                <td colspan="2">0.385</td>
                <td colspan="2">0.900</td>
                <td colspan="2">0.485</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LR with HDBSCAN</td>
                <td colspan="2">0.125</td>
                <td colspan="2">0.837</td>
                <td colspan="2">0.798</td>
                <td colspan="2">0.391</td>
                <td colspan="2">0.900</td>
                <td colspan="2">0.497</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LR with uRF</td>
                <td colspan="2">0.139</td>
                <td colspan="2">0.857</td>
                <td colspan="2">0.818</td>
                <td colspan="2">0.422</td>
                <td colspan="2">0.908</td>
                <td colspan="2">0.512</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LR with HDBSCAN+uRF</td>
                <td colspan="2">0.141</td>
                <td colspan="2">0.857</td>
                <td colspan="2">0.821</td>
                <td colspan="2">0.426</td>
                <td colspan="2">0.907</td>
                <td colspan="2">0.513</td>
              </tr>
              <tr valign="top">
                <td colspan="14">
                  <bold>Reduced data set</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>XGBoost</td>
                <td colspan="2">0.333</td>
                <td colspan="2">0.724</td>
                <td colspan="2">0.950</td>
                <td colspan="2">0.587</td>
                <td colspan="2">0.931</td>
                <td colspan="2">0.525</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>XGBoost with HDBSCAN</td>
                <td colspan="2">0.340</td>
                <td colspan="2">0.735</td>
                <td colspan="2">0.951</td>
                <td colspan="2">0.596</td>
                <td colspan="2">0.930</td>
                <td colspan="2">0.519</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>XGBoost with uRF</td>
                <td colspan="2">0.335</td>
                <td colspan="2">0.724</td>
                <td colspan="2">0.951</td>
                <td colspan="2">0.588</td>
                <td colspan="2">0.924</td>
                <td colspan="2">0.515</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>XGBoost with HDBSCAN+uRF</td>
                <td colspan="2">0.326</td>
                <td colspan="2">0.735</td>
                <td colspan="2">0.948</td>
                <td colspan="2">0.587</td>
                <td colspan="2">0.927</td>
                <td colspan="2">0.515</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LR</td>
                <td colspan="2">0.087</td>
                <td colspan="2">0.796</td>
                <td colspan="2">0.715</td>
                <td colspan="2">0.304</td>
                <td colspan="2">0.844</td>
                <td colspan="2">0.368</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LR with HDBSCAN</td>
                <td colspan="2">0.096</td>
                <td colspan="2">0.867</td>
                <td colspan="2">0.720</td>
                <td colspan="2">0.333</td>
                <td colspan="2">0.894</td>
                <td colspan="2">0.402</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LR with uRF</td>
                <td colspan="2">0.121</td>
                <td colspan="2">0.878</td>
                <td colspan="2">0.781</td>
                <td colspan="2">0.390</td>
                <td colspan="2">0.898</td>
                <td colspan="2">0.403</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>LR with HDBSCAN+uRF</td>
                <td colspan="2">0.119</td>
                <td colspan="2">0.867</td>
                <td colspan="2">0.779</td>
                <td colspan="2">0.384</td>
                <td colspan="2">0.902</td>
                <td colspan="2">0.433</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>PPV: positive predictive value.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup><italic>F</italic><sub>2</sub> score = (5 × PPV × sensitivity) / (4 × PPV + sensitivity).</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>AUROC: area under the receiver operating characteristic curve.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>AUPRC: area under the precision-recall curve.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>XGBoost: extreme gradient boosting.</p>
            </fn>
            <fn id="table1fn6">
              <p><sup>f</sup>HDBSCAN: hierarchical density-based spatial clustering of applications with noise.</p>
            </fn>
            <fn id="table1fn7">
              <p><sup>g</sup>uRF: unsupervised random forest.</p>
            </fn>
            <fn id="table1fn8">
              <p><sup>h</sup>LR: logistic regression.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Comparative model performance at different prediction thresholds. <italic>F</italic><sub>2</sub> score = (5 × PPV × sensitivity) / (4 × PPV + sensitivity). PPV: positive predictive value; XGBoost: extreme gradient boosting.</p>
          </caption>
          <graphic xlink:href="formative_v7i1e42832_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>As a sensitivity analysis, we also evaluated the performance of models trained on the reduced variable data set (<xref ref-type="table" rid="table1">Table 1</xref>). The XGBoost and XGBoost hybrid models outperformed the logistic regression and logistic regression hybrid models on this data set in terms of AUPRC (0.515-0.525 vs 0.368-0.433) and AUROC (0.924-0.931 vs 0.844-0.902) but had lower sensitivity (0.724-0.735 vs 0.796-0.878). The PPV, sensitivity, specificity, and <italic>F</italic><sub>2</sub> scores at a prediction threshold of 0.20 were very similar among the original and hybrid models of the same type, for both XGBoost and logistic regression. The performance of logistic regression and its hybrid models was more impacted by removing the correlated predictor variables, with lower AUROC and AUPRC scores than for the corresponding models using the full data set (0.844-0.902 vs 0.900-0.908 and 0.368-0.433 vs 0.485-0.513, respectively). In contrast, the performance of XGBoost and its hybrid models was similar between the 2 data sets.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>This work developed supervised and hybrid ML models to identify US counties at risk of measles cases and compared them with predictions made using logistic regression. To our knowledge, this study is the first to determine the absolute risk of a county having a measles outbreak using ML approaches. This model is an improvement over the previous work done in this area, as it takes into account a comprehensive list of predictors that are associated with measles outbreaks to further improve the predictions.</p>
      <p>Two different types of unsupervised models could identify clusters or groups of counties that had ≥1 measles case. In the supervised learning analysis, all models achieved very high prediction scores for future measles outbreaks as measured by AUROC and AUPRC, with XGBoost and XGBoost hybrid models outperforming logistic regression and logistic regression hybrid models. Adding clustering results and principal coordinates from unsupervised learning models as additional predictors did not improve all performance metrics of XGBoost models; in contrast, adding these features improved all performance metrics of the logistic regression models by small margins. The optimal way to incorporate information from HDBSCAN, uRF, or other unsupervised clustering algorithms into prediction models remains an open question. One potential direction is to develop predictive models tailored to clusters of counties that were identified through unsupervised learning methods. We also found that removing 10 correlated predictors with high variance inflation factors did not improve model performance in this study; however, models with a reduced number of variables may provide more interpretable results and prove more practical for public health implementation by streamlining the data collection process. It is also worth mentioning that we presented evaluation metrics as point estimates instead of constructing approximate CIs using the bootstrapping or jackknife approach, as is done in some research, because our models produced similar performance metrics, especially for AUROC and AUPRC, and comparing their rank and point estimates of scores was, therefore, sufficient [<xref ref-type="bibr" rid="ref50">50</xref>].</p>
      <p>In this study, we selected 0.20 as the threshold to calculate PPV, sensitivity, specificity, and <italic>F</italic><sub>2</sub>. This was a subjective decision based on the rarity of measles outbreaks. The threshold can be adjusted depending on decision makers’ tolerance for FP and FN results; for example, counties with fewer resources may need to implement higher thresholds. A dedicated cost-utility model that anchors changes in costs and mortality to FP and FN rates can also be built and empirically evaluated in the future to guide threshold selection [<xref ref-type="bibr" rid="ref51">51</xref>].</p>
      <p>This study is subject to several limitations. We were unable to identify the affected county for a small proportion of measles cases, which may impact prediction accuracy. County-level data on vaccination coverage and exemption rates were not available for all counties, and metropolitan statistical area- or state-level data may not necessarily be good proxies. Some predictor variables were included based on the association between vaccine hesitancy and individual-level variables; including these variables at the county level may have introduced an atomistic fallacy [<xref ref-type="bibr" rid="ref52">52</xref>]. Further, we only included 3 distinct years of data in the study; adding more years of data (when they become available) may improve the generalizability of the results. Finally, a spatial diffusion model was used to estimate the final destination counties of travelers after arrival at the destination airport, but we did not account for the risk of spreading via domestic air travel or other major long-distance domestic travel routes.</p>
      <p>The COVID-19 pandemic has affected the volume and pattern of domestic and international air traffic and has negatively impacted the on-time administration of routine childhood vaccinations in the United States [<xref ref-type="bibr" rid="ref53">53</xref>-<xref ref-type="bibr" rid="ref55">55</xref>]. In the United States, the pandemic may have also increased hesitancy related to vaccines and altered the demographic patterns of this hesitancy [<xref ref-type="bibr" rid="ref56">56</xref>]. However, the long-term impact of the pandemic on measles importation and the rates and patterns of vaccination coverage is not yet known. Predictive models of measles outbreak risk may therefore have to be adjusted before their application to years after 2019.</p>
      <p>In conclusion, XGBoost outperformed logistic regression in predicting the US counties at risk of measles cases. Unsupervised learning models also identified clustering patterns for counties with measles cases, and these features helped to improve the PPVs of both XGBoost and logistic regression. Additional work on developing hybrid models that incorporate unsupervised ML methods may lead to further optimization of outbreak prediction.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Data sources.</p>
        <media xlink:href="formative_v7i1e42832_app1.docx" xlink:title="DOCX File, 23 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Summary statistics for predictor variables of interest.</p>
        <media xlink:href="formative_v7i1e42832_app2.docx" xlink:title="DOCX File, 24 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUPRC</term>
          <def>
            <p>area under the precision-recall curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AUROC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">FN</term>
          <def>
            <p>false negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">FP</term>
          <def>
            <p>false positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">HDBSCAN</term>
          <def>
            <p>hierarchical density-based spatial clustering of applications with noise</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">PCoA</term>
          <def>
            <p>principal coordinate analysis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">PPV</term>
          <def>
            <p>positive predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">TN</term>
          <def>
            <p>true negative</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">TP</term>
          <def>
            <p>true positive</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">uRF</term>
          <def>
            <p>unsupervised random forest</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">XGBoost</term>
          <def>
            <p>extreme gradient boosting</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors thank Matthew Pillsbury, Colleen Burgess, and Zhiwen Liu for their contributions. The authors also thank Cath Ennis, PhD, in collaboration with ScribCo, for medical writing assistance. This study was funded by Merck Sharp &#38; Dohme LLC, a subsidiary of Merck &#38; Co, Inc.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The air travel data were commercially licensed from Airport Strategy and Marketing Ltd [<xref ref-type="bibr" rid="ref28">28</xref>]. Other data were extracted from published literature, state and local health department websites, and news articles, with data sources provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendices 1</xref> and <xref ref-type="supplementary-material" rid="app2">2</xref>. Data sharing is not applicable to this paper as no data sets were generated during this study.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>SK, BR, NLA, RB, and MP are employees of Merck Sharp &#38; Dohme LLC, a subsidiary of Merck &#38; Co, Inc, and may hold stock or stock options in Merck &#38; Co, Inc. AD was an employee of Merck Sharp &#38; Dohme LLC, a subsidiary of Merck &#38; Co, Inc, when he worked on this study.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>Centers for Disease Control and Prevention</collab>
          </person-group>
          <article-title>Measles</article-title>
          <source>Epidemiology and Prevention of Vaccine-Preventable Diseases</source>
          <year>2015</year>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>Public Health Foundation</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Funk</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Knapp</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Lebo</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Reef</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Dabbagh</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kretsinger</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Jit</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Edmunds</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Strebel</surname>
              <given-names>PM</given-names>
            </name>
          </person-group>
          <article-title>Combining serological and contact data to derive target immunity levels for achieving and maintaining measles elimination</article-title>
          <source>BMC Med</source>
          <year>2019</year>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>180</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedicine.biomedcentral.com/articles/10.1186/s12916-019-1413-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12916-019-1413-7</pub-id>
          <pub-id pub-id-type="medline">31551070</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12916-019-1413-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC6760101</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Papania</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>GS</given-names>
            </name>
            <name name-style="western">
              <surname>Rota</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Icenogle</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Fiebelkorn</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Armstrong</surname>
              <given-names>GL</given-names>
            </name>
            <name name-style="western">
              <surname>Reef</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Redd</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Abernathy</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Barskey</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>McLean</surname>
              <given-names>HQ</given-names>
            </name>
            <name name-style="western">
              <surname>Rota</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Bellini</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Seward</surname>
              <given-names>JF</given-names>
            </name>
          </person-group>
          <article-title>Elimination of endemic measles, rubella, and congenital rubella syndrome from the Western hemisphere: the US experience</article-title>
          <source>JAMA Pediatr</source>
          <year>2014</year>
          <month>02</month>
          <volume>168</volume>
          <issue>2</issue>
          <fpage>148</fpage>
          <lpage>155</lpage>
          <pub-id pub-id-type="doi">10.1001/jamapediatrics.2013.4342</pub-id>
          <pub-id pub-id-type="medline">24311021</pub-id>
          <pub-id pub-id-type="pii">1787786</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <article-title>Measles cases and outbreaks</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <access-date>2022-09-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/measles/cases-outbreaks.html">https://www.cdc.gov/measles/cases-outbreaks.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Harrison</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>On the Brink: why the U.S. is in danger of losing measles elimination status</article-title>
          <source>Mo Med</source>
          <year>2019</year>
          <volume>116</volume>
          <issue>4</issue>
          <fpage>260</fpage>
          <lpage>264</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/31527963"/>
          </comment>
          <pub-id pub-id-type="medline">31527963</pub-id>
          <pub-id pub-id-type="pmcid">PMC6699811</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Katz</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Hinman</surname>
              <given-names>AR</given-names>
            </name>
          </person-group>
          <article-title>Summary and conclusions: measles elimination meeting, 16-17 March 2000</article-title>
          <source>J Infect Dis</source>
          <year>2004</year>
          <month>05</month>
          <day>01</day>
          <volume>189</volume>
          <issue>Suppl 1</issue>
          <fpage>S43</fpage>
          <lpage>S47</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.jid.oxfordjournals.org/cgi/pmidlookup?view=long&#38;pmid=15106088"/>
          </comment>
          <pub-id pub-id-type="doi">10.1086/377696</pub-id>
          <pub-id pub-id-type="medline">15106088</pub-id>
          <pub-id pub-id-type="pii">JID20790</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Clemmons</surname>
              <given-names>NS</given-names>
            </name>
            <name name-style="western">
              <surname>Redd</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Poser</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Blog</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Zucker</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Leung</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Link-Gelles</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pham</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Arciuolo</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rausch-Phung</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bankamp</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rota</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Weinbaum</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Gastañaduy</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>National update on measles cases and outbreaks - United States, January 1-October 1, 2019</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2019</year>
          <month>10</month>
          <day>11</day>
          <volume>68</volume>
          <issue>40</issue>
          <fpage>893</fpage>
          <lpage>896</lpage>
          <pub-id pub-id-type="doi">10.15585/mmwr.mm6840e2</pub-id>
          <pub-id pub-id-type="medline">31600181</pub-id>
          <pub-id pub-id-type="pmcid">PMC6788396</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Redd</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Clemmons</surname>
              <given-names>NS</given-names>
            </name>
            <name name-style="western">
              <surname>McNall</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Cohn</surname>
              <given-names>AC</given-names>
            </name>
            <name name-style="western">
              <surname>Gastañaduy</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>Increase in measles cases—United States, January 1-April 26, 2019</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2019</year>
          <month>05</month>
          <day>03</day>
          <volume>68</volume>
          <issue>17</issue>
          <fpage>402</fpage>
          <lpage>404</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.15585/mmwr.mm6817e1"/>
          </comment>
          <pub-id pub-id-type="doi">10.15585/mmwr.mm6817e1</pub-id>
          <pub-id pub-id-type="medline">31048672</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gardner</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Sarkar</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Persistence of US measles risk due to vaccine hesitancy and outbreaks abroad</article-title>
          <source>Lancet Infect Dis</source>
          <year>2020</year>
          <month>10</month>
          <volume>20</volume>
          <issue>10</issue>
          <fpage>1114</fpage>
          <lpage>1115</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32738934"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/S1473-3099(20)30522-3</pub-id>
          <pub-id pub-id-type="medline">32738934</pub-id>
          <pub-id pub-id-type="pii">S1473-3099(20)30522-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC7392555</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Poterek</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Kraemer</surname>
              <given-names>MUG</given-names>
            </name>
            <name name-style="western">
              <surname>Watts</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Perkins</surname>
              <given-names>TA</given-names>
            </name>
          </person-group>
          <article-title>Air passenger travel and international surveillance data predict spatiotemporal variation in measles importations to the United States</article-title>
          <source>Pathogens</source>
          <year>2021</year>
          <month>02</month>
          <day>03</day>
          <volume>10</volume>
          <issue>2</issue>
          <fpage>155</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=pathogens10020155"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/pathogens10020155</pub-id>
          <pub-id pub-id-type="medline">33546131</pub-id>
          <pub-id pub-id-type="pii">pathogens10020155</pub-id>
          <pub-id pub-id-type="pmcid">PMC7913265</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarkar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zlojutro</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gardner</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Measles resurgence in the USA: how international travel compounds vaccine resistance</article-title>
          <source>Lancet Infect Dis</source>
          <year>2019</year>
          <month>07</month>
          <volume>19</volume>
          <issue>7</issue>
          <fpage>684</fpage>
          <lpage>686</lpage>
          <pub-id pub-id-type="doi">10.1016/s1473-3099(19)30231-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sinclair</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Grefenstette</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Krauland</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Galloway</surname>
              <given-names>DD</given-names>
            </name>
            <name name-style="western">
              <surname>Frankeny</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Travis</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Burke</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>Forecasted size of measles outbreaks associated with vaccination exemptions for schoolchildren</article-title>
          <source>JAMA Netw Open</source>
          <year>2019</year>
          <month>08</month>
          <day>02</day>
          <volume>2</volume>
          <issue>8</issue>
          <fpage>e199768</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jamanetwork.com/journals/jamanetworkopen/fullarticle/10.1001/jamanetworkopen.2019.9768"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamanetworkopen.2019.9768</pub-id>
          <pub-id pub-id-type="medline">31433482</pub-id>
          <pub-id pub-id-type="pii">2748595</pub-id>
          <pub-id pub-id-type="pmcid">PMC6707017</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feemster</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Szipszky</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Resurgence of measles in the United States: how did we get here?</article-title>
          <source>Curr Opin Pediatr</source>
          <year>2020</year>
          <month>02</month>
          <volume>32</volume>
          <issue>1</issue>
          <fpage>139</fpage>
          <lpage>144</lpage>
          <pub-id pub-id-type="doi">10.1097/MOP.0000000000000845</pub-id>
          <pub-id pub-id-type="medline">31790030</pub-id>
          <pub-id pub-id-type="pii">00008480-202002000-00019</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lieu</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Ray</surname>
              <given-names>GT</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>NP</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kulldorff</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Geographic clusters in underimmunization and vaccine refusal</article-title>
          <source>Pediatrics</source>
          <year>2015</year>
          <month>02</month>
          <volume>135</volume>
          <issue>2</issue>
          <fpage>280</fpage>
          <lpage>289</lpage>
          <pub-id pub-id-type="doi">10.1542/peds.2014-2715</pub-id>
          <pub-id pub-id-type="medline">25601971</pub-id>
          <pub-id pub-id-type="pii">peds.2014-2715</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Olive</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Hotez</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Damania</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nolan</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>The state of the antivaccine movement in the United States: a focused examination of nonmedical exemptions in states and counties</article-title>
          <source>PLoS Med</source>
          <year>2018</year>
          <month>06</month>
          <volume>15</volume>
          <issue>6</issue>
          <fpage>e1002578</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pmed.1002578"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pmed.1002578</pub-id>
          <pub-id pub-id-type="medline">29894470</pub-id>
          <pub-id pub-id-type="pii">PMEDICINE-D-17-04352</pub-id>
          <pub-id pub-id-type="pmcid">PMC5997312</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Marcuse</surname>
              <given-names>EK</given-names>
            </name>
            <name name-style="western">
              <surname>Seward</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Orenstein</surname>
              <given-names>WA</given-names>
            </name>
          </person-group>
          <article-title>Children and adolescents unvaccinated against measles: geographic clustering, parents' beliefs, and missed opportunities</article-title>
          <source>Public Health Rep</source>
          <year>2015</year>
          <volume>130</volume>
          <issue>5</issue>
          <fpage>485</fpage>
          <lpage>504</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/26327727"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/003335491513000512</pub-id>
          <pub-id pub-id-type="medline">26327727</pub-id>
          <pub-id pub-id-type="pmcid">PMC4529833</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hill</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Singleton</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Yankey</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Elam-Evans</surname>
              <given-names>LD</given-names>
            </name>
            <name name-style="western">
              <surname>Pingali</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Vaccination coverage by age 24 months among children born in 2015 and 2016—National Immunization Survey-Child, United States, 2016-2018</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2019</year>
          <month>10</month>
          <day>18</day>
          <volume>68</volume>
          <issue>41</issue>
          <fpage>913</fpage>
          <lpage>918</lpage>
          <pub-id pub-id-type="doi">10.15585/mmwr.mm6841e2</pub-id>
          <pub-id pub-id-type="medline">31622284</pub-id>
          <pub-id pub-id-type="pmcid">PMC6802679</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Jarrett</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Eckersberger</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>DMD</given-names>
            </name>
            <name name-style="western">
              <surname>Paterson</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Understanding vaccine hesitancy around vaccines and vaccination from a global perspective: a systematic review of published literature, 2007-2012</article-title>
          <source>Vaccine</source>
          <year>2014</year>
          <month>04</month>
          <day>17</day>
          <volume>32</volume>
          <issue>19</issue>
          <fpage>2150</fpage>
          <lpage>2159</lpage>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2014.01.081</pub-id>
          <pub-id pub-id-type="medline">24598724</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(14)00144-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Salmon</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Dudley</surname>
              <given-names>MZ</given-names>
            </name>
            <name name-style="western">
              <surname>Glanz</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Omer</surname>
              <given-names>SB</given-names>
            </name>
          </person-group>
          <article-title>Vaccine hesitancy: causes, consequences, and a call to action</article-title>
          <source>Vaccine</source>
          <year>2015</year>
          <month>11</month>
          <day>27</day>
          <volume>33</volume>
          <issue>Suppl 4</issue>
          <fpage>D66</fpage>
          <lpage>D71</lpage>
          <pub-id pub-id-type="doi">10.1016/j.vaccine.2015.09.035</pub-id>
          <pub-id pub-id-type="medline">26615171</pub-id>
          <pub-id pub-id-type="pii">S0264-410X(15)01311-0</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bi</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Goodman</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Kaminsky</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lessler</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>What is machine learning? A primer for the epidemiologist</article-title>
          <source>Am J Epidemiol</source>
          <year>2019</year>
          <month>12</month>
          <day>31</day>
          <volume>188</volume>
          <issue>12</issue>
          <fpage>2222</fpage>
          <lpage>2239</lpage>
          <pub-id pub-id-type="doi">10.1093/aje/kwz189</pub-id>
          <pub-id pub-id-type="medline">31509183</pub-id>
          <pub-id pub-id-type="pii">5567515</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wiemken</surname>
              <given-names>TL</given-names>
            </name>
            <name name-style="western">
              <surname>Kelley</surname>
              <given-names>RR</given-names>
            </name>
          </person-group>
          <article-title>Machine learning in epidemiology and health outcomes research</article-title>
          <source>Annu Rev Public Health</source>
          <year>2020</year>
          <month>04</month>
          <day>02</day>
          <volume>41</volume>
          <fpage>21</fpage>
          <lpage>36</lpage>
          <pub-id pub-id-type="doi">10.1146/annurev-publhealth-040119-094437</pub-id>
          <pub-id pub-id-type="medline">31577910</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>He</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia</surname>
              <given-names>EA</given-names>
            </name>
          </person-group>
          <article-title>Learning from imbalanced data</article-title>
          <source>IEEE Trans Knowl Data Eng</source>
          <year>2009</year>
          <month>09</month>
          <volume>21</volume>
          <issue>9</issue>
          <fpage>1263</fpage>
          <lpage>1284</lpage>
          <pub-id pub-id-type="doi">10.1109/tkde.2008.239</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alizadeh-Sani</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Martínez</surname>
              <given-names>PP</given-names>
            </name>
            <name name-style="western">
              <surname>González</surname>
              <given-names>GH</given-names>
            </name>
            <name name-style="western">
              <surname>González-Briones</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chamoso</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Corchado</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>De La Prieta</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>El Bolock</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Durães</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Carneiro</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lopes</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Julian</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>A hybrid supervised/unsupervised machine learning approach to classify web services</article-title>
          <source>Highlights in Practical Applications of Agents, Multi-agent Systems, and Social Good. The PAAMS Collection. PAAMS 2021. Communications in Computer and Information Science</source>
          <year>2021</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Steel</surname>
              <given-names>PAD</given-names>
            </name>
            <name name-style="western">
              <surname>Axsom</surname>
              <given-names>KM</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Tummalapalli</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Pathak</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Subramanian</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Deep significance clustering: a novel approach for identifying risk-stratified and predictive patient subgroups</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>11</month>
          <day>25</day>
          <volume>28</volume>
          <issue>12</issue>
          <fpage>2641</fpage>
          <lpage>2653</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34571540"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab203</pub-id>
          <pub-id pub-id-type="medline">34571540</pub-id>
          <pub-id pub-id-type="pii">6377084</pub-id>
          <pub-id pub-id-type="pmcid">PMC8500061</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Adenutsi</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Hybrid application of unsupervised and supervised learning in forecasting absolute open flow potential for shale gas reservoirs</article-title>
          <source>Energy</source>
          <year>2022</year>
          <month>03</month>
          <volume>243</volume>
          <fpage>122747</fpage>
          <pub-id pub-id-type="doi">10.1016/j.energy.2021.122747</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nicodemus</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Malley</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Strobl</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ziegler</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The behaviour of random forest permutation-based variable importance measures under predictor correlation</article-title>
          <source>BMC Bioinformatics</source>
          <year>2010</year>
          <month>02</month>
          <day>27</day>
          <volume>11</volume>
          <fpage>110</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-110"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2105-11-110</pub-id>
          <pub-id pub-id-type="medline">20187966</pub-id>
          <pub-id pub-id-type="pii">1471-2105-11-110</pub-id>
          <pub-id pub-id-type="pmcid">PMC2848005</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <article-title>Nationally notifiable infectious diseases and conditions, United States: annual tables: malaria, measles</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <year>2019</year>
          <access-date>2023-02-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://wonder.cdc.gov/nndss/static/2019/annual/2019-table2k.html">https://wonder.cdc.gov/nndss/static/2019/annual/2019-table2k.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="web">
          <article-title>MIDT data</article-title>
          <source>Airport Strategy and Marketing Ltd</source>
          <access-date>2022-09-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.asm-global.com/">https://www.asm-global.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Seither</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Masalovich</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Knighton</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Mellerson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Singleton</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Greby</surname>
              <given-names>SM</given-names>
            </name>
            <collab>Centers for Disease Control and Prevention (CDC)</collab>
          </person-group>
          <article-title>Vaccination coverage among children in kindergarten—United States, 2013-14 school year</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2014</year>
          <month>10</month>
          <day>17</day>
          <volume>63</volume>
          <issue>41</issue>
          <fpage>913</fpage>
          <lpage>920</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/mmwr/preview/mmwrhtml/mm6341a1.htm"/>
          </comment>
          <pub-id pub-id-type="medline">25321068</pub-id>
          <pub-id pub-id-type="pii">mm6341a1</pub-id>
          <pub-id pub-id-type="pmcid">PMC4584748</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mellerson</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Maxwell</surname>
              <given-names>CB</given-names>
            </name>
            <name name-style="western">
              <surname>Knighton</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Kriss</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Seither</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Black</surname>
              <given-names>CL</given-names>
            </name>
          </person-group>
          <article-title>Vaccination coverage for selected vaccines and exemption rates among children in kindergarten—United States, 2017-18 school year</article-title>
          <source>MMWR Morb Mortal Wkly Rep</source>
          <year>2018</year>
          <month>10</month>
          <day>12</day>
          <volume>67</volume>
          <issue>40</issue>
          <fpage>1115</fpage>
          <lpage>1122</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.15585/mmwr.mm6740a3"/>
          </comment>
          <pub-id pub-id-type="doi">10.15585/mmwr.mm6740a3</pub-id>
          <pub-id pub-id-type="medline">30307904</pub-id>
          <pub-id pub-id-type="pmcid">PMC6181259</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <article-title>Young Center for Anabaptist and Pietist Studies at Elizabethtown College</article-title>
          <source>Amish Studies</source>
          <access-date>2022-09-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://groups.etown.edu/amishstudies/statistics/older-statistics/">http://groups.etown.edu/amishstudies/statistics/older-statistics/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grammich</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hadaway</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Houseal</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Krindatch</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stanley</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>U.S. Religion census: religious congregations and membership study, 2010 (county file)</article-title>
          <source>The Association of Religion Data Archives</source>
          <year>2018</year>
          <access-date>2022-09-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://thearda.com/Archive/Files/Descriptions/RCMSCY10.asp">https://thearda.com/Archive/Files/Descriptions/RCMSCY10.asp</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <article-title>American Community Survey (ACS)</article-title>
          <source>United States Census Bureau</source>
          <access-date>2022-09-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.census.gov/programs-surveys/acs">https://www.census.gov/programs-surveys/acs</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <article-title>CDC/ATSDR social vulnerability index</article-title>
          <source>Centers for Disease Control and Prevention and Agency for Toxic Substances and Disease Registry</source>
          <access-date>2023-02-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.atsdr.cdc.gov/placeandhealth/svi/index.html">https://www.atsdr.cdc.gov/placeandhealth/svi/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <article-title>County population totals: 2010-2019</article-title>
          <source>United States Census Bureau</source>
          <access-date>2022-09-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.census.gov/data/tables/time-series/demo/popest/2010s-counties-total.html">https://www.census.gov/data/tables/time-series/demo/popest/2010s-counties-total.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <article-title>Small area income and poverty estimates (SAIPE) program</article-title>
          <source>United States Census Bureau</source>
          <access-date>2022-09-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.census.gov/programs-surveys/saipe.html">https://www.census.gov/programs-surveys/saipe.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <article-title>Local area unemployment statistics</article-title>
          <source>United States Bureau of Labor Statistics</source>
          <access-date>2022-09-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.bls.gov/lau/#cntyaa">https://www.bls.gov/lau/#cntyaa</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <article-title>Local area personal income, 2018</article-title>
          <source>United States Bureau of Economic Analysis</source>
          <access-date>2022-09-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://apps.bea.gov/regional/histdata/releases/1119lapi/index.cfm">https://apps.bea.gov/regional/histdata/releases/1119lapi/index.cfm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <article-title>2008-2019 small area health insurance estimates (SAHIE) using the American Community Survey (ACS)</article-title>
          <source>United States Census Bureau</source>
          <year>2019</year>
          <access-date>2022-09-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.census.gov/data/datasets/time-series/demo/sahie/estimates-acs.html">https://www.census.gov/data/datasets/time-series/demo/sahie/estimates-acs.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <article-title>BRFSS prevalence and trends data</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <access-date>2022-09-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cdc.gov/brfss/brfssprevalence/index.html">https://www.cdc.gov/brfss/brfssprevalence/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <article-title>Surveillance for vaccine preventable diseases (VPDs)</article-title>
          <source>World Health Organization</source>
          <access-date>2023-02-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/immunization/monitoring_surveillance/burden/vpd/surveillance_type/active/measles_monthlydata/en/">https://www.who.int/immunization/monitoring_surveillance/burden/vpd/surveillance_type/active/measles_monthlydata/en/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>United Nations</collab>
          </person-group>
          <article-title>World population prospects, 2019</article-title>
          <source>Population Division</source>
          <year>2019</year>
          <access-date>2023-02-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://population.un.org/wpp/Download/Standard/Population/">https://population.un.org/wpp/Download/Standard/Population/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Akinwande</surname>
              <given-names>MO</given-names>
            </name>
            <name name-style="western">
              <surname>Dikko</surname>
              <given-names>HG</given-names>
            </name>
            <name name-style="western">
              <surname>Samson</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Variance inflation factor: as a condition for the inclusion of suppressor variable(s) in regression analysis</article-title>
          <source>Open J Stat</source>
          <year>2015</year>
          <volume>5</volume>
          <issue>7</issue>
          <fpage>754</fpage>
          <lpage>767</lpage>
          <pub-id pub-id-type="doi">10.4236/ojs.2015.57075</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Berba</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Understanding HDBSCAN and density-based clustering</article-title>
          <source>Pepe Berba</source>
          <year>2020</year>
          <access-date>2022-12-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pberba.github.io/stats/2020/01/17/hdbscan/">https://pberba.github.io/stats/2020/01/17/hdbscan/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Campello</surname>
              <given-names>RJGB</given-names>
            </name>
            <name name-style="western">
              <surname>Moulavi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sander</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Pei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>VS</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Motoda</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Density-based clustering based on hierarchical density estimates</article-title>
          <source>Advances in Knowledge Discovery and Data Mining. PAKDD 2013. Lecture Notes in Computer Science</source>
          <year>2013</year>
          <publisher-loc>Berlin, Heidelberg</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McInnes</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Healy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Melville</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>UMAP: uniform manifold approximation and projection for dimension reduction</article-title>
          <source>ArXiv. Preprint posted online on February 09, 2018</source>
          <year>2018</year>
          <fpage>00</fpage>
          <pub-id pub-id-type="doi">10.48550/arXiv.1802.03426</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Random forests</article-title>
          <source>Mach Learn</source>
          <year>2001</year>
          <volume>45</volume>
          <fpage>5</fpage>
          <lpage>32</lpage>
          <pub-id pub-id-type="doi">10.1023/A:1010933404324</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breiman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Manual on setting up, using, and understanding random forests v3.1</article-title>
          <source>Statistics Department University of California Berkeley</source>
          <year>2004</year>
          <access-date>2023-02-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.stat.berkeley.edu/~breiman/Using_random_forests_V3.1.pdf">https://www.stat.berkeley.edu/~breiman/Using_random_forests_V3.1.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saito</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rehmsmeier</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The precision-recall plot is more informative than the ROC plot when evaluating binary classifiers on imbalanced datasets</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>e0118432</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://dx.plos.org/10.1371/journal.pone.0118432"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0118432</pub-id>
          <pub-id pub-id-type="medline">25738806</pub-id>
          <pub-id pub-id-type="pii">PONE-D-14-26790</pub-id>
          <pub-id pub-id-type="pmcid">PMC4349800</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Efron</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Tibshirani</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <source>An Introduction to the Bootstrap. 1st edition</source>
          <year>1994</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>Chapman &#38; Hall</publisher-name>
          <fpage>456</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Cost‐efficiency disk failure prediction via threshold‐moving</article-title>
          <source>Concurrency Computat Pract Exper</source>
          <year>2020</year>
          <month>04</month>
          <day>25</day>
          <volume>32</volume>
          <issue>14</issue>
          <fpage>e5669</fpage>
          <pub-id pub-id-type="doi">10.1002/cpe.5669</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Diez Roux</surname>
              <given-names>AV</given-names>
            </name>
          </person-group>
          <article-title>A glossary for multilevel analysis</article-title>
          <source>J Epidemiol Community Health</source>
          <year>2002</year>
          <month>08</month>
          <volume>56</volume>
          <issue>8</issue>
          <fpage>588</fpage>
          <lpage>594</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jech.bmj.com/lookup/pmidlookup?view=long&#38;pmid=12118049"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jech.56.8.588</pub-id>
          <pub-id pub-id-type="medline">12118049</pub-id>
          <pub-id pub-id-type="pmcid">PMC1732212</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hotle</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mumbower</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The impact of COVID-19 on domestic U.S. air travel operations and commercial airport service</article-title>
          <source>Transp Res Interdiscip Perspect</source>
          <year>2021</year>
          <month>03</month>
          <volume>9</volume>
          <fpage>100277</fpage>
          <pub-id pub-id-type="doi">10.1016/j.trip.2020.100277</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="web">
          <article-title>Keep up the rates</article-title>
          <source>National Foundation for Infectious Diseases</source>
          <year>2022</year>
          <month>01</month>
          <access-date>2023-02-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nfid.org/keep-up-the-rates/">https://www.nfid.org/keep-up-the-rates/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="web">
          <article-title>Coronavirus: impact on the aviation industry worldwide</article-title>
          <source>Statista</source>
          <year>2021</year>
          <access-date>2023-02-20</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.statista.com/topics/6178/coronavirus-impact-on-the-aviation-industry-worldwide/">https://www.statista.com/topics/6178/coronavirus-impact-on-the-aviation-industry-worldwide/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fridman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gershon</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gneezy</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>COVID-19 and vaccine hesitancy: a longitudinal study</article-title>
          <source>PLoS One</source>
          <year>2021</year>
          <volume>16</volume>
          <issue>4</issue>
          <fpage>e0250123</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0250123"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0250123</pub-id>
          <pub-id pub-id-type="medline">33861765</pub-id>
          <pub-id pub-id-type="pii">PONE-D-20-35660</pub-id>
          <pub-id pub-id-type="pmcid">PMC8051771</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
