<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v5i12e23440</article-id>
      <article-id pub-id-type="pmid">34860663</article-id>
      <article-id pub-id-type="doi">10.2196/23440</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Predicting Risk of Stroke From Lab Tests Using Machine Learning Algorithms: Development and Evaluation of Prediction Models</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Taveira-Gomes</surname>
            <given-names>Tiago</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Kades</surname>
            <given-names>Klaus</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sule</surname>
            <given-names>Anupam</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Alanazi</surname>
            <given-names>Eman M</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Health Informatics</institution>
            <institution>College of Health Sciences</institution>
            <institution>Saudi Electronic University</institution>
            <addr-line>Abi Bakr As Siddiq Branch Rd</addr-line>
            <addr-line>Riyadh, 13323</addr-line>
            <country>Saudi Arabia</country>
            <phone>966 112613500</phone>
            <email>e.alanazi@seu.edu.sa</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9558-2602</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Abdou</surname>
            <given-names>Aalaa</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7900-3893</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Luo</surname>
            <given-names>Jake</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3900-643X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Health Informatics</institution>
        <institution>College of Health Sciences</institution>
        <institution>Saudi Electronic University</institution>
        <addr-line>Riyadh</addr-line>
        <country>Saudi Arabia</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Biomedical and Health Informatics</institution>
        <institution>College of Engineering</institution>
        <institution>University of Wisconsin-Milwaukee</institution>
        <addr-line>Milwaukee, WI</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Radiotherapy</institution>
        <institution>Children's Cancer Hospital 57357</institution>
        <addr-line>Cairo</addr-line>
        <country>Egypt</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Health Informatics and Administration</institution>
        <institution>College of Health Sciences</institution>
        <institution>University of Wisconsin-Milwaukee</institution>
        <addr-line>Milwaukee, WI</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Eman M Alanazi <email>e.alanazi@seu.edu.sa</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>12</month>
        <year>2021</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>2</day>
        <month>12</month>
        <year>2021</year>
      </pub-date>
      <volume>5</volume>
      <issue>12</issue>
      <elocation-id>e23440</elocation-id>
      <history>
        <date date-type="received">
          <day>12</day>
          <month>8</month>
          <year>2020</year>
        </date>
        <date date-type="rev-request">
          <day>15</day>
          <month>9</month>
          <year>2020</year>
        </date>
        <date date-type="rev-recd">
          <day>14</day>
          <month>11</month>
          <year>2020</year>
        </date>
        <date date-type="accepted">
          <day>15</day>
          <month>10</month>
          <year>2021</year>
        </date>
      </history>
      <copyright-statement>©Eman M Alanazi, Aalaa Abdou, Jake Luo. Originally published in JMIR Formative Research (https://formative.jmir.org), 02.12.2021.</copyright-statement>
      <copyright-year>2021</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2021/12/e23440" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Stroke, a cerebrovascular disease, is one of the major causes of death. It causes significant health and financial burdens for both patients and health care systems. One of the important risk factors for stroke is health-related behavior, which is becoming an increasingly important focus of prevention. Many machine learning models have been built to predict the risk of stroke or to automatically diagnose stroke, using predictors such as lifestyle factors or radiological imaging. However, there have been no models built using data from lab tests.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study was to apply computational methods using machine learning techniques to predict stroke from lab test data.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We used the National Health and Nutrition Examination Survey data sets with three different data selection methods (ie, without data resampling, with data imputation, and with data resampling) to develop predictive models. We used four machine learning classifiers and six performance measures to evaluate the performance of the models.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We found that accurate and sensitive machine learning models can be created to predict stroke from lab test data. Our results show that the data resampling approach performed the best compared to the other two data selection techniques. Prediction with the random forest algorithm, which was the best algorithm tested, achieved an accuracy, sensitivity, specificity, positive predictive value, negative predictive value, and area under the curve of 0.96, 0.97, 0.96, 0.75, 0.99, and 0.97, respectively, when all of the attributes were used.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The predictive model, built using data from lab tests, was easy to use and had high accuracy. In future studies, we aim to use data that reflect different types of stroke and to explore the data to build a prediction model for each type.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>stroke</kwd>
        <kwd>lab tests</kwd>
        <kwd>machine learning technology</kwd>
        <kwd>predictive analytics</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Stroke is a neurological deficit, primarily because of acute central nervous system focal injury caused by a vascular issue. It is a major cause of disability and death worldwide [<xref ref-type="bibr" rid="ref1">1</xref>]. It is estimated that the overall prevalence of stroke in the United States is 2.5%, and about 7 million Americans over the age of 20 years have experienced a stroke. The condition has a significant negative impact on patients’ health and quality of life. It also has a negative impact on hospital services and the availability of beds and was estimated to cost the US economy about US $351.2 billion between 2014 and 2015 [<xref ref-type="bibr" rid="ref2">2</xref>]. There are two types of stroke: ischemic and hemorrhagic. Hemorrhagic stroke occurs because of a burst vessel that leads to bleeding in the brain, whereas ischemic stroke occurs because of a blockage of the arteries of the brain. Ischemic strokes are the most common, comprising 85% to 90% of all strokes [<xref ref-type="bibr" rid="ref3">3</xref>]. This condition can be prevented by promoting health and increasing awareness of risk factors. There are many risk factors related to lifestyle, including obesity, diet, alcohol intake, and lack of physical activity [<xref ref-type="bibr" rid="ref4">4</xref>]. Underlying conditions, such as diabetes, hypertension, and cardiovascular diseases, may also lead to stroke. Therefore, proper self-management of these diseases and the pursuit of a healthy lifestyle may prevent the occurrence of stroke.</p>
      <p>In 2019, the American College of Cardiology/American Heart Association released the Guideline on the Primary Prevention of Cardiovascular Disease. The guideline recommends a complete assessment and examination of patients who are at risk of developing blockages in their arteries that may lead to a heart attack or stroke and might die as a result [<xref ref-type="bibr" rid="ref5">5</xref>]. Now more than ever, physicians can access clinical evidence to identify high-risk patients using approaches such as acquiring a complete patient history and conducting thorough physical exams for risk assessment. Patient records contain many useful predictive factors, such as patient demographic (eg, age and gender), lifestyle (eg, diet and physical activity), and existing medical condition factors (eg, diabetes and hypertension), that might lead to stroke [<xref ref-type="bibr" rid="ref5">5</xref>]. The growth of arterial blockages and decades of damage to blood vessels, which may lead to stroke, are often associated with these risk factors. If physicians can assess the risks of stroke easily and conveniently, strokes could be prevented at an earlier stage. This approach could save lives and reduce the economic burden of health care services. In the age of artificial intelligence and machine learning, a clinical decision support system has been developed to assist physicians to diagnose and identify individuals with a high risk of stroke. The potential of applying machine learning technologies in the cardiovascular domain is significant, from identifying individuals with a high risk of stroke [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>] to predicting outcomes of patients following treatment [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. 
Most of these studies use either health habits and lifestyle factors, such as smoking or alcohol consumption; conditions that predispose to strokes, such as hypertension and diabetes mellitus; or neuroimaging, such as computed tomography and magnetic resonance imaging, to either classify or predict the disease.</p>
      <p>Besides assessing known risk factors for stroke, scientists are trying to develop lab tests that can predict stroke. One of the major advantages of using lab test results for prediction is that lab tests are commonly collected in clinical settings, and the information is often well documented in patients’ records. In this study, we explored data-driven approaches using supervised machine learning models to predict the risk of stroke from different lab tests.</p>
      <p>Several studies have been able to identify independent laboratory tests that are correlated with stroke using descriptive statistical analysis. Sughrue et al [<xref ref-type="bibr" rid="ref10">10</xref>] conducted a retrospective study in 2013 that identified 35 tests with a statistically significant correlation with a future stroke diagnosis. The most informative were for various types of cholesterol. Two of these 35 laboratory tests were urine tests, and 33 were blood, serum, or plasma tests. Some tests were positively associated with an outcome of stroke (ie, neutrophil count and percent; CD3+, CD8+, and T8 suppressor cells; monocytes; eosinophils; and CD3 cells), and others were negatively correlated (ie, hematocrit and lymphocytes). Their results show that it is possible to correlate future stroke with collected lab test data. Farah and Samra [<xref ref-type="bibr" rid="ref11">11</xref>] conducted a retrospective study investigating the association between the neutrophil-to-lymphocyte ratio (NLR), mean platelet volume (MPV), and the risk of stroke. Two-tailed <italic>t</italic> tests showed no significant differences in the stroke group’s MPV values compared with those in the control group. However, the NLRs of the stroke patients were significantly different compared with those of the control group. That study indicated the existence of a correlation between the level of NLR and stroke risk. NLR levels have been shown to be higher in stroke patients than in control groups. Feng et al [<xref ref-type="bibr" rid="ref12">12</xref>] reviewed the scientific literature on the potential role and the possible epidemiological relationships between red cell distribution width (RDW) and ischemic stroke in a meta-analysis of 40 manuscripts from China National Knowledge Infrastructure and PubMed databases. They reported that patients with stroke had higher levels of RDW than those without strokes. 
Another study by Kaya et al [<xref ref-type="bibr" rid="ref13">13</xref>] also investigated the association between baseline RDW level and stroke risk in patients with heart failure. These authors found that heart failure patients suffering from stroke had significantly increased basal RDW levels (mean 16.9, SD 1.14, vs mean 14.8, SD 1.6; <italic>P</italic>&#60;.001) and serum uric acid levels (mean 8.8, SD 1.7, vs mean 7.5, SD 1.1; <italic>P</italic>=.027) compared with patients without stroke, according to the propensity score analysis. Giles et al [<xref ref-type="bibr" rid="ref14">14</xref>] used data from a national cohort to investigate whether low folate levels were associated with ischemic stroke and found that folate concentrations of ≤9.2 nmol/L could be a risk factor for ischemic stroke (relative risk 1.37, 95% CI 0.82-2.29). Another study by Qin et al [<xref ref-type="bibr" rid="ref15">15</xref>] concluded that there is a significant risk of first ischemic stroke in hypertensive patients with low levels of folate and vitamin B12.</p>
      <p>These studies demonstrate the value of lab test results for predicting stroke. Our study aimed to leverage lab test results to build machine learning models for stroke prediction. We prepared the data sets using three data selection techniques for this study. After that, for each data selection technique, we applied four individual machine learning classifiers to prepare prediction models. We measured the performance of each prediction model using six different performance measures. Our results indicate that the data resampling technique outperformed the other two data selection techniques for the decision tree and random forest classifiers.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p><xref rid="figure1" ref-type="fig">Figure 1</xref> shows the outline of our investigation. In the first step, we collected data from the National Health and Nutrition Examination Survey (NHANES). In the second step, we selected the data using three data selection techniques for our prediction models. The first one was conducted without data resampling, the second one included data imputation, and the third one was conducted with data resampling.</p>
        <p>We used 10-fold cross-validation to perform the train and test approach. To train models, we used four different machine learning classifiers, and six performance measures were used to assess the performance of the models. The elaborated descriptions of the data sets, classifiers, and performance metrics that were used are given below.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Flow diagram of the study methodology. NHANES: National Health and Nutrition Examination Survey.</p>
          </caption>
          <graphic xlink:href="formative_v5i12e23440_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Data Collection</title>
        <p>The NHANES survey was conducted to examine the health and nutritional status of adults and children in the United States; “NHANES is a major program of the National Center for Health Statistics (NCHS). NCHS is part of the Centers for Disease Control and Prevention (CDC) and has the responsibility for producing vital and health statistics for the Nation” [<xref ref-type="bibr" rid="ref16">16</xref>]. The data sets contain five domains: demographics, dietary data, examination data, laboratory data, and questionnaire data. Each domain contains several subdomains. Our focus was on data sets that contain information about laboratory tests. The data sets are available from 1999 to 2017, and we considered data from 2011 to 2015. The total number of participants was 15,714 during this period. To reduce the impact of imbalanced data, we noted that in the entire data set, there were about 17% of participants who had experienced a stroke. Therefore, we included a total of 4186 participants, of whom 608 (14.5%) had experienced a stroke (<xref rid="figure2" ref-type="fig">Figure 2</xref>). The list of data attributes is shown in <xref ref-type="table" rid="table1">Table 1</xref>. The data sets contained 21 attributes, including each patient’s age and gender as well as other lab test information for each respective patient. The data sets and their information are available online [<xref ref-type="bibr" rid="ref16">16</xref>], where the data are presented from the year 2000 to the current year. For this study, the data were collected for each year and combined using the sequence number (SEQN). After combining and cleaning the data, we used the Waikato Environment for Knowledge Analysis (WEKA; version 3.8.0) system to build and test machine learning models.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Participant selection and prevalence of stroke in the National Health and Nutrition Examination Survey (NHANES).</p>
          </caption>
          <graphic xlink:href="formative_v5i12e23440_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>List of the data attributes.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="670"/>
            <col width="330"/>
            <thead>
              <tr valign="bottom">
                <td>Feature<sup>a</sup></td>
                <td>Units</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Age</td>
                <td>Years</td>
              </tr>
              <tr valign="top">
                <td>Gender</td>
                <td>N/A<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>Albumin, urine</td>
                <td>μg/mL</td>
              </tr>
              <tr valign="top">
                <td>Creatinine, urine</td>
                <td>mg/dL</td>
              </tr>
              <tr valign="top">
                <td>White blood cell count</td>
                <td>1000 cells/μL</td>
              </tr>
              <tr valign="top">
                <td>Lymphocytes</td>
                <td>1000 cells/μL</td>
              </tr>
              <tr valign="top">
                <td>Monocytes</td>
                <td>1000 cells/μL</td>
              </tr>
              <tr valign="top">
                <td>Segmented neutrophils</td>
                <td>1000 cells/μL</td>
              </tr>
              <tr valign="top">
                <td>Eosinophils</td>
                <td>1000 cells/μL</td>
              </tr>
              <tr valign="top">
                <td>Basophils</td>
                <td>1000 cells/μL</td>
              </tr>
              <tr valign="top">
                <td>Red blood cell count</td>
                <td>Million cells/μL</td>
              </tr>
              <tr valign="top">
                <td>Hemoglobin</td>
                <td>g/dL</td>
              </tr>
              <tr valign="top">
                <td>Hematocrit</td>
                <td>%</td>
              </tr>
              <tr valign="top">
                <td>Mean cell volume</td>
                <td>fL</td>
              </tr>
              <tr valign="top">
                <td>Mean cell hemoglobin</td>
                <td>pg</td>
              </tr>
              <tr valign="top">
                <td>Mean corpuscular hemoglobin concentration</td>
                <td>g/dL</td>
              </tr>
              <tr valign="top">
                <td>Red cell distribution width</td>
                <td>%</td>
              </tr>
              <tr valign="top">
                <td>Platelet count</td>
                <td>1000 cells/μL</td>
              </tr>
              <tr valign="top">
                <td>Mean platelet volume</td>
                <td>fL</td>
              </tr>
              <tr valign="top">
                <td>Cotinine, serum</td>
                <td>ng/mL</td>
              </tr>
              <tr valign="top">
                <td>Red blood cell folate</td>
                <td>mg/dL</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>All data types were numeric, except for “gender,” which was nominal.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>N/A: not applicable; this type of data did not have units.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Classification</title>
        <p>Several different machine learning algorithms can handle a binary classification problem. In this study, we used four machine learning algorithms: naïve Bayes, BayesNet, J48 (Java implementation of C4.5 algorithm), and random forest. The performance of the algorithms was evaluated and compared for stroke prediction using lab test results as features. Details of the algorithms are as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>The J48 algorithm creates a tree based on the C4.5 algorithm with pruning.</p>
          </list-item>
          <list-item>
            <p>The random forest algorithm creates a forest of random trees and outputs the mode of the classes created by individual trees.</p>
          </list-item>
          <list-item>
            <p>The naïve Bayes algorithm creates a classifier based on the naïve Bayes method, which assumes that all attributes are independent.</p>
          </list-item>
          <list-item>
            <p>The BayesNet algorithm creates a classifier based on non–naïve Bayes, which does not assume that all attributes are independent.</p>
          </list-item>
        </list>
        <p>In the cross-validation approach, the data sets are divided into several equal portions; in general, 5-fold and 10-fold cross-validations are used when the data sets are equally divided into 5 and 10 portions [<xref ref-type="bibr" rid="ref17">17</xref>]. With this approach, for each simulation, one portion of each data set is held out for validation and the rest are used to train the prediction model. In this study, we used 10-fold cross-validation and, in this process, we divided the whole of each data set into 10 equal parts; each time, 90% of each data set was used to train the model and 10% was used for validation. In this task, three data analyses were conducted where the first data analysis applied each of the machine learning techniques on the data sets without data manipulation or resampling. The aim was to determine the baseline for the data sets among the various machine learning techniques. The imputation of missing data set entries was conducted in the second analysis. In statistics, imputation entails substituting missing data with values calculated using any of a number of techniques [<xref ref-type="bibr" rid="ref18">18</xref>]. Imputation is a useful technique in remedying missing data, since missing data may lead to inaccurate predictions. We used the default ReplaceMissingValues filter in WEKA, which replaces all missing values for nominal and numeric attributes in a data set with the modes and means from the training data. Most of the features had 5% missing values, and one feature had 11% missing values. After the imputation of the missing data, data resampling was conducted in the third analysis. Data resampling is a commonly used technique, since training may result in nonuniformity of class labels. In this case, the resampling technique was applied to select a specific subset of data points for model training [<xref ref-type="bibr" rid="ref19">19</xref>]. 
After resampling the data, the results of the first analysis should be improved because of the balancing of the data set distribution. A balanced distribution was achieved through the use of WEKA, which randomly resamples the data. Based on the available theoretical knowledge about resampling and imputation in statistics, the results after the third analysis should be improved.</p>
      </sec>
      <sec>
        <title>Evaluation Metrics</title>
        <p>Model accuracy was evaluated based on the following measures: recall or sensitivity, specificity, positive predictive value (PPV), negative predictive value (NPV), accuracy, and area under the curve (AUC) (or area under the receiver operating characteristic [ROC] curve) to compare the four classifiers. Details of these measures are as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>Sensitivity, also known as recall or true positive rate, is the number of true positives divided by the number of true positives plus the number of false negatives. It is the likelihood that the patient has a high risk of stroke [<xref ref-type="bibr" rid="ref20">20</xref>].</p>
          </list-item>
          <list-item>
            <p>Specificity, also known as the true negative rate, is the proportion of individuals classified as nonstroke to the total number of actual nonstroke cases. It is the likelihood that a patient who does not have a risk of stroke will have a negative result [<xref ref-type="bibr" rid="ref21">21</xref>].</p>
          </list-item>
          <list-item>
            <p>PPV, also known as precision, is the number of true positives divided by the number of true positives plus the number of false positives. It is the proportion of individuals who have suffered a stroke to the total number of participants classified as having a risk of stroke [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
          </list-item>
          <list-item>
            <p>NPV is the percentage of patients with negative test results who are free from the disease, that is, the proportion of individuals who have not suffered a stroke to the total number of participants classified as not having a risk of stroke [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
          </list-item>
          <list-item>
            <p>Overall accuracy is the number of correctly classified instances over the total size of the data set [<xref ref-type="bibr" rid="ref20">20</xref>].</p>
          </list-item>
          <list-item>
            <p>The AUC is the area under the ROC curve, which is constructed by plotting the true positive rate against the false positive rate [<xref ref-type="bibr" rid="ref23">23</xref>].</p>
          </list-item>
        </list>
        <p>We also examined the Pearson correlation coefficient value of each independent predictor to investigate the relationship between each lab test and risk of stroke.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>In the NHANES data sets, 608 participants suffered from a stroke from 2011 to 2015. The median age of participants who had a stroke was 51 years for both men and women. The numbers of men and women who had a stroke were 220 (36.2%) and 190 (31.3%), respectively; 198 (32.6%) participants did not reveal their gender identity.</p>
      <p>After the data collection process, the data were analyzed in three ways: without data resampling, with data imputation, and with data resampling. Data resampling techniques were used to tackle data imbalance problems in the data sets. These sampling techniques are widely used in machine learning–based prediction models in different areas [<xref ref-type="bibr" rid="ref24">24</xref>]. Our first analysis was done without the data resampling technique, where the four machine learning algorithms were applied directly to the data sets. The first analysis produced poor results for all four classifiers. The best sensitivity rate among the classifiers in the first analysis was for the BayesNet model, followed by the naïve Bayes model. In the second analysis, we applied the data imputation technique to the data sets, which replaced missing values and deleted features that had more than 50% missing values; the prediction accuracy improved for all models, except for the naïve Bayes model, whose performance decreased slightly after replacing the missing values.</p>
      <p>In the third analysis, we resampled the data. After resampling, the prediction accuracy improved significantly for both the decision tree and random forest models, but only slightly for the naïve Bayes and BayesNet models. <xref ref-type="table" rid="table2">Table 2</xref> shows the scores of accuracy, sensitivity, specificity, PPV, NPV, and AUC, according to the three data analysis techniques and four classifiers. The table shows that the random forest model was the best classifier with the data resampling technique. <xref rid="figure3" ref-type="fig">Figures 3</xref> and <xref rid="figure4" ref-type="fig">4</xref> show the score comparisons among the three data selection techniques for the decision tree and random forest models, respectively. We considered the decision tree and random forest classifiers to compare the performance, as they significantly improved the performance in the third analysis. Both figures clearly show that the third analysis, the data resampling technique, outperformed the other two techniques for the decision tree and random forest classifiers.</p>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Results of three data analysis techniques.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="250"/>
          <col width="140"/>
          <col width="130"/>
          <col width="130"/>
          <col width="120"/>
          <col width="0"/>
          <col width="130"/>
          <col width="0"/>
          <col width="70"/>
          <thead>
            <tr valign="bottom">
              <td colspan="2">Technique and classifier</td>
              <td>Accuracy</td>
              <td>Sensitivity</td>
              <td>Specificity</td>
              <td>PPV<sup>a</sup></td>
              <td colspan="2">NPV<sup>b</sup></td>
              <td colspan="2">AUC<sup>c</sup></td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="10">
                <bold>Without data resampling</bold>
              </td>
            </tr>
            <tr valign="top">
              <td rowspan="4">
                <break/>
              </td>
              <td>Naïve Bayes</td>
              <td>0.82</td>
              <td>0.34</td>
              <td>0.88</td>
              <td colspan="2">0.27</td>
              <td colspan="2">0.91</td>
              <td>0.76</td>
            </tr>
            <tr valign="top">
              <td>BayesNet</td>
              <td>0.82</td>
              <td>0.38</td>
              <td>0.89</td>
              <td colspan="2">0.37</td>
              <td colspan="2">0.90</td>
              <td>0.88</td>
            </tr>
            <tr valign="top">
              <td>Decision tree</td>
              <td>0.83</td>
              <td>0.33</td>
              <td>0.87</td>
              <td colspan="2">0.14</td>
              <td colspan="2">0.95</td>
              <td>0.73</td>
            </tr>
            <tr valign="top">
              <td>Random forest</td>
              <td>0.86</td>
              <td>0.55</td>
              <td>0.86</td>
              <td colspan="2">0.01</td>
              <td colspan="2">0.99</td>
              <td>0.87</td>
            </tr>
            <tr valign="top">
              <td colspan="10">
                <bold>Data imputation</bold>
              </td>
            </tr>
            <tr valign="top">
              <td rowspan="4">
                <break/>
              </td>
              <td>Naïve Bayes</td>
              <td>0.81</td>
              <td>0.32</td>
              <td>0.88</td>
              <td colspan="2">0.25</td>
              <td colspan="2">0.91</td>
              <td>0.74</td>
            </tr>
            <tr valign="top">
              <td>BayesNet</td>
              <td>0.86</td>
              <td>0.53</td>
              <td>0.92</td>
              <td colspan="2">0.54</td>
              <td colspan="2">0.92</td>
              <td>0.85</td>
            </tr>
            <tr valign="top">
              <td>Decision tree</td>
              <td>0.88</td>
              <td>0.61</td>
              <td>0.91</td>
              <td colspan="2">0.46</td>
              <td colspan="2">0.95</td>
              <td>0.74</td>
            </tr>
            <tr valign="top">
              <td>Random forest</td>
              <td>0.90</td>
              <td>0.89</td>
              <td>0.90</td>
              <td colspan="2">0.33</td>
              <td colspan="2">0.99</td>
              <td>0.85</td>
            </tr>
            <tr valign="top">
              <td colspan="10">
                <bold>Data resampling</bold>
              </td>
            </tr>
            <tr valign="top">
              <td rowspan="4">
                <break/>
              </td>
              <td>Naïve Bayes</td>
              <td>0.82</td>
              <td>0.33</td>
              <td>0.88</td>
              <td colspan="2">0.29</td>
              <td colspan="2">0.90</td>
              <td>0.74</td>
            </tr>
            <tr valign="top">
              <td>BayesNet</td>
              <td>0.87</td>
              <td>0.53</td>
              <td>0.93</td>
              <td colspan="2">0.57</td>
              <td colspan="2">0.92</td>
              <td>0.85</td>
            </tr>
            <tr valign="top">
              <td>Decision tree</td>
              <td>0.93</td>
              <td>0.76</td>
              <td>0.95</td>
              <td colspan="2">0.72</td>
              <td colspan="2">0.96</td>
              <td>0.86</td>
            </tr>
            <tr valign="top">
              <td>Random forest</td>
              <td>0.96</td>
              <td>0.97</td>
              <td>0.96</td>
              <td colspan="2">0.75</td>
              <td colspan="2">0.99</td>
              <td>0.97</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table2fn1">
            <p><sup>a</sup>PPV: positive predictive value.</p>
          </fn>
          <fn id="table2fn2">
            <p><sup>b</sup>NPV: negative predictive value.</p>
          </fn>
          <fn id="table2fn3">
            <p><sup>c</sup>AUC: area under the curve.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
      <fig id="figure3" position="float">
        <label>Figure 3</label>
        <caption>
          <p>Performance comparison among three data selection techniques for the decision tree model. AUC: area under the curve; NPV: negative predictive value; PPV: positive predictive value.</p>
        </caption>
        <graphic xlink:href="formative_v5i12e23440_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <fig id="figure4" position="float">
        <label>Figure 4</label>
        <caption>
          <p>Performance comparison among three data selection techniques for the random forest model. AUC: area under the curve; NPV: negative predictive value; PPV: positive predictive value.</p>
        </caption>
        <graphic xlink:href="formative_v5i12e23440_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p><xref ref-type="table" rid="table3">Table 3</xref> shows the results from Pearson correlation analysis of the independent predictors.</p>
      <table-wrap position="float" id="table3">
        <label>Table 3</label>
        <caption>
          <p>Pearson correlation coefficient values of independent predictors.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="600"/>
          <col width="400"/>
          <thead>
            <tr valign="top">
              <td>Independent predictor of stroke</td>
              <td>Pearson correlation coefficient (<italic>r</italic>)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Age</td>
              <td>0.26</td>
            </tr>
            <tr valign="top">
              <td>Gender</td>
              <td>0.13</td>
            </tr>
            <tr valign="top">
              <td>Red cell distribution width (%)</td>
              <td>0.18</td>
            </tr>
            <tr valign="top">
              <td>Lymphocytes (%)</td>
              <td>0.15</td>
            </tr>
            <tr valign="top">
              <td>Red blood cell folate (ng/mL)</td>
              <td>0.13</td>
            </tr>
            <tr valign="top">
              <td>Segmented neutrophils (%)</td>
              <td>0.12</td>
            </tr>
            <tr valign="top">
              <td>Hemoglobin (g/dL)</td>
              <td>0.11</td>
            </tr>
            <tr valign="top">
              <td>Red blood cell count (million cells/μL)</td>
              <td>0.11</td>
            </tr>
            <tr valign="top">
              <td>Hematocrit (%)</td>
              <td>0.09</td>
            </tr>
            <tr valign="top">
              <td>Lymphocytes (1000 cells/μL)</td>
              <td>0.08</td>
            </tr>
            <tr valign="top">
              <td>Segmented neutrophils (1000 cells/μL)</td>
              <td>0.07</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>From the previous section, we noticed that our models had the potential to perform stroke prediction using lab test data. Our results show that the random forest model was the best classifier after conducting the data resampling technique.</p>
        <p>Also, several observations can be made from the results in <xref ref-type="table" rid="table3">Table 3</xref>. We identified nine lab tests, in addition to age and gender, that effectively correlated with stroke occurrence. These correlations were calculated using the Pearson correlation coefficient. These results align with other research that showed a linear relationship between some of these variables and stroke. Several studies have shown that age is correlated with the risk of stroke. According to Benjamin et al [<xref ref-type="bibr" rid="ref2">2</xref>], stroke incidence doubles after the age of 45 years, and 70% of all strokes occur over the age of 65 years. Many studies have investigated the relationship between baseline RDW and stroke. They found that elevated RDW is a risk factor in ischemic stroke [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. One of the novel correlations that were found in this study is the lymphocyte percentage. Lymphocytes are white blood cells, including B cells, T cells, and natural killer cells. Lymphocyte percentage is positively associated with stroke occurrence. There have been no studies suggesting that lymphocyte percentage can be a predictor of stroke, but different studies have examined the use of immune cells as biomarkers to predict stroke outcome [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. There is one study that showed a negative correlation between hematocrit and stroke occurrence [<xref ref-type="bibr" rid="ref10">10</xref>]. Folate deficiency has various clinical manifestations. Our finding that serum folate level was correlated with the risk of stroke is in line with the finding of Giles et al [<xref ref-type="bibr" rid="ref14">14</xref>], who found that a serum folate concentration of ≤9.2 nmol/L may slightly increase the risk for ischemic stroke. 
Other studies have shown that folic acid therapy involving folic acid, vitamin B12, and vitamin B6 reduced the risk of ischemic stroke [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref28">28</xref>]. Neutrophils, which are normally the most abundant circulating white blood cells and respond quickly to infection, also contribute to the main processes causing an ischemic stroke, as they facilitate the development of blood clots. Neutrophils are, therefore, also of considerable importance as targets for treating and preventing ischemic stroke [<xref ref-type="bibr" rid="ref29">29</xref>]. A study by Sughrue et al [<xref ref-type="bibr" rid="ref10">10</xref>] produced results similar to ours regarding the positive association between neutrophils and stroke occurrence. Hemoglobin levels can predict the risk of stroke. Observational studies have reported an independent association between red blood cell count, hematocrit, and hemoglobin concentration and the risk of developing stroke [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>].</p>
        <p>The correlations between these different lab tests and stroke were found in several studies. However, this is the first study that used all of these different attributes to build a prediction model using machine learning algorithms. Our results showed that a prediction model can be created using the random forest algorithm and could achieve an accuracy of 0.96.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Machine learning applications are becoming more widely used in the health care sector. The prediction of stroke using machine learning algorithms has been studied extensively. However, no previous work has explored the prediction of stroke using lab tests. The results of several laboratory tests are correlated with stroke. Building a prediction model that can predict the risk of stroke from lab test data could save lives. In this study, we created a prediction model using the random forest algorithm and achieved a 96% accuracy rate. The model can be integrated with electronic health records to provide a real-time prediction of stroke from lab tests. Because of the nature of the data, we could not predict the type of stroke: hemorrhagic or ischemic. In future studies, we aim to use data that provide information about different types of stroke to build prediction models for each type.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC</term>
          <def>
            <p>area under the curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CDC</term>
          <def>
            <p>Centers for Disease Control and Prevention</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">MPV</term>
          <def>
            <p>mean platelet volume</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">NCHS</term>
          <def>
            <p>National Center for Health Statistics</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NHANES</term>
          <def>
            <p>National Health and Nutrition Examination Survey</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">NLR</term>
          <def>
            <p>neutrophil-to-lymphocyte ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">NPV</term>
          <def>
            <p>negative predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">PPV</term>
          <def>
            <p>positive predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">RDW</term>
          <def>
            <p>red cell distribution width</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">SEQN</term>
          <def>
            <p>sequence number</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">WEKA</term>
          <def>
            <p>Waikato Environment for Knowledge Analysis</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>EMA conducted the research design, data collection, and data analysis and wrote the original draft. AA assisted with the literature review of the lab tests. JL revised and edited the original draft and provided guidance throughout the whole research process. This study received no external funding.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sacco</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Kasner</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Broderick</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Caplan</surname>
              <given-names>LR</given-names>
            </name>
            <name name-style="western">
              <surname>Connors</surname>
              <given-names>JJB</given-names>
            </name>
            <name name-style="western">
              <surname>Culebras</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Elkind</surname>
              <given-names>MSV</given-names>
            </name>
            <name name-style="western">
              <surname>George</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Hamdan</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Higashida</surname>
              <given-names>RT</given-names>
            </name>
            <name name-style="western">
              <surname>Hoh</surname>
              <given-names>BL</given-names>
            </name>
            <name name-style="western">
              <surname>Janis</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Kase</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Kleindorfer</surname>
              <given-names>DO</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Moseley</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>ED</given-names>
            </name>
            <name name-style="western">
              <surname>Turan</surname>
              <given-names>TN</given-names>
            </name>
            <name name-style="western">
              <surname>Valderrama</surname>
              <given-names>AL</given-names>
            </name>
            <name name-style="western">
              <surname>Vinters</surname>
              <given-names>HV</given-names>
            </name>
            <collab>American Heart Association Stroke Council, Council on Cardiovascular Surgery and Anesthesia</collab>
            <collab>Council on Cardiovascular Radiology and Intervention</collab>
            <collab>Council on Cardiovascular and Stroke Nursing</collab>
            <collab>Council on Epidemiology and Prevention</collab>
            <collab>Council on Peripheral Vascular Disease</collab>
            <collab>Council on Nutrition, Physical Activity and Metabolism</collab>
          </person-group>
          <article-title>An updated definition of stroke for the 21st century: A statement for healthcare professionals from the American Heart Association/American Stroke Association</article-title>
          <source>Stroke</source>
          <year>2013</year>
          <month>07</month>
          <volume>44</volume>
          <issue>7</issue>
          <fpage>2064</fpage>
          <lpage>2089</lpage>
          <pub-id pub-id-type="doi">10.1161/STR.0b013e318296aeca</pub-id>
          <pub-id pub-id-type="medline">23652265</pub-id>
          <pub-id pub-id-type="pii">STR.0b013e318296aeca</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Benjamin</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Muntner</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Alonso</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bittencourt</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Callaway</surname>
              <given-names>CW</given-names>
            </name>
            <name name-style="western">
              <surname>Carson</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Chamberlain</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Delling</surname>
              <given-names>FN</given-names>
            </name>
            <name name-style="western">
              <surname>Djousse</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Elkind</surname>
              <given-names>MSV</given-names>
            </name>
            <name name-style="western">
              <surname>Ferguson</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Fornage</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jordan</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Kissela</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Knutson</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>Kwan</surname>
              <given-names>TW</given-names>
            </name>
            <name name-style="western">
              <surname>Lackland</surname>
              <given-names>DT</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>TT</given-names>
            </name>
            <name name-style="western">
              <surname>Lichtman</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Longenecker</surname>
              <given-names>CT</given-names>
            </name>
            <name name-style="western">
              <surname>Loop</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Lutsey</surname>
              <given-names>PL</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Matsushita</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Moran</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Mussolino</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>O'Flaherty</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pandey</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Perak</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Rosamond</surname>
              <given-names>WD</given-names>
            </name>
            <name name-style="western">
              <surname>Roth</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Sampson</surname>
              <given-names>UKA</given-names>
            </name>
            <name name-style="western">
              <surname>Satou</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Schroeder</surname>
              <given-names>EB</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Spartano</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Stokes</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Tirschwell</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Tsao</surname>
              <given-names>CW</given-names>
            </name>
            <name name-style="western">
              <surname>Turakhia</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>VanWagner</surname>
              <given-names>LB</given-names>
            </name>
            <name name-style="western">
              <surname>Wilkins</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Virani</surname>
              <given-names>SS</given-names>
            </name>
            <collab>American Heart Association Council on Epidemiology and Prevention Statistics Committee and Stroke Statistics Subcommittee</collab>
          </person-group>
          <article-title>Heart Disease and Stroke Statistics-2019 Update: A report from the American Heart Association</article-title>
          <source>Circulation</source>
          <year>2019</year>
          <month>03</month>
          <day>05</day>
          <volume>139</volume>
          <issue>10</issue>
          <fpage>e56</fpage>
          <lpage>e528</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ahajournals.org/doi/abs/10.1161/CIR.0000000000000659?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%3dpubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1161/CIR.0000000000000659</pub-id>
          <pub-id pub-id-type="medline">30700139</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>European Stroke Initiative Executive Committee</collab>
            <collab>EUSI Writing Committee</collab>
            <name name-style="western">
              <surname>Olsen</surname>
              <given-names>TS</given-names>
            </name>
            <name name-style="western">
              <surname>Langhorne</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Diener</surname>
              <given-names>HC</given-names>
            </name>
            <name name-style="western">
              <surname>Hennerici</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ferro</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sivenius</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wahlgren</surname>
              <given-names>NG</given-names>
            </name>
            <name name-style="western">
              <surname>Bath</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>European Stroke Initiative Recommendations for Stroke Management – Update 2003</article-title>
          <source>Cerebrovasc Dis</source>
          <year>2003</year>
          <volume>16</volume>
          <issue>4</issue>
          <fpage>311</fpage>
          <lpage>337</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.karger.com?DOI=10.1159/000072554"/>
          </comment>
          <pub-id pub-id-type="doi">10.1159/000072554</pub-id>
          <pub-id pub-id-type="medline">14584488</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Boden-Albala</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Sacco</surname>
              <given-names>RL</given-names>
            </name>
          </person-group>
          <article-title>Lifestyle factors and stroke risk: Exercise, alcohol, diet, obesity, smoking, drug use, and stress</article-title>
          <source>Curr Atheroscler Rep</source>
          <year>2000</year>
          <month>03</month>
          <volume>2</volume>
          <issue>2</issue>
          <fpage>160</fpage>
          <lpage>166</lpage>
          <pub-id pub-id-type="doi">10.1007/s11883-000-0111-3</pub-id>
          <pub-id pub-id-type="medline">11122740</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arnett</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Blumenthal</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Albert</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Buroker</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Goldberger</surname>
              <given-names>ZD</given-names>
            </name>
            <name name-style="western">
              <surname>Hahn</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Himmelfarb</surname>
              <given-names>CD</given-names>
            </name>
            <name name-style="western">
              <surname>Khera</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lloyd-Jones</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>McEvoy</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Michos</surname>
              <given-names>ED</given-names>
            </name>
            <name name-style="western">
              <surname>Miedema</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Muñoz</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Virani</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Yeboah</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ziaeian</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>2019 ACC/AHA Guideline on the Primary Prevention of Cardiovascular Disease: A report of the American College of Cardiology/American Heart Association Task Force on Clinical Practice Guidelines</article-title>
          <source>J Am Coll Cardiol</source>
          <year>2019</year>
          <month>09</month>
          <day>10</day>
          <volume>74</volume>
          <issue>10</issue>
          <fpage>e177</fpage>
          <lpage>e232</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jacc.2019.03.010</pub-id>
          <pub-id pub-id-type="medline">30894318</pub-id>
          <pub-id pub-id-type="pii">S0735-1097(19)33877-X</pub-id>
          <pub-id pub-id-type="pmcid">PMC7685565</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Manuel</surname>
              <given-names>DG</given-names>
            </name>
            <name name-style="western">
              <surname>Tuna</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Perez</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Tanuseputro</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hennessy</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bennett</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rosella</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sanmartin</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>van Walraven</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tu</surname>
              <given-names>JV</given-names>
            </name>
          </person-group>
          <article-title>Predicting stroke risk based on health behaviours: Development of the Stroke Population Risk Tool (SPoRT)</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <volume>10</volume>
          <issue>12</issue>
          <fpage>e0143342</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0143342"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0143342</pub-id>
          <pub-id pub-id-type="medline">26637172</pub-id>
          <pub-id pub-id-type="pii">PONE-D-15-24430</pub-id>
          <pub-id pub-id-type="pmcid">PMC4670216</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yoo</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>The development and implementation of stroke risk prediction model in National Health Insurance Service's personal health record</article-title>
          <source>Comput Methods Programs Biomed</source>
          <year>2018</year>
          <month>01</month>
          <volume>153</volume>
          <fpage>253</fpage>
          <lpage>257</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0169-2607(16)31470-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.cmpb.2017.10.007</pub-id>
          <pub-id pub-id-type="medline">29157457</pub-id>
          <pub-id pub-id-type="pii">S0169-2607(16)31470-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nielsen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hansen</surname>
              <given-names>MB</given-names>
            </name>
            <name name-style="western">
              <surname>Tietze</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mouridsen</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Prediction of tissue outcome and assessment of treatment effect in acute ischemic stroke using deep learning</article-title>
          <source>Stroke</source>
          <year>2018</year>
          <month>06</month>
          <volume>49</volume>
          <issue>6</issue>
          <fpage>1394</fpage>
          <lpage>1401</lpage>
          <pub-id pub-id-type="doi">10.1161/strokeaha.117.019740</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rondina</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Filippone</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Girolami</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ward</surname>
              <given-names>NS</given-names>
            </name>
          </person-group>
          <article-title>Decoding post-stroke motor function from structural brain imaging</article-title>
          <source>Neuroimage Clin</source>
          <year>2016</year>
          <volume>12</volume>
          <fpage>372</fpage>
          <lpage>380</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2213-1582(16)30134-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.nicl.2016.07.014</pub-id>
          <pub-id pub-id-type="medline">27595065</pub-id>
          <pub-id pub-id-type="pii">S2213-1582(16)30134-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC4995603</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sughrue</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Swiernik</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Brody</surname>
              <given-names>JP</given-names>
            </name>
          </person-group>
          <article-title>Laboratory tests as short-term correlates of stroke</article-title>
          <source>BMC Neurol</source>
          <year>2016</year>
          <month>07</month>
          <day>21</day>
          <volume>16</volume>
          <fpage>112</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcneurol.biomedcentral.com/articles/10.1186/s12883-016-0619-y"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12883-016-0619-y</pub-id>
          <pub-id pub-id-type="medline">27439507</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12883-016-0619-y</pub-id>
          <pub-id pub-id-type="pmcid">PMC4955202</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Farah</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Samra</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Mean platelets volume and neutrophil to lymphocyte ratio as predictors of stroke</article-title>
          <source>J Clin Lab Anal</source>
          <year>2018</year>
          <month>01</month>
          <volume>32</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>4</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28303662"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/jcla.22189</pub-id>
          <pub-id pub-id-type="medline">28303662</pub-id>
          <pub-id pub-id-type="pmcid">PMC6817265</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Red blood cell distribution width and ischaemic stroke</article-title>
          <source>Stroke Vasc Neurol</source>
          <year>2017</year>
          <month>09</month>
          <volume>2</volume>
          <issue>3</issue>
          <fpage>172</fpage>
          <lpage>175</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://svn.bmj.com/lookup/pmidlookup?view=long&#38;pmid=28989807"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/svn-2017-000071</pub-id>
          <pub-id pub-id-type="medline">28989807</pub-id>
          <pub-id pub-id-type="pii">svn-2017-000071</pub-id>
          <pub-id pub-id-type="pmcid">PMC5628378</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaya</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Isik</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kaya</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Enginyurt</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Gunaydin</surname>
              <given-names>ZY</given-names>
            </name>
            <name name-style="western">
              <surname>Iscanli</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Kurt</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tanboga</surname>
              <given-names>IH</given-names>
            </name>
          </person-group>
          <article-title>Relationship between red cell distribution width and stroke in patients with stable chronic heart failure: A propensity score matching analysis</article-title>
          <source>Clin Appl Thromb Hemost</source>
          <year>2015</year>
          <month>03</month>
          <volume>21</volume>
          <issue>2</issue>
          <fpage>160</fpage>
          <lpage>165</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/10.1177/1076029613493658?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub%3dpubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/1076029613493658</pub-id>
          <pub-id pub-id-type="medline">23804231</pub-id>
          <pub-id pub-id-type="pii">1076029613493658</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Giles</surname>
              <given-names>WH</given-names>
            </name>
            <name name-style="western">
              <surname>Kittner</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Anda</surname>
              <given-names>RF</given-names>
            </name>
            <name name-style="western">
              <surname>Croft</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Casper</surname>
              <given-names>ML</given-names>
            </name>
          </person-group>
          <article-title>Serum folate and risk for ischemic stroke. First National Health and Nutrition Examination Survey epidemiologic follow-up study</article-title>
          <source>Stroke</source>
          <year>1995</year>
          <month>07</month>
          <volume>26</volume>
          <issue>7</issue>
          <fpage>1166</fpage>
          <lpage>1170</lpage>
          <pub-id pub-id-type="doi">10.1161/01.str.26.7.1166</pub-id>
          <pub-id pub-id-type="medline">7604408</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Spence</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Guo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kong</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Hou</surname>
              <given-names>FF</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Huo</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Folic acid therapy reduces the first stroke risk associated with hypercholesterolemia among hypertensive patients</article-title>
          <source>Stroke</source>
          <year>2016</year>
          <month>11</month>
          <volume>47</volume>
          <issue>11</issue>
          <fpage>2805</fpage>
          <lpage>2812</lpage>
          <pub-id pub-id-type="doi">10.1161/strokeaha.116.014578</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="web">
          <article-title>National Health and Nutrition Examination Survey</article-title>
          <source>Centers for Disease Control and Prevention</source>
          <access-date>2020-04-10</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://wwwn.cdc.gov/nchs/nhanes/Default.aspx">https://wwwn.cdc.gov/nchs/nhanes/Default.aspx</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sohan</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>SSMM</given-names>
            </name>
            <name name-style="western">
              <surname>Munna</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Allayear</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>NStackSenti: Evaluation of a multi-level approach for detecting the sentiment of users</article-title>
          <source>Proceedings of the 4th International Conference on Next Generation Computing Technologies</source>
          <year>2018</year>
          <conf-name>4th International Conference on Next Generation Computing Technologies</conf-name>
          <conf-date>November 21-22, 2018</conf-date>
          <conf-loc>Dehradun, India</conf-loc>
          <fpage>38</fpage>
          <lpage>48</lpage>
          <pub-id pub-id-type="doi">10.1007/978-981-15-1718-1_4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Marasinghe</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Koehler</surname>
              <given-names>KJ</given-names>
            </name>
          </person-group>
          <source>Statistical Data Analysis Using SAS: Intermediate Statistical Methods. 2nd edition</source>
          <year>2018</year>
          <publisher-loc>Cham, Switzerland</publisher-loc>
          <publisher-name>Springer International Publishing AG</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dugan</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Mukhopadhyay</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Carroll</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Downs</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Machine learning techniques for prediction of early childhood obesity</article-title>
          <source>Appl Clin Inform</source>
          <year>2015</year>
          <month>08</month>
          <day>12</day>
          <volume>6</volume>
          <issue>3</issue>
          <fpage>506</fpage>
          <lpage>520</lpage>
          <pub-id pub-id-type="doi">10.4338/aci-2015-03-ra-0036</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Tao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jamal-Omidi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lhatoo</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Cui</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Detection of postictal generalized electroencephalogram suppression: Random forest approach</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>02</month>
          <day>14</day>
          <volume>8</volume>
          <issue>2</issue>
          <fpage>e17061</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/2/e17061/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17061</pub-id>
          <pub-id pub-id-type="medline">32130173</pub-id>
          <pub-id pub-id-type="pii">v8i2e17061</pub-id>
          <pub-id pub-id-type="pmcid">PMC7055778</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Shortliffe</surname>
              <given-names>EH</given-names>
            </name>
            <name name-style="western">
              <surname>Cimino</surname>
              <given-names>JJ</given-names>
            </name>
          </person-group>
          <source>Biomedical Informatics: Computer Applications in Health Care and Biomedicine. 4th edition</source>
          <year>2014</year>
          <publisher-loc>London, UK</publisher-loc>
          <publisher-name>Springer-Verlag</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hsu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Tain</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kuo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Machine learning model for risk prediction of community-acquired acute kidney injury hospitalization from electronic health records: Development and validation study</article-title>
          <source>J Med Internet Res</source>
          <year>2020</year>
          <month>08</month>
          <day>04</day>
          <volume>22</volume>
          <issue>8</issue>
          <fpage>e16903</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2020/8/e16903/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/16903</pub-id>
          <pub-id pub-id-type="medline">32749223</pub-id>
          <pub-id pub-id-type="pii">v22i8e16903</pub-id>
          <pub-id pub-id-type="pmcid">PMC7435690</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Du</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Accurate prediction of coronary heart disease for patients with hypertension from electronic health records with big data and machine-learning methods: Model development and performance evaluation</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>07</month>
          <day>06</day>
          <volume>8</volume>
          <issue>7</issue>
          <fpage>e17257</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/7/e17257/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17257</pub-id>
          <pub-id pub-id-type="medline">32628616</pub-id>
          <pub-id pub-id-type="pii">v8i7e17257</pub-id>
          <pub-id pub-id-type="pmcid">PMC7381262</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sohan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kabir</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jabiullah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rahman</surname>
              <given-names>SSMM</given-names>
            </name>
          </person-group>
          <article-title>Revisiting the class imbalance issue in software defect prediction</article-title>
          <source>Proceedings of the 2nd International Conference on Electrical, Computer and Communication Engineering</source>
          <year>2019</year>
          <conf-name>International Conference on Electrical, Computer and Communication Engineering</conf-name>
          <conf-date>February 7-9, 2019</conf-date>
          <conf-loc>Cox's Bazar, Bangladesh</conf-loc>
          <fpage>1</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1109/ecace.2019.8679382</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Song</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hua</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dornbors</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Baseline red blood cell distribution width as a predictor of stroke occurrence and outcome: A comprehensive meta-analysis of 31 studies</article-title>
          <source>Front Neurol</source>
          <year>2019</year>
          <volume>10</volume>
          <fpage>1237</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/fneur.2019.01237"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fneur.2019.01237</pub-id>
          <pub-id pub-id-type="medline">31849813</pub-id>
          <pub-id pub-id-type="pmcid">PMC6901990</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Geng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Frequencies of circulating B- and T-lymphocytes as indicators for stroke outcomes</article-title>
          <source>Neuropsychiatr Dis Treat</source>
          <year>2017</year>
          <month>10</month>
          <volume>13</volume>
          <fpage>2509</fpage>
          <lpage>2518</lpage>
          <pub-id pub-id-type="doi">10.2147/ndt.s148073</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liesz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Kleinschnitz</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Offner</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Functional role of regulatory lymphocytes in stroke</article-title>
          <source>Stroke</source>
          <year>2015</year>
          <month>05</month>
          <volume>46</volume>
          <issue>5</issue>
          <fpage>1422</fpage>
          <lpage>1430</lpage>
          <pub-id pub-id-type="doi">10.1161/strokeaha.114.008608</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Spence</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Yi</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Hankey</surname>
              <given-names>GJ</given-names>
            </name>
          </person-group>
          <article-title>B vitamins in stroke prevention: Time to reconsider</article-title>
          <source>Lancet Neurol</source>
          <year>2017</year>
          <month>09</month>
          <volume>16</volume>
          <issue>9</issue>
          <fpage>750</fpage>
          <lpage>760</lpage>
          <pub-id pub-id-type="doi">10.1016/S1474-4422(17)30180-1</pub-id>
          <pub-id pub-id-type="medline">28816120</pub-id>
          <pub-id pub-id-type="pii">S1474-4422(17)30180-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jickling</surname>
              <given-names>GC</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ander</surname>
              <given-names>BP</given-names>
            </name>
            <name name-style="western">
              <surname>Stamova</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zhan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Sharp</surname>
              <given-names>FR</given-names>
            </name>
          </person-group>
          <article-title>Targeting neutrophils in ischemic stroke: Translational insights from experimental studies</article-title>
          <source>J Cereb Blood Flow Metab</source>
          <year>2015</year>
          <month>06</month>
          <volume>35</volume>
          <issue>6</issue>
          <fpage>888</fpage>
          <lpage>901</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25806703"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/jcbfm.2015.45</pub-id>
          <pub-id pub-id-type="medline">25806703</pub-id>
          <pub-id pub-id-type="pii">jcbfm201545</pub-id>
          <pub-id pub-id-type="pmcid">PMC4640255</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Jee</surname>
              <given-names>SH</given-names>
            </name>
            <name name-style="western">
              <surname>Yun</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Baek</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Hemoglobin concentration and risk of cardiovascular disease in Korean men and women - The Korean Heart Study</article-title>
          <source>J Korean Med Sci</source>
          <year>2013</year>
          <month>09</month>
          <volume>28</volume>
          <issue>9</issue>
          <fpage>1316</fpage>
          <lpage>1322</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://jkms.org/DOIx.php?id=10.3346/jkms.2013.28.9.1316"/>
          </comment>
          <pub-id pub-id-type="doi">10.3346/jkms.2013.28.9.1316</pub-id>
          <pub-id pub-id-type="medline">24015036</pub-id>
          <pub-id pub-id-type="pmcid">PMC3763105</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hung</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ling</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Association between ischemic stroke and iron-deficiency anemia: A population-based study</article-title>
          <source>PLoS ONE</source>
          <year>2013</year>
          <month>12</month>
          <day>09</day>
          <volume>8</volume>
          <issue>12</issue>
          <fpage>e82952</fpage>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0082952</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
