<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v7i1e48351</article-id>
      <article-id pub-id-type="pmid">38096008</article-id>
      <article-id pub-id-type="doi">10.2196/48351</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Development of Risk Prediction Models for Severe Periodontitis in a Thai Population: Statistical and Machine Learning Approaches</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Gao</surname>
            <given-names>Aijing</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Suwansantisuk</surname>
            <given-names>Watcharapan</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Teza</surname>
            <given-names>Htun</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1076-9513</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Pattanateepapon</surname>
            <given-names>Anuchate</given-names>
          </name>
          <degrees>DEng</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Clinical Epidemiology and Biostatistics</institution>
            <institution>Faculty of Medicine Ramathibodi Hospital</institution>
            <institution>Mahidol University</institution>
            <addr-line>270 RAMA VI Road, Phayathai</addr-line>
            <addr-line>Bangkok, 10400</addr-line>
            <country>Thailand</country>
            <phone>66 2 201 1269</phone>
            <email>anuchate.gab@mahidol.ac.th</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1246-9482</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Lertpimonchai</surname>
            <given-names>Attawood</given-names>
          </name>
          <degrees>DDS, PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-2501-1534</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Vathesatogkit</surname>
            <given-names>Prin</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0433-2072</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>McKay</surname>
            <given-names>Gareth J</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8197-6280</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Attia</surname>
            <given-names>John</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9800-1308</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Thakkinstian</surname>
            <given-names>Ammarin</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9991-386X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Clinical Epidemiology and Biostatistics</institution>
        <institution>Faculty of Medicine Ramathibodi Hospital</institution>
        <institution>Mahidol University</institution>
        <addr-line>Bangkok</addr-line>
        <country>Thailand</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Periodontology</institution>
        <institution>Faculty of Dentistry</institution>
        <institution>Chulalongkorn University</institution>
        <addr-line>Bangkok</addr-line>
        <country>Thailand</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Center of Excellence in Periodontal Disease and Dental Implant</institution>
        <institution>Chulalongkorn University</institution>
        <addr-line>Bangkok</addr-line>
        <country>Thailand</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Medicine</institution>
        <institution>Faculty of Medicine Ramathibodi Hospital</institution>
        <institution>Mahidol University</institution>
        <addr-line>Bangkok</addr-line>
        <country>Thailand</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Centre for Public Health</institution>
        <institution>School of Medicine, Dentistry and Biomedical Sciences</institution>
        <institution>Queen’s University Belfast</institution>
        <addr-line>Belfast</addr-line>
        <country>United Kingdom</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>School of Medicine and Public Health</institution>
        <institution>Hunter Medical Research Institute</institution>
        <institution>University of Newcastle</institution>
        <addr-line>New Lambton, NSW</addr-line>
        <country>Australia</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Anuchate Pattanateepapon <email>anuchate.gab@mahidol.ac.th</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>14</day>
        <month>12</month>
        <year>2023</year>
      </pub-date>
      <volume>7</volume>
      <elocation-id>e48351</elocation-id>
      <history>
        <date date-type="received">
          <day>20</day>
          <month>4</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>28</day>
          <month>9</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>31</day>
          <month>10</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>1</day>
          <month>11</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Htun Teza, Anuchate Pattanateepapon, Attawood Lertpimonchai, Prin Vathesatogkit, Gareth J McKay, John Attia, Ammarin Thakkinstian. Originally published in JMIR Formative Research (https://formative.jmir.org), 14.12.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2023/1/e48351" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Severe periodontitis affects 26% of Thai adults and 11.2% of adults globally and is characterized by the loss of alveolar bone height. Full-mouth examination by periodontal probing is the gold standard for diagnosis but is time- and resource-intensive. A screening model to identify those at high risk of severe periodontitis would offer a targeted approach and aid in reducing the workload for dentists. While statistical modelling by a logistic regression is commonly applied, optimal performance depends on feature selections and engineering. Machine learning has been recently gaining favor given its potential discriminatory power and ability to deal with multiway interactions without the requirements of linear assumptions.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We aim to compare the performance of screening models developed using statistical and machine learning approaches for the risk prediction of severe periodontitis.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This study used data from the prospective Electricity Generating Authority of Thailand cohort. Dental examinations were performed for the 2008 and 2013 surveys. Oral examinations (ie, number of teeth and oral hygiene index and plaque scores), periodontal pocket depth, and gingival recession were performed by dentists. The outcome of interest was severe periodontitis diagnosed by the Centre for Disease Control–American Academy of Periodontology, defined as 2 or more interproximal sites with a clinical attachment level ≥6 mm (on different teeth) and 1 or more interproximal sites with a periodontal pocket depth ≥5 mm. Risk prediction models were developed using mixed-effects logistic regression (MELR), recurrent neural network, mixed-effects support vector machine, and mixed-effects decision tree models. A total of 21 features were considered as predictive features, including 4 demographic characteristics, 2 physical examinations, 4 underlying diseases, 1 medication, 2 risk behaviors, 2 oral features, and 6 laboratory features.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>A total of 3883 observations from 2086 participants were split into development (n=3112, 80.1%) and validation (n=771, 19.9%) sets with prevalences of periodontitis of 34.4% (n=1070) and 34.1% (n=263), respectively. The final MELR model contained 6 features (gender, education, smoking, diabetes mellitus, number of teeth, and plaque score) with an area under the curve (AUC) of 0.983 (95% CI 0.977-0.989) and positive likelihood ratio (LR+) of 11.9 (95% CI 8.8-16.3). Machine learning yielded lower performance than the MELR model, with AUC (95% CI) and LR+ (95% CI) values of 0.712 (0.669-0.754) and 2.1 (1.8-2.6), respectively, for the recurrent neural network model; 0.698 (0.681-0.734) and 2.1 (1.7-2.6), respectively, for the mixed-effects support vector machine model; and 0.662 (0.621-0.702) and 2.4 (1.9-3.0), respectively, for the mixed-effects decision tree model.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The MELR model might be more useful than machine learning for large-scale screening to identify those at high risk of severe periodontitis for periodontal evaluation. External validation using data from other centers is required to evaluate the generalizability of the model.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>periodontitis</kwd>
        <kwd>prediction</kwd>
        <kwd>machine learning</kwd>
        <kwd>repeated measures</kwd>
        <kwd>panel data</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Periodontitis, one of the most common oral diseases, is a major cause of tooth loss in adult life [<xref ref-type="bibr" rid="ref1">1</xref>] with a prevalence of 11.2% globally and 15% to 20% in Asia [<xref ref-type="bibr" rid="ref2">2</xref>]. It is a complex inflammatory disease affecting supportive structures around the tooth, resulting in loosening and eventual loss [<xref ref-type="bibr" rid="ref3">3</xref>]. This leads to decreased dental occlusion, digestive ability, and quality of life. In addition to oral manifestations, it is also associated with other inflammatory or systemic diseases [<xref ref-type="bibr" rid="ref4">4</xref>], including atherosclerotic vascular disease [<xref ref-type="bibr" rid="ref5">5</xref>], diabetes mellitus, chronic kidney disease [<xref ref-type="bibr" rid="ref6">6</xref>], chronic obstructive pulmonary disease, rheumatoid arthritis, Alzheimer disease, and erectile dysfunction [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
      <p>Severe periodontitis is characterized by the loss of alveolar bone height, which is asymptomatic until the tooth becomes mobile. Radiographs are usually used as the standard tool for diagnosis along with a full mouth examination by dentists, both of which are time- and resource-intensive, especially in public health sectors constrained by the large number of participants that require examination. The impact on resource allocation can, in part, be addressed through the use of screening tools, such as risk prediction models, to identify those at high risk of severe periodontitis.</p>
      <p>Identification of severe periodontitis risk factors has been achieved largely through cross-sectional investigations that have evaluated demographic features, risk behaviors, and oral characteristics [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. Inclusion of both demographic and oral features as predictors has been reported to outperform models composed of either feature alone [<xref ref-type="bibr" rid="ref9">9</xref>]. Furthermore, the addition of saliva biomarkers to established risk factors further improved performance [<xref ref-type="bibr" rid="ref11">11</xref>], but including such parameters necessitates the requirement for oral examination, which is contradictory to the purpose of a screening tool (for reducing time and resources). However, the majority of studies have used cross-sectional data, which fail to capture the complex relationship between the features and outcomes in contrast to longitudinal investigations that reflect both interindividual and intraindividual dynamics [<xref ref-type="bibr" rid="ref14">14</xref>].</p>
      <p>Machine learning approaches for disease risk prediction have been proposed, which might perform better in dealing with multidimensional interactions, collinearity between features, and nonlinear relationships than the traditional statistical models [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref17">17</xref>]. Several machine learning algorithms, such as support vector machines, decision trees, and artificial neural networks, have also been reported to improve the diagnosis of periodontal disease [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. The performance of artificial neural networks was also considered [<xref ref-type="bibr" rid="ref20">20</xref>] with the inclusion of probing pocket depth (PPD) as a predictor. However, PPD requires a comprehensive periodontal examination, which is time- and resource-intensive. Applying machine learning screening models without PPD may help reduce the number of patients requiring dental examination and associated resource commitments. As such, the aim of this study was to use longitudinal data to compare the performance of statistical and machine learning approaches for periodontitis risk prediction.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Setting and Study Population</title>
        <p>This study used data from the prospective Electricity Generating Authority of Thailand (EGAT) cohort study [<xref ref-type="bibr" rid="ref21">21</xref>]. Dental examinations were performed in the 2008 and 2013 surveys.</p>
      </sec>
      <sec>
        <title>Participant Eligibility</title>
        <p>Participants were included if they had received periodontal examinations in both surveys (2008 and 2013) regardless of having periodontitis at the baseline survey. Some participants were excluded if they did not receive periodontal examinations due to a refusal to participate, had systemic conditions that required antibiotic prophylaxis before dental examination (eg, congenital heart disease or valvular heart disease, a previous history of bacterial endocarditis or rheumatic fever, total joint replacement, and end-stage renal disease), or were fully edentulous.</p>
      </sec>
      <sec>
        <title>Clinical Features</title>
        <sec>
          <title>Demographic and Clinical Characteristics</title>
          <p>General demographic data (ie, age, gender, educational level, and income), behavioral data (ie, smoking status and alcohol consumption), underlying diseases (eg, diabetes mellitus, hypertension, chronic kidney disease), and lipid lowering medication information were collected by self-administered questionnaires at both time points. A physical examination including weight, height, and waist and hip circumference was also performed at the survey site. Laboratory tests included lymphocytes, uric acid, and lipid profiling.</p>
        </sec>
        <sec>
          <title>Oral Features</title>
          <p>Oral examinations included the number of teeth and oral hygiene index (plaque score) [<xref ref-type="bibr" rid="ref22">22</xref>], which were carried out by the Department of Periodontology, Faculty of Dentistry, Chulalongkorn University. PPD and gingival recession (RE) were measured at 6 sites (buccal/labial, lingual/palatal, mesiobuccal, mesiolingual, distobuccal, and distolingual) on all fully erupted teeth except for third molars and retained roots. Centre for Disease Control–American Academy of Periodontology (CDC-AAP) criteria were used to classify severe periodontitis. PPD was defined as the distance from the coronal point of the gingival margin to the tip of a periodontal probe, and the RE was defined as the distance to the cementoenamel junction, with the clinical attachment level calculated by subtracting the RE from the PPD.</p>
        </sec>
        <sec>
          <title>Outcome</title>
          <p>The primary outcome of interest was severe periodontitis as defined by the CDC-AAP guidelines [<xref ref-type="bibr" rid="ref23">23</xref>] at 2 or more interproximal sites with a clinical attachment level ≥6 mm (on different teeth) and 1 or more interproximal sites with a PPD ≥5 mm.</p>
        </sec>
      </sec>
      <sec>
        <title>Model Development</title>
        <p>Among the included participants, the missing data rate was relatively low, ranging from 0.03% to 9.3% (Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Multiple imputation by chained equations (MICE) was applied to impute missing data assuming the data were missing at random (additional detail is provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Given that repeatedly measured data were applied, a multilevel predictive mean matching method was applied for all continuous features using the <italic>miceadds-3.13-12</italic> R library. Features used to impute the missing data are presented in Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. A total of 5 imputations for MICE were constructed with 8 iterations each for estimation (Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Distributions of features in complete-case and imputed data were almost the same (Table S3 and Figure S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <p>A total of 21 features, including demographic characteristics (age, gender, education level, and income), physical examinations (body mass index and waist to hip ratio), underlying diseases (diabetes mellitus, hypertension, dyslipidemia, and chronic kidney disease), risk behaviors (smoking status and alcohol drinking habits), oral features (number of teeth and plaque score), laboratory features (lymphocytes, uric acid, triglyceride, cholesterol, high density lipoprotein, and low density lipoprotein), and a lipid lowering drug, were considered as predictors. Of them, 9 features were included as categorical data with the rest as continuous data.</p>
        <p>To the best of our knowledge, there are no explicit guidelines for sample size estimation for machine learning models, but previous studies have recommended that this be based on disease prevalence estimates [<xref ref-type="bibr" rid="ref24">24</xref>]. According to the 8th National Oral Health Survey of Thailand (2017) [<xref ref-type="bibr" rid="ref25">25</xref>], the adult prevalence of severe periodontitis in the Thai population was 26%. A total of 296 participants would therefore be required, assuming a type 1 error rate of 5% and a 95% CI, with 77 having severe periodontitis. Our data included 2086 participants that underwent periodontal examination, with 721 characterized as having severe symptoms, providing sufficient power.</p>
        <p>We considered 4 models: a mixed-effects logistic regression (MELR), recurrent neural networks (RNN), a mixed-effects support vector machine (ME-SVM), and a mixed-effects decision tree (ME-DT). For mixed-effects approaches, a random intercept was fitted considering the effect of participants as random. The framework for model development is shown in Figure S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. For the MELR [<xref ref-type="bibr" rid="ref26">26</xref>], feature selection was performed based on the following steps suggested by Hosmer-Lemeshow [<xref ref-type="bibr" rid="ref27">27</xref>]: (1) univariate analysis of MELR was performed and indicated that 15 out of the 21 features (ie, age, gender, education level, income, waist to hip ratio, diabetes mellitus, hypertension, smoking status, alcohol drinking habits, number of teeth, plaque score, lymphocytes, uric acid, triglyceride, and high density lipoprotein) had <italic>P</italic>≤.1 (Table S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), which were then considered simultaneously in a multivariate MELR; (2) a stepwise process with forward selection was applied by including each of the 15 features into the MELR model one by one, and only significant features were kept in the final model; (3) 6 nonsignificant features (ie, body mass index, dyslipidemia, chronic kidney disease, cholesterol, low density lipoprotein, and lipid lowering drug) in the univariate analysis were also reconsidered to add in the final multivariate MELR model that contained only significant features, but none of them were significant, thus they were omitted; (4) interactions between significant features (eg, smoking and gender, smoking and plaque score, plaque score and diabetes) were considered but none were significant; and (5) odds ratios and 95% CIs of all significant features were estimated based on the final model.</p>
        <p>A total of 21 features were considered in the machine learning models. For RNN [<xref ref-type="bibr" rid="ref28">28</xref>], ME-SVM, and ME-DT [<xref ref-type="bibr" rid="ref29">29</xref>], hyperparameter optimization was done using a random search of the hyperparameter sets followed by grid-search procedures. This process can be subject to unfocused random noise in data development and a failure to generalize. As such, a validation data set was used to assess model performance, and the hyperparameters were readjusted if overfitting was present. The RNN model was developed using Keras-2.4.3 [<xref ref-type="bibr" rid="ref30">30</xref>] and TensorFlow-2.3.1 [<xref ref-type="bibr" rid="ref31">31</xref>]. The final model specifications for RNN were 4 hidden layers with 62, 72, 72, and 62 simple RNN nodes with a Tanh activation function in feed-forward order with a dropout of 0.2 allocated between hidden and output layers. The output layer had 1 sigmoid node for binary classification. Binary cross entropy represented a loss function, with accuracy as a monitor metric. A learning rate of 0.01 and a batch size of 64 were applied for mini-batch optimization. A total of 10,000 epochs were used with early stopping due to time and resource constraints.</p>
        <p>The ME-SVM included support vector regression developed within the <italic>e1071-1.7.4</italic> R library framework for fixed effects and a linear mixed model developed with <italic>lme4-1.1.26</italic> for random effects. Support vector regression here applies nu-regression [<xref ref-type="bibr" rid="ref32">32</xref>], with a nu value of 0.5, a cost value of 0.1 as the penalty parameter for misclassifications, and a radial basis kernel function with a gamma value of 0.3. Similarly, ME-DT used the <italic>rpart-4.1.16</italic> library framework with a maximum tree depth of 18 and a minimum number of subjects for splitting of 20. Hyperparameter tuning was performed by leave-one-out (K-1) cross-validation.</p>
        <p>The probability of having periodontitis was estimated by each model. Participants were classified as positive if the estimated probability was ≥0.35, as per the prevalence of periodontitis in our data. A 2×2 contingency table was then constructed comparing positive and negative classifications with actual periodontitis. The performance of each model was further evaluated by estimating sensitivity, specificity, accuracy, positive likelihood ratio (LR+), and <italic>F</italic><sub>1</sub>-score. In addition, discrimination and calibration performances were also assessed using the area under the receiver operating characteristic curve (AUC) and Brier scores. Values ranged from 0 to 1 for both, with a higher score being preferable for the AUC in contrast to a lower score for the Brier score.</p>
        <p>All analyses were performed based on imputed data using STATA version 16.0 (StataCorp) for MELR, Python version 3.8.2 (Python Software Foundation) for RNN, and R version 4.0.2 (R Foundation for Statistical Computing) for ME-SVM and ME-DT.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was approved by the Human Research Ethics Committee, Faculty of Medicine Ramathibodi Hospital, Mahidol University (COA.MURA2020/1560). For the prospective EGAT cohort, all participation was voluntary and the participants gave written informed consent, including permission for secondary analyses of the collected data for necessary further studies. Identifications and personal information were encrypted and kept in databases that only the principal investigators could access.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>A total of 2271 participants were initially included in the cohort in 2008, but only 2086 participants were followed up 5 years later in 2013. The key characteristics of the 2086 participants comparing those with and without periodontitis are reported in <xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref>. Of the participants, 71% (n=1482) were men. The mean age was 54.4 (SD 5.0) years, with the youngest being 43.7 years and the oldest being 70.3 years.</p>
      <p>Each participant was observed 1 to 2 times, and of the 3883 total observations included in this study, 47.2% (n=1834) of participants had a bachelor or higher degree, and 70.8% (n=2749) earned &gt;50,000 baht (&gt;US $1500) per month. Approximately 67.8% (n=2634) consumed alcohol and 16.7% (n=648) were current smokers at the time of observation. The prevalences of diabetes, hypertension, and dyslipidemia were 12.9% (n=499), 44.8% (n=1741), and 71.5% (n=2775), respectively. Data from both surveys were split into development (n=3112, 80.1%) and validation (n=771, 19.9%) sets [<xref ref-type="bibr" rid="ref33">33</xref>] at the participant level to prevent data leakage (ie, participants were included in either the development or the validation set only). Participants in the development and validation sets had a prevalence of periodontitis of 34.4% (n=1070) and 34.1% (n=263), respectively (Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Gender of participants with and without observed severe periodontitis.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="210"/>
          <col width="210"/>
          <col width="280"/>
          <col width="270"/>
          <thead>
            <tr valign="top">
              <td colspan="2">Demographic</td>
              <td>All participants (n=2086)</td>
              <td>Participants with severe periodontitis (n=721)</td>
              <td>Participants with nonsevere periodontitis (n=1365)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="5">
                <bold>Gender, n (%)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Men</td>
              <td>1482 (71)</td>
              <td>591 (82)</td>
              <td>891 (65.3)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Women</td>
              <td>604 (29)</td>
              <td>130 (18)</td>
              <td>474 (34.7)</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Age, education level, income, and clinical characteristics of participants with and without observed severe periodontitis.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="30"/>
          <col width="370"/>
          <col width="200"/>
          <col width="200"/>
          <col width="200"/>
          <thead>
            <tr valign="top">
              <td colspan="2">Characteristic</td>
              <td>All observations (n=3883)</td>
              <td>Observations of participants with severe periodontitis (n=1333)</td>
              <td>Observations of participants with nonsevere periodontitis (n=2550)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td colspan="2">Age (years), mean (SD)</td>
              <td>54.4 (5.0)</td>
              <td>55.0 (5.1)</td>
              <td>54.0 (5.0)</td>
            </tr>
            <tr valign="top">
              <td colspan="5">
                <bold>Education level, n (%)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>High school graduate or lower</td>
              <td>767 (19.8)</td>
              <td>406 (30.4)</td>
              <td>361 (14.2)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Vocational school graduate</td>
              <td>1282 (33)</td>
              <td>522 (39.2)</td>
              <td>760 (29.8)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Bachelor degree graduate</td>
              <td>1519 (39.1)</td>
              <td>341 (25.6)</td>
              <td>1178 (46.2)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Above bachelor degree</td>
              <td>315 (8.1)</td>
              <td>64 (4.8)</td>
              <td>251 (9.8)</td>
            </tr>
            <tr valign="top">
              <td colspan="5">
                <bold>Monthly income in Baht (US $), n (%)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Less than 20,000 (US $600)</td>
              <td>306 (7.9)</td>
              <td>149 (11.2)</td>
              <td>157 (6.2)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Between 20,000-49,999 (US $600-$1499.97)</td>
              <td>828 (21.3)</td>
              <td>365 (27.4)</td>
              <td>463 (18.1)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>More than 50,000 (US $1500)</td>
              <td>2749 (70.8)</td>
              <td>819 (61.4)</td>
              <td>1930 (75.7)</td>
            </tr>
            <tr valign="top">
              <td colspan="2">Body mass index, mean (SD)</td>
              <td>24.9 (3.7)</td>
              <td>24.9 (3.7)</td>
              <td>24.8 (3.7)</td>
            </tr>
            <tr valign="top">
              <td colspan="2">Waist to hip ratio, mean (SD)</td>
              <td>0.9 (0.1)</td>
              <td>0.9 (0.1)</td>
              <td>0.9 (0.1)</td>
            </tr>
            <tr valign="top">
              <td colspan="2">Diabetes mellitus, n (%)</td>
              <td>499 (12.9)</td>
              <td>234 (17.5)</td>
              <td>265 (10.4)</td>
            </tr>
            <tr valign="top">
              <td colspan="2">Hypertension, n (%)</td>
              <td>1741 (44.8)</td>
              <td>676 (50.7)</td>
              <td>1065 (41.8)</td>
            </tr>
            <tr valign="top">
              <td colspan="2">Dyslipidemia, n (%)</td>
              <td>2775 (71.5)</td>
              <td>954 (71.6)</td>
              <td>1821 (71.4)</td>
            </tr>
            <tr valign="top">
              <td colspan="2">Chronic kidney disease, n (%)</td>
              <td>293 (7.5)</td>
              <td>105 (7.9)</td>
              <td>188 (7.4)</td>
            </tr>
            <tr valign="top">
              <td colspan="5">
                <bold>Smoking status, n (%)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Nonsmoker</td>
              <td>2092 (53.9)</td>
              <td>498 (37.3)</td>
              <td>1594 (62.5)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Exsmoker</td>
              <td>1143 (29.4)</td>
              <td>446 (33.5)</td>
              <td>697 (27.3)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Current smoker</td>
              <td>648 (16.7)</td>
              <td>389 (29.2)</td>
              <td>259 (10.2)</td>
            </tr>
            <tr valign="top">
              <td colspan="5">
                <bold>Alcohol consumption, n (%)</bold>
              </td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Nonconsumer</td>
              <td>1249 (32.2)</td>
              <td>309 (23.2)</td>
              <td>940 (36.8)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Occasional consumer</td>
              <td>695 (17.9)</td>
              <td>242 (18.1)</td>
              <td>453 (17.8)</td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Frequent consumer</td>
              <td>1939 (49.9)</td>
              <td>782 (58.7)</td>
              <td>1157 (45.4)</td>
            </tr>
            <tr valign="top">
              <td colspan="2">Number of present or remaining teeth, mean (SD)</td>
              <td>23.4 (4.9)</td>
              <td>22.1 (5.4)</td>
              <td>24.1 (4.5)</td>
            </tr>
            <tr valign="top">
              <td colspan="2">Plaque score (%), mean (SD)</td>
              <td>70.9 (21.5)</td>
              <td>78.5 (18.8)</td>
              <td>66.9 (21.7)</td>
            </tr>
            <tr valign="top">
              <td colspan="2">Lymphocytes (cells/mm<sup>3</sup>), mean (SD)</td>
              <td>2156.3 (623)</td>
              <td>2224.5 (636.9)</td>
              <td>2120.6 (612.7)</td>
            </tr>
            <tr valign="top">
              <td colspan="2">Uric acid (mg/dL), mean (SD)</td>
              <td>5.9 (1.4)</td>
              <td>6.1 (1.4)</td>
              <td>5.8 (1.5)</td>
            </tr>
            <tr valign="top">
              <td colspan="2">Triglyceride (mg/dL), mean (SD)</td>
              <td>147.6 (96.2)</td>
              <td>159.6 (109.5)</td>
              <td>141.4 (87.8)</td>
            </tr>
            <tr valign="top">
              <td colspan="2">Cholesterol (mg/dL), mean (SD)</td>
              <td>225.1 (43.3)</td>
              <td>223.2 (44.4)</td>
              <td>226.0 (42.6)</td>
            </tr>
            <tr valign="top">
              <td colspan="2">High density lipoprotein (mg/dL), mean (SD)</td>
              <td>54.1 (14.3)</td>
              <td>51.5 (13.8)</td>
              <td>55.4 (14.5)</td>
            </tr>
            <tr valign="top">
              <td colspan="2">Low density lipoprotein (mg/dL), mean (SD)</td>
              <td>147.7 (39.4)</td>
              <td>145.7 (40.1)</td>
              <td>148.7 (39)</td>
            </tr>
            <tr valign="top">
              <td colspan="2">Taking lipid lowering medications, n (%)</td>
              <td>960 (24.7)</td>
              <td>328 (24.6)</td>
              <td>632 (24.8)</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
      <p>The final multivariate MELR model included 6 features, namely, gender, education, smoking status, diabetes mellitus, number of teeth, and plaque score. The regression coefficients and odds ratios for each feature are reported in <xref rid="figure1" ref-type="fig">Figure 1</xref> and Table S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The odds of men having severe periodontitis were 2.63 times higher than those of women. Lower levels of education were significantly associated with severe periodontitis; those educated to vocational levels and high school graduates were associated with a 3.92 and 7.59 times greater likelihood of severe periodontitis, respectively, compared to those educated above a bachelor degree. Current and exsmokers had 5.38 and 2.09 times higher odds of severe periodontitis, respectively, than nonsmokers. Participants with diabetes had a 66% greater risk of severe periodontitis compared to those without diabetes. The risk of periodontitis increased by 3% per unit increase in plaque score, in contrast to a 6% reduction in risk for every remaining tooth.</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>Magnitude of associations (odds ratio and 95% CI) between predictors and severe periodontitis for the mixed-effect logistic regression model.</p>
        </caption>
        <graphic xlink:href="formative_v7i1e48351_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
      <p>Model performance was evaluated with both the development and validation data sets (<xref ref-type="table" rid="table3">Table 3</xref>). For the development data set, the AUC (95% CI), <italic>F</italic><sub>1</sub>-score, and Brier score for the MELR model were 0.980 (0.977-0.984), 0.869, and 0.061, respectively. The corresponding values for the validation set were 0.983 (0.977-0.989), 0.878, and 0.058, respectively, indicating that the model performed well in both data sets. The LR+ (95% CI) values (at the threshold of 0.35) were 9.4 (8.2-10.8) and 11.9 (8.8-16.3) for the development and validation data sets, respectively. This could be interpreted as participants being approximately 9-fold more likely to have periodontitis given that the model classified them as positive (ie, estimated probability ≥0.35).</p>
      <p>The RNN model yielded AUC (95% CI) values of 0.747 (0.727-0.766) and 0.712 (0.669-0.754) for the development and validation data sets, respectively. The corresponding LR+ (95% CI) values were 2.3 (2.1-2.5) and 2.1 (1.8-2.6), respectively, which were much lower compared to those from the MELR model. The AUC (95% CI) values for the ME-SVM model were 0.761 (0.754-0.766) and 0.698 (0.681-0.734) for the development and validation data sets, respectively, with corresponding LR+ (95% CI) values of 3.1 (2.7-3.4) and 2.1 (1.7-2.6), respectively. For the ME-DT model, the AUC (95% CI) and LR+ (95% CI) values were 0.695 (0.677-0.714) and 2.4 (2.1-2.6), respectively, for the development data set and 0.662 (0.621-0.702) and 2.4 (1.9-3.0), respectively, for the validation data set. The receiver operating characteristic curves for all models are shown in Figure S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
      <table-wrap position="float" id="table3">
        <label>Table 3</label>
        <caption>
          <p>Performances of the predictive models in the development and validation data sets.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="120"/>
          <col width="110"/>
          <col width="110"/>
          <col width="0"/>
          <col width="110"/>
          <col width="110"/>
          <col width="0"/>
          <col width="110"/>
          <col width="110"/>
          <col width="0"/>
          <col width="110"/>
          <col width="110"/>
          <thead>
            <tr valign="top">
              <td>Metric</td>
              <td colspan="3">MELR<sup>a</sup></td>
              <td colspan="3">RNN<sup>b</sup></td>
              <td colspan="3">ME-SVM<sup>c</sup></td>
              <td colspan="2">ME-DT<sup>d</sup></td>
            </tr>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Development</td>
              <td>Validation</td>
              <td colspan="2">Development</td>
              <td>Validation</td>
              <td colspan="2">Development</td>
              <td>Validation</td>
              <td colspan="2">Development</td>
              <td>Validation</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Sensitivity, % (95% CI)</td>
              <td>91.2 (89.4-92.8)</td>
              <td>89.4 (85-92.8)</td>
              <td colspan="2">61.6 (58.3-64.9)</td>
              <td>54.9 (48-61.7)</td>
              <td colspan="2">52.8 (49.5-56)</td>
              <td>46.1 (39.1-53.2)</td>
              <td colspan="2">47 (44-50.1)</td>
              <td>44.5 (38.4-50.7)</td>
            </tr>
            <tr valign="top">
              <td>Specificity, % (95% CI)</td>
              <td>90.3 (88.9-91.6)</td>
              <td>92.5 (89.9-94.7)</td>
              <td colspan="2">72.9 (70.9-75)</td>
              <td>74.4 (70.1-78.3)</td>
              <td colspan="2">82.7 (80.9-84.4)</td>
              <td>78.2 (74.2-81.8)</td>
              <td colspan="2">80.2 (78.4-81.9)</td>
              <td>81.3 (77.6-84.6)</td>
            </tr>
            <tr valign="top">
              <td>Accuracy, % (95% CI)</td>
              <td>90.6 (89.5-91.6)</td>
              <td>91.4 (89.2-93.3)</td>
              <td colspan="2">69.3 (67.6-71.1)</td>
              <td>68.2 (64.5-71.7)</td>
              <td colspan="2">72.7 (71-74.4)</td>
              <td>68.6 (65-72.1)</td>
              <td colspan="2">68.8 (67.1-70.4)</td>
              <td>68.7 (65.3-72)</td>
            </tr>
            <tr valign="top">
              <td>AUC<sup>e</sup> (95% CI)</td>
              <td>0.980 (0.977-0.984)</td>
              <td>0.983 (0.977-0.989)</td>
              <td colspan="2">0.747 (0.727-0.766)</td>
              <td>0.712 (0.669-0.754)</td>
              <td colspan="2">0.761 (0.754-0.766)</td>
              <td>0.698 (0.681-0.734)</td>
              <td colspan="2">0.695 (0.677-0.714)</td>
              <td>0.662 (0.621-0.702)</td>
            </tr>
            <tr valign="top">
              <td><italic>F</italic><sub>1</sub>-score</td>
              <td>0.869</td>
              <td>0.878</td>
              <td colspan="2">0.573</td>
              <td>0.543</td>
              <td colspan="2">0.564</td>
              <td>0.467</td>
              <td colspan="2">0.509</td>
              <td>0.493</td>
            </tr>
            <tr valign="top">
              <td>Brier score</td>
              <td>0.061</td>
              <td>0.058</td>
              <td colspan="2">0.181</td>
              <td>0.187</td>
              <td colspan="2">0.198</td>
              <td>0.200</td>
              <td colspan="2">0.236</td>
              <td>0.240</td>
            </tr>
            <tr valign="top">
              <td>LR+<sup>f</sup> (95% CI)</td>
              <td>9.4 (8.2-10.8)</td>
              <td>11.9 (8.8-16.3)</td>
              <td colspan="2">2.3 (2.1-2.5)</td>
              <td>2.1 (1.8-2.6)</td>
              <td colspan="2">3.1 (2.7-3.4)</td>
              <td>2.1 (1.7-2.6)</td>
              <td colspan="2">2.4 (2.1-2.6)</td>
              <td>2.4 (1.9-3.0)</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table3fn1">
            <p><sup>a</sup>MELR: mixed-effects logistic regression.</p>
          </fn>
          <fn id="table3fn2">
            <p><sup>b</sup>RNN: recurrent neural networks.</p>
          </fn>
          <fn id="table3fn3">
            <p><sup>c</sup>ME-SVM: mixed-effects support vector machine.</p>
          </fn>
          <fn id="table3fn4">
            <p><sup>d</sup>ME-DT: mixed-effects decision tree.</p>
          </fn>
          <fn id="table3fn5">
            <p><sup>e</sup>AUC: area under the receiver operating characteristic curve.</p>
          </fn>
          <fn id="table3fn6">
            <p><sup>f</sup>LR+: positive likelihood ratio.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Results</title>
        <p>Our study developed risk prediction models for periodontitis using traditional statistical and machine learning approaches. The MELR model performed best with an AUC value of 0.983 in comparison to 2 machine learning approaches, RNN and ME-SVM, which had fair performances with AUC values of 0.712 and 0.698, respectively. In addition, the Brier scores for the RNN and ME-SVM were similarly high at 0.187 to 0.200, in contrast to a score of 0.058 for the MELR, which reflects an overfitting for both machine learning models compared to the MELR. Furthermore, an LR+ value as low as 2-3 for the machine learning approaches contrasted with the high value of 11.9 for the MELR.</p>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>Our MELR model also performed better than previous predictive models that applied logistic regression (AUC=0.71) [<xref ref-type="bibr" rid="ref10">10</xref>] and was superior even to those that included salivary biomarkers, such as chitinase and protease activity (AUC=0.91) [<xref ref-type="bibr" rid="ref18">18</xref>]. Our analyses suggest that the mixed model approach performs better than logistic regression because the former considers latent participant-specific variability and thus better captures information about population average effects of the risk features than regression approaches that use cross-sectional data.</p>
        <p>The machine learning models (RNN, ME-SVM, and ME-DT) may have performed less well in comparison to the MELR model due to a data imbalance, as one-third of our study participants had severe periodontitis. Ling and Victor [<xref ref-type="bibr" rid="ref34">34</xref>] suggested that a classification imbalance may affect model performance if the cost of the 2 errors (ie, false positive and false negative in the binary classification) is not the same, or if the class distribution in the validation data is different from that in the development data. The prevalence of severe periodontitis in the development and validation sets was very similar (34.4% and 34.1%, respectively; Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>), and this was similar to the 8th National Oral Health Survey of Thailand (2017) [<xref ref-type="bibr" rid="ref25">25</xref>], which reported a prevalence of 26% in adults and 36% in older individuals. Thus, both data sets had similar distributions of participants with severe periodontitis that accurately reflected the overall prevalence in Thailand.</p>
        <p>To simulate the improved performance of the MELR model, a framework to include repeated measures and random effects was applied to the machine learning models, which is a recognized advantage of the ME-ML model, although the model still failed to meet the performance levels of MELR. This may have resulted from differences in the optimization and estimation of fixed and random effects within these models; for example, the penalized quasilikelihood method was used for MELR [<xref ref-type="bibr" rid="ref35">35</xref>] and the expectation-maximization algorithm was applied in the ME-ML models [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]. This framework could be beneficial in estimating nonlinear relationships between predictors and outcomes; however, further studies are necessary to independently validate this.</p>
      </sec>
      <sec>
        <title>Strengths and Limitations</title>
        <p>A cutoff value of 0.35 was selected to reflect the observed prevalence of the condition as the uniform decision threshold and applied to all 4 screening models to enable cross-model comparisons, but this can be adjusted depending on the objective and outcome [<xref ref-type="bibr" rid="ref37">37</xref>]. In most clinical screening or diagnostic tools, it is unlikely that the consequences of a false positive and false negative are similar. By reducing the decision threshold, participants with a lower probability would be considered positive, increasing the sensitivity (ie, fewer false negative cases) but reducing the specificity (ie, more false positive cases). In mass screening situations, participants identified as positive would be referred for further examination; therefore, this would lead to increased numbers of participants requiring comprehensive periodontal probing. However, it would also fulfil the purpose of screening by providing early diagnosis and prompt referral by reducing the number of positive participants incorrectly identified as negative. Despite the reduced specificity associated with a lower decision threshold, this approach would identify those at greatest risk while reducing the overall workload for examiners and facilitating a more efficient allocation of resources.</p>
        <p>While MELR models can be calculated manually as a linear combination of features, machine learning approaches must be exported in hierarchical file formats for use in websites or applications, which can be developed with a user-friendly interface. Data collection and mining from electronic care records can be combined and reformatted for more complex procedures, such as feature engineering and data preprocessing. Risk prediction modelling would therefore be amenable to data updates, facilitating model refinement, with potential portability through web- or desktop-based applications that could be provided to health care staff.</p>
        <p>This study had several limitations. Only internal validation was carried out for model evaluation, and external validation using data from other centers or surveys is required. Furthermore, data from other Thai populations, as well as from different countries or ethnicities, would help determine the generalizability of the findings. Machine learning approaches in particular would benefit from further model refinement with larger, better characterized data sets since their performance depends on the data quality of the development sets. Although the clinical features included in the MELR model were relatively easy to examine, the assessment of a plaque score is manually intensive. Although periodontal probing is not required, the examination includes oral rinsing with plaque disclosing solutions and manual counting of the stained surfaces. Self-reporting would not be optimal since improper application and poor assessment would lead to unreliable scoring and subsequent risk underestimation by the model. The oral features included in developing the models were limited to the factors collected by the EGAT survey. Previous studies have suggested that the inclusion of more relevant oral characteristics, such as tooth mobility and gum bleeding, would increase the performance even further, although an AUC of 0.98 is considered excellent.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In conclusion, the MELR approach performed excellently, and to our knowledge represents one of the best screening models for severe periodontitis. Machine learning approaches demonstrated fair performance despite their ability to estimate nonlinear relationships. Instead of relying on PPD measurements obtained through an extensive periodontal assessment, which can be time-consuming and resource-intensive, the MELR model might be useful in health information systems to monitor oral health, prompting patients to visit a dental professional for comprehensive examination and appropriate treatment. With further independent external validation of the model, such a tool should be evaluated in a primary care setting to assist dental professionals in the screening of severe periodontitis to improve and direct resource allocation to where it is needed most.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Supplementary tables and figures.</p>
        <media xlink:href="formative_v7i1e48351_app1.docx" xlink:title="DOCX File , 563 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC</term>
          <def>
            <p>area under the receiver operating characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CDC-AAP</term>
          <def>
            <p>Centers for Disease Control and Prevention–American Academy of Periodontology</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">EGAT</term>
          <def>
            <p>Electricity Generating Authority of Thailand</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">LR+</term>
          <def>
            <p>positive likelihood ratio</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">MELR</term>
          <def>
            <p>mixed-effects logistic regression</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ME-DT</term>
          <def>
            <p>mixed-effects decision tree</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">ME-SVM</term>
          <def>
            <p>mixed-effects support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">MICE</term>
          <def>
            <p>multiple imputation by chained equations</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">PPD</term>
          <def>
            <p>probing pocket depth</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">RE</term>
          <def>
            <p>gingival recession</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">RNN</term>
          <def>
            <p>recurrent neural networks</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study was part of a master's dissertation for the lead author (HT) in the Department of Clinical Epidemiology and Biostatistics, Faculty of Medicine Ramathibodi Hospital, Mahidol University, Thailand. The authors would like to thank Sukanya Siriyotha for her input regarding the data. HT, AP, and AT received a grant from the National Research Council of Thailand (grant N42A640323). The funder was not involved in the study and did not impose any restrictions regarding the publication of the report.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data sets generated and analyzed in this study are available from the corresponding author on reasonable request. Codes generated for the whole analyses are available [<xref ref-type="bibr" rid="ref38">38</xref>].</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>HT, AP, AL, and AT conceptualized the study design and curated, analyzed, and interpreted the data. HT drafted the manuscript. AP, AL, PV, GJM, JA, and AT reviewed and edited the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Phipps</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Stevens</surname>
              <given-names>VJ</given-names>
            </name>
          </person-group>
          <article-title>Relative contribution of caries and periodontal disease in adult tooth loss for an HMO dental population</article-title>
          <source>J Public Health Dent</source>
          <year>1995</year>
          <volume>55</volume>
          <issue>4</issue>
          <fpage>250</fpage>
          <lpage>252</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1752-7325.1995.tb02377.x</pub-id>
          <pub-id pub-id-type="medline">8551465</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Corbet</surname>
              <given-names>EF</given-names>
            </name>
            <name name-style="western">
              <surname>Leung</surname>
              <given-names>WK</given-names>
            </name>
          </person-group>
          <article-title>Epidemiology of periodontitis in the Asia and Oceania regions</article-title>
          <source>Periodontol 2000</source>
          <year>2011</year>
          <month>06</month>
          <volume>56</volume>
          <issue>1</issue>
          <fpage>25</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1600-0757.2010.00362.x</pub-id>
          <pub-id pub-id-type="medline">21501236</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tonetti</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Jepsen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Otomo-Corgel</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Impact of the global burden of periodontal diseases on health, nutrition and wellbeing of mankind: a call for global action</article-title>
          <source>J Clin Periodontol</source>
          <year>2017</year>
          <month>05</month>
          <volume>44</volume>
          <issue>5</issue>
          <fpage>456</fpage>
          <lpage>462</lpage>
          <pub-id pub-id-type="doi">10.1111/jcpe.12732</pub-id>
          <pub-id pub-id-type="medline">28419559</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Linden</surname>
              <given-names>GJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lyons</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Scannapieco</surname>
              <given-names>FA</given-names>
            </name>
          </person-group>
          <article-title>Periodontal systemic associations: review of the evidence</article-title>
          <source>J Clin Periodontol</source>
          <year>2013</year>
          <month>04</month>
          <volume>40 Suppl 14</volume>
          <fpage>S8</fpage>
          <lpage>S19</lpage>
          <pub-id pub-id-type="doi">10.1111/jcpe.12064</pub-id>
          <pub-id pub-id-type="medline">23627336</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mattila</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Nieminen</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Valtonen</surname>
              <given-names>VV</given-names>
            </name>
            <name name-style="western">
              <surname>Rasi</surname>
              <given-names>VP</given-names>
            </name>
            <name name-style="western">
              <surname>Kesäniemi</surname>
              <given-names>YA</given-names>
            </name>
            <name name-style="western">
              <surname>Syrjälä</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Jungell</surname>
              <given-names>PS</given-names>
            </name>
            <name name-style="western">
              <surname>Isoluoma</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hietaniemi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Jokinen</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>Association between dental health and acute myocardial infarction</article-title>
          <source>BMJ</source>
          <year>1989</year>
          <month>03</month>
          <day>25</day>
          <volume>298</volume>
          <issue>6676</issue>
          <fpage>779</fpage>
          <lpage>781</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/2496855"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.298.6676.779</pub-id>
          <pub-id pub-id-type="medline">2496855</pub-id>
          <pub-id pub-id-type="pmcid">PMC1836063</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lertpimonchai</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rattanasiri</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tamsailom</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Champaiboon</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ingsathit</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kitiyakara</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Limpianunchai</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Attia</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sritara</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Thakkinstian</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Periodontitis as the risk factor of chronic kidney disease: mediation analysis</article-title>
          <source>J Clin Periodontol</source>
          <year>2019</year>
          <month>06</month>
          <volume>46</volume>
          <issue>6</issue>
          <fpage>631</fpage>
          <lpage>639</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30993705"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/jcpe.13114</pub-id>
          <pub-id pub-id-type="medline">30993705</pub-id>
          <pub-id pub-id-type="pmcid">PMC6593715</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Monsarrat</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Blaizot</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kémoun</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ravaud</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Nabet</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sixou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Vergnes</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Clinical research activity in periodontal medicine: a systematic mapping of trial registers</article-title>
          <source>J Clin Periodontol</source>
          <year>2016</year>
          <month>05</month>
          <volume>43</volume>
          <issue>5</issue>
          <fpage>390</fpage>
          <lpage>400</lpage>
          <pub-id pub-id-type="doi">10.1111/jcpe.12534</pub-id>
          <pub-id pub-id-type="medline">26881700</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cyrino</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Miranda Cota</surname>
              <given-names>LO</given-names>
            </name>
            <name name-style="western">
              <surname>Pereira Lages</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bastos Lages</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Costa</surname>
              <given-names>FO</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of self-reported measures for prediction of periodontitis in a sample of Brazilians</article-title>
          <source>J Periodontol</source>
          <year>2011</year>
          <month>12</month>
          <volume>82</volume>
          <issue>12</issue>
          <fpage>1693</fpage>
          <lpage>1704</lpage>
          <pub-id pub-id-type="doi">10.1902/jop.2011.110015</pub-id>
          <pub-id pub-id-type="medline">21563951</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eke</surname>
              <given-names>PI</given-names>
            </name>
            <name name-style="western">
              <surname>Dye</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Slade</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>Thornton-Evans</surname>
              <given-names>GO</given-names>
            </name>
            <name name-style="western">
              <surname>Beck</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>GW</given-names>
            </name>
            <name name-style="western">
              <surname>Borgnakke</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>Page</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Genco</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Self-reported measures for surveillance of periodontitis</article-title>
          <source>J Dent Res</source>
          <year>2013</year>
          <month>11</month>
          <volume>92</volume>
          <issue>11</issue>
          <fpage>1041</fpage>
          <lpage>1047</lpage>
          <pub-id pub-id-type="doi">10.1177/0022034513505621</pub-id>
          <pub-id pub-id-type="medline">24065636</pub-id>
          <pub-id pub-id-type="pii">0022034513505621</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yen</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Chiu</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Fann</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>WY</given-names>
            </name>
            <name name-style="western">
              <surname>Chuang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>A prediction model for periodontal disease: modelling and validation from a national survey of 4061 Taiwanese adults</article-title>
          <source>J Clin Periodontol</source>
          <year>2015</year>
          <month>05</month>
          <volume>42</volume>
          <issue>5</issue>
          <fpage>413</fpage>
          <lpage>421</lpage>
          <pub-id pub-id-type="doi">10.1111/jcpe.12389</pub-id>
          <pub-id pub-id-type="medline">25817519</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Verhulst</surname>
              <given-names>MJL</given-names>
            </name>
            <name name-style="western">
              <surname>Teeuw</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bizzarro</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Muris</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Nicu</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Nazmi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bikker</surname>
              <given-names>FJ</given-names>
            </name>
            <name name-style="western">
              <surname>Loos</surname>
              <given-names>BG</given-names>
            </name>
          </person-group>
          <article-title>A rapid, non-invasive tool for periodontitis screening in a medical care setting</article-title>
          <source>BMC Oral Health</source>
          <year>2019</year>
          <month>05</month>
          <day>23</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>87</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcoralhealth.biomedcentral.com/articles/10.1186/s12903-019-0784-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12903-019-0784-7</pub-id>
          <pub-id pub-id-type="medline">31122214</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12903-019-0784-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC6533660</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Weng</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Self-reported questionnaire for surveillance of periodontitis in Chinese patients from a prosthodontic clinic: a validation study</article-title>
          <source>J Clin Periodontol</source>
          <year>2013</year>
          <month>06</month>
          <volume>40</volume>
          <issue>6</issue>
          <fpage>616</fpage>
          <lpage>623</lpage>
          <pub-id pub-id-type="doi">10.1111/jcpe.12103</pub-id>
          <pub-id pub-id-type="medline">23557490</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Holtfreter</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Meisel</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffmann</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Micheelis</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Dietrich</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Kocher</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Prediction of periodontal disease: modelling and validation in different general German populations</article-title>
          <source>J Clin Periodontol</source>
          <year>2014</year>
          <month>03</month>
          <volume>41</volume>
          <issue>3</issue>
          <fpage>224</fpage>
          <lpage>231</lpage>
          <pub-id pub-id-type="doi">10.1111/jcpe.12208</pub-id>
          <pub-id pub-id-type="medline">24313816</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hsiao</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Panel data analysis—advantages and challenges</article-title>
          <source>TEST</source>
          <year>2007</year>
          <month>03</month>
          <day>16</day>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>22</lpage>
          <pub-id pub-id-type="doi">10.1007/s11749-007-0046-x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bzdok</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Altman</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Krzywinski</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Statistics versus machine learning</article-title>
          <source>Nat Methods</source>
          <year>2018</year>
          <month>04</month>
          <volume>15</volume>
          <issue>4</issue>
          <fpage>233</fpage>
          <lpage>234</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/30100822"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/nmeth.4642</pub-id>
          <pub-id pub-id-type="medline">30100822</pub-id>
          <pub-id pub-id-type="pmcid">PMC6082636</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Makridakis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Spiliotis</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Assimakopoulos</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Statistical and machine learning forecasting methods: concerns and ways forward</article-title>
          <source>PLoS One</source>
          <year>2018</year>
          <volume>13</volume>
          <issue>3</issue>
          <fpage>e0194889</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0194889"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0194889</pub-id>
          <pub-id pub-id-type="medline">29584784</pub-id>
          <pub-id pub-id-type="pii">PONE-D-17-43154</pub-id>
          <pub-id pub-id-type="pmcid">PMC5870978</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sidey-Gibbons</surname>
              <given-names>JAM</given-names>
            </name>
            <name name-style="western">
              <surname>Sidey-Gibbons</surname>
              <given-names>CJ</given-names>
            </name>
          </person-group>
          <article-title>Machine learning in medicine: a practical introduction</article-title>
          <source>BMC Med Res Methodol</source>
          <year>2019</year>
          <month>03</month>
          <day>19</day>
          <volume>19</volume>
          <issue>1</issue>
          <fpage>64</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedresmethodol.biomedcentral.com/articles/10.1186/s12874-019-0681-4"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12874-019-0681-4</pub-id>
          <pub-id pub-id-type="medline">30890124</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12874-019-0681-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC6425557</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Farhadian</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shokouhi</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Torkzaban</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>A decision support system based on support vector machine for diagnosis of periodontal disease</article-title>
          <source>BMC Res Notes</source>
          <year>2020</year>
          <month>07</month>
          <day>13</day>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>337</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcresnotes.biomedcentral.com/articles/10.1186/s13104-020-05180-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s13104-020-05180-5</pub-id>
          <pub-id pub-id-type="medline">32660549</pub-id>
          <pub-id pub-id-type="pii">10.1186/s13104-020-05180-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC7359226</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ozden</surname>
              <given-names>FO</given-names>
            </name>
            <name name-style="western">
              <surname>Özgönenel</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Özden</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Aydogdu</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Diagnosis of periodontal diseases using different classification algorithms: a preliminary study</article-title>
          <source>Niger J Clin Pract</source>
          <year>2015</year>
          <volume>18</volume>
          <issue>3</issue>
          <fpage>416</fpage>
          <lpage>421</lpage>
          <pub-id pub-id-type="doi">10.4103/1119-3077.151785</pub-id>
          <pub-id pub-id-type="medline">25772929</pub-id>
          <pub-id pub-id-type="pii">NigerJClinPract_2015_18_3_416_151785</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shankarapillai</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mathur</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Nair</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rai</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Mathur</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Periodontitis risk assessment using two artificial neural networks—a pilot study</article-title>
          <source>Int J Clin Dent</source>
          <year>2010</year>
          <volume>2</volume>
          <fpage>36</fpage>
          <lpage>40</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vathesatogkit</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Woodward</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tanomsup</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ratanachaiwong</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Vanavanan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yamwong</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sritara</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Cohort profile: the Electricity Generating Authority of Thailand study</article-title>
          <source>Int J Epidemiol</source>
          <year>2012</year>
          <month>04</month>
          <volume>41</volume>
          <issue>2</issue>
          <fpage>359</fpage>
          <lpage>365</lpage>
          <pub-id pub-id-type="doi">10.1093/ije/dyq218</pub-id>
          <pub-id pub-id-type="medline">21216741</pub-id>
          <pub-id pub-id-type="pii">dyq218</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>O'Leary</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Drake</surname>
              <given-names>RB</given-names>
            </name>
            <name name-style="western">
              <surname>Naylor</surname>
              <given-names>JE</given-names>
            </name>
          </person-group>
          <article-title>The plaque control record</article-title>
          <source>J Periodontol</source>
          <year>1972</year>
          <month>01</month>
          <volume>43</volume>
          <issue>1</issue>
          <fpage>38</fpage>
          <pub-id pub-id-type="doi">10.1902/jop.1972.43.1.38</pub-id>
          <pub-id pub-id-type="medline">4500182</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eke</surname>
              <given-names>PI</given-names>
            </name>
            <name name-style="western">
              <surname>Page</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Thornton-Evans</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Genco</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Update of the case definitions for population-based surveillance of periodontitis</article-title>
          <source>J Periodontol</source>
          <year>2012</year>
          <month>12</month>
          <volume>83</volume>
          <issue>12</issue>
          <fpage>1449</fpage>
          <lpage>1454</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/22420873"/>
          </comment>
          <pub-id pub-id-type="doi">10.1902/jop.2012.110664</pub-id>
          <pub-id pub-id-type="medline">22420873</pub-id>
          <pub-id pub-id-type="pmcid">PMC6005373</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Riley</surname>
              <given-names>RD</given-names>
            </name>
            <name name-style="western">
              <surname>Ensor</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Snell</surname>
              <given-names>KIE</given-names>
            </name>
            <name name-style="western">
              <surname>Harrell</surname>
              <given-names>FE</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>GP</given-names>
            </name>
            <name name-style="western">
              <surname>Reitsma</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Moons</surname>
              <given-names>KGM</given-names>
            </name>
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>van Smeden</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Calculating the sample size required for developing a clinical prediction model</article-title>
          <source>BMJ</source>
          <year>2020</year>
          <month>03</month>
          <day>18</day>
          <volume>368</volume>
          <fpage>m441</fpage>
          <pub-id pub-id-type="doi">10.1136/bmj.m441</pub-id>
          <pub-id pub-id-type="medline">32188600</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="web">
          <source>The 8th National Oral Health Survey 2017 of Thailand</source>
          <access-date>2022-10-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dental.anamai.moph.go.th/web-upload/migrated/files/dental2/n2423_3e9aed89eb9e4e3978640d0a60b44be6_survey8th_2nd.pdf">https://dental.anamai.moph.go.th/web-upload/migrated/files/dental2/n2423_3e9aed89eb9e4e3978640d0a60b44be6_survey8th_2nd.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>GY</given-names>
            </name>
            <name name-style="western">
              <surname>Mason</surname>
              <given-names>WM</given-names>
            </name>
          </person-group>
          <article-title>The hierarchical logistic regression model for multilevel analysis</article-title>
          <source>J Am Stat Assoc</source>
          <year>1985</year>
          <month>09</month>
          <volume>80</volume>
          <issue>391</issue>
          <fpage>513</fpage>
          <lpage>524</lpage>
          <pub-id pub-id-type="doi">10.1080/01621459.1985.10478148</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hosmer</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Lemeshow</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sturdivant</surname>
              <given-names>RX</given-names>
            </name>
          </person-group>
          <article-title>Model-building strategies and methods for logistic regression</article-title>
          <source>Applied Logistic Regression</source>
          <year>2013</year>
          <publisher-loc>Hoboken</publisher-loc>
          <publisher-name>John Wiley &#38; Sons, Inc</publisher-name>
          <fpage>89</fpage>
          <lpage>151</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Al-Askar</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Radi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>MacDermott</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Al-Jumeily</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hussain</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mallucci</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Chapter 7 - recurrent neural networks in medical data analysis and classifications</article-title>
          <source>Applied Computing in Medicine and Health</source>
          <year>2016</year>
          <publisher-loc>Burlington</publisher-loc>
          <publisher-name>Morgan Kaufmann Publishers</publisher-name>
          <fpage>147</fpage>
          <lpage>165</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hajjem</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bellavance</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Larocque</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Mixed-effects random forest for clustered data</article-title>
          <source>J Stat Comput Simul</source>
          <year>2014</year>
          <month>06</month>
          <volume>84</volume>
          <issue>6</issue>
          <fpage>1313</fpage>
          <lpage>1328</lpage>
          <pub-id pub-id-type="doi">10.1080/00949655.2012.741599</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="web">
          <source>Keras</source>
          <access-date>2023-11-28</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://keras.io">https://keras.io</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <source>TensorFlow</source>
          <access-date>2023-11-28</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.tensorflow.org/">https://www.tensorflow.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schölkopf</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bartlett</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Smola</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Williamson</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Shrinking the tube: a new support vector regression algorithm</article-title>
          <source>Proceedings of the 11th International Conference on Neural Information Processing Systems</source>
          <year>1998</year>
          <conf-name>NIPS '98</conf-name>
          <conf-date>December 1-3, 1998</conf-date>
          <conf-loc>Denver</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bookstein</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Informetric distributions, part I: unified overview</article-title>
          <source>J Am Soc Inf Sci</source>
          <year>1990</year>
          <month>07</month>
          <volume>41</volume>
          <issue>5</issue>
          <fpage>368</fpage>
          <lpage>375</lpage>
          <pub-id pub-id-type="doi">10.1002/(sici)1097-4571(199007)41:5&#60;368::aid-asi8&#62;3.0.co;2-c</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ling</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sheng</surname>
              <given-names>VS</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Sammut</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Cost-sensitive learning and the class imbalance problem</article-title>
          <source>Encyclopedia of Machine Learning</source>
          <year>2008</year>
          <publisher-loc>Boston</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Breslow</surname>
              <given-names>NE</given-names>
            </name>
            <name name-style="western">
              <surname>Clayton</surname>
              <given-names>DG</given-names>
            </name>
          </person-group>
          <article-title>Approximate inference in generalized linear mixed models</article-title>
          <source>J Am Stat Assoc</source>
          <year>1993</year>
          <month>03</month>
          <volume>88</volume>
          <issue>421</issue>
          <fpage>9</fpage>
          <lpage>25</lpage>
          <pub-id pub-id-type="doi">10.1080/01621459.1993.10594284</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hajjem</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Larocque</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bellavance</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Generalized mixed effects regression trees</article-title>
          <source>Stat Probab Lett</source>
          <year>2017</year>
          <month>07</month>
          <volume>126</volume>
          <fpage>114</fpage>
          <lpage>118</lpage>
          <pub-id pub-id-type="doi">10.1016/j.spl.2017.02.033</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Tsai</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Moon</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ahn</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Young</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Decision threshold adjustment in class prediction</article-title>
          <source>SAR QSAR Environ Res</source>
          <year>2006</year>
          <month>06</month>
          <volume>17</volume>
          <issue>3</issue>
          <fpage>337</fpage>
          <lpage>352</lpage>
          <pub-id pub-id-type="doi">10.1080/10659360600787700</pub-id>
          <pub-id pub-id-type="medline">16815772</pub-id>
          <pub-id pub-id-type="pii">MJ41V795RPX20878</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="web">
          <article-title>Development of risk prediction models for severe periodontitis in a Thai population: statistical and machine-learning approaches</article-title>
          <source>Department of Clinical Epidemiology and Biostatistics, Faculty of Medicine Ramathibodi Hospital, Mahidol University</source>
          <access-date>2023-10-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.rama.mahidol.ac.th/ceb/codes/code_pj1">https://www.rama.mahidol.ac.th/ceb/codes/code_pj1</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
