<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i1e69838</article-id>
      <article-id pub-id-type="pmid">40409750</article-id>
      <article-id pub-id-type="doi">10.2196/69838</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>A Data-Driven Approach to Assessing Hepatitis B Mother-to-Child Transmission Risk Prediction Model: Machine Learning Perspective</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Atah</surname>
            <given-names>Solange Manju</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ko</surname>
            <given-names>Ko</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Nguyen Tien</surname>
            <given-names>Dung</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0000-6394-1025</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Thi Thu Bui</surname>
            <given-names>Huong</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Microbiology</institution>
            <institution>Thai Nguyen University of Medicine and Pharmacy</institution>
            <addr-line>284 Lương Ngọc Quyến</addr-line>
            <addr-line>Thái Nguyên, 250000</addr-line>
            <country>Vietnam</country>
            <phone>84 912916863</phone>
            <email>huongbuithithu@tnmc.edu.vn</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4101-5618</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Hoang Thi Ngoc</surname>
            <given-names>Tram</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0001-5617-9738</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Thi Pham</surname>
            <given-names>Thuy</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4140-7603</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Trung Nguyen</surname>
            <given-names>Dac</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0000-4048-1951</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Nguyen Thi Thu</surname>
            <given-names>Huyen</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0004-0571-2370</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Thu Hang Vu</surname>
            <given-names>Thi</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0002-6414-5500</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Lan Anh Luong</surname>
            <given-names>Thi</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0669-9919</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Thu Hoang</surname>
            <given-names>Lan</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0009-5602-1955</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>Cam Tu</surname>
            <given-names>Ho</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8239-096X</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author">
          <name name-style="western">
            <surname>Körber</surname>
            <given-names>Nina</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0991-6547</ext-link>
        </contrib>
        <contrib id="contrib12" contrib-type="author">
          <name name-style="western">
            <surname>Bauer</surname>
            <given-names>Tanja</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3597-4118</ext-link>
        </contrib>
        <contrib id="contrib13" contrib-type="author">
          <name name-style="western">
            <surname>Khanh Ho</surname>
            <given-names>Lam</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6355-1553</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Microbiology</institution>
        <institution>Thai Nguyen University of Medicine and Pharmacy</institution>
        <addr-line>Thái Nguyên</addr-line>
        <country>Vietnam</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Immunology - Molecular Genetics</institution>
        <institution>Thai Nguyen National General Hospital</institution>
        <addr-line>Thái Nguyên</addr-line>
        <country>Vietnam</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of MBG</institution>
        <institution>Hanoi Medical University</institution>
        <addr-line>Hanoi</addr-line>
        <country>Vietnam</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Center of Clinical Genetics and Genomics</institution>
        <institution>Hanoi Medical University Hospital</institution>
        <addr-line>Hanoi</addr-line>
        <country>Vietnam</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Technical University of Munich</institution>
        <addr-line>Munich</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Institute of Virology (VIRO)</institution>
        <institution>Molecular Targets and Therapeutics Center</institution>
        <institution>Helmholtz Zentrum München</institution>
        <addr-line>Munich</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>Faculty of Information Technology</institution>
        <institution>Hung Yen University of Technology and Education</institution>
        <addr-line>Hưng Yên</addr-line>
        <country>Vietnam</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Huong Thi Thu Bui <email>huongbuithithu@tnmc.edu.vn</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>23</day>
        <month>5</month>
        <year>2025</year>
      </pub-date>
      <volume>9</volume>
      <elocation-id>e69838</elocation-id>
      <history>
        <date date-type="received">
          <day>10</day>
          <month>12</month>
          <year>2024</year>
        </date>
        <date date-type="rev-request">
          <day>2</day>
          <month>3</month>
          <year>2025</year>
        </date>
        <date date-type="rev-recd">
          <day>19</day>
          <month>3</month>
          <year>2025</year>
        </date>
        <date date-type="accepted">
          <day>30</day>
          <month>4</month>
          <year>2025</year>
        </date>
      </history>
      <copyright-statement>©Dung Nguyen Tien, Huong Thi Thu Bui, Tram Hoang Thi Ngoc, Thuy Thi Pham, Dac Trung Nguyen, Huyen Nguyen Thi Thu, Thi Thu Hang Vu, Thi Lan Anh Luong, Lan Thu Hoang, Ho Cam Tu, Nina Körber, Tanja Bauer, Lam Khanh Ho. Originally published in JMIR Formative Research (https://formative.jmir.org), 23.05.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2025/1/e69838" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Hepatitis B virus (HBV) can be transmitted from mother to child either through transplacental infection or via blood-to-blood contact during or immediately after delivery. Early and accurate risk assessments are essential for guiding clinical decisions and implementing effective preventive measures. Data mining techniques are powerful tools for identifying key predictors in medical diagnostics.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to develop a robust predictive model for mother-to-child transmission (MTCT) of HBV using decision tree algorithms, specifically Iterative Dichotomiser 3 (ID3) and classification and regression trees (CART). The study identifies clinically and paraclinically relevant predictors, particularly hepatitis B e antigen (HBeAg) status and peripheral blood mononuclear cell (PBMC) concentration, for effective risk stratification and prevention. Additionally, we will assess the model’s reliability and generalizability through cross-validation with various training-test split ratios, aiming to enhance its applicability in clinical settings and inform improved preventive strategies against HBV MTCT.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>This study used decision tree algorithms—ID3 and CART—on a data set of 60 hepatitis B surface antigen (HBsAg)–positive pregnant women. Samples were collected either before or at the time of delivery, enabling the inclusion of patients who were undiagnosed or had limited access to treatment. We analyzed both clinical and paraclinical parameters, with a particular focus on HBeAg status and PBMC concentration. Additional biochemical markers were evaluated for their potential contributory or inhibitory effects on MTCT risk. The predictive models were validated using multiple training-test split ratios to ensure robustness and generalizability.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Our analysis showed that 20 out of 48 (based on a split ratio of 0.8 from a total of 60 cases, 42%) to 27 out of 57 (based on a split ratio of 0.95 from a total of 60 cases, 47%) training cases with HBeAg-positive status were associated with a significant risk of MTCT of HBV (χ<sup>2</sup><sub>8</sub>=21.16, <italic>P</italic>=.007, <italic>df</italic>=8). Among HBeAg-negative women, those with PBMC concentrations ≥8 × 10<sup>6</sup> cells/mL exhibited a low risk of MTCT, whereas individuals with PBMC concentrations &#60;8 × 10<sup>6</sup> cells/mL demonstrated a negligible risk. Across all training-test split ratios, the decision tree models consistently identified HBeAg status and PBMC concentration as the most influential predictors, underscoring their robustness and critical role in MTCT risk stratification.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study demonstrates that decision tree models are effective tools for stratifying the risk of MTCT of HBV by integrating key clinical and paraclinical markers. Among these, HBeAg status and PBMC concentration emerged as the most critical predictors. While the analysis focused on untreated patients, it provides a strong foundation for future investigations involving treated populations. These findings offer actionable insights to support the development of more targeted and effective HBV MTCT prevention strategies.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>chronic hepatitis B virus infection</kwd>
        <kwd>liver</kwd>
        <kwd>pregnant women</kwd>
        <kwd>cord blood</kwd>
        <kwd>PBMCs (peripheral blood mononuclear cells)</kwd>
        <kwd>ID3 (Iterative Dichotomiser 3)</kwd>
        <kwd>CART (classification and regression trees)</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Hepatitis B virus (HBV) mother-to-child transmission (MTCT) can occur through transplacental infection or blood-to-blood contact during or after delivery and accounts for a significant proportion of chronic HBV infections worldwide [<xref ref-type="bibr" rid="ref1">1</xref>]. In high-prevalence countries such as Vietnam, MTCT remains the most common mode of transmission. Children who acquire chronic infection have a 40% lifetime risk of dying from HBV-related complications. Antiviral treatment to reduce high viral loads, along with immunoprophylaxis using anti-HBV immunoglobulin shortly after delivery and active HBV immunization of neonates, can significantly reduce the incidence of MTCT. However, these measures are rarely implemented in most countries [<xref ref-type="bibr" rid="ref2">2</xref>]. Studies examining high-risk factors associated with MTCT in patients with HBV lack depth, and the relationship between HBV DNA in maternal serum and cord blood remains unclear. We conducted a clustering study to explore the potential link between HBV infection in pregnant women and cord blood, aiming to identify a clinical reference marker for prenatal surveillance and postnatal management, and to strengthen the prevention of HBV MTCT.</p>
      <p>Data mining techniques play a crucial role in clinical decision-making by providing physicians with accurate, reliable, and timely predictions through various models. Machine learning is broadly categorized into 3 types: supervised learning, unsupervised learning, and reinforcement learning. The decision tree is a supervised learning algorithm capable of handling both regression and classification tasks. A typical machine learning algorithm involves 2 main steps: training (where the algorithm learns a model from data) and prediction (where the learned model is used to predict new values). The training step in the decision tree algorithm constructs a decision tree. A decision tree is an effective support tool for engineers’ decision-making [<xref ref-type="bibr" rid="ref3">3</xref>], using a tree model that illustrates decisions and their possible outcomes, including random outcomes, resource costs, and benefits. Building on our previously published cluster analysis research, we set up a machine learning experiment. This research may help identify potential risk factors for MTCT.</p>
      <p>This study was conducted at a single center in Thai Nguyen, Vietnam. It is valuable to explore how variations in HBV circulation rates, genotypes, and health care practices across different regions of the country might influence both the detection and relevance of the study. Thai Nguyen, where the research took place, serves as the economic, political, and social hub of the northeastern region, the Central Highlands, and the northern mountainous areas of Vietnam. As a result, although the patients come from various hometowns, they share characteristics typical of the northern mountainous region, where many ethnic minorities reside. Viral hepatitis—particularly HBV, hepatitis D virus, and hepatitis E virus—remains a significant public health concern in Vietnam, especially among ethnic minority communities. Higher infection rates in these groups are often linked to limited access to health care, low socioeconomic status, high-risk living conditions, and a lack of awareness about the disease.</p>
      <p>In this study, we enrolled 60 pregnant women who tested positive for hepatitis B surface antigen (HBsAg) from a clinical setting, applying strict inclusion criteria to ensure that only those with chronic HBV infection—defined as being HBsAg-positive for more than 6 months—and who had not received any HBV treatment were included.</p>
      <p>In our study, we focused exclusively on HBsAg-positive pregnant women who were not receiving antiviral treatment. This approach was intentionally chosen to better understand the natural history and intrinsic risk factors for MTCT in untreated patients. According to World Health Organization (WHO) guidelines and current Vietnamese recommendations, pregnant women identified as being at high risk of MTCT are typically advised to begin treatment early. However, because sampling in our study took place either before delivery or immediately at the time of delivery, some participants had not yet been diagnosed or had not had the opportunity to access treatment.</p>
      <p>After delivery, we strongly recommended that all patients initiate antiviral therapy promptly, emphasizing that postnatal treatment does not negatively impact milk production or the quality of breast milk for the infant. Although excluding women who were already receiving treatment may limit the generalizability of our findings, our study offers valuable baseline data on MTCT risk in untreated patients. This baseline can serve as a crucial reference for future studies evaluating the effectiveness of early treatment interventions.</p>
      <p>After obtaining informed consent, we collected maternal and cord blood samples to measure various biomarkers, including viral markers (HBsAg, hepatitis B e antigen [HBeAg], HBV DNA) and biochemical parameters (eg, alanine aminotransferase [ALT], aspartate aminotransferase [AST], and peripheral blood mononuclear cells [PBMCs]). The raw data were preprocessed using R (R Foundation), which involved cleaning and organizing the data for further analysis. We conducted descriptive and univariate analyses to summarize the characteristics of the study population, followed by correlation and clustering analyses to explore relationships among the measured variables. Based on logistic regression models and initial analyses, we identified specific cutoff values (eg, PBMCs/mL and HBV DNA copies/mL). Feature selection was performed using information gain techniques based on Iterative Dichotomiser 3 (ID3) theory, identifying key predictors such as HBeAg status and PBMC levels. The selected features were then used to construct decision tree models with both the ID3 and classification and regression trees (CART) algorithms to classify participants into distinct MTCT risk categories.</p>
      <p>We validated the models through 1000 simulation runs using various training-test split ratios, calculating performance metrics—including accuracy, sensitivity, specificity, and area under the curve (AUC)—to assess model performance. The validated models enabled risk stratification based on Cohen effect size classifications (trivial, small, medium, and large), and the results were interpreted to provide actionable clinical insights, highlighting the critical roles of HBeAg and PBMCs in predicting the risk of HBV MTCT (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
      <p>The goal of this study is to develop and validate a machine learning–based decision tree model to effectively predict the risk of HBV transmission from mother to child. This will be accomplished by incorporating key clinical and paraclinical markers—particularly HBeAg status and PBMC concentration—to inform targeted prevention strategies.</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>Study flowchart. CART: classification and regression trees; HBsAg: hepatitis B surface antigen; ID3: Iterative Dichotomiser 3.</p>
        </caption>
        <graphic xlink:href="formative_v9i1e69838_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Population</title>
        <p>Between 2020 and 2021, we conducted a pilot study at Thai Nguyen National Hospital in Vietnam, focusing on pregnant women who received regular check-ups and delivered at the facility. A total of 60 pregnant women who tested positive for HBsAg were enrolled.</p>
        <p>Participants were provided with detailed information about the risks of hepatitis B transmission and were counseled on available treatment options. They could choose to begin treatment immediately or defer it until after delivery. Following birth, both mothers and their newborns were monitored for 3-6 months. Newborns received routine prophylactic care, and mothers with HBV DNA levels exceeding 200,000 IU/mL were offered treatment in accordance with WHO guidelines (2021) [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
        <p>Only pregnant women with chronic hepatitis B (CHB) infection (HBsAg positive for more than 6 months) who opted to delay antiviral treatment were included in the study. We collected various biological samples and conducted clinical surveillance to establish a comprehensive hepatitis B patient cohort, aimed at exploring the multifactorial risk factors associated with HBV MTCT.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was approved by the Institutional Review Board for Ethics in Biomedical Research of Hanoi Medical University (approval number NCS22/HMU-IRD), ensuring the protection of participants’ rights. Informed consent for future data use was obtained during the original data collection, and this secondary analysis was conducted in accordance with that provision. All data used in this study were deidentified and securely stored. No personally identifiable information was collected or retained, and no identifiable images of individuals are included in the manuscript or multimedia appendices.</p>
      </sec>
      <sec>
        <title>Diagnostic Criteria</title>
        <p>We collected clinical data from pregnant women who tested positive for HBsAg and were not receiving HBV treatment. The information gathered included general demographic and obstetric details such as age at delivery, gestational age, and pregnancy history (including the number of pregnancies and births, cesarean sections, and any preexisting conditions). Laboratory test results—such as ALT, AST, HBV DNA levels, and records of any antiviral therapy—were also included. Additionally, we assessed various maternal conditions, including preeclampsia, chronic hypertension, history of abortion, placental abruption, hyperthyroidism, gestational diabetes mellitus, pregnancy-induced hypertension, intrahepatic cholestasis of pregnancy, and other related complications. A retrospective analysis was conducted to examine the association between HBV infection status and preclinical factors.</p>
      </sec>
      <sec>
        <title>Isolation of Peripheral Blood Mononuclear Cells</title>
        <p>We collected maternal blood in EDTA (ethylenediaminetetraacetic acid) tubes and also collected umbilical cord blood. The cord blood was obtained from the umbilical vein of the umbilical cord and placed in a 20-mL cylinder with anticoagulant immediately after birth. The process of collecting cord blood takes only 2-3 minutes and involves the following steps: (1) Immediately after the mother gives birth, the medical staff clamps a section of the umbilical cord that is at least 10 cm long. (2) This section can be cut immediately to obtain a blood sample or left intact until after the placenta is delivered. (3) The surface of the umbilical cord is disinfected with povidone-iodine solution. (4) The needle of the collection cylinder is then inserted into the umbilical vein to draw the blood. (5) The collection cylinder is clamped, and the needle is withdrawn. (6) Finally, the collection cylinder is gently shaken to mix the blood with the anticoagulant.</p>
        <p>Serum and plasma samples were analyzed for viral markers (HBsAg, HBeAg, and HBV DNA copies/mL) and other preclinical factors, including platelet count (×10<sup>3</sup> cells/mL), prothrombin time (seconds), prothrombin ratio (%), hemoglobin (g/L), red blood cell count (×10<sup>6</sup> cells/mL), creatinine (µmol/L), AST (U/L), and ALT (U/L) [<xref ref-type="bibr" rid="ref5">5</xref>].</p>
      </sec>
      <sec>
        <title>Statistical and Decision Tree Analysis</title>
        <p>Data collection, storage, and analysis in this study were conducted using R version 4.1.0 (R Foundation). The correlation <italic>R</italic> value measures the strength of the linear relationship between 2 quantitative variables. The Pearson <italic>R</italic> formula is as follows:</p>
        <disp-formula>
          <graphic xlink:href="formative_v9i1e69838_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where <italic>R</italic> is the Pearson correlation coefficient; and <italic>x</italic> and <italic>y</italic> are 2 vectors of length <italic>i</italic> and <italic>j</italic>, respectively [<xref ref-type="bibr" rid="ref6">6</xref>]. The value of <italic>R</italic> ranges from –1 to 1, with <italic>R</italic>&#62;0 indicating a positive association and <italic>R</italic>&#60;0 indicating a negative association [<xref ref-type="bibr" rid="ref6">6</xref>].</p>
        <p>We conducted the clustering analysis again using the 5 most significant factors identified in our earlier study [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. To determine the effect size for each factor, we divided the natural logarithm of its odds ratio, that is, ln(odds ratio), by 1.81 [<xref ref-type="bibr" rid="ref10">10</xref>].</p>
        <p>Additionally, we utilized the ID3 algorithm to generate general rules and predictions for new cases. This algorithm requires specifying the order in which attributes are evaluated at each step. As finding the optimal solution can be challenging when dealing with numerous attributes (such as varied patient test results), we opted for a simpler approach: at each step, we selected the attribute that best satisfied a chosen criterion. After selecting an attribute, the data are divided into child nodes based on their values, and this process continues recursively for each child node. Although this greedy selection method may not always yield the optimal solution, it is intuitively close to the best outcome and significantly simplifies the problem [<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p>A crucial element of this approach is assessing the quality of each partition. Ideally, a good partition is one in which each child node predominantly contains data from a single class, allowing it to be treated as a leaf with no further division. Conversely, a partition that produces child nodes with mixed classes is less desirable. To evaluate this, we require a function that measures the purity or impurity of a partition. This function should yield the lowest value when each node contains data from only 1 class (indicating high purity), and a higher value when nodes include a diverse mix of classes. The entropy function, commonly used in information theory, serves this purpose (see equation 2).</p>
        <disp-formula>
          <graphic xlink:href="formative_v9i1e69838_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>In the ID3 algorithm, the loss function for a decision tree is defined as the weighted sum of the entropies at its leaf nodes, with the weights corresponding to the number of data points in each node. The goal of ID3 is to determine the order of attribute splits in a way that minimizes this total loss. This is achieved by selecting the attribute that leads to the greatest reduction in entropy at each step. Essentially, constructing the decision tree using ID3 can be viewed as a series of smaller tasks, where at each nonleaf node, we choose the attribute that most effectively improves the split. We then develop a calculation method for each of these nodes (see equation 3). Thus, the entropy at this node is given by:</p>
        <disp-formula>
          <graphic xlink:href="formative_v9i1e69838_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>Next, suppose the selected attribute is <italic>x</italic>. We define the entropy after splitting on <italic>x</italic>, <italic>H</italic>(<italic>x</italic>, <italic>S</italic>), as the weighted sum of the entropies of the <italic>child nodes</italic>, each computed as in equation 3. This weighting is important because child nodes often contain different numbers of points (see equation 4).</p>
        <disp-formula>
          <graphic xlink:href="formative_v9i1e69838_fig13.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>Let the selected attribute be <italic>x</italic>. Based on <italic>x</italic>, the data points in <italic>S</italic> are divided into <italic>K</italic> <italic>child nodes</italic> <italic>S</italic><sub>1</sub>, <italic>S</italic><sub>2</sub>, ..., <italic>S</italic><sub>K</sub>, with the number of points in each <italic>child node</italic> being <italic>m</italic><sub>1</sub>, <italic>m</italic><sub>2</sub>, ..., <italic>m</italic><sub>K</sub>, respectively.</p>
        <p>Next, we define the <italic>information gain</italic> based on the attribute <italic>x</italic>, as given by equation 5.</p>
        <p><italic>G</italic>(<italic>x</italic>, <italic>S</italic>) = <italic>H</italic>(<italic>S</italic>) – <italic>H</italic>(<italic>x</italic>,<italic>S</italic>) <bold>(5)</bold></p>
        <p>Let the selected attribute be <italic>x</italic>. Based on <italic>x</italic>, the data points in <italic>S</italic> are divided into <italic>K</italic> <italic>child nodes</italic> <italic>S</italic><sub>1</sub>, <italic>S</italic><sub>2</sub>, ..., <italic>S</italic><sub>K</sub>, with the number of points in each <italic>child node</italic> being <italic>m</italic><sub>1</sub>, <italic>m</italic><sub>2</sub>, ..., <italic>m</italic><sub>K</sub>, respectively. In equation 5, <italic>H</italic>(<italic>S</italic>) is the <italic>root node entropy</italic>. In ID3, at each node, the selected attribute is determined based on equation 6, which identifies the attribute that maximizes the information gain.</p>
        <disp-formula>
          <graphic xlink:href="formative_v9i1e69838_fig14.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>Identifying important variables helps eliminate less important ones, simplifying the model and reducing noise. Thus, the final step determines the importance of each variable using different splitting ratios. The importance of a variable can be quantified by the reduction in impurity (such as the Gini index) achieved when it is used for splitting. This computation follows the CART formula (see equations 7 and 8).</p>
        <disp-formula>
          <graphic xlink:href="formative_v9i1e69838_fig15.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>Let us suppose an object is selected at random from one of the C classes according to the probabilities (<italic>p</italic><sub>1</sub>, <italic>p</italic><sub>2</sub>, ..., <italic>p</italic><sub>C</sub>) and is randomly assigned to a class using the same distribution. In this scenario, we get the following:</p>
        <disp-formula>
          <graphic xlink:href="formative_v9i1e69838_fig16.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>In equation 8, let <italic>L</italic>(<italic>i, j</italic>) be the loss of assigning class <italic>j</italic> to an object which actually belongs to class <italic>i.</italic> The expected cost of misclassification is <inline-graphic xlink:href="formative_v9i1e69838_fig17.png" xlink:type="simple" mimetype="image"/></p>
        <p>In our study, we simulate 1000 runs for each splitting ratio. The best variable is the one that shows the highest Gini index score. The Gini index has been adapted to assess health inequality across populations by providing estimates that capture the distribution of risk, or lack of risk, among the entire population or within specific groups [<xref ref-type="bibr" rid="ref12">12</xref>].</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Power Estimation in a Multivariate Regression</title>
        <p>We generate a power estimation curve for a multivariate regression (using <italic>f</italic><sup>2</sup> as the effect size measure) with a total sample size of 60 and 5 predictors. This plot helps illustrate how adequate—or inadequate—the sample size is for detecting effects in a multivariate context. In multiple regression, Cohen [<xref ref-type="bibr" rid="ref13">13</xref>] suggested the following guidelines for <italic>f</italic><sup>2</sup> effect sizes: small (<italic>f</italic><sup>2</sup>=0.02), medium (<italic>f</italic><sup>2</sup>=0.15), and large (<italic>f</italic><sup>2</sup>=0.35). We calculate the statistical power for a range of <italic>f</italic><sup>2</sup> values given the following: total sample size <italic>N</italic>=60, number of predictors <italic>p</italic>=5, degrees of freedom for the error <italic>v</italic>=<italic>N</italic>–<italic>p</italic>–1=54, and significance level α=.05. <xref rid="figure2" ref-type="fig">Figure 2</xref> illustrates how statistical power changes in a multiple regression model (with 5 predictors and a total sample size of 60) as the effect size (<italic>f</italic><sup>2</sup>) ranges from small (0.02) to large (0.35). The vertical axis shows the probability (power) of detecting an actual effect at the 5% significance level, while the horizontal axis shows the size of that effect. The red dashed line at 0.80 marks the conventional threshold for sufficient power (80%). When (<italic>f</italic><sup>2</sup>) is small (0.02), the power is around 0.2, indicating only a 20% chance of detecting such a minor effect with the given sample size and number of predictors. As (<italic>f</italic><sup>2</sup>) approaches medium (0.15), the power increases but remains below the 0.80 line, suggesting that moderate effects are not reliably detected. 
Only when (<italic>f</italic><sup>2</sup>) nears the large range (around 0.35) does the power surpass or approach the 80% mark, implying that the study can reliably detect larger effects but may struggle to identify smaller or moderate ones (<xref rid="figure2" ref-type="fig">Figure 2</xref>).</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Power estimation in a multivariate regression with 60 participants and 5 predictors. We set n=60 (sample size) and <italic>p</italic>=5 (number of predictors). The error degrees of freedom are calculated as <italic>v</italic>=60 − 5 − 1 = 54. A sequence of <italic>f</italic><sup>2</sup> values ranging from 0.02 (small) to 0.35 (large) is generated. For each <italic>f</italic><sup>2</sup> value, we use pwr.f2.test() to estimate the power of the overall F-test in a multiple regression. The y-axis shows statistical power, and the x-axis shows the effect size (<italic>f</italic><sup>2</sup>). A horizontal red dashed line marks the 80% power threshold. Conventional effect sizes (small, medium, and large) are highlighted with dark green points and labeled accordingly.</p>
          </caption>
          <graphic xlink:href="formative_v9i1e69838_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>The Five Most Important Factors of MTCT Risk: HBeAg, ALT, AST, HBV DNA, and PBMCs</title>
        <p>The mean age was 27.6 (SD 4.2) years. Pregnancy for the second time or more accounted for 43 out of 60 (72%) cases. Clinical symptoms were edema, fatigue, and loss of appetite, similar to those experienced during pregnancy; 40 (67%) women had a cesarean section. Of the 60 women, 27 (45%) were HBeAg positive, and HBV DNA ≥ 10<sup>7</sup> copies/mL was reported in 20 (33%). The mean gestational age was 38.9 (SD 1.2) weeks. Among the babies, 32 (53%) were HBsAg positive, and 23 (38%) were HBeAg positive (<xref ref-type="table" rid="table1">Tables 1</xref>-<xref ref-type="table" rid="table4">4</xref>).</p>
        <p>In 60 Vietnamese CHB pregnant women, 32 (53%) cord-blood samples were HBsAg positive, and 28 (47%) were HBsAg negative. We fit a logistic regression model to predict the subclinical values (cutoff value), which correspond to 50:50 probabilities that HBsAg in cord blood is positive. Two variables have a positive association when above-average values of one tend to accompany above-average values of the other, and below-average values tend to occur together as well. Two variables have a negative association when above-average values of one tend to accompany below-average values of the other [<xref ref-type="bibr" rid="ref14">14</xref>] (see <xref ref-type="table" rid="table5">Table 5</xref> and Figure S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>General characteristics of 60 Vietnamese pregnant women with chronic hepatitis B<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="770"/>
            <col width="0"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td colspan="2">General characteristics of the study participants</td>
                <td colspan="2">Values, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>Mother’s age (years)</bold>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>18-35</td>
                <td colspan="2">58 (97)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#62;35</td>
                <td colspan="2">2 (3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Mean (SD)</td>
                <td colspan="2">27.6 (4.2)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Number of pregnancies</bold>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>First time</td>
                <td colspan="2">17 (28)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>From the second time</td>
                <td colspan="2">43 (72)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Time of detection of hepatitis B virus infection</bold>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Before getting pregnant</td>
                <td colspan="2">25 (42)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>This time</td>
                <td colspan="2">35 (58)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>32 (53%) cord blood samples were hepatitis B surface antigen positive, and 28 (47%) hepatitis B surface antigen negative.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Clinical and preclinical characteristics of the 60 Vietnamese pregnant women with chronic hepatitis B.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="30"/>
            <col width="740"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Characteristics</td>
                <td>Values, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>Clinical characteristics</bold>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Edema</td>
                <td>7 (12)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Tired</td>
                <td>8 (13)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Anorexia</td>
                <td>1 (2)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Nausea/vomiting</td>
                <td>4 (7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Insomnia</td>
                <td>4 (7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Joint pain</td>
                <td>3 (5)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Right lower quadrant pain</td>
                <td>2 (3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="3">
                  <bold>Birth method</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Birth without episiotomy</td>
                <td>4 (7)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Birth with episiotomy</td>
                <td>16 (27)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Caesarean section</td>
                <td>40 (67)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>Preclinical characteristics</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">An increase of aspartate aminotransferase</td>
                <td>14 (23)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">An increase of alanine aminotransferase</td>
                <td>11 (18)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Hepatitis B e antigen positive</td>
                <td>27 (45)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Hepatitis B e antigen negative</td>
                <td>33 (55)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Hepatitis B virus DNA≥10<sup>7</sup></td>
                <td>20 (33)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Hepatitis B virus DNA&#60;10<sup>7</sup></td>
                <td>40 (67)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Clinical and preclinical characteristics of babies (N=60).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="30"/>
            <col width="740"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Characteristics</td>
                <td>Values</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="3">
                  <bold>Clinical</bold>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Apgar score≥8, n (%)</td>
                <td>58 (97)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Apgar score&#60;7, n (%)</td>
                <td>2 (3)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Gestational age, mean (SD)</td>
                <td>38.9 (1.2)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">
                  <bold>Weight (g)</bold>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>&#60;3500, n (%)</td>
                <td>47 (78)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>≥3500, n (%)</td>
                <td>13 (22)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
                <td>Mean (SD)</td>
                <td>3198.3 (362.9)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <bold>Preclinical (cord blood), n (%)</bold>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Hepatitis B surface antigen positive</td>
                <td>32 (53)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Hepatitis B surface antigen negative</td>
                <td>28 (47)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Hepatitis B e antigen positive</td>
                <td>23 (38)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Hepatitis B e antigen negative</td>
                <td>37 (62)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Preclinical measures.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="160"/>
            <col width="250"/>
            <col width="190"/>
            <thead>
              <tr valign="top">
                <td>Details of preclinical measures results</td>
                <td>Range</td>
                <td>Median (95% CI)</td>
                <td>Mean (SD)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Maternal prothrombin time (seconds) (N=60)</td>
                <td>10.5-47.8</td>
                <td>12.1 (11.6-12.8)</td>
                <td>12.9 (4.76)</td>
              </tr>
              <tr valign="top">
                <td>Maternal prothrombin ratio (%) (N=60)</td>
                <td>10.0-133.0</td>
                <td>108.0 (99.6-117.0)</td>
                <td>106 (21.6)</td>
              </tr>
              <tr valign="top">
                <td>Maternal red blood cells (×10<sup>6</sup> cells/mL) (N=60)</td>
                <td>3.25-6.25</td>
                <td>4.33 (4.06-4.69)</td>
                <td>4.38 (0.539)</td>
              </tr>
              <tr valign="top">
                <td>Maternal platelet count (×10<sup>3</sup> cells/mL) (N=60)</td>
                <td>109.0-344.0</td>
                <td>214.0 (180.0-260.0)</td>
                <td>219 (59.6)</td>
              </tr>
              <tr valign="top">
                <td>Maternal creatinine (µmol/L) (N=60)</td>
                <td>41.0-99.0</td>
                <td>59.6 (54.4-68.8)</td>
                <td>62.1 (11.1)</td>
              </tr>
              <tr valign="top">
                <td>Maternal aspartate transaminase (U/L) (N=60)</td>
                <td>13.0-285.0</td>
                <td>22.1 (18.6-29.2)</td>
                <td>33.2 (41.3)</td>
              </tr>
              <tr valign="top">
                <td>Maternal alanine transaminase (U/L) (N=60)</td>
                <td>6.40-217.0</td>
                <td>15.8 (12.8-24.3)</td>
                <td>27.7 (38.0)</td>
              </tr>
              <tr valign="top">
                <td>Maternal protein in blood (g/L)</td>
                <td>12.3-80.2</td>
                <td>67.9 (65.8-71.1)</td>
                <td>66.7 (9.82)</td>
              </tr>
              <tr valign="top">
                <td>Maternal albumin in blood (g/L)</td>
                <td>25.6-44.9</td>
                <td>34.5 (33.0-35.7)</td>
                <td>34.4 (3.54)</td>
              </tr>
              <tr valign="top">
                <td>Maternal hepatitis B virus DNA (copies/mL) (N=60)</td>
                <td>35.0-1,350,000,000</td>
                <td>49,600 (771.0-129,000,000)</td>
                <td>152,000,000 (310,000,000)</td>
              </tr>
              <tr valign="top">
                <td>Maternal peripheral blood mononuclear cells (cells/mL) (N=60)</td>
                <td>1,300,000-12,300,000</td>
                <td>5,500,000 (3,000,000-7,510,000)</td>
                <td>5,620,000 (3,050,000)</td>
              </tr>
              <tr valign="top">
                <td>Cord blood mononuclear cell concentration (cells/mL) (N=60)</td>
                <td>3,640,000-51,000,000</td>
                <td>12,200,000 (6,500,000-15,000,000)</td>
                <td>12,600,000 (7,630,000)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table5">
          <label>Table 5</label>
          <caption>
            <p>Cutoff values.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="400"/>
            <col width="200"/>
            <col width="120"/>
            <col width="280"/>
            <thead>
              <tr valign="top">
                <td>Variables</td>
                <td>Cutoff<sub>50</sub><sup>a</sup> for hepatitis B surface antigen–positive probability in cord blood</td>
                <td>The direction of the relationship between the 2 variables</td>
                <td>Cross-reference</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Maternal peripheral blood mononuclear cell concentration</td>
                <td>8.03 × 10<sup>6</sup> cells/mL</td>
                <td>Negative</td>
                <td>Figure S1A in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref></td>
              </tr>
              <tr valign="top">
                <td>Maternal hepatitis B virus DNA</td>
                <td>5.40 × 10<sup>7</sup> copies/mL</td>
                <td>Positive</td>
                <td>Figure S1B in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref></td>
              </tr>
              <tr valign="top">
                <td>Maternal platelet count</td>
                <td>317.89 × 10<sup>3</sup> cells/mL</td>
                <td>Negative</td>
                <td>Figure S1C in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref></td>
              </tr>
              <tr valign="top">
                <td>Maternal prothrombin time</td>
                <td>11.00 seconds</td>
                <td>Positive</td>
                <td>Figure S1D in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref></td>
              </tr>
              <tr valign="top">
                <td>Cord blood mononuclear cell concentration</td>
                <td>6.64 × 10<sup>6</sup> cells/mL</td>
                <td>Positive</td>
                <td>Figure S1E in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref></td>
              </tr>
              <tr valign="top">
                <td>Maternal hemoglobin</td>
                <td>128.53 g/L</td>
                <td>Negative</td>
                <td>Figure S1F in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref></td>
              </tr>
              <tr valign="top">
                <td>Maternal red blood cells</td>
                <td>5 × 10<sup>6</sup> cells/mL</td>
                <td>Negative</td>
                <td>Figure S1G in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref></td>
              </tr>
              <tr valign="top">
                <td>Maternal creatinine</td>
                <td>37.46 µmol/L</td>
                <td>Positive</td>
                <td>Figure S1H in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref></td>
              </tr>
              <tr valign="top">
                <td>Maternal aspartate aminotransferase</td>
                <td>14.15 U/L</td>
                <td>Positive</td>
                <td>Figure S1I in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref></td>
              </tr>
              <tr valign="top">
                <td>Maternal alanine aminotransferase</td>
                <td>43.34 U/L</td>
                <td>Negative</td>
                <td>Figure S1K in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref></td>
              </tr>
              <tr valign="top">
                <td>Maternal prothrombin ratio</td>
                <td>76.34%</td>
                <td>Positive</td>
                <td>Figure S1L in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref></td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table5fn1">
              <p><sup>a</sup>Predicted values linked with a 50:50 probability that hepatitis B surface antigen is detectable in cord blood.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>We study the Pearson correlation between each factor in the matrix model. The <italic>R</italic> score with a significant <italic>P</italic> value will be considered for further steps (see <xref rid="figure3" ref-type="fig">Figure 3</xref> and <xref ref-type="supplementary-material" rid="app2">Multimedia Appendices 2</xref>-<xref ref-type="supplementary-material" rid="app5">5</xref>). When maternal viral load exceeds 5 × 10<sup>7</sup> copies/mL, the risk of being HBsAg positive in cord blood increases by 123% (risk ratio 2.23, 95% CI 1.48-3.36); when the viral load is lower than this baseline, the risk decreases by 55% (risk ratio 0.45, 95% CI 0.30-0.67; <italic>P</italic>&#60;.001; see <xref ref-type="table" rid="table6">Table 6</xref> and <xref ref-type="supplementary-material" rid="app6">Multimedia Appendices 6</xref> and <xref ref-type="supplementary-material" rid="app7">7</xref>). We calculate the risk ratio and odds ratio based on the new value indications (see <xref ref-type="table" rid="table6">Table 6</xref>) for 2 groups: HBsAg cord blood positive and negative, using the results of the HCA analysis. A dendrogram and principal component analysis plot were constructed based on the correlation between each factor (see <xref ref-type="supplementary-material" rid="app8">Multimedia Appendices 8</xref>-<xref ref-type="supplementary-material" rid="app10">10</xref>).</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Correlation matrix between biomarkers depicted as a heat map. The heat map compares 2 groups: HBV DNA ≥ 5 × 10<sup>7</sup> copies/mL (left panel) and HBV DNA &#60; 5 × 10<sup>7</sup> copies/mL (right panel). It illustrates the color-coded Pearson correlation coefficients between subclinical indices, including prothrombin time, AST, ALT, RBC, and hemoglobin in maternal blood; the concentration and density of PBMCs; and the status of HBeAg and anti-HBs in both cord and maternal blood. Cell colors represent the strength and direction of correlations, ranging from red (negative correlation) to blue (positive correlation). The intensity of the color reflects the magnitude of the correlation, as indicated by the color scale shown to the right of the panel. Pairwise Pearson correlation coefficients are detailed in Multimedia Appendices 2-5. ALT: alanine aminotransferase; AST: aspartate aminotransferase; CB: cord blood; CBMC: cord blood mononuclear cell; Hb: hemoglobin; HBeAg: hepatitis B e antigen; HBsAg: hepatitis B surface antigen; RBC: red blood cell.</p>
          </caption>
          <graphic xlink:href="formative_v9i1e69838_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table6">
          <label>Table 6</label>
          <caption>
            <p>Risk ratios for hepatitis B surface antigen–positive cord blood by factor, with corresponding threshold values.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="180"/>
            <col width="100"/>
            <col width="200"/>
            <col width="130"/>
            <col width="130"/>
            <col width="130"/>
            <col width="100"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Variables</td>
                <td>Cord blood hepatitis B surface antigen positive (n=32), n</td>
                <td>Cord blood hepatitis B surface antigen negative (n=28), n</td>
                <td>Risk ratio (95% CI)</td>
                <td>OR (95% CI)</td>
                <td>Chi-square (<italic>df</italic>)</td>
                <td><italic>P</italic> value&#62;chisq</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="6">
                  <bold>Maternal peripheral blood mononuclear cell concentration (cells/mL)</bold>
                </td>
                <td>0.34 (1)</td>
                <td>.56</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥8.03 × 10<sup>6</sup></td>
                <td>6</td>
                <td>7</td>
                <td>0.83 (0.44-1.58)</td>
                <td>0.69 (0.20-2.38)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#60;8.06 × 10<sup>6</sup></td>
                <td>26</td>
                <td>21</td>
                <td>1.2 (0.63-2.28)</td>
                <td>1.44 (0.42-4.96)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Maternal hepatitis B virus DNA (copies/mL)</bold>
                </td>
                <td>11.61 (1)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥5 × 10<sup>7</sup></td>
                <td>15</td>
                <td>2</td>
                <td>2.23 (1.48-3.36)</td>
                <td>11.47 (2.32-56.65)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#60;5 × 10<sup>7</sup></td>
                <td>17</td>
                <td>26</td>
                <td>0.45 (0.30-0.67)</td>
                <td>0.09 (0.02-0.43)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Maternal platelets (×10<sup>3</sup> cells/mL)</bold>
                </td>
                <td>0 (1)</td>
                <td>&#62;.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥317.89</td>
                <td>2</td>
                <td>2</td>
                <td>0.93 (0.34-2.56)</td>
                <td>0.87 (0.11-6.59)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#60;317.89</td>
                <td>30</td>
                <td>26</td>
                <td>1.07 (0.39-2.94)</td>
                <td>1.15 (0.15-8.78)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Maternal prothrombin (seconds)</bold>
                </td>
                <td>0 (1)</td>
                <td>&#62;.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥11</td>
                <td>29</td>
                <td>25</td>
                <td>1.07 (0.46-2.48)</td>
                <td>1.15 (0.21-6.27)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#60;11</td>
                <td>3</td>
                <td>3</td>
                <td>0.93 (0.40-2.15)</td>
                <td>0.86 (0.16-4.66)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Cord blood mononuclear cell concentration (cells/mL)</bold>
                </td>
                <td>0.73 (1)</td>
                <td>.39</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥6.64 × 10<sup>6</sup></td>
                <td>22</td>
                <td>22</td>
                <td>0.8 (0.49-1.29)</td>
                <td>0.6 (0.19-1.94)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#60;6.64 × 10<sup>6</sup></td>
                <td>10</td>
                <td>6</td>
                <td>1.25 (0.77-2.02)</td>
                <td>1.67 (0.52-5.38)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Maternal hemoglobin (g/L)</bold>
                </td>
                <td>0.46 (1)</td>
                <td>.52</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥128.53</td>
                <td>11</td>
                <td>12</td>
                <td>0.84 (0.51-1.40)</td>
                <td>0.7 (0.25-1.99)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#60;128.53</td>
                <td>21</td>
                <td>16</td>
                <td>1.19 (0.71-1.98)</td>
                <td>1.43 (0.50-4.07)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Maternal red blood cells (cells/mL)</bold>
                </td>
                <td>0.024 (1)</td>
                <td>.88</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥5 × 10<sup>6</sup></td>
                <td>2</td>
                <td>3</td>
                <td>0.73 (0.24-2.20)</td>
                <td>0.56 (0.09-3.59)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#60;5 × 10<sup>6</sup></td>
                <td>30</td>
                <td>25</td>
                <td>1.36 (0.45-4.10)</td>
                <td>1.8 (0.28-11.64)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Maternal creatinine (µmol/L)</bold>
                </td>
                <td>0.047 (1)</td>
                <td>.83</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥37.46</td>
                <td>28</td>
                <td>23</td>
                <td>1.24 (0.57-2.67)</td>
                <td>1.52 (0.37-6.33)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#60;37.46</td>
                <td>4</td>
                <td>5</td>
                <td>0.81 (0.37-1.75)</td>
                <td>0.66 (0.16-2.73)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Maternal aspartate transaminase (U/L)</bold>
                </td>
                <td>0.34 (1)</td>
                <td>.56</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥14.15</td>
                <td>29</td>
                <td>23</td>
                <td>1.49 (0.59-3.76)</td>
                <td>2.1 (0.45-9.73)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#60;14.15</td>
                <td>3</td>
                <td>5</td>
                <td>0.67 (0.27-1.70)</td>
                <td>0.48 (0.10-2.20)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Maternal alanine transaminase (U/L)</bold>
                </td>
                <td>0.61 (1)</td>
                <td>.44</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥43.34</td>
                <td>4</td>
                <td>1</td>
                <td>1.57 (0.94-2.62)</td>
                <td>3.86 (0.40-36.75)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#60;43.34</td>
                <td>28</td>
                <td>27</td>
                <td>0.64 (0.38-1.06)</td>
                <td>0.26 (0.03-2.47)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="6">
                  <bold>Maternal prothrombin ratio (%)</bold>
                </td>
                <td>0 (1)</td>
                <td>&#62;.99</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>≥76.34</td>
                <td>31</td>
                <td>27</td>
                <td>1.07 (0.26-4.36)</td>
                <td>1.15 (0.07-19.25)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>&#60;76.34</td>
                <td>1</td>
                <td>1</td>
                <td>0.94 (0.23-3.82)</td>
                <td>0.87 (0.05-14.60)</td>
                <td>
                  <break/>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>PBMCs Gain the Most Information Following ID3 Theory Calculation</title>
        <p>We next calculated the information gain for each factor in our actual data based on the correlation and clustering study results, following the ID3 theory. There are 5 attributes of pregnant women that may increase the risk of infection in infants. Each factor has 2 types of variants: HBeAg (positive and negative); ALT (&#60;43.34 U/L and ≥43.34 U/L); AST (≥14.15 U/L and &#60;14.15 U/L); HBV DNA (≥5 × 10<sup>7</sup> and &#60;5 × 10<sup>7</sup> copies/mL); and PBMCs (≥8 × 10<sup>6</sup> and &#60;8 × 10<sup>6</sup> cells/mL).</p>
        <p>The analysis of Cohen <italic>h</italic> across the groups reveals varying degrees of effect size between the proportions of cord blood HBsAg-positive and -negative cases. Group 14 demonstrates a very large effect (<italic>h</italic>=1.46), characterized by a notably low proportion of positive cases (3/18, 17%) and a high proportion of negative cases (15/18, 83%). Group 12 shows a medium effect (<italic>h</italic>=0.51), indicating a moderate difference between the 2 proportions. Groups 10 (<italic>h</italic>=1.70) and 7 (<italic>h</italic>=1.85) both reflect very large effects, with high positive rates of 7 out of 8 (88%) and 9 out of 10 (90%), respectively, contrasted with much lower negative rates. Group 13 exhibits the maximum possible difference (<italic>h</italic>=3.14), with a proportion of 3 out of 3 (100%) positives and 0 out of 3 (0%) negatives, corresponding to the theoretical limit of Cohen <italic>h</italic>, π. By contrast, other_group shows a small effect (<italic>h</italic>=0.15), with nearly equal positive and negative proportions, suggesting minimal difference between the 2 groups (<xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>). Using the pooled SD method, we calculated Cohen <italic>d</italic> to measure the standardized difference between groups 10 and 7 (<xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>). For instance, if our calculated Cohen <italic>d</italic> is approximately 0.8 or higher, it suggests a large effect size, indicating that the difference in risk or related measures between these 2 groups is not only statistically significant but also clinically meaningful (<italic>P</italic>=.01 for group 10 and <italic>P</italic>=.001 for group 7).</p>
        <p>In the 17 output values in <xref ref-type="table" rid="table1">Tables 1</xref>-<xref ref-type="table" rid="table4">4</xref> (N=17 scores), the effect sizes are distributed as follows: 12 values are medium, 1 value is small, 2 values are large (<italic>P</italic>=.01 for group 10 and <italic>P</italic>=.001 for group 7; also see <xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>), and 2 are trivial. The probability that each data point falls into class c=<italic>medium</italic> is determined by <italic>N</italic>c<italic>/N</italic>=12/17. The probability that each data point falls into class c=<italic>small</italic> is determined by <italic>N</italic>c<italic>/N</italic>=1/17. The probability that each data point falls into class c=<italic>large</italic> is determined by <italic>N</italic>c<italic>/N</italic>=2/17, and the probability that each data point falls into class c=<italic>trivial</italic> is determined by <italic>N</italic>c<italic>/N</italic>=2/17. Therefore, the entropy at <italic>the root node</italic> is calculated according to equations 1 and 2 of this formula, with 4 classes—medium, small, large, and trivial (C=4)—which is given as follows:</p>
        <disp-formula>
          <graphic xlink:href="formative_v9i1e69838_fig18.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>If one of the attributes—HBeAg, ALT, AST, HBV DNA, or PBMCs—is selected to divide the data, we calculate the weighted sum of the entropy of the child nodes. The result is shown in <xref ref-type="table" rid="table5">Table 5</xref>. PBMC concentrations were chosen because they have the highest information gain of 0.247. We could construct a decision tree by selecting PBMCs as the root node, as they provide the greatest information gain. From the root node (PBMCs), we branch out to other nodes, each named after the corresponding attribute (<xref ref-type="supplementary-material" rid="app12">Multimedia Appendix 12</xref>).</p>
      </sec>
      <sec>
        <title>The Five Most Important Factors of MTCT Risk: HBeAg, ALT, AST, Serum HBV DNA, and PBMCs</title>
        <p>We verified our calculation in R with 1000 runs (observations) for 6 split ratios: 0.50, 0.75, 0.80, 0.85, 0.90, and 0.95. The value in each node represents the number of observations in the data set that fall into that particular node. In <xref rid="figure4" ref-type="fig">Figure 4</xref>, we observed that the strongest information gain scores are found in HBeAg and PBMC concentration. From this dot plot, we can observe multiple runs of the ID3-based decision tree analysis at various training-test split ratios (shown in each column). In each run, the algorithm identifies the factor (AST, HBeAg, HBV DNA, or PBMCs) that provides the highest information gain for classification. The y-axis represents the extent of predictive “value” that each factor contributes. The results indicate that HBeAg (green dots) and PBMCs (purple dots) frequently exhibit high information gain, suggesting that these 2 factors play a prominent role in most runs, regardless of the split ratio. Meanwhile, AST (red dots) occasionally shows considerable information gain but appears less frequently, and HBV DNA (blue dot) rarely emerges as the most important factor. Overall, these findings highlight that HBeAg and PBMCs are the dominant factors in the model, maintaining stability across different training-test splits (see <xref rid="figure4" ref-type="fig">Figure 4</xref>).</p>
        <p>Comparing the medians of the score values for HBeAg and PBMCs, we see that they are higher than those of the other factors. These dot plots show the information gain values for 4 factors (AST, HBeAg, HBV DNA, and PBMCs) across various runs of the model at different data split ratios (0.5, 0.75, 0.8, 0.85, 0.9, and 0.95). Each panel represents a specific split ratio, with the y-axis displaying the distribution of information gain scores for the factor deemed most important in that particular run. In general, HBeAg (green box) and PBMCs (purple box) consistently show higher median values or broader ranges of information gain, indicating that these 2 factors are crucial in the classification model, regardless of the training-test split used. In some panels (for instance, at split ratios of 0.5 or 0.75), AST (red box) occasionally records a high information gain score, but it does not match the stability or frequency of HBeAg or PBMCs. By contrast, HBV DNA (blue) seldom ranks with the highest information gain score (and hence not visible), suggesting it plays a lesser role in the classification decisions during these ID3 runs. Therefore, HBeAg and PBMCs are the primary contributors to the model’s predictive performance, while AST shows occasional dominance, and HBV DNA rarely serves as the top predictor (see <xref rid="figure5" ref-type="fig">Figure 5</xref>).</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>The dot plot of the most important factor in each run following different split ratios. AST: aspartate aminotransferase; HBeAg: hepatitis B e antigen; HBV: hepatitis B virus; PBMC: peripheral blood mononuclear cell.</p>
          </caption>
          <graphic xlink:href="formative_v9i1e69838_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>The bar plot of the most important factor in each run following different split ratios. AST: aspartate aminotransferase; HBeAg: hepatitis B e antigen; HBV: hepatitis B virus; PBMC: peripheral blood mononuclear cell.</p>
          </caption>
          <graphic xlink:href="formative_v9i1e69838_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>In the decision tree representation, each node’s split is based on the predictor variables. The result provides several pieces of information that help us interpret the tree. <xref rid="figure6" ref-type="fig">Figure 6</xref> illustrates how the decision trees (ID3/CART) divide the data based on various training-test split ratios (0.95, 0.90, 0.85, 0.80, 0.75, and 0.50), along with the associated MTCT risk classifications (trivial, small, medium, and large) and gain score results. In all splits, HBeAg consistently emerges as the primary splitting factor. The HBeAg-positive branch generally leads to the large risk category when PBMCs are ≥8 × 10<sup>6</sup> cells/mL, and to medium or small risk when PBMCs are lower. By contrast, HBeAg-negative cases typically split into small risk (when PBMCs are high) or trivial risk (when PBMCs are low). The gain score tables further validate that HBeAg and PBMCs are dominant. Although the precise distributions of risk categories (trivial, small, medium, and large) vary slightly with different splits, the model consistently highlights HBeAg and PBMCs as key factors, reinforcing earlier findings that these 2 variables are essential predictors of mother-to-child HBV transmission risk (<xref rid="figure6" ref-type="fig">Figure 6</xref>). According to <xref rid="figure6" ref-type="fig">Figure 6</xref>, the risk of MTCT of HBV is stratified based on HBeAg status and PBMC concentration as follows: among HBeAg-positive women, 20 out of 48 training cases with a split ratio of 0.80 (42%) to 27 out of 57 training cases with a split ratio of 0.95 (47%), or 16 out of 30 training cases with a split ratio of 0.50 (53%), were classified as having a high risk of MTCT. Among HBeAg-negative individuals with PBMC concentrations ≥8 × 10<sup>6</sup> cells/mL, 7 out of 51 training cases with a split ratio of 0.85 (14%) to 8 out of 45 training cases with a split ratio of 0.75 (18%) were categorized as having a small risk. 
The remaining 21 out of 57 training cases with a split ratio of 0.95 (37%), or 20 out of 53 training cases with a split ratio of 0.90 (38%) to 16 out of 30 training cases with a split ratio of 0.50 (53%) of HBeAg-negative cases with PBMC concentrations &#60;8 × 10<sup>6</sup> cells/mL were classified as having negligible risk (see <xref rid="figure6" ref-type="fig">Figure 6</xref>).</p>
        <p>From the dot plot (<xref rid="figure4" ref-type="fig">Figure 4</xref>), we can observe several runs of the ID3-based decision tree analysis at different training-test split ratios, indicated in each column. In each run, the algorithm identifies the factor (AST, HBeAg, HBV DNA, or PBMCs) that provides the highest information gain for classification. The y-axis shows the predictive “value” contributed by each factor. The results reveal that HBeAg (green) and PBMCs (purple) frequently demonstrate high information gain, indicating that these 2 factors are dominant in most runs, regardless of the split ratio. By contrast, AST (red) occasionally shows a notable information gain but is less frequent, while HBV DNA (blue) rarely appears as the most critical factor. Overall, these results emphasize that HBeAg and PBMCs are the key factors in the model, remaining consistent across various training-test splits.</p>
        <p>In summary, these plots show that HBeAg and PBMCs are typically the most influential factors across different training-test splits, while AST plays a more inconsistent role, and HBV DNA contributes less frequently to classification decisions in these ID3 runs.</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Decision tree diagram based on different split ratios: (A) 0.95, (B) 0.90, (C) 0.85, (D) 0.80, (E) 0.75, and (F) 0.50. Cohen classified effect sizes on MTCT risk as trivial (d&#60;0.2), small (0.2≤d&#60;0.5), medium (0.5≤d&#60;0.8), and large (d≥0.8). ALT: alanine aminotransferase; AST: aspartate aminotransferase; HBeAg: hepatitis B e antigen; HBV: hepatitis B virus; MTCT: mother-to-child transmission; PBMC: peripheral blood mononuclear cell.</p>
          </caption>
          <graphic xlink:href="formative_v9i1e69838_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Boosting the Assembly of the Five Factors Shows the Important Groups to Predict the Risk of MTCT</title>
        <p>We have 14 cases in which we could enhance the assembly simulation (<xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>). The split ratio and number of runs influence our predictions for all cases in the group. <xref rid="figure7" ref-type="fig">Figure 7</xref> shows the raw distribution, reflecting the accuracy of each observation in relation to the split ratio. The results of the Pearson chi-square test for the contingency table show that the distribution of risk groups (large, small, and trivial) across different training data split ratios (ranging from 0.75 to 0.95) differs significantly (<italic>χ</italic><sup>2</sup><sub>8</sub>=21.16, <italic>P</italic>=.007). When testing each group individually using a 1-way chi-square test, the large group has a <italic>P</italic> value of .002, indicating a clear change in distribution across different training ratios. The small group has a <italic>P</italic> value of .07, which is close to the significance threshold, suggesting a potential trend in distribution change, while the trivial group has a <italic>P</italic> value of .84, indicating a stable distribution that is not significantly affected by the data split ratio. Thus, changes in the train/test ratio may influence how decision trees learn, particularly for groups with distinct characteristics, such as the “large” group. An accuracy measure for classification tasks, using the confusion matrix, provides a better evaluation of classification performance. The general idea is to count how often true instances (true positive and true negative) are misclassified as false (false positive and false negative). We compute the accuracy of the test from the confusion matrix using the following formula: accuracy = (true positive + true negative)/(true positive + true negative + false positive + false negative). The accuracy score reflects the probability that the test data produces the same result as the training data set. 
<xref rid="figure8" ref-type="fig">Figure 8</xref> shows the results of the correlation test, confirming that the number of observations and the split ratio strongly correlate with accuracy. We repeat this random selection 1000 times for each division ratio, aiming to achieve the highest accuracy. <xref rid="figure9" ref-type="fig">Figure 9</xref> provides an overview of the MTCT risk. Groups 7 and 10 have the highest MTCT risk, with a prevalence ranging from 13 out of 48 (a split ratio of 0.8 from a total of 60 cases, 27%) to 18 out of 54 (a split ratio of 0.9 from a total of 60 cases, 33%). Groups 13 and 14 have the lowest risk, with 9 out of 30 (a split ratio of 0.5 from a total of 60 cases, 30%) to 18 out of 48 (a split ratio of 0.8 from a total of 60 cases, 38%) cases falling into the trivial group, indicating negligible MTCT risk. The other groups show an accuracy ranging from 8 out of 57 (a split ratio of 0.95 from a total of 60 cases, 14%) to 9 out of 30 (a split ratio of 0.5 from a total of 60 cases, 30%), with cases categorized into the medium- or small-risk groups.</p>
        <fig id="figure7" position="float">
          <label>Figure 7</label>
          <caption>
            <p>Distribution of accuracy score. (A) Raw distribution. (B) Violin plot shows the median of accuracy.</p>
          </caption>
          <graphic xlink:href="formative_v9i1e69838_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>From these plots, we can observe the classification accuracy achieved by the ID3 model over 1000 runs at various training-test split ratios (0.5, 0.75, 0.8, 0.85, 0.9, and 0.95). Each panel corresponds to a specific split ratio, with the y-axis displaying accuracy scores and the x-axis representing the number of runs. Although there is considerable fluctuation within each split ratio, some general patterns emerge. At lower split ratios (eg, 0.5), the model’s accuracy tends to cluster in the lower to mid range (approximately 0.25-0.50). As the split ratio increases (eg, 0.8 or 0.85), accuracy occasionally reaches higher peaks, with some runs exceeding 0.60 or 0.70, though variability remains evident. With even larger training proportions (0.9 and 0.95), the accuracy range broadens further, with some runs achieving relatively high performance while others drop close to 0. Overall, these results indicate that the model’s accuracy is highly sensitive to the specific partitioning of the data set, showing moderate gains and substantial variation as the training-test split ratio changes (<xref rid="figure7" ref-type="fig">Figure 7</xref>A).</p>
        <p>The violin plots illustrate the distribution of classification accuracy across 1000 runs for each training-test split ratio. On the left, with a split ratio of 0.5, the distribution is relatively narrow, centering around the 0.25-0.40 range. This suggests that using half of the data for training typically results in modest accuracy. As the training proportion increases to 0.75 or 0.8, the distribution shifts upward, revealing higher accuracy values. At split ratios of 0.9 and 0.95, the distribution widens considerably, with some runs achieving very high accuracy—approaching or exceeding 0.75—while others fall closer to 0.20. This suggests that while more training data can improve the model’s performance in some cases, a smaller test set may lead to greater variance, resulting in a wider range of accuracy outcomes (<xref rid="figure7" ref-type="fig">Figure 7</xref>B).</p>
        <p><xref rid="figure8" ref-type="fig">Figure 8</xref>A shows a strong positive correlation (<italic>R</italic>=0.91, <italic>P</italic>&#60;2.2 × 10<sup>–16</sup>) between the training-test split ratio (on the x-axis) and classification accuracy (on the y-axis). As the amount of data allocated for training increases, the model’s accuracy tends to improve, resulting in a nearly linear upward trend. In <xref rid="figure8" ref-type="fig">Figure 8</xref>B, the correlation between the number of runs (indicated on the x-axis) and accuracy is considerably weaker (<italic>R</italic>=0.17, <italic>P</italic>&#60;2.2 × 10<sup>–16</sup>). This suggests that while accuracy shows a slight upward trend over multiple runs, the majority of the variation is better explained by other factors, particularly the split ratio, rather than the order or total number of runs.</p>
        <fig id="figure8" position="float">
          <label>Figure 8</label>
          <caption>
            <p>Correlation lines between accuracy and split ratio or run number (number of observations). (A) Correlation plot between accuracy and split ratio. (B) Correlation plot between accuracy and run number.</p>
          </caption>
          <graphic xlink:href="formative_v9i1e69838_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>In these CART diagrams (<xref rid="figure9" ref-type="fig">Figure 9</xref> and <xref ref-type="supplementary-material" rid="app13">Multimedia Appendix 13</xref>), we observe how the data set is consistently divided by key factors such as PBMCs, AST, ALT, HBeAg, and HBV DNA across various training-test split ratios, ranging from 0.50 to 0.95. Each final branch represents a category of MTCT risk—classified as “large,” “medium,” “small,” or “trivial”—according to the Cohen <italic>d</italic> index. Interestingly, some groups—such as group 7 (with PBMCs &#60; 8 × 10<sup>6</sup> cells/mL, AST ≥ 14.15 U/L, ALT &#60; 43.34 U/L, HBeAg positive, HBV DNA ≥ 5×10<sup>7</sup> copies/mL) and group 10 (with PBMCs &#60; 8 × 10<sup>6</sup> cells/mL, AST ≥ 14.15 U/L, ALT &#60; 43.34 U/L, HBeAg positive, HBV DNA &#60; 5 × 10<sup>7</sup> copies/mL)—are frequently associated with “very large” effect sizes. By contrast, others—such as group 13 (with PBMCs &#60; 8 × 10<sup>6</sup> cells/mL, AST ≥ 14.15 U/L, ALT ≥ 43.34 U/L, HBeAg positive, HBV DNA ≥ 5 × 10<sup>7</sup> copies/mL) and group 14 (with PBMCs &#60; 8 × 10<sup>6</sup> cells/mL, AST ≥ 14.15 U/L, ALT &#60; 43.34 U/L, HBeAg negative, HBV DNA &#60; 5 × 10<sup>7</sup> copies/mL)—are categorized as “trivial” in terms of increasing the risk of MTCT. The proportion and number of runs leading to each category are displayed beneath each node. As the training ratio increases, the model becomes more effective at assigning specific groups to their corresponding risk categories, although the overall distribution of runs across large, medium, and trivial categories still fluctuates. This suggests that while certain subgroup characteristics, such as PBMC levels or HBeAg status, influence classification toward high or negligible MTCT risk, the model’s consistency and accuracy are also impacted by how the data are divided for training and testing.</p>
        <p>In this study, a “large effect” is defined as a Cohen <italic>d</italic> value of 0.8 or higher. Our decision tree analysis (utilizing ID3 and CART) reveals that the groups identified as 7 and 8 typically show a large effect size, indicating a high risk of MTCT. In these groups, factors such as positive HBeAg status, elevated PBMC concentration (≥8 × 10<sup>6</sup> cells/mL), and other relevant biochemical markers contribute to an effect size (Cohen <italic>d</italic>) that meets or exceeds the 0.8 threshold. This indicates an increased risk of transmission in these groups.</p>
        <fig id="figure9" position="float">
          <label>Figure 9</label>
          <caption>
            <p>Classification and regression trees with different split ratios. (A) Split ratio 0.95. (B) Split ratio 0.90. Group numbers correspond to those listed in <xref ref-type="table" rid="table3">Table 3</xref>. The range of mother-to-child transmission (MTCT) risk is based on the Cohen <italic>d</italic> index. According to the Cohen classification, effect sizes on MTCT risk are defined as follows: trivial (d&#60;0.2), small (0.2≤d&#60;0.5), medium (0.5≤d&#60;0.8), and large (d≥0.8). ALT: alanine aminotransferase; AST: aspartate aminotransferase; HBeAg: hepatitis B e antigen; HBV: hepatitis B virus; PBMC: peripheral blood mononuclear cell.</p>
          </caption>
          <graphic xlink:href="formative_v9i1e69838_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This study aimed to identify key predictors of MTCT of HBV, with a particular focus on maternal HBeAg status and PBMC concentration. We developed and applied an ID3-based decision tree model to analyze clinical data from pregnant Vietnamese women. Our primary finding is that HBeAg positivity, combined with elevated PBMC levels (≥8 × 10<sup>6</sup> cells/mL), is strongly associated with an increased risk of MTCT. The decision tree model effectively stratified risk based on a combination of virological and biochemical indicators, confirming our hypothesis regarding the predictive value of both PBMCs and HBeAg.</p>
        <p>HBV is known to cross the placental barrier, and the presence of maternal HBeAg in newborns at birth supports the occurrence of vertical transmission. Our findings add to the growing body of evidence suggesting that PBMCs may play a critical role in facilitating intrauterine HBV infection [<xref ref-type="bibr" rid="ref15">15</xref>]. Prior research has shown a 9.5-fold increased risk of HBV infection among neonates who are HBV DNA positive in PBMCs [<xref ref-type="bibr" rid="ref16">16</xref>]. Similarly, our earlier study involving a cohort of 60 Vietnamese mothers and their infants found a correlation between PBMC concentrations and maternal viral load, particularly in cases with HBV DNA levels below 5 × 10<sup>7</sup> copies/mL. HBV can be transmitted from mother to child due to its ability to cross the placental barrier. Notably, HBeAg detected in newborns at birth is of maternal origin [<xref ref-type="bibr" rid="ref17">17</xref>]. In this study, we aim to utilize real-world clinical data to develop a predictive model to determine the risk of MTCT. Our approach uses a machine learning model based on the ID3 decision tree algorithm. The critical decision nodes are determined through a series of calculations, starting from individual (single) entropy to total entropy. Information gain, which quantifies the reduction in entropy, is used to evaluate how effectively a given feature separates or classifies the target outcomes. The feature with the highest information gain is selected as the most informative. This classical entropy-based measure plays a fundamental role in various machine learning algorithms, including decision tree models. In the ID3 algorithm, the attribute selected as the splitting criterion is the one with the highest information gain. This method has proven effective in various machine learning and signal processing applications. 
Beyond facilitating feature selection, entropy-based measures serve as reliable indicators of data complexity and classification difficulty in real-world scenarios, as demonstrated by Juszczuk et al [<xref ref-type="bibr" rid="ref18">18</xref>]. Their research underscores the critical importance of using entropy in model construction, especially when dealing with complex biomedical data sets where noise and variability can impact model performance. This supports our application of information gain in identifying PBMC concentration as the most informative variable, aligning both with clinical relevance and theoretical expectations in predicting the risk of MTCT [<xref ref-type="bibr" rid="ref18">18</xref>]. In machine learning theory, information gain is synonymous with Kullback-Leibler divergence; this index quantifies the amount of information obtained about a random variable by observing another. In the first step, the ID3 algorithm was applied to our data subset without splitting. PBMCs achieved the highest information gain score, which was 0.247 (<xref ref-type="supplementary-material" rid="app13">Multimedia Appendix 13</xref>). We could draw a decision tree by choosing PBMCs as the root node, as it has the most significant information gain. From the root node PBMCs, branches extend to other attribute nodes listed in <xref ref-type="supplementary-material" rid="app13">Multimedia Appendix 13</xref>. We found that groups 7 and 10 (<xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>) are associated with a high risk of MTCT, characterized by positive HBeAg status, PBMC serum concentrations above 8 × 10<sup>6</sup> cells/mL, elevated AST levels (≥14.15 U/L), and low ALT levels (&#60;43.34 U/L). 
These groups have Cohen <italic>d</italic> values of 1.855 and 1.696, respectively (<xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>), with associated probabilities ranging from 13 out of 48 (a split ratio of 0.8 from a total of 60 cases, 27%) to 18 out of 54 (a split ratio of 0.9 from a total of 60 cases, 33%), respectively (<xref rid="figure9" ref-type="fig">Figure 9</xref> and <xref ref-type="supplementary-material" rid="app12">Multimedia Appendix 12</xref>). The dual roles of HBeAg as both a tolerogen and an immunogen, along with its ability to either suppress or activate the immune response, highlight the complexity of its interactions with the host. Numerous studies have demonstrated that HBeAg can influence both innate and adaptive immune responses, contributing to the persistence of HBV. HBeAg can bind to PBMCs, neutrophils, and B lymphocytes, but not to T lymphocytes. The interaction between HBeAg and monocytes or neutrophils has been shown to be dose dependent, resulting in the inhibition of both cell types. Monocytic myeloid-derived suppressor cells (mMDSCs) are derived from myeloid progenitor cells and account for approximately 0.5% of PBMCs in healthy individuals. The mMDSC population expands during infection, inflammation, and cancer. HBeAg plays a crucial role in the expansion of the mMDSC population and the induction of immune tolerance. Compared with HBeAg-negative patients, HBeAg-positive patients were shown to have significantly higher levels of mMDSCs. When PBMCs from healthy individuals were exposed to HBeAg, there was an increase in mMDSCs and the expression of IL-6 and IL-1β. Additionally, mMDSCs from HBeAg-positive patients suppressed the proliferation of CD4+ and CD8+ T cells. This may represent a potential mechanism by which HBeAg modulates the host immune response during CHB by physically depleting or weakening virus-specific CD4+ and CD8+ T cells. 
As a result, these cells are unable to proliferate in response to viral antigens or produce essential antiviral and immunostimulatory cytokines, which are crucial for controlling the virus in patients with CHB [<xref ref-type="bibr" rid="ref14">14</xref>]. Additionally, new research by Padarath et al [<xref ref-type="bibr" rid="ref14">14</xref>] provided further insights into the various functions of HBeAg and its precursors in the development of chronic HBV infection. According to their review, HBeAg may influence hepatocarcinogenesis through long-term immune modulation and chronic inflammation, in addition to promoting immune evasion and tolerance. This demonstrates that HBeAg serves various functions, including promoting vertical transmission, inhibiting host defenses, and potentially contributing to chronic conditions such as liver cancer. These findings confirm our results, which indicate that HBeAg plays a major role in MTCT risk, and emphasize the need for close monitoring of pregnancies involving HBeAg-positive women. It is well known that both HBeAg and AST are independent risk factors for predicting nonminimal liver inflammation in untreated patients with CHB. In HBeAg-positive untreated patients with CHB, the liver inflammation associated with CHB is linked to the balance between the immune system and HBV infection. Quantitative changes in indicators such as HBsAg and HBeAg can signal the breakdown of immune tolerance and the onset of immune clearance in CHB infection. Including immune-related indicators in the inflammatory prediction model for CHB infection is essential. In general, ALT or AST is included in most models for liver inflammation or fibrosis. It has been found that AST is a better predictor than ALT. HBV DNA, HBsAg, and HBeAg reflect the replication capacity of HBV in untreated patients with CHB. Previous studies have shown that HBsAg and HBeAg are negatively correlated with liver inflammation. 
In that study, HBsAg, HBeAg, and HBV DNA were all included, with HBeAg demonstrating the best predictive ability. Compared with other models, the significance of this nonminimal liver inflammation model lies in its confirmation of the importance of HBeAg in identifying liver inflammation. On the one hand, HBeAg reflects the replication level of HBV in HBeAg-positive patients with CHB; on the other hand, a decline in HBeAg is often an early sign of the breakdown of immune tolerance [<xref ref-type="bibr" rid="ref19">19</xref>]. Based on the above calculations, it is evident that further experiments with various random split ratios of the data file are necessary. Different tests will improve prediction performance. The positive correlations of accuracy with the split ratio and with the number of runs support this. A clear result is that, after 1000 random runs for each split ratio, HBeAg and PBMC concentrations consistently show the highest information gain scores. When testing all 5 factors simultaneously, fixed groups emerged in each risk category, highlighting the additive or inhibitory influence of other biochemical indicators on the impact of HBeAg and PBMCs on the risk of MTCT.</p>
        <p>Nonetheless, this study has several limitations that should be noted. First, the sample size was relatively small and geographically limited to a single region in Vietnam, which may affect the generalizability of the findings to other populations or ethnicities. Second, while the ID3 decision tree provided valuable insights into variable importance and risk classification, predictive performance could potentially be improved with larger, more heterogeneous data sets, and by comparing it with other, yet untested, machine learning approaches (eg, random forest, XGBoost). Third, additional maternal or fetal factors (eg, nutritional status, coinfections, genetic predisposition) may have been overlooked and could be important, given the constraints of the available data. Finally, the cross-sectional design and retrospective data collection may have introduced bias, limiting the ability to establish a definitive causal relationship. Nevertheless, the findings of the study are still valuable and provide a strong foundation for future research. By identifying key predictors of risk, particularly HBeAg status and PBMC concentration, it was possible to develop a simple yet practical model for predicting risk. These findings represent an essential first step toward developing more comprehensive tools, which can later be refined and validated through larger prospective studies aimed at enhancing clinical decision-making in HBV-infected pregnancies.</p>
        <p>Our findings underscore the predictive importance of maternal HBeAg status and PBMC concentration in assessing the risk of HBV MTCT. Incorporating these indicators into clinical screening procedures could enable more targeted interventions during pregnancy. Additionally, our results support the use of machine learning tools in epidemiological risk assessment, particularly in resource-limited settings. Future studies should aim to validate these findings in larger, more diverse cohorts and explore the potential pathways through which PBMCs mediate HBV transmission. These insights may influence decisions regarding the timing and choice of antiviral treatments in high-risk pregnancies, as well as the design of other preventive strategies.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Decision trees are robust and flexible machine learning algorithms. They are interpretable, capable of handling nonlinear relationships, and efficient in computation. Additionally, decision trees can accommodate mixed data types. However, if a small sample is tested, there is a risk of overfitting or overclassification. It is also important to note that only 1 attribute is tested at a time when making a decision.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study demonstrated that by combining the ID3 and CART algorithms, data can be interpreted as a decision tree to assist clinicians in their understanding. Additionally, the proposed system provides improved performance by category. The resulting prediction rules, derived from the training data, construct the fastest and most efficient tree. This approach only requires testing enough attributes to classify all the data. By identifying leaf nodes, the test data can be pruned, reducing the number of tests required. The entire data set is explored to construct the tree. This strategy provides clear, structured choices with potential outcomes, making it especially useful in complex diagnostics. It allows health care professionals to review symptoms and test results systematically, supporting more informed decision-making.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Relative risk ratio plot. Probability: 1, CBHBsAg positive; 0, CBHBsAg negative. CB: cord blood; HBsAg: hepatitis B surface antigen.</p>
        <media xlink:href="formative_v9i1e69838_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 422 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Pearson correlation coefficients (R) in the HBV DNA ≥ 5 × 10<sup>7</sup> copies/mL group. Color-coded Pearson correlation coefficients are shown between subclinical indices, including levels of prothrombin, AST, ALT, RBC, and hemoglobin in maternal blood; the concentration and density of PBMCs; and the status of HBeAg and anti-HBs in both cord and maternal blood. The color of each cell reflects the strength and direction of the correlation, ranging from red (negative correlation) to blue (positive correlation), with intensity corresponding to the magnitude of the association. The correlation strength is indicated by the accompanying color scale. ALT: alanine aminotransferase; AST: aspartate aminotransferase; HBV: hepatitis B virus; HBeAg: hepatitis B e antigen; RBC: red blood cell; PBMC: peripheral blood mononuclear cell.</p>
        <media xlink:href="formative_v9i1e69838_app2.pdf" xlink:title="PDF File  (Adobe PDF File), 490 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Pearson correlation coefficients (R values) in the HBV DNA &#60; 5 × 10<sup>7</sup> copies/mL group are presented in this table. It displays color-coded Pearson correlation coefficients between various subclinical indices, including prothrombin time, AST, ALT, RBC, and hemoglobin levels in maternal blood; the concentration and density of PBMCs; and the status of HBeAg and anti-HBs in both cord and maternal blood. Cell colors indicate the strength and direction of the correlations, ranging from red (negative correlations) to blue (positive correlations), with the intensity corresponding to the magnitude, as shown by the color scale. ALT: alanine aminotransferase; AST: aspartate aminotransferase; HBeAg: hepatitis B e antigen; HBV: hepatitis B virus; RBC: red blood cell; PBMC: peripheral blood mononuclear cell.</p>
        <media xlink:href="formative_v9i1e69838_app3.pdf" xlink:title="PDF File  (Adobe PDF File), 324 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p><italic>P</italic> values from the Pearson correlation tests in the HBV DNA ≥ 5 × 10<sup>7</sup> copies/mL group. HBV: hepatitis B virus.</p>
        <media xlink:href="formative_v9i1e69838_app4.pdf" xlink:title="PDF File  (Adobe PDF File), 486 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p><italic>P</italic> values from the Pearson correlation tests in the HBV DNA &#60; 5 × 10<sup>7</sup> copies/mL group. HBV: hepatitis B virus.</p>
        <media xlink:href="formative_v9i1e69838_app5.pdf" xlink:title="PDF File  (Adobe PDF File), 315 KB"/>
      </supplementary-material>
      <supplementary-material id="app6">
        <label>Multimedia Appendix 6</label>
        <p><italic>P</italic> values from the Fisher exact test comparing the 2 groups: HBV DNA &#60; 5 × 10<sup>7</sup> copies/mL and HBV DNA ≥ 5 × 10<sup>7</sup> copies/mL. HBV: hepatitis B virus.</p>
        <media xlink:href="formative_v9i1e69838_app6.pdf" xlink:title="PDF File  (Adobe PDF File), 651 KB"/>
      </supplementary-material>
      <supplementary-material id="app7">
        <label>Multimedia Appendix 7</label>
        <p><italic>P</italic> values from the Fisher exact test and Pearson correlation test comparing the correlation coefficients (R) between the 2 groups: HBV DNA &#60; 5 × 10<sup>7</sup> copies/mL and HBV DNA ≥ 5 × 10<sup>7</sup> copies/mL. HBV: hepatitis B virus.</p>
        <media xlink:href="formative_v9i1e69838_app7.pdf" xlink:title="PDF File  (Adobe PDF File), 664 KB"/>
      </supplementary-material>
      <supplementary-material id="app8">
        <label>Multimedia Appendix 8</label>
        <p>Cluster dendrogram of the 2 groups. Left: group with higher maternal viral load (HBV DNA ≥ 5 × 10<sup>7</sup> copies/mL); right: group with lower maternal viral load (HBV DNA &#60; 5 × 10<sup>7</sup> copies/mL). The vertical axis represents the Euclidean distance between observations or clusters. Horizontal bars indicate the linkage points at which clusters or individual observations are merged. HBV: hepatitis B virus.</p>
        <media xlink:href="formative_v9i1e69838_app8.pdf" xlink:title="PDF File  (Adobe PDF File), 349 KB"/>
      </supplementary-material>
      <supplementary-material id="app9">
        <label>Multimedia Appendix 9</label>
        <p>Top: Group with higher viral load (HBV DNA ≥ 5 × 10<sup>7</sup> copies/mL); bottom: group with lower viral load (HBV DNA &#60; 5 × 10<sup>7</sup> copies/mL). K-means clustering was optimized using 3 methods: average silhouette, elbow, and gap statistic. The cluster analysis was performed without any missing values. HBV: hepatitis B virus.</p>
        <media xlink:href="formative_v9i1e69838_app9.pdf" xlink:title="PDF File  (Adobe PDF File), 290 KB"/>
      </supplementary-material>
      <supplementary-material id="app10">
        <label>Multimedia Appendix 10</label>
        <p>Cluster plot. Left: group with higher viral load (HBV DNA ≥ 5 × 10<sup>7</sup> copies/mL); right: group with lower viral load (HBV DNA &#60; 5 × 10<sup>7</sup> copies/mL). Clustering was performed with the number of clusters set to 5 and 8, respectively, to visualize the parameter groupings within each viral load group. HBV: hepatitis B virus.</p>
        <media xlink:href="formative_v9i1e69838_app10.pdf" xlink:title="PDF File  (Adobe PDF File), 457 KB"/>
      </supplementary-material>
      <supplementary-material id="app11">
        <label>Multimedia Appendix 11</label>
        <p>Cohen classified effect sizes on MTCT risk as follows: trivial (d&#60;0.2), small (0.2≤d&#60;0.5), medium (0.5≤d&#60;0.8), and large (d≥0.8). To determine the effect size for each factor, we transformed the natural logarithm of the odds ratio (ln[odds ratio]) by dividing it by 1.81. Additionally, Cohen h was calculated using the formula h = 2 × &#124;arcsin(√p1) − arcsin(√p2)&#124;, where p1 represents the proportion of CBHBsAg-positive cases and p2 the proportion of CBHBsAg-negative cases [<xref ref-type="bibr" rid="ref9">9</xref>]. The analysis of Cohen h across the groups revealed varying degrees of effect size between CBHBsAg-positive and CBHBsAg-negative proportions. Group 14 demonstrated a very large effect (h=1.46), with a low positive rate (17%) and high negative rate (83%). Group 12 showed a medium effect (h=0.51), indicating a moderate difference. Groups 10 (h=1.70) and 7 (h=1.85) both exhibited very large effects, with positive rates of 88% and 90%, respectively, contrasted with much lower negative rates. Group 13 showed the maximum possible difference (h=3.14), with a 100% positive and 0% negative rate, corresponding to the theoretical upper limit of Cohen h, π. By contrast, the "other_group" displayed a small effect (h=0.15), with nearly equal proportions of positive and negative cases, suggesting minimal difference between the 2. HBsAg: hepatitis B surface antigen; MTCT: mother-to-child transmission.</p>
        <media xlink:href="formative_v9i1e69838_app11.docx" xlink:title="DOCX File , 23 KB"/>
      </supplementary-material>
      <supplementary-material id="app12">
        <label>Multimedia Appendix 12</label>
        <p>Classification and regression trees (CART) with varying split ratios. Panels show CART results with different split ratios: (A) 0.85, (B) 0.80, (C) 0.75, and (D) 0.50. Group numbers correspond to those listed in <xref ref-type="table" rid="table3">Table 3</xref>. The range of mother-to-child transmission (MTCT) risk is based on the Cohen d index. According to the Cohen classification, effect sizes for MTCT risk are defined as follows: trivial (d&#60;0.2), small (0.2≤d&#60;0.5), medium (0.5≤d&#60;0.8), and large (d≥0.8).</p>
        <media xlink:href="formative_v9i1e69838_app12.pdf" xlink:title="PDF File  (Adobe PDF File), 492 KB"/>
      </supplementary-material>
      <supplementary-material id="app13">
        <label>Multimedia Appendix 13</label>
        <p>The weighted sum of the entropy of the child nodes and the corresponding information gain reaches its maximum value. Cohen classified effect sizes related to mother-to-child transmission risk as follows: trivial (d&#60;0.2), small (0.2≤d&#60;0.5), medium (0.5≤d&#60;0.8), and large (d≥0.8).</p>
        <media xlink:href="formative_v9i1e69838_app13.docx" xlink:title="DOCX File , 33 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ALT</term>
          <def>
            <p>alanine aminotransferase</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AST</term>
          <def>
            <p>aspartate aminotransferase</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CART</term>
          <def>
            <p>classification and regression trees</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CHB</term>
          <def>
            <p>chronic hepatitis B</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">EDTA</term>
          <def>
            <p>ethylenediaminetetraacetic acid</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">HBeAg</term>
          <def>
            <p>hepatitis B e antigen</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">HBsAg</term>
          <def>
            <p>hepatitis B surface antigen</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">HBV</term>
          <def>
            <p>hepatitis B virus</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">ID3</term>
          <def>
            <p>Iterative Dichotomiser 3</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">mMDSC</term>
          <def>
            <p>monocytic myeloid-derived suppressor cell</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">MTCT</term>
          <def>
            <p>mother-to-child transmission</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">PBMC</term>
          <def>
            <p>peripheral blood mononuclear cell</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">WHO</term>
          <def>
            <p>World Health Organization</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study is part of a collaborative research effort between Germany and Vietnam. It is funded by the BMBF (German Federal Ministry of Education and Research) under reference number 01DP19001 for its participation in the project “Identification of Viral and Host Factors in Vertical Mother-to-Child Transmission of Hepatitis B Virus,” and by MOST (Ministry of Science and Technology of Vietnam) under reference number 232/QD-BKHCN for its involvement in the project “Application of DNA Sequencing and ELISPOT Technologies in Evaluation of HBV Transmission Risk from Mother to Child.” We express our gratitude to the BMBF of Germany and MOST of Vietnam for supporting bilateral scientific cooperation between the two countries. We also thank the patients, health care workers, midwives, and technicians who participated in and assisted with the completion of this research. Our heartfelt thanks go to Professor Ulrike Protzer’s research group at the Institute of Virology, School of Medicine, Technical University of Munich, Munich, Germany, and Helmholtz Zentrum München, Munich, Germany; the German Center for Infection Research (DZIF), Munich and Hamburg Partner sites, Germany, for their invaluable collaboration and support during this study. Their expertise, insightful contributions, and dedicated efforts have been crucial to the success of this project.</p>
    </ack>
    <notes>
      <title>Data Availability</title>
      <p>The data sets generated and analyzed during this study are available from the corresponding author on reasonable request.</p>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>DNT contributed to the study design, study supervision, and manuscript review as the project leader. HBTT was involved in the study design, data compilation and analysis, as well as manuscript writing and review. THTN contributed to sample collection, data compilation and analysis, and manuscript writing and review. TPT participated in sample collection and data compilation. TND, HNTT, and HVTT provided technical supervision. LATL was responsible for the technical supervision of LTH. HCT contributed to the study design, proposal writing, bioinformatic analysis, and manuscript writing. NK and TB were involved in the study design, study supervision, and manuscript review. LKH contributed to bioinformatic analysis planning, analysis supervision, and manuscript review. All authors read and approved the final manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>Noele P</given-names>
            </name>
            <name name-style="western">
              <surname>Jamieson</surname>
              <given-names>Denise J</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>Trudy V</given-names>
            </name>
          </person-group>
          <article-title>Prevention of perinatal hepatitis B virus transmission</article-title>
          <source>J Pediatric Infect Dis Soc</source>
          <year>2014</year>
          <month>09</month>
          <volume>3 Suppl 1</volume>
          <issue>Suppl 1</issue>
          <fpage>S7</fpage>
          <lpage>S12</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25232477"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jpids/piu064</pub-id>
          <pub-id pub-id-type="medline">25232477</pub-id>
          <pub-id pub-id-type="pii">piu064</pub-id>
          <pub-id pub-id-type="pmcid">PMC4164184</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dunford</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Carr</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>LT</given-names>
            </name>
            <name name-style="western">
              <surname>Ta Thi</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>BT</given-names>
            </name>
            <name name-style="western">
              <surname>Connell</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Coughlan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>HT</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Thi</surname>
              <given-names>LAN</given-names>
            </name>
          </person-group>
          <article-title>A multicentre molecular analysis of hepatitis B and blood-borne virus coinfections in Viet Nam</article-title>
          <source>PLoS One</source>
          <year>2012</year>
          <month>06</month>
          <day>13</day>
          <volume>7</volume>
          <issue>6</issue>
          <fpage>e39027</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0039027"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0039027</pub-id>
          <pub-id pub-id-type="medline">22720022</pub-id>
          <pub-id pub-id-type="pii">PONE-D-12-07965</pub-id>
          <pub-id pub-id-type="pmcid">PMC3374772</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarker</surname>
              <given-names>IH</given-names>
            </name>
          </person-group>
          <article-title>Machine learning: algorithms, real-world applications and research directions</article-title>
          <source>SN Comput Sci</source>
          <year>2021</year>
          <month>03</month>
          <day>22</day>
          <volume>2</volume>
          <issue>3</issue>
          <fpage>160</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33778771"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s42979-021-00592-x</pub-id>
          <pub-id pub-id-type="medline">33778771</pub-id>
          <pub-id pub-id-type="pii">592</pub-id>
          <pub-id pub-id-type="pmcid">PMC7983091</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>World Health Organization (WHO)</collab>
          </person-group>
          <source>Prevention of Mother-to-Child Transmission of Hepatitis B Virus: Guidelines on Antiviral Prophylaxis in Pregnancy</source>
          <year>2020</year>
          <publisher-loc>Geneva, Switzerland</publisher-loc>
          <publisher-name>WHO</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Körber</surname>
              <given-names>Nina</given-names>
            </name>
            <name name-style="western">
              <surname>Pohl</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Weinberger</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Grubeck-Loebenstein</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Wawer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Knolle</surname>
              <given-names>PA</given-names>
            </name>
            <name name-style="western">
              <surname>Roggendorf</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Protzer</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Bauer</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Hepatitis B vaccine non-responders show higher frequencies of CD24<sup>high</sup>CD38<sup>high</sup> regulatory B cells and lower levels of IL-10 expression compared to responders</article-title>
          <source>Front Immunol</source>
          <year>2021</year>
          <month>09</month>
          <day>10</day>
          <volume>12</volume>
          <fpage>713351</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34566969"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fimmu.2021.713351</pub-id>
          <pub-id pub-id-type="medline">34566969</pub-id>
          <pub-id pub-id-type="pmcid">PMC8461011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Starnes</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Tabor</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>The Practice of Statistics (6th Edition)</source>
          <year>2018</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>BFW High School Publishers</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thu Nguyen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Van Tran</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Cam Ho</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Xuan Nguyen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Trong Nguyen</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>A systematic analysis with the hierarchical cluster analysis strategy on the complex interaction of TERT and CTNNB1 somatic mutations in Vietnamese hepatocellular carcinoma patients</article-title>
          <source>Gene</source>
          <year>2024</year>
          <month>11</month>
          <day>15</day>
          <volume>927</volume>
          <fpage>148646</fpage>
          <pub-id pub-id-type="doi">10.1016/j.gene.2024.148646</pub-id>
          <pub-id pub-id-type="medline">38851365</pub-id>
          <pub-id pub-id-type="pii">S0378-1119(24)00527-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>TT</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>TC</given-names>
            </name>
            <name name-style="western">
              <surname>Bui</surname>
              <given-names>HTT</given-names>
            </name>
            <name name-style="western">
              <surname>Tran</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>TT</given-names>
            </name>
          </person-group>
          <article-title>Multi-clustering study on the association between human leukocyte antigen and hepatitis B virus-related hepatocellular carcinoma and cirrhosis in Viet Nam</article-title>
          <source>World J Gastroenterol</source>
          <year>2024</year>
          <month>12</month>
          <day>14</day>
          <volume>30</volume>
          <issue>46</issue>
          <fpage>4880</fpage>
          <lpage>4903</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.wjgnet.com/1007-9327/full/v30/i46/4880.htm"/>
          </comment>
          <pub-id pub-id-type="doi">10.3748/wjg.v30.i46.4880</pub-id>
          <pub-id pub-id-type="medline">39679310</pub-id>
          <pub-id pub-id-type="pmcid">PMC11612715</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>TT</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>CT</given-names>
            </name>
            <name name-style="western">
              <surname>Bui</surname>
              <given-names>HTT</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>LK</given-names>
            </name>
            <name name-style="western">
              <surname>Ta</surname>
              <given-names>VT</given-names>
            </name>
          </person-group>
          <article-title>Multidimensional machine learning for assessing parameters associated with COVID-19 in Vietnam: validation study</article-title>
          <source>JMIR Form Res</source>
          <year>2023</year>
          <month>02</month>
          <day>16</day>
          <volume>7</volume>
          <fpage>e42895</fpage>
          <pub-id pub-id-type="doi">10.2196/42895</pub-id>
          <pub-id pub-id-type="medline">36668902</pub-id>
          <pub-id pub-id-type="pii">v7i1e42895</pub-id>
          <pub-id pub-id-type="pmcid">PMC9937111</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chinn</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A simple method for converting an odds ratio to effect size for use in meta‐analysis</article-title>
          <source>Stat Med</source>
          <year>2000</year>
          <month>11</month>
          <volume>19</volume>
          <issue>22</issue>
          <fpage>3127</fpage>
          <lpage>3131</lpage>
          <pub-id pub-id-type="doi">10.1002/1097-0258(20001130)19:22&#60;3127::aid-sim784&#62;3.3.co;2-d</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blockeel</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Devos</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Frénay</surname>
              <given-names>Benoît</given-names>
            </name>
            <name name-style="western">
              <surname>Nanfack</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nijssen</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Decision trees: from efficient prediction to responsible AI</article-title>
          <source>Front Artif Intell</source>
          <year>2023</year>
          <month>07</month>
          <day>26</day>
          <volume>6</volume>
          <fpage>1124553</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37565044"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/frai.2023.1124553</pub-id>
          <pub-id pub-id-type="medline">37565044</pub-id>
          <pub-id pub-id-type="pmcid">PMC10411911</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sánchez-Hechavarría</surname>
              <given-names>Miguel Enrique</given-names>
            </name>
            <name name-style="western">
              <surname>Ghiya</surname>
              <given-names>Shreya</given-names>
            </name>
            <name name-style="western">
              <surname>Carrazana-Escalona</surname>
              <given-names>Ramon</given-names>
            </name>
            <name name-style="western">
              <surname>Cortina-Reyna</surname>
              <given-names>Sergio</given-names>
            </name>
            <name name-style="western">
              <surname>Andreu-Heredia</surname>
              <given-names>Adán</given-names>
            </name>
            <name name-style="western">
              <surname>Acosta-Batista</surname>
              <given-names>Carlos</given-names>
            </name>
            <name name-style="western">
              <surname>Saá-Muñoz</surname>
              <given-names>Nicolás Armando</given-names>
            </name>
          </person-group>
          <article-title>Introduction of application of Gini coefficient to heart rate variability spectrum for mental stress evaluation</article-title>
          <source>Arq Bras Cardiol</source>
          <year>2019</year>
          <volume>113</volume>
          <issue>4</issue>
          <fpage>725</fpage>
          <lpage>733</lpage>
          <pub-id pub-id-type="doi">10.5935/abc.20190185</pub-id>
          <pub-id pub-id-type="medline">31508693</pub-id>
          <pub-id pub-id-type="pii">S0066-782X2019005017103</pub-id>
          <pub-id pub-id-type="pmcid">PMC7020869</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Statistical Power Analysis for the Behavioral Sciences (2nd ed)</source>
          <year>1988</year>
          <publisher-loc>Hillsdale, NJ</publisher-loc>
          <publisher-name>Lawrence Erlbaum Associates</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Padarath</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Deroubaix</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kramvis</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The complex role of HBeAg and its precursors in the pathway to hepatocellular carcinoma</article-title>
          <source>Viruses</source>
          <year>2023</year>
          <month>03</month>
          <day>27</day>
          <volume>15</volume>
          <issue>4</issue>
          <fpage>857</fpage>
          <pub-id pub-id-type="doi">10.3390/v15040857</pub-id>
          <pub-id pub-id-type="medline">37112837</pub-id>
          <pub-id pub-id-type="pii">v15040857</pub-id>
          <pub-id pub-id-type="pmcid">PMC10144019</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ye</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ju</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>The relationship of maternal hepatitis B e antigen and response to vaccination of infants born to women with chronic infection</article-title>
          <source>BMC Pregnancy Childbirth</source>
          <year>2023</year>
          <month>07</month>
          <day>15</day>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>518</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcpregnancychildbirth.biomedcentral.com/articles/10.1186/s12884-023-05815-y"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12884-023-05815-y</pub-id>
          <pub-id pub-id-type="medline">37454068</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12884-023-05815-y</pub-id>
          <pub-id pub-id-type="pmcid">PMC10349460</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Zhong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jia</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Qiao</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Peripheral blood mononuclear cell traffic plays a crucial role in mother-to-infant transmission of hepatitis B virus</article-title>
          <source>Int J Biol Sci</source>
          <year>2015</year>
          <volume>11</volume>
          <issue>3</issue>
          <fpage>266</fpage>
          <lpage>273</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25678845"/>
          </comment>
          <pub-id pub-id-type="doi">10.7150/ijbs.10813</pub-id>
          <pub-id pub-id-type="medline">25678845</pub-id>
          <pub-id pub-id-type="pii">ijbsv11p0266</pub-id>
          <pub-id pub-id-type="pmcid">PMC4323366</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ngoc Tram</surname>
              <given-names>HT</given-names>
            </name>
            <name name-style="western">
              <surname>Thu Huong</surname>
              <given-names>BT</given-names>
            </name>
            <name name-style="western">
              <surname>Duc Hinh</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Thuy</surname>
              <given-names>PT</given-names>
            </name>
            <name name-style="western">
              <surname>Lan Anh</surname>
              <given-names>LT</given-names>
            </name>
            <name name-style="western">
              <surname>Kim Phuon</surname>
              <given-names>DT</given-names>
            </name>
            <name name-style="western">
              <surname>Thu Lan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Cam Tu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Koerber</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Bauer</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Khanh Lam</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Manh Ha</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Minh Duc</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Thanh Hiep</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Protzer</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Tien Dung</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Multidimensional analysis of the mother-to-child transmission risk factors in chronic hepatitis B virus infection in pregnant women in Vietnam</article-title>
          <source>Clin Ter</source>
          <year>2023</year>
          <volume>174</volume>
          <issue>3</issue>
          <fpage>266</fpage>
          <lpage>274</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.clinicaterapeutica.it/2023/174/3/13_MINH%20DUC_MULTIDIMENSIONAL.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.7417/CT.2023.2533</pub-id>
          <pub-id pub-id-type="medline">37199363</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Juszczuk</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kozak</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dziczkowski</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Głowania</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jach</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Probierz</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Real-world data difficulty estimation with the use of entropy</article-title>
          <source>Entropy</source>
          <year>2021</year>
          <month>12</month>
          <day>01</day>
          <volume>23</volume>
          <issue>12</issue>
          <fpage>1621</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=e23121621"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/e23121621</pub-id>
          <pub-id pub-id-type="medline">34945927</pub-id>
          <pub-id pub-id-type="pii">e23121621</pub-id>
          <pub-id pub-id-type="pmcid">PMC8700715</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Bi</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Xiong</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
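          <article-title>A nomogram based on HBeAg, AST, and age to predict non-minimal liver inflammation in CHB patients with ALT &lt;80 U/L</article-title>
          <source>Front Immunol</source>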
          <year>2023</year>
          <month>01</month>
          <day>19</day>
          <volume>13</volume>
          <fpage>1119124</fpage>
          <pub-id pub-id-type="doi">10.3389/fimmu.2022.1119124</pub-id>
          <pub-id pub-id-type="medline">36741383</pub-id>
          <pub-id pub-id-type="pmcid">PMC9892180</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
