<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v9i1e66960</article-id><article-id pub-id-type="doi">10.2196/66960</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>A Machine Learning&#x2013;Based Scoring System to Identify High Immunoactivity Microsatellite Stability Tumors by Quantifying Similarity to Microsatellite Instability-High Tumors in Colorectal Cancers: Development and Quantitative Study</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Yan</surname><given-names>Hongkai</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Jiang</surname><given-names>Li</given-names></name><degrees>Dr rer nat</degrees><xref ref-type="aff" rid="aff4">4</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Li</surname><given-names>Yaqi</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Wang</surname><given-names>Fengchong</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Mo</surname><given-names>Shaobo</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Sheng</surname><given-names>Weiqi</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff6">6</xref><xref ref-type="aff" rid="aff7">7</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Huang</surname><given-names>Dan</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff6">6</xref><xref ref-type="aff" rid="aff7">7</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Peng</surname><given-names>Junjie</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Oncology, Shanghai Medical College, Fudan University</institution><addr-line>Shanghai</addr-line><country>China</country></aff><aff id="aff2"><institution>Department of Pediatric Cardiology, Xinhua Hospital Affiliated to Shanghai Jiao Tong University School of Medicine</institution><addr-line>Shanghai</addr-line><country>China</country></aff><aff 
id="aff3"><institution>Department of Colorectal Surgery, Fudan University Shanghai Cancer Center</institution><addr-line>270 Dong&#x2019;An Road</addr-line><addr-line>Shanghai</addr-line><country>China</country></aff><aff id="aff4"><institution>Weifang Key Laboratory of Collaborative Innovation of Intelligent Diagnosis and Treatment and Molecular Diseases, School of Basic Medical Sciences, Shandong Second Medical University</institution><addr-line>Weifang</addr-line><country>China</country></aff><aff id="aff5"><institution>Weifang Ten Nanometer Biotechnology Co., Ltd.</institution><addr-line>Weifang</addr-line><country>China</country></aff><aff id="aff6"><institution>Department of Pathology, Fudan University Shanghai Cancer Center</institution><addr-line>Shanghai</addr-line><country>China</country></aff><aff id="aff7"><institution>Institute of Pathology, Fudan University</institution><addr-line>Shanghai</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Sarvestan</surname><given-names>Javad</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Lai</surname><given-names>Jiaying</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Ma</surname><given-names>Weijie</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Junjie Peng, MD, PhD, Department of Colorectal Surgery, Fudan University Shanghai Cancer Center, 270 Dong&#x2019;An Road, Shanghai, 200032, China, 86 02164175590; <email>pengjj@shca.org.cn</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>16</day><month>10</month><year>2025</year></pub-date><volume>9</volume><elocation-id>e66960</elocation-id><history><date 
date-type="received"><day>02</day><month>10</month><year>2024</year></date><date date-type="rev-recd"><day>23</day><month>08</month><year>2025</year></date><date date-type="accepted"><day>26</day><month>08</month><year>2025</year></date></history><copyright-statement>&#x00A9; Hongkai Yan, Li Jiang, Yaqi Li, Fengchong Wang, Shaobo Mo, Weiqi Sheng, Dan Huang, Junjie Peng. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 16.10.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2025/1/e66960"/><abstract><sec><title>Background</title><p>Microsatellite stability (MSS) colorectal cancers (CRCs) have a limited response to immune checkpoint inhibitors (ICIs) compared to microsatellite instability-high (MSI-H) CRCs. 
Nevertheless, previous studies have shown that some MSS CRCs are sensitive to ICIs, although established criteria for treatment justification are still lacking.</p></sec><sec><title>Objective</title><p>This study aimed to test the tumor-infiltrating lymphocyte (TIL) features of MSS and develop a novel computational tool for the similarity prediction between MSS and MSI-H status in patients with CRC based on multiple factors.</p></sec><sec sec-type="methods"><title>Methods</title><p>We collected and analyzed data from 188 patients with CRC, including MSI status, immune cell distributions, clinical features, and gene mutations, using statistical methods and Cox regression. An ensemble machine learning&#x2013;based MSI-H score was developed using stacked extreme gradient boosting classifiers to quantify the similarity of patient data to MSI-H data based on immune cell distributions, clinical features, and gene mutations. The model was robust and could address missing input data for immune cell distributions and gene mutations.</p></sec><sec sec-type="results"><title>Results</title><p>The scorer performed well (mean Cohen &#x03BA; of 0.40, SD 0.05, over 10 random seeds) in identifying MSI-H&#x2013;like MSS samples with TIL distributions similar to genuine MSI-H CRCs. No significant difference was observed between the TIL features of MSI-H&#x2013;like MSS CRCs and MSI-H CRCs. The disparity between MSI-H&#x2013;like MSS CRCs and MSS CRCs potentially lies in the T regulatory cells (<italic>P</italic>=.09) and macrophage (<italic>P</italic>=.16) populations within the tumor stromal region.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Some patients with MSS CRC presented similar immune cell distributions with high immunoactivity compared to patients with MSI-H CRC. 
The MSI-H score serves as a metric to quantify the similarity of MSS CRCs to MSI-H CRCs and presents a promising avenue for more personalized and effective cancer immunotherapy treatment, offering a clinical reference for potential ICI targets in MSS CRCs.</p></sec></abstract><kwd-group><kwd>colorectal tumor</kwd><kwd>machine learning</kwd><kwd>ML</kwd><kwd>immunotherapy</kwd><kwd>immunoactivity</kwd><kwd>tumors</kwd><kwd>mutation</kwd><kwd>genetics</kwd><kwd>cancer</kwd><kwd>colorectal</kwd><kwd>colorectal cancer</kwd><kwd>microsatellite stability</kwd><kwd>immune cell</kwd><kwd>gene mutation</kwd><kwd>Cox regression</kwd><kwd>regression analysis</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Colorectal cancer (CRC) is one of the leading causes of cancer death worldwide [<xref ref-type="bibr" rid="ref1">1</xref>]. Microsatellite status divides CRCs into two subtypes: (1) deficient mismatch repair or microsatellite instability-high (MSI-H) tumors and (2) proficient mismatch repair or microsatellite stability (MSS) and microsatellite instability-low tumors [<xref ref-type="bibr" rid="ref2">2</xref>]. These 2 subtypes are distinct in terms of clinicopathological factors, gene mutations, and the immune microenvironment [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. One of the pivotal treatment modalities in the field of CRC is immunotherapy, especially the administration of immune checkpoint inhibitors (ICIs), including antiprogrammed cell death-1 (PD-1) and antiprogrammed cell death ligand 1 (PD-L1) antibodies [<xref ref-type="bibr" rid="ref4">4</xref>]. 
A reliable predictor of immunotherapy response and immunoactivity is MSI-H status; notably, the Food and Drug Administration and the European Medicines Agency granted approval for the use of ICIs to treat MSI-H CRC in 2017 and 2021, respectively [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>While ICIs offer an alternative to surgery and chemotherapy for MSI-H CRCs, the use of ICIs to treat MSS CRCs still lacks justification, with no guidelines for identifying high immunoactivity MSS CRCs; thus, a large population of patients with MSS CRC lacks effective treatment options [<xref ref-type="bibr" rid="ref8">8</xref>]. However, MSS status is not an absolute marker for excluding immunotherapy. A subset of MSS CRCs responded to ICI therapies [<xref ref-type="bibr" rid="ref9">9</xref>]. A meta-analysis provides evidence for the application of ICI therapies in nonmetastatic MSS CRCs and highlights its safety and the potential for organ preservation with this approach [<xref ref-type="bibr" rid="ref10">10</xref>]. In addition, Motta et al [<xref ref-type="bibr" rid="ref11">11</xref>] demonstrated that some MSS CRCs (up to 20%) harbor a similar profile, including immunological, genetic, pathological, and clinical characteristics, to MSI-H tumors. Therefore, identifying MSS CRCs with similar profiles to MSI-H CRCs could be a reasonable approach, and a strategy for achieving this is urgently needed. Tumor-infiltrating lymphocytes (TILs), a polymorphic group consisting primarily of effector T lymphocytes, regulatory T lymphocytes, natural killer cells, dendritic cells, and macrophages, are a critical feature of CRC immunology [<xref ref-type="bibr" rid="ref12">12</xref>]. TILs are useful in immunotherapy and immunoactivity prediction [<xref ref-type="bibr" rid="ref13">13</xref>]. 
Notably, the intratumoral spatial heterogeneity of TILs is an important factor for precisely stratifying prognostic immune subgroups of MSI-H CRC [<xref ref-type="bibr" rid="ref14">14</xref>].</p><p>In this study, we developed a novel MSI-H score based on ensemble machine learning to quantify the degree of similarity of immunoactivity between patients with MSS CRC and patients with MSI-H CRC. A subgroup of patients with MSS CRC with high MSI-H scores was defined as patients with MSI-H&#x2013;like MSS CRC, exhibiting MSI-H&#x2013;like features in immune cell distributions, gene mutations, pathological reports, and clinical characteristics. This work paves the way for more personalized, accurate, and effective cancer immunotherapy treatments, delivering a clinical reference for identifying potential ICI targets and advancing patient care.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Recruitment</title><p>Data from 188 patients with stage II CRC and tissue samples were collected from the institutional database of the Fudan University Shanghai Cancer Center between 2013 and 2019. The American Joint Committee on Cancer staging system was used to determine each patient&#x2019;s stage [<xref ref-type="bibr" rid="ref15">15</xref>]. Based on next-generation sequencing, 24 patients were classified as MSI-H. None of the patients had radiation therapy, chemotherapy, or immunotherapy before tumor resection. Clinical and pathological data were obtained from patient records and postoperative pathology reports.</p></sec><sec id="s2-2"><title>Multiplex Immunohistochemistry Staining</title><p>Sections (4 &#x03BC;m thick) were cut from formalin-fixed, paraffin-embedded CRC tissue and control tonsil tissue for multiplex immunohistochemistry (mIHC). The slides were dewaxed in xylene, rehydrated, and rinsed in graded ethanol solutions and tap water. Antibody diluent/block (72424205; PerkinElmer) was applied to block endogenous peroxidase. 
The slides were boiled in a Tris-EDTA buffer (pH: 9; 643901; Klinipath) and underwent microwave treatment (MWT) for antigen retrieval. Information on the primary antibodies and the corresponding fluorophores is provided in Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, including 2 panels (<xref ref-type="fig" rid="figure1">Figure 1</xref>). One antigen required 1 round of labeling, including primary antibody incubation, secondary antibody incubation, and tyramide signal amplification (TSA) visualization, followed by labeling of the subsequent antibody. After incubation with the primary antibody for 1 hour at room temperature, the slides were incubated with Opal Polymer HRP Ms+Rb (2414515; PerkinElmer) at 37 &#x2103; for 10 minutes. TSA visualization was performed with the Opal 7-Color IHC Kit (NEL797B001KT; PerkinElmer) containing the fluorophores 4,6-diamidino-2-phenylindole (DAPI; Thermo Scientific) and the TSA Coumarin system (NEL703001KT; PerkinElmer). MWT was performed to remove the antibody-TSA complex with the Tris-EDTA buffer (pH: 9). TSA single-stained slides were finished with MWT, counterstained with DAPI for 5 minutes, and enclosed in Antifade mounting medium (I0052; NobleRyder).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Two panels of multiplex immunohistochemistry (mIHC). (A) Representative hematoxylin and eosin (HE) and mIHC staining images of panel 1: the upper line of images represents HE staining and the staining of CD8, CD45RO, and CD3; the lower line of images represents programmed cell death-1 (PD-1) staining, programmed cell death ligand 1 (PD-L1) staining, and the merge image. 
(B) Representative HE and mIHC staining images of panel 2: the upper line of images represents HE staining and the staining of CD4, FOXP3, and CD68; the lower line of images represents CD163 staining, PD-L1 staining, and the merge image.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e66960_fig01.png"/></fig></sec><sec id="s2-3"><title>Image Acquisition and Analysis</title><p>Multiplexed and single-color control slides were scanned at an absolute magnification of 200&#x00D7; by the PerkinElmer Vectra automated multispectral microscope. Representative fields from the single-color slides were imaged, and a spectral library for unmixing was generated by inForm image analysis software (version 2.1; PerkinElmer). Index cases were stained using the multiplex method and then imaged. Channels were unmixed using the spectral library. All settings were saved within an algorithm to allow for batch analysis of multiple original multispectral images of the same tissue [<xref ref-type="bibr" rid="ref16">16</xref>].</p></sec><sec id="s2-4"><title>Quantification of Immune Cell Densities and Classification</title><p>The nuclear morphological features were based on DAPI staining. The numbers of immune cells in each image were scored as percent cellularity (number of positive cells/number of nucleated cells). Five representative fields at 200&#x00D7; magnification of tissue area were selected. The densities of immune cells were segmented independently by 2 pathologists. Immune variables were classified based on the patterns of fluorochrome intensity.</p></sec><sec id="s2-5"><title>Patient Follow-Up</title><p>Patients were monitored every 3&#x2010;6 months for 3 years, then every 6&#x2010;12 months up to 5 years. 
Follow-ups included rectal exams, carcinoembryonic antigen (CEA) tests, annual radiological studies, and colonoscopies as needed.</p></sec><sec id="s2-6"><title>Test of MSI and CRC-Relevant Mutations</title><p>The ColonCore panel (Burning Rock) is designed for simultaneous detection of MSI status and mutations in 37 CRC-related genes (Table S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). The MSI detection method was a read-count-distribution&#x2013;based approach, using the coverage ratio of a specific set of repeat lengths as the main characteristic of each microsatellite locus. The MSI status of a sample was determined by the percentage of unstable loci in the given sample [<xref ref-type="bibr" rid="ref17">17</xref>].</p></sec><sec id="s2-7"><title>Statistical Tests and Survival Analysis</title><p>Statistical analysis was performed and visualized by R (version 3.4.3; R Foundation for Statistical Computing), SPSS software (version 25.0; IBM Corp), and GraphPad Prism 7 software (GraphPad Software Inc). All group-wise comparisons were conducted by the 2-sided unpaired Mann-Whitney <italic>U</italic> test, followed by the Bonferroni procedure. The Cox proportional hazards regression model was used to assess the hazard ratios, 95% CIs, and <italic>P</italic> values for univariate and multivariate analysis. Variables with <italic>P</italic>&#x003C;.10 after adjusting for common clinicopathological parameters were included in the multivariate analysis. Survival times were compared using the log-rank test. A <italic>P</italic> value of &#x003C;.05 was considered statistically significant, and all <italic>P</italic> values corresponded to 2-sided statistical tests.</p></sec><sec id="s2-8"><title>Feature Engineering</title><p>The process from feature engineering to model evaluation is depicted in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>. 
Categorical features were one-hot encoded as dummy variables. The mutation landscape was also one-hot encoded based on gene classes, with 2 classification stringencies, using the Database for Annotation, Visualization, and Integrated Discovery (DAVID) gene functional classification tool [<xref ref-type="bibr" rid="ref18">18</xref>]. To further engineer the mutation landscape, we calculated the joint posterior mutation probability <inline-formula><mml:math id="ieqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>P</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>I</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi>M</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mstyle></mml:math></inline-formula> with the following equation:</p><disp-formula id="E1"><label>(1)</label><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>I</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi>M</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>I</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x220F;</mml:mo><mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:msub><mml:mi>g</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>g</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo 
stretchy="false">|</mml:mo></mml:mrow><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>I</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mrow><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>I</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x220F;</mml:mo><mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:msub><mml:mi>g</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>g</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>I</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>S</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x220F;</mml:mo><mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:msub><mml:mi>g</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mi>P</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>g</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>S</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>P</mml:mi><mml:mo stretchy="false">(</mml:mo><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>I</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi>M</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:mstyle></mml:math></inline-formula> is the probability that a patient has MSI-H status given their mutation landscape and is based on prior probabilities and the frequencies of 
mutated genes in MSI-H and MSS populations, <inline-formula><mml:math id="ieqn3"><mml:msub><mml:mrow><mml:mi>g</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> is a mutated gene in a patient&#x2019;s sample, <inline-formula><mml:math id="ieqn4"><mml:mi>M</mml:mi><mml:mo>=</mml:mo><mml:mfenced open="{" close="}" separators="|"><mml:mrow><mml:msub><mml:mrow><mml:mi>g</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfenced></mml:math></inline-formula> is the set of all detected mutated genes in the sample, <inline-formula><mml:math id="ieqn5"><mml:mi>P</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>I</mml:mi></mml:mrow></mml:mfenced></mml:math></inline-formula> is the probability of MSI-H in a CRC sample (0.17), <inline-formula><mml:math id="ieqn6"><mml:mi>P</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>S</mml:mi></mml:mrow></mml:mfenced></mml:math></inline-formula> is the probability of MSS in a CRC sample (0.83), <inline-formula><mml:math id="ieqn7"><mml:mi>P</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:msub><mml:mrow><mml:mi>g</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>I</mml:mi></mml:mrow></mml:mfenced></mml:math></inline-formula> is the frequency of a mutated gene <inline-formula><mml:math id="ieqn8"><mml:msub><mml:mrow><mml:mi>g</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> in a CRC MSI-H population, and <inline-formula><mml:math id="ieqn9"><mml:mi>P</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:msub><mml:mrow><mml:mi>g</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>S</mml:mi></mml:mrow></mml:mfenced></mml:math></inline-formula> is the frequency of a mutated gene 
<inline-formula><mml:math id="ieqn10"><mml:msub><mml:mrow><mml:mi>g</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula> in a CRC MSS population. <inline-formula><mml:math id="ieqn11"><mml:mi>P</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:msub><mml:mrow><mml:mi>g</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>I</mml:mi></mml:mrow></mml:mfenced></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn12"><mml:mi>P</mml:mi><mml:mfenced separators="|"><mml:mrow><mml:msub><mml:mrow><mml:mi>g</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>|</mml:mo><mml:mi>M</mml:mi><mml:mi>S</mml:mi><mml:mi>S</mml:mi></mml:mrow></mml:mfenced></mml:math></inline-formula> were based on the findings of Serebriiskii et al [<xref ref-type="bibr" rid="ref19">19</xref>] and 2 datasets [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>] on cBioPortal [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. Our Bayesian-based metric can explicitly incorporate prior biological knowledge, including MSI-H/MSS prevalence in populations with CRC and microsatellite status&#x2013;specific mutation frequencies, and probabilistic reasoning into the modeling process. Leveraging these priors potentially enhances the model&#x2019;s ability to distinguish MSI-H from MSS cases. This metric was included in the dataset along with the other features and used for model training.</p><p>Though no missing data were present in the dataset, our model can handle missing input from users because we trained several models with varied complexities, as elaborated in the following section.</p></sec><sec id="s2-9"><title>Model Training and Deployment</title><p>Sample microsatellite status was binary labeled (MSI-H as 1 and MSS as 0). 
Multiple extreme gradient boosting (XGBoost) models [<xref ref-type="bibr" rid="ref24">24</xref>] were trained to identify MSI-H&#x2013;like tumors with different combinations of features, that is, patient metainformation, mutational landscape&#x2013;derived features, and mIHC results, such as PD-L1, CD163, and CD8 mIHC staining results. The likelihood of MSI-H predicted by the models was defined as the MSI-H score. Specifically, 44 models, with different combinations of features shown in Table S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, were trained. To address potential bias caused by class imbalance, the scale_pos_weight parameter was set to the class ratio during model training.</p><p>The models were then deployed as a public web interface. As users&#x2019; data privacy is prioritized, users&#x2019; input is never stored on our server. Each model ensemble consists of 10 submodels, each trained with a distinct random seed. The final prediction for any user input is the average of the middle 6 submodel outputs, excluding the extremes (ie, the 2 highest and 2 lowest outputs). One of the XGBoost tree models in pseudocode is shown in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p></sec><sec id="s2-10"><title>Visualization and Clustering of TILs</title><p>To understand the feature importance in an unbiased and holistic way, a massive exploratory XGBoost model with all features was trained. These features include patient metainformation, mutational landscape&#x2013;derived features, and all mIHC results. The massive model can capture the full spectrum of the MSI-H score variation and avoid the potential bias or noise introduced by the feature selection process. 
The feature importance was then computed using both the XGBoost built-in function and the Shapley additive explanations package [<xref ref-type="bibr" rid="ref25">25</xref>].</p><p>Following classification by this model (threshold=0.3 defining MSI-H, chosen so that predicted MSI-H proportion approximates the epidemiologically documented prevalence of MSI-H CRC), we visualized all mIHC features of all samples by grouped box plots. For each cell type, we compared 4 groups (all MSS vs MSI-H, other MSS vs MSI-H, other MSS vs MSI-H&#x2013;like MSS, and MSI-H&#x2013;like MSS vs MSI-H) by 2-sided unpaired Mann-Whitney <italic>U</italic> test and Benjamini-Hochberg adjustment [<xref ref-type="bibr" rid="ref26">26</xref>]. To cluster cells based on 4 comparisons, we projected each cell type into a 4D latent space using a formula measuring similarity between cell percentages of former and latter populations:</p><disp-formula id="E2"><label>(2)</label><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>m</mml:mi><mml:mi>i</mml:mi><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>r</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>f</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mo>,</mml:mo><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo></mml:mstyle><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="left left" rowspacing=".2em" columnspacing="1em" 
displaystyle="false"><mml:mtr><mml:mtd><mml:msub><mml:mi>p</mml:mi><mml:mrow><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mfrac><mml:mrow><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:msqrt><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:msqrt></mml:mfrac><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext></mml:mtd><mml:mtd><mml:mtext>&#x00A0;</mml:mtext><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mi>i
</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>f</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mo>&#x2260;</mml:mo><mml:mi>m</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>l</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi></mml:mrow></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msub><mml:mi>p</mml:mi><mml:mrow><mml:mi>a</mml:mi><mml:mi>d</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext></mml:mtd><mml:mtd><mml:mi>o</mml:mi><mml:mi>t</mml:mi><mml:mi>h</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>w</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"/></mml:mrow></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <italic>r</italic><sub>former</sub> and <italic>r</italic><sub>latter</sub> represent the percentages of a specific cell type (eg, CD8+ cells) measured in individual samples belonging to the &#x201C;former&#x201D; and &#x201C;latter&#x201D; groups, respectively. <italic>p<sub>adj</sub></italic> is the adjusted <italic>P</italic> value of a comparison. We then performed hierarchical clustering based on Euclidean distance in latent space with complete linkage [<xref ref-type="bibr" rid="ref27">27</xref>].</p></sec><sec id="s2-11"><title>Feature and Model Evaluation</title><p>Model generalizability was assessed by training models with identical hyperparameters through stratified 5-fold cross-validation. 
Cohen &#x03BA; coefficients were computed on each hold-out fold, and the mean Cohen &#x03BA; was computed based on the 5 &#x03BA;&#x2019;s, with greater &#x03BA;&#x2019;s indicating better model performance. This training and validation process was repeated 10 times with different random stratified splits and model initializations.</p></sec><sec id="s2-12"><title>Ethical Considerations</title><p>Ethics approval was obtained from the Ethics Committee of Fudan University Shanghai Cancer Center (approval number 1808190&#x2010;12), and informed consent was obtained from all participants. Neither the patients nor the public were involved in this study (ie, only database tissue samples and data from patient records and postoperative pathology reports were used). All patient data collected for this study were deidentified prior to analysis.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Higher TIL Infiltration in the Stromal Region Than in the Tumor Region</title><p>TILs were analyzed using the mIHC method. Significant differences were found between the stromal region and the tumor region (<xref ref-type="fig" rid="figure2">Figure 2</xref>). The stromal region showed a higher prevalence of CD3+ T cells (<italic>P</italic>&#x003C;.001), CD8+ T cells (<italic>P</italic>&#x003C;.001), memory T cells (<italic>P</italic>&#x003C;.001), CD8+ memory T cells (<italic>P</italic>&#x003C;.001), CD3+ PD-1+ T cells (<italic>P</italic>&#x003C;.001), CD4+ T cells (<italic>P</italic>=.048), regulatory T cells (Tregs; <italic>P</italic>&#x003C;.001), macrophages (<italic>P</italic>=.001), M1 macrophages (<italic>P</italic>=.003), M2 macrophages (<italic>P</italic>=.001), and PD-L1+ cells (<italic>P</italic>=.007) than the tumor region. 
However, no significant difference was observed for CD8+ PD-1+ T cells and PD-L1+ macrophages between the stromal region and tumor region.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>The difference in tumor-infiltrating lymphocytes between the stromal region and tumor region. Each cell type ratio of relevant samples is shown by a box plot. The cell percentage difference in the stromal region and tumor region was compared, and all <italic>P</italic> values were adjusted with the Bonferroni procedure and are shown on the right side. NS: not significant; PD-1: programmed cell death-1; PD-L1: programmed cell death ligand 1; Treg: regulatory T cell. *<italic>P</italic>&#x003C;.05; **<italic>P</italic>&#x003C;.01; ***<italic>P</italic>&#x003C;.001.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e66960_fig02.png"/></fig></sec><sec id="s3-2"><title>Prognostic Impact of Clinical Characteristics, MSI Status, and Immune Cell Infiltration</title><p>Variables (<xref ref-type="table" rid="table1">Table 1</xref>) commonly collected in clinics and related to prognosis or with a <italic>P</italic> value &#x003C;.10 in univariate analysis (Tables S4 and S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) were analyzed using the Cox proportional hazards regression model (Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Microsatellite status was not linked to overall survival or disease-free survival in multivariate analysis. Significant overall survival predictors included age, CEA, M1 macrophage (CD68+ CD163&#x2013;) infiltration in stromal region, and PD-1+ T cell (CD3+ PD-1+) infiltration in tumor region. 
Significant disease-free survival predictors included age, CEA, tumor differentiation, and CD8+ T cell (CD8+) infiltration in tumor region (Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Clinical characteristics related to microsatellite instability-high (MSI-H) and microsatellite stability (MSS) status.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Characteristic</td><td align="left" valign="bottom">Patients, n</td><td align="left" valign="bottom">MSS tumor (n=164), n (%)</td><td align="left" valign="bottom">MSI-H tumor (n=24), n (%)</td><td align="left" valign="bottom"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="5">Sex</td><td align="left" valign="top">.82</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Male</td><td align="left" valign="top">106</td><td align="left" valign="top">93 (57)</td><td align="left" valign="top">13 (54)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Female</td><td align="left" valign="top">82</td><td align="left" valign="top">71 (43)</td><td align="left" valign="top">11 (46)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="5">Age (y)</td><td align="left" valign="top">.71</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x003C;65</td><td align="left" valign="top">111</td><td align="left" valign="top">96 (59)</td><td align="left" valign="top">15 (63)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2265;65</td><td align="left" valign="top">77</td><td align="left" valign="top">68 (41)</td><td align="left" valign="top">9 (38)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" 
colspan="5">Mucinous</td><td align="left" valign="top">.006</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">No</td><td align="left" valign="top">152</td><td align="left" valign="top">138 (84)</td><td align="left" valign="top">14 (58)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Yes</td><td align="left" valign="top">36</td><td align="left" valign="top">26 (16)</td><td align="left" valign="top">10 (42)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="5">Differentiation</td><td align="left" valign="top">.002</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Poor</td><td align="left" valign="top">40</td><td align="left" valign="top">29 (18)</td><td align="left" valign="top">11 (50)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Moderate to well</td><td align="left" valign="top">140</td><td align="left" valign="top">129 (82)</td><td align="left" valign="top">11 (50)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="5">T stage</td><td align="left" valign="top">.59</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">T3</td><td align="left" valign="top">88</td><td align="left" valign="top">78 (48)</td><td align="left" valign="top">10 (42)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top">T4</td><td align="left" valign="top">100</td><td align="left" valign="top">86 (52)</td><td align="left" valign="top">14 (58)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="5">Tumor site</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Right</td><td align="left" valign="top">52</td><td align="left" valign="top">36 (22)</td><td align="left" valign="top">16 (67)</td><td 
align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Left</td><td align="left" valign="top">52</td><td align="left" valign="top">49 (30)</td><td align="left" valign="top">3 (13)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Rectum</td><td align="left" valign="top">83</td><td align="left" valign="top">78 (48)</td><td align="left" valign="top">5 (21)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="5">Lymphovascular invasion</td><td align="left" valign="top">.43</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">No</td><td align="left" valign="top">149</td><td align="left" valign="top">128 (78)</td><td align="left" valign="top">21 (88)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Yes</td><td align="left" valign="top">39</td><td align="left" valign="top">36 (22)</td><td align="left" valign="top">3 (13)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="5">Perineural invasion</td><td align="left" valign="top">.86</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">No</td><td align="left" valign="top">136</td><td align="left" valign="top">119 (73)</td><td align="left" valign="top">17 (71)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Yes</td><td align="left" valign="top">52</td><td align="left" valign="top">45 (27)</td><td align="left" valign="top">7 (29)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="5">CEA<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> (ng/ml)</td><td align="left" valign="top">.94</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x003C;5</td><td align="left" valign="top">124</td><td align="left" valign="top">108 (66)</td><td align="left" 
valign="top">16 (67)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top">&#x2265;5</td><td align="left" valign="top">64</td><td align="left" valign="top">56 (34)</td><td align="left" valign="top">8 (33)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="5">Chemotherapy</td><td align="left" valign="top">.45</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">No</td><td align="left" valign="top">76</td><td align="left" valign="top">68 (41)</td><td align="left" valign="top">8 (33)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Yes</td><td align="left" valign="top">112</td><td align="left" valign="top">96 (59)</td><td align="left" valign="top">16 (67)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="5">Radiotherapy</td><td align="left" valign="top">&#x003E;.99</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">No</td><td align="left" valign="top">163</td><td align="left" valign="top">142 (92)</td><td align="left" valign="top">21 (91)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Yes</td><td align="left" valign="top">15</td><td align="left" valign="top">13 (8)</td><td align="left" valign="top">2 (9)</td><td align="left" valign="top"/></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>CEA: carcinoembryonic antigen. </p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>TIL Distribution in MSI-H CRCs, All MSS CRCs, MSI-H&#x2013;Like MSS CRCs, and Other MSS CRCs</title><p>The mIHC experiment was performed to examine the TILs in all CRCs (<xref ref-type="fig" rid="figure3">Figure 3</xref>). MSI-H CRCs exhibited significantly higher infiltration of TILs compared to MSS CRCs in both the tumor region and stromal region. 
Specifically, MSI-H CRCs had a more abundant presence of PD-L1+ M2 macrophages (<italic>P</italic>=.001), CD163+ cells (<italic>P</italic>=.001), PD-L1+ macrophages (<italic>P</italic>=.01), M2 macrophages (<italic>P</italic>=.001), and macrophages (<italic>P</italic>=.03) in the stromal region, as well as M2 macrophages in the tumor region (<italic>P</italic>=.02).</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Tumor-infiltrating lymphocyte (TIL) distributions in microsatellite instability-high (MSI-H), all microsatellite stability (MSS), MSI-H&#x2013;like MSS, and other MSS colorectal cancers (CRCs). The box plot on the right displays the percentages of a cell type in relevant samples for each class. Cell percentage differences were compared, and <italic>P</italic> values were adjusted and are presented in the heat map (<italic>P</italic>&#x003C;.001 when adjusted <italic>P</italic> values were &#x003C;.001 due to rounding), along with the median comparison result between the 2 populations (+: former population median&#x003E;latter population median; -: former population median&#x003C;latter population median). FDR: false discovery rate; mIHC: multiplex immunohistochemistry; PD-1: programmed cell death-1; PD-L1: programmed cell death ligand 1; Treg: regulatory T cell.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e66960_fig03.png"/></fig><p>MSI-H&#x2013;like MSS CRCs exhibited TIL infiltration patterns akin to MSI-H CRCs but distinct from other MSS CRCs. No significant difference was observed between MSI-H&#x2013;like MSS and genuine MSI-H CRCs (the fourth column in <xref ref-type="fig" rid="figure3">Figure 3</xref>). 
Compared to other MSS CRCs, MSI-H&#x2013;like MSS CRCs showed higher infiltration of CD163+ cells in the stromal region (<italic>P</italic>=.001; the box plot and the third column in <xref ref-type="fig" rid="figure3">Figure 3</xref>) and potentially increased levels of PD-L1+ M2 macrophages (<italic>P</italic>=.13), FOXP3+ cells (<italic>P</italic>=.09), Tregs (<italic>P</italic>=.09), PD-L1+ macrophages (<italic>P</italic>=.16), M2 macrophages (<italic>P</italic>=.09), and macrophages (<italic>P</italic>=.16) in the stromal region, as well as M2 macrophages (<italic>P</italic>=.13) in the tumor region. The distinct infiltration patterns of TILs indicate that the heightened presence of macrophages and Tregs is a key factor in distinguishing MSI-H&#x2013;like MSS CRCs from MSS CRCs.</p><p>Macrophages were also found to be significantly more abundant in genuine MSI-H CRCs than in other MSS CRCs (the second column in <xref ref-type="fig" rid="figure3">Figure 3</xref>). Specifically, in the stromal region, PD-L1+ M2 macrophages (<italic>P</italic>&#x003C;.001), CD163+ cells (<italic>P</italic>&#x003C;.001), PD-L1+ macrophages (<italic>P</italic>=.004), M2 macrophages (<italic>P</italic>&#x003C;.001), M1 macrophages (<italic>P</italic>=.045), CD4+ T cells (<italic>P</italic>=.045), and macrophages (<italic>P</italic>=.01) were significantly more abundant in MSI-H CRCs than in other MSS CRCs. In the tumor region, M2 macrophages (<italic>P</italic>=.007), M1 macrophages (<italic>P</italic>=.045), and macrophages (<italic>P</italic>=.045) were found to be significantly increased in MSI-H CRCs compared with other MSS CRCs.</p><p>The TIL distribution shows that the model performed well. 
The scorer, which was trained and validated on only 3 types of lymphocytes, classified MSI-H&#x2013;like MSS CRC samples with similar TIL distributions as MSI-H CRC samples (the fourth column in <xref ref-type="fig" rid="figure3">Figure 3</xref>) rather than MSS CRC samples (the third column in <xref ref-type="fig" rid="figure3">Figure 3</xref>), despite most features of TILs (15 other lymphocytes) being unknown to the model. In addition, as anticipated, the heat map in <xref ref-type="fig" rid="figure3">Figure 3</xref> (second and third columns) revealed that other MSS CRC samples exhibited a slightly closer TIL distribution to MSI-H&#x2013;like MSS CRC samples than to MSI-H CRC samples.</p><p>The feature importance in a large predictive model is described in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>. TIL features from the whole or stromal region were more predictive than the tumor region alone for the MSI-H status. Top features for MSI-H score predictor included macrophage subtypes, mutational landscape, and immune cell distributions.</p></sec><sec id="s3-4"><title>MSI-H Score Predictor Generalization Ability Affected by Feature Number and Type</title><p>Increasing the number of features generally enhanced <inline-formula><mml:math id="ieqn13"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mover><mml:mi>&#x03BA;</mml:mi><mml:mo stretchy="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, indicating better generalization performance (<xref ref-type="fig" rid="figure4">Figure 4</xref>). 
However, models incorporating PD-L1 mIHC staining tended to exhibit lower <inline-formula><mml:math id="ieqn14"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mover><mml:mi>&#x03BA;</mml:mi><mml:mo stretchy="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> compared to those without, likely due to noise in PD-L1 measurements, as evidenced by the high SD of <inline-formula><mml:math id="ieqn15"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mover><mml:mi>&#x03BA;</mml:mi><mml:mo stretchy="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> for models 2 to 4. This noise effect was mitigated by increasing model complexity; for example, model 44 had a greater <inline-formula><mml:math id="ieqn16"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mover><mml:mi>&#x03BA;</mml:mi><mml:mo stretchy="false">&#x00AF;</mml:mo></mml:mover></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> than model 41, despite including PD-L1. Feature importance analysis (<xref ref-type="fig" rid="figure5">Figure 5</xref>) revealed that while PD-L1 remained relevant, its importance diminished as models became more complex, suggesting that sophisticated models learned to filter out noise and extract useful information from PD-L1. The variable Spearman correlation matrix heat map is shown in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>. In addition, an MSI-H scorer web interface is freely accessible [<xref ref-type="bibr" rid="ref28">28</xref>].</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Box plot of mean Cohen &#x03BA; values evaluated through 5-fold cross-validation repeated over 10 random seeds for each model. 
In general, as model complexity increases, the model&#x2019;s ability to generalize tends to improve, as indicated by higher Cohen <italic>&#x03BA;</italic> values.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e66960_fig04.png"/></fig><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Bubble chart of feature importance. Each bubble represents a feature (y-axis) and its importance in the model (x-axis), as computed by using the native &#x201C;gain&#x201D; importance metric (built-in) from extreme gradient boosting (XGBoost; a darker bubble color indicates higher importance) and Shapley additive explanations (SHAP; a larger bubble area indicates higher importance). Mutation landscapes and tumor sites consistently have relatively dark, large bubbles, indicating their importance in the model. CEA: carcinoembryonic antigen.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e66960_fig05.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>Immunotherapy has been successful for treating MSI-H CRCs but is not as effective in MSS CRCs, which comprise the majority of CRCs. Thus, we developed a machine learning&#x2013;based MSI-H predictor to generate a robust and reliable score that can capture the complexity and heterogeneity of CRC and better target patients with MSS CRC who may benefit from immunotherapy. Our study also provides insights into the immune landscape of CRC and the role of immune cell distributions, clinical features, and gene mutations in influencing MSI status. This CRC prognostic study mostly agrees with our previous research [<xref ref-type="bibr" rid="ref29">29</xref>] and with findings from other authors [<xref ref-type="bibr" rid="ref30">30</xref>]. 
For example, according to our results, TIL infiltration, primarily by macrophages or CD163+ cells, was significantly higher in MSI-H CRCs than in MSS CRCs (<xref ref-type="fig" rid="figure3">Figure 3</xref>), consistent with previous studies [<xref ref-type="bibr" rid="ref31">31</xref>].</p><p>We observed a higher abundance of TIL subsets in the stromal region than in the tumor region, indicating a more active immune response in the stromal region (<xref ref-type="fig" rid="figure2">Figure 2</xref>). Comparing the MSI predictive performance of models 42, 43, and 44 (<xref ref-type="fig" rid="figure4">Figure 4</xref>) also highlights the importance of stromal TILs. Regional disparities underscore the importance of analyzing the complete tumor region for comprehensive insights. Our scorer successfully identified MSI-H&#x2013;like MSS samples with TIL distributions similar to genuine MSI-H CRCs (<xref ref-type="fig" rid="figure3">Figure 3</xref>). In addition, the balance between proinflammatory and anti-inflammatory signals is an important immunological characteristic. Macrophages can be classified into 2 main subtypes: M1 macrophages with proinflammatory and antitumor functions and M2 macrophages with anti-inflammatory and protumor functions. The ratio of M1/M2 macrophages may influence immunotherapy outcomes, reflecting the balance between proinflammatory and anti-inflammatory signals in the tumor microenvironment [<xref ref-type="bibr" rid="ref32">32</xref>]. Tregs are frequently known to be immunosuppressive and can predict both the host immune response and chemotherapeutic response [<xref ref-type="bibr" rid="ref33">33</xref>]. Both macrophages and Tregs are important in the regulation of immunoactivity. As is shown in our results, the distribution of macrophages and Tregs appears to be important in differentiating MSI-H&#x2013;like MSS CRCs from other MSS CRCs based on TIL infiltration patterns (<xref ref-type="fig" rid="figure3">Figure 3</xref>). 
By comparing <inline-formula><mml:math id="ieqn17"><mml:mover accent="true"><mml:mrow><mml:mi>&#x03BA;</mml:mi></mml:mrow><mml:mo>&#x0304;</mml:mo></mml:mover></mml:math></inline-formula> variations within model sets and between specific model set pairs (<xref ref-type="fig" rid="figure4">Figure 4</xref>)&#x2014;including model 5/6/7 versus 13/16/19, 27/28/29 versus 35/38/41, 2/3/4 versus 12/15/18, 24/25/26 versus 34/37/40, 11/14/17 versus 20/21/22, and 33/36/39 versus 42/43/44&#x2014;we observed that the CD163 mIHC result increased the predictive value for whole-tumor MSI scores but reduced it for tumor region scores. To better understand the differences in M2 macrophages and Tregs between MSS CRCs and MSI-H CRCs, further research on their function in CRCs is necessary.</p><p>Our analysis revealed that PD-L1+ M2 macrophages in the total region, mutational landscape, CD163+ cells in the stromal region, PD-L1+ M2 macrophages in the stromal region, and tumor site were the most important features for predicting MSI-H status (<xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>), aligning with other research results [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. Macrophages can express PD-L1 and interact with PD-1+ T cells, which may affect the response to immunotherapy [<xref ref-type="bibr" rid="ref34">34</xref>]. However, PD-L1+ macrophages potentially indicate M1-like polarization profiles [<xref ref-type="bibr" rid="ref35">35</xref>]. The stroma is important because it can influence the extracellular matrix formation, angiogenesis, immune response, and therapeutic resistance of tumors [<xref ref-type="bibr" rid="ref3">3</xref>]. The importance of the mutational landscape for prediction is widely known [<xref ref-type="bibr" rid="ref36">36</xref>]. We did not find an obvious immunological explanation for why tumor site would impact the similarity between MSI-H and MSS. 
Further study is needed to clarify the underlying mechanisms. Moreover, we observed that feature number and type influenced the generalization ability of the MSI-H score prediction models (<xref ref-type="fig" rid="figure4">Figure 4</xref> and <xref ref-type="fig" rid="figure5">Figure 5</xref>). This suggests that the omission of diverse variables requires specific computational models, and our machine learning scorer is adept at incorporating all such considerations, thereby highlighting our advantage.</p><p>On the basis of our results, we proposed a hypothesis regarding the changes that occur in MSI-H&#x2013;like MSS CRCs compared to other MSS CRCs. MSI-H&#x2013;like MSS CRCs foster an immunosuppressive microenvironment with M2 macrophages, Tregs, and PD-L1 that inhibits T cell responses [<xref ref-type="bibr" rid="ref37">37</xref>]. However, there are enough T cells present that can be reactivated upon PD-1/PD-L1 blockade, leading to the sensitivity of MSI-H&#x2013;like MSS CRCs to ICIs. The abundance of macrophages suggests that there may be some M1-like populations that, when disinhibited, promote antitumor immunity. Detailed differences in immune cell populations and their functions in MSI-H&#x2013;like MSS CRCs and other MSS CRCs should be further investigated to understand the mechanisms underlying the differential response to immunotherapy. Furthermore, future clinical trials could be conducted to evaluate ICI treatment between patients with MSI-H&#x2013;like MSS CRC and other patients with MSS CRC with low MSI-H scores.</p></sec><sec id="s4-2"><title>Limitations</title><p>Limitations of our study include the lack of internal or external validation of the MSI-H score in patients with MSS CRC receiving immunotherapy and the absence of investigation into the underlying molecular mechanisms. 
Further research and clinical trials are needed to validate our MSI-H score and elucidate the associated mechanisms.</p></sec><sec id="s4-3"><title>Conclusions</title><p>In conclusion, our study revealed significant variations in TIL distribution across tumor regions and MSI status. Integrating clinical, TIL, and mutational data, we developed a robust MSI-H scorer that captures CRC&#x2019;s complexity and heterogeneity. Macrophages, gene mutations, and tumor site emerged as key predictors. MSI-H&#x2013;like MSS CRCs exhibited TIL infiltration patterns with high immunoactivity similar to MSI-H CRCs, distinctly different from other MSS CRCs. Our privacy-protected MSI-H score predictor is freely available on the web, enabling clinical and research applications.</p></sec></sec></body><back><ack><p>This study was supported by grants from the National Natural Science Foundation of China (82473500 to JP and 82372974 to YL) and the Natural Science Foundation of Shandong Province (ZR2023QC282 to LJ). The sponsors had no role in the study design; data collection, analysis, or interpretation; writing the report; or the decision to submit the paper for publication.</p></ack><notes><sec><title>Data Availability</title><p>The datasets generated or analyzed during this study are available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>JP and DH contributed to the study conception and design. HY, YL, and LJ developed the methodology. LJ, HY, WS, SM, and FW performed the analysis and interpretation. YL, HY, and LJ drafted and revised the manuscript. JP and DH supervised the study. All authors read and approved the final manuscript.</p></fn><fn fn-type="conflict"><p>FW is employed by Weifang Ten Nanometer Biotechnology Co, Ltd. 
All other authors declare no other conflicts of interest.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">CEA</term><def><p>carcinoembryonic antigen</p></def></def-item><def-item><term id="abb2">CRC</term><def><p>colorectal cancer</p></def></def-item><def-item><term id="abb3">DAPI</term><def><p>4,6-diamidino-2-phenylindole</p></def></def-item><def-item><term id="abb4">DAVID</term><def><p>Database for Annotation, Visualization, and Integrated Discovery</p></def></def-item><def-item><term id="abb5">ICI</term><def><p>immune checkpoint inhibitor</p></def></def-item><def-item><term id="abb6">mIHC</term><def><p>multiplex immunohistochemistry</p></def></def-item><def-item><term id="abb7">MSI-H</term><def><p>microsatellite instability-high</p></def></def-item><def-item><term id="abb8">MSS</term><def><p>microsatellite stability</p></def></def-item><def-item><term id="abb9">MWT</term><def><p>microwave treatment</p></def></def-item><def-item><term id="abb10">PD-1</term><def><p>programmed cell death-1</p></def></def-item><def-item><term id="abb11">PD-L1</term><def><p>programmed cell death ligand 1</p></def></def-item><def-item><term id="abb12">TIL</term><def><p>tumor-infiltrating lymphocyte</p></def></def-item><def-item><term id="abb13">Treg</term><def><p>regulatory T cell</p></def></def-item><def-item><term id="abb14">TSA</term><def><p>tyramide signal amplification</p></def></def-item><def-item><term id="abb15">XGBoost</term><def><p>extreme gradient boosting</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Siegel</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Wagle</surname><given-names>NS</given-names> </name><name name-style="western"><surname>Cercek</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Smith</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Jemal</surname><given-names>A</given-names> </name></person-group><article-title>Colorectal cancer statistics, 2023</article-title><source>CA Cancer J Clin</source><year>2023</year><volume>73</volume><issue>3</issue><fpage>233</fpage><lpage>254</lpage><pub-id pub-id-type="doi">10.3322/caac.21772</pub-id><pub-id pub-id-type="medline">36856579</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>K</given-names> </name><name name-style="western"><surname>Luo</surname><given-names>H</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Luo</surname><given-names>H</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>X</given-names> </name></person-group><article-title>Microsatellite instability: a review of what the oncologist should know</article-title><source>Cancer Cell Int</source><year>2020</year><volume>20</volume><issue>1</issue><fpage>16</fpage><pub-id pub-id-type="doi">10.1186/s12935-019-1091-8</pub-id><pub-id pub-id-type="medline">31956294</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ganesh</surname><given-names>K</given-names> </name><name name-style="western"><surname>Stadler</surname><given-names>ZK</given-names> </name><name name-style="western"><surname>Cercek</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Immunotherapy in colorectal cancer: rationale, challenges and potential</article-title><source>Nat Rev Gastroenterol 
Hepatol</source><year>2019</year><month>06</month><volume>16</volume><issue>6</issue><fpage>361</fpage><lpage>375</lpage><pub-id pub-id-type="doi">10.1038/s41575-019-0126-x</pub-id><pub-id pub-id-type="medline">30886395</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Gu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Application of PD-1 blockade in cancer immunotherapy</article-title><source>Comput Struct Biotechnol J</source><year>2019</year><volume>17</volume><fpage>661</fpage><lpage>674</lpage><pub-id pub-id-type="doi">10.1016/j.csbj.2019.03.006</pub-id><pub-id pub-id-type="medline">31205619</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Golshani</surname><given-names>G</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name></person-group><article-title>Advances in immunotherapy for colorectal cancer: a review</article-title><source>Therap Adv Gastroenterol</source><year>2020</year><volume>13</volume><fpage>1756284820917527</fpage><pub-id pub-id-type="doi">10.1177/1756284820917527</pub-id><pub-id pub-id-type="medline">32536977</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Casak</surname><given-names>SJ</given-names> </name><name name-style="western"><surname>Marcus</surname><given-names>L</given-names> </name><name name-style="western"><surname>Fashoyin-Aje</surname><given-names>L</given-names> 
</name><etal/></person-group><article-title>FDA approval summary: pembrolizumab for the first-line treatment of patients with MSI-H/dMMR advanced unresectable or metastatic colorectal carcinoma</article-title><source>Clin Cancer Res</source><year>2021</year><month>09</month><day>1</day><volume>27</volume><issue>17</issue><fpage>4680</fpage><lpage>4684</lpage><pub-id pub-id-type="doi">10.1158/1078-0432.CCR-21-0557</pub-id><pub-id pub-id-type="medline">33846198</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Trullas</surname><given-names>A</given-names> </name><name name-style="western"><surname>Delgado</surname><given-names>J</given-names> </name><name name-style="western"><surname>Genazzani</surname><given-names>A</given-names> </name><etal/></person-group><article-title>The EMA assessment of pembrolizumab as monotherapy for the first-line treatment of adult patients with metastatic microsatellite instability-high or mismatch repair deficient colorectal cancer</article-title><source>ESMO Open</source><year>2021</year><month>06</month><volume>6</volume><issue>3</issue><fpage>100145</fpage><pub-id pub-id-type="doi">10.1016/j.esmoop.2021.100145</pub-id><pub-id pub-id-type="medline">33940347</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Deng</surname><given-names>M</given-names> </name><name name-style="western"><surname>Xue</surname><given-names>D</given-names> </name><etal/></person-group><article-title>PD-1/PD-L1 inhibitors for early and middle stage microsatellite high-instability and stable colorectal cancer: a review</article-title><source>Int J Colorectal 
Dis</source><year>2024</year><month>05</month><day>29</day><volume>39</volume><issue>1</issue><fpage>83</fpage><pub-id pub-id-type="doi">10.1007/s00384-024-04654-3</pub-id><pub-id pub-id-type="medline">38809459</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guven</surname><given-names>DC</given-names> </name><name name-style="western"><surname>Kavgaci</surname><given-names>G</given-names> </name><name name-style="western"><surname>Erul</surname><given-names>E</given-names> </name><etal/></person-group><article-title>The efficacy of immune checkpoint inhibitors in microsatellite stable colorectal cancer: a systematic review</article-title><source>Oncologist</source><year>2024</year><month>05</month><day>3</day><volume>29</volume><issue>5</issue><fpage>e580</fpage><lpage>e600</lpage><pub-id pub-id-type="doi">10.1093/oncolo/oyae013</pub-id><pub-id pub-id-type="medline">38309719</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Neoadjuvant immunotherapy for DNA mismatch repair proficient/microsatellite stable non-metastatic rectal cancer: a systematic review and meta-analysis</article-title><source>Front Immunol</source><year>2025</year><volume>16</volume><fpage>1523455</fpage><pub-id pub-id-type="doi">10.3389/fimmu.2025.1523455</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Motta</surname><given-names>R</given-names> </name><name 
name-style="western"><surname>Cabezas-Camarero</surname><given-names>S</given-names> </name><name name-style="western"><surname>Torres-Mattos</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Immunotherapy in microsatellite instability metastatic colorectal cancer: current status and future perspectives</article-title><source>J Clin Transl Res</source><year>2021</year><month>08</month><day>26</day><volume>7</volume><issue>4</issue><fpage>511</fpage><lpage>522</lpage><pub-id pub-id-type="medline">34541365</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mantovani</surname><given-names>A</given-names> </name><name name-style="western"><surname>Allavena</surname><given-names>P</given-names> </name><name name-style="western"><surname>Sica</surname><given-names>A</given-names> </name><name name-style="western"><surname>Balkwill</surname><given-names>F</given-names> </name></person-group><article-title>Cancer-related inflammation</article-title><source>Nature</source><year>2008</year><month>07</month><day>24</day><volume>454</volume><issue>7203</issue><fpage>436</fpage><lpage>444</lpage><pub-id pub-id-type="doi">10.1038/nature07205</pub-id><pub-id pub-id-type="medline">18650914</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Brummel</surname><given-names>K</given-names> </name><name name-style="western"><surname>Eerkens</surname><given-names>AL</given-names> </name><name name-style="western"><surname>de Bruyn</surname><given-names>M</given-names> </name><name name-style="western"><surname>Nijman</surname><given-names>HW</given-names> </name></person-group><article-title>Tumour-infiltrating lymphocytes: from prognosis to treatment selection</article-title><source>Br J 
Cancer</source><year>2023</year><month>02</month><volume>128</volume><issue>3</issue><fpage>451</fpage><lpage>458</lpage><pub-id pub-id-type="doi">10.1038/s41416-022-02119-4</pub-id><pub-id pub-id-type="medline">36564565</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jung</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Yoo</surname><given-names>SY</given-names> </name><name name-style="western"><surname>Bae</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Kang</surname><given-names>GH</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>JH</given-names> </name></person-group><article-title>Intratumoral spatial heterogeneity of tumor-infiltrating lymphocytes is a significant factor for precisely stratifying prognostic immune subgroups of microsatellite instability-high colorectal carcinomas</article-title><source>Mod Pathol</source><year>2022</year><month>12</month><volume>35</volume><issue>12</issue><fpage>2011</fpage><lpage>2022</lpage><pub-id pub-id-type="doi">10.1038/s41379-022-01137-0</pub-id><pub-id pub-id-type="medline">35869301</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Weiser</surname><given-names>MR</given-names> </name></person-group><article-title>AJCC 8th edition: colorectal cancer</article-title><source>Ann Surg Oncol</source><year>2018</year><month>06</month><volume>25</volume><issue>6</issue><fpage>1454</fpage><lpage>1455</lpage><pub-id pub-id-type="doi">10.1245/s10434-018-6462-1</pub-id><pub-id pub-id-type="medline">29616422</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gorris</surname><given-names>MAJ</given-names> </name><name name-style="western"><surname>Halilovic</surname><given-names>A</given-names> </name><name name-style="western"><surname>Rabold</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Eight-color multiplex immunohistochemistry for simultaneous detection of multiple immune checkpoint molecules within the tumor microenvironment</article-title><source>J Immunol</source><year>2018</year><month>01</month><day>1</day><volume>200</volume><issue>1</issue><fpage>347</fpage><lpage>354</lpage><pub-id pub-id-type="doi">10.4049/jimmunol.1701262</pub-id><pub-id pub-id-type="medline">29141863</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhu</surname><given-names>L</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Fang</surname><given-names>X</given-names> </name><etal/></person-group><article-title>A novel and reliable method to detect microsatellite instability in colorectal cancer by next-generation sequencing</article-title><source>J Mol Diagn</source><year>2018</year><month>03</month><volume>20</volume><issue>2</issue><fpage>225</fpage><lpage>231</lpage><pub-id pub-id-type="doi">10.1016/j.jmoldx.2017.11.007</pub-id><pub-id pub-id-type="medline">29277635</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Huang</surname><given-names>DW</given-names> </name><name name-style="western"><surname>Sherman</surname><given-names>BT</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>Q</given-names> 
</name><etal/></person-group><article-title>The DAVID gene functional classification tool: a novel biological module-centric algorithm to functionally analyze large gene lists</article-title><source>Genome Biol</source><year>2007</year><volume>8</volume><issue>9</issue><fpage>R183</fpage><pub-id pub-id-type="doi">10.1186/gb-2007-8-9-r183</pub-id><pub-id pub-id-type="medline">17784955</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Serebriiskii</surname><given-names>IG</given-names> </name><name name-style="western"><surname>Connelly</surname><given-names>C</given-names> </name><name name-style="western"><surname>Frampton</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Comprehensive characterization of RAS mutations in colon and rectal cancers in old and young patients</article-title><source>Nat Commun</source><year>2019</year><month>08</month><day>19</day><volume>10</volume><issue>1</issue><fpage>3722</fpage><pub-id pub-id-type="doi">10.1038/s41467-019-11530-0</pub-id><pub-id pub-id-type="medline">31427573</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mondaca</surname><given-names>S</given-names> </name><name name-style="western"><surname>Walch</surname><given-names>H</given-names> </name><name name-style="western"><surname>Nandakumar</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chatila</surname><given-names>WK</given-names> </name><name name-style="western"><surname>Schultz</surname><given-names>N</given-names> </name><name name-style="western"><surname>Yaeger</surname><given-names>R</given-names> </name></person-group><article-title>Specific mutations in APC, but not alterations in DNA damage response, associate with outcomes of patients 
with metastatic colorectal cancer</article-title><source>Gastroenterology</source><year>2020</year><month>11</month><volume>159</volume><issue>5</issue><fpage>1975</fpage><lpage>1978</lpage><pub-id pub-id-type="doi">10.1053/j.gastro.2020.07.041</pub-id><pub-id pub-id-type="medline">32730818</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chatila</surname><given-names>WK</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>JK</given-names> </name><name name-style="western"><surname>Walch</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Genomic and transcriptomic determinants of response to neoadjuvant therapy in rectal cancer</article-title><source>Nat Med</source><year>2022</year><month>08</month><volume>28</volume><issue>8</issue><fpage>1646</fpage><lpage>1655</lpage><pub-id pub-id-type="doi">10.1038/s41591-022-01930-z</pub-id><pub-id pub-id-type="medline">35970919</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cerami</surname><given-names>E</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>J</given-names> </name><name name-style="western"><surname>Dogrusoz</surname><given-names>U</given-names> </name><etal/></person-group><article-title>The cBio cancer genomics portal: an open platform for exploring multidimensional cancer genomics data</article-title><source>Cancer Discov</source><year>2012</year><month>05</month><volume>2</volume><issue>5</issue><fpage>401</fpage><lpage>404</lpage><pub-id pub-id-type="doi">10.1158/2159-8290.CD-12-0095</pub-id><pub-id pub-id-type="medline">22588877</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Gao</surname><given-names>J</given-names> </name><name name-style="western"><surname>Aksoy</surname><given-names>BA</given-names> </name><name name-style="western"><surname>Dogrusoz</surname><given-names>U</given-names> </name><etal/></person-group><article-title>Integrative analysis of complex cancer genomics and clinical profiles using the cBioPortal</article-title><source>Sci Signal</source><year>2013</year><month>04</month><day>2</day><volume>6</volume><issue>269</issue><fpage>pl1</fpage><pub-id pub-id-type="doi">10.1126/scisignal.2004088</pub-id><pub-id pub-id-type="medline">23550210</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>T</given-names> </name><name name-style="western"><surname>Guestrin</surname><given-names>C</given-names> </name></person-group><article-title>Xgboost: a scalable tree boosting system</article-title><conf-name>Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</conf-name><conf-date>Aug 13, 2016 to Aug 17, 2016</conf-date><conf-loc>San Francisco, CA</conf-loc><fpage>785</fpage><lpage>794</lpage></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Lundberg</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>SI</given-names> </name></person-group><article-title>A unified approach to interpreting model predictions</article-title><source>arXiv</source><comment>Preprint posted online on May 22, 2017</comment><pub-id pub-id-type="doi">10.48550/arXiv.1705.07874</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Benjamini</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Hochberg</surname><given-names>Y</given-names> </name></person-group><article-title>Controlling the false discovery rate: a practical and powerful approach to multiple testing</article-title><source>J R Stat Soc Series B Methodol</source><year>1995</year><month>01</month><day>1</day><volume>57</volume><issue>1</issue><fpage>289</fpage><lpage>300</lpage><pub-id pub-id-type="doi">10.1111/j.2517-6161.1995.tb02031.x</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McQuitty</surname><given-names>LL</given-names> </name></person-group><article-title>Hierarchical linkage analysis for the isolation of types</article-title><source>Educ Psychol Meas</source><year>1960</year><month>04</month><volume>20</volume><issue>1</issue><fpage>55</fpage><lpage>67</lpage><pub-id pub-id-type="doi">10.1177/001316446002000106</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="web"><article-title>MSI-H score predictor</article-title><access-date>2025-09-30</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.jiangbioinfo.com/msi-score/">https://www.jiangbioinfo.com/msi-score/</ext-link></comment></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Liang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Dai</surname><given-names>W</given-names> </name><etal/></person-group><article-title>Prognostic impact of programed cell death-1 (PD-1) and PD-ligand 1 (PD-L1) expression in cancer cells and tumor infiltrating lymphocytes in colorectal 
cancer</article-title><source>Mol Cancer</source><year>2016</year><month>08</month><day>24</day><volume>15</volume><issue>1</issue><fpage>55</fpage><pub-id pub-id-type="doi">10.1186/s12943-016-0539-x</pub-id><pub-id pub-id-type="medline">27552968</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Idos</surname><given-names>GE</given-names> </name><name name-style="western"><surname>Kwok</surname><given-names>J</given-names> </name><name name-style="western"><surname>Bonthala</surname><given-names>N</given-names> </name><name name-style="western"><surname>Kysh</surname><given-names>L</given-names> </name><name name-style="western"><surname>Gruber</surname><given-names>SB</given-names> </name><name name-style="western"><surname>Qu</surname><given-names>C</given-names> </name></person-group><article-title>The prognostic implications of tumor infiltrating lymphocytes in colorectal cancer: a systematic review and meta-analysis</article-title><source>Sci Rep</source><year>2020</year><month>02</month><day>25</day><volume>10</volume><issue>1</issue><fpage>3360</fpage><pub-id pub-id-type="doi">10.1038/s41598-020-60255-4</pub-id><pub-id pub-id-type="medline">32099066</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Millen</surname><given-names>R</given-names> </name><name name-style="western"><surname>Hendry</surname><given-names>S</given-names> </name><name name-style="western"><surname>Narasimhan</surname><given-names>V</given-names> </name><etal/></person-group><article-title>CD8<sup>+</sup> tumor-infiltrating lymphocytes within the primary tumor of patients with synchronous de novo metastatic colorectal carcinoma do not track with survival</article-title><source>Clin Transl 
Immunology</source><year>2020</year><volume>9</volume><issue>7</issue><fpage>e1155</fpage><pub-id pub-id-type="doi">10.1002/cti2.1155</pub-id><pub-id pub-id-type="medline">32953115</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Edin</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wikberg</surname><given-names>ML</given-names> </name><name name-style="western"><surname>Dahlin</surname><given-names>AM</given-names> </name><etal/></person-group><article-title>The distribution of macrophages with a M1 or M2 phenotype in relation to prognosis and the molecular characteristics of colorectal cancer</article-title><source>PLoS One</source><year>2012</year><volume>7</volume><issue>10</issue><fpage>e47045</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0047045</pub-id><pub-id pub-id-type="medline">23077543</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Oshi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sarkar</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Intratumoral density of regulatory T cells is a predictor of host immune response and chemotherapy response in colorectal cancer</article-title><source>Am J Cancer Res</source><year>2022</year><volume>12</volume><issue>2</issue><fpage>490</fpage><lpage>503</lpage><pub-id pub-id-type="medline">35261782</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>H</given-names> </name><name 
name-style="western"><surname>Tian</surname><given-names>T</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>J</given-names> </name></person-group><article-title>Tumor-associated macrophages (TAMs) in colorectal cancer (CRC): from mechanism to therapy and prognosis</article-title><source>Int J Mol Sci</source><year>2021</year><month>08</month><day>6</day><volume>22</volume><issue>16</issue><fpage>8470</fpage><pub-id pub-id-type="doi">10.3390/ijms22168470</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Elomaa</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ahtiainen</surname><given-names>M</given-names> </name><name name-style="western"><surname>V&#x00E4;yrynen</surname><given-names>SA</given-names> </name><etal/></person-group><article-title>Spatially resolved multimarker evaluation of CD274 (PD-L1)/PDCD1 (PD-1) immune checkpoint expression and macrophage polarisation in colorectal cancer</article-title><source>Br J Cancer</source><year>2023</year><month>06</month><volume>128</volume><issue>11</issue><fpage>2104</fpage><lpage>2115</lpage><pub-id pub-id-type="doi">10.1038/s41416-023-02238-6</pub-id><pub-id pub-id-type="medline">37002343</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>E</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Han</surname><given-names>W</given-names> </name><etal/></person-group><article-title>The mutational landscape of MSI-H and MSS colorectal cancer</article-title><source>J Clin Oncol</source><year>2019</year><month>05</month><day>26</day><volume>37</volume><issue>15_suppl</issue><fpage>e15122</fpage><pub-id 
pub-id-type="doi">10.1200/JCO.2019.37.15_suppl.e15122</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Rajput</surname><given-names>A</given-names> </name><name name-style="western"><surname>Jin</surname><given-names>N</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>J</given-names> </name></person-group><article-title>Mechanisms of immunosuppression in colorectal cancer</article-title><source>Cancers (Basel)</source><year>2020</year><month>12</month><day>20</day><volume>12</volume><issue>12</issue><fpage>3850</fpage><pub-id pub-id-type="doi">10.3390/cancers12123850</pub-id><pub-id pub-id-type="medline">33419310</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Detailed information on the experimental protocols, genetic panels, model specifications, and statistical analyses performed in this study, as provided by 6 supplementary tables.</p><media xlink:href="formative_v9i1e66960_app1.doc" xlink:title="DOC File, 249 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>The flowchart of feature engineering, model training, deployment, and feature and model evaluation.</p><media xlink:href="formative_v9i1e66960_app2.png" xlink:title="PNG File, 203 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>An example of one of the extreme gradient boosting tree models in pseudocode.</p><media xlink:href="formative_v9i1e66960_app3.doc" xlink:title="DOC File, 92 KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>Feature importance in a large predictive model.</p><media xlink:href="formative_v9i1e66960_app4.pdf" 
xlink:title="PDF File, 894 KB"/></supplementary-material><supplementary-material id="app5"><label>Multimedia Appendix 5</label><p>Heat map of Spearman correlation coefficients between all pairs of variables (features and targets).</p><media xlink:href="formative_v9i1e66960_app5.pdf" xlink:title="PDF File, 534 KB"/></supplementary-material></app-group></back></article>