<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="letter" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i1e83124</article-id>
      <article-id pub-id-type="pmid">41605495</article-id>
      <article-id pub-id-type="doi">10.2196/83124</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Research Letter</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Research Letter</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Development and Validation of a Protein Electrophoresis Classification Algorithm: Tabular Data-Based Alternative</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Schwartz</surname>
            <given-names>Amy</given-names>
          </name>
        </contrib>
        <contrib contrib-type="editor">
          <name>
            <surname>Balcarras</surname>
            <given-names>Matthew</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ishola</surname>
            <given-names>Ayomide</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Jiang</surname>
            <given-names>Shan</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Mazuir</surname>
            <given-names>Auriane</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution/>
            <institution>Laboratoire B2A</institution>
            <addr-line>51 Rue Division Leclerc</addr-line>
            <addr-line>Brumath, 67170</addr-line>
            <country>France</country>
            <phone>33 03 88 52 93 10</phone>
            <email>auriane.mazuir@gmail.com</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0007-0476-2642</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Ricotier</surname>
            <given-names>Gatien</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0007-9974-231X</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Filhine-Tresarrieu</surname>
            <given-names>Pierre</given-names>
          </name>
          <degrees>MSc, PharmD, DES</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0000-7103-7386</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Laboratoire B2A</institution>
        <addr-line>Brumath</addr-line>
        <country>France</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Institut de Recherche Mathématique Avancée</institution>
        <addr-line>Strasbourg</addr-line>
        <country>France</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Auriane Mazuir <email>auriane.mazuir@gmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2026</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>28</day>
        <month>1</month>
        <year>2026</year>
      </pub-date>
      <volume>10</volume>
      <elocation-id>e83124</elocation-id>
      <history>
        <date date-type="received">
          <day>28</day>
          <month>8</month>
          <year>2025</year>
        </date>
        <date date-type="rev-request">
          <day>30</day>
          <month>10</month>
          <year>2025</year>
        </date>
        <date date-type="accepted">
          <day>13</day>
          <month>1</month>
          <year>2026</year>
        </date>
      </history>
      <copyright-statement>©Auriane Mazuir, Gatien Ricotier, Pierre Filhine-Tresarrieu. Originally published in JMIR Formative Research (https://formative.jmir.org), 28.01.2026.</copyright-statement>
      <copyright-year>2026</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2026/1/e83124" xlink:type="simple"/>
      <abstract>
        <p>Serum protein electrophoresis (SPE) is routinely interpreted through visual assessment of electropherogram images by medical laboratory scientists. We introduce an efficient tabular data–based machine learning approach that directly leverages numerical SPE profiles, offering a robust and interpretable alternative to image-based deep learning methods.</p>
      </abstract>
      <kwd-group>
        <kwd>machine learning</kwd>
        <kwd>CatBoost</kwd>
        <kwd>serum protein electrophoresis</kwd>
        <kwd>convolutional neural network</kwd>
        <kwd>tabular data analysis</kwd>
        <kwd>clinical informatics</kwd>
        <kwd>diagnostic interpretation</kwd>
        <kwd>computational efficiency</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Serum protein electrophoresis (SPE) is a key technique for separating and quantifying major serum protein fractions. Recent studies [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>] have used convolutional neural networks (CNNs) to classify SPE results. Although these models have shown good performance, they primarily replicate the visual interpretation performed by medical laboratory scientists (MLS). Yet electropherograms are inherently numerical curves—that is, tabular data. This raises a simple question: why analyze an image when the analytical signal already exists as a numerical table?</p>
      <p>Although image-based CNNs remain the dominant approach, we explicitly reframe SPE classification as a purely tabular learning problem concerning numerical SPE profiles. We evaluate this perspective by comparing our approach to the CNN-based study of Lee et al [<xref ref-type="bibr" rid="ref1">1</xref>] by using the same dataset [<xref ref-type="bibr" rid="ref4">4</xref>] and identical training-test splits, without additional data cleaning or hyperparameter tuning.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <p>Input data were obtained by extracting numerical profiles from electropherograms and gel images as illustrated in <xref rid="figure1" ref-type="fig">Figure 1</xref>. Each image underwent grayscale conversion, cropping of the analytical region, interpolation into 150-point profiles, and min-max normalization. We computed SPE fractions by using local-minima detection (albumin, α-1, α-2, β, γ) and included demographic and biochemical variables from the dataset (sex, age, serum protein, serum albumin).</p>
      <p>The 6 pathological categories defined in the reference dataset [<xref ref-type="bibr" rid="ref4">4</xref>] were acute phase protein increase (74 cases), monoclonal gammopathy (264 cases), polyclonal gammopathy (244 cases), hypoproteinemia (249 cases), nephrotic syndrome (165 cases), and normal profiles (293 cases). Each case corresponds to a specific distribution pattern of proteins. These SPEs were collected in [<xref ref-type="bibr" rid="ref1">1</xref>] between January 2018 and July 2019.</p>
      <p>As recently emphasized [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>], tree-based gradient boosting models remain the strongest performers for tabular data, often surpassing deep learning. After converting SPE images into numerical matrices, we reconfirmed this by evaluating XGBoost (extreme gradient boosting), TabPFN (tabular foundation model), and CatBoost (categorical boosting). Without any hyperparameter optimization, CatBoost consistently produced the best results, especially on the gel-extracted data.</p>
      <p>All results were obtained with the default CatBoost parameters from the R implementation [<xref ref-type="bibr" rid="ref7">7</xref>] running on R software (version 4.4.3; R Foundation for Statistical Computing) [<xref ref-type="bibr" rid="ref8">8</xref>]. To enable a fair comparison to the CNN baseline in [<xref ref-type="bibr" rid="ref1">1</xref>], we used the exact same training and test splits as in [<xref ref-type="bibr" rid="ref1">1</xref>]: specifically, 10% of the cases were reserved for testing. However, the distribution of these cases differed between gel and electropherogram representations. Each experiment was repeated with 100 different seeds to estimate CIs for all performance metrics.</p>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>Preprocessing steps showing the tabular data extraction for both types of serum protein electrophoresis images.</p>
        </caption>
        <graphic xlink:href="formative_v10i1e83124_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>In total, 1289 SPE cases were available, each providing a gel and an electropherogram. However, the image quality varied substantially across samples: gel image heights ranged from 29 to 556 pixels (mean 81.9, SD 48.1) and widths from 96 to 876 pixels (mean 275, SD 129); electropherogram image heights ranged from 98 to 704 pixels (mean 410.7, SD 183.1) and widths from 250 to 1075 pixels (mean 649.6, SD 288.3) (<xref ref-type="table" rid="table1">Table 1</xref>).</p>
      <p>Among the 20 polyclonal gammopathy cases in the gel test set, the sensitivity reported by [<xref ref-type="bibr" rid="ref1">1</xref>] is 0.800, whereas our approach achieves a mean sensitivity of 0.941, with a 95% CI of 0.937-0.945 across 100 repeated runs. Except for monoclonal gammopathies where we, by contrast with [<xref ref-type="bibr" rid="ref1">1</xref>], removed the unusual spike delimitation in our preprocessing step, the CatBoost-based tabular approach outperformed the CNN baseline across most categories. Weighted sensitivity, specificity, and <italic>F</italic><sub>1</sub>-scores were improved when using tabular data rather than images.</p>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>Sensitivities, specificities, and <italic>F</italic><sub>1</sub>-scores for protein electropherograms and gels, comparing the original model with the average performance of our model over 100 repeated runs, stratified by pathology, with weighted averages computed over the entire dataset.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="100"/>
          <col width="70"/>
          <col width="80"/>
          <col width="70"/>
          <col width="80"/>
          <col width="70"/>
          <col width="80"/>
          <col width="70"/>
          <col width="80"/>
          <col width="70"/>
          <col width="80"/>
          <col width="70"/>
          <col width="80"/>
          <thead>
            <tr valign="top">
              <td>
                <break/>
              </td>
              <td>Sens<sup>a</sup> Ref<sup>b</sup> Electro<sup>c</sup></td>
              <td>Sens CatBoost<sup>d</sup> Electro</td>
              <td>Spec<sup>e</sup> Ref Electro</td>
              <td>Spec CatBoost Electro</td>
              <td><italic>F</italic><sub>1</sub>-score Ref Electro</td>
              <td><italic>F</italic><sub>1</sub>-score CatBoost Electro</td>
              <td>Sens Ref Gel</td>
              <td>Sens CatBoost Gel</td>
              <td>Spec Ref Gel</td>
              <td>Spec CatBoost Gel</td>
              <td><italic>F</italic><sub>1</sub>-score Ref Gel</td>
              <td><italic>F</italic><sub>1</sub>-score CatBoost Gel</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Acute phase protein (n<sup>f</sup>=5 or 9)</td>
              <td>
                <italic>0.600</italic>
                <sup>g</sup>
              </td>
              <td>0.594 (0.587-0.601)</td>
              <td>0.951</td>
              <td><italic>0.990</italic> (0.990-0.991)</td>
              <td>0.429</td>
              <td><italic>0.648</italic> (0.640-0.655)</td>
              <td>0.222</td>
              <td><italic>0.441</italic> (0.437-0.445)</td>
              <td>0.882</td>
              <td><italic>0.993</italic> (0.992-0.994)</td>
              <td>0.160</td>
              <td><italic>0.576</italic> (0.571-0.582)</td>
            </tr>
            <tr valign="top">
              <td>Monoclonal gammopathy (n=29 or 24)</td>
              <td>
                <italic>0.862</italic>
                <sup>h</sup>
              </td>
              <td>0.690 (0.689-0.691)</td>
              <td>
                <italic>1.000</italic>
                <sup>h</sup>
              </td>
              <td>0.984 (0.983-0.986)</td>
              <td>
                <italic>0.926</italic>
                <sup>h</sup>
              </td>
              <td>0.792 (0.790-0.794)</td>
              <td>
                <italic>0.792</italic>
              </td>
              <td>0.658 (0.650-0.665)</td>
              <td>0.981</td>
              <td><italic>0.998</italic> (0.997-0.999)</td>
              <td>
                <italic>0.844</italic>
              </td>
              <td>0.788 (0.783-0.793)</td>
            </tr>
            <tr valign="top">
              <td>Polyclonal gammopathy (n=22 or 20)</td>
              <td>0.818</td>
              <td><italic>1.000</italic> (1.000-1.000)</td>
              <td>
                <italic>0.981</italic>
              </td>
              <td>0.978 (0.977-0.979)</td>
              <td>0.857</td>
              <td><italic>0.950</italic> (0.947-0.953)</td>
              <td>0.800</td>
              <td><italic>0.941</italic> (0.937-0.945)</td>
              <td>0.917</td>
              <td><italic>0.983</italic> (0.982-0.984)</td>
              <td>0.711</td>
              <td><italic>0.925</italic> (0.923-0.928)</td>
            </tr>
            <tr valign="top">
              <td>Hypoproteinemia (n=26 or 25)</td>
              <td>0.846</td>
              <td><italic>0.878</italic> (0.874-0.881)</td>
              <td>0.853</td>
              <td><italic>0.974</italic> (0.973-0.975)</td>
              <td>0.698</td>
              <td><italic>0.887</italic> (0.884-0.889)</td>
              <td>0.520</td>
              <td><italic>0.831</italic> (0.827-0.834)</td>
              <td>0.893</td>
              <td><italic>0.898</italic> (0.896-0.899)</td>
              <td>0.531</td>
              <td><italic>0.738</italic> (0.735-0.740)</td>
            </tr>
            <tr valign="top">
              <td>Nephrotic syndrome (n=16 or 21)</td>
              <td>0.687</td>
              <td><italic>0.853</italic> (0.845-0.861)</td>
              <td>
                <italic>0.991</italic>
              </td>
              <td>0.954 (0.953-0.955)</td>
              <td>
                <italic>0.786</italic>
              </td>
              <td>0.783 (0.778-0.788)</td>
              <td>0.238</td>
              <td><italic>0.699</italic> (0.692-0.706)</td>
              <td>
                <italic>0.972</italic>
              </td>
              <td>0.944 (0.942-0.945)</td>
              <td>0.345</td>
              <td><italic>0.704</italic> (0.698-0.709)</td>
            </tr>
            <tr valign="top">
              <td>Normal (n=30 or 29)</td>
              <td>0.667</td>
              <td><italic>0.920</italic> (0.914-0.925)</td>
              <td>
                <italic>0.949</italic>
              </td>
              <td>0.939 (0.939-0.939)</td>
              <td>0.727</td>
              <td><italic>0.868</italic> (0.865-0.870)</td>
              <td>0.759</td>
              <td><italic>0.935</italic> (0.932-0.937)</td>
              <td>0.879</td>
              <td><italic>0.925</italic> (0.923-0.927)</td>
              <td>0.698</td>
              <td><italic>0.854</italic> (0.851-0.856)</td>
            </tr>
            <tr valign="top">
              <td>Weighted scores</td>
              <td>0.773</td>
              <td><italic>0.852</italic> (0.850-0.854)</td>
              <td>0.952</td>
              <td><italic>0.967</italic> (0.966-0.967)</td>
              <td>0.784</td>
              <td><italic>0.849</italic> (0.848-0.851)</td>
              <td>0.602</td>
              <td><italic>0.790</italic> (0.788-0.792)</td>
              <td>0.922</td>
              <td><italic>0.950</italic> (0.950-0.951)</td>
              <td>0.599</td>
              <td><italic>0.786</italic> (0.784-0.788)</td>
            </tr>
          </tbody>
        </table>
        <table-wrap-foot>
          <fn id="table1fn1">
            <p><sup>a</sup>Sens: sensitivity.</p>
          </fn>
          <fn id="table1fn2">
            <p><sup>b</sup>Ref: reference.</p>
          </fn>
          <fn id="table1fn3">
            <p><sup>c</sup>Electro: electropherogram.</p>
          </fn>
          <fn id="table1fn4">
            <p><sup>d</sup>CatBoost: categorical boosting.</p>
          </fn>
          <fn id="table1fn5">
            <p><sup>e</sup>Spec: specificity.</p>
          </fn>
          <fn id="table1fn6">
            <p><sup>f</sup>The values of n correspond to the number of test samples for electropherograms and gels, respectively. Values in parentheses report the 95% CIs of our model.</p>
          </fn>
          <fn id="table1fn7">
            <p><sup>g</sup>Italicized values indicate the best-performing model for each metric and category.</p>
          </fn>
          <fn id="table1fn8">
            <p><sup>h</sup>Denotes the use of unusual spike delimitation on all electropherograms of monoclonal gammopathies.</p>
          </fn>
        </table-wrap-foot>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>The main limitation appears in the monoclonal gammopathy class [<xref ref-type="bibr" rid="ref9">9</xref>] on electropherograms, for which CNNs in [<xref ref-type="bibr" rid="ref1">1</xref>] report higher performance. A plausible explanation lies in the structure of the original dataset: in [<xref ref-type="bibr" rid="ref4">4</xref>], electropherograms corresponding to monoclonal gammopathies systematically contain manually drawn spike delimitations added by MLS during routine interpretation. These annotations are specific to this class and may therefore serve as highly discriminative visual cues for the convolutional model, artificially boosting its performance. In contrast, this dataset bias is removed from our tabular dataset using our preprocessing pipeline. It removes all such manual markings to retain a purely signal-based representation, thereby eliminating visual hints that the CNN may have leveraged in the original setting. Despite this bias, our framework achieves stable and homogeneous performance across all pathological categories and does not show a specific degradation for monoclonal gammopathies.</p>
      <p>Our CatBoost results were obtained using the default parameters, without any form of tuning. This choice was intentional: it demonstrates that even an entirely nonoptimized tabular model already outperforms the CNN baseline on most categories, even on a dataset with several low-quality images. Consequently, additional improvements are highly plausible. More extensive hyperparameter optimization such as tuning tree depth, learning rate, and boosting iterations could further enhance performance. Likewise, hybrid approaches that enrich numerical profiles with peak-shape descriptors or selectively integrate localized image-based features may help address the specific challenges posed by narrow M-spikes in monoclonal gammopathies.</p>
      <p>Reframing SPE classification as a tabular learning task leads to immediate performance improvements, even before any optimization. Beyond accuracy, this approach offers several practical advantages. First, the approach is computationally efficient: CatBoost trains rapidly on a standard laptop and requires no graphics processing unit, and it integrates easily into routine workflows. Second, this approach is readily generalizable, especially with modern SPE analyzers that already store raw numerical curves internally. It means the classification model can be applied directly to these exported values without any image-processing pipeline. Finally, tree-based models offer greater interpretability, allowing laboratories to analyze feature importance and understand which parts of the curve contribute to the classification—a key requirement for clinical use. Moreover, unsurprisingly, this framework is image-type agnostic: when exchanging the train-test split for electropherogram and gel, performance remains consistent. This confirms that the improvement comes from the change in data structure rather than from the image source itself.</p>
      <p>In summary, transitioning from image-based deep learning to tabular data–based machine learning increases performance and improves robustness, interpretability, reproducibility, and ease of deployment. This redefinition of the SPE classification problem, unconventional for practitioners yet natural for computational systems, provides a compelling alternative to CNN-based approaches and a promising basis for clinically reliable automation.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Use of generative artificial intelligence for code development.</p>
        <media xlink:href="formative_v10i1e83124_app1.docx" xlink:title="DOCX File, 15 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">CatBoost</term>
          <def>
            <p>categorical boosting</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CNN</term>
          <def>
            <p>convolutional neural network</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">MLS</term>
          <def>
            <p>medical laboratory scientists</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">SPE</term>
          <def>
            <p>serum protein electrophoresis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">TabPFN</term>
          <def>
            <p>tabular foundation model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">XGBoost</term>
          <def>
            <p>extreme gradient boosting</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>Generative artificial intelligence tools were used to assist in the development and refinement of the computational code employed in this study. Specifically, ChatGPT (OpenAI) was used under full human supervision during several nondecisional stages of the coding process (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
    </ack>
    <notes>
      <sec>
        <title>Funding</title>
        <p>PF-T is a partner in B2A, and the other authors are employed by B2A.</p>
      </sec>
    </notes>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>All data used in this study are publicly accessible through the Harvard Dataverse repository, as released by Lee [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Jeong</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jeon</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Park</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Development and validation of a deep learning-based protein electrophoresis classification algorithm</article-title>
          <source>PLoS One</source>
          <year>2022</year>
          <volume>17</volume>
          <issue>8</issue>
          <fpage>e0273284</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0273284"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0273284</pub-id>
          <pub-id pub-id-type="medline">36001575</pub-id>
          <pub-id pub-id-type="pii">PONE-D-22-01732</pub-id>
          <pub-id pub-id-type="pmcid">PMC9401151</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chabrun</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Dieu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ferre</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gaillard</surname>
              <given-names>Olivier</given-names>
            </name>
            <name name-style="western">
              <surname>Mery</surname>
              <given-names>Anthony</given-names>
            </name>
            <name name-style="western">
              <surname>Chao de la Barca</surname>
              <given-names>Juan Manuel</given-names>
            </name>
            <name name-style="western">
              <surname>Taisne</surname>
              <given-names>Audrey</given-names>
            </name>
            <name name-style="western">
              <surname>Urbanski</surname>
              <given-names>Geoffrey</given-names>
            </name>
            <name name-style="western">
              <surname>Reynier</surname>
              <given-names>Pascal</given-names>
            </name>
            <name name-style="western">
              <surname>Mirebeau-Prunier</surname>
              <given-names>Delphine</given-names>
            </name>
          </person-group>
          <article-title>Achieving expert-level interpretation of serum protein electrophoresis through deep learning driven by human reasoning</article-title>
          <source>Clin Chem</source>
          <year>2021</year>
          <month>10</month>
          <day>01</day>
          <volume>67</volume>
          <issue>10</issue>
          <fpage>1406</fpage>
          <lpage>1414</lpage>
          <pub-id pub-id-type="doi">10.1093/clinchem/hvab133</pub-id>
          <pub-id pub-id-type="medline">34491313</pub-id>
          <pub-id pub-id-type="pii">6365844</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elfert</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Kaminski</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Matek</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hoermann</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Axelsen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Marr</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Piehler</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Expert-level detection of M-proteins in serum protein electrophoresis using machine learning</article-title>
          <source>Clin Chem Lab Med</source>
          <year>2024</year>
          <month>11</month>
          <day>26</day>
          <volume>62</volume>
          <issue>12</issue>
          <fpage>2498</fpage>
          <lpage>2506</lpage>
          <pub-id pub-id-type="doi">10.1515/cclm-2024-0222</pub-id>
          <pub-id pub-id-type="medline">38879789</pub-id>
          <pub-id pub-id-type="pii">cclm-2024-0222</pub-id>
          <pub-id pub-id-type="pmcid">PMC11470231</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Dataset of deep learning based protein electrophoresis classification</article-title>
          <source>Harvard Dataverse</source>
          <year>2022</year>
          <month>06</month>
          <day>08</day>
          <access-date>2026-01-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.7910/DVN/FG9NXZ">https://doi.org/10.7910/DVN/FG9NXZ</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grinsztajn</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Oyallon</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Why do tree-based models still outperform deep learning on tabular data?</article-title>
          <source>Advances in Neural Information Processing Systems</source>
          <year>2022</year>
          <volume>35</volume>
          <fpage>507</fpage>
          <lpage>520</lpage>
          <pub-id pub-id-type="doi">10.48550/arXiv.2207.08815</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shwartz-Ziv</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Armon</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Tabular data: deep learning is not all you need</article-title>
          <source>Information Fusion</source>
          <year>2022</year>
          <month>05</month>
          <volume>81</volume>
          <fpage>84</fpage>
          <lpage>90</lpage>
          <pub-id pub-id-type="doi">10.1016/j.inffus.2021.11.011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="web">
          <source>CatBoost</source>
          <access-date>2026-01-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://catboost.ai/">https://catboost.ai/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <source>R: The R Project for Statistical Computing</source>
          <access-date>2026-01-22</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.R-project.org/">https://www.R-project.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rajkumar</surname>
              <given-names>SV</given-names>
            </name>
            <name name-style="western">
              <surname>Lacy</surname>
              <given-names>MQ</given-names>
            </name>
            <name name-style="western">
              <surname>Kyle</surname>
              <given-names>RA</given-names>
            </name>
          </person-group>
          <article-title>Monoclonal gammopathy of undetermined significance and smoldering multiple myeloma</article-title>
          <source>Blood Rev</source>
          <year>2007</year>
          <month>09</month>
          <volume>21</volume>
          <issue>5</issue>
          <fpage>255</fpage>
          <lpage>265</lpage>
          <pub-id pub-id-type="doi">10.1016/j.blre.2007.01.002</pub-id>
          <pub-id pub-id-type="medline">17367905</pub-id>
          <pub-id pub-id-type="pii">S0268-960X(07)00003-3</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
