<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v9i1e66189</article-id><article-id pub-id-type="doi">10.2196/66189</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Identification of Major Bleeding Events in Postoperative Patients With Malignant Tumors in Chinese Electronic Medical Records: Algorithm Development and Validation</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Li</surname><given-names>Hui</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yao</surname><given-names>Haiyang</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Gao</surname><given-names>Yuxiang</given-names></name><degrees>BA</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Luo</surname><given-names>Hang</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Cai</surname><given-names>Changbin</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Zhou</surname><given-names>Zhou</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Yuan</surname><given-names>Muhan</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Jiang</surname><given-names>Wei</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Thoracic Surgery, Beijing Chao-Yang Hospital, Capital Medical University</institution><addr-line>No.8 South Road of Workers' Stadium, Chaoyang District</addr-line><addr-line>Beijing</addr-line><country>China</country></aff><aff id="aff2"><institution>Department of Technology, Shanghai Palan DataRx Co., Ltd</institution><addr-line>Shanghai</addr-line><country>China</country></aff><aff id="aff3"><institution>Sanofi China Medical Affairs, Sanofi China</institution><addr-line>Shanghai</addr-line><country>China</country></aff><aff id="aff4"><institution>Pharmaceutical Business Division, Basebit (Shanghai) Information Technology Co., Ltd</institution><addr-line>Shanghai</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Mavragani</surname><given-names>Amaryllis</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Le</surname><given-names>Nam 
H</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Zhang</surname><given-names>Yang</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Hui Li, PhD, Department of Thoracic Surgery, Beijing Chao-Yang Hospital, Capital Medical University, No.8 South Road of Workers' Stadium, Chaoyang District, Beijing, 100020, China, 86 13701158350; <email>huilee@vip.sina.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>1</day><month>5</month><year>2025</year></pub-date><volume>9</volume><elocation-id>e66189</elocation-id><history><date date-type="received"><day>05</day><month>09</month><year>2024</year></date><date date-type="rev-recd"><day>25</day><month>03</month><year>2025</year></date><date date-type="accepted"><day>07</day><month>04</month><year>2025</year></date></history><copyright-statement>&#x00A9; Hui Li, Haiyang Yao, Yuxiang Gao, Hang Luo, Changbin Cai, Zhou Zhou, Muhan Yuan, Wei Jiang. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 1.5.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2025/1/e66189"/><abstract><sec><title>Background</title><p>Postoperative bleeding is a serious complication following abdominal tumor surgery, but it is often not clearly diagnosed and documented in clinical practice in China. Previous studies have relied on manual interpretation of medical records to determine the presence of postoperative bleeding in patients, which is time-consuming and laborious. More critically, this manual approach severely hinders the efficient analysis of large volumes of medical data, impeding in-depth research into the incidence patterns and risk factors of postoperative bleeding. It remains unclear whether machine learning can play a role in processing large volumes of medical text to identify postoperative bleeding effectively.</p></sec><sec><title>Objective</title><p>This study aimed to develop a machine learning model tool for identifying postoperative patients with major bleeding based on the electronic medical record system.</p></sec><sec sec-type="methods"><title>Methods</title><p>This study used data from the available information in the National Health and Medical Big Data (Eastern) Center in Jiangsu Province of China. We randomly selected the medical records of 2,000 patients who underwent in-hospital tumor resection surgery between January 2018 and December 2021 from the database. Physicians manually classified each note as present or absent for a major bleeding event during the postoperative hospital stay. Feature engineering involved bleeding expressions, high-frequency related expressions, and quantitative logical judgment, resulting in 270 features. 
Logistic regression (LR), K-nearest neighbor (KNN), and convolutional neural network (CNN) models were developed and trained using the 1600-note training set. The main outcomes were accuracy, sensitivity, specificity, positive predictive value (PPV), and negative predictive value (NPV) for each model.</p></sec><sec sec-type="results"><title>Results</title><p>Major bleeding was present in 4.31% (69/1600) of the training set and 4.75% (19/400) of the test set. In the test set, the LR method achieved an accuracy of 0.8275, a sensitivity of 0.8947, a specificity of 0.8241, a PPV of 0.2024, an NPV of 0.9937, and an <italic>F</italic><sub>1</sub>-score of 0.3301. The CNN method demonstrated an accuracy of 0.8900, sensitivity of 0.8421, specificity of 0.8924, PPV of 0.2807, NPV of 0.9913, and an <italic>F</italic><sub>1</sub>-score of 0.4211. While the KNN method showed a high specificity of 0.9948 and an accuracy of 0.9575 in the test set, its sensitivity was notably low at 0.2105. The C-statistic for the LR method was 0.9018 and for the CNN method was 0.8830.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Both the LR and CNN methods demonstrate good performance in identifying major bleeding in postoperative patients with malignant tumors from electronic medical records, exhibiting high sensitivity and specificity. 
Given the higher sensitivity of the LR method (89.47%) and the higher specificity of the CNN method (89.24%) in the test set, both models hold promise for practical application, depending on specific clinical priorities.</p></sec></abstract><kwd-group><kwd>machine learning</kwd><kwd>electronic medical record</kwd><kwd>postoperative patients with malignant tumors</kwd><kwd>postoperative bleeding</kwd><kwd>tumor surgery</kwd><kwd>abdominal</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Bleeding events are frequent complications encountered in postoperative clinical settings and can stem from the use of anticoagulant or antiplatelet drugs, invasive surgical treatment, or patient-related conditions and the existence of comorbidities, which are associated with increased morbidity, mortality, and health care costs [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref4">4</xref>]. Patients experiencing gastrointestinal bleeding while in the intensive care unit face a fourfold increase in mortality risk compared to those without bleeding issues, along with an additional eight-day stay within the unit [<xref ref-type="bibr" rid="ref4">4</xref>]. Ample clinical evidence supports the correlation between intraoperative blood loss, especially excessive blood loss, and adverse effects on the tumor prognosis. On the other hand, concern about postoperative bleeding may become the main reason why clinicians might be overly cautious in using medications to prevent venous thromboembolism (VTE), despite VTE being the second leading cause of death among patients with tumors. 
While the mortality rate of patients with tumors and VTE is three times higher than that of other patients, timely detection of bleeding risks through progress notes and balanced selection of anticoagulants are crucial for postoperative patients with malignant tumors [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>Currently, there is a growing number of retrospective studies on postoperative bleeding risk in cancer patients. It is also worth noting the lack of fully established risk prediction schemes or risk assessment tools for postoperative bleeding. However, a major challenge in conducting these studies is determining whether patients experienced bleeding events during their past treatment processes. In the clinical setting in China, current approaches to identifying bleeding episodes predominantly rely on diagnostic records, which often lack precision due to inconsistent descriptions and missing details. The task of pinpointing bleeding events from a patient&#x2019;s medical history can be particularly challenging, especially when dealing with extensive records [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. Despite the transition to electronic medical records, these valuable sources of data are frequently underused. Within these records, details within course notes, including physical examination reports and discharge summaries, often contain firsthand accounts of bleeding incidents or clear indications of their absence. Nonetheless, manual identification of these events can be both time-consuming and arduous, raising concerns about accuracy and practicality. Thus, emphasis should be placed on the imperative need to develop methodologies geared toward effectively identifying bleeding events within existing medical records.</p><p>In the realm of clinical research, the accurate identification of bleeding events within large clinical datasets holds paramount importance. 
Regrettably, the current landscape lacks automated and scalable machine learning (ML) methodologies tailored for this objective. This is a significant unmet need, especially considering the clinical need for early detection of postoperative bleeding to improve patient outcomes. ML has been regarded as a method for developing models that depict intricate nonlinear systems and handling a vast array of potential variables found in contemporary electronic medical records. ML techniques have found application in various health care scenarios such as forecasting cancer susceptibility, automatically categorizing clinical images, and predicting post-transplant prognosis. This is enabled by the abundance of real-world longitudinal datasets derived from the extensive integration of electronic health record (EHR) [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. For health care challenges involving large and complex datasets, especially those with unstructured data, ML methods have demonstrated advantages over traditional statistical regression methods [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. At present, mainstream text recognition methods such as support vector machine (SVM) or random forest (RF) are mostly suitable for large sample size data with high positivity rates, and the recognition of bleeding events may be a process of searching for sporadic positive events in large sample sizes [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. The aim of this study was to develop a machine learning model tool for identifying postoperative patients with major bleeding events based on the electronic medical record system.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Population</title><p>This study used a retrospective design. 
The data for this study were obtained from the National Health and Medical Big Data (Eastern) Center in Jiangsu Province, which is maintained by the Jiangsu Provincial Health Commission. This database includes clinical data from hospitals in Jiangsu province, with sensitive and identifiable information removed to protect privacy. Retrospective electronic medical records data, including clinical notes, were used for identifying the study population. We specifically focused on patients with malignant tumors who underwent surgical procedures between January 2018 and December 2021, reflecting actual clinical scenarios. To more accurately identify high-risk individuals for bleeding, we chose to focus on patients with chest, abdominal, and gynecological malignant tumors instead of urinary system tumors. This decision was based on the fact that surgeries for chest, abdominal, and gynecological tumors involve greater surgical trauma, making postoperative bleeding more likely. Additionally, secondary tumors were excluded because they were typically palliatively resected, resulting in a lower risk of bleeding compared to primary tumors.</p><p>The primary objective was to develop a classifier capable of recognizing significant cases of bleeding with clinical relevance. To achieve this, we randomly allocated 1600 notes for training purposes and 400 notes for testing the model.</p><p>The inclusion criteria in the study population are as follows: (1) patients were diagnosed with primary chest, abdominal, or gynecological malignant tumors; (2) patients underwent surgical resection of the malignant tumor; and (3) patients were &#x2265;18 years old. 
The exclusion criteria are as follows: (1) patients were diagnosed with urinary system tumors; (2) patients were diagnosed with a secondary tumor; (3) patients only underwent endoscopic surgery (ie, gastroscopy, enteroscopy, cystoscopy); (4) patients were diagnosed with bleeding before surgery or underwent surgery due to bleeding events; and (5) the hospitalization course record for the given visit was missing.</p><p>Each note was classified as major bleeding present, indicating that clinically relevant bleeding was referenced in the note, or major bleeding absent, indicating that clinically relevant bleeding was not referenced in the note. Major bleeding was defined as fatal bleeding, symptomatic bleeding in a critical area or organ, or bleeding causing a fall in hemoglobin level of &#x2265;2 g/dL or leading to transfusion of &#x2265;2 units of erythrocyte concentrate, according to the definition of the International Society on Thrombosis and Haemostasis [<xref ref-type="bibr" rid="ref16">16</xref>].</p><p>We included medical history and physical examination notes, progress notes, and discharge summaries that were signed by an attending physician. All investigators with direct access to the data completed a data use agreement. An independent medical professional was selected to read the medical and surgical records and label whether there was bleeding, and the bleeding classification (ie, major bleeding or nonmajor bleeding). Then, another independent medical professional was designated to review the labeled cases and extract or highlight the text content in the medical records where bleeding events occurred for reference in developing bleeding-related regular expressions. Any disagreements or unclear content in the text reading between the two medical professionals were recorded and discussed in an external expert workshop. 
In the workshop, questionable clinical scenarios were confirmed by two experienced medical experts.</p></sec><sec id="s2-2"><title>Feature Engineering</title><p>Given the low positivity rate and limited sample size of major bleeding events in the existing data, the methodology for identifying major bleeding events during manual annotation was carefully considered when crafting features. A significant portion of these features was extracted through the application of natural language processing (NLP) techniques like Jieba (for Chinese word segmentation) and regular expressions (for text and value extraction). These features are categorized as follows:</p><p>The first category: These features were generated from the content of manually annotated major bleeding events in the training set. This set includes 20 key features strongly linked to postoperative bleeding, such as &#x201C;postoperative,&#x201D; &#x201C;hematochezia,&#x201D; &#x201C;hemorrhagic fluid,&#x201D; and &#x201C;treatment with hemostasis surgery.&#x201D; Furthermore, 241 additional features were compiled by segmenting factors associated with major bleeding events.</p><p>The second category: These features were derived by tokenizing the patients&#x2019; course texts from the training set using Jieba and arranging them based on their frequency of occurrence. These features represent words that exhibit some relevance to major bleeding events but were not present in the first category. Features in this category were chosen based on a minimum frequency threshold of 900, aiming to complement the initial features and mitigate any shortcomings due to manually crafted features.</p><p>The third category: This feature reflects insights from clinical experts and primarily revolves around indicators like preoperative, intraoperative, and postoperative hemoglobin levels, bleeding volume, and transfusion volume. 
These indicators are instrumental in establishing the logic for identifying postoperative major bleeding for each patient. This feature relies heavily on quantitative logical assessments to fill in any gaps present in the preceding two feature groups. For instance, transfusion volumes extracted from structured surgical records and transfusion documentation are analyzed in conjunction with the standardized decision-making flowchart to determine major bleeding status. A detailed, structured process identification diagram is presented in <xref ref-type="fig" rid="figure1">Figure 1</xref>.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Structured process identification diagram for feature selection.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e66189_fig01.png"/></fig></sec><sec id="s2-3"><title>Model Training</title><p>A supervised learning approach was adopted, where a dataset comprising 2000 patients was divided into a training set of 1600 patients and a test set of 400 patients. 
Using the features generated through feature engineering and the corresponding labels assigned during manual review, three distinct ML models were successfully trained using different algorithms: logistic regression (LR), k-nearest neighbors (KNN), and convolutional neural networks (CNN).</p><p>Here is a breakdown of how each model was implemented:</p><list list-type="order"><list-item><p>LR:</p></list-item></list><list list-type="bullet"><list-item><p>Model implementation: developed using Keras</p></list-item><list-item><p>Architectural details: constructed with a single dense layer</p></list-item><list-item><p>Activation function: sigmoid function</p></list-item><list-item><p>Loss function: SigmoidFocalCrossEntropy</p></list-item><list-item><p>Optimizer: Adam optimizer</p></list-item><list-item><p>Validation split: 20% of the training dataset is set aside for validation</p></list-item></list><list list-type="order"><list-item><p>KNN:</p></list-item></list><list list-type="bullet"><list-item><p>Model implementation: Used the KNeighborsClassifier class from Scikit-Learn.</p></list-item></list><list list-type="order"><list-item><p>CNN:</p></list-item></list><list list-type="bullet"><list-item><p>Model implementation: implemented using Keras</p></list-item><list-item><p>Architectural details: featured 3 hidden layers - a 1-dimensional convolutional layer, a max pooling layer, and a dense layer</p></list-item><list-item><p>Activation functions: Rectified Linear Unit for the convolutional layer and sigmoid for the output layer</p></list-item><list-item><p>Output layer: a single dense unit</p></list-item><list-item><p>Validation split: Again, 20% of the training data is reserved for validation.</p></list-item></list><p>All three ML models were trained on the training set consisting of 1600 cases to learn the underlying patterns between the extracted features and the corresponding labels. 
The use of LR, KNN, and CNN allows for a diverse exploration of the dataset with different algorithms to harness their unique strengths in handling and learning from the data.</p></sec><sec id="s2-4"><title>Model Evaluation</title><p>Following the training phase, the evaluation of the three ML models&#x2014;LR, KNN, and CNN&#x2014;was conducted on the test set containing 400 cases. Various performance metrics were calculated for each model to assess their effectiveness in making predictions. These metrics included accuracy, sensitivity, positive predictive value (PPV), <italic>F</italic><sub>1</sub>-score, negative predictive value (NPV), and specificity of each model.</p><p>After calculating these metrics, the ML models with the highest sensitivity and specificity were chosen for further consideration due to their critical role in the diagnostic accuracy required by the specific use case. By prioritizing both sensitivity and specificity, we ensured a balanced assessment of the models&#x2019; performance regarding true positive and true negative rates.</p><p>In addition to the confusion matrices, which provide a detailed overview of true positives, false negatives, false positives, and true negatives at the default prediction threshold, we used the receiver operating characteristic curve and the corresponding area under the curve for each model. 
This comprehensive analysis of the receiver operating characteristic curves and area under the curve values allowed for a deeper understanding of the trade-offs between true positive and false positive rates across different thresholds and enabled the selection of the best-performing models based on a more nuanced evaluation beyond conventional accuracy metrics.</p></sec><sec id="s2-5"><title>Ethical Considerations</title><p>The study protocol was conducted in accordance with the Declaration of Helsinki and approved by the institutional review board at Shanghai Ethics Committee for Clinical Research (Approval number: SECCR/2023-119-01). The data used in this study were deidentified. Informed consent was waived by the Ethics Committee owing to the use of deidentified data.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Baseline Characteristics</title><p>The training set represented 1600 patients, of whom 48.5% (n=776) were female; the mean age was 62.86 (SD 9.16) years, with a total of 33,654 course records. The test set represented 400 patients, of whom 49.3% (n=197) were female; the mean age was 62.81 (SD 9.21) years, with a total of 8491 course records. 
The proportion of postoperative major bleeding was 4.31% (n=69) in the training set and 4.75% (n=19) in the test set (<xref ref-type="table" rid="table1">Table 1</xref>).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Population characteristics of training and test sets.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Characteristics</td><td align="left" valign="bottom">Training set (n=1600)</td><td align="left" valign="bottom">Test set (n=400)</td></tr></thead><tbody><tr><td align="left" valign="top">Gender (female), n (%)</td><td align="left" valign="top">776 (48.5)</td><td align="left" valign="top">197 (49.3)</td></tr><tr><td align="left" valign="top">Age (years), mean (SD)</td><td align="left" valign="top">62.86 (9.16)</td><td align="left" valign="top">62.81 (9.21)</td></tr><tr><td align="left" valign="top">Course records, n</td><td align="left" valign="top">33,654</td><td align="left" valign="top">8491</td></tr><tr><td align="left" valign="top">Proportion of major bleeding, n (%)</td><td align="left" valign="top">69 (4.31)</td><td align="left" valign="top">19 (4.75)</td></tr></tbody></table></table-wrap></sec><sec id="s3-2"><title>Note-Based Feature Selection</title><p>A total of 270 features were selected. First, 261 features related to postoperative bleeding were selected by manually decomposing the annotated bleeding descriptions into regular expressions. Second, Jieba was used to segment the disease course text and select 8 features according to word frequency classification. Furthermore, the last feature was created by a logical recognition graph generated from expert opinion. More details are provided in the Feature Engineering section. 
The frequency of the top 20 features is shown in <xref ref-type="table" rid="table2">Table 2</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Frequency of the top 20 selected features.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Features (description or regular expression, translated into English)</td><td align="left" valign="bottom">Frequency</td></tr></thead><tbody><tr><td align="left" valign="top">Postoperative</td><td align="left" valign="top">1592</td></tr><tr><td align="left" valign="top">(chest | abdomen | pelvis) cavity | rectum.{0,2} depression | posterior fornix | anastomosis | pancreas | digestive tract | stomach | vagina</td><td align="left" valign="top">1579</td></tr><tr><td align="left" valign="top">(out | lose) blood (stop | break into)?| hematoma | subcutaneous Ecchymosis | congestion</td><td align="left" valign="top">1033</td></tr><tr><td align="left" valign="top">Bleeding</td><td align="left" valign="top">1002</td></tr><tr><td align="left" valign="top">Bloody fluid | red.{0,2} drainage fluid</td><td align="left" valign="top">995</td></tr><tr><td align="left" valign="top">Hemostasis</td><td align="left" valign="top">528</td></tr><tr><td align="left" valign="top">Laparotomy</td><td align="left" valign="top">513</td></tr><tr><td align="left" valign="top">Dark red</td><td align="left" valign="top">347</td></tr><tr><td align="left" valign="top">Emergency</td><td align="left" valign="top">343</td></tr><tr><td align="left" valign="top">(To | Perform).{0,15} to stop bleeding</td><td align="left" valign="top">303</td></tr><tr><td align="left" valign="top">Swelling</td><td align="left" valign="top">273</td></tr><tr><td align="left" valign="top">(To | Perform).{0,5} to stop bleeding</td><td align="left" valign="top">253</td></tr><tr><td align="left" valign="top">(((chest | abdomen | pelvis) cavity | rectum.{0,2} depression | posterior fornix | anastomosis | pancreas | 
digestive tract | stomach).{0,10}bleeding | vagina.{0,2}large .{0,2}(out | lose)blood)</td><td align="left" valign="top">251</td></tr><tr><td align="left" valign="top">Blood loss</td><td align="left" valign="top">246</td></tr><tr><td align="left" valign="top">Pelvic cavity.{0,5}Drainage.{0,5}(bloody liquid | fluid dark red)</td><td align="left" valign="top">225</td></tr><tr><td align="left" valign="top">(?: inject | give).{0,20}(?:suspended less white (?:red blood cell | erythrocyte blood transfusion volume | red blood cell)|whole blood | red blood cell | less white suspended red blood cell | red suspension | suspended less white Red).{1,20}(?:U | ML | CC | ml | u | ml | mL | cc | infusion | unit) | (?: in | out | to).{0,5}(? :U |ML |CC | milliliter | u | ml | mL | cc | infusion | unit).{0,5}(?: suspended less white(?:cell red blood cell | red blood cell transfusion amount | red blood cell) | whole blood | Red blood cells | less white suspension red blood cells | red suspension | suspension less white red)</td><td align="left" valign="top">220</td></tr><tr><td align="left" valign="top">((out | loss) blood | introduction | bloody Effusion).{0,15}(U | ML | CC | ml | u | ml | mL | cc | unit)</td><td align="left" valign="top">220</td></tr><tr><td align="left" valign="top">Lose.{0,20}(red blood cells | whole blood | red suspension)</td><td align="left" valign="top">220</td></tr><tr><td align="left" valign="top">Structured recognition results</td><td align="left" valign="top">206</td></tr><tr><td align="left" valign="top">(exist | has).{0,5} bleed</td><td align="left" valign="top">178</td></tr></tbody></table></table-wrap></sec><sec id="s3-3"><title>Comparison and Verification of the Efficiency of Three Machine Learning Models</title><p>In the identification of major bleeding events within the test set, the LR method had an accuracy of 0.8275, sensitivity of 0.8947, specificity of 0.8241, PPV of 0.2024, NPV of 0.9937, and <italic>F</italic><sub>1</sub>-score of 0.3301. 
The accuracy of the CNN method in the test set was 0.8900, the sensitivity was 0.8421, the specificity was 0.8924, the PPV was 0.2807, the NPV was 0.9913, and the <italic>F</italic><sub>1</sub>-score was 0.4211. The KNN method had an accuracy of 0.9575, sensitivity of 0.2105, specificity of 0.9948, PPV of 0.6667, NPV of 0.9619, and <italic>F</italic><sub>1</sub>-score of 0.3200 (<xref ref-type="table" rid="table3">Table 3</xref>). The C-statistic was higher for the LR method (C=0.9018) than for the CNN method (C=0.8830) (<xref ref-type="fig" rid="figure2">Figure 2</xref>).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>The performance of each model in identifying major bleeding events.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Sets and models</td><td align="left" valign="bottom">Accuracy</td><td align="left" valign="bottom">Sensitivity</td><td align="left" valign="bottom">Positive predictive value</td><td align="left" valign="bottom"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="bottom">Negative predictive value</td><td align="left" valign="bottom">Specificity</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="7">Training set (n=1600)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LR<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="top">0.8231</td><td align="left" valign="top">1.0000</td><td align="left" valign="top">0.1960</td><td align="left" valign="top">0.3278</td><td align="left" valign="top">1.0000</td><td align="left" valign="top">0.8152</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>CNN<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="top">0.9056</td><td align="left" 
valign="top">0.9710</td><td align="left" valign="top">0.3102</td><td align="left" valign="top">0.4702</td><td align="left" valign="top">0.9986</td><td align="left" valign="top">0.9027</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>KNN<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">0.9643</td><td align="left" valign="top">0.1884</td><td align="left" valign="top">0.9286</td><td align="left" valign="top">0.3132</td><td align="left" valign="top">0.9647</td><td align="left" valign="top">0.9993</td></tr><tr><td align="left" valign="top" colspan="7">Test set (n=400)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>LR</td><td align="left" valign="top">0.8275</td><td align="left" valign="top">0.8947</td><td align="left" valign="top">0.2024</td><td align="left" valign="top">0.3301</td><td align="left" valign="top">0.9937</td><td align="left" valign="top">0.8241</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>CNN</td><td align="left" valign="top">0.8900</td><td align="left" valign="top">0.8421</td><td align="left" valign="top">0.2807</td><td align="left" valign="top">0.4211</td><td align="left" valign="top">0.9913</td><td align="left" valign="top">0.8924</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>KNN</td><td align="left" valign="top">0.9575</td><td align="left" valign="top">0.2105</td><td align="left" valign="top">0.6667</td><td align="left" valign="top">0.3200</td><td align="left" valign="top">0.9619</td><td align="left" valign="top">0.9948</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>LR: logistic regression.</p></fn><fn id="table3fn2"><p><sup>b</sup>CNN: convolutional neural 
network.</p></fn><fn id="table3fn3"><p><sup>c</sup>KNN: K-nearest neighbor.</p></fn></table-wrap-foot></table-wrap><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Receiver operating characteristic curves for identifying major bleeding from clinical notes using the LR and CNN methods. AUC: area under the curve; LR: logistic regression; CNN: convolutional neural network.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e66189_fig02.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>In this study, we implemented LR, CNN, and KNN algorithms for ML detection of bleeding events within electronic medical record systems. Notably, CNN has previously been applied to categorize radiology free-text reports, where it exhibited commendable accuracy [<xref ref-type="bibr" rid="ref17">17</xref>]. The CNN approach demonstrated a sensitivity of 84% and a specificity of 89% in detecting recorded bleeding events in patients at a threshold of 0.50, while the LR method showcased a sensitivity of 89% and a specificity of 82% at the same threshold. The KNN algorithm can be negatively impacted by high dimensionality in feature matrices. These findings underscore the viability of employing algorithms to pinpoint patients with bleeding events within extensive textual disease records.</p></sec><sec id="s4-2"><title>Evaluation Metrics and Application Context</title><p><italic>F</italic><sub>1</sub>-scores are frequently used in the evaluation of classifiers. The <italic>F</italic><sub>1</sub>-score can be considered the harmonic mean of precision and recall (sensitivity) [<xref ref-type="bibr" rid="ref18">18</xref>]. 
Thus, it symmetrically represents both precision (how accurately the model identifies only true positive instances) and sensitivity (how well the classifier identifies all actual positive instances) in one metric. Although the three machine learning models in this investigation did not attain high <italic>F</italic><sub>1</sub>-scores on the test set, it must be clarified that the goal of the investigation was to detect bleeding events as accurately as possible in practical applications for subsequent manual assessment. Therefore, greater attention is paid to the sensitivity and specificity (ie, how well the classifier identifies all actual negative instances) of the model, the high values of which imply that the model can detect hemorrhagic events while avoiding false alarms, which is crucial for this investigation. Consequently, priority is placed on the models&#x2019; sensitivity and the attainment of greater specificity. Hence, we maintain that the models developed in this study remain valuable for identifying bleeding events within disease records and represent a promising avenue for future research endeavors.</p></sec><sec id="s4-3"><title>Comparison to Prior Work</title><p>The integration of NLP and ML has proven successful across various domains. For instance, in the context of health care, predictive models leveraging ML and statistical methods have demonstrated the ability to forecast occurrences such as postpartum hemorrhage upon labor admission with reasonable discriminatory power. A prior study demonstrated the effectiveness of a Hybrid CNN-LSTM Autoencoder model for the detection of bleeding events within EHR data. This was accomplished through the integration of a supervised CNN with a pretrained, unsupervised Bidirectional Long Short-Term Memory autoencoder. 
The primary objective was to accurately predict the presence of a bleeding event within a given English sentence from an EHR record [<xref ref-type="bibr" rid="ref19">19</xref>].</p><p>While prior research primarily focused on English EHR data and general bleeding event detection, such as the study by Li et al that used a Hybrid CNN-LSTM Autoencoder for sentence-level bleeding detection, and a more recent work that applied retrieval augmented generation with large language models for detecting nonsurgical major bleeding events in English EHRs [<xref ref-type="bibr" rid="ref20">20</xref>], our investigation centers on the identification of major bleeding events within Chinese EHR. Chinese NLP presents inherent complexities, especially in word segmentation within EHR text, posing a significant hurdle. Furthermore, the shift in focus to major bleeding, which exhibits a considerably lower incidence rate compared to general bleeding events, substantially reduces the availability of positive samples, thereby intensifying the challenge for robust predictive modeling. Building upon these complexities, unlike the prior studies&#x2019; sentence-level analysis, our research takes a holistic approach to identifying major bleeding events within the entire patient visit context. This broader, patient-centric perspective further complicates the analysis and requires a more comprehensive understanding of the clinical narrative. These critical distinctions&#x2014;language specificity, event granularity, and analytical scope&#x2014;collectively highlight the greater challenges of our study compared to existing work in the field.</p></sec><sec id="s4-4"><title>Strengths and Limitations</title><p>A key strength of this study lies in the strategic approach to model selection and feature engineering. Initially, SVM and RF algorithms were explored. 
However, recognizing their reliance on effective word segmentation and the challenges posed by the heterogeneous nature of the text data, the focus was deliberately shifted to LR, KNN, and CNN. To further enhance model performance, term frequency-inverse document frequency (TF-IDF) scoring was effectively used to compute feature representations, addressing the complexities of free-text data. Moreover, to overcome the limitation of a relatively small sample size, particularly the scarcity of positive samples (fewer than 100), and the resulting dispersed TF-IDF features, a targeted feature engineering approach was strategically adopted. This involved selecting predefined features associated with bleeding based on expert medical domain knowledge. This approach allowed the ML models to concentrate on clinically relevant features, represented as binary values (0 and 1), rather than solely relying on traditional TF-IDF metrics applied to the entire text. This targeted feature engineering proved crucial for enhancing performance in a limited data setting. As a result of these strengths, including strategic model selection, targeted feature engineering, manual annotation, and training with limited samples, the LR and CNN models demonstrated superior performance compared to SVM and RF models based on TF-IDF features for detecting major bleeding events. This aligns with findings from similar research, as evidenced by a study that underscored the enhanced performance of CNNs when expert annotation of text data is incorporated [<xref ref-type="bibr" rid="ref19">19</xref>], further validating the efficacy of CNNs in text classification tasks within the medical domain.</p><p>However, manual feature selection has certain limitations. First, engineers must dedicate time to writing code for feature extraction based on the definitions provided by the medical field, which is a more time-intensive process compared to the automatic feature computation performed by machines. 
To partially mitigate this, we prioritized high-impact features identified through clinical expert consultations, but future studies could integrate semiautomated pipelines to balance efficiency and domain specificity. Second, as previously discussed in related research [<xref ref-type="bibr" rid="ref21">21</xref>], the identification of postoperative bleeding and feature selection depends on manual procedures, thereby increasing the likelihood of bias and oversight. While dual annotations by multiple clinicians were used to reduce subjectivity, discrepancies were resolved through consensus rather than quantitative metrics, potentially affecting reproducibility. Future work should adopt standardized annotation protocols with inter-rater reliability assessments. Third, the chosen features encapsulate domain-specific knowledge, sometimes being closely tied to specific hospital departments with varying requirements and documentation practices in electronic medical records. Broader applicability requires multicenter collaboration to harmonize feature definitions across institutions. Moreover, our study included a retrospective analysis with a relatively limited number of specimens, a factor to be considered for broader applicability in ML analyses. Similar challenges have been observed in other comparable studies [<xref ref-type="bibr" rid="ref19">19</xref>]. 
Consequently, this limited generalizability not only impedes application in diverse fields but also means that the features often need to be redesigned and recreated when the approach is extended to other illnesses, hospitals, and departments.</p></sec><sec id="s4-5"><title>Conclusions</title><p>Based on our new text feature selection method, both the LR and CNN methods perform well in identifying major bleeding occurring in postoperative patients with malignant tumors from electronic medical records.</p></sec></sec></body><back><ack><p>Medical writing support, under the direction of the authors, was provided by Hao Hu (Shanghai Palan DataRx Co., Ltd) and funded by Sanofi. This study was sponsored by Sanofi.</p></ack><notes><sec><title>Data Availability</title><p>The datasets generated or analyzed during this study are not publicly available due to patient privacy concerns but are available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: H Li, HY, YG, H Luo, CC, ZZ, MY</p><p>Data curation: WJ</p><p>Formal analysis: H Luo, HY, YG</p><p>Methodology: H Li, HY, YG, H Luo, CC, ZZ, MY</p><p>Supervision: H Li</p><p>Writing &#x2013; original draft: HY, YG, H Luo, WJ</p><p>Writing &#x2013; review &#x0026; editing: H Li, CC, ZZ, MY</p></fn><fn fn-type="conflict"><p>The authors individually and collectively are responsible for all content and editorial decisions and received no payment from Sanofi related to the development/presentation of this publication. CC and ZZ are Sanofi employees and may hold shares and/or stock options in the company. MY was previously a Sanofi employee and is now employed at Beijing Baheal Chengchuang Pharmaceutical Investment Co., Ltd. 
The other authors declare that there is no conflict of interest.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">CNN</term><def><p>convolutional neural network</p></def></def-item><def-item><term id="abb2">KNN</term><def><p>K-nearest neighbor</p></def></def-item><def-item><term id="abb3">LR</term><def><p>logistic regression</p></def></def-item><def-item><term id="abb4">ML</term><def><p>machine learning</p></def></def-item><def-item><term id="abb5">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term id="abb6">NPV</term><def><p>negative predictive value</p></def></def-item><def-item><term id="abb7">PPV</term><def><p>positive predictive value</p></def></def-item><def-item><term id="abb8">RF</term><def><p>random forest</p></def></def-item><def-item><term id="abb9">SVM</term><def><p>support vector machine</p></def></def-item><def-item><term id="abb10">VTE</term><def><p>venous thromboembolism</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tasu</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Vesselle</surname><given-names>G</given-names> </name><name name-style="western"><surname>Herpe</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Postoperative abdominal bleeding</article-title><source>Diagn Interv Imaging</source><year>2015</year><volume>96</volume><issue>7-8</issue><fpage>823</fpage><lpage>831</lpage><pub-id pub-id-type="doi">10.1016/j.diii.2015.03.013</pub-id><pub-id pub-id-type="medline">26078019</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Maxwell</surname><given-names>MJ</given-names> </name><name 
name-style="western"><surname>Wilson</surname><given-names>MJA</given-names> </name></person-group><article-title>Complications of blood transfusion</article-title><source>Continuing Education in Anaesthesia Critical Care &#x0026; Pain</source><year>2006</year><month>12</month><volume>6</volume><issue>6</issue><fpage>225</fpage><lpage>229</lpage><pub-id pub-id-type="doi">10.1093/bjaceaccp/mkl053</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Berger</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Bhatt</surname><given-names>DL</given-names> </name><name name-style="western"><surname>Steg</surname><given-names>PG</given-names> </name><etal/></person-group><article-title>Bleeding, mortality, and antiplatelet therapy: results from the Clopidogrel for High Atherothrombotic Risk and Ischemic Stabilization, Management, and Avoidance (CHARISMA) trial</article-title><source>Am Heart J</source><year>2011</year><month>07</month><volume>162</volume><issue>1</issue><fpage>98</fpage><lpage>105</lpage><pub-id pub-id-type="doi">10.1016/j.ahj.2011.04.015</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cook</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Griffith</surname><given-names>LE</given-names> </name><name name-style="western"><surname>Walter</surname><given-names>SD</given-names> </name><etal/></person-group><article-title>The attributable mortality and length of intensive care unit stay of clinically important gastrointestinal bleeding in critically ill patients</article-title><source>Crit Care</source><year>2001</year><month>12</month><volume>5</volume><issue>6</issue><fpage>368</fpage><lpage>375</lpage><pub-id 
pub-id-type="doi">10.1186/cc1071</pub-id><pub-id pub-id-type="medline">11737927</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Khorana</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Mackman</surname><given-names>N</given-names> </name><name name-style="western"><surname>Falanga</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Cancer-associated venous thromboembolism</article-title><source>Nat Rev Dis Primers</source><year>2022</year><month>02</month><day>17</day><volume>8</volume><issue>1</issue><fpage>11</fpage><pub-id pub-id-type="doi">10.1038/s41572-022-00336-y</pub-id><pub-id pub-id-type="medline">35177631</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Anderson</surname><given-names>DR</given-names> </name><name name-style="western"><surname>Morgano</surname><given-names>GP</given-names> </name><name name-style="western"><surname>Bennett</surname><given-names>C</given-names> </name><etal/></person-group><article-title>American Society of Hematology 2019 guidelines for management of venous thromboembolism: prevention of venous thromboembolism in surgical hospitalized patients</article-title><source>Blood Adv</source><year>2019</year><month>12</month><day>10</day><volume>3</volume><issue>23</issue><fpage>3898</fpage><lpage>3944</lpage><pub-id pub-id-type="doi">10.1182/bloodadvances.2019000975</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Taggart</surname><given-names>M</given-names> </name><name name-style="western"><surname>Chapman</surname><given-names>WW</given-names> </name><name 
name-style="western"><surname>Steinberg</surname><given-names>BA</given-names> </name><etal/></person-group><article-title>Comparison of 2 natural language processing methods for identification of bleeding among critically ill patients</article-title><source>JAMA Netw Open</source><year>2018</year><month>10</month><day>5</day><volume>1</volume><issue>6</issue><fpage>e183451</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2018.3451</pub-id><pub-id pub-id-type="medline">30646240</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Corey</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Kashyap</surname><given-names>S</given-names> </name><name name-style="western"><surname>Lorenzi</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Development and validation of machine learning models to identify high-risk surgical patients using automatically curated electronic health record data (Pythia): a retrospective, single-site study</article-title><source>PLoS Med</source><year>2018</year><volume>15</volume><issue>11</issue><fpage>e1002701</fpage><pub-id pub-id-type="doi">10.1371/journal.pmed.1002701</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rajkomar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Oren</surname><given-names>E</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Scalable and accurate deep learning with electronic health records</article-title><source>NPJ Digit Med</source><year>2018</year><volume>1</volume><fpage>18</fpage><pub-id pub-id-type="doi">10.1038/s41746-018-0029-1</pub-id><pub-id 
pub-id-type="medline">31304302</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Weller</surname><given-names>GB</given-names> </name><name name-style="western"><surname>Lovely</surname><given-names>J</given-names> </name><name name-style="western"><surname>Larson</surname><given-names>DW</given-names> </name><name name-style="western"><surname>Earnshaw</surname><given-names>BA</given-names> </name><name name-style="western"><surname>Huebner</surname><given-names>M</given-names> </name></person-group><article-title>Leveraging electronic health records for predictive modeling of post-surgical complications</article-title><source>Stat Methods Med Res</source><year>2018</year><month>11</month><volume>27</volume><issue>11</issue><fpage>3271</fpage><lpage>3285</lpage><pub-id pub-id-type="doi">10.1177/0962280217696115</pub-id><pub-id pub-id-type="medline">29298612</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kourou</surname><given-names>K</given-names> </name><name name-style="western"><surname>Exarchos</surname><given-names>TP</given-names> </name><name name-style="western"><surname>Exarchos</surname><given-names>KP</given-names> </name><name name-style="western"><surname>Karamouzis</surname><given-names>MV</given-names> </name><name name-style="western"><surname>Fotiadis</surname><given-names>DI</given-names> </name></person-group><article-title>Machine learning applications in cancer prognosis and prediction</article-title><source>Comput Struct Biotechnol J</source><year>2015</year><volume>13</volume><fpage>8</fpage><lpage>17</lpage><pub-id pub-id-type="doi">10.1016/j.csbj.2014.11.005</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Murdoch</surname><given-names>TB</given-names> </name><name name-style="western"><surname>Detsky</surname><given-names>AS</given-names> </name></person-group><article-title>The inevitable application of big data to health care</article-title><source>JAMA</source><year>2013</year><month>04</month><day>3</day><volume>309</volume><issue>13</issue><fpage>1351</fpage><lpage>1352</lpage><pub-id pub-id-type="doi">10.1001/jama.2013.393</pub-id><pub-id pub-id-type="medline">23549579</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>LeCun</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Bengio</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Hinton</surname><given-names>G</given-names> </name></person-group><article-title>Deep learning</article-title><source>Nature</source><year>2015</year><month>05</month><day>28</day><volume>521</volume><issue>7553</issue><fpage>436</fpage><lpage>444</lpage><pub-id pub-id-type="doi">10.1038/nature14539</pub-id><pub-id pub-id-type="medline">26017442</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Delate</surname><given-names>T</given-names> </name><name name-style="western"><surname>Jones</surname><given-names>AE</given-names> </name><name name-style="western"><surname>Clark</surname><given-names>NP</given-names> </name><name name-style="western"><surname>Witt</surname><given-names>DM</given-names> </name></person-group><article-title>Assessment of the coding accuracy of warfarin-related bleeding events</article-title><source>Thromb Res</source><year>2017</year><month>11</month><volume>159</volume><fpage>86</fpage><lpage>90</lpage><pub-id 
pub-id-type="doi">10.1016/j.thromres.2017.10.004</pub-id><pub-id pub-id-type="medline">29035718</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wilchesky</surname><given-names>M</given-names> </name><name name-style="western"><surname>Tamblyn</surname><given-names>RM</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>A</given-names> </name></person-group><article-title>Validation of diagnostic codes within medical services claims</article-title><source>J Clin Epidemiol</source><year>2004</year><month>02</month><volume>57</volume><issue>2</issue><fpage>131</fpage><lpage>141</lpage><pub-id pub-id-type="doi">10.1016/S0895-4356(03)00246-4</pub-id><pub-id pub-id-type="medline">15125622</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schulman</surname><given-names>S</given-names> </name><name name-style="western"><surname>Anger&#x00E5;s</surname><given-names>U</given-names> </name><name name-style="western"><surname>Bergqvist</surname><given-names>D</given-names> </name><etal/></person-group><article-title>Definition of major bleeding in clinical investigations of antihemostatic medicinal products in surgical patients</article-title><source>J Thromb Haemost</source><year>2010</year><month>01</month><volume>8</volume><issue>1</issue><fpage>202</fpage><lpage>204</lpage><pub-id pub-id-type="doi">10.1111/j.1538-7836.2009.03678.x</pub-id><pub-id pub-id-type="medline">19878532</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>MC</given-names> </name><name name-style="western"><surname>Ball</surname><given-names>RL</given-names> 
</name><name name-style="western"><surname>Yang</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Deep learning to classify radiology free-text reports</article-title><source>Radiology</source><year>2018</year><month>03</month><volume>286</volume><issue>3</issue><fpage>845</fpage><lpage>852</lpage><pub-id pub-id-type="doi">10.1148/radiol.2017171115</pub-id><pub-id pub-id-type="medline">29135365</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rai</surname><given-names>T</given-names> </name><name name-style="western"><surname>Morisi</surname><given-names>A</given-names> </name><name name-style="western"><surname>Bacci</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Keeping pathologists in the loop and an adaptive F1-score threshold method for mitosis detection in canine perivascular wall tumours</article-title><source>Cancers (Basel)</source><year>2024</year><month>02</month><day>2</day><volume>16</volume><issue>3</issue><fpage>644</fpage><pub-id pub-id-type="doi">10.3390/cancers16030644</pub-id><pub-id pub-id-type="medline">38339394</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>R</given-names> </name><name name-style="western"><surname>Hu</surname><given-names>B</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>F</given-names> </name><etal/></person-group><article-title>Detection of bleeding events in electronic health record notes using convolutional neural network models enhanced with recurrent neural network autoencoders: deep learning approach</article-title><source>JMIR Med Inform</source><year>2019</year><volume>7</volume><issue>1</issue><fpage>e10788</fpage><pub-id 
pub-id-type="doi">10.2196/10788</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kaplinsky</surname><given-names>P</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>R</given-names> </name><name name-style="western"><surname>Fusillo</surname><given-names>TF</given-names> </name><name name-style="western"><surname>Leader</surname><given-names>A</given-names> </name><name name-style="western"><surname>Zwicker</surname><given-names>JI</given-names> </name><name name-style="western"><surname>Mantha</surname><given-names>S</given-names> </name></person-group><article-title>Retrieval augmented generation for the detection of major bleeding events in the electronic health record</article-title><source>Blood</source><year>2024</year><month>11</month><day>5</day><volume>144</volume><issue>Supplement 1</issue><fpage>2263</fpage><lpage>2263</lpage><pub-id pub-id-type="doi">10.1182/blood-2024-203911</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jing</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhuang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Fang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>H</given-names> </name></person-group><article-title>Multiple machine learning approaches based on postoperative prediction of pulmonary complications in patients with emergency cerebral hemorrhage surgery</article-title><source>Front Surg</source><year>2021</year><volume>8</volume><fpage>797872</fpage><pub-id pub-id-type="doi">10.3389/fsurg.2021.797872</pub-id><pub-id 
pub-id-type="medline">35127804</pub-id></nlm-citation></ref></ref-list></back></article>