<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v6i6e33834</article-id>
      <article-id pub-id-type="pmid">35749214</article-id>
      <article-id pub-id-type="doi">10.2196/33834</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Identifying Patients With Delirium Based on Unstructured Clinical Notes: Observational Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Focsa</surname>
            <given-names>Mircea</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zanotto</surname>
            <given-names>Bruna</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chen</surname>
            <given-names>YenPin</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ren</surname>
            <given-names>Ziyou</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Ge</surname>
            <given-names>Wendong</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1557-5336</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Alabsi</surname>
            <given-names>Haitham</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6354-4679</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Jain</surname>
            <given-names>Aayushee</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5018-3234</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Ye</surname>
            <given-names>Elissa</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4851-6543</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Sun</surname>
            <given-names>Haoqi</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5041-8312</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Fernandes</surname>
            <given-names>Marta</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7203-2832</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Magdamo</surname>
            <given-names>Colin</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8965-4630</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Tesh</surname>
            <given-names>Ryan A</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6154-6248</ext-link>
        </contrib>
        <contrib id="contrib9" contrib-type="author">
          <name name-style="western">
            <surname>Collens</surname>
            <given-names>Sarah I</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7010-7266</ext-link>
        </contrib>
        <contrib id="contrib10" contrib-type="author">
          <name name-style="western">
            <surname>Newhouse</surname>
            <given-names>Amy</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7392-4242</ext-link>
        </contrib>
        <contrib id="contrib11" contrib-type="author">
          <name name-style="western">
            <surname>MVR Moura</surname>
            <given-names>Lidia</given-names>
          </name>
          <degrees>MD, PhD, MPH</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1191-1315</ext-link>
        </contrib>
        <contrib id="contrib12" contrib-type="author">
          <name name-style="western">
            <surname>Zafar</surname>
            <given-names>Sahar</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5252-5376</ext-link>
        </contrib>
        <contrib id="contrib13" contrib-type="author">
          <name name-style="western">
            <surname>Hsu</surname>
            <given-names>John</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8244-231X</ext-link>
        </contrib>
        <contrib id="contrib14" contrib-type="author">
          <name name-style="western">
            <surname>Akeju</surname>
            <given-names>Oluwaseun</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6740-1250</ext-link>
        </contrib>
        <contrib id="contrib15" contrib-type="author">
          <name name-style="western">
            <surname>Robbins</surname>
            <given-names>Gregory K</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7545-5817</ext-link>
        </contrib>
        <contrib id="contrib16" contrib-type="author">
          <name name-style="western">
            <surname>Mukerji</surname>
            <given-names>Shibani S</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5677-6954</ext-link>
        </contrib>
        <contrib id="contrib17" contrib-type="author">
          <name name-style="western">
            <surname>Das</surname>
            <given-names>Sudeshna</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9486-6811</ext-link>
        </contrib>
        <contrib id="contrib18" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Westover</surname>
            <given-names>M Brandon</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Massachusetts General Hospital</institution>
            <addr-line>50 Staniford Street</addr-line>
            <addr-line>Boston, MA, 02114</addr-line>
            <country>United States</country>
            <phone>1 650 862 1154</phone>
            <email>mwestover@mgh.harvard.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4803-312X</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Massachusetts General Hospital</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: M Brandon Westover <email>mwestover@mgh.harvard.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>6</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>24</day>
        <month>6</month>
        <year>2022</year>
      </pub-date>
      <volume>6</volume>
      <issue>6</issue>
      <elocation-id>e33834</elocation-id>
      <history>
        <date date-type="received">
          <day>27</day>
          <month>9</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>29</day>
          <month>11</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>22</day>
          <month>1</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>10</day>
          <month>2</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Wendong Ge, Haitham Alabsi, Aayushee Jain, Elissa Ye, Haoqi Sun, Marta Fernandes, Colin Magdamo, Ryan A Tesh, Sarah I Collens, Amy Newhouse, Lidia MVR Moura, Sahar Zafar, John Hsu, Oluwaseun Akeju, Gregory K Robbins, Shibani S Mukerji, Sudeshna Das, M Brandon Westover. Originally published in JMIR Formative Research (https://formative.jmir.org), 24.06.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2022/6/e33834" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Delirium in hospitalized patients is a syndrome of acute brain dysfunction. Diagnostic (International Classification of Diseases [ICD]) codes are often used in studies using electronic health records (EHRs), but they are inaccurate.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>We sought to develop a more accurate method using natural language processing (NLP) to detect delirium episodes on the basis of unstructured clinical notes.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We collected 1.5 million notes from &#62;10,000 patients from among 9 hospitals. Seven experts iteratively labeled 200,471 sentences. Using these, we trained three NLP classifiers: Support Vector Machine, Recurrent Neural Networks, and Transformer. Testing was performed using an external data set. We also evaluated associations with delirium billing (ICD) codes, medications, orders for restraints and sitters, direct assessments (Confusion Assessment Method [CAM] scores), and in-hospital mortality. F1 scores, confusion matrices, and areas under the receiver operating characteristic curve (AUCs) were used to compare NLP models. We used the φ coefficient to measure associations with other delirium indicators.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The transformer NLP performed best on the following parameters: micro F1=0.978, macro F1=0.918, positive AUC=0.984, and negative AUC=0.992. NLP detections exhibited higher correlations (φ) than ICD codes with deliriogenic medications (0.194 vs 0.073 for ICD codes), restraints and sitter orders (0.358 vs 0.177), mortality (0.216 vs 0.000), and CAM scores (0.256 vs –0.028).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Clinical notes are an attractive alternative to ICD codes for EHR delirium studies but require automated methods. Our NLP model detects delirium with high accuracy, similar to manual chart review. Our NLP approach can provide more accurate determination of delirium for large-scale EHR-based studies regarding delirium, quality improvement, and clinical trials.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>delirium</kwd>
        <kwd>electronic health records</kwd>
        <kwd>clinical notes</kwd>
        <kwd>machine learning</kwd>
        <kwd>natural language processing</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Delirium is an acute neuropsychiatric syndrome with features of inattention and global cognitive dysfunction, associated with increased hospital length of stay, in-hospital mortality, and long-term cognitive disability [<xref ref-type="bibr" rid="ref1">1</xref>]. Delirium occurs in up to 26% of hospitalized patients; prevalence rates may reach 42% in patients older than age 65 years [<xref ref-type="bibr" rid="ref2">2</xref>].</p>
      <p>Electronic health records (EHRs) offer a rich source of information for studies of delirium; however, determining which patients have delirium is challenging. Manual review of medical records is time consuming, limiting studies to a small fraction of patients at risk. A more scalable approach is to use International Classification of Diseases (ICD) billing codes. This approach was recently used by a study [<xref ref-type="bibr" rid="ref3">3</xref>] to assess 200 patients admitted to a skilled nursing facility, revealing that ICD codes achieved 96.0% specificity but only 53.1% sensitivity. Another study [<xref ref-type="bibr" rid="ref4">4</xref>] analyzed clinical data from 184 older adults at one academic medical center and found that ICD codes had a specificity of 98% and sensitivity of 18%. Thus, ICD codes miss a large fraction of patients with delirium.</p>
      <p>On the other hand, rich information about patients’ status exists in narrative clinical notes from doctors, nurses, physical therapists, and other health care workers [<xref ref-type="bibr" rid="ref5">5</xref>]. However, extracting this information is challenging because of the flexibility of natural language.</p>
      <p>In this work, we collected 1.5 million clinical notes from over 10,000 patients from 7 distinct cohorts from among 9 hospitals and developed a natural language processing (NLP) algorithm to identify patients with delirium from unstructured EHR notes.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Set Description and Sentence Extraction</title>
        <p>We collected 1,565,678 clinical notes from 10,516 patients from 9 hospitals, including Massachusetts General Hospital, Brigham and Women's Hospital, Cooley Dickinson Hospital, Martha's Vineyard Hospital, McLean Hospital, Nantucket Cottage Hospital, Newton-Wellesley Hospital, North Shore Medical Center, and Spaulding Rehabilitation Hospital. These 10,516 patients were from 7 previously assembled cohort studies:</p>
        <list list-type="bullet">
          <list-item>
            <p>Antiepileptic drug (AED) data set: this data set comprises patients who received AEDs and is used to study adverse effects of AEDs (n=852).</p>
          </list-item>
          <list-item>
            <p>GIFTS data set: this data set comprises older patients admitted for orthopedic surgery and is used to study delirium (n=576).</p>
          </list-item>
          <list-item>
            <p>Dementia data set: this data set comprises patients who were at risk for dementia and is used to study dementia (n=802).</p>
          </list-item>
          <list-item>
            <p>COVID-19 data set: this data set comprises patients who were hospitalized for COVID-19 and is used to study hospitalization, intensive care unit admission, intubation, and mortality prediction for patients with COVID-19 (n=3429).</p>
          </list-item>
          <list-item>
            <p>NCC data set: this data set is used to study neurological diseases such as delirium, headache, and anosmia for patients at neurocritical care units (n=1985).</p>
          </list-item>
          <list-item>
            <p>LTM data set: this data set comprises acutely ill patients undergoing continuous electroencephalographic monitoring (n=395). These patients underwent in-person delirium assessments by research staff. Thus, this data set contains assessment records rather than clinical notes.</p>
          </list-item>
          <list-item>
            <p>Control data set: this data set comprises inpatients randomly selected as a control group from the Massachusetts General Brigham hospital system (n=2477).</p>
          </list-item>
        </list>
        <p>Demographic features of these cohorts are shown in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
      </sec>
      <sec>
        <title>Creating the Gold Standard: Sentence Labeling</title>
        <p>We first created a comprehensive collection of keywords related to delirium; these included the following: “delirium,” “delirious,” “encephalopathy,” “confused,” “confusion,” “agitated,” “agitation,” “inattentive,” “inattention,” “disorient,” “disoriented,” “disorientation,” “reorient,” “restraints,” “lethargy,” “psychosis,” “hallucination,” “inappropriate behavior,” “fluctuating arousal,” “altered mental status,” “mental status change,” “fluctuating mental status,” and “waxing and waning mental status.” We extracted all sentences containing any of these keywords from the assembled collection of notes.</p>
        <p>Next, we created a gold-standard set of labels for sentences. Examples are shown in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
        <p>We developed a graphical user interface (GUI) for efficient iterative labeling of sentences. Active learning, an algorithm to select the most informative samples, was used to select candidate sentences in each round. The labeling process was as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>Step 0: candidate sentences were randomly selected from the set of unlabeled sentences.</p>
          </list-item>
          <list-item>
            <p>Step 1: experts labeled candidate sentences and created regular expressions called “always patterns” (described below in Regular Expression Generation).</p>
          </list-item>
          <list-item>
            <p>Step 2: unlabeled sentences were screened for “always patterns,” corresponding labels were assigned to sentences that match, and these were added to the labeled set.</p>
          </list-item>
          <list-item>
            <p>Step 3: the labeled sentences were used to train a classifier (introduced in Prediction Model).</p>
          </list-item>
          <list-item>
            <p>Step 4: the classifier was used to scan unlabeled sentences and assign them a label and an embedding vector.</p>
          </list-item>
          <list-item>
            <p>Step 5: sentence embedding vectors were used to generate an embedding map via Uniform Manifold Approximation and Projection [<xref ref-type="bibr" rid="ref6">6</xref>].</p>
          </list-item>
          <list-item>
            <p>Step 6: candidate sentences were selected from the unlabeled data set with two query strategies: uncertainty based on the entropy of prediction scores and diversity based on the embedding map (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>). Each query strategy selected half of the candidate sentences for the next round. Then, the process returned to step 1.</p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>Regular Expression Generation</title>
        <p>While labeling sentences, experts created “always patterns”: regular expressions that, when matched, warrant assigning the corresponding label to the sentence. <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> provides examples of “always patterns” for positive, negative, and neither patterns. The GUI used “always patterns” to scan the remaining unlabeled sentences to assign a label to all matched sentences, thus enhancing labeling efficiency.</p>
      </sec>
      <sec>
        <title>Prediction Model</title>
        <p>We developed three models to identify delirium sentences: Support Vector Machine (SVM), long short-term memory (LSTM), and Transformer models. The LSTM model was also used in active learning when collecting labels. Details of the three models are as follows.</p>
        <p>SVM is a widely used text classifier based on a “bag of words” representation [<xref ref-type="bibr" rid="ref7">7</xref>]. Sentences with delirium-related keywords are first transformed into sentence vectors via “a bag of unigrams and bigrams,” and the SVM algorithm finds hyperplanes that separate different categories. The distances between sample points and hyperplanes are used to calculate prediction scores.</p>
        <p>Recurrent neural networks with LSTM units (RNN-LSTM) are common models for sequence learning, where an LSTM unit contains a cell for memory, an input gate to control input information flow, an output gate to control output information flow, and a forgetting gate to update memory [<xref ref-type="bibr" rid="ref8">8</xref>]. We used a 3-layer bidirectional RNN with LSTM units to encode sentences. The vector representation corresponding to the keyword location was used for classification.</p>
        <p>A transformer is a previously proposed [<xref ref-type="bibr" rid="ref9">9</xref>] transduction model that computes a representation of each word in a sentence relying on self-attention. It is also the model used in Bidirectional Encoder Representations from Transformers (BERT) [<xref ref-type="bibr" rid="ref10">10</xref>]. We used a 3-layer Transformer model to transform a sentence into a sequence of vectors. The vector representation corresponding to the delirium keyword was then used for classification. The word vectors from BERT were used as initial vectors.</p>
      </sec>
      <sec>
        <title>Comparison of Delirium NLP Results With Other Delirium Indicators</title>
        <p>To evaluate construct validity of our EHR-based delirium detection algorithms, we evaluated the strength of the association between presence of delirium as detected by our NLP models and other clinical outcomes or events known to be associated with delirium. These included the use of ICD billing codes for delirium; use of medications related to delirium; use of restraints and sitters; and in-hospital mortality. For one cohort (the LTM data set) we had access to one-time in-person delirium assessments using the Confusion Assessment Method (CAM), which has already been validated as a good proxy for DSM-5 in prior studies. For these, we compared the presence of delirium, as defined by CAM, with the presence of positive delirium sentences in clinical notes during hospitalization. Details are provided in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p>
      </sec>
      <sec>
        <title>Interrater Agreement</title>
        <p>Pairwise interrater agreement (IRA) is used to measure agreement between pairs of human raters, and between human raters and the model, for each category. Details are provided in <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>.</p>
      </sec>
      <sec>
        <title>Data Split for Evaluation</title>
        <p>We combined the AED, GIFTS, Dementia, COVID-19, NCC, and Control data sets to yield a data set for sentence labeling based on active learning. We collected 200,471 labeled sentences, including those directly labeled by human experts and those matched by “always patterns.” Of the 200,471 labeled sentences, 176,800 were “positive,” 15,577 were “negative,” and 8094 were “neither” sentences.</p>
        <p>We designed two types of tests for NLP delirium detection algorithms: an internal test and an external test (see <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>).</p>
        <sec>
          <title>Internal Test</title>
          <p>In the internal test, we followed the standard machine learning evaluation pipeline, randomly splitting the 200,471 labeled sentences into a training data set (120,283 sentences, 60%), validation data set (40,094 sentences, 20%) for hyperparameter tuning, and test data set (40,094 sentences, 20%) for performance evaluation.</p>
        </sec>
        <sec>
          <title>External Test</title>
          <p>The LTM data set was not used for training the NLP algorithms. It was used entirely for testing. The LTM data set contained 16,067 sentences: 14,378 positive, 1193 negative, and 496 neither sentences.</p>
        </sec>
      </sec>
      <sec>
        <title>Data Security and Ethics Approval</title>
        <p>We have ethics approval (2013P001024) from the Mass General Brigham institutional review board to work with identified data internally. We will deidentify the data for sharing them with external partners to test and improve the models together. Some existing deidentification algorithms have been developed, such as the PhysioNet algorithm [<xref ref-type="bibr" rid="ref11">11</xref>] and the Philter algorithm [<xref ref-type="bibr" rid="ref12">12</xref>], but the recall of these algorithms is close to, but not exactly, 100%. Another option is federated learning, namely training the model across multiple decentralized machines holding local data by us and our external partners, without exchanging them.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Performances of Delirium NLP Classifiers</title>
        <p>In the following analysis, the 95% CIs were calculated through bootstrapping [<xref ref-type="bibr" rid="ref13">13</xref>].</p>
        <p><xref ref-type="table" rid="table1">Table 1</xref> compares performances of SVM, RNN-LSTM, and Transformer on both internal and external tests. As the data set is an imbalanced multiclass data set, micro F1 scores and macro F1 scores were used to evaluate performance [<xref ref-type="bibr" rid="ref14">14</xref>]. When using micro F1 scores, the performance of the SVM, RNN-LSTM, and Transformer models was close on both the internal and external test sets. However, when using macro F1 scores, which measure average performance across categories, on the internal test the Transformer (0.927, 95% CI 0.925-0.930) performed similarly to the RNN-LSTM (0.922, 95% CI 0.920-0.925), and both Transformer and RNN-LSTM outperformed the SVM (0.839, 95% CI 0.835-0.842). In the external test set, the Transformer (0.918, 95% CI 0.914-0.921) displayed the best performance, while the SVM (0.885, 95% CI 0.881-0.889) displayed slightly better performance than the RNN-LSTM (0.868, 95% CI 0.862-0.874). Overall, the Transformer was thus the best model based on both micro F1 and macro F1 metrics.</p>
        <p><xref rid="figure1" ref-type="fig">Figure 1</xref> illustrates confusion matrices for the best Transformer, normalized by row to show recall (sensitivity), and by column to show precision (positive predictive value). For the Positive category, precision and recall on both the internal and external test were close to 0.99. For the Negative category, on the internal test, precision (0.916, 95% CI 0.911-0.920) was slightly higher than recall (0.893, 95% CI 0.889-0.897), while on the external test, recall (0.947, 95% CI 0.942-0.951) was much higher than precision (0.861, 95% CI 0.852-0.870). For the Neither category, on both internal and external tests, precision (0.916, 95% CI 0.909-0.923 vs 0.886, 95% CI 0.877-0.894) was better than recall (0.867, 95% CI 0.860-0.873 vs 0.848, 95% CI 0.836-0.859). In summary, performance on the Negative category was better than that on the Neither category, and performance on the Positive category was better still.</p>
        <p><xref rid="figure2" ref-type="fig">Figure 2</xref> compares receiver operating characteristic (ROC) curves and areas under the ROC curve (AUCs) for the Positive, Negative, and Neither categories on both internal and external tests. On the internal test data, the Transformer (Positive: 0.981, 95% CI 0.980-0.983; Negative: 0.985, 95% CI 0.984-0.986; Neither: 0.974, 95% CI 0.971-0.976) and RNN-LSTM (Positive: 0.980, 95% CI 0.978-0.981; Negative: 0.982, 95% CI 0.981-0.983; Neither: 0.972, 95% CI 0.969-0.974) were close, and both were better than SVM (Positive: 0.962, 95% CI 0.961-0.964; Negative: 0.962, 95% CI 0.961-0.963; Neither: 0.966, 95% CI 0.963-0.968).</p>
        <p>On the external test, for the Positive category, the Transformer (0.984, 95% CI 0.983-0.985) was the best, and the SVM (0.974, 95% CI 0.972-0.976) was better than the RNN-LSTM (0.970, 95% CI 0.966-0.972). For the Negative category, the Transformer (0.992, 95% CI 0.991-0.993) was the best, followed by RNN-LSTM (0.984, 95% CI 0.982-0.985), and then the SVM (0.979, 95% CI 0.977-0.981). For the Neither category, the SVM (0.984, 95% CI 0.982-0.986) was the best, followed by the Transformer (0.969, 95% CI 0.967-0.973) and the RNN-LSTM (0.952, 95% CI 0.949-0.955).</p>
        <p>We conclude that overall, the Transformer model performed the best. Hereinafter, “NLP” refers to the Transformer model.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>F1 scores for the Support Vector Machine, recurrent neural networks with long short-term memory, and the Transformer model.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="170"/>
            <col width="290"/>
            <col width="260"/>
            <col width="250"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Scores</td>
                <td>Support Vector Machine, mean (95% CI)</td>
                <td>Recurrent neural networks with long short-term memory, mean (95% CI)</td>
                <td>Transformer, mean (95% CI)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="5">
                  <bold>Micro F1</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Internal test</td>
                <td>0.949 (0.948-0.951)</td>
                <td>0.977 (0.976-0.978)</td>
                <td>0.978 (0.977-0.979)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>External test</td>
                <td>0.964 (0.963-0.966)</td>
                <td>0.967 (0.965-0.968)</td>
                <td>0.978 (0.977-0.979)</td>
              </tr>
              <tr valign="top">
                <td colspan="5">
                  <bold>Macro F1</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Internal test</td>
                <td>0.839 (0.835-0.842)</td>
                <td>0.922 (0.920-0.925)</td>
                <td>0.927 (0.925-0.930)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>External test</td>
                <td>0.885 (0.881-0.889)</td>
                <td>0.868 (0.862-0.874)</td>
                <td>0.918 (0.914-0.921)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Precision, recall, and F1 scores for delirium classifiers.</p>
          </caption>
          <graphic xlink:href="formative_v6i6e33834_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Receiver operating characteristic (ROC) curves for delirium classifiers. AUC: area under the curve, LSTM: long short-term memory, SVM: Support Vector Machine.</p>
          </caption>
          <graphic xlink:href="formative_v6i6e33834_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Associations Between Delirium NLP Results and Other Delirium Indicators</title>
        <p>Next, we compared associations between delirium NLP results and other delirium indicators. Results are shown in <xref ref-type="table" rid="table2">Table 2</xref>. For the NCC cohort (n=1985 patients), we assessed associations of NLP-detected delirium with delirium ICD code usage, medications, restraints and sitter orders, and mortality. For the LTM data set (n=395), we analyzed associations with CAM scores. For comparison, we also calculated the association of ICD code usage with the same delirium indicators.</p>
        <p>We calculated these delirium indicators at the patient level, such that each patient is assigned a “+1” for NLP-based detection of delirium if they have one or more sentences classified as Positive by the NLP Transformer algorithm; otherwise, they were assigned a “–1.” Similarly, patients were assigned scores of “+1” or “–1” for each of the other delirium indicators. We used the φ coefficient (mean square contingency coefficient) to measure associations between NLP-based delirium detections and each delirium indicator. When using our NLP detector to classify sentences in the NCC (or LTM) data set, the NCC (or LTM) data were only used as test data, as illustrated in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>.</p>
        <p><xref ref-type="table" rid="table2">Table 2</xref> shows that associations of delirium indicators with NLP results are much stronger than those with ICD codes.</p>
        <p>In the NCC data set, the NLP model identified 1117 out of 1985 patients with positive delirium sentences (which were verified to be correct through manual review) but no delirium ICD codes. This highlights the low sensitivity of delirium ICD codes relative to manual chart review, and the excellent sensitivity of the NLP algorithm.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Associations between delirium natural language processing indicators and other delirium indicators.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="370"/>
            <col width="300"/>
            <col width="300"/>
            <thead>
              <tr valign="top">
                <td colspan="2">Data sets and delirium indicators</td>
                <td>International Classification of Diseases codes, mean (95% CI)</td>
                <td>Natural language processing classifiers, mean (95% CI)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">
                  <bold>NCC</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>International Classification of Diseases codes</td>
                <td>1</td>
                <td>0.134 (0.133 to 0.135)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Medication</td>
                <td>0.073 (0.072 to 0.074)</td>
                <td>0.194 (0.192 to 0.197)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Restraints and sitter orders</td>
                <td>0.177 (0.176 to 0.179)</td>
                <td>0.358 (0.357 to 0.361)</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Mortality</td>
                <td>0.000 (–0.0002 to 0.0001)</td>
                <td>0.216 (0.215 to 0.217)</td>
              </tr>
              <tr valign="top">
                <td colspan="4">
                  <bold>LTM</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Confusion Assessment Method</td>
                <td>–0.028 (–0.030 to –0.025)</td>
                <td>0.256 (0.252 to 0.259)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Coverage Analysis</title>
        <p>In creating the gold standard for labeling sentences, we developed many “always patterns” for delirium. While this set of sentences was large, we hypothesized that it might not be exhaustive; therefore, we investigated the coverage of our “always patterns” in another data set.</p>
        <p>We analyzed the coverage of “always patterns” as follows. First, in the development data set (AED, GIFTS, Dementia, COVID-19, NCC, and control cohorts)—used for labeling the gold-standard set of sentences and for developing “always patterns”—97.6% (195,680) of sentences with delirium keywords were matched by at least one “always pattern.” In the LTM data set, which was not used for labeling sentences, 78.2% (12,569) of sentences with delirium keywords matched at least one “always pattern.”</p>
        <p>We next tested the extent to which sentences not matched by “always patterns” were still accurately classified by the NLP model. To accomplish this, we randomly selected 400 sentences as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>100 sentences that both the Transformer and LSTM models predicted “Positive” for delirium</p>
          </list-item>
          <list-item>
            <p>100 sentences that both the Transformer and LSTM models predicted “Negative” for delirium</p>
          </list-item>
          <list-item>
            <p>100 sentences that both the Transformer and LSTM models predicted “Neither”; namely, not relevant to delirium</p>
          </list-item>
          <list-item>
            <p>100 sentences on which the Transformer and LSTM models disagreed.</p>
          </list-item>
        </list>
        <p>Two human experts (SM and MBW) independently labeled these 400 unmatched sentences. Pairwise IRA results are shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>, where 95% CIs were calculated through bootstrapping [<xref ref-type="bibr" rid="ref13">13</xref>]. For unmatched sentences, the performance of model IRA (LSTM, Transformer) was close to that of human IRA for the Negative category but displayed gaps for Positive and Neither categories compared with human IRA.</p>
        <p>We next investigated whether performance gaps in the new data set could be easily removed without repeating a large amount of sentence relabeling. For this investigation, we tried fine-tuning the Transformer model with a previously reported procedure [<xref ref-type="bibr" rid="ref10">10</xref>]. This was readily done (green bars in <xref rid="figure3" ref-type="fig">Figure 3</xref>).</p>
        <p>We conclude that the Transformer model is quite general, but not exhaustive; nevertheless, when gaps are encountered, the model can be readily tuned to accommodate previously unseen delirium sentence patterns.</p>
        <p><xref rid="figure4" ref-type="fig">Figure 4</xref> illustrates mortality rates for the patients with different numbers of days with delirium in the GIFTS data set. The mortality rate increases monotonically with the number of delirium days.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Pairwise interrater agreement (IRA) for unmatched sentences. LSTM: long short-term memory.</p>
          </caption>
          <graphic xlink:href="formative_v6i6e33834_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Mortality rate versus the number of days with delirium.</p>
          </caption>
          <graphic xlink:href="formative_v6i6e33834_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Our results show that an NLP approach can accurately detect patients with delirium, using unstructured clinical notes. These results are likely to be robust because they are based on a large collection of clinical notes from over 10,000 patients. The proposed delirium NLP approach is much more accurate, and especially more sensitive, than delirium ICD codes; it was able to detect patients who have delirium described in clinical notes but have no delirium ICD codes in their medical records. Further enhancing validity, NLP delirium detections are strongly associated with clinical factors known to be associated with delirium, including delirium-associated medications, use of restraints, and in-hospital mortality. This NLP tool will be useful for large-scale EHR research on delirium.</p>
      </sec>
      <sec>
        <title>Application</title>
        <p>The delirium NLP approach proposed in this work has many potential applications. First, the approach will be applied to many future large-scale studies regarding delirium, such as the causes of delirium and the effects of delirium on outcomes such as dementia. Second, the approach can review entire medical records to identify specific parts of the hospital that seem to have more delirium, which can be used for quality improvement. We can use this to identify factors (eg, medications) that might explain why delirium occurs. Third, the approach can be used to develop a delirium prediction model for clinical trials. The detection results of the NLP approach can be used as targets of prediction models, and the prediction models can be used to identify patients at a high risk for delirium, which provides information for interventions. The barriers to these applications are data and trust or transparency.</p>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>Many prior studies have utilized ICD codes to identify delirium for large-scale EHR studies [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. Our findings confirm observations from these earlier studies that ICD codes generally have high specificity but low sensitivity, leading to many missed cases of delirium. We investigated this finding in detail in the NCC cohort, where we observed that 1117 of 1985 patients who had positive delirium sentences had no corresponding delirium ICD codes. To confirm these findings, we used the NLP Transformer model to select the sentence with the highest positive score for each patient, and then manually reviewed the 1117 selected sentences, thereby manually confirming that these were true positives. These results show that the NLP approach largely overcomes the low sensitivity of delirium ICD codes.</p>
        <p>NLP has been used to extract phenotypes from clinical notes in several previous studies. McCoy et al [<xref ref-type="bibr" rid="ref15">15</xref>] used NLP to analyze discharge notes to improve prediction of suicide and accidental death after discharge. Gundlapalli et al [<xref ref-type="bibr" rid="ref16">16</xref>] reported that a relatively simple case finding method based on string matching for specific keywords coupled with a negation algorithm and information extracted by a more complex NLP system could identify patients with inflammatory bowel disease. Zhou et al [<xref ref-type="bibr" rid="ref17">17</xref>] applied an NLP approach to identify patients with depression on the basis of discharge summaries. Yang et al [<xref ref-type="bibr" rid="ref18">18</xref>] explored transformer-based models for clinical concept extraction. Mascio et al [<xref ref-type="bibr" rid="ref19">19</xref>] analyzed the impact of various word representations, text preprocessing, and classification algorithms on the performance of different text classification tasks based on EHRs. Most prior medical NLP used negation detection algorithms to deal with the negative cases. However, we found many negative cases that did not contain clear negative expressions. Therefore, we classified phenotype expressions as positive, negative, or neither (not relevant), and trained 3-class classifiers.</p>
        <p>A few prior studies used NLP for delirium research. One such study [<xref ref-type="bibr" rid="ref20">20</xref>] summarized patterns in the delirium literature over time, using unsupervised learning methods; by contrast, our work used NLP to extract information from clinical notes. Another study [<xref ref-type="bibr" rid="ref21">21</xref>] detected delirium using an open-source NLP pipeline, MedTaggerIE—an unstructured information management architecture–based information extraction framework. Shao et al [<xref ref-type="bibr" rid="ref22">22</xref>] experimented with 3 different topic modeling methods and a keyword search method for identifying delirium-related documents and sentences in clinical notes. Weir et al [<xref ref-type="bibr" rid="ref23">23</xref>] designed classifiers for patients with delirium by combining text data with ICD, Ninth Revision codes. Sun et al [<xref ref-type="bibr" rid="ref24">24</xref>] defined a generic process for developing a clinical risk prediction model, applied the model calibration process at 4 hospitals, and generated risk prediction models for delirium. Jauk et al [<xref ref-type="bibr" rid="ref25">25</xref>] implemented a random forest–based algorithm to identify hospitalized patients at high risk for delirium. A key difference between these prior studies and this study is that they aimed to detect delirium at the patient level (ie, whether a patient ever experienced delirium during a hospitalization). By contrast, our approach detects delirium at the sentence level, which provides more fine-grained temporal information (ie, on which days a patient was experiencing delirium). Such information is important for estimating the overall burden of delirium, and for studies that attempt to relate time-varying factors to the development of delirium.</p>
      </sec>
      <sec>
        <title>Strengths</title>
        <p>This work leveraged a large cohort composed of multiple different cohorts. These data sets provide a good source for a variety of delirium expressions in clinical notes. Additionally, we developed a novel GUI labeling tool and used active learning to enhance labeling efficiency. Furthermore, we compared 3 widely used NLP classifiers including a state-of-the-art Transformer model for delirium detection. Finally, we compared our delirium NLP detector with other delirium indicators, and we were able to demonstrate that our NLP method is substantially better than traditional methods based on ICD codes.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Although our data were obtained from 9 hospitals, all were in the same geographic region (Massachusetts). Thus, our cohort may not be representative of other US or non-US populations. One important future direction is to test our delirium NLP algorithm using data from other regions. Additionally, the coverage rate of the “always patterns” for the development data set was 97.6% (n=195,680) owing to active learning, but decreased to 78.2% (n=12,569) on an independent test set. Further rounds of active learning to enlarge the available training data will help further expand the generalizability of the NLP Transformer model to new data sets. Nevertheless, our fine-tuning experiments show that extending the model to new data sets may require only a relatively small amount of additional labeling effort.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this work, we developed a new delirium NLP detection approach that identifies patients with delirium from unstructured clinical notes. In many cases, the delirium information was only recorded in clinical notes and was absent from ICD codes. We anticipate that this model will be useful for large-scale EHR-based research on delirium, especially detecting delirium at a fine-grained level such as the note and sentence levels. Additionally, the labeling process based on active learning developed for this study was very efficient, achieving a coverage rate of 97.6% (n=195,680) in the development data set after just 5 rounds of labeling. This labeling method can be used for other studies related to phenotype detection based on unstructured clinical notes.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Demographic features.</p>
        <media xlink:href="formative_v6i6e33834_app1.docx" xlink:title="DOCX File , 19 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Examples for delirium sentences and always patterns.</p>
        <media xlink:href="formative_v6i6e33834_app2.docx" xlink:title="DOCX File , 87 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Two query strategies.</p>
        <media xlink:href="formative_v6i6e33834_app3.docx" xlink:title="DOCX File , 15 KB"/>
      </supplementary-material>
      <supplementary-material id="app4">
        <label>Multimedia Appendix 4</label>
        <p>Other delirium indicators.</p>
        <media xlink:href="formative_v6i6e33834_app4.docx" xlink:title="DOCX File , 18 KB"/>
      </supplementary-material>
      <supplementary-material id="app5">
        <label>Multimedia Appendix 5</label>
        <p>Interrater agreement.</p>
        <media xlink:href="formative_v6i6e33834_app5.docx" xlink:title="DOCX File , 13 KB"/>
      </supplementary-material>
      <supplementary-material id="app6">
        <label>Multimedia Appendix 6</label>
        <p>Data splitting.</p>
        <media xlink:href="formative_v6i6e33834_app6.docx" xlink:title="DOCX File , 24 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AUC</term>
          <def>
            <p>area under the curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CAM</term>
          <def>
            <p>Confusion Assessment Method</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">GUI</term>
          <def>
            <p>graphical user interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">IRA</term>
          <def>
            <p>interrater agreement</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">NIH</term>
          <def>
            <p>National Institutes of Health</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">RNN-LSTM</term>
          <def>
            <p>recurrent neural networks with LSTM units</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">ROC</term>
          <def>
            <p>receiver operating characteristic</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">SVM</term>
          <def>
            <p>Support Vector Machine</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>SD was supported by the National Institutes of Health (NIH; P30AG062421). SM was supported by the James S McDonnell Foundation (K23MH115812). MBW and WG were supported by the Glenn Foundation for Medical Research and American Federation for Aging Research (Breakthroughs in Gerontology Grant), the American Academy of Sleep Medicine (AASM Foundation Strategic Research Award), and the NIH (R01NS102190, R01NS102574, R01NS107291, RF1AG064312, and R01AG062989).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fong</surname>
              <given-names>TG</given-names>
            </name>
            <name name-style="western">
              <surname>Tulebaev</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Inouye</surname>
              <given-names>SK</given-names>
            </name>
          </person-group>
          <article-title>Delirium in elderly adults: diagnosis, prevention and treatment</article-title>
          <source>Nat Rev Neurol</source>
          <year>2009</year>
          <month>04</month>
          <volume>5</volume>
          <issue>4</issue>
          <fpage>210</fpage>
          <lpage>220</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19347026"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/nrneurol.2009.24</pub-id>
          <pub-id pub-id-type="medline">19347026</pub-id>
          <pub-id pub-id-type="pii">nrneurol.2009.24</pub-id>
          <pub-id pub-id-type="pmcid">PMC3065676</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bucht</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gustafson</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Sandberg</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Epidemiology of delirium</article-title>
          <source>Dement Geriatr Cogn Disord</source>
          <year>1999</year>
          <month>08</month>
          <day>26</day>
          <volume>10</volume>
          <issue>5</issue>
          <fpage>315</fpage>
          <lpage>318</lpage>
          <pub-id pub-id-type="doi">10.1159/000017161</pub-id>
          <pub-id pub-id-type="medline">10473930</pub-id>
          <pub-id pub-id-type="pii">17161</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sepulveda</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Franco</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Trzepacz</surname>
              <given-names>PT</given-names>
            </name>
            <name name-style="western">
              <surname>Gaviria</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Meagher</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Palma</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Viñuelas</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Grau</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Vilella</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>de Pablo</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Delirium diagnosis defined by cluster analysis of symptoms versus diagnosis by DSM and ICD criteria: diagnostic accuracy study</article-title>
          <source>BMC Psychiatry</source>
          <year>2016</year>
          <month>05</month>
          <day>26</day>
          <volume>16</volume>
          <fpage>167</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcpsychiatry.biomedcentral.com/articles/10.1186/s12888-016-0878-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12888-016-0878-6</pub-id>
          <pub-id pub-id-type="medline">27229307</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12888-016-0878-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC4882791</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>DH</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Huybrechts</surname>
              <given-names>KF</given-names>
            </name>
            <name name-style="western">
              <surname>Bateman</surname>
              <given-names>BT</given-names>
            </name>
            <name name-style="western">
              <surname>Patorno</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Marcantonio</surname>
              <given-names>ER</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of algorithms to identify delirium in administrative claims and drug utilization database</article-title>
          <source>Pharmacoepidemiol Drug Saf</source>
          <year>2017</year>
          <month>08</month>
          <volume>26</volume>
          <issue>8</issue>
          <fpage>945</fpage>
          <lpage>953</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28485014"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/pds.4226</pub-id>
          <pub-id pub-id-type="medline">28485014</pub-id>
          <pub-id pub-id-type="pmcid">PMC5583076</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Spasic</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Nenadic</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Clinical Text Data in Machine Learning: Systematic Review</article-title>
          <source>JMIR Med Inform</source>
          <year>2020</year>
          <month>03</month>
          <day>31</day>
          <volume>8</volume>
          <issue>3</issue>
          <fpage>e17984</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2020/3/e17984/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/17984</pub-id>
          <pub-id pub-id-type="medline">32229465</pub-id>
          <pub-id pub-id-type="pii">v8i3e17984</pub-id>
          <pub-id pub-id-type="pmcid">PMC7157505</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McInnes</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Healy</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Saul</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Großberger</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>UMAP: Uniform Manifold Approximation and Projection</article-title>
          <source>JOSS</source>
          <year>2018</year>
          <month>09</month>
          <volume>3</volume>
          <issue>29</issue>
          <fpage>861</fpage>
          <pub-id pub-id-type="doi">10.21105/joss.00861</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tong</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Koller</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Support vector machine active learning with applications to text classification</article-title>
          <source>J Mach Learn Res</source>
          <year>2001</year>
          <volume>2</volume>
          <fpage>45</fpage>
          <lpage>66</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmlr.org/papers/volume2/tong01a/tong01a.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.1162/153244302760185243</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Graves</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Supervised Sequence Labelling</article-title>
          <source>Supervised Sequence Labelling with Recurrent Neural Networks</source>
          <year>2012</year>
          <publisher-loc>Berlin, Heidelberg</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>5</fpage>
          <lpage>13</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vaswani</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shazeer</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Parmar</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Uszkoreit</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Gomez</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Kaiser</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Polosukhin</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Attention is all you need</article-title>
          <source>Adv Neural Inf Process Syst</source>
          <year>2017</year>
          <volume>30</volume>
          <fpage>5998</fpage>
          <lpage>6008</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://proceedings.neurips.cc/paper/2017/hash/3f5ee243547dee91fbd053c1c4a845aa-Abstract.html"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding</article-title>
          <source>arXiv.</source>
          <comment>Preprint posted online October 11, 2018</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1810.04805</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Neamatullah</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Douglass</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Lehman</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Reisner</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Villarroel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Long</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Moody</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Mark</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Clifford</surname>
              <given-names>GD</given-names>
            </name>
          </person-group>
          <article-title>Automated de-identification of free-text medical records</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2008</year>
          <month>07</month>
          <day>24</day>
          <volume>8</volume>
          <fpage>32</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/1472-6947-8-32"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1472-6947-8-32</pub-id>
          <pub-id pub-id-type="medline">18652655</pub-id>
          <pub-id pub-id-type="pii">1472-6947-8-32</pub-id>
          <pub-id pub-id-type="pmcid">PMC2526997</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Norgeot</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Muenzen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Peterson</surname>
              <given-names>TA</given-names>
            </name>
            <name name-style="western">
              <surname>Fan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Glicksberg</surname>
              <given-names>BS</given-names>
            </name>
            <name name-style="western">
              <surname>Schenk</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Rutenberg</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Oskotsky</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Sirota</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yazdany</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schmajuk</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ludwig</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Goldstein</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Butte</surname>
              <given-names>AJ</given-names>
            </name>
          </person-group>
          <article-title>Protected Health Information filter (Philter): accurately and securely de-identifying free-text clinical notes</article-title>
          <source>NPJ Digit Med</source>
          <year>2020</year>
          <volume>3</volume>
          <fpage>57</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-020-0258-y"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-020-0258-y</pub-id>
          <pub-id pub-id-type="medline">32337372</pub-id>
          <pub-id pub-id-type="pii">258</pub-id>
          <pub-id pub-id-type="pmcid">PMC7156708</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Davison</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hinkley</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <source>Bootstrap Methods and Their Application</source>
          <year>1997</year>
          <publisher-loc>Cambridge</publisher-loc>
          <publisher-name>Cambridge University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rogati</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>High-performing feature selection for text classification</article-title>
          <year>2002</year>
          <conf-name>CIKM02: Eleventh ACM International Conference on Information and Knowledge Management</conf-name>
          <conf-date>November 4-9, 2002</conf-date>
          <conf-loc>McLean, VA</conf-loc>
          <fpage>659</fpage>
          <lpage>661</lpage>
          <pub-id pub-id-type="doi">10.1145/584792.584911</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McCoy</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Castro</surname>
              <given-names>VM</given-names>
            </name>
            <name name-style="western">
              <surname>Roberson</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Snapper</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Perlis</surname>
              <given-names>RH</given-names>
            </name>
          </person-group>
          <article-title>Improving Prediction of Suicide and Accidental Death After Discharge From General Hospitals With Natural Language Processing</article-title>
          <source>JAMA Psychiatry</source>
          <year>2016</year>
          <month>10</month>
          <day>01</day>
          <volume>73</volume>
          <issue>10</issue>
          <fpage>1064</fpage>
          <lpage>1071</lpage>
          <pub-id pub-id-type="doi">10.1001/jamapsychiatry.2016.2172</pub-id>
          <pub-id pub-id-type="medline">27626235</pub-id>
          <pub-id pub-id-type="pii">2548276</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gundlapalli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>South</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Phansalkar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kinney</surname>
              <given-names>AY</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Delisle</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Perl</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Samore</surname>
              <given-names>MH</given-names>
            </name>
          </person-group>
          <article-title>Application of Natural Language Processing to VA Electronic Health Records to Identify Phenotypic Characteristics for Clinical and Research Purposes</article-title>
          <source>Summit Transl Bioinform</source>
          <year>2008</year>
          <month>03</month>
          <day>01</day>
          <volume>2008</volume>
          <fpage>36</fpage>
          <lpage>40</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/21347124"/>
          </comment>
          <pub-id pub-id-type="medline">21347124</pub-id>
          <pub-id pub-id-type="pmcid">PMC3041527</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Baughman</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Lei</surname>
              <given-names>VJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>Navathe</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sordo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Topaz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhong</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Murrali</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Navathe</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rocha</surname>
              <given-names>RA</given-names>
            </name>
          </person-group>
          <article-title>Identifying Patients with Depression Using Free-text Clinical Documents</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2015</year>
          <volume>216</volume>
          <fpage>629</fpage>
          <lpage>633</lpage>
          <pub-id pub-id-type="medline">26262127</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Bian</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hogan</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Clinical concept extraction using transformers</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>12</month>
          <day>09</day>
          <volume>27</volume>
          <issue>12</issue>
          <fpage>1935</fpage>
          <lpage>1942</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/33120431"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa189</pub-id>
          <pub-id pub-id-type="medline">33120431</pub-id>
          <pub-id pub-id-type="pii">5943218</pub-id>
          <pub-id pub-id-type="pmcid">PMC7727351</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mascio</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kraljevic</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Bean</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dobson</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bendayan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Comparative analysis of text classification approaches in electronic health records</article-title>
          <source>arXiv.</source>
          <comment>Preprint posted online May 8, 2020</comment>
          <pub-id pub-id-type="doi">10.18653/v1/2020.bionlp-1.9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McCoy</surname>
              <given-names>TH</given-names>
            </name>
          </person-group>
          <article-title>Mapping the Delirium Literature Through Probabilistic Topic Modeling and Network Analysis: A Computational Scoping Review</article-title>
          <source>Psychosomatics</source>
          <year>2019</year>
          <volume>60</volume>
          <issue>2</issue>
          <fpage>105</fpage>
          <lpage>120</lpage>
          <pub-id pub-id-type="doi">10.1016/j.psym.2018.12.003</pub-id>
          <pub-id pub-id-type="medline">30686485</pub-id>
          <pub-id pub-id-type="pii">S0033-3182(18)30520-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lopes</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Pagali</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Thorsteinsdottir</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>LeBrasseur</surname>
              <given-names>NK</given-names>
            </name>
            <name name-style="western">
              <surname>Wen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Rocca</surname>
              <given-names>WA</given-names>
            </name>
            <name name-style="western">
              <surname>Olson</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>St Sauver</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sohn</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Ascertainment of Delirium Status Using Natural Language Processing From Electronic Health Records</article-title>
          <source>J Gerontol A Biol Sci Med Sci</source>
          <year>2022</year>
          <month>03</month>
          <day>03</day>
          <volume>77</volume>
          <issue>3</issue>
          <fpage>524</fpage>
          <lpage>530</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/35239951"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/gerona/glaa275</pub-id>
          <pub-id pub-id-type="medline">35239951</pub-id>
          <pub-id pub-id-type="pii">6542032</pub-id>
          <pub-id pub-id-type="pmcid">PMC8893184</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Weir</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng-Treitler</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Estrada</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Identifying Documentation of Delirium in Clinical Notes through Topic Modeling</article-title>
          <year>2015</year>
          <conf-name>2015 International Conference on Healthcare Informatics</conf-name>
          <conf-date>October 21-23, 2015</conf-date>
          <conf-loc>Dallas, TX</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ichi.2015.47</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weir</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>LaFluer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gibson</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>Q</given-names>
            </name>
          </person-group>
          <article-title>Big Data Analytics Using the VA’s ‘VINCI’ Database to Look at Delirium</article-title>
          <source>Big Data-Enabled Nursing</source>
          <year>2017</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>287</fpage>
          <lpage>299</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Depraetere</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Meesseman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>De Roo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Vanbiervliet</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>De Baerdemaeker</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Muys</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>von Dossow</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Hulde</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Szymanowsky</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>A scalable approach for developing clinical risk prediction applications in different hospitals</article-title>
          <source>J Biomed Inform</source>
          <year>2021</year>
          <month>06</month>
          <volume>118</volume>
          <fpage>103783</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(21)00112-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2021.103783</pub-id>
          <pub-id pub-id-type="medline">33887456</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(21)00112-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jauk</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kramer</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Großauer</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rienmüller</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Avian</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Berghold</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Leodolter</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Risk prediction of delirium in hospitalized patients using machine learning: An implementation and prospective evaluation study</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2020</year>
          <month>07</month>
          <day>01</day>
          <volume>27</volume>
          <issue>9</issue>
          <fpage>1383</fpage>
          <lpage>1392</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32968811"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa113</pub-id>
          <pub-id pub-id-type="medline">32968811</pub-id>
          <pub-id pub-id-type="pii">5910737</pub-id>
          <pub-id pub-id-type="pmcid">PMC7647341</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
