<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v6i3e31615</article-id>
      <article-id pub-id-type="pmid">35081036</article-id>
      <article-id pub-id-type="doi">10.2196/31615</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Performance of a Computational Phenotyping Algorithm for Sarcoidosis Using Diagnostic Codes in Electronic Medical Records: Case Validation Study From 2 Veterans Affairs Medical Centers</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Mircheva</surname>
            <given-names>Iskra</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>van Poelgeest</surname>
            <given-names>R</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Agrawal</surname>
            <given-names>Lavlin</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Seedahmed</surname>
            <given-names>Mohamed I</given-names>
          </name>
          <degrees>MPH, MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Division of Pulmonary, Critical Care, Allergy and Immunology, and Sleep</institution>
            <institution>Department of Medicine</institution>
            <institution>University of California San Francisco</institution>
            <addr-line>513 Parnassus Ave</addr-line>
            <addr-line>HSE 1314, Box 0111</addr-line>
            <addr-line>San Francisco, CA, 94143</addr-line>
            <country>United States</country>
            <fax>1 (415) 502 2605</fax>
            <phone>1 (415) 476 0735</phone>
            <email>mohamed.seedahmed@ucsf.edu</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7446-7346</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Mogilnicka</surname>
            <given-names>Izabella</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5735-8375</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Zeng</surname>
            <given-names>Siyang</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9346-301X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Luo</surname>
            <given-names>Gang</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7217-4008</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Whooley</surname>
            <given-names>Mary A</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5943-0078</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>McCulloch</surname>
            <given-names>Charles E</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1279-6179</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Koth</surname>
            <given-names>Laura</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9541-3622</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Arjomandi</surname>
            <given-names>Mehrdad</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0116-9217</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Division of Pulmonary, Critical Care, Allergy and Immunology, and Sleep</institution>
        <institution>Department of Medicine</institution>
        <institution>University of California San Francisco</institution>
        <addr-line>San Francisco, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>San Francisco Veterans Affairs Medical Center</institution>
        <addr-line>San Francisco, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Experimental Physiology and Pathophysiology, Laboratory of the Centre for Preclinical Research</institution>
        <institution>Medical University of Warsaw</institution>
        <addr-line>Warsaw</addr-line>
        <country>Poland</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Biomedical Informatics and Medical Education</institution>
        <institution>School of Medicine, University of Washington</institution>
        <addr-line>Seattle, WA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Medicine</institution>
        <institution>University of California San Francisco</institution>
        <addr-line>San Francisco, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Measurement Science Quality Enhancement Research Initiative</institution>
        <institution>San Francisco Veterans Affairs Healthcare System</institution>
        <addr-line>San Francisco, CA</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>Department of Epidemiology &amp; Biostatistics</institution>
        <institution>University of California San Francisco</institution>
        <addr-line>San Francisco, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Mohamed I Seedahmed <email>mohamed.seedahmed@ucsf.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>3</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>2</day>
        <month>3</month>
        <year>2022</year>
      </pub-date>
      <volume>6</volume>
      <issue>3</issue>
      <elocation-id>e31615</elocation-id>
      <history>
        <date date-type="received">
          <day>29</day>
          <month>6</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>8</day>
          <month>12</month>
          <year>2021</year>
        </date>
        <date date-type="rev-recd">
          <day>24</day>
          <month>1</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>24</day>
          <month>1</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Mohamed I Seedahmed, Izabella Mogilnicka, Siyang Zeng, Gang Luo, Mary A Whooley, Charles E McCulloch, Laura Koth, Mehrdad Arjomandi. Originally published in JMIR Formative Research (https://formative.jmir.org), 02.03.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2022/3/e31615" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Electronic medical records (EMRs) offer the promise of computationally identifying sarcoidosis cases. However, the accuracy of identifying these cases in the EMR is unknown.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study is to determine the statistical performance of using the International Classification of Diseases (ICD) diagnostic codes to identify patients with sarcoidosis in the EMR.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We used the ICD diagnostic codes to identify sarcoidosis cases by searching the EMRs of the San Francisco and Palo Alto Veterans Affairs medical centers and randomly selecting 200 patients. To improve the diagnostic accuracy of the computational algorithm in cases where histopathological data are unavailable, we developed an <italic>index of suspicion</italic> to identify cases with a <italic>high index of suspicion</italic> for sarcoidosis (confirmed and probable) based on clinical and radiographic features alone using the American Thoracic Society practice guideline. Through medical record review, we determined the positive predictive value (PPV) of diagnosing sarcoidosis by two computational methods: using ICD codes alone and using ICD codes plus the <italic>high index of suspicion</italic>.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Among the 200 patients, 158 (79%) had a high index of suspicion for sarcoidosis. Of these 158 patients, 142 (89.9%) had documentation of nonnecrotizing granuloma, confirming biopsy-proven sarcoidosis. The PPV of using ICD codes alone was 79% (95% CI 78.6%-80.5%) for identifying sarcoidosis cases and 71% (95% CI 64.7%-77.3%) for identifying histopathologically confirmed sarcoidosis in the EMRs. The inclusion of the generated <italic>high index of suspicion</italic> to identify confirmed sarcoidosis cases increased the PPV significantly to 100% (95% CI 96.5%-100%). Histopathology documentation alone was 90% sensitive compared with <italic>high index of suspicion</italic>.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>ICD codes are reasonable classifiers for identifying sarcoidosis cases within EMRs with a PPV of 79%. Using a computational algorithm to capture <italic>index of suspicion</italic> data elements could significantly improve the case-identification accuracy.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>sarcoidosis</kwd>
        <kwd>electronic medical records</kwd>
        <kwd>EMRs</kwd>
        <kwd>computational phenotype</kwd>
        <kwd>diagnostic codes</kwd>
        <kwd>Veterans Affairs</kwd>
        <kwd>VA</kwd>
        <kwd>practice guidelines</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Sarcoidosis is a complex disease with an unknown etiology that can involve multiple organs, and no universal or standardized measures can fully secure its final diagnosis [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. In fact, it was only recently that the American Thoracic Society (ATS) published its first practice guideline to provide recommendations for diagnosing sarcoidosis and the necessary screening tests [<xref ref-type="bibr" rid="ref3">3</xref>]. The ATS practice guideline for diagnosis requires the presence of specific clinical and radiographic features, tissue biopsy revealing nonnecrotizing granulomas, and exclusion of alternative conditions that can mimic sarcoidosis [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>].</p>
        <p>Data from electronic medical records (EMRs) are commonly used in research and by health care systems, including the United States Department of Veterans Affairs (VA), to predict outcomes or assess care quality [<xref ref-type="bibr" rid="ref5">5</xref>]. EMR data are generally captured in two forms: (1) <italic>structured data</italic>, including billing codes such as the International Classification of Diseases (ICD) codes, laboratory test results, and procedural codes; and (2) <italic>narrative or unstructured data</italic>, including progress notes, pathology reports, and imaging reports. ICD codes cast a wider net to capture patients in the EMR because they include both inpatient and outpatient claims compared with other classifiers such as Diagnosis-Related Group that only capture inpatient claims [<xref ref-type="bibr" rid="ref6">6</xref>]. Unstructured data contain many more details of the clinical conditions, but extracting these details is challenging and time consuming. In contrast, structured data are easier to search for, and they allow for identifying cases computationally using diagnostic codes. However, diagnostic codes can be inaccurate and difficult to verify. This is particularly true for the case definition of sarcoidosis, which is considered a diagnosis of exclusion and requires a review of clinical, radiological, and histopathological data for accurate diagnosis [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. A few studies have reported the development of sarcoidosis-specific “computationally identifying algorithms” based on structured data elements in the EMR, although they were not validated by manual chart review [<xref ref-type="bibr" rid="ref9">9</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. Another study assessed the accuracy of using diagnostic codes to identify sarcoidosis cases [<xref ref-type="bibr" rid="ref14">14</xref>] but only used the ICD, Ninth Revision (ICD-9) code and not the ICD, Tenth Revision (ICD-10) code, and it did not include any computational algorithm development. In addition, previous studies on the diagnostic accuracy of ICD codes for other common pulmonary diseases that have less or similar complexity compared with sarcoidosis, such as chronic obstructive pulmonary disease, idiopathic pulmonary fibrosis, and asthma, showed positive predictive values (PPVs) of 42%-67% [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref17">17</xref>]. Moreover, researchers have previously developed predictive models and risk scores to use advanced computational methods to predict, commonly, less-complex case definitions in the EMR [<xref ref-type="bibr" rid="ref18">18</xref>-<xref ref-type="bibr" rid="ref24">24</xref>]. For example, in a study published by Himes et al [<xref ref-type="bibr" rid="ref19">19</xref>], Bayesian network machine learning models were constructed to predict chronic obstructive pulmonary disease. Therefore, given the complexity of securing a sarcoidosis diagnosis in the realm of real-world clinical data, it is essential to develop automated algorithms to detect confirmed and probable cases of sarcoidosis using data elements from structured and unstructured domains by incorporating the ATS diagnostic criteria [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref25">25</xref>].</p>
      </sec>
      <sec>
        <title>First Step</title>
        <p>As the first step in evaluating the knowledge gap in developing future sarcoidosis-specific “computationally identifying algorithms,” we designed this study (1) to estimate the statistical performance of using diagnostic codes (ICD-9 and ICD-10) alone compared with a new approach that uses additional information from radiology and clinical domains, but not histopathology, to inform the utility of these codes for performing clinical phenotyping of sarcoidosis cases in large EMR data sets of the VA and (2) to assess the computational challenges in querying sarcoidosis cases and extracting high-quality sarcoidosis-related research variables from the EMR accurately.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Source and Collection</title>
        <p>This was an observational retrospective study of EMRs available through VA Informatics and Computing Infrastructure (VINCI). VINCI provides access to comprehensive and integrated veterans’ national deidentified data sets and offers the necessary computational and analytical tools in a secure, high-performance computing environment [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. This study was approved by the institutional review board of the University of California San Francisco and the Veterans Health Administration Research and Development Committee (15-16660). Patients or the public were not involved in the design, conduct, reporting, or dissemination plans of our research.</p>
        <p>We searched the EMR data in VINCI from 1989 to 2019 and identified all patients coded as having sarcoidosis in the VA health care system, as defined by the documentation of the ICD-9 and ICD-10 codes of 135 and D86.x (including subcodes), respectively. Data were extracted through executing SQL queries in an SQL Server 2017 database. A total of 14,833 sarcoidosis cases were identified.</p>
      </sec>
      <sec>
        <title>Study Design</title>
        <p>To determine the statistical performance of using diagnostic codes (ICD-9 and ICD-10) in identifying patients with sarcoidosis from the EMR, initially, we identified patients with at least one claim (inpatient or outpatient) of ICD diagnosis code for sarcoidosis. To ascertain the true diagnosis of sarcoidosis based on the ATS diagnostic criteria (clinical, radiographic, and pathological findings, as well as exclusion of other causes) [<xref ref-type="bibr" rid="ref3">3</xref>], 2 clinicians (MIS and IM) performed a comprehensive chart review. Of the 14,833 identified cases, a total of 200 (1.35%) were reviewed to limit the required chart review to a manageable level. As our access to the detailed medical records was limited to the two medical centers of San Francisco VA (SFVA) and Palo Alto VA (PAVA), the reviewed charts were selected from these two centers. We stratified the list of sarcoidosis cases from the 2 centers by site and used the <italic>lottery</italic> method to randomly select 100 patients from each site without replacement [<xref ref-type="bibr" rid="ref28">28</xref>] (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Strengthening the Reporting of Observational Studies in Epidemiology flowchart. Selection criteria for sarcoidosis cases. ATS: American Thoracic Society; ICD: International Classification of Diseases; PA: Palo Alto; SF: San Francisco; VA: Veterans Affairs.</p>
          </caption>
          <graphic xlink:href="formative_v6i3e31615_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>On the basis of the ATS practice guidelines, the diagnosis can be confirmed for those who had a biopsy consistent with sarcoidosis, as well as consistent clinical and radiological findings and no evidence for an alternative diagnosis. However, the ATS practice guideline committee acknowledged that there were clinical situations in which a confirmatory biopsy may not be indicated or possible. Accordingly, based on the ATS practice guideline, those patients without biopsies can be classified as probable sarcoidosis [<xref ref-type="bibr" rid="ref3">3</xref>]. Therefore, given that not all suspected patients have a tissue biopsy in clinical practice, we generated an <italic>index of suspicion</italic> for sarcoidosis to identify patients with sarcoidosis (confirmed and probable) based on clinical and radiographic information, regardless of the availability of biopsy data, and to assess whether this approach would improve the diagnostic accuracy. The <italic>index of suspicion</italic> was applied to the initial cohort of patients with ICD codes for sarcoidosis (n=200). The clinical and radiological features were extracted from the available structured and unstructured data without including the histopathology results. If the patients were documented to have one or more of these features, they were assigned to the <italic>high index of suspicion</italic> group (<xref ref-type="boxed-text" rid="box1">Textbox 1</xref>); otherwise, the patients were assigned to the <italic>low index of suspicion</italic> group.</p>
        <boxed-text id="box1" position="float">
          <title>Criteria to determine <italic>high index of suspicion</italic>.</title>
          <p>
            <bold>Clinical and radiological features supportive of the diagnosis of sarcoidosis that were used for the determination of a <italic>high index of suspicion</italic>. Any patients with at least one of these features were included in the <italic>high index of suspicion</italic> group:</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Clinical</p>
              <list list-type="bullet">
                <list-item>
                  <p>Lofgren syndrome (defined as erythema nodosum, bilateral hilar lymphadenopathy, and polyarthralgia or polyarthritis)</p>
                </list-item>
                <list-item>
                  <p>Heerfordt syndrome (defined as facial nerve palsy, parotid gland enlargement, anterior uveitis, and low-grade fever)</p>
                </list-item>
                <list-item>
                  <p>Lupus pernio or erythema nodosum</p>
                </list-item>
                <list-item>
                  <p>Maculopapular or erythematous skin lesions or nodules</p>
                </list-item>
                <list-item>
                  <p>Facial nerve palsy</p>
                </list-item>
                <list-item>
                  <p>Symmetrical parotid enlargement</p>
                </list-item>
                <list-item>
                  <p>Optic neuritis, scleritis, uveitis, or retinitis</p>
                </list-item>
                <list-item>
                  <p>Lacrimal gland swelling</p>
                </list-item>
                <list-item>
                  <p>Evidence of granulomatous disease on direct laryngoscopy</p>
                </list-item>
                <list-item>
                  <p>Hepatomegaly or splenomegaly</p>
                </list-item>
                <list-item>
                  <p>Shortness of breath, dyspnea on exertion, cough, dizziness, or chest pain</p>
                </list-item>
                <list-item>
                  <p>Pulmonary function test with obstruction, restriction, or low diffusing capacity of the lungs for carbon monoxide</p>
                </list-item>
                <list-item>
                  <p>Cardiomyopathy, cardiac arrhythmia, or atrioventricular node block</p>
                </list-item>
                <list-item>
                  <p>Hypercalcemia, hypercalciuria, nephrolithiasis, or abnormal vitamin D levels</p>
                </list-item>
                <list-item>
                  <p>Elevated angiotensin-converting enzyme or soluble interleukin-2 receptor levels</p>
                </list-item>
                <list-item>
                  <p>Bronchoalveolar lavage lymphocytosis</p>
                </list-item>
              </list>
            </list-item>
            <list-item>
              <p>Radiological</p>
              <list list-type="bullet">
                <list-item>
                  <p>Bilateral hilar lymphadenopathy (chest radiograph, computed tomography, and positron emission tomography)</p>
                </list-item>
                <list-item>
                  <p>Computed tomography chest with perilymphatic nodules tracking the peribronchovascular bundle</p>
                </list-item>
                <list-item>
                  <p>Diffuse infiltrates (chest radiograph, computed tomography, and positron emission tomography) or computed tomography chest or chest radiograph with fibrosis</p>
                </list-item>
                <list-item>
                  <p>Cardiac magnetic resonance imaging or positron emission tomography–computed tomography consistent with sarcoidosis</p>
                </list-item>
                <list-item>
                  <p>Enlargement or nodules in liver or spleen (computed tomography, positron emission tomography, or magnetic resonance imaging)</p>
                </list-item>
                <list-item>
                  <p>Magnetic resonance imaging brain with increased inflammation</p>
                </list-item>
                <list-item>
                  <p>Extrathoracic enlarged lymph nodes (computed tomography, magnetic resonance imaging, and positron emission tomography)</p>
                </list-item>
              </list>
            </list-item>
          </list>
        </boxed-text>
        <p>We then further classified the patients into 3 groups. Patients with a high index of suspicion and documented histopathological evidence of nonnecrotizing granulomas were categorized into the group of <italic>sarcoidosis with confirmed biopsy</italic>. Patients with a high index of suspicion and either no documented biopsy in the EMR or a biopsy showing no histopathological evidence of nonnecrotizing granulomas were categorized into the group of <italic>sarcoidosis without confirmed biopsy</italic> (probable sarcoidosis). Finally, those with a low index of suspicion were categorized into the group of <italic>unlikely sarcoidosis</italic> (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
        <p>Applying the <italic>index of suspicion</italic> restricts the initially developed sarcoidosis cohort to those with a <italic>high index of suspicion</italic> for sarcoidosis, from whom we identified confirmed cases. As we started with a random sample of those with sarcoidosis diagnostic codes, the sample further restricted to those with a <italic>high index of suspicion</italic> was still a random sample of patients meeting the combination of both ICD codes and the <italic>index of suspicion</italic>. We compared the statistical performance of the two methods (ICD code alone vs ICD code with <italic>index of suspicion</italic>) to determine whether the use of this <italic>index of suspicion</italic> could improve the PPV of identification of sarcoidosis cases in the EMR.</p>
        <p>This approach provides more information than just relying on ICD codes alone to develop robust computational sarcoidosis-specific algorithms consistent with the recent ATS practice guideline recommendations.</p>
      </sec>
      <sec>
        <title>Disease-Related Variables</title>
        <p>Organ involvement was assessed based on the clinical history obtained from physicians’ notes and imaging and biopsy reports available in the computerized patient record system. For this assessment, to adjust for the variability in providers’ documentation, we adapted a set of criteria previously introduced in the National Institutes of Health–sponsored Genomic Research in Alpha-1 Antitrypsin Deficiency and Sarcoidosis (GRADS) study [<xref ref-type="bibr" rid="ref29">29</xref>].</p>
        <p>We collected the following data from the chart review: clinical site, gender, race, ICD-9 and ICD-10 codes for sarcoidosis (135 and D86, respectively), the pathological diagnosis from any available biopsy, organ involvement as described in <xref ref-type="boxed-text" rid="box2">Textbox 2</xref>, Scadding staging of chest x-ray (as described in radiology reports), history of bilateral hilar lymphadenopathy (based on radiology reports and clinical notes), pulmonary function test (PFT) pattern (as reported in PFT reports), the clinical status (acute, chronic, or remitting disease), and the treatment status of sarcoidosis.</p>
        <p>Pathological diagnoses were categorized as <italic>primary</italic> if the histopathological data were available in the pathology report domains and as <italic>secondary</italic> if the data were available only in the clinical note domains, either because of a remote history of biopsy or because the biopsy had been performed outside the VA. The PFT reports at the SFVA and PAVA used Crapo reference equations to calculate the lower limit of normal values for spirometry and lung volume measurements.</p>
        <p>Using the clinical data from chart abstraction, we classified the patients into the clinical phenotypes proposed by the GRADS study, with the exception of <italic>multi-organ phenotype</italic>, which we defined as the involvement of ≥3 organs.</p>
        <boxed-text id="box2" position="float">
          <title>Organ involvement assessment for sarcoidosis (with and without confirmed biopsy).</title>
          <p>
            <bold>Organ and assessment</bold>
          </p>
          <p>
            <bold>Lung</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Positive lung biopsy and positive mediastinal or hilar lymph node biopsy</p>
            </list-item>
            <list-item>
              <p>Chest x-ray, computed tomography (CT) chest, or positron emission tomography (PET) demonstrating bilateral hilar lymphadenopathy; CT chest with perilymphatic nodules tracking the peribronchovascular bundle; chest x-ray, CT chest, or PET with diffuse infiltrates; and CT chest or chest x-ray (CXR) with fibrosis</p>
            </list-item>
            <list-item>
              <p>Pulmonary function test (PFT) with obstruction, restriction, or low diffusing capacity of the lungs for carbon monoxide (DLCO)</p>
            </list-item>
          </list>
          <p>
            <bold>Skin</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Positive skin biopsy</p>
            </list-item>
            <list-item>
              <p>Lupus pernio and erythema nodosum</p>
            </list-item>
          </list>
          <p>
            <bold>Eye</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Positive conjunctival or scleral biopsy</p>
            </list-item>
            <list-item>
              <p>Optic neuritis, scleritis, uveitis, or retinitis</p>
            </list-item>
          </list>
          <p>
            <bold>Cardiac</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Positive heart or pericardium biopsy</p>
            </list-item>
            <list-item>
              <p>Atrioventricular node block (second or third degree)</p>
            </list-item>
            <list-item>
              <p>Cardiomyopathy responsive to treatment</p>
            </list-item>
            <list-item>
              <p>Cardiac arrhythmia (eg, ventricular tachycardia)</p>
            </list-item>
            <list-item>
              <p>Cardiac magnetic resonance imaging (MRI) or PET-CT consistent with sarcoidosis</p>
            </list-item>
          </list>
          <p>
            <bold>Liver or spleen</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Positive liver or spleen biopsy</p>
            </list-item>
            <list-item>
              <p>Enlargement or nodules in liver or spleen (CT, PET, or MRI)</p>
            </list-item>
            <list-item>
              <p>Abnormal liver enzymes</p>
            </list-item>
          </list>
          <p>
            <bold>Neurosarcoidosis</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Positive brain or dura or peripheral nerve biopsy</p>
            </list-item>
            <list-item>
              <p>Clinical syndrome or symptoms consistent with central nervous system sarcoidosis along with a positive MRI</p>
            </list-item>
          </list>
          <p>
            <bold>Ear, nose, and throat</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>Positive biopsy from ear, nose, or throat</p>
            </list-item>
            <list-item>
              <p>Direct laryngoscopy consistent with granulomatous disease</p>
            </list-item>
          </list>
          <p>
            <bold>Multi-organ involvement</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>≥3 organs involved based on other criteria in this textbox</p>
            </list-item>
          </list>
        </boxed-text>
      </sec>
      <sec>
        <title>Statistical Analyses</title>
        <p>All statistical analyses were performed with R software (The R Foundation for Statistical Computing) using RStudio (version 1.2.5). Descriptive statistics were computed to summarize the data. Categorical variables were presented as the frequency in percentages, and continuous data were presented as means and SDs. We estimated the PPV of the two aforementioned computational diagnostic criteria for sarcoidosis (ICD codes alone and ICD codes along with <italic>index of suspicion</italic>). We did not report the positive likelihood ratio, given that the specificity for using ICD codes alone could not be calculated because our study design did not include a review of noncases. The PPV for the criterion of using only the ICD code was calculated as the number of patients verified to have sarcoidosis by chart review (<italic>gold standard</italic>) divided by the total number of patients with an ICD code for sarcoidosis. The PPV for the criterion of using the ICD codes and <italic>index of suspicion</italic> was calculated as the total number of patients with a high index of suspicion divided by the number of patients verified to have sarcoidosis by chart review (<italic>gold standard</italic>). The sensitivity of histopathology reports alone compared with chart review was calculated as the total number of patients with a high index of suspicion and confirmed biopsy divided by the number of patients verified to have sarcoidosis by chart review (<italic>gold standard</italic>). We computed 95% CIs using the exact binomial method. For our estimates, significance was defined as <italic>P</italic>&#60;.05.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Patients’ Characteristics</title>
        <p>A total of 14,833 patients with at least one ICD-9 or ICD-10 diagnostic code of sarcoidosis were identified. The study cohort included patients identified by the ICD codes of sarcoidosis (n=200). Of the 200 patients, 158 (79%) had a <italic>high index of suspicion</italic> for sarcoidosis based on clinical or radiographic findings. Of these 158 patients, 108 (68.4%) were identified with the ICD-9 code of 135 and 50 (31.6%) with the ICD-10 code of D86, and 142 (89.9%) had confirmed sarcoidosis based on histopathological evidence of nonnecrotizing granuloma and were classified as having <italic>sarcoidosis with confirmed biopsy</italic>; the remaining 16 (10.1%) patients with a <italic>high index of suspicion</italic> did not undergo a biopsy and were classified as having <italic>sarcoidosis without confirmed biopsy</italic> (probable sarcoidosis; <xref rid="figure1" ref-type="fig">Figure 1</xref>). No patient had nondiagnostic biopsy results for sarcoidosis.</p>
        <p><xref ref-type="table" rid="table1">Table 1</xref> summarizes the demographic data and baseline characteristics of patients with sarcoidosis (with and without confirmed biopsy). Among these patients, 89.9% (142/158) were men and there was a higher representation of African American patients than non-Hispanic White patients (85/158, 53.8%, vs 52/158, 32.9%, respectively). Overall, 90.5% (143/158) had a predominant pulmonary phenotype. Among these, 129 had PFT (36, 27.9%, 28, 21.7%, and 25, 19.4%, with restrictive, obstructive, and mixed patterns, respectively) and most were in Scadding stage II (47/143, 32.9%), followed by stage 0 and stage I (27/143, 18.9%, and 26/143, 18.2%, respectively). There was no significant difference in age between those who had a biopsy performed to diagnose sarcoidosis and those who did not (mean 65.5, SD 10.8, years vs mean 69.3, SD 10.3, years, respectively; <italic>P</italic>=.18). In terms of clinical phenotypes, 37.9% (60/158) had a <italic>multi-organ</italic> disease (≥3 organs; there were none with involvement of ≥5 organs), followed by stage II or stage III treated (45/158, 28.5%). Our study cohort did not include any individuals with acute presentation (acute, untreated). Some patients overlapped with multiple clinical groups.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Distribution of characteristics and clinical phenotype groups of patients with sarcoidosis (with and without confirmed biopsy; N=158).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="0"/>
            <col width="430"/>
            <col width="0"/>
            <col width="0"/>
            <col width="0"/>
            <col width="0"/>
            <col width="0"/>
            <col width="230"/>
            <col width="0"/>
            <col width="0"/>
            <col width="0"/>
            <col width="0"/>
            <col width="0"/>
            <col width="230"/>
            <col width="0"/>
            <col width="0"/>
            <col width="80"/>
            <thead>
              <tr valign="top">
                <td colspan="4">Characteristics</td>
                <td colspan="6">Sarcoidosis with confirmed<break/>biopsy (n=142), n (%)</td>
                <td colspan="6">Sarcoidosis without confirmed biopsy (n=16)<sup>a</sup>, n (%)</td>
                <td colspan="2"><italic>P</italic> value</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="4">Age (years), mean (SD)</td>
                <td colspan="6">65.5 (10.8)</td>
                <td colspan="6">69.3 (10.3)</td>
                <td colspan="2">.18</td>
              </tr>
              <tr valign="top">
                <td colspan="17">
                  <bold>Sex</bold>
                </td>
                <td>.59<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="6">Male</td>
                <td colspan="6">127 (89.4)</td>
                <td>15 (93.7)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="6">Female</td>
                <td colspan="6">15 (10.6)</td>
                <td>1 (6.3)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="17">
                  <bold>Race</bold>
                </td>
                <td>.62<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="5">African American</td>
                <td colspan="6">74 (52.1)</td>
                <td colspan="3">11 (68.8)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="5">Non-Hispanic White</td>
                <td colspan="6">49 (34.5)</td>
                <td colspan="3">3 (18.8)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="5">Hispanic White</td>
                <td colspan="6">3 (2.1)</td>
                <td colspan="3">0 (0)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="5">Unknown</td>
                <td colspan="6">12 (8.5)</td>
                <td colspan="3">2 (12.5)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="5">Other</td>
                <td colspan="6">4 (2.8)</td>
                <td colspan="3">0 (0)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="17">
                  <bold>International Classification of Diseases codes for sarcoidosis</bold>
                </td>
                <td>.60<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="5">International Classification of Diseases, Ninth Revision</td>
                <td colspan="6">98 (69)</td>
                <td colspan="3">10 (62.5)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="5">International Classification of Diseases, Tenth Revision</td>
                <td colspan="6">44 (30.9)</td>
                <td colspan="3">6 (37.5)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="17">
                  <bold>Organ involvement</bold>
                </td>
                <td>.38<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="5">Lung</td>
                <td colspan="6">86 (60.6)</td>
                <td colspan="3">12 (75)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="5">Multi-organ (pulmonary without cardiac)</td>
                <td colspan="6">39 (27.5)</td>
                <td colspan="3">2 (12.5)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="5">Multi-organ (pulmonary and cardiac)</td>
                <td colspan="6">4 (2.8)</td>
                <td colspan="3">0 (0)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="5">Multi-organ (cardiac without pulmonary)</td>
                <td colspan="6">2 (1.4)</td>
                <td colspan="3">0 (0)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="2">
                  <break/>
                </td>
                <td colspan="5">Multi-organ (neither cardiac nor pulmonary)</td>
                <td colspan="6">11 (7.7)</td>
                <td colspan="3">2 (12.5)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="17">
                  <bold>Pulmonary function test pattern<sup>d</sup></bold>
                </td>
                <td>.03<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="4">Obstructive</td>
                <td colspan="6">27 (19)</td>
                <td colspan="4">1 (6.3)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="4">Restrictive</td>
                <td colspan="6">30 (21.1)</td>
                <td colspan="4">6 (37.5)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="4">Mixed</td>
                <td colspan="6">20 (14.1)</td>
                <td colspan="4">5 (31.3)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="4">Normal</td>
                <td colspan="6">39 (27.5)</td>
                <td colspan="4">1 (6.3)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="4">Missing</td>
                <td colspan="6">26 (18.3)</td>
                <td colspan="4">3 (18.8)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="17">
                  <bold>Scadding stage<sup>e</sup></bold>
                </td>
                <td>.06<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">Stage 0</td>
                <td colspan="6">22 (15.5)</td>
                <td colspan="4">5 (31.3)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">Stage I</td>
                <td colspan="6">23 (16.2)</td>
                <td colspan="4">3 (18.8)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">Stage II</td>
                <td colspan="6">45 (31.7)</td>
                <td colspan="4">2 (12.5)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">Stage III</td>
                <td colspan="6">17 (11.9)</td>
                <td colspan="4">4 (25)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">Stage IV</td>
                <td colspan="6">22 (15.5)</td>
                <td colspan="4">0 (0)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="5">Missing</td>
                <td colspan="6">13 (9.2)</td>
                <td colspan="4">2 (12.5)</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="17">
                  <bold>Clinical phenotype group<sup>f</sup></bold>
                </td>
                <td>.06<sup>c</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Group 1: multi-organ</td>
                <td colspan="6">56 (39.4)</td>
                <td colspan="6">4 (25)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Group 2: nonacute, stage I, untreated</td>
                <td colspan="6">6 (4.2)</td>
                <td colspan="6">2 (12.5)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Group 3: stages II-III, treated</td>
                <td colspan="6">42 (29.6)</td>
                <td colspan="6">3 (18.8)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Group 4: stages II-III, untreated</td>
                <td colspan="6">14 (9.9)</td>
                <td colspan="6">2 (12.5)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Group 5: stage IV, treated</td>
                <td colspan="6">17 (11.9)</td>
                <td colspan="6">0 (0)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Group 6: stage IV, untreated</td>
                <td colspan="6">4 (2.8)</td>
                <td colspan="6">2 (12.5)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Group 7: acute sarcoidosis, untreated</td>
                <td colspan="6">0 (0)</td>
                <td colspan="6">0 (0)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Group 8: remitting, untreated</td>
                <td colspan="6">30 (21.1)</td>
                <td colspan="6">5 (31.3)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td colspan="2">Group 9: cardiac sarcoidosis, treated</td>
                <td colspan="6">6 (4.2)</td>
                <td colspan="6">0 (0)</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Probable sarcoidosis: cases that have clinical and radiological features consistent with sarcoidosis but do not have confirmatory biopsies.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>Chi-square test.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>Fisher exact test.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>Evaluated based on pulmonary function test reports available in the computerized patient record system.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>Scored based on reviewers’ interpretation of imaging reports using Scadding staging. Stage 0: normal chest radiograph; stage I: hilar or mediastinal nodal enlargement only; stage II: nodal enlargement and parenchymal disease; stage III: parenchymal disease only; stage IV: end-stage lung disease (pulmonary fibrosis).</p>
            </fn>
            <fn id="table1fn6">
              <p><sup>f</sup>Clinical phenotype groups [<xref ref-type="bibr" rid="ref29">29</xref>]: some patients overlapped with multiple clinical groups. Group 1: multi-organ involvement, patients with ≥3 organs involved; group 2: nonacute, stage I, untreated: patients with nonacute sarcoidosis, stage I, never treated for sarcoidosis; group 3: stage II-III, treated: patients with nonacute sarcoidosis, stage II or III, formerly treated for sarcoidosis or treated within 3 months of data review; group 4: stage II-III, untreated: patients with nonacute sarcoidosis, stage II or III, never treated for sarcoidosis; group 5: stage IV, treated: patients with nonacute sarcoidosis, stage IV, formerly treated for sarcoidosis or treated within 3 months of data review; group 6: stage IV, untreated: patients with nonacute sarcoidosis, stage IV, never treated for sarcoidosis; group 7: acute sarcoidosis, untreated: patients with acute sarcoidosis (Lofgren syndrome); group 8: remitting, untreated: patients who have had no evidence of active clinical disease for &#62;1 year; group 9: cardiac sarcoidosis, treated: patients with cardiac manifestations of sarcoidosis, formerly treated for sarcoidosis or treated within 3 months of data review.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Diagnostic Accuracy of ICD Codes</title>
        <p>We then calculated the PPV using ICD codes to identify VA patients who met the ATS definition of sarcoidosis from the VINCI database. For this calculation, we used the curated data set of 200 patients. The PPV of using only ICD codes was 79% (95% CI 78.6%-80.5%) for identifying sarcoidosis cases and 71% (95% CI 64.7%-77.3%) for identifying histopathologically confirmed sarcoidosis in the EMR. After chart review, the inclusion of the generated <italic>high index of suspicion</italic> to identify confirmed sarcoidosis cases increased the PPV significantly to 100% (95% CI 96.5%-100%) with 90% sensitivity of histopathology reports alone compared with chart review (<xref ref-type="table" rid="table2">Table 2</xref>).</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Contingency 2×2 table of using histopathology reports compared with high index of suspicion for sarcoidosis case identification (N=200).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="570"/>
            <col width="0"/>
            <col width="160"/>
            <col width="0"/>
            <col width="160"/>
            <col width="80"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Among patients with International Classification of Diseases code for sarcoidosis</td>
                <td colspan="4">High index of suspicion<sup>a</sup> (chart review)</td>
              </tr>
              <tr valign="top">
                <td colspan="3">
                  <break/>
                </td>
                <td colspan="2">Yes</td>
                <td>No</td>
                <td>Total</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="7">
                  <bold>Histopathology report<sup>b</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Confirmed sarcoidosis</td>
                <td colspan="2">142<sup>c</sup></td>
                <td colspan="2">0<sup>d</sup></td>
                <td>142</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Not available<sup>e</sup></td>
                <td colspan="2">16<sup>f</sup></td>
                <td colspan="2">42<sup>g</sup></td>
                <td>58</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Total</td>
                <td colspan="2">158</td>
                <td colspan="2">42</td>
                <td>200</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>High index of suspicion for sarcoidosis based on both clinical and radiographic evidence but not biopsy.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>Available biopsies with primary or secondary histopathological reports.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>Sarcoidosis group with histopathological evidence of nonnecrotizing granuloma.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>No sarcoidosis group because of lack of sufficient clinical and radiological features consistent with sarcoidosis even in the presence of the histopathological evidence of nonnecrotizing granuloma.</p>
            </fn>
            <fn id="table2fn5">
              <p><sup>e</sup>No biopsies were ordered or available in the electronic medical record.</p>
            </fn>
            <fn id="table2fn6">
              <p><sup>f</sup>Probable sarcoidosis group without histopathological evidence of nonnecrotizing granuloma.</p>
            </fn>
            <fn id="table2fn7">
              <p><sup>g</sup>No sarcoidosis group because of lack of sufficient clinical and radiological features consistent with sarcoidosis, in addition to the absence of the histopathological evidence of nonnecrotizing granuloma.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this observational retrospective study of VA EMRs, we reviewed the medical records of 200 randomly selected patients with ICD diagnostic codes for sarcoidosis from the SFVA and PAVA medical centers (<xref rid="figure1" ref-type="fig">Figure 1</xref>). In this sample, we found that ICD diagnostic codes performed reasonably well with a PPV of 79% for detecting patients with sarcoidosis and 71% for detecting patients with histopathologically confirmed cases as defined by the ATS clinical practice guideline. After applying the developed <italic>index of suspicion</italic> to the initial cohort, we also demonstrated that including a <italic>high index of suspicion</italic> that incorporated information from radiology and clinical domains, but not histopathology, significantly increased the diagnostic accuracy to 100% (95% CI 96.5%-100%). The results of this study will help researchers and health care systems better understand the accuracy of using diagnostic codes alone versus using ICD codes with a <italic>high index of suspicion</italic> for sarcoidosis as classifiers in detecting a complex disease such as sarcoidosis in the EMR. Furthermore, the study highlighted other computational challenges in querying sarcoidosis cases and accurately extracting high-quality sarcoidosis-related research variables from the EMR. This approach could be adapted to develop automated chart review algorithms using additional data elements from structured and unstructured domains by applying advanced computational methodologies such as natural language processing (NLP) and machine learning.</p>
        <p>The randomly selected cohort of veterans in this study with sarcoidosis (with and without confirmed biopsy) consisted of 89.9% (142/158) men and 10.1% (16/158) women. Although the gender distribution in our study was different from that in A Case Control Etiologic Study of Sarcoidosis [<xref ref-type="bibr" rid="ref30">30</xref>], it is closely reflective of the demographics in the veterans’ population [<xref ref-type="bibr" rid="ref31">31</xref>]. This study confirmed the higher prevalence of sarcoidosis in African American individuals (85/158, 53.8%) compared with non-Hispanic White individuals (52/158, 32.9%), a finding that many other epidemiological studies on sarcoidosis have previously reported [<xref ref-type="bibr" rid="ref32">32</xref>-<xref ref-type="bibr" rid="ref36">36</xref>]. At the same time, the study population was racially diverse, highlighting the potential utility of the VA EMRs for studying sarcoidosis in medically underserved populations [<xref ref-type="bibr" rid="ref37">37</xref>]. In our study, the PPV was reasonable compared with that in the study conducted by Ungprasert et al [<xref ref-type="bibr" rid="ref14">14</xref>] for detecting patients with sarcoidosis in the EMR. This difference could be due to that study not using the ICD-10 code and having a less diverse population (85% White vs 9% Black).</p>
        <p>Using ICD codes alone to extract health information is far more convenient than the time-consuming process of manually reviewing narrative data sets in unstructured data. However, using ICD codes to identify sarcoidosis cases in large data sets with thousands of patients poses several practical challenges. First, given the heterogeneity of sarcoidosis, it is challenging to efficiently confirm the presence of the disease. The verification process requires careful analysis of the available narrative data such as progress notes, imaging reports, and pathology reports to establish the case definition based on the sarcoidosis diagnostic criteria [<xref ref-type="bibr" rid="ref3">3</xref>]. Second, the precise identification of the type of organ involvement through the EMR is a complex process and requires a thorough review of unstructured data. Although there are subcodes for ICD diagnostic codes that aim to capture the involvement of various organs, health care providers may or may not be familiar with these subcodes and may or may not use them correctly.</p>
        <p>Moreover, there are no specific ICD codes for classifying the involvement of some organs in sarcoidosis (such as the central nervous system or gastrointestinal tract) [<xref ref-type="bibr" rid="ref38">38</xref>]. Third, ICD codes do not determine the extent of the disease, such as described by the stages of a chest x-ray [<xref ref-type="bibr" rid="ref39">39</xref>], because of a lack of ICD codes for different stages of pulmonary sarcoidosis [<xref ref-type="bibr" rid="ref38">38</xref>]. Analysis of pulmonary features requires a manual review of every patient’s radiology reports and cannot be performed using only ICD codes. Finally, ICD codes do not specify the various sarcoidosis presentations such as acute, remitting, or chronic disease [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref38">38</xref>]. Thus, they cannot be used to classify patients into the previously described phenotype groups.</p>
        <p>The definition of clinical phenotypes has become an essential goal for the sarcoidosis scientific community because genetic studies have identified different patterns of gene expression associated with disease severity and disease course [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>]. In 2015, the National Heart, Lung, and Blood Institute held a workshop to leverage current scientific knowledge and define platforms to address disease disparities, identify high-risk phenotypes, and improve sarcoidosis outcomes [<xref ref-type="bibr" rid="ref25">25</xref>]. A total of 9 different steps and research strategies were recommended to expand the scope of sarcoidosis research, including EMR-based research, to provide a unified and multidisciplinary approach. Such an approach is expected to bring together stakeholders interested in reducing the burden and severity of sarcoidosis. However, the major barrier in the efficient use of EMR data is the accurate extraction of research-quality variables, case definitions, and outcomes [<xref ref-type="bibr" rid="ref42">42</xref>]. Thus, the rapid identification of cases and extraction of relevant clinical variables from the EMR using computational phenotype algorithms have emerged as an important next step in EMR-based research. Furthermore, computational phenotype definitions are also essential for conducting pragmatic clinical trials and comparative effectiveness research, increasing the health care system’s capacity to effectively deliver precision medicine for patients with sarcoidosis [<xref ref-type="bibr" rid="ref43">43</xref>].</p>
        <p>The two most commonly applied approaches to defining computational phenotypes are (1) a <italic>high-throughput</italic> phenotype algorithm using only structured data (traditionally, the ICD diagnosis codes) and (2) a <italic>low-throughput</italic> phenotype algorithm that accesses structured and unstructured data to develop a sequential flowchart that should end with a case definition. Such a low-throughput approach uses high-performance computational tools such as NLP to process text and extract information using linguistic rules, thereby eliminating the need for a labor-intensive manual review by researchers [<xref ref-type="bibr" rid="ref7">7</xref>]. Accordingly, this approach is expected to streamline the development of registries and help enrich EMR-based research studies [<xref ref-type="bibr" rid="ref44">44</xref>]. Our study highlights the need to develop such automated methods to improve the computational case definition of sarcoidosis. Beyond case definition, such methods could also be used to extract other high-quality sarcoidosis-related research variables, including the date of the diagnosis, organ involvements, Scadding stages, and the clinical status (acute, chronic, or remitting disease). This approach will assist in automating the extraction of pre-existing or novel clinical phenotypes more precisely and efficiently from the EMR.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Our study includes several limitations. First, primary histopathological reports were not available for all the patients. In the cases where the biopsy report was unavailable (either because of a remote history of the biopsy or because the biopsy had been performed outside the VA), we relied on the <italic>secondary</italic> histopathological reports documented in the providers’ narrative within the clinical notes. This approach made the diagnosis of sarcoidosis less robust because the confirmatory biopsy reports in these patients could not be directly verified. However, we used the <italic>index of suspicion</italic> approach to define probable sarcoidosis cases regardless of whether a confirmatory biopsy report was available, which is consistent with the diagnostic algorithm recommended by the ATS practice guideline [<xref ref-type="bibr" rid="ref3">3</xref>]. Second, our definition of <italic>multi-organ phenotype</italic> involved ≥3 organs, instead of ≥5 organs as proposed by the GRADS study [<xref ref-type="bibr" rid="ref29">29</xref>]. We chose this approach because none of the evaluated patients were documented to have involvement of ≥5 organs, thus avoiding having no patients with <italic>multi-organ phenotype</italic>. Lack of patients with involvement of ≥5 organs could be due to EMR-related limitations such as missing data and variability in documentation among providers or simply because these patients were cared for at non-VA tertiary medical centers. Third, the generalizability of our findings obtained from VA EMRs to other populations could be limited because the veterans form a special population with a different demographic distribution and exposure from the general population. However, the EMR data of the VA health care system cover &#62;22 million veterans across the United States and &#62;14,000 patients with sarcoidosis ICD diagnosis codes, providing an enormous number of patients to study a rare disease. 
Moreover, the number of patients whose records were examined in this study was 200, which could be considered a small sample size. However, we analyzed data from nearly two-thirds of all patients with diagnostic codes for sarcoidosis in the VA health care system across northern California.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Although ICD codes can be used as reasonable classifiers to identify sarcoidosis cases within EMRs with a PPV of 79%, using computational algorithms to extract clinical and radiographic information (<italic>index of suspicion</italic>) from unstructured data could significantly improve the accuracy of case identification. Furthermore, to increase the efficiency of identifying sarcoidosis cases from large health care databases, more studies are required to develop a novel sarcoidosis-specific computational phenotype algorithm using automated emerging methods (such as machine learning and NLP). Moreover, our study sets the stage for promoting research on developing other such algorithms aiming to generate high-quality sarcoidosis-related research variables, such as determining the date of the diagnosis, organ involvements, Scadding stages, and the clinical status (acute, chronic, or remitting disease).</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ATS</term>
          <def>
            <p>American Thoracic Society</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">EMR</term>
          <def>
            <p>electronic medical record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">GRADS</term>
          <def>
            <p>Genomic Research in Alpha-1 Antitrypsin Deficiency and Sarcoidosis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">ICD</term>
          <def>
            <p>International Classification of Diseases</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">PAVA</term>
          <def>
            <p>Palo Alto Veterans Affairs</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">PFT</term>
          <def>
            <p>pulmonary function test</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">PPV</term>
          <def>
            <p>positive predictive value</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">SFVA</term>
          <def>
            <p>San Francisco Veterans Affairs</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">VA</term>
          <def>
            <p>United States Department of Veterans Affairs</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">VINCI</term>
          <def>
            <p>VA Informatics and Computational Infrastructure</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors would like to thank Dr James Frank and Michelle Dunn for their valuable administrative assistance. The authors also thank Jianhong Chen and James Potter for providing support in programming and data extraction. MIS was supported by funds from the United States Department of Veterans Affairs Research Fellowship Award and a National Research Service Award training grant from the National Center for Advancing Translational Sciences (TL1-TR001871). MA was supported by the Flight Attendants Medical Research Institute (CIA190001), the United States Department of Veterans Affairs Clinical Sciences Research and Development (CXV-00125), and the Tobacco-Related Disease Research Program of the University of California (T29IR0715).</p>
    </ack>
    <fn-group>
      <fn fn-type="con">
        <p>MIS and MA obtained funding. MIS, LK, and MA conceived and designed the study research and developed the study protocol. MIS, IM, SZ, CEM, LK, and MA worked on the methods. MIS, IM, SZ, CEM, LK, and MA analyzed and interpreted the data. MIS and IM wrote the original draft. MIS, IM, SZ, GL, MAW, CEM, LK, and MA reviewed and edited the manuscript. All authors read and approved the final manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Judson</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>Advances in the diagnosis and treatment of sarcoidosis</article-title>
          <source>F1000Prime Rep</source>
          <year>2014</year>
          <volume>6</volume>
          <fpage>89</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://facultyopinions.com/prime/reports/pubmed/25374667"/>
          </comment>
          <pub-id pub-id-type="doi">10.12703/P6-89</pub-id>
          <pub-id pub-id-type="medline">25374667</pub-id>
          <pub-id pub-id-type="pii">89</pub-id>
          <pub-id pub-id-type="pmcid">PMC4191271</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Keller</surname>
              <given-names>AZ</given-names>
            </name>
          </person-group>
          <article-title>Anatomic sites, age attributes, and rates of sarcoidosis in U. S. veterans</article-title>
          <source>Am Rev Respir Dis</source>
          <year>1973</year>
          <month>04</month>
          <volume>107</volume>
          <issue>4</issue>
          <fpage>615</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1164/arrd.1973.107.4.615</pub-id>
          <pub-id pub-id-type="medline">4697670</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Crouser</surname>
              <given-names>ED</given-names>
            </name>
            <name name-style="western">
              <surname>Maier</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Bonham</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Morgenthau</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Patterson</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Abston</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bernstein</surname>
              <given-names>RC</given-names>
            </name>
            <name name-style="western">
              <surname>Blankstein</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Culver</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Drake</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Drent</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gerke</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Ghobrial</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Govender</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hamzeh</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>James</surname>
              <given-names>WE</given-names>
            </name>
            <name name-style="western">
              <surname>Judson</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Kellermeyer</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Knight</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Koth</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Poletti</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Raman</surname>
              <given-names>SV</given-names>
            </name>
            <name name-style="western">
              <surname>Tukey</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Westney</surname>
              <given-names>GE</given-names>
            </name>
            <name name-style="western">
              <surname>Baughman</surname>
              <given-names>RP</given-names>
            </name>
          </person-group>
          <article-title>Diagnosis and detection of sarcoidosis. An official American Thoracic Society clinical practice guideline</article-title>
          <source>Am J Respir Crit Care Med</source>
          <year>2020</year>
          <month>04</month>
          <day>15</day>
          <volume>201</volume>
          <issue>8</issue>
          <fpage>e26</fpage>
          <lpage>51</lpage>
          <pub-id pub-id-type="doi">10.1164/rccm.202002-0251st</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Judson</surname>
              <given-names>MA</given-names>
            </name>
          </person-group>
          <article-title>The diagnosis of sarcoidosis</article-title>
          <source>Curr Opin Pulm Med</source>
          <year>2019</year>
          <month>09</month>
          <volume>25</volume>
          <issue>5</issue>
          <fpage>484</fpage>
          <lpage>96</lpage>
          <pub-id pub-id-type="doi">10.1097/MCP.0000000000000596</pub-id>
          <pub-id pub-id-type="medline">31365383</pub-id>
          <pub-id pub-id-type="pii">00063198-201909000-00014</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Szeto</surname>
              <given-names>HC</given-names>
            </name>
            <name name-style="western">
              <surname>Coleman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gholami</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>BB</given-names>
            </name>
            <name name-style="western">
              <surname>Goldstein</surname>
              <given-names>MK</given-names>
            </name>
          </person-group>
          <article-title>Accuracy of computerized outpatient diagnoses in a Veterans Affairs general medicine clinic</article-title>
          <source>Am J Manag Care</source>
          <year>2002</year>
          <month>01</month>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>37</fpage>
          <lpage>43</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.ajmc.com/pubMed.php?pii=370"/>
          </comment>
          <pub-id pub-id-type="medline">11814171</pub-id>
          <pub-id pub-id-type="pii">370</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hsia</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Krushat</surname>
              <given-names>WM</given-names>
            </name>
            <name name-style="western">
              <surname>Fagan</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Tebbutt</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Kusserow</surname>
              <given-names>RP</given-names>
            </name>
          </person-group>
          <article-title>Accuracy of diagnostic coding for Medicare patients under the prospective-payment system</article-title>
          <source>N Engl J Med</source>
          <year>1988</year>
          <month>02</month>
          <day>11</day>
          <volume>318</volume>
          <issue>6</issue>
          <fpage>352</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1056/nejm198802113180604</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pendergrass</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Crawford</surname>
              <given-names>DC</given-names>
            </name>
          </person-group>
          <article-title>Using electronic health records to generate phenotypes for research</article-title>
          <source>Curr Protoc Hum Genet</source>
          <year>2019</year>
          <month>01</month>
          <volume>100</volume>
          <issue>1</issue>
          <fpage>e80</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30516347"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/cphg.80</pub-id>
          <pub-id pub-id-type="medline">30516347</pub-id>
          <pub-id pub-id-type="pmcid">PMC6318047</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Horsky</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Drucker</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ramelson</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Accuracy and completeness of clinical coding using ICD-10 for ambulatory visits</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2017</year>
          <volume>2017</volume>
          <fpage>912</fpage>
          <lpage>20</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29854158"/>
          </comment>
          <pub-id pub-id-type="medline">29854158</pub-id>
          <pub-id pub-id-type="pmcid">PMC5977598</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cameli</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Caffarelli</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Refini</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Bergantini</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>d'Alessandro</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Armati</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tomai Pitinca</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Sestini</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gonnelli</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bargagli</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Hypercalciuria in sarcoidosis: a specific biomarker with clinical utility</article-title>
          <source>Front Med (Lausanne)</source>
          <year>2020</year>
          <volume>7</volume>
          <fpage>568020</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/fmed.2020.568020"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fmed.2020.568020</pub-id>
          <pub-id pub-id-type="medline">33195314</pub-id>
          <pub-id pub-id-type="pmcid">PMC7658263</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sauer</surname>
              <given-names>WH</given-names>
            </name>
            <name name-style="western">
              <surname>Stern</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Baughman</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Culver</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Royal</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>High-risk sarcoidosis. Current concepts and research imperatives</article-title>
          <source>Ann Am Thorac Soc</source>
          <year>2017</year>
          <month>12</month>
          <volume>14</volume>
          <issue>Supplement_6</issue>
          <fpage>S437</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.1513/annalsats.201707-566ot</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Boffetta</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Rabkin</surname>
              <given-names>CS</given-names>
            </name>
            <name name-style="western">
              <surname>Gridley</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>A cohort study of cancer among sarcoidosis patients</article-title>
          <source>Int J Cancer</source>
          <year>2009</year>
          <month>06</month>
          <day>01</day>
          <volume>124</volume>
          <issue>11</issue>
          <fpage>2697</fpage>
          <lpage>700</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1002/ijc.24261"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/ijc.24261</pub-id>
          <pub-id pub-id-type="medline">19230028</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kraaijvanger</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Janssen Bonás</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Vorselaars</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Veltkamp</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Biomarkers in the diagnosis and prognosis of sarcoidosis: current use and future prospects</article-title>
          <source>Front Immunol</source>
          <year>2020</year>
          <volume>11</volume>
          <fpage>1443</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.3389/fimmu.2020.01443"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fimmu.2020.01443</pub-id>
          <pub-id pub-id-type="medline">32760396</pub-id>
          <pub-id pub-id-type="pmcid">PMC7372102</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baughman</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Field</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Costabel</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Crystal</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Culver</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Drent</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Judson</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Wolff</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Sarcoidosis in America. Analysis based on health care use</article-title>
          <source>Ann Am Thorac Soc</source>
          <year>2016</year>
          <month>08</month>
          <volume>13</volume>
          <issue>8</issue>
          <fpage>1244</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1513/AnnalsATS.201511-760OC</pub-id>
          <pub-id pub-id-type="medline">27509154</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ungprasert</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Matteson</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>Crowson</surname>
              <given-names>CS</given-names>
            </name>
          </person-group>
          <article-title>Accuracy of diagnostic coding for sarcoidosis in electronic databases: a population-based study</article-title>
          <source>Lung</source>
          <year>2017</year>
          <month>12</month>
          <volume>195</volume>
          <issue>6</issue>
          <fpage>713</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28993879"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s00408-017-0054-x</pub-id>
          <pub-id pub-id-type="medline">28993879</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00408-017-0054-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC5881941</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lacasse</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Daigle</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Maltais</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Validity of chronic obstructive pulmonary disease diagnoses in a large administrative database</article-title>
          <source>Can Respir J</source>
          <year>2012</year>
          <volume>19</volume>
          <issue>2</issue>
          <fpage>e5</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1155/2012/260374</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blais</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lemière</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Menzies</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Berbiche</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Validity of asthma diagnoses recorded in the medical services database of Quebec</article-title>
          <source>Pharmacoepidemiol Drug Saf</source>
          <year>2006</year>
          <month>04</month>
          <volume>15</volume>
          <issue>4</issue>
          <fpage>245</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1002/pds.1202</pub-id>
          <pub-id pub-id-type="medline">16374899</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ley</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Urbania</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Husson</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Vittinghoff</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Brush</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Eisner</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Iribarren</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Collard</surname>
              <given-names>HR</given-names>
            </name>
          </person-group>
          <article-title>Code-based diagnostic algorithms for idiopathic pulmonary fibrosis. Case validation and improvement</article-title>
          <source>Ann Am Thorac Soc</source>
          <year>2017</year>
          <month>06</month>
          <volume>14</volume>
          <issue>6</issue>
          <fpage>880</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1513/annalsats.201610-764oc</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Elasy</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Clayton</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Type 2 diabetes risk forecasting from EMR data using machine learning</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2012</year>
          <volume>2012</volume>
          <fpage>606</fpage>
          <lpage>15</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23304333"/>
          </comment>
          <pub-id pub-id-type="medline">23304333</pub-id>
          <pub-id pub-id-type="pmcid">PMC3540444</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Himes</surname>
              <given-names>BE</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>IS</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Ramoni</surname>
              <given-names>MF</given-names>
            </name>
          </person-group>
          <article-title>Prediction of chronic obstructive pulmonary disease (COPD) in asthma patients using electronic medical records</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2009</year>
          <volume>16</volume>
          <issue>3</issue>
          <fpage>371</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19261943"/>
          </comment>
          <pub-id pub-id-type="doi">10.1197/jamia.M2846</pub-id>
          <pub-id pub-id-type="medline">19261943</pub-id>
          <pub-id pub-id-type="pii">M2846</pub-id>
          <pub-id pub-id-type="pmcid">PMC2732240</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Romero-Brufau</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Whitford</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hickman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Morlan</surname>
              <given-names>BW</given-names>
            </name>
            <name name-style="western">
              <surname>Therneau</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Naessens</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Huddleston</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Using machine learning to improve the accuracy of patient deterioration predictions: Mayo Clinic Early Warning Score (MC-EWS)</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <month>06</month>
          <day>12</day>
          <volume>28</volume>
          <issue>6</issue>
          <fpage>1207</fpage>
          <lpage>15</lpage>
          <pub-id pub-id-type="doi">10.1093/jamia/ocaa347</pub-id>
          <pub-id pub-id-type="medline">33638343</pub-id>
          <pub-id pub-id-type="pii">6151565</pub-id>
          <pub-id pub-id-type="pmcid">PMC8661441</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liao</surname>
              <given-names>KP</given-names>
            </name>
            <name name-style="western">
              <surname>Ananthakrishnan</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Kumar</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Cagan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gainer</surname>
              <given-names>VS</given-names>
            </name>
            <name name-style="western">
              <surname>Goryachev</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Agniel</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Churchill</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Plenge</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Szolovits</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kohane</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Shaw</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Karlson</surname>
              <given-names>EW</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Methods to develop an electronic medical record phenotype algorithm to compare the risk of coronary artery disease across 3 chronic disease cohorts</article-title>
          <source>PLoS One</source>
          <year>2015</year>
          <volume>10</volume>
          <issue>8</issue>
          <fpage>e0136651</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0136651"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0136651</pub-id>
          <pub-id pub-id-type="medline">26301417</pub-id>
          <pub-id pub-id-type="pii">PONE-D-14-43453</pub-id>
          <pub-id pub-id-type="pmcid">PMC4547801</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Makino</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yoshimoto</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ono</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Itoko</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Katsuki</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Koseki</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kudo</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Haida</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kuroda</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yanagiya</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Saitoh</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hoshinaga</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yuzawa</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Suzuki</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence predicts the progression of diabetic kidney disease using big data machine learning</article-title>
          <source>Sci Rep</source>
          <year>2019</year>
          <month>08</month>
          <day>14</day>
          <volume>9</volume>
          <issue>1</issue>
          <fpage>11862</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-019-48263-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-019-48263-5</pub-id>
          <pub-id pub-id-type="medline">31413285</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-019-48263-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC6694113</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Martucci</surname>
              <given-names>VL</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kerchberger</surname>
              <given-names>VE</given-names>
            </name>
            <name name-style="western">
              <surname>Osterman</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Torstenson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Richmond</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Aldrich</surname>
              <given-names>MC</given-names>
            </name>
          </person-group>
          <article-title>A clinical phenotyping algorithm to identify cases of chronic obstructive pulmonary disease in electronic health records</article-title>
          <source>bioRxiv</source>
          <year>2019</year>
          <comment>(forthcoming)</comment>
          <pub-id pub-id-type="doi">10.1101/716779</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chicco</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jurman</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Machine learning can predict survival of patients with heart failure from serum creatinine and ejection fraction alone</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2020</year>
          <month>02</month>
          <day>03</day>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>16</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-020-1023-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-020-1023-5</pub-id>
          <pub-id pub-id-type="medline">32013925</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-020-1023-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC6998201</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Maier</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Crouser</surname>
              <given-names>ED</given-names>
            </name>
            <name name-style="western">
              <surname>Martin</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Eu</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Executive summary of the NHLBI workshop report: leveraging current scientific advancements to understand sarcoidosis variability and improve outcomes</article-title>
          <source>Ann Am Thorac Soc</source>
          <year>2017</year>
          <month>12</month>
          <volume>14</volume>
          <issue>Supplement_6</issue>
          <fpage>S415</fpage>
          <lpage>20</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29048937"/>
          </comment>
          <pub-id pub-id-type="doi">10.1513/AnnalsATS.201707-563OT</pub-id>
          <pub-id pub-id-type="medline">29048937</pub-id>
          <pub-id pub-id-type="pmcid">PMC5802571</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Velarde</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Romesser</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Clegg</surname>
              <given-names>DO</given-names>
            </name>
            <name name-style="western">
              <surname>Efimova</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Oostema</surname>
              <given-names>SJ</given-names>
            </name>
            <name name-style="western">
              <surname>Scehnet</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>DuVall</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>GD</given-names>
            </name>
          </person-group>
          <article-title>An initiative using informatics to facilitate clinical research planning and recruitment in the VA health care system</article-title>
          <source>Contemp Clin Trials Commun</source>
          <year>2018</year>
          <month>09</month>
          <volume>11</volume>
          <fpage>107</fpage>
          <lpage>12</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2451-8654(18)30038-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.conctc.2018.07.001</pub-id>
          <pub-id pub-id-type="medline">30035242</pub-id>
          <pub-id pub-id-type="pii">S2451-8654(18)30038-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC6052195</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <article-title>VA Informatics and Computing Infrastructure (VINCI)</article-title>
          <source>U.S. Department of Veterans Affairs</source>
          <access-date>2022-02-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.hsrd.research.va.gov/for_researchers/vinci/">https://www.hsrd.research.va.gov/for_researchers/vinci/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Daniel</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Sampling Essentials: Practical Guidelines for Making Sampling Choices</source>
          <year>2012</year>
          <publisher-loc>Thousand Oaks, California, United States</publisher-loc>
          <publisher-name>SAGE Publications</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moller</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Koth</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Maier</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Morris</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Drake</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Rossman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Leader</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Collman</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Hamzeh</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Sweiss</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>O’Neal</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Senior</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Becich</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hochheiser</surname>
              <given-names>HS</given-names>
            </name>
            <name name-style="western">
              <surname>Kaminski</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wisniewski</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Gibson</surname>
              <given-names>KF</given-names>
            </name>
          </person-group>
          <article-title>Rationale and design of the genomic research in alpha-1 antitrypsin deficiency and sarcoidosis (GRADS) study. Sarcoidosis protocol</article-title>
          <source>Ann Am Thorac Soc</source>
          <year>2015</year>
          <month>10</month>
          <volume>12</volume>
          <issue>10</issue>
          <fpage>1561</fpage>
          <lpage>71</lpage>
          <pub-id pub-id-type="doi">10.1513/annalsats.201503-172ot</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baughman</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Teirstein</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Judson</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Rossman</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Yeager</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bresnitz</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>DePalo</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Hunninghake</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Iannuzzi</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Johns</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>McLennan</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Moller</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Newman</surname>
              <given-names>LS</given-names>
            </name>
            <name name-style="western">
              <surname>Rabin</surname>
              <given-names>DL</given-names>
            </name>
            <name name-style="western">
              <surname>Rose</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rybicki</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Weinberger</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Terrin</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Knatterud</surname>
              <given-names>GL</given-names>
            </name>
            <name name-style="western">
              <surname>Cherniak</surname>
              <given-names>R</given-names>
            </name>
            <collab>Case Control Etiologic Study of Sarcoidosis (ACCESS) research group</collab>
          </person-group>
          <article-title>Clinical characteristics of patients in a case control study of sarcoidosis</article-title>
          <source>Am J Respir Crit Care Med</source>
          <year>2001</year>
          <month>11</month>
          <day>15</day>
          <volume>164</volume>
          <issue>10 Pt 1</issue>
          <fpage>1885</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1164/ajrccm.164.10.2104046</pub-id>
          <pub-id pub-id-type="medline">11734441</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <article-title>Department of Veterans Affairs statistics at a glance</article-title>
          <source>National Center for Veterans Analysis and Statistics</source>
          <access-date>2022-02-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.va.gov/vetdata/docs/Quickfacts/Stats_at_a_glance_4_6_20.PDF">https://www.va.gov/vetdata/docs/Quickfacts/Stats_at_a_glance_4_6_20.PDF</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mirsaeidi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Machado</surname>
              <given-names>RF</given-names>
            </name>
            <name name-style="western">
              <surname>Schraufnagel</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sweiss</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>Baughman</surname>
              <given-names>RP</given-names>
            </name>
          </person-group>
          <article-title>Racial difference in sarcoidosis mortality in the United States</article-title>
          <source>Chest</source>
          <year>2015</year>
          <month>02</month>
          <volume>147</volume>
          <issue>2</issue>
          <fpage>438</fpage>
          <lpage>49</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/25188873"/>
          </comment>
          <pub-id pub-id-type="doi">10.1378/chest.14-1120</pub-id>
          <pub-id pub-id-type="medline">25188873</pub-id>
          <pub-id pub-id-type="pii">S0012-3692(15)30178-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC4314818</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cozier</surname>
              <given-names>YC</given-names>
            </name>
            <name name-style="western">
              <surname>Berman</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Palmer</surname>
              <given-names>JR</given-names>
            </name>
            <name name-style="western">
              <surname>Boggs</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Serlin</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenberg</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Sarcoidosis in black women in the United States: data from the Black Women's Health Study</article-title>
          <source>Chest</source>
          <year>2011</year>
          <month>01</month>
          <volume>139</volume>
          <issue>1</issue>
          <fpage>144</fpage>
          <lpage>50</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/20595459"/>
          </comment>
          <pub-id pub-id-type="doi">10.1378/chest.10-0413</pub-id>
          <pub-id pub-id-type="medline">20595459</pub-id>
          <pub-id pub-id-type="pii">S0012-3692(11)60026-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC3014690</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rybicki</surname>
              <given-names>BA</given-names>
            </name>
            <name name-style="western">
              <surname>Major</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Popovich</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Maliarik</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Iannuzzi</surname>
              <given-names>MC</given-names>
            </name>
          </person-group>
          <article-title>Racial differences in sarcoidosis incidence: a 5-year study in a health maintenance organization</article-title>
          <source>Am J Epidemiol</source>
          <year>1997</year>
          <month>02</month>
          <day>01</day>
          <volume>145</volume>
          <issue>3</issue>
          <fpage>234</fpage>
          <lpage>41</lpage>
          <pub-id pub-id-type="doi">10.1093/oxfordjournals.aje.a009096</pub-id>
          <pub-id pub-id-type="medline">9012596</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brito-Zerón</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kostov</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Superville</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Baughman</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Ramos-Casals</surname>
              <given-names>M</given-names>
            </name>
            <collab>Autoimmune Big Data Study Group</collab>
          </person-group>
          <article-title>Geoepidemiological big data approach to sarcoidosis: geographical and ethnic determinants</article-title>
          <source>Clin Exp Rheumatol</source>
          <year>2019</year>
          <volume>37</volume>
          <issue>6</issue>
          <fpage>1052</fpage>
          <lpage>64</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.clinexprheumatol.org/pubmed/find-pii.asp?pii=31498063"/>
          </comment>
          <pub-id pub-id-type="medline">31498063</pub-id>
          <pub-id pub-id-type="pii">13691</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arkema</surname>
              <given-names>EV</given-names>
            </name>
            <name name-style="western">
              <surname>Cozier</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Sarcoidosis epidemiology: recent estimates of incidence, prevalence and risk factors</article-title>
          <source>Curr Opin Pulm Med</source>
          <year>2020</year>
          <month>09</month>
          <volume>26</volume>
          <issue>5</issue>
          <fpage>527</fpage>
          <lpage>34</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/32701677"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/MCP.0000000000000715</pub-id>
          <pub-id pub-id-type="medline">32701677</pub-id>
          <pub-id pub-id-type="pii">00063198-202009000-00022</pub-id>
          <pub-id pub-id-type="pmcid">PMC7755458</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <article-title>Population page - racial and ethnic minority veterans</article-title>
          <source>U.S. Department of Veterans Affairs</source>
          <access-date>2022-02-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.va.gov/HEALTHEQUITY/Race_Ethnicity.asp?utm_content&#38;utm_medium=email&#38;utm_name&#38;utm_source=govdelivery&#38;utm_term">https://www.va.gov/HEALTHEQUITY/Race_Ethnicity.asp?utm_content&#38;utm_medium=email&#38;utm_name&#38;utm_source=govdelivery&#38;utm_term</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="book">
          <source>The ICD-10 Classification of Mental and Behavioural Disorders</source>
          <year>1993</year>
          <publisher-loc>Geneva</publisher-loc>
          <publisher-name>World Health Organization</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Scadding</surname>
              <given-names>JG</given-names>
            </name>
          </person-group>
          <article-title>Prognosis of intrathoracic sarcoidosis in England. A review of 136 cases after five years' observation</article-title>
          <source>Br Med J</source>
          <year>1961</year>
          <month>11</month>
          <day>04</day>
          <volume>2</volume>
          <issue>5261</issue>
          <fpage>1165</fpage>
          <lpage>72</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/14497750"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.2.5261.1165</pub-id>
          <pub-id pub-id-type="medline">14497750</pub-id>
          <pub-id pub-id-type="pmcid">PMC1970202</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Su</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Bhakta</surname>
              <given-names>NR</given-names>
            </name>
            <name name-style="western">
              <surname>Solberg</surname>
              <given-names>OD</given-names>
            </name>
            <name name-style="western">
              <surname>Darnell</surname>
              <given-names>EP</given-names>
            </name>
            <name name-style="western">
              <surname>Ramstein</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Garudadri</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Woodruff</surname>
              <given-names>PG</given-names>
            </name>
            <name name-style="western">
              <surname>Koth</surname>
              <given-names>LL</given-names>
            </name>
          </person-group>
          <article-title>Longitudinal analysis of sarcoidosis blood transcriptomic signatures and disease outcomes</article-title>
          <source>Eur Respir J</source>
          <year>2014</year>
          <month>10</month>
          <volume>44</volume>
          <issue>4</issue>
          <fpage>985</fpage>
          <lpage>93</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://erj.ersjournals.com/cgi/pmidlookup?view=long&#38;pmid=25142485"/>
          </comment>
          <pub-id pub-id-type="doi">10.1183/09031936.00039714</pub-id>
          <pub-id pub-id-type="medline">25142485</pub-id>
          <pub-id pub-id-type="pii">09031936.00039714</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Sweiss</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>ES</given-names>
            </name>
            <name name-style="western">
              <surname>Moller</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Knox</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wade</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Noth</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Machado</surname>
              <given-names>RF</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia</surname>
              <given-names>JG</given-names>
            </name>
          </person-group>
          <article-title>Peripheral blood gene expression as a novel genomic biomarker in complicated sarcoidosis</article-title>
          <source>PLoS One</source>
          <year>2012</year>
          <volume>7</volume>
          <issue>9</issue>
          <fpage>e44818</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0044818"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0044818</pub-id>
          <pub-id pub-id-type="medline">22984568</pub-id>
          <pub-id pub-id-type="pii">PONE-D-12-04039</pub-id>
          <pub-id pub-id-type="pmcid">PMC3440319</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Mao</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Vatani</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Zhong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Rasmussen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Pathak</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Developing a portable natural language processing based phenotyping system</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2019</year>
          <month>04</month>
          <day>04</day>
          <volume>19</volume>
          <issue>Suppl 3</issue>
          <fpage>78</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-019-0786-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-019-0786-z</pub-id>
          <pub-id pub-id-type="medline">30943974</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-019-0786-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC6448187</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
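            <name name-style="western">
              <surname>Richesson</surname>
              <given-names>Rachel</given-names>
            </name>
            <name name-style="western">
              <surname>Smerek</surname>
              <given-names>Michelle</given-names>
            </name>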
          </person-group>
          <article-title>Electronic health records-based phenotyping</article-title>
          <source>Rethinking Clinical Trials: A Living Textbook of Pragmatic Clinical Trials</source>
          <year>2014</year>
          <publisher-loc>Durham, North Carolina, United States</publisher-loc>
          <publisher-name>Duke Clinical Research Institute</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Neely</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Clement</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Riley</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Hegelan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Phelan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kraft</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Murdoch</surname>
              <given-names>DM</given-names>
            </name>
            <name name-style="western">
              <surname>Lucas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bartlett</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>McKellar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Que</surname>
              <given-names>LG</given-names>
            </name>
          </person-group>
          <article-title>Development and validation of an electronic medical record (EMR)-based computed phenotype of HIV-1 infection</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2018</year>
          <month>02</month>
          <day>01</day>
          <volume>25</volume>
          <issue>2</issue>
          <fpage>150</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/28645207"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocx061</pub-id>
          <pub-id pub-id-type="medline">28645207</pub-id>
          <pub-id pub-id-type="pii">3884516</pub-id>
          <pub-id pub-id-type="pmcid">PMC6381767</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
