<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v9i1e58097</article-id><article-id pub-id-type="doi">10.2196/58097</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Concordance Between Survey and Electronic Health Record Data in the COVID-19 Citizen Science Study: Retrospective Cohort Analysis</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Crull</surname><given-names>Elizabeth</given-names></name><degrees>MPH</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>O'Brien</surname><given-names>Emily C</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Antiperovitch</surname><given-names>Pavel</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Asfaw</surname><given-names>Kirubel</given-names></name><degrees>MS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Beatty</surname><given-names>Alexis L</given-names></name><degrees>MD, MAS</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Djibo</surname><given-names>Djeneba Audrey</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kaul</surname><given-names>Alan F</given-names></name><degrees>PharmD, MBA</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kornak</surname><given-names>John</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Marcus</surname><given-names>Gregory M</given-names></name><degrees>MD, MAS</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Modrow</surname><given-names>Madelaine Faulkner</given-names></name><degrees>MPH</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Olgin</surname><given-names>Jeffrey E</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Orozco</surname><given-names>Jaime</given-names></name><degrees>BA</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Park</surname><given-names>Soo</given-names></name><degrees>BA</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Peyser</surname><given-names>Noah</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Pletcher</surname><given-names>Mark J</given-names></name><degrees>MD, MPH</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Carton</surname><given-names>Thomas W</given-names></name><degrees>MS, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Health Services Research, Louisiana Public Health Institute</institution><addr-line>400 Poydras Street, Suite 1250</addr-line><addr-line>New Orleans</addr-line><addr-line>LA</addr-line><country>United States</country></aff><aff id="aff2"><institution>Duke Clinical Research Institute, School of Medicine, Duke University</institution><addr-line>Durham</addr-line><addr-line>NC</addr-line><country>United States</country></aff><aff id="aff3"><institution>Department of Epidemiology and Biostatistics, University of California, San Francisco</institution><addr-line>San Francisco</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff4"><institution>Division of Cardiology, University of California, San Francisco</institution><addr-line>San Francisco</addr-line><addr-line>CA</addr-line><country>United States</country></aff><aff id="aff5"><institution>Safety, Surveillance, and Collaboration, CVS Health</institution><addr-line>Blue Bell</addr-line><addr-line>PA</addr-line><country>United States</country></aff><aff id="aff6"><institution>Medical Outcomes Management, Inc.</institution><addr-line>Sharon</addr-line><addr-line>MA</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name 
name-style="western"><surname>Mavragani</surname><given-names>Amaryllis</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Johnny Wong</surname><given-names>Chiew Meng</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Jia</surname><given-names>Shyam</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Xu</surname><given-names>Yuan</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Elizabeth Crull, MPH, Department of Health Services Research, Louisiana Public Health Institute, 400 Poydras Street, Suite 1250, New Orleans, LA, 70130, United States, 1 5044954903; <email>ecrull@lphi.org</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>28</day><month>7</month><year>2025</year></pub-date><volume>9</volume><elocation-id>e58097</elocation-id><history><date date-type="received"><day>05</day><month>03</month><year>2024</year></date><date date-type="rev-recd"><day>30</day><month>04</month><year>2025</year></date><date date-type="accepted"><day>06</day><month>05</month><year>2025</year></date></history><copyright-statement>&#x00A9; Elizabeth Crull, Emily C O'Brien, Pavel Antiperovitch, Kirubel Asfaw, Alexis L Beatty, Djeneba Audrey Djibo, Alan F Kaul, John Kornak, Gregory M Marcus, Madelaine Faulkner Modrow, Jeffrey E Olgin, Jaime Orozco, Soo Park, Noah Peyser, Mark J Pletcher, Thomas W Carton. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 28.7.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2025/1/e58097"/><abstract><sec><title>Background</title><p>Real-world data reported by patients and extracted from electronic health records (EHRs) are increasingly leveraged for research, policy, and clinical decision-making. However, the extent to which these 2 data sources agree with each other is not always clear.</p></sec><sec><title>Objective</title><p>This study aimed to evaluate the concordance between variables reported by participants enrolled in an electronic cohort study and data available in their EHRs.</p></sec><sec sec-type="methods"><title>Methods</title><p>Survey data from COVID-19 Citizen Science, an electronic cohort study, were linked to EHR data from 7 health systems, comprising 34,908 participants. Concordance was evaluated for demographics, chronic conditions, and COVID-19 characteristics. 
Overall agreement, sensitivity, specificity, positive predictive value, negative predictive value, and &#x03BA; statistics with 95% CIs were calculated.</p></sec><sec sec-type="results"><title>Results</title><p>Of 34,017 participants with complete information, 62.3% (21,176/34,017) reported being female, and 62.4% (21,217/34,017) were female according to EHR data. The median age was 57 (IQR 42&#x2010;68) years. Overall, 81.6% (27,744/34,017) of participants reported being White, and 79.5% (27,054/34,017) were White according to EHR data. In addition, 9.2% (3,124/34,017) of participants reported being Hispanic, and 6.6% (2,249/34,017) were Hispanic according to EHR data. Statistically significant discordance between data sources was detected for all demographic characteristics (<italic>P</italic>&#x003C;.05) except the female category (<italic>P</italic>=.57) and the American Indian or Alaska Native (<italic>P</italic>=.21) and &#x201C;other&#x201D; race categories (<italic>P</italic>=.33). Statistically significant discordance was detected for the 2 COVID-19 traits and all baseline medical conditions except diabetes (<italic>P</italic>=.17). The starkest absolute difference between data sources was for COVID-19 vaccination, which was 48.4% according to the EHR and 97.4% according to participant report. Overall agreement was high for all demographic characteristics, although chance-corrected agreement (&#x03BA;) and sensitivity were lower for the &#x201C;other&#x201D; race category (&#x03BA;=0.31, sensitivity=26.6%), Hispanic ethnicity (&#x03BA;=0.82, sensitivity=74%), and current smoker status (&#x03BA;=0.54, sensitivity=49.4%). Specificity and negative predictive value (NPV) were higher than corresponding sensitivity and positive predictive value (PPV) for all baseline medical conditions. Sleep apnea had the highest sensitivity of all medical conditions (83.5%), and anemia had the lowest (32.8%). 
Chance-corrected agreement (&#x03BA;) was highly variable for baseline medical conditions, ranging from 0.26 for anemia to 0.71 for diabetes. Overall and chance-corrected agreement between data sources for COVID-19 traits such as infection (84.6%, &#x03BA;=0.34) and vaccination (51.0%, &#x03BA;=0.05) was lower than for all other evaluated traits. The sensitivity for COVID-19 infection was 32.2%, and the sensitivity for COVID-19 vaccination was 49.7%. Although PPV for COVID-19 vaccination was 99.9%, the NPV was 5%.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Results suggest the need for improvements to point-of-care capture of patient demographic traits and COVID-19 infection and vaccination history, patient education about their medical conditions, and linkage to external data sources in EHR-only pragmatic research. Further, these results indicate that additional work is required to integrate and prioritize participant-reported data in pragmatic research.</p></sec><sec><title>Trial Registration</title><p>ClinicalTrials.gov NCT05548803; <ext-link ext-link-type="uri" xlink:href="https://clinicaltrials.gov/study/NCT05548803?term=pletcher&#x0026;rank=3">https://clinicaltrials.gov/study/NCT05548803</ext-link></p></sec></abstract><kwd-group><kwd>electronic health records</kwd><kwd>self-report</kwd><kwd>COVID-19</kwd><kwd>data accuracy</kwd><kwd>data validation</kwd><kwd>EHR</kwd><kwd>cohort</kwd><kwd>cohort analysis</kwd><kwd>real-world data</kwd><kwd>concordance</kwd><kwd>internet-based</kwd><kwd>portal</kwd><kwd>participant</kwd><kwd>report</kwd><kwd>reported</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>The advent of electronic health record (EHR) systems and internet-based study portals has modernized and streamlined pragmatic clinical research [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>], defined as research that can be conducted in 
real-world settings with minimal change to clinical operation [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. The use of patient- and study participant&#x2013;reported data alongside EHR data is increasingly common in research and clinical practice to complement and validate EHR data sources [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. Therefore, there is potential value in linking and comparing patient experience and outcomes data gathered from mailed surveys [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>] and patient-facing web-based portals [<xref ref-type="bibr" rid="ref10">10</xref>] with data extracted from EHRs. This is especially true for clinical concepts that are notoriously difficult to qualify using medical coding alone, such as mood, gastrointestinal disorders, and chronic pain [<xref ref-type="bibr" rid="ref11">11</xref>].</p><p>EHR and participant-reported data each have significant limitations. EHR data are fraught with administrative error, incomplete mapping to clinical ontologies, lack of legacy health record data, and inability to extract important clinical information from unstructured physician notes [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Participant-reported data are subject to bias from social desirability [<xref ref-type="bibr" rid="ref10">10</xref>], fatigue [<xref ref-type="bibr" rid="ref14">14</xref>], and limited understanding of medical issues. How these limitations affect the reliability of different kinds of health-related information is of interest for this substudy. 
There is a particular need for understanding the reliability of health information related to COVID-19, especially because a large portion of home testing and vaccination occurred outside traditional health systems.</p><p>The COVID-19 Citizen Science Study (CCS) is a longitudinal digital cohort study designed to generate knowledge about participant-reported outcomes related to the COVID-19 pandemic [<xref ref-type="bibr" rid="ref15">15</xref>]. The study linked participant-reported data with their corresponding EHR data, thus presenting an opportunity to analyze the concordance between these data sources. The purpose of this study was to assess the concordance of COVID-19&#x2013;related outcomes, demographic characteristics, smoker status, and 12 common medical conditions.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>Our study evaluated concordance between two data sources: (1) participant-reported data from a web-based patient portal for the CCS study and (2) participants&#x2019; corresponding EHRs, conforming to a common data model. Data from the participant-reported data source were converted first to a CSV format and then imported into SAS 9.4 software (SAS Institute) as datasets. Data from the participants&#x2019; EHRs were loaded along an extract-transform-load pipeline from the source EHR to a relational database management system and finally into SAS 9.4 software as well to enable comparison with participant-reported data.</p></sec><sec id="s2-2"><title>Study Recruitment</title><p>Our concordance assessment used participant-reported and EHR data collected as part of the CCS study (ClinicalTrials.gov identifier NCT05548803), which has been described in detail previously [<xref ref-type="bibr" rid="ref15">15</xref>]. 
Participants were recruited from 7 major health systems in Texas, Louisiana, Mississippi, California, Utah, and New York that participate in the National Patient-Centered Clinical Research Network (PCORnet). Patients were eligible to join if they were 18 years or older and had at least one clinical encounter after January 1, 2019. Recruitment lasted from November 2020 to February 2022.</p></sec><sec id="s2-3"><title>Participant-Reported Data</title><p>Upon enrolling, participants were asked to respond to baseline surveys on demographics, smoking history, and medical conditions. Participants were then administered follow-up surveys about exposure to, diagnosis of, and vaccination against COVID-19, among other questions seeking to understand both individual experience and population-level trends related to the pandemic. These surveys were housed in the Eureka research platform (University of California San Francisco, with funding from the National Institutes of Health) [<xref ref-type="bibr" rid="ref16">16</xref>], which had web browser and smartphone functionality.</p></sec><sec id="s2-4"><title>EHR Data</title><p>For consenting and authorizing participants, EHR-limited datasets in the PCORnet Common Data Model format were extracted from the site-specific DataMarts maintained by all participating health systems [<xref ref-type="bibr" rid="ref17">17</xref>]. The CCS study data extraction query was developed by Duke University programmers using SAS 9.4 software and distributed to all sites to run in their local environments against their DataMart. The query extracted clinical data with a 5-year lookback from the recruitment start date through the most recently available data. Sensitive diagnoses were filtered out, and only a minimum necessary subset of laboratory and medication records was extracted. 
Only patients for whom identities were algorithmically matched or manually verified were included in the final analytic cohort.</p></sec><sec id="s2-5"><title>Concordance Definitions</title><p>Among 34,908 participants where linkage was possible, we evaluated concordance in the following domains: demographics, baseline medical conditions, current smoker status, COVID-19 diagnosis, and COVID-19 vaccination. We chose variables that were conceptually similar between the participant-reported and EHR sources (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendices 1</xref> and <xref ref-type="supplementary-material" rid="app2">2</xref>). Sex in both sources was defined as sex assigned at birth. Although gender identity was available from survey data, it was not available in EHR data and thus was not an eligible variable for concordance analysis. Race and ethnicity data abstracted from EHR data were populated according to health system practices. Race and ethnicity data abstracted from survey data were reported directly by study participants.</p><p>To promote comparability, measurement periods were aligned between data sources. Participants who had missing data in one or both sources were not considered for concordance analyses. Age data were not analyzed for concordance because a birthdate match between sources was a requirement for data to be considered for EHR data extraction; thus, discordant scenarios were inherently filtered out before analysis for this substudy.</p><p>For demographic, smoker status, and COVID-19 characteristics, the participant report was considered the criterion standard. For medical conditions, the EHR was considered the criterion standard.</p></sec><sec id="s2-6"><title>Statistical Approach</title><p>To test for marginal homogeneity between data sources, McNemar tests for paired nominal data were run on all 23 attributes, structured as dichotomous 2&#x00D7;2 contingency tables. 
Chi-square statistics and <italic>P</italic> values were calculated. A Bonferroni correction was applied to account for multiple comparisons, adjusting the significance threshold to .002 (.05/23). <italic>P</italic> values less than .001 were reported as <italic>P</italic>&#x003C;.001.</p><p>For all domains, the following statistics were generated along with their 95% CI values: overall agreement (or overall accuracy), sensitivity, specificity, positive predictive value (PPV), negative predictive value (NPV), and Cohen &#x03BA;. We used the following ranges for Cohen &#x03BA; to describe observed agreement: strong (0.81&#x2010;1.00), good (0.61&#x2010;0.80), moderate (0.41&#x2010;0.60), fair (0.21&#x2010;0.40), poor (0.01&#x2010;0.20), and no agreement (&#x003C;0) [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. However, these ranges are intended only as a guide; the adequacy of a given level of agreement should be judged in the context of the domain under consideration and the application for which the data will be used.</p><p>Data were analyzed from December 2022 to July 2023 using SAS 9.4 software.</p></sec><sec id="s2-7"><title>Ethical Considerations</title><p>The CCS study, the protocol for which covered data analysis activities conducted for this substudy, was approved by the Western Institutional Review Board on November 5, 2020. Participants were informed of their right to withdraw at any time without any consequences. Digital informed consent was obtained from participants before surveys were collected. To maintain confidentiality, participants were not asked to provide their name, only their pre-assigned unique code to enable linkage to EHRs. Participants were not compensated for their time spent completing the surveys.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>A total of 39,526 patients enrolled in the study across 7 sites. 
After the exclusion of participants whose identity could not be verified and participants with partially or completely missing EHR data, 34,908 participants were included in the final analytic cohort (<xref ref-type="fig" rid="figure1">Figure 1</xref>). Descriptive statistics of the 34,017 participants who responded to the baseline demographic survey and results from the McNemar test for marginal homogeneity between data sources are summarized in <xref ref-type="table" rid="table1">Table 1</xref>. The median age of the sample was 57 (IQR 42&#x2010;68, range 18&#x2010;100) years according to the EHR. The sample was predominantly female and White according to both sources. The sample was classified as 6.6% (2,249/34,017) Hispanic in the EHR and 9.2% (3,124/34,017) Hispanic according to participant-reported data (<italic>P</italic>&#x003C;.001).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Enrollment diagram and final analytic cohort for the COVID-19 Citizen Science Study concordance substudy. 
CCS: COVID-19 Citizen Science Study; CDM: common data model.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e58097_fig01.png"/></fig><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Comparison of demographic information derived from electronic health records and participant self-reports.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Variable</td><td align="left" valign="bottom">EHR<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> (N=34,017)</td><td align="left" valign="bottom">Participant report (N=34,017)</td><td align="left" valign="bottom"><italic>P</italic> value<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4">Age (years)<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td></tr><tr><td align="left" valign="top">&#x2003;Mean (SD)</td><td align="left" valign="top">54.7 (16.1)</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup></td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top">&#x2003;Median (IQR), range</td><td align="left" valign="top">57 (42&#x2010;68), 18-100</td><td align="left" valign="top">&#x2014;</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top" colspan="4">Sex, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Female</td><td align="left" valign="top">21,217 (62.4)</td><td align="left" valign="top">21,176 (62.3)</td><td align="left" valign="top" rowspan="3">.57</td></tr><tr><td align="left" valign="top">&#x2003;Male</td><td align="left" valign="top">12,780 (37.6)</td><td align="left" valign="top">12,742 (37.5)</td></tr><tr><td align="left" valign="top">&#x2003;Refused or missing</td><td align="left" valign="top">20 (&#x003C;1)</td><td align="left" valign="top">99 
(&#x003C;1)</td></tr><tr><td align="left" valign="top" colspan="4">Race, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;American Indian or Alaska Native</td><td align="left" valign="top">104 (&#x003C;1)</td><td align="left" valign="top">91 (&#x003C;1)</td><td align="left" valign="top">.21</td></tr><tr><td align="left" valign="top">&#x2003;Asian, Native Hawaiian, or Pacific Islander</td><td align="left" valign="top">1797 (5.3)</td><td align="left" valign="top">2042 (6.0)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;Black or African American</td><td align="left" valign="top">1286 (3.8)</td><td align="left" valign="top">1344 (4.0)</td><td align="left" valign="top">.001</td></tr><tr><td align="left" valign="top">&#x2003;White</td><td align="left" valign="top">27,054 (79.5)</td><td align="left" valign="top">27,744 (81.6)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;Multiple races<sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup></td><td align="left" valign="top">93 (&#x003C;1)</td><td align="left" valign="top">1269 (3.7)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;Other<sup><xref ref-type="table-fn" rid="table1fn6">f</xref></sup></td><td align="left" valign="top">1039 (3.1)</td><td align="left" valign="top">1077 (3.2)</td><td align="left" valign="top">.33</td></tr><tr><td align="left" valign="top">&#x2003;Refused or missing</td><td align="left" valign="top">2644 (7.8)</td><td align="left" valign="top">450 (1.3)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top" colspan="4">Ethnicity, n (%)</td></tr><tr><td align="left" valign="top">&#x2003;Hispanic</td><td align="left" valign="top">2249 (6.6)</td><td align="left" valign="top">3124 (9.2)</td><td align="left" valign="top" rowspan="3">&#x003C;.001</td></tr><tr><td align="left" 
valign="top">&#x2003;Non-Hispanic</td><td align="left" valign="top">28,903 (85.0)</td><td align="left" valign="top">30,528 (89.7)</td></tr><tr><td align="left" valign="top">&#x2003;Refused or missing</td><td align="left" valign="top">2865 (8.4)</td><td align="left" valign="top">365 (1.1)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>EHR: electronic health record.</p></fn><fn id="table1fn2"><p><sup>b</sup><italic>P</italic> values calculated by McNemar test.</p></fn><fn id="table1fn3"><p><sup>c</sup>Age data were not analyzed for concordance because a date of birth match between sources was a requirement for data to be considered for EHR data extraction; thus, discordant scenarios were inherently filtered out before analysis for this substudy.</p></fn><fn id="table1fn4"><p><sup>d</sup>Not available.</p></fn><fn id="table1fn5"><p><sup>e</sup>The category &#x201C;Multiple Races&#x201D; is a mapped value in the PCORnet Common Data Model, and no further detail was available. In participant-reported data, &#x201C;Multiple Races&#x201D; was defined as participants who responded to 2 or more non-missing race categories.</p></fn><fn id="table1fn6"><p><sup>f</sup>In both data sources, there were no additional details for the &#x201C;Other&#x201D; categorization.</p></fn></table-wrap-foot></table-wrap><p>Statistically significant differences between the 2 data sources were detected for all characteristics except for the female category (<italic>P</italic>=.57), the American Indian or Alaska Native race category (<italic>P</italic>=.21), the &#x201C;other&#x201D; race category (<italic>P</italic>=.33), and diabetes (<italic>P</italic>=.17). 
The starkest absolute difference between data sources was for COVID-19 vaccination, in which 97.4% (28,291/29,053) of participants self-reported a vaccine while only 48.4% (14,076/29,053) had this documented in the EHR (<xref ref-type="table" rid="table2">Table 2</xref>).</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Comparison of medical conditions and COVID-19 history as reported in electronic health records and participant self-reports.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Variables</td><td align="left" valign="bottom">Participants, n</td><td align="left" valign="bottom">EHR<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>, n (%)</td><td align="left" valign="bottom">Participant report, n (%)</td><td align="left" valign="bottom"><italic>P</italic> value<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top">Smoking status</td><td align="left" valign="top">11,175</td><td align="left" valign="top">947 (8.5)</td><td align="left" valign="top">1396 (12.5)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top" colspan="5">Medical conditions</td></tr><tr><td align="left" valign="top">&#x2003;Diabetes</td><td align="left" valign="top">31,744</td><td align="left" valign="top">3033 (9.6)</td><td align="left" valign="top">2979 (9.4)</td><td align="left" valign="top">.17</td></tr><tr><td align="left" valign="top">&#x2003;Hypertension</td><td align="left" valign="top">31,636</td><td align="left" valign="top">9440 (29.8)</td><td align="left" valign="top">10,953 (34.6)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;Coronary artery disease or angina</td><td align="left" valign="top">31,573</td><td align="left" valign="top">2440 (7.7)</td><td align="left" valign="top">1941 (6.1)</td><td align="left" 
valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;Myocardial infarction</td><td align="left" valign="top">31,721</td><td align="left" valign="top">288 (0.9)</td><td align="left" valign="top">792 (2.5)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;Congestive heart failure</td><td align="left" valign="top">31,686</td><td align="left" valign="top">689 (2.2)</td><td align="left" valign="top">545 (1.7)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;Transient ischemic attack</td><td align="left" valign="top">31,659</td><td align="left" valign="top">533 (1.7)</td><td align="left" valign="top">945 (3)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;Atrial fibrillation or flutter</td><td align="left" valign="top">31,495</td><td align="left" valign="top">1363 (4.3)</td><td align="left" valign="top">1769 (5.6)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;Sleep apnea</td><td align="left" valign="top">31,075</td><td align="left" valign="top">2838 (9.1)</td><td align="left" valign="top">4902 (15.8)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;COPD<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">31,626</td><td align="left" valign="top">1469 (4.6)</td><td align="left" valign="top">4902 (3.5)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;Asthma</td><td align="left" valign="top">31,738</td><td align="left" valign="top">3481 (11)</td><td align="left" valign="top">3150 (9.9)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;Immunodeficiency</td><td align="left" valign="top">31,378</td><td align="left" valign="top">730 (2.3)</td><td align="left" 
valign="top">1611 (5.1)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;Anemia</td><td align="left" valign="top">31,622</td><td align="left" valign="top">3838 (12.1)</td><td align="left" valign="top">3427 (10.8)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top" colspan="5">COVID-19</td></tr><tr><td align="left" valign="top">&#x2003;Infection</td><td align="left" valign="top">25,294</td><td align="left" valign="top">2319 (9.2)</td><td align="left" valign="top">4446 (17.6)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">&#x2003;Vaccination</td><td align="left" valign="top">29,053</td><td align="left" valign="top">14,076 (48.4)</td><td align="left" valign="top">28,291 (97.4)</td><td align="left" valign="top">&#x003C;.001</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>EHR: electronic health record.</p></fn><fn id="table2fn2"><p><sup>b</sup><italic>P</italic> values calculated by McNemar test.</p></fn><fn id="table2fn3"><p><sup>c</sup>COPD: chronic obstructive pulmonary disease.</p></fn></table-wrap-foot></table-wrap><p>Agreement between EHR and participant-reported characteristics according to 5 proportionate measures (overall agreement, sensitivity, specificity, PPV, and NPV) and one statistic of interrater reliability (Cohen &#x03BA;) are shown in <xref ref-type="table" rid="table3">Table 3</xref> and <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>. Overall agreement was above 95% for all demographic characteristics, where the participant report was considered the criterion standard. Chance-corrected agreement (&#x03BA;) was strong for most demographic characteristics except the &#x201C;other&#x201D; race category (&#x03BA;=0.31) and current smoker status (&#x03BA;=0.54). 
Sensitivity was 74% for the Hispanic characteristic, which translates to a relatively higher number of false negatives compared to other racial groups, 49.4% for current smoker status, and 26.6% for the &#x201C;other&#x201D; race category.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Agreement between electronic health record&#x2013; and participant-reported characteristics: overall agreement, &#x03BA; statistic, and accuracy metrics.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Variable</td><td align="left" valign="top">Overall agreement, % (95% CI)</td><td align="left" valign="top">&#x03BA; Statistic, % (95% CI)</td><td align="left" valign="top">Sensitivity, % (95% CI)</td><td align="left" valign="top">Specificity, % (95% CI)</td><td align="left" valign="top">PPV<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup>, % (95% CI)</td><td align="left" valign="top">NPV<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup>, % (95% CI)</td></tr></thead><tbody><tr><td align="left" valign="top">Female</td><td align="char" char="." valign="top">99.6 (99.5&#x2010;99.7)</td><td align="char" char="." valign="top">0.99 (0.99&#x2010;0.99)</td><td align="char" char="." valign="top">99.6 (99.5&#x2010;99.7)</td><td align="char" char="." valign="top">99.4 (99.3&#x2010;99.6)</td><td align="char" char="." valign="top">99.7 (99.6&#x2010;99.7)</td><td align="left" valign="top">99.4 (99.2&#x2010;99.5)</td></tr><tr><td align="left" valign="top" colspan="7">Non-Hispanic</td></tr><tr><td align="left" valign="top">&#x2003;Asian American and Pacific Islander</td><td align="char" char="." valign="top">99.2 (99.1&#x2010;99.3)</td><td align="char" char="." valign="top">0.93 (0.92&#x2010;0.94)</td><td align="char" char="." valign="top">92.0 (90.7&#x2010;93.3)</td><td align="char" char="." valign="top">99.6 (99.6&#x2010;99.7)</td><td align="char" char="." 
valign="top">93.9 (92.8&#x2010;95.1)</td><td align="left" valign="top">99.5 (99.4&#x2010;99.6)</td></tr><tr><td align="left" valign="top">&#x2003;Black</td><td align="char" char="." valign="top">99.6 (99.5&#x2010;99.7)</td><td align="char" char="." valign="top">0.94 (0.93&#x2010;0.95)</td><td align="char" char="." valign="top">97.2 (96.2&#x2010;98.2)</td><td align="char" char="." valign="top">99.7 (99.6&#x2010;99.7)</td><td align="char" char="." valign="top">91.6 (90.0&#x2010;93.2)</td><td align="left" valign="top">99.9 (99.9&#x2010;99.9)</td></tr><tr><td align="left" valign="top">&#x2003;White</td><td align="char" char="." valign="top">96.1 (95.9&#x2010;96.3)</td><td align="char" char="." valign="top">0.87 (0.86&#x2010;0.88)</td><td align="char" char="." valign="top">99.2 (99.0&#x2010;99.3)</td><td align="char" char="." valign="top">83.3 (82.3&#x2010;84.3)</td><td align="char" char="." valign="top">96.2 (95.9&#x2010;96.4)</td><td align="left" valign="top">95.9 (95.3&#x2010;96.4)</td></tr><tr><td align="left" valign="top">&#x2003;Other</td><td align="char" char="." valign="top">96.9 (96.7&#x2010;97.1)</td><td align="char" char="." valign="top">0.31 (0.27&#x2010;0.36)</td><td align="char" char="." valign="top">26.6 (23.6&#x2010;29.6)</td><td align="char" char="." valign="top">99.0 (98.8&#x2010;99.1)</td><td align="char" char="." valign="top">42.7 (38.4&#x2010;47.0)</td><td align="left" valign="top">97.9 (97.7&#x2010;98.1)</td></tr><tr><td align="left" valign="top">Hispanic</td><td align="char" char="." valign="top">97.9 (97.7&#x2010;98.0)</td><td align="char" char="." valign="top">0.82 (0.81&#x2010;0.83)</td><td align="char" char="." valign="top">74.0 (72.1&#x2010;75.9)</td><td align="char" char="." valign="top">99.7 (99.6&#x2010;99.8)</td><td align="char" char="." valign="top">94.9 (93.8&#x2010;96.0)</td><td align="left" valign="top">98.1 (97.9&#x2010;98.2)</td></tr><tr><td align="left" valign="top">Current smoker</td><td align="char" char="." 
valign="top">91.4 (90.9&#x2010;91.9)</td><td align="char" char="." valign="top">0.54 (0.52&#x2010;0.57)</td><td align="char" char="." valign="top">49.4 (46.8&#x2010;52.1)</td><td align="char" char="." valign="top">97.4 (97.1&#x2010;97.7)</td><td align="char" char="." valign="top">72.9 (70.0&#x2010;75.7)</td><td align="left" valign="top">93.1 (92.6&#x2010;93.6)</td></tr><tr><td align="left" valign="top">Diabetes</td><td align="char" char="." valign="top">95.1 (94.9&#x2010;95.3)</td><td align="char" char="." valign="top">0.71 (0.70&#x2010;0.73)</td><td align="char" char="." valign="top">73.5 (71.9&#x2010;75.1)</td><td align="char" char="." valign="top">97.4 (97.2&#x2010;97.6)</td><td align="char" char="." valign="top">74.8 (73.3&#x2010;76.4)</td><td align="left" valign="top">97.2 (97.0&#x2010;97.4)</td></tr><tr><td align="left" valign="top">Hypertension</td><td align="char" char="." valign="top">85.0 (84.6&#x2010;85.4)</td><td align="char" char="." valign="top">0.66 (0.64&#x2010;0.68)</td><td align="char" char="." valign="top">82.9 (82.1&#x2010;83.6)</td><td align="char" char="." valign="top">85.9 (85.4&#x2010;86.4)</td><td align="char" char="." valign="top">71.4 (70.6&#x2010;72.3)</td><td align="left" valign="top">92.2 (91.8&#x2010;92.6)</td></tr><tr><td align="left" valign="top">Coronary artery disease/angina</td><td align="char" char="." valign="top">94.0 (93.7&#x2010;94.3)</td><td align="char" char="." valign="top">0.54 (0.52&#x2010;0.56)</td><td align="char" char="." valign="top">51.0 (49.0&#x2010;53.0)</td><td align="char" char="." valign="top">97.6 (97.4&#x2010;97.8)</td><td align="char" char="." valign="top">64.1 (62.0&#x2010;66.2)</td><td align="left" valign="top">96.0 (95.7&#x2010;96.2)</td></tr><tr><td align="left" valign="top">Myocardial infarction</td><td align="char" char="." valign="top">97.6 (97.5&#x2010;97.8)</td><td align="char" char="." valign="top">0.30 (0.25&#x2010;0.35)</td><td align="char" char="." 
valign="top">57.6 (51.9&#x2010;63.3)</td><td align="char" char="." valign="top">98.0 (97.9&#x2010;98.2)</td><td align="char" char="." valign="top">21.0 (18.1&#x2010;23.8)</td><td align="left" valign="top">99.6 (99.5&#x2010;99.7)</td></tr><tr><td align="left" valign="top">Congestive heart failure</td><td align="char" char="." valign="top">98.0 (97.9&#x2010;98.2)</td><td align="char" char="." valign="top">0.48 (0.44&#x2010;0.52)</td><td align="char" char="." valign="top">44.1 (40.4&#x2010;47.8)</td><td align="char" char="." valign="top">99.2 (99.1&#x2010;99.3)</td><td align="char" char="." valign="top">55.8 (51.6&#x2010;60.0)</td><td align="left" valign="top">98.8 (98.6&#x2010;98.9)</td></tr><tr><td align="left" valign="top">Transient ischemic attack</td><td align="char" char="." valign="top">97.6 (97.4&#x2010;97.7)</td><td align="char" char="." valign="top">0.47 (0.43&#x2010;0.50)</td><td align="char" char="." valign="top">66.2 (62.2&#x2010;70.2)</td><td align="char" char="." valign="top">98.1 (97.9&#x2010;98.3)</td><td align="char" char="." valign="top">37.4 (34.3&#x2010;40.4)</td><td align="left" valign="top">99.4 (99.3&#x2010;99.5)</td></tr><tr><td align="left" valign="top">Atrial fibrillation</td><td align="char" char="." valign="top">97.0 (96.8&#x2010;97.2)</td><td align="char" char="." valign="top">0.68 (0.66&#x2010;0.70)</td><td align="char" char="." valign="top">80.3 (78.2&#x2010;82.4)</td><td align="char" char="." valign="top">97.8 (97.6&#x2010;98.0)</td><td align="char" char="." valign="top">61.8 (59.6&#x2010;64.1)</td><td align="left" valign="top">99.1 (99.0&#x2010;99.2)</td></tr><tr><td align="left" valign="top">Sleep apnea</td><td align="char" char="." valign="top">90.4 (90.0&#x2010;90.7)</td><td align="char" char="." valign="top">0.56 (0.55&#x2010;0.58)</td><td align="char" char="." valign="top">83.5 (82.2&#x2010;84.9)</td><td align="char" char="." valign="top">91.0 (90.7&#x2010;91.4)</td><td align="char" char="." 
valign="top">48.4 (47.0&#x2010;49.8)</td><td align="left" valign="top">98.2 (98.1&#x2010;98.4)</td></tr><tr><td align="left" valign="top">COPD<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="char" char="." valign="top">95.2 (95.0&#x2010;95.5)</td><td align="char" char="." valign="top">0.39 (0.36&#x2010;0.42)</td><td align="char" char="." valign="top">36.7 (34.2&#x2010;39.2)</td><td align="char" char="." valign="top">98.1 (97.9&#x2010;98.3)</td><td align="char" char="." valign="top">48.4 (45.5&#x2010;51.4)</td><td align="left" valign="top">97.0 (96.8&#x2010;97.1)</td></tr><tr><td align="left" valign="top">Asthma</td><td align="char" char="." valign="top">90.2 (89.9&#x2010;90.6)</td><td align="char" char="." valign="top">0.48 (0.46&#x2010;0.50)</td><td align="char" char="." valign="top">50.8 (49.1&#x2010;52.5)</td><td align="char" char="." valign="top">95.1 (94.9&#x2010;95.4)</td><td align="char" char="." valign="top">56.1 (54.4&#x2010;57.9)</td><td align="left" valign="top">94.0 (93.7&#x2010;94.3)</td></tr><tr><td align="left" valign="top">Immunodeficiency</td><td align="char" char="." valign="top">94.6 (94.4&#x2010;94.9)</td><td align="char" char="." valign="top">0.26 (0.22&#x2010;0.29)</td><td align="char" char="." valign="top">45.1 (41.5&#x2010;48.7)</td><td align="char" char="." valign="top">95.8 (95.6&#x2010;96.0)</td><td align="char" char="." valign="top">20.4 (18.5&#x2010;22.4)</td><td align="left" valign="top">98.7 (98.5&#x2010;98.8)</td></tr><tr><td align="left" valign="top">Anemia</td><td align="char" char="." valign="top">85.0 (84.6&#x2010;85.4)</td><td align="char" char="." valign="top">0.26 (0.24&#x2010;0.28)</td><td align="char" char="." valign="top">32.8 (31.3&#x2010;34.3)</td><td align="char" char="." valign="top">92.2 (91.9&#x2010;92.5)</td><td align="char" char="." 
valign="top">36.7 (35.1&#x2010;38.3)</td><td align="left" valign="top">90.8 (90.5&#x2010;91.2)</td></tr><tr><td align="left" valign="top">COVID-19 infection</td><td align="char" char="." valign="top">84.6 (84.1&#x2010;85.0)</td><td align="char" char="." valign="top">0.34 (0.33&#x2010;0.36)</td><td align="char" char="." valign="top">32.2 (30.9&#x2010;33.6)</td><td align="char" char="." valign="top">95.8 (95.5&#x2010;96.0)</td><td align="char" char="." valign="top">61.8 (59.8&#x2010;63.8)</td><td align="left" valign="top">86.9 (86.5&#x2010;87.3)</td></tr><tr><td align="left" valign="top">COVID-19 vaccination</td><td align="char" char="." valign="top">51.0 (50.4&#x2010;51.6)</td><td align="char" char="." valign="top">0.05 (0.04&#x2010;0.06)</td><td align="char" char="." valign="top">49.7 (49.1&#x2010;50.3)</td><td align="char" char="." valign="top">98.2 (97.2&#x2010;99.1)</td><td align="char" char="." valign="top">99.9 (99.9&#x2010;99.9)</td><td align="left" valign="top">5.0 (4.6&#x2010;5.3)</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>PPV: positive predictive value.</p></fn><fn id="table3fn2"><p><sup>b</sup>NPV: negative predictive value.</p></fn><fn id="table3fn3"><p><sup>c</sup>COPD: chronic obstructive pulmonary disease.</p></fn></table-wrap-foot></table-wrap><p>The criterion standard for baseline medical conditions was the EHR. Overall agreement, specificity, and NPV between data sources were above 85% for all baseline medical conditions, although there was heterogeneity in sensitivity, PPV, and chance-corrected agreement (&#x03BA;). Sensitivity ranged from 32.8% (95% CI 31.3&#x2010;34.3) to 83.5% (95% CI 82.2&#x2010;84.9), being lowest for anemia and highest for sleep apnea. PPV ranged from 20.4% (95% CI 18.5&#x2010;22.4) to 74.8% (95% CI 73.3&#x2010;76.4), being lowest for immunodeficiency and highest for diabetes. 
Finally, chance-corrected agreement (&#x03BA;) was good for 3 of 12 compared baseline medical conditions, moderate for 5, and fair for 4. Chance-corrected agreement (&#x03BA;) ranged from 0.26 (95% CI 0.22&#x2010;0.29) to 0.71 (95% CI 0.70&#x2010;0.73) for non-HIV immunodeficiency and diabetes, respectively.</p><p>The criterion standard for COVID-19 variables was participant-reported data. While chance-corrected agreement was fair for COVID-19 infection (&#x03BA;=0.34), it was poor for COVID-19 vaccination (&#x03BA;=0.05). Of 25,294 participants whose COVID-19 infection data could be compared, there were 3899 cases of discordance, 77.3% (3013/3899) of which were classified as the participant reporting a COVID-19 diagnosis but this not being reflected in the EHR. Similarly, of the 29,053 participants whose COVID-19 vaccine data could be compared, there were 14,243 cases of discordance, 99.9% (14,229/14,243) of which were classified as the participant reporting a COVID-19 vaccine but this not being reflected in the EHR.</p></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Results</title><p>We evaluated the concordance between survey data from participants enrolled in a COVID-19 study and their linked EHR data. 
We had four main findings: (1) sensitivity and chance-corrected agreement were strong for all demographic characteristics except for Hispanic ethnicity, the &#x201C;other&#x201D; race category, and current smoker status, indicating that a relatively lower proportion of patients were correctly identified in the EHR as such in comparison to other traits; (2) when the EHR was the criterion standard, as we considered it to be for medical conditions, specificity and NPV were higher than corresponding sensitivity and PPV, suggesting that patients were better able to report in concordance with their EHR the absence of a medical condition rather than the presence of one; (3) chance-corrected agreement, sensitivity, and PPV varied widely for medical conditions, with no clear pattern emerging as to which types were more likely to be self-reported in concordance with the EHR; and (4) COVID-19 infection and vaccination had relatively low chance-corrected agreement and overall agreement compared to most demographic traits and many medical conditions, along with very low sensitivity, indicating that these health events are poorly captured in patients&#x2019; EHRs.</p><p>Our findings suggest the need for improvements to point-of-care capture of patient demographic traits and COVID-19 infection and vaccination history, patient education about their medical conditions, and linkage to external data sources in EHR-only pragmatic research. Our results indicating specifically that the capture of Hispanic ethnicity and &#x201C;other&#x201D; race category is not as sensitive as other race categories demonstrate that current point-of-care processes for collecting racial and ethnic information from patients may be insufficient, regardless of whether such data points are captured by a clinician, administrative worker, or the patients themselves. It is critical to have granular and accurate capture of patient race and ethnicity to provide the most culturally sensitive clinical care. 
Similarly, our findings that captured COVID-19 health events were relatively discordant between data sources suggest an interruption of health data flow back to the EHR. This could originate from improper integration of COVID-19 testing and vaccination data sources, especially when these events happen outside of the health system, resulting in health care providers not having the most up-to-date information about the health status of their patients. Finally, the lower sensitivity and PPV of medical conditions when compared to their corresponding specificity and NPV suggest that (1) EHRs may not be capturing medical conditions, especially those that are pre-existing; and (2) patients may not be aware that they have certain medical conditions, most marked for conditions like anemia and COPD, both of which showed only about a third of patients reporting the presence of these conditions in concordance with their EHRs. Comprehensive and customized patient education and communication are suggested to support self-management of medical conditions and patient autonomy outside the point of care. Further, and for the sake of research integrity, those engaging in EHR-only research should make attempts to access and query as many views and tables as is feasible to properly categorize a patient as having or not having a medical condition.</p><p>Increasingly, novel research designs rely on the integration of multiple data sources to answer research questions. The COVID-19 pandemic accelerated already growing interest in real-world data use cases [<xref ref-type="bibr" rid="ref20">20</xref>], including leveraging existing EHR data and bringing research directly to people through participant-facing portals. 
Direct-to-participant research has numerous benefits, including potential for greater geographic reach and diversity, lower participant burden with few or no in-person visits, and platforms that enable capture of relevant patient-centered endpoints [<xref ref-type="bibr" rid="ref21">21</xref>]. These strengths complement those of EHR data, which, through national networks like PCORnet, can be standardized into research-grade data to facilitate rapid insights into key clinical outcomes [<xref ref-type="bibr" rid="ref22">22</xref>]. To our knowledge, this is the first study to examine patterns of concordance across participant-reported and EHR data in the context of COVID-19.</p><p>Our finding that participants self-reported COVID-19 infection and vaccination at higher rates than what was evident in their clinical records illustrates the fragmented nature of real-world data. Ongoing work to enhance the quality and reliability of EHR data in the context of COVID-19, including network-level curation [<xref ref-type="bibr" rid="ref22">22</xref>], linkage to external sources where appropriate (eg, state vaccine [<xref ref-type="bibr" rid="ref23">23</xref>] or policy [<xref ref-type="bibr" rid="ref24">24</xref>] databases), and systematic phenotype development and testing [<xref ref-type="bibr" rid="ref25">25</xref>], is critical to maximizing the research value of these data. In parallel, the implementation of best practices to enhance the validity of participant reports, including stakeholder engagement in survey design, readability assessments, and cultural and linguistic adaptation, is essential to enhancing the reliability of findings from participant-facing research [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. Our findings are broadly consistent with those from prior studies, suggesting that fitness-for-use depends on context [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. 
We found that no single data source may be appropriate for EHR-based pragmatic research, consistent with prior work illustrating the potential biases that can arise in participant-reported data and how they vary [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref33">33</xref>].</p></sec><sec id="s4-2"><title>Limitations</title><p>Several limitations to our study are worth noting. First, the CCS study comprises participants who were mostly White and female. Therefore, results may not generalize to broader populations. Second, diversity within minority communities can make both responding to survey questions and the identification of race challenging for patients and clinicians, respectively, a fact that may skew findings from our demographic analyses. Third, participant-reported COVID-19 variables were not validated and are subject to reporting and recall bias. Fourth, we observed some attrition in reporting over time, which could lead to selection bias in analyses of longitudinal outcomes. Finally, we used EHR data for this concordance analysis, which may not represent all medical encounters for a given participant and which may not be of the highest or most accurate quality. EHR data used in this analysis were not linked to claims data from pharmacies, which are a major administrator of COVID-19 vaccines. Particularly for outcomes that are generally observed outside of the hospital, linkage to external data sources is likely warranted.</p></sec><sec id="s4-3"><title>Conclusions</title><p>We found that the integration of multiple data sources to investigate COVID-19 research questions enhances the capture of key elements but also introduces opportunities for disagreement. Future studies that leverage linked data should evaluate the concordance of overlapping elements and report levels of agreement. 
Transparent reporting will contribute to a broader understanding of data reliability and relevance and support future strategies to improve fitness-for-use of real-world data.</p></sec></sec></body><back><ack><p>This work was supported with funding from the Patient-Centered Outcomes Research Institute (PCORI; grant identification COVID-2020C2-10761). PCORI had no role in the design and conduct of the study; collection, management, analysis, and interpretation of the data; preparation, review, or approval of the manuscript; and decision to submit the manuscript for publication.</p></ack><notes><sec><title><bold>Disclaimer</bold></title><p>The views and conclusions presented here are solely the responsibility of the authors and do not necessarily reflect the official views of PCORI.</p></sec><sec><title>Data Availability</title><p>The datasets generated and analyzed during this study are not publicly available to preserve patient privacy and confidentiality, but are available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>EC had full access to all the data in the study and takes responsibility for the integrity of the data and the accuracy of the data analysis. TWC, EC, ECO&#x2019;B, and MJP contributed to conception and design of the study. TWC, EC, ECO'B, and MJP assisted with acquisition, analysis, or interpretation of data. TWC, EC, and ECO&#x2019;B handled drafting of the manuscript. EC contributed to statistical analysis. TWC and MJP obtained funding. KA, MFM, JO, and SP assisted with administrative, technical, or material support. 
TWC, ECO'B, and MJP contributed to supervision.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">CCS</term><def><p>COVID-19 Citizen Science Study</p></def></def-item><def-item><term id="abb2">EHR</term><def><p>electronic health record</p></def></def-item><def-item><term id="abb3">NPV</term><def><p>negative predictive value</p></def></def-item><def-item><term id="abb4">PCORnet</term><def><p>National Patient-Centered Clinical Research Network</p></def></def-item><def-item><term id="abb5">PPV</term><def><p>positive predictive value</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cowie</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Blomster</surname><given-names>JI</given-names> </name><name name-style="western"><surname>Curtis</surname><given-names>LH</given-names> </name><etal/></person-group><article-title>Electronic health records to facilitate clinical research</article-title><source>Clin Res Cardiol</source><year>2017</year><month>01</month><volume>106</volume><issue>1</issue><fpage>1</fpage><lpage>9</lpage><pub-id pub-id-type="doi">10.1007/s00392-016-1025-6</pub-id><pub-id pub-id-type="medline">27557678</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Staa</surname><given-names>T van</given-names> </name><name name-style="western"><surname>Goldacre</surname><given-names>B</given-names> </name><name name-style="western"><surname>Gulliford</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Pragmatic randomised trials using routine electronic health records: putting them to the 
test</article-title><source>BMJ</source><year>2012</year><month>02</month><day>7</day><volume>344</volume><issue>feb07 1</issue><fpage>e55</fpage><pub-id pub-id-type="doi">10.1136/bmj.e55</pub-id><pub-id pub-id-type="medline">22315246</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tosh</surname><given-names>G</given-names> </name><name name-style="western"><surname>Soares-Weiser</surname><given-names>K</given-names> </name><name name-style="western"><surname>Adams</surname><given-names>CE</given-names> </name></person-group><article-title>Pragmatic vs explanatory trials: the pragmascope tool to help measure differences in protocols of mental health randomized controlled trials</article-title><source>Dialogues Clin Neurosci</source><year>2011</year><volume>13</volume><issue>2</issue><fpage>209</fpage><lpage>215</lpage><pub-id pub-id-type="doi">10.31887/DCNS.2011.13.2/gtosh</pub-id><pub-id pub-id-type="medline">21842618</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Holtrop</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Glasgow</surname><given-names>RE</given-names> </name></person-group><article-title>Pragmatic research: an introduction for clinical practitioners</article-title><source>Fam Pract</source><year>2020</year><month>07</month><day>23</day><volume>37</volume><issue>3</issue><fpage>424</fpage><lpage>428</lpage><pub-id pub-id-type="doi">10.1093/fampra/cmz092</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Black</surname><given-names>N</given-names> </name></person-group><article-title>Patient reported outcome measures could help transform 
healthcare</article-title><source>BMJ</source><year>2013</year><month>01</month><day>28</day><volume>346</volume><issue>7896</issue><fpage>f167</fpage><comment><ext-link ext-link-type="uri" xlink:href="http://www.jstor.org/stable/23494165">http://www.jstor.org/stable/23494165</ext-link></comment><pub-id pub-id-type="doi">10.1136/bmj.f167</pub-id><pub-id pub-id-type="medline">23358487</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jandoo</surname><given-names>T</given-names> </name></person-group><article-title>WHO guidance for digital health: What it means for researchers</article-title><source>Digit Health</source><year>2020</year><volume>6</volume><fpage>2055207619898984</fpage><pub-id pub-id-type="doi">10.1177/2055207619898984</pub-id><pub-id pub-id-type="medline">31949918</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hamilton</surname><given-names>NS</given-names> </name><name name-style="western"><surname>Edelman</surname><given-names>D</given-names> </name><name name-style="western"><surname>Weinberger</surname><given-names>M</given-names> </name><name name-style="western"><surname>Jackson</surname><given-names>GL</given-names> </name></person-group><article-title>Concordance between self-reported race/ethnicity and that recorded in a Veteran Affairs electronic medical record</article-title><source>N C Med J</source><year>2009</year><volume>70</volume><issue>4</issue><fpage>296</fpage><lpage>300</lpage><pub-id pub-id-type="medline">19835243</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Valikodath</surname><given-names>NG</given-names> </name><name 
name-style="western"><surname>Newman-Casey</surname><given-names>PA</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>PP</given-names> </name><name name-style="western"><surname>Musch</surname><given-names>DC</given-names> </name><name name-style="western"><surname>Niziol</surname><given-names>LM</given-names> </name><name name-style="western"><surname>Woodward</surname><given-names>MA</given-names> </name></person-group><article-title>Agreement of ocular symptom reporting between patient-reported outcomes and medical records</article-title><source>JAMA Ophthalmol</source><year>2017</year><month>03</month><day>1</day><volume>135</volume><issue>3</issue><fpage>225</fpage><lpage>231</lpage><pub-id pub-id-type="doi">10.1001/jamaophthalmol.2016.5551</pub-id><pub-id pub-id-type="medline">28125754</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fares</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Williamson</surname><given-names>TJ</given-names> </name><name name-style="western"><surname>Theisen</surname><given-names>MK</given-names> </name><etal/></person-group><article-title>Low concordance of patient-reported outcomes with clinical and clinical trial documentation</article-title><source>JCO Clin Cancer Inform</source><year>2018</year><month>12</month><volume>2</volume><fpage>1</fpage><lpage>12</lpage><pub-id pub-id-type="doi">10.1200/CCI.18.00059</pub-id><pub-id pub-id-type="medline">30652613</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>O&#x2019;Brien</surname><given-names>EC</given-names> </name><name name-style="western"><surname>Mulder</surname><given-names>H</given-names> </name><name 
name-style="western"><surname>Jones</surname><given-names>WS</given-names> </name><etal/></person-group><article-title>Concordance between patient-reported health data and electronic health data in the ADAPTABLE trial</article-title><source>JAMA Cardiol</source><year>2022</year><month>12</month><day>1</day><volume>7</volume><issue>12</issue><fpage>1235</fpage><lpage>1243</lpage><pub-id pub-id-type="doi">10.1001/jamacardio.2022.3844</pub-id><pub-id pub-id-type="medline">36322059</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Hostetter</surname><given-names>M</given-names> </name><name name-style="western"><surname>Klein</surname><given-names>S</given-names> </name></person-group><article-title>Using patient-reported outcomes to improve health care quality</article-title><source>The Commonwealth Fund</source><access-date>2025-06-04</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.commonwealthfund.org/publications/newsletter-article/using-patient-reported-outcomes-improve-health-care-quality">https://www.commonwealthfund.org/publications/newsletter-article/using-patient-reported-outcomes-improve-health-care-quality</ext-link></comment></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Verheij</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Curcin</surname><given-names>V</given-names> </name><name name-style="western"><surname>Delaney</surname><given-names>BC</given-names> </name><name name-style="western"><surname>McGilchrist</surname><given-names>MM</given-names> </name></person-group><article-title>Possible sources of bias in primary care electronic health record data use and reuse</article-title><source>J Med Internet 
Res</source><year>2018</year><month>05</month><day>29</day><volume>20</volume><issue>5</issue><fpage>e185</fpage><pub-id pub-id-type="doi">10.2196/jmir.9134</pub-id><pub-id pub-id-type="medline">29844010</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Menachemi</surname><given-names>N</given-names> </name><name name-style="western"><surname>Collum</surname><given-names>TH</given-names> </name></person-group><article-title>Benefits and drawbacks of electronic health record systems</article-title><source>Risk Manag Healthc Policy</source><year>2011</year><volume>4</volume><fpage>47</fpage><lpage>55</lpage><pub-id pub-id-type="doi">10.2147/RMHP.S12985</pub-id><pub-id pub-id-type="medline">22312227</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zini</surname><given-names>MLL</given-names> </name><name name-style="western"><surname>Banfi</surname><given-names>G</given-names> </name></person-group><article-title>A narrative literature review of bias in collecting patient reported outcomes measures (PROMs)</article-title><source>Int J Environ Res Public Health</source><year>2021</year><month>11</month><day>26</day><volume>18</volume><issue>23</issue><fpage>12445</fpage><pub-id pub-id-type="doi">10.3390/ijerph182312445</pub-id><pub-id pub-id-type="medline">34886170</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Beatty</surname><given-names>AL</given-names> </name><name name-style="western"><surname>Peyser</surname><given-names>ND</given-names> </name><name name-style="western"><surname>Butcher</surname><given-names>XE</given-names> </name><etal/></person-group><article-title>The COVID-19 
Citizen Science Study: protocol for a longitudinal digital health cohort study</article-title><source>JMIR Res Protoc</source><year>2021</year><month>08</month><day>30</day><volume>10</volume><issue>8</issue><fpage>e28169</fpage><pub-id pub-id-type="doi">10.2196/28169</pub-id><pub-id pub-id-type="medline">34310336</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Peyser</surname><given-names>ND</given-names> </name><name name-style="western"><surname>Marcus</surname><given-names>GM</given-names> </name><name name-style="western"><surname>Beatty</surname><given-names>AL</given-names> </name><name name-style="western"><surname>Olgin</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Pletcher</surname><given-names>MJ</given-names> </name></person-group><article-title>Digital platforms for clinical trials: The Eureka experience</article-title><source>Contemp Clin Trials</source><year>2022</year><month>04</month><volume>115</volume><fpage>106710</fpage><pub-id pub-id-type="doi">10.1016/j.cct.2022.106710</pub-id><pub-id pub-id-type="medline">35183763</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Forrest</surname><given-names>CB</given-names> </name><name name-style="western"><surname>McTigue</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Hernandez</surname><given-names>AF</given-names> </name><etal/></person-group><article-title>PCORnet&#x00AE; 2020: current state, accomplishments, and future directions</article-title><source>J Clin Epidemiol</source><year>2021</year><month>01</month><volume>129</volume><fpage>60</fpage><lpage>67</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2020.09.036</pub-id><pub-id 
pub-id-type="medline">33002635</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Altman</surname><given-names>DG</given-names> </name></person-group><source>Practical Statistics for Medical Research</source><year>1999</year><publisher-name>Chapman &#x0026; Hall/CRC Press</publisher-name></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McHugh</surname><given-names>ML</given-names> </name></person-group><article-title>Interrater reliability: the kappa statistic</article-title><source>Biochem Med (Zagreb)</source><year>2012</year><volume>22</volume><issue>3</issue><fpage>276</fpage><lpage>282</lpage><pub-id pub-id-type="medline">23092060</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Corrigan-Curay</surname><given-names>J</given-names> </name><name name-style="western"><surname>Sacks</surname><given-names>L</given-names> </name><name name-style="western"><surname>Woodcock</surname><given-names>J</given-names> </name></person-group><article-title>Real-world evidence and real-world data for evaluating drug safety and effectiveness</article-title><source>JAMA</source><year>2018</year><month>09</month><day>4</day><volume>320</volume><issue>9</issue><fpage>867</fpage><lpage>868</lpage><pub-id pub-id-type="doi">10.1001/jama.2018.10136</pub-id><pub-id pub-id-type="medline">30105359</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de Jong</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>van Rijssel</surname><given-names>TI</given-names> </name><name 
name-style="western"><surname>Zuidgeest</surname><given-names>MGP</given-names> </name><etal/></person-group><article-title>Opportunities and challenges for decentralized clinical trials: European Regulators&#x2019; Perspective</article-title><source>Clin Pharmacol Ther</source><year>2022</year><month>08</month><volume>112</volume><issue>2</issue><fpage>344</fpage><lpage>352</lpage><pub-id pub-id-type="doi">10.1002/cpt.2628</pub-id><pub-id pub-id-type="medline">35488483</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Qualls</surname><given-names>LG</given-names> </name><name name-style="western"><surname>Phillips</surname><given-names>TA</given-names> </name><name name-style="western"><surname>Hammill</surname><given-names>BG</given-names> </name><etal/></person-group><article-title>Evaluating foundational data quality in the National Patient-Centered Clinical Research Network (PCORnet&#x00AE;)</article-title><source>EGEMS (Wash DC)</source><year>2018</year><month>04</month><day>13</day><volume>6</volume><issue>1</issue><fpage>3</fpage><pub-id pub-id-type="doi">10.5334/egems.199</pub-id><pub-id pub-id-type="medline">29881761</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Groom</surname><given-names>HC</given-names> </name><name name-style="western"><surname>Crane</surname><given-names>B</given-names> </name><name name-style="western"><surname>Naleway</surname><given-names>AL</given-names> </name><etal/></person-group><article-title>Monitoring vaccine safety using the Vaccine Safety Datalink: Assessing capacity to integrate data from Immunization Information Systems</article-title><source>Vaccine</source><year>2022</year><month>01</month><day>31</day><volume>40</volume><issue>5</issue><fpage>752</fpage><lpage>756</lpage><pub-id 
pub-id-type="doi">10.1016/j.vaccine.2021.12.048</pub-id><pub-id pub-id-type="medline">34980508</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hamad</surname><given-names>R</given-names> </name><name name-style="western"><surname>Lyman</surname><given-names>KA</given-names> </name><name name-style="western"><surname>Lin</surname><given-names>F</given-names> </name><etal/></person-group><article-title>The U.S. COVID-19 County Policy Database: a novel resource to support pandemic-related research</article-title><source>BMC Public Health</source><year>2022</year><month>10</month><day>10</day><volume>22</volume><issue>1</issue><fpage>1882</fpage><pub-id pub-id-type="doi">10.1186/s12889-022-14132-6</pub-id><pub-id pub-id-type="medline">36217102</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lusczek</surname><given-names>ER</given-names> </name><name name-style="western"><surname>Ingraham</surname><given-names>NE</given-names> </name><name name-style="western"><surname>Karam</surname><given-names>BS</given-names> </name><etal/></person-group><article-title>Characterizing COVID-19 clinical phenotypes and associated comorbidities and complication profiles</article-title><source>PLoS ONE</source><year>2021</year><volume>16</volume><issue>3</issue><fpage>e0248956</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0248956</pub-id><pub-id pub-id-type="medline">33788884</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Chang</surname><given-names>EM</given-names> </name><name name-style="western"><surname>Gillespie</surname><given-names>EF</given-names> </name><name name-style="western"><surname>Shaverdian</surname><given-names>N</given-names> </name></person-group><article-title>Truthfulness in patient-reported outcomes: factors affecting patients&#x2019; responses and impact on data quality</article-title><source>Patient Relat Outcome Meas</source><year>2019</year><volume>10</volume><fpage>171</fpage><lpage>186</lpage><pub-id pub-id-type="doi">10.2147/PROM.S178344</pub-id><pub-id pub-id-type="medline">31354371</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Breeman</surname><given-names>S</given-names> </name><name name-style="western"><surname>Constable</surname><given-names>L</given-names> </name><name name-style="western"><surname>Duncan</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Verifying participant-reported clinical outcomes: challenges and implications</article-title><source>Trials</source><year>2020</year><month>03</month><day>4</day><volume>21</volume><issue>1</issue><fpage>241</fpage><pub-id pub-id-type="doi">10.1186/s13063-020-4169-7</pub-id><pub-id pub-id-type="medline">32131888</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="web"><article-title>Real-world data: assessing electronic health records and medical claims data to support regulatory decision-making for drug and biological products</article-title><source>US Food and Drug Administration</source><year>2021</year><month>09</month><access-date>2022-03-30</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.fda.gov/media/152503/download">https://www.fda.gov/media/152503/download</ext-link></comment></nlm-citation></ref><ref 
id="ref29"><label>29</label><nlm-citation citation-type="web"><article-title>Considerations for the use of real-world data and real-world evidence to support regulatory decision-making for drug and biological products</article-title><source>US Food and Drug Administration</source><year>2021</year><month>12</month><access-date>2022-03-30</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.fda.gov/media/154714/download">https://www.fda.gov/media/154714/download</ext-link></comment></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Heckbert</surname><given-names>SR</given-names> </name><name name-style="western"><surname>Kooperberg</surname><given-names>C</given-names> </name><name name-style="western"><surname>Safford</surname><given-names>MM</given-names> </name><etal/></person-group><article-title>Comparison of self-report, hospital discharge codes, and adjudication of cardiovascular events in the Women&#x2019;s Health Initiative</article-title><source>Am J Epidemiol</source><year>2004</year><month>12</month><day>15</day><volume>160</volume><issue>12</issue><fpage>1152</fpage><lpage>1158</lpage><pub-id pub-id-type="doi">10.1093/aje/kwh314</pub-id><pub-id pub-id-type="medline">15583367</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Stirratt</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Dunbar-Jacob</surname><given-names>J</given-names> </name><name name-style="western"><surname>Crane</surname><given-names>HM</given-names> </name><etal/></person-group><article-title>Self-report measures of medication adherence behavior: recommendations on optimal use</article-title><source>Transl Behav 
Med</source><year>2015</year><month>12</month><volume>5</volume><issue>4</issue><fpage>470</fpage><lpage>482</lpage><pub-id pub-id-type="doi">10.1007/s13142-015-0315-2</pub-id><pub-id pub-id-type="medline">26622919</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Woodfield</surname><given-names>R</given-names> </name><collab>UK Biobank Stroke Outcomes Group</collab><collab>UK Biobank Follow-up and Outcomes Working Group</collab><name name-style="western"><surname>Sudlow</surname><given-names>CLM</given-names> </name></person-group><article-title>Accuracy of patient self-report of stroke: a systematic review from the UK Biobank Stroke Outcomes Group</article-title><source>PLoS ONE</source><year>2015</year><volume>10</volume><issue>9</issue><fpage>e0137538</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0137538</pub-id><pub-id pub-id-type="medline">26355837</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Simpson</surname><given-names>CF</given-names> </name><name name-style="western"><surname>Boyd</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Carlson</surname><given-names>MC</given-names> </name><name name-style="western"><surname>Griswold</surname><given-names>ME</given-names> </name><name name-style="western"><surname>Guralnik</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Fried</surname><given-names>LP</given-names> </name></person-group><article-title>Agreement between self-report of disease diagnoses and medical record validation in disabled older women: factors that modify agreement</article-title><source>J Am Geriatr 
Soc</source><year>2004</year><month>01</month><volume>52</volume><issue>1</issue><fpage>123</fpage><lpage>127</lpage><pub-id pub-id-type="doi">10.1111/j.1532-5415.2004.52021.x</pub-id><pub-id pub-id-type="medline">14687326</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Description of concordance definitions by electronic health record and participant report.</p><media xlink:href="formative_v9i1e58097_app1.docx" xlink:title="DOCX File, 23 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Clinical codes used in medical condition and COVID-19 phenotypes.</p><media xlink:href="formative_v9i1e58097_app2.docx" xlink:title="DOCX File, 25 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>2&#x00D7;2 contingency tables for all concordance domains.</p><media xlink:href="formative_v9i1e58097_app3.docx" xlink:title="DOCX File, 39 KB"/></supplementary-material></app-group></back></article>