<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v4i1e13296</article-id>
      <article-id pub-id-type="pmid">31934872</article-id>
      <article-id pub-id-type="doi">10.2196/13296</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Using Natural Language Processing to Examine the Uptake, Content, and Readability of Media Coverage of a Pan-Canadian Drug Safety Research Project: Cross-Sectional Observational Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Eysenbach</surname>
            <given-names>Gunther</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Matsuda</surname>
            <given-names>Shinichi</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lourenço</surname>
            <given-names>Anália</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Carvalho</surname>
            <given-names>Darlinton</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lopez Jornet</surname>
            <given-names>Pia</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Mohammadhassanzadeh</surname>
            <given-names>Hossein</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1922-0455</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Sketris</surname>
            <given-names>Ingrid</given-names>
          </name>
          <degrees>PharmD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1820-5883</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Traynor</surname>
            <given-names>Robyn</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1386-708X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Alexander</surname>
            <given-names>Susan</given-names>
          </name>
          <degrees>MHI</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5066-4636</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Winquist</surname>
            <given-names>Brandace</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9848-7819</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Stewart</surname>
            <given-names>Samuel Alan</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Dalhousie University</institution>
            <addr-line>5790 University Ave, Room 408</addr-line>
            <addr-line>Halifax, NS, B3H 4R2</addr-line>
            <country>Canada</country>
            <phone>1 902 494 6287</phone>
            <fax>1 902 494 1597</fax>
            <email>sam.stewart@dal.ca</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6299-7967</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Dalhousie University</institution>
        <addr-line>Halifax, NS</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Nova Scotia Health Authority</institution>
        <addr-line>Halifax, NS</addr-line>
        <country>Canada</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>University of Saskatchewan</institution>
        <addr-line>Saskatoon, SK</addr-line>
        <country>Canada</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Samuel Alan Stewart <email>sam.stewart@dal.ca</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>1</month>
        <year>2020</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>14</day>
        <month>1</month>
        <year>2020</year>
      </pub-date>
      <volume>4</volume>
      <issue>1</issue>
      <elocation-id>e13296</elocation-id>
      <history>
        <date date-type="received">
          <day>3</day>
          <month>1</month>
          <year>2019</year>
        </date>
        <date date-type="rev-request">
          <day>27</day>
          <month>4</month>
          <year>2019</year>
        </date>
        <date date-type="rev-recd">
          <day>11</day>
          <month>7</month>
          <year>2019</year>
        </date>
        <date date-type="accepted">
          <day>26</day>
          <month>9</month>
          <year>2019</year>
        </date>
      </history>
      <copyright-statement>©Hossein Mohammadhassanzadeh, Ingrid Sketris, Robyn Traynor, Susan Alexander, Brandace Winquist, Samuel Alan Stewart. Originally published in JMIR Formative Research (http://formative.jmir.org), 14.01.2020.</copyright-statement>
      <copyright-year>2020</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on http://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2020/1/e13296" xlink:type="simple"/>
      <related-article related-article-type="correction-forward" xlink:title="This is a corrected version. See correction statement in:" xlink:href="http://formative.jmir.org/2020/6/e20211/" vol="6" page="e20211"> </related-article>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Isotretinoin, for treating cystic acne, increases the risk of miscarriage and fetal abnormalities when taken during pregnancy. The Health Canada–approved product monograph for isotretinoin includes pregnancy prevention guidelines. A recent study by the Canadian Network for Observational Drug Effect Studies (CNODES) on the occurrence of pregnancy and pregnancy outcomes during isotretinoin therapy estimated poor adherence to these guidelines. Media uptake of this study was unknown; awareness of this uptake could help improve drug safety communication.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The aim of this study was to understand how the media present pharmacoepidemiological research using the CNODES isotretinoin study as a case study.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>Google News was searched (April 25-May 6, 2016), using a predefined set of terms, for mention of the CNODES study. In total, 26 articles and 3 CNODES publications (original article, press release, and podcast) were identified. The article texts were cleaned (eg, advertisements and links removed), and the podcast was transcribed. A dictionary of 1295 unique words was created using natural language processing (NLP) techniques (term frequency-inverse document frequency, Porter stemming, and stop-word filtering) to identify common words and phrases. Similarity between the articles and reference publications was calculated using Euclidean distance; articles were grouped using hierarchical agglomerative clustering. Nine readability scales were applied to measure text readability based on factors such as number of words, difficult words, syllables, sentence counts, and other textual metrics.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The top 5 dictionary words were <italic>pregnancy</italic> (250 appearances), <italic>isotretinoin</italic> (220), <italic>study</italic> (209), <italic>drug</italic> (201), and <italic>women</italic> (185). Three distinct clusters were identified: Clusters 2 (5 articles) and 3 (4 articles) were from health-related websites and media, respectively; Cluster 1 (18 articles) contained largely media sources; 2 articles fell outside these clusters. Use of the term <italic>isotretinoin</italic> versus <italic>Accutane</italic> (a brand name of isotretinoin), discussion of pregnancy complications, and assignment of responsibility for guideline adherence varied between clusters. For example, the term <italic>pregnanc</italic> appeared most often in Clusters 1 (an average of 14.6 times per article) and 2 (11.4) and relatively infrequently in Cluster 3 (1.8). The average reading grade level for all articles was high (eg, Flesch-Kincaid, 13; Gunning Fog, 15; SMOG Index, 10; Coleman Liau Index, 15; Linsear Write Index, 13; and Text Standard, 13). Readability increased from Cluster 2 (Gunning Fog of 16.9) to 3 (12.2). The reading level varied between clusters (average 13th-15th grade) but, overall, exceeded the recommended health information reading level (6th to 8th grade).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Media interpretation of the CNODES study varied, with differences in synonym usage and areas of focus. All articles were written above the recommended health information reading level. Analyzing media using NLP techniques can help determine drug safety communication effectiveness. This project is important for understanding how drug safety studies are taken up and redistributed in the media.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>natural language processing</kwd>
        <kwd>mass media</kwd>
        <kwd>readability</kwd>
        <kwd>pharmacoepidemiology</kwd>
        <kwd>knowledge translation</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Web-Based Health Information and News Media</title>
        <p>Easy access to health-related information has rapidly transformed the traditional health care delivery paradigm. Patients increasingly use the internet to seek health information and learn more about symptoms, diseases, treatments, self-management, risk mitigation strategies, and shared decision-making with their health care providers [<xref ref-type="bibr" rid="ref1">1</xref>]. Up to 35% of all adults in the United States (and up to 45% of women and people with higher education) consulted the internet for health or medical information, either for themselves or someone else [<xref ref-type="bibr" rid="ref2">2</xref>]. In the United Kingdom, 87% of adults read either electronic or traditional newspapers [<xref ref-type="bibr" rid="ref3">3</xref>]. In 2012, 66.8% of Canadians aged 16 years and older searched the Web for medical or health-related information per Statistics Canada’s <italic>Canadian Internet Use Survey</italic> [<xref ref-type="bibr" rid="ref4">4</xref>].</p>
        <p>News media can have a significant impact on people’s perception and interpretation of scientific research. Journalists and science writers present the results from scientific publications in news articles for the public, health care providers, and policymakers, but also may influence attitudes and health behaviors [<xref ref-type="bibr" rid="ref5">5</xref>]. Although some believe that the process of journalism is relatively linear with information received from researchers and transmitted by journalists to a poorly informed public, others discuss the cocreation of media with journalists and the public, voluntary health organizations, or professionals in health services delivery, government, and private sector health care companies [<xref ref-type="bibr" rid="ref3">3</xref>]. News media have guidelines and ethical principles for reporting [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>], as well as resources to help them interpret the technical material (eg, Evidencenetwork.ca and HealthNewsReviews.org) and review criteria for elements to include in health reporting [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. In addition, organizations such as the Health and Medicine Division of the National Academies of Sciences, Engineering, and Medicine have provided information on communicating the risk, benefit, and uncertainty related to drug therapy [<xref ref-type="bibr" rid="ref9">9</xref>]. Nevertheless, it has been reported that some media interpretation may be hard to comprehend, fail to provide context, or contain exaggeration, false impression, incorrect numbers, immature data, or not-yet approved methods from ongoing research [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. 
It is, therefore, critical to study how the media cover medical research and investigate the quality of reporting and presentation of scientific findings [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>].</p>
        <p>The use of natural language processing (NLP) techniques and readability assessments can help us better understand how the media are reporting on the medical research we conduct. We used a study conducted by the Canadian Network for Observational Drug Effect Studies (CNODES) evaluating the effectiveness of one aspect of the isotretinoin Pregnancy Prevention Program in Canada [<xref ref-type="bibr" rid="ref16">16</xref>] as a case study to explore how the media present pharmacoepidemiological research.</p>
      </sec>
      <sec>
        <title>Canadian Network for Observational Drug Effect Studies</title>
        <p>CNODES is a network of Canadian pharmacoepidemiologists—distributed across 7 provincial sites and supported by 4 collaborative teams working across all sites—funded by the Canadian Institutes of Health Research (CIHR) to study the risks and benefits of postmarketed drugs [<xref ref-type="bibr" rid="ref17">17</xref>]. CNODES responds to queries on drug safety and effectiveness from decision makers and other stakeholders (eg, Health Canada and federal, provincial, and territorial pharmacare decision makers) by using meta-analytic methods to combine deidentified administrative health data from across Canada, the United Kingdom, and the United States [<xref ref-type="bibr" rid="ref18">18</xref>]. The CNODES knowledge translation team leads the network’s activities related to translating and mobilizing research results from specific studies for decision makers, stakeholders, and the public via the media. The results of the CNODES isotretinoin study, described below and published in the <italic>Canadian Medical Association Journal</italic> (CMAJ) in April 2016 [<xref ref-type="bibr" rid="ref16">16</xref>], were shared via a press release, subsequent media interviews with lead investigators, and a podcast developed by CMAJ to accompany the publication.</p>
      </sec>
      <sec>
        <title>Case Study: Isotretinoin and Pregnancy Prevention Program Adherence</title>
        <p>Isotretinoin, a known and potent teratogen, is widely used to treat cystic acne. Fetal exposure may result in a range of severe congenital anomalies and may increase the risk of spontaneous and induced abortion [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. Although the risks of pregnancy during isotretinoin therapy are well recognized, research continues to reveal poor adherence to pregnancy prevention guidelines and programs [<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref24">24</xref>]. In Canada, a voluntary pregnancy prevention program was designed to prevent fetal exposure to isotretinoin. It requires informed written consent, 2 pregnancy tests with negative results before starting isotretinoin, and 2 reliable forms of contraception during treatment [<xref ref-type="bibr" rid="ref25">25</xref>]. The objective of the CNODES study [<xref ref-type="bibr" rid="ref16">16</xref>] was to evaluate specific aspects of the effectiveness of the Canadian pregnancy prevention program in 4 provinces: British Columbia, Saskatchewan, Manitoba, and Ontario.</p>
        <p>In total, 59,271 female patients received 102,308 courses of isotretinoin therapy. Oral contraceptive use during treatment ranged from 24.3% to 32.9%. Overall, there were between 186 and 367 pregnancies during isotretinoin treatment (3.1-6.2 per 1000 isotretinoin users), depending on the method used to define pregnancy. When follow-up was extended to include the full gestational period (up to 42 weeks), there were 1473 pregnancies (24.9/1000 users) using the high specificity definition. Most of these (1331 pregnancies, or 90.4%) were lost spontaneously or terminated by medical intervention. A total of 118 live births were identified and 11 (9.3%) had a diagnosis of congenital malformation. Annual rates of pregnancy during isotretinoin therapy did not change between 1996 and 2011. The CNODES study concluded that adherence to the isotretinoin pregnancy prevention program was poor during the 15-year period [<xref ref-type="bibr" rid="ref16">16</xref>].</p>
      </sec>
      <sec>
        <title>Objectives of This Study</title>
        <p>This study examined media representation and uptake of the CNODES study on the occurrence of pregnancy and pregnancy outcomes during isotretinoin therapy. The specific objectives of this study were to use NLP and other text-analytic methods to: (1) summarize and comprehend the content of the media coverage; (2) identify relationships between the media articles; and (3) analyze the reading levels of the media articles. By achieving these preliminary objectives, we aimed to explore potential improvements in the way we present future research.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Search Strategy and Media Sources</title>
        <p>Our overall study methodology is depicted in <xref rid="figure1" ref-type="fig">Figure 1</xref>. We conducted a search in Google News from April 25 to May 6, 2016, using a predefined set of relevant search terms, to identify the traditional media sources (eg, local, national, and international news sources) reporting the CNODES isotretinoin study [<xref ref-type="bibr" rid="ref16">16</xref>], but excluding social media sources. We used the following search strategy: (isotrétinoïne OR Accutane OR Clarus OR Epuris OR isotretinoin OR CNODES OR “Canadian Network for Observational Drug Effect Studies”). We also tracked the media sources captured on the <italic>News</italic> tab on the Altmetric.com page for this article [<xref ref-type="bibr" rid="ref26">26</xref>], although these sources were all also retrieved through our Google News search. All retrieved articles were screened for relevance and to identify duplicates. The screening process did not consider quality or scope of coverage but was only performed to ensure that the retrieved articles (1) were not already in the corpus of articles, and (2) covered the original CNODES study (ie, were not false positives). Only English language articles were considered. This resulted in a dataset of 26 media articles and 3 publications produced by CNODES (the original CMAJ article, a press release, and a podcast produced by CMAJ of an interview with the study authors [<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>]). The texts of the articles were extracted and all 29 articles (26 media articles and 3 reference CNODES sources) were stored on a cloud-based server. All text preprocessing and analysis, as described below, were completed in Eclipse (Standard Luna-R), Microsoft Visual Studio 2013, and Python 3.7 (NLTK 3.2.1 and TextStat 0.3.1 libraries).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Methodology schematic for our study. CMAJ: <italic>Canadian Medical Association Journal</italic>; CNODES: Canadian Network for Observational Drug Effect Studies; TF-IDF: term frequency-inverse document frequency.</p>
          </caption>
          <graphic xlink:href="formative_v4i1e13296_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Natural Language Processing</title>
        <p>NLP is, generally, the ability of computers to analyze and manipulate natural language text or speech to provide an understanding of the text and answer questions about its contents. Different studies have demonstrated the application of NLP to information retrieval in a variety of areas such as question answering, social media text mining, and decision support systems [<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]. Mendonça et al showed that encoding clinical data in patient documents using NLP techniques, along with clinical rules, can help identify health care–associated pneumonia in infants [<xref ref-type="bibr" rid="ref30">30</xref>]. In a similar study, Dublin et al used only radiograph reports of previous cases with pneumonia to train their system to classify reports as consistent with pneumonia, inconsistent with pneumonia, or requiring manual review [<xref ref-type="bibr" rid="ref31">31</xref>]. Knirsch et al utilized NLP methods to encode radiology reports which, along with other data in the patient repository, help detect patients who should be isolated but were not identified using the normal protocols [<xref ref-type="bibr" rid="ref32">32</xref>].</p>
        <p>In a different study, Wang et al combined text mining techniques with statistical analysis and patient electronic health records to detect adverse drug events. They applied NLP techniques to narrative discharge summaries to identify the safety of drugs throughout their entire lifecycle [<xref ref-type="bibr" rid="ref33">33</xref>]. McTaggart et al adopted an NLP approach to analyze and transform large volumes of collected prescriptions (about 100 million per annum) into regular structured information on medication dose instructions [<xref ref-type="bibr" rid="ref34">34</xref>].</p>
        <p>These studies, and many more, show that NLP is an interdisciplinary area that includes a variety of computational techniques that, alone or in combination with other approaches, can perform a diverse set of tasks and applications. In line with the main purpose of this study, we leveraged various text mining techniques to analyze media articles (each technique explored in detail below):</p>
        <list list-type="order">
          <list-item>
            <p>Frequent words analysis to study the occurrence of words in each article and cluster, recognize the pattern of the most frequently used words, and investigate how the articles and clusters differ.</p>
          </list-item>
          <list-item>
            <p>Term frequency-inverse document frequency (TF-IDF) weighting to calculate the closeness and/or separation between the articles through cosine similarity and Euclidean distance.</p>
          </list-item>
          <list-item>
            <p>Hierarchical agglomerative clustering (HAC) to group (ie, cluster) similar articles together and to compare them with the original CNODES study.</p>
          </list-item>
          <list-item>
            <p>Readability scales to calculate readability and analyze how easily the articles can be read and understood by an average reader.</p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>Data Cleaning and Text Preprocessing</title>
        <p>NLP consists of 3 general steps: (1) text collection; (2) preprocessing; and (3) text analysis. Preprocessing is a crucial yet often undervalued part of the process and is key to the performance and accuracy of any text analysis [<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]. Links, advertisements, and all multimedia components (eg, images, figures, and videos) that are not informative or related to the content of the article were removed [<xref ref-type="bibr" rid="ref37">37</xref>].</p>
        <p>The next step in preprocessing was the removal of stop words. Stop words (eg, conjunctions, prepositions, and articles) are uninformative, frequently occurring words that do not carry much meaning and do not contribute to the differentiation between documents [<xref ref-type="bibr" rid="ref38">38</xref>]. We used simple automated text-searching techniques to remove all words appearing in a standard English stop word list [<xref ref-type="bibr" rid="ref39">39</xref>] (containing 627 words) from all the collected media articles.</p>
        <p>The final preprocessing step was to perform stemming. Stemming is the process of connecting different words that are derivatives of the same root (eg, <italic>student</italic>, <italic>studies</italic>, and <italic>studied</italic> are various forms of their stem, <italic>study</italic>) [<xref ref-type="bibr" rid="ref40">40</xref>]. A stemming algorithm conflates all words with the same root to a common form. Stemming, compared with full word representations, improves the indexing time (ie, the time to create the dictionary and calculate the Vector Space Model (VSM) representation) in an information retrieval system by reducing the size of the dictionary (ie, index file) by 20%-50%. In addition, a shorter list of index terms helps to improve the relevancy of the retrieved documents [<xref ref-type="bibr" rid="ref41">41</xref>-<xref ref-type="bibr" rid="ref44">44</xref>].</p>
        <p>There are different algorithms for stemming. In this study, we used Porter stemming [<xref ref-type="bibr" rid="ref45">45</xref>], which is the most widely used stemming algorithm for different languages, including English. The Porter stemming algorithm is independent from the context and has significantly reduced the complexity of the rules associated with suffix removal [<xref ref-type="bibr" rid="ref46">46</xref>]. It is worth mentioning that, to avoid any duplication, Porter stemming transforms all the words to lowercase and then calculates the stems.</p>
      </sec>
      <sec>
        <title>Frequent Words Analysis</title>
        <p>The purpose of the frequent words analysis was to provide an overall summary of the content of the media articles and to compare the content of the different articles—and the clusters identified later in the analysis—to learn more about the texts and the areas of their focus. These findings will help to identify how and why the clusters are different and refine further analyses [<xref ref-type="bibr" rid="ref47">47</xref>].</p>
        <p>Although frequent words analysis can provide a valuable broad overview of the content of the documents, this approach does not provide much insight into the differences between documents, as common words tend to be common across all media outlets. To provide deeper insight into the relationships between media articles, we looked at how the articles might cluster together based on the content of their coverage.</p>
      </sec>
      <sec>
        <title>Article Clustering</title>
        <p>The objective of article clustering was to identify patterns in coverage of the CNODES study. Using a 3-step process of TF-IDF weighting, similarity calculation, and HAC, we identified 3 potential clusters of similar media coverage and used the frequent words analysis to provide insight into how these clusters might have differed in their language and coverage choices.</p>
      </sec>
      <sec>
        <title>Term Frequency-Inverse Document Frequency Weighting</title>
        <p>We used TF-IDF weighting in our analysis to gain insight into what makes individual articles unique. TF-IDF values represent the frequency of a word in a specific document relative to the frequency of that word over the entire corpus of documents [<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>]. The following equation shows how the TF-IDF values are calculated, where <italic>w<sub>i,j</sub></italic> is the weight for term <italic>i</italic> in document <italic>j</italic>, <italic>N</italic> is the number of documents in the corpus, <italic>tf<sub>i,j</sub></italic> is the frequency of appearance of term <italic>i</italic> in document <italic>j</italic>, and <italic>df<sub>i</sub></italic> is the number of documents in the corpus that contain term <italic>i</italic> [<xref ref-type="bibr" rid="ref50">50</xref>]:</p>
        <graphic xlink:href="formative_v4i1e13296_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <p>TF-IDF values were calculated for all unique terms (1-grams) and the combinations of 2 sequential terms (2-grams) from the corpus using the above weighting equation and stored in an <italic>n</italic> × <italic>k</italic> matrix—where each row represents an article (n=29) and each column (k=6158) represents a 1-gram or 2-gram. This is a standard VSM representation that prepares the data to calculate similarity between the documents.</p>
        <p>Like most information retrieval systems, we considered multiword phrases (ie, 2-grams) as some phrases can be more meaningful and informative than individual terms. For example, in our study, the phrase <italic>pregnancy prevention</italic> can distinguish articles and find a degree of similarity between the collected documents better than 2 single terms <italic>pregnancy</italic> or <italic>prevention</italic>. In the calculations, we merged the combination of any 2 words in sequence (ie, 2 words that appear together) as a new phrase (ie, 2-grams) and included it in the VSM.</p>
      </sec>
      <sec>
        <title>Similarity Calculations</title>
        <p>A similarity measure reflects the degree of closeness between 2 articles using a single numeric value [<xref ref-type="bibr" rid="ref51">51</xref>]. We chose cosine similarity as it is easy to calculate and interpret and is commonly used in the NLP literature [<xref ref-type="bibr" rid="ref52">52</xref>]. Cosine similarity returns a value between 0 and 1, where 2 documents with a similarity value of 1 are regarded as identical, and a value of 0 implies no similarity between the documents [<xref ref-type="bibr" rid="ref51">51</xref>]. The result of the similarity calculations is a symmetric <italic>n x n</italic> similarity matrix (in our case, n=29).</p>
      </sec>
      <sec>
        <title>Hierarchical Agglomerative Clustering</title>
        <p>In this study, we chose HAC to group the similarity matrix into groups of similar documents because of the flexibility of hierarchical approaches in the desired number of clusters, its efficiency for small datasets, and the feasibility of graphical representation of the results through a tree-like structure called a dendrogram [<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref54">54</xref>].</p>
        <p>In agglomerative clustering, cutting branches of the dendrogram at a selected height (cut-off point) defines the resulting clusters. Selecting the best cut-off point depends on a variety of parameters such as the desired number of clusters, the granularity of the categories, or the acceptable distance between the entities within the clusters [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref56">56</xref>].</p>
        <p>We used Euclidean distance in the construction of the HAC clusters as it is more appropriate in this environment than the cosine similarity, but all the similarity values presented in this study are cosine similarity.</p>
      </sec>
      <sec>
        <title>Readability Analysis</title>
        <p>The final objective of our analysis was to measure the readability [<xref ref-type="bibr" rid="ref57">57</xref>] of the articles covering our initial study. Health literacy describes the extent to which one is able to acquire, interpret, and comprehend health information and services to make informed health decisions; the reading level of health information will either enable or impede its consumption [<xref ref-type="bibr" rid="ref58">58</xref>,<xref ref-type="bibr" rid="ref59">59</xref>]. Readability may be influenced by a variety of factors: the writing style, the clarity of words and sentences, and/or the degree to which a given text is compelling and comprehensible, based on a reader’s reading skill, prior knowledge, and motivation [<xref ref-type="bibr" rid="ref60">60</xref>-<xref ref-type="bibr" rid="ref63">63</xref>]. Although the average American reads at an 8th grade level, the American Medical Association and National Institutes of Health recommend that patient and health information be written at or below a 6th grade level [<xref ref-type="bibr" rid="ref64">64</xref>-<xref ref-type="bibr" rid="ref66">66</xref>].</p>
        <p>There are a variety of ways to measure the readability of a text. Friedman and Hoffman-Goetz [<xref ref-type="bibr" rid="ref67">67</xref>] found high concurrent validity and correlation between the various readability formulas, but no specific formula is accepted as the gold standard for assessing readability or reading ease of health information [<xref ref-type="bibr" rid="ref68">68</xref>].</p>
        <p>We used 9 well-formalized readability formulas (<xref ref-type="table" rid="table1">Table 1</xref>) to study the readability of the media articles. <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> further elaborates the readability formulas and the scores. Readability measures were developed using TextStat 0.3.1 library (Bansal and Aggarwal, MIT) in Python Package Index 3.4.4.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Readability formulas. C: number of characters; D: number of complex words; E: number of easy (not complex) words; P: number of polysyllables; S: number of sentences; W: number of words; Y: number of syllables; AC: average number of characters per 100 words; AS: average number of sentences per 100 words.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="198"/>
            <col width="156"/>
            <col width="253"/>
            <col width="393"/>
            <thead>
              <tr valign="top">
                <td>Readability score</td>
                <td>Score type</td>
                <td>Key statistical features</td>
                <td>Formula</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Flesch Reading Ease (FRES)</td>
                <td>Numeric score (0-100)</td>
                <td>Word length and sentence length</td>
                <td>FRES=206.835 – 1.015 x (W/S) – 84.6 x (Y/W)</td>
              </tr>
              <tr valign="top">
                <td>Flesch-Kincaid Grade (FKRA)</td>
                <td>US grade level</td>
                <td>Word length and sentence length</td>
                <td>FKRA=0.39 x (W/S) + 11.8 x (Y/W) – 15.59</td>
              </tr>
              <tr valign="top">
                <td>Gunning Fog Index (FOG)</td>
                <td>US grade level</td>
                <td>Number of complex words</td>
                <td>FOG=0.4 x [ (W/S) + 100 x (D/W)]</td>
              </tr>
              <tr valign="top">
                <td>Simple Measure of Gobbledygook Index (SMOG)</td>
                <td>US grade level</td>
                <td>Number of complex words</td>
                <td>SMOG=1.0430 x √(P x 30/S) + 3.1291</td>
              </tr>
              <tr valign="top">
                <td>Automated Readability Index (ARI)</td>
                <td>US grade level</td>
                <td>Number of characters</td>
                <td>ARI=4.71 x (C/W) + 0.5 x (W/S) – 21.43</td>
              </tr>
              <tr valign="top">
                <td>Coleman Liau Index (CLI)</td>
                <td>US grade level<sup>a</sup></td>
                <td>Number of characters</td>
                <td>CLI=0.0588 x AC – 0.296 x AS – 15.8</td>
              </tr>
              <tr valign="top">
                <td>Linsear Write Index (LWI)</td>
                <td>US grade level</td>
                <td>Sentence length, number of polysyllables</td>
                <td>(1) Find a 100-word sample from your writing; (2) Calculate Val=[E+(3×D)]/S; (3) If Val &gt; 20, then LWI=Val/2; (4) If Val ≤ 20, then LWI=(Val-2) / 2.</td>
              </tr>
              <tr valign="top">
                <td>Dale-Chall Readability Score (DCRS)</td>
                <td>Numeric score (0-9.9)</td>
                <td>Number of difficult words</td>
                <td>DCRS=0.1579 x [100 x (D/W)] + 0.0496 x (W/S)</td>
              </tr>
              <tr valign="top">
                <td>Text Standard</td>
                <td>US grade level</td>
                <td>
                  <break/>
                </td>
                <td>A voting system among the other metrics: the reading level that is most prevalent (the mode) among the other metrics calculated.</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>Grade level may also be understood as the number of years of formal education needed to understand a given text, particularly when the level exceeds the typical range of US grades (eg, 1-12). For example, grades 13-16 suggest undergraduate training, 17-18 graduate training, and 19+ professional qualification [<xref ref-type="bibr" rid="ref63">63</xref>,<xref ref-type="bibr" rid="ref67">67</xref>].</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview of Retrieved Articles</title>
        <p>In total, 29 articles, including 26 media articles and 3 CNODES reference articles, comprised the corpus of documents for this study, and were represented in a VSM. The articles were of varying length: from 13 to 51 sentences, or 227 to 1011 words. The combined vocabulary of all articles contained 7745 unique terms (out of 11,263 total terms that appeared in the entire dataset). There was an average of 35 sentences, 740 words, and 1380 syllables per article, with an average of 30.9% (229/740) of the words being complex—words with 3 or more syllables that do not belong to a list of 3000 familiar words [<xref ref-type="bibr" rid="ref69">69</xref>].</p>
      </sec>
      <sec>
        <title>Frequent Words Analysis</title>
        <p><italic>Pregnanc</italic> (stem of pregnancy) is the most frequent individual term among all the text with 344 occurrences, followed by <italic>isotretinoin</italic> and <italic>studi</italic> (stem of study, studies, etc) with 306 and 245 occurrences, respectively. <italic>Pregnanc prevent</italic> (stem of pregnancy prevention) and <italic>birth defect</italic> are the most recurrent 2-grams, with frequencies of 74 and 63, respectively.</p>
        <p><xref ref-type="table" rid="table2">Table 2</xref> shows that British Columbia (BC), 1 of the 4 provinces that were included in the study, appeared 35 times in the entire corpus. However, the other study provinces (not shown in <xref ref-type="table" rid="table2">Table 2</xref>), Saskatchewan, Ontario, and Manitoba, appeared 43, 40, and 26 times, respectively.</p>
        <p>Excluding those published by CNODES, only 2 articles (8% of the sources) mentioned or acknowledged CIHR, the study’s funder. <italic>Health Canada</italic> appeared in 13 articles, and some variant of the phrase <italic>conflict of interest</italic> occurred in only 1 article beyond the CNODES articles.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Top 10 most frequent vocabulary terms (1-grams and 2-grams).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="180"/>
            <col width="170"/>
            <col width="110"/>
            <col width="260"/>
            <col width="170"/>
            <col width="110"/>
            <thead>
              <tr valign="top">
                <td>1-grams</td>
                <td>Frequency</td>
                <td>Ratio</td>
                <td>2-grams</td>
                <td>Frequency</td>
                <td>Ratio</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>pregnanc</td>
                <td>344</td>
                <td>0.031</td>
                <td>pregnanc prevent</td>
                <td>74</td>
                <td>0.007</td>
              </tr>
              <tr valign="top">
                <td>isotretinoin</td>
                <td>306</td>
                <td>0.027</td>
                <td>birth defect</td>
                <td>63</td>
                <td>0.006</td>
              </tr>
              <tr valign="top">
                <td>studi</td>
                <td>245</td>
                <td>0.022</td>
                <td>birth control</td>
                <td>48</td>
                <td>0.004</td>
              </tr>
              <tr valign="top">
                <td>drug</td>
                <td>226</td>
                <td>0.020</td>
                <td>pregnanc test</td>
                <td>40</td>
                <td>0.004</td>
              </tr>
              <tr valign="top">
                <td>women</td>
                <td>188</td>
                <td>0.017</td>
                <td>women take</td>
                <td>39</td>
                <td>0.003</td>
              </tr>
              <tr valign="top">
                <td>us</td>
                <td>165</td>
                <td>0.015</td>
                <td>prevent program</td>
                <td>37</td>
                <td>0.003</td>
              </tr>
              <tr valign="top">
                <td>birth</td>
                <td>163</td>
                <td>0.014</td>
                <td>British Columbia</td>
                <td>35</td>
                <td>0.003</td>
              </tr>
              <tr valign="top">
                <td>research</td>
                <td>135</td>
                <td>0.012</td>
                <td>live birth</td>
                <td>33</td>
                <td>0.003</td>
              </tr>
              <tr valign="top">
                <td>treatment</td>
                <td>123</td>
                <td>0.011</td>
                <td>pregnanc rate</td>
                <td>33</td>
                <td>0.003</td>
              </tr>
              <tr valign="top">
                <td>acn</td>
                <td>118</td>
                <td>0.010</td>
                <td>isotretinoin user</td>
                <td>31</td>
                <td>0.003</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Similarity and Cluster Analysis</title>
        <p>The resulting values of cosine similarity calculations and HAC are presented in <xref rid="figure2" ref-type="fig">Figure 2</xref>. In the similarity matrix, green cells represent higher values of similarity (maximum of 1.0) between the articles; the similarity decreases as we move to the red side of the spectrum (minimum of 0.0). Using the similarity matrix and the dendrogram, we chose a cutoff in the dendrogram of 0.5, resulting in 3 distinct clusters. As the similarity values show, articles 28 and 29 are not significantly similar to any of the articles in the corpus. Consequently, they do not fit in any clusters. Articles 24 to 27 are similar to each other (with similarity values of 0.68 and above) but different from the remaining articles. Articles 19 to 23 are highly similar to each other, and articles 1 to 15 have higher values of similarity. These groups of articles were combined using HAC and form the 3 clusters: Cluster 1 (with 18 articles), Cluster 2 (with 5 articles), and Cluster 3 (with 4 articles).</p>
        <p>Further examination of the nature of the articles in each cluster showed Cluster 1, in addition to the 3 CNODES publications, included national and international news websites such as Reuters, CBC, The Globe and Mail, National Post, and CTV. Cluster 1 also included health-specific websites such as Medical Daily, Medical News Today, MD Magazine, and Medscape Medical News. The articles composing Cluster 3 were from regional news websites including CBC British Columbia and The Globe and Mail British Columbia. Articles in Cluster 2 did not include traditional news media outlets, but rather health-related and general interest websites (Science Daily, Parent Herald, and Science 2.0).</p>
        <p><xref rid="figure2" ref-type="fig">Figure 2</xref> also shows that the 3 CNODES publications—the CMAJ article, podcast, and press release—are highly similar to each other, with similarity values of 0.81 and above. Because of that degree of resemblance, the media articles maintain the same trend of similarity to the CNODES publications—an article which is similar to any of the 3 CNODES publications is similar to the other 2 and vice versa. <xref rid="figure3" ref-type="fig">Figure 3</xref> depicts this steady trend of similarity by comparing the similarity of each media article to the CNODES publications separately.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Cosine similarity values (between 0 and 1) between the media articles and CNODES publications, including CMAJ article, podcast, and press release article using TF-IDF calculations. Resulting dendrogram of hierarchical agglomerative clustering. Three clusters and 2 singletons, resulting from a cutoff point of 0.5. CMAJ: <italic>Canadian Medical Association Journal</italic>; CNODES: Canadian Network for Observational Drug Effect Studies; TF-IDF: term frequency-inverse document frequency.</p>
          </caption>
          <graphic xlink:href="formative_v4i1e13296_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Trend of similarity (cosine similarity) between the media articles and the CNODES publications: CMAJ article, podcast, and press release.</p>
          </caption>
          <graphic xlink:href="formative_v4i1e13296_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Frequent Words Analysis Within the Clusters</title>
        <p>In addition to studying the nature of the websites that published the media articles, we found that analysis of the frequent words within the clusters provides insight into how and to what extent the clusters are different. <xref ref-type="table" rid="table3">Table 3</xref> shows the 5 most common terms within each cluster, along with specific clinical terms that we selected <italic>a priori</italic> to measure across the clusters.</p>
        <p><italic>Pregnancy</italic> and <italic>isotretinoin</italic> are the most common terms in the articles of Clusters 1 and 2, while these 2 terms are not among the top 10 frequent terms of Cluster 3. In addition, Clusters 1 and 2 have 6 frequent terms in common, while only 2 frequent terms of Cluster 3 (<italic>studi</italic> and <italic>drug</italic>) appear in the top 10 frequent terms of Clusters 1 and 2. Frequent words analysis within the clusters, in accordance with the similarity matrix (see <xref rid="figure2" ref-type="fig">Figure 2</xref>), implies Clusters 1 and 2 are more similar to each other than to Cluster 3.</p>
        <p><xref ref-type="table" rid="table3">Table 3</xref> reveals the articles in Clusters 1 and 2 preferred to use <italic>isotretinoin</italic> (ranked 2) rather than <italic>Accutan</italic> (ranked 32 and 20, respectively), which is a brand name of <italic>isotretinoin</italic>; <italic>isotretinoin</italic> and <italic>Accutan</italic> were the 48th and 11th most frequent words, respectively, among the articles in Cluster 3. These rankings show the articles in Cluster 3 have chosen to focus on the brand name of the drug, rather than its generic name.</p>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows Clusters 1 and 2 have focused on <italic>patient</italic> and <italic>treatment</italic>, while these concepts are not in a high position in the articles of Cluster 3. <italic>Birth defect</italic> has a relatively constant focus across the clusters. Cluster 3 did not discuss <italic>fetal</italic>, <italic>fetal risk</italic>, <italic>fetal abnormality</italic>, or <italic>miscarriage</italic> at all. <italic>Acne</italic> is in a significantly lower position for Cluster 3 (ranked 60th).</p>
        <p>There is an overlap between the clinically important terms and the most frequent terms for each cluster. Hence, the top 5 most frequent terms of each cluster include the phrases that are not already mentioned in the 5 clinically most important terms. For example, since the 1st, 2nd, 3rd, and 7th most frequent terms of Cluster 1 are among the top 5 clinically important terms, the top 5 most frequent terms of Cluster 1 include the next 5 most frequent terms (the 4th, 5th, 6th, 8th, and 9th frequent terms of the cluster).</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Most common terms, both overall and within each cluster.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="160"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="180"/>
            <col width="0"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Cluster<sup>a</sup></td>
                <td colspan="2">Cluster 1</td>
                <td colspan="2">Cluster 2</td>
                <td colspan="2">Cluster 3</td>
                <td colspan="2">Singleton 28</td>
                <td>Singleton 29</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="12">
                  <bold>5 clinically most important terms</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="5">
                  <break/>
                </td>
                <td>Isotretinoin</td>
                <td colspan="2">240 (2)<sup>b</sup></td>
                <td colspan="2">49 (2)</td>
                <td colspan="2">6 (48)</td>
                <td colspan="2">7 (2)</td>
                <td colspan="2">4 (7)</td>
              </tr>
              <tr valign="top">
                <td>Accutan<sup>c</sup></td>
                <td colspan="2">46 (32)</td>
                <td colspan="2">12 (20)</td>
                <td colspan="2">17 (11)</td>
                <td colspan="2">—<sup>d</sup></td>
                <td colspan="2">3 (12)</td>
              </tr>
              <tr valign="top">
                <td>pregnanc</td>
                <td colspan="2">263 (1)</td>
                <td colspan="2">57 (1)</td>
                <td colspan="2">8 (33)</td>
                <td colspan="2">12 (1)</td>
                <td colspan="2">5 (3)</td>
              </tr>
              <tr valign="top">
                <td>drug</td>
                <td colspan="2">166 (3)</td>
                <td colspan="2">28 (6)</td>
                <td colspan="2">23 (7)</td>
                <td colspan="2">1 (50)</td>
                <td colspan="2">8 (1)</td>
              </tr>
              <tr valign="top">
                <td>birth</td>
                <td colspan="2">127 (7)</td>
                <td colspan="2">22 (9)</td>
                <td colspan="2">8 (33)</td>
                <td colspan="2">1 (50)</td>
                <td colspan="2">5 (3)</td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>Top 5 most frequent terms<sup>a</sup></bold>
                  <bold>of cluster 1</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="5">
                  <break/>
                </td>
                <td>studi</td>
                <td colspan="2">160 (4)</td>
                <td colspan="2">31 (3)</td>
                <td colspan="2">42 (2)</td>
                <td colspan="2">6 (3)</td>
                <td colspan="2">6 (2)</td>
              </tr>
              <tr valign="top">
                <td>Us</td>
                <td colspan="2">142 (5)</td>
                <td colspan="2">18 (12)</td>
                <td colspan="2">3 (107)</td>
                <td colspan="2">—</td>
                <td colspan="2">2 (17)</td>
              </tr>
              <tr valign="top">
                <td>women</td>
                <td colspan="2">139 (6)</td>
                <td colspan="2">30 (4)</td>
                <td colspan="2">14 (14)</td>
                <td colspan="2">—</td>
                <td colspan="2">5 (3)</td>
              </tr>
              <tr valign="top">
                <td>treatment</td>
                <td colspan="2">101 (8)</td>
                <td colspan="2">16 (14)</td>
                <td colspan="2">3 (107)</td>
                <td colspan="2">2 (16)</td>
                <td colspan="2">1 (39)</td>
              </tr>
              <tr valign="top">
                <td>patient</td>
                <td colspan="2">94 (9)</td>
                <td colspan="2">11 (24)</td>
                <td colspan="2">9 (27)</td>
                <td colspan="2">2 (16)</td>
                <td colspan="2">1 (39)</td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>Top 5 most frequent terms<sup>a</sup></bold>
                  <bold>of cluster 2</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="5">
                  <break/>
                </td>
                <td>studi</td>
                <td colspan="2">160 (4)</td>
                <td colspan="2">31 (3)</td>
                <td colspan="2">42 (2)</td>
                <td colspan="2">6 (3)</td>
                <td colspan="2">6 (2)</td>
              </tr>
              <tr valign="top">
                <td>women</td>
                <td colspan="2">139 (6)</td>
                <td colspan="2">30 (4)</td>
                <td colspan="2">14 (14)</td>
                <td colspan="2">—</td>
                <td colspan="2">5 (3)</td>
              </tr>
              <tr valign="top">
                <td>prevent</td>
                <td colspan="2">72 (16)</td>
                <td colspan="2">30 (4)</td>
                <td colspan="2">4 (74)</td>
                <td colspan="2">5 (4)</td>
                <td colspan="2">2 (17)</td>
              </tr>
              <tr valign="top">
                <td>canadian</td>
                <td colspan="2">62 (22)</td>
                <td colspan="2">27 (7)</td>
                <td colspan="2">6 (48)</td>
                <td colspan="2">2 (16)</td>
                <td colspan="2">1 (39)</td>
              </tr>
              <tr valign="top">
                <td>take</td>
                <td colspan="2">55 (28)</td>
                <td colspan="2">24 (8)</td>
                <td colspan="2">9 (27)</td>
                <td colspan="2">—</td>
                <td colspan="2">3 (12)</td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>Top 5 most frequent terms<sup>a</sup></bold>
                  <bold>of cluster 3</bold>
                </td>
              </tr>
              <tr valign="top">
                <td rowspan="5">
                  <break/>
                </td>
                <td>research</td>
                <td colspan="2">82 (13)</td>
                <td colspan="2">9 (27)</td>
                <td colspan="2">43 (1)</td>
                <td colspan="2">—</td>
                <td colspan="2">1 (39)</td>
              </tr>
              <tr valign="top">
                <td>studi</td>
                <td colspan="2">160 (4)</td>
                <td colspan="2">31 (3)</td>
                <td colspan="2">42 (2)</td>
                <td colspan="2">6 (3)</td>
                <td colspan="2">6 (2)</td>
              </tr>
              <tr valign="top">
                <td>health</td>
                <td colspan="2">66 (19)</td>
                <td colspan="2">1 (411)</td>
                <td colspan="2">38 (3)</td>
                <td colspan="2">—</td>
                <td colspan="2">—</td>
              </tr>
              <tr valign="top">
                <td>Data</td>
                <td colspan="2">35 (44)</td>
                <td colspan="2">—</td>
                <td colspan="2">38 (3)</td>
                <td colspan="2">—</td>
                <td colspan="2">—</td>
              </tr>
              <tr valign="top">
                <td>said</td>
                <td colspan="2">56 (27)</td>
                <td colspan="2">7 (41)</td>
                <td colspan="2">33 (5)</td>
                <td colspan="2">—</td>
                <td colspan="2">4 (7)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>The terms in the table are stemmed versions of the actual terms (for example, us represents various forms of the verb use, and pregnanc stands for pregnancy).</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>Top 5 most frequent terms of each cluster exclude the 5 clinically important terms.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>The first number in the cells shows the frequency of occurrence of the term, and the second number in parentheses shows the ranking of the term among all the terms in that cluster.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>Empty cells (represented with a —) are the terms that do not appear in the respective cluster/singleton.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Readability Analysis</title>
        <p>Overall, 9 readability formulas were calculated for each article in the corpus. Different readability formulas consider different variables in the calculations and measure readability from distinct perspectives (see <xref ref-type="table" rid="table1">Table 1</xref>).</p>
        <p>All calculated readability scores are above United States grade 10. Text standard scores, which represent the most prevalent reading level among all the formulas, ranged between 12 and 18, except for one article with a readability level of 9. <xref rid="figure4" ref-type="fig">Figure 4</xref> demonstrates the distribution of readability levels of articles based on text-standard measure.</p>
        <p><xref ref-type="table" rid="table4">Table 4</xref> shows reading ease based on calculating the average of each readability score for the articles within the clusters. On average, the articles in Cluster 3 were the easiest to read, followed by the articles in Clusters 1 and 2.</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Distribution of readability levels of articles based on text-standard measure.</p>
          </caption>
          <graphic xlink:href="formative_v4i1e13296_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Average readability level of each cluster.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="70"/>
            <col width="102"/>
            <col width="105"/>
            <col width="98"/>
            <col width="74"/>
            <col width="137"/>
            <col width="102"/>
            <col width="102"/>
            <col width="133"/>
            <col width="77"/>
            <thead>
              <tr valign="top">
                <td>Cluster</td>
                <td>Flesch Reading Ease</td>
                <td>Flesch-Kincaid Grade</td>
                <td>Gunning Fog Index</td>
                <td>SMOG Index</td>
                <td>Automated Readability Index</td>
                <td>Coleman Liau Index</td>
                <td>Linsear Write Index</td>
                <td>Dale-Chall Readability Score</td>
                <td>Text Standard</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Cluster 1</td>
                <td>40.78</td>
                <td>13.02</td>
                <td>15.19</td>
                <td>15.58</td>
                <td>15.21</td>
                <td>14.47</td>
                <td>13.27</td>
                <td>9.87</td>
                <td>16th grade</td>
              </tr>
              <tr valign="top">
                <td>Cluster 2</td>
                <td>29.89</td>
                <td>14.74</td>
                <td>16.59</td>
                <td>16.92</td>
                <td>16.76</td>
                <td>15.97</td>
                <td>10.62</td>
                <td>10.67</td>
                <td>17th grade</td>
              </tr>
              <tr valign="top">
                <td>Cluster 3</td>
                <td>49.19</td>
                <td>11.35</td>
                <td>13.75</td>
                <td>14.33</td>
                <td>13.35</td>
                <td>13.32</td>
                <td>8.85</td>
                <td>9.39</td>
                <td>14th grade</td>
              </tr>
              <tr valign="top">
                <td>Singleton 28</td>
                <td>36.79</td>
                <td>12.50</td>
                <td>12.99</td>
                <td>15.90</td>
                <td>15.20</td>
                <td>16.82</td>
                <td>13.75</td>
                <td>8.82</td>
                <td>12th grade</td>
              </tr>
              <tr valign="top">
                <td>Singleton 29</td>
                <td>49.55</td>
                <td>11.70</td>
                <td>15.11</td>
                <td>15.00</td>
                <td>15.40</td>
                <td>14.74</td>
                <td>8.08</td>
                <td>9.87</td>
                <td>14th grade</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Overall Results</title>
        <p>Our NLP analysis of media coverage showed that the interpretation of the CNODES isotretinoin study [<xref ref-type="bibr" rid="ref16">16</xref>] was diverse, with significant variations in content, language, areas of focus, and reading level. The primary focus of the media coverage was pregnancy and pregnancy prevention, but this focus was not consistent across all articles. Some articles focused more on the disease, drug, and treatment, while others emphasized the study and the related government regulations.</p>
        <p>Regardless of the method used to calculate reading level, the overall reading levels were too high for the average North American reader, where the target reading level should be grades 6-8 [<xref ref-type="bibr" rid="ref64">64</xref>-<xref ref-type="bibr" rid="ref66">66</xref>]. Consequently, these media stories may have failed to reach many potential isotretinoin users of child-bearing potential. Even when the reading level calculations were re-run under different scenarios, such as reducing the complexity of complex words (eg, isotretinoin) through substitutions with shorter terms (eg, drug), the reading levels remained well above recommended reading levels.</p>
        <p>Our results were similar to those of other studies that documented high reading levels for plain language communications of scientific advice. For example, in a study of 53 qualified health claims on food and dietary supplement labels, which are regulated by the United States Food and Drug Administration, the Flesch-Kincaid grade level ranged from 5.37 to 30.30, with 77% above a grade 9 reading level [<xref ref-type="bibr" rid="ref70">70</xref>].</p>
        <p>Overall disclosure of funders was low, with only 2 media articles naming CIHR as the funding organization. Financial disclosure is especially important in journalism covering pharmaceuticals where various conflicts of interest may exist [<xref ref-type="bibr" rid="ref71">71</xref>,<xref ref-type="bibr" rid="ref72">72</xref>].</p>
        <p>The CNODES study was covered by the Canadian newspaper, The Globe and Mail, which averages 3.1 million print and digital readers on a typical weekday. It received coverage from both national television (CBC and CTV) and more specialized media with niche audiences such as iPolitics, which covers federal, provincial, and international politics and policies. The study also received international coverage from Thomson Reuters (www.thomsonreuters.com), which covers a broad range of topics in media markets around the world. The articles varied in length, ranging from approximately 200 to 1000 words, in large part due to standard word limits set by each media outlet [<xref ref-type="bibr" rid="ref73">73</xref>].</p>
      </sec>
      <sec>
        <title>Words Used</title>
        <p>We had expected a significant overlap between some of the articles, with the potential for articles to be reprinted in different venues; overall, the words used in each media report were less similar than expected. Although there were commonalities between the articles, there was little evidence of republication or wholesale duplication of articles. We were not able to easily discern if certain articles were informed by others. Although the original CNODES source material did seem to influence the content of each article, each article author (or set of authors) clearly applied their own spin to the content. It is possible that if there had been more media coverage, patterns of duplication might have emerged. However, we have no evidence to suggest there were any patterns of reprinting in this corpus.</p>
        <p>The clusters varied in the extent of overlap with our original press release and the top words used. Documents 28 and 29 had less similarity to the other articles in the corpus. It is interesting that document 28 was the American Pharmacist, which would likely employ science writers, and document 29 was the CBC in Saskatchewan, where they had direct access to one of the authors of the study who resided in Saskatchewan and was able to provide additional information from the Saskatchewan perspective. It has been noted that several publications reprint the press releases they receive without additional comment or contextualization and many media outlets are vertically integrated, although these integrations were not reflected in our analysis. It is interesting that the 10th most frequent 2-gram was <italic>isotretinoin user</italic>, which is an epidemiologic term and comes directly from the research study with specific definitions. Of note, health care providers (eg, physicians, pharmacists, and nurses) did not come up in the top 10 words. Instead, the focus seemed to be on women using isotretinoin, many of whom were not also dispensed birth control pills, and the protective actions they should be taking rather than on what health care providers or policymakers should be doing.</p>
        <p><xref ref-type="table" rid="table3">Table 3</xref> shows the words by cluster. It is interesting that acn (stem of acne) came up in the top 10 only in Cluster 1. In media articles, it is useful to set the context: isotretinoin is approved only for severe cystic acne, although it is frequently used off-label. The top words in Cluster 2 were isotretinoin and pregnancy, so perhaps they were focusing more on the effects of isotretinoin than the purpose of it. Cluster 3 had research and study as their top 2 results, reflecting that they are focusing on reporting the results of the study conducted, rather than trying to consume and translate the results themselves. Clusters 1 and 2 used isotretinoin more frequently than Accutane (a common brand name for the isotretinoin product), while Cluster 3 used Accutane more frequently, reflecting different approaches on how to communicate the drug to the audience.</p>
        <p>Omission of specific parts of the media release was surprising, such as the lack of disclosure around study funding (CIHR) and potential conflicts of interest. Although many of the articles did mention Health Canada, better reporting about the study team would have provided better context for the research and information on potential competing interests.</p>
        <p><xref ref-type="table" rid="table4">Table 4</xref> shows the variability of reading levels, both between equations and across clusters. Regardless of which readability measure was used, each cluster showed a readability level that was too high, making it difficult for some patients to comprehend the material. The National Institutes of Health and American Medical Association suggest that health education material be written at a 6th to 8th grade level [<xref ref-type="bibr" rid="ref64">64</xref>,<xref ref-type="bibr" rid="ref74">74</xref>]. Readability calculations like these are not the only approach to measuring health literacy and are known to have shortcomings [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref67">67</xref>,<xref ref-type="bibr" rid="ref75">75</xref>]. We looked at readability but not a reader’s motivation to read one of the media documents or their ability to comprehend it [<xref ref-type="bibr" rid="ref67">67</xref>]. We also did not examine numeracy, which is critical in the drug safety literature. In future, we will broaden our investigations of other aspects of health literacy, combining readability with the ability to find, process, and understand information, and to integrate these concepts with other sources of information to support health decision making [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref76">76</xref>]. Finally, although we looked at digital media coverage and examined specific aspects of health literacy, we did not examine electronic health literacy, which is an important concept.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Our study takes a novel approach to tracking the media coverage of academic research after it has been published and is an important part of growing the knowledge translation component of the CNODES project, but it has its shortcomings. Our search, although comprehensive from a keyword perspective, was limited to media outlets that published on the internet. We did not search the websites of individual newspapers, with the assumption that our general Google News search would capture all relevant mentions. We did not evaluate pictures that were associated with the media articles, the way in which numbers were reported, or links to other resources. We did not consider the expertise of the journalists, specifically, whether there was a difference in the reporting between health journalists and general assignment reporters. We did not examine the length of the media article beyond its influences on reading level, so there may be further insights to be gleaned from comparing article length with specific aspects such as funding source and article positioning (eg, front page). Finally, although we believe we have captured all meaningful media coverage of our study, our data capture window was relatively short, we did not use a commercial news aggregator, and we did not specifically examine gray literature, so there is always the potential that we have missed some media articles.</p>
        <p>We are currently not able to speak to who the articles may have deemed responsible for the original study results (ie, poor pregnancy prevention guideline adherence) or to determine the quality of the media report [<xref ref-type="bibr" rid="ref8">8</xref>]. This type of insight is nuanced and difficult to achieve using NLP techniques, but should be explored more in future work as these insights would be valuable. We have also not analyzed the way in which the media stories were received, understood, and used by patients, health care providers, and policymakers, nor what additional information these individuals may have used to support their decision making [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
        <p>There are many known limitations to using reading-level metrics [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref67">67</xref>,<xref ref-type="bibr" rid="ref75">75</xref>]; thus, it is possible we are overestimating how difficult it may be to read the media coverage. It is important to understand that reading level is only one way to evaluate readability, and only one aspect of many to consider when communicating health information effectively [<xref ref-type="bibr" rid="ref77">77</xref>].</p>
        <p>Placement within the media content is an important determinant of consumption and could provide an indication of an article’s perceived value. In a digital age, these factors can change significantly over time and between users. We were unable to process this information. We did not specifically examine if independent sources (such as other researchers) were used by journalists to inform context and study validity, or whether patients, voluntary health organizations, or drug regulatory agencies provided their perspectives. We were unable to identify if the journalist was an employee of the news organization or if the article came from a news wire service or syndicated service. We also did not examine if a link to the original CMAJ article was provided.</p>
        <p>We did not consider the quality of the coverage in terms of source. Although we subjectively evaluated the coverage to deem it as relevant or not, an objective measure of quality (such as the DISCERN tool [<xref ref-type="bibr" rid="ref78">78</xref>]) or popularity could both assess the quality of the coverage and provide another document-level metric to understand the full extent of media coverage. Future work in this area should consider these factors.</p>
      </sec>
      <sec>
        <title>Recommendations and Implications for Practice</title>
        <p>It is important for researchers to understand how their research is presented by the media. Our analysis demonstrates that there is little consistency in how this is done using a peer-reviewed research article, even when accompanied by a crafted press release and outreach by the primary authors. If there are potentially controversial or sensitive issues arising from the research that need to be presented carefully, then the narrative around these issues should be appropriately constructed in the wording of the press releases and an effort needs to be made to monitor how the information is being translated in real time as it is disseminated. The reading levels of the media covering research can be quite high; more efforts should be made to simplify the press releases and other knowledge translation materials generated from the research so that journalists can more easily present the research in an accessible manner. Researchers can assist journalists by identifying other aspects of their research such as broader context and limitations [<xref ref-type="bibr" rid="ref13">13</xref>].</p>
      </sec>
      <sec>
        <title>Future Study</title>
        <p>Improving the reading levels of CNODES’ dissemination efforts, particularly outside of academic literature, could improve the ability of CNODES to reach key target audiences (eg, health care providers, decision makers). Further work is needed to develop automated media coverage analysis so that researchers can quickly and efficiently identify how their research is being covered and what is and is not being consumed, with the potential to react to it in real time and correct any potential misinterpretations by media outlets. Future research will need to augment readability approaches with other approaches, such as the use of mental model research [<xref ref-type="bibr" rid="ref79">79</xref>], to inform communications strategies. Expanding on the analysis with sentiment and qualitative analyses would also be valuable as there are insights into sentiment and attribution that were not explored in this paper. The approach to document similarity we took in this paper considered the documents as a whole, but there is potential for articles to overlap in content from certain sections of the document, while adding their own local or audience-specific context to a common theme. Future research into topic modeling [<xref ref-type="bibr" rid="ref80">80</xref>] could help identify themes that are common across documents, to contrast with document-specific themes.</p>
        <p>Although this study focused solely on the content of the words presented in the articles, future research should incorporate the use of photos, captions, hyperlinks, and multimedia to form a more complete picture of how a study was presented. Due to the changing and various ways of presenting information on the Web, this kind of project would require careful and deliberate planning and would be difficult to do on a retrospective basis.</p>
        <p>Extending this study to social media coverage would be a valuable addition; there are large and meaningful discussion sections accompanying some of the articles in this study (eg, doc09). Our research group has studied the altmetrics of our research on social media [<xref ref-type="bibr" rid="ref81">81</xref>]. Combining these two research arms in a single stream could provide more nuanced results.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study has demonstrated that NLP can be a valuable tool in understanding how research is conveyed to the public through digital media. Through NLP, we identified significant variations in the coverage of our research and what parts of our publications journalists focused on. We demonstrated how readability calculations can be applied to media coverage. Our future work will look at expanding our methods to better understand how our research is consumed by the media.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>List of articles (26 media articles, 3 Canadian Network for Observational Drug Effect Studies reference publications).</p>
        <media xlink:href="formative_v4i1e13296_app1.docx" xlink:title="DOCX File, 26 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Readability scales.</p>
        <media xlink:href="formative_v4i1e13296_app2.docx" xlink:title="DOCX File, 16 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">BC</term>
          <def>
            <p>British Columbia</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CIHR</term>
          <def>
            <p>Canadian Institutes of Health Research</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CMAJ</term>
          <def>
            <p>Canadian Medical Association Journal</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">CNODES</term>
          <def>
            <p>Canadian Network for Observational Drug Effect Studies</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">HAC</term>
          <def>
            <p>hierarchical agglomerative clustering</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">TF-IDF</term>
          <def>
            <p>term frequency-inverse document frequency</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">VSM</term>
          <def>
            <p>vector space model</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>CNODES, a collaborating center of the Drug Safety and Effectiveness Network, is funded by the Canadian Institutes of Health Research (Grant Numbers DSE-111845 and DSE-146021). HM, RT, SA, and IS have received salary support, in part, from CIHR for the CNODES project. The authors would like to acknowledge Kim Kelly for her literature search support.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Storino</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Castillo-Angeles</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Watkins</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Vargas</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mancias</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Bullock</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Demirjian</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Moser</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kent</surname>
              <given-names>TS</given-names>
            </name>
          </person-group>
          <article-title>Assessing the accuracy and readability of online health information for patients with pancreatic cancer</article-title>
          <source>JAMA Surg</source>
          <year>2016</year>
          <month>09</month>
          <day>1</day>
          <volume>151</volume>
          <issue>9</issue>
          <fpage>831</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1001/jamasurg.2016.0730</pub-id>
          <pub-id pub-id-type="medline">27144966</pub-id>
          <pub-id pub-id-type="pii">2517164</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fox</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Duggan</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <source>Pew Research Center</source>
          <year>2013</year>
          <access-date>2019-07-01</access-date>
          <comment>Health Online 2013 <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.pewinternet.org/2013/01/15/health-online-2013/">https://www.pewinternet.org/2013/01/15/health-online-2013/</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hanson</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>O'Brien</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Whybrow</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Isaacs</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Rapley</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Drug breakthrough offers hope to arthritis sufferers: qualitative analysis of medical research in UK newspapers</article-title>
          <source>Health Expect</source>
          <year>2017</year>
          <month>04</month>
          <volume>20</volume>
          <issue>2</issue>
          <fpage>309</fpage>
          <lpage>20</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/27145430"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/hex.12460</pub-id>
          <pub-id pub-id-type="medline">27145430</pub-id>
          <pub-id pub-id-type="pmcid">PMC5354054</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="web">
          <source>Statistics Canada</source>
          <year>2012</year>
          <access-date>2018-12-03</access-date>
          <comment>Canadian Internet Use Survey <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www23.statcan.gc.ca/imdb/p2SV.pl?Function=getSurvey&#38;SDDS=4432">http://www23.statcan.gc.ca/imdb/p2SV.pl?Function=getSurvey&#38;SDDS=4432</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Walsh-Childers</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Braddock</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rabaza</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Schwitzer</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>One step forward, one step back: changes in news coverage of medical interventions</article-title>
          <source>Health Commun</source>
          <year>2018</year>
          <month>02</month>
          <volume>33</volume>
          <issue>2</issue>
          <fpage>174</fpage>
          <lpage>87</lpage>
          <pub-id pub-id-type="doi">10.1080/10410236.2016.1250706</pub-id>
          <pub-id pub-id-type="medline">27983868</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>CAJ Ethics Advisory Committee</collab>
          </person-group>
          <source>The Canadian Association of Journalists</source>
          <year>2011</year>
          <access-date>2018-12-03</access-date>
          <comment>CAJ: Ethics Guidelines <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://caj.ca/content.php?page=ethics-guidelines">http://caj.ca/content.php?page=ethics-guidelines</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="web">
          <source>National NewsMedia Council</source>
          <year>2018</year>
          <access-date>2018-12-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mediacouncil.ca/media-ethics/">https://mediacouncil.ca/media-ethics/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zeraatkar</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Obeda</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ginsberg</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Hirsh</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>The development and validation of an instrument to measure the quality of health research reports in the lay media</article-title>
          <source>BMC Public Health</source>
          <year>2017</year>
          <month>04</month>
          <day>20</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>343</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-017-4259-y"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12889-017-4259-y</pub-id>
          <pub-id pub-id-type="medline">28427426</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12889-017-4259-y</pub-id>
          <pub-id pub-id-type="pmcid">PMC5397754</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="web">
          <source>The National Academies of Sciences, Engineering, Medicine</source>
          <year>2019</year>
          <access-date>2019-06-19</access-date>
          <comment>Health and Medicine Division <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.nationalacademies.org/hmd/">http://www.nationalacademies.org/hmd/</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <source>Food and Drug Administration</source>
          <year>2007</year>
          <access-date>2019-07-01</access-date>
          <comment>The Future of Drug Safety - Promoting and Protecting the Health of the Public: FDA's Response to the Institute of Medicine's 2006 Report <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.fda.gov/media/77173/download">https://www.fda.gov/media/77173/download</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schwartz</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Woloshin</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>On the prevention and treatment of exaggeration</article-title>
          <source>J Gen Intern Med</source>
          <year>2003</year>
          <month>02</month>
          <volume>18</volume>
          <issue>2</issue>
          <fpage>153</fpage>
          <lpage>4</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://onlinelibrary.wiley.com/resolve/openurl?genre=article&#38;sid=nlm:pubmed&#38;issn=0884-8734&#38;date=2003&#38;volume=18&#38;issue=2&#38;spage=153"/>
          </comment>
          <pub-id pub-id-type="doi">10.1046/j.1525-1497.2003.21216.x</pub-id>
          <pub-id pub-id-type="medline">12542591</pub-id>
          <pub-id pub-id-type="pii">jgi21216</pub-id>
          <pub-id pub-id-type="pmcid">PMC1494822</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bubela</surname>
              <given-names>TM</given-names>
            </name>
            <name name-style="western">
              <surname>Caulfield</surname>
              <given-names>TA</given-names>
            </name>
          </person-group>
          <article-title>Do the print media 'hype' genetic research? A comparison of newspaper stories and peer-reviewed research papers</article-title>
          <source>Can Med Assoc J</source>
          <year>2004</year>
          <month>04</month>
          <day>27</day>
          <volume>170</volume>
          <issue>9</issue>
          <fpage>1399</fpage>
          <lpage>407</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cmaj.ca/cgi/pmidlookup?view=long&#38;pmid=15111473"/>
          </comment>
          <pub-id pub-id-type="doi">10.1503/cmaj.1030762</pub-id>
          <pub-id pub-id-type="medline">15111473</pub-id>
          <pub-id pub-id-type="pmcid">PMC400292</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dentzer</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Communicating medical news—pitfalls of health care journalism</article-title>
          <source>N Engl J Med</source>
          <year>2009</year>
          <month>01</month>
          <day>1</day>
          <volume>360</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>3</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMp0805753</pub-id>
          <pub-id pub-id-type="medline">19118299</pub-id>
          <pub-id pub-id-type="pii">360/1/1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moynihan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Bero</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Ross-Degnan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Henry</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Watkins</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mah</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Soumerai</surname>
              <given-names>SB</given-names>
            </name>
          </person-group>
          <article-title>Coverage by the news media of the benefits and risks of medications</article-title>
          <source>N Engl J Med</source>
          <year>2000</year>
          <month>06</month>
          <day>1</day>
          <volume>342</volume>
          <issue>22</issue>
          <fpage>1645</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJM200006013422206</pub-id>
          <pub-id pub-id-type="medline">10833211</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Haneef</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yavchitz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ravaud</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Baron</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Oransky</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Schwitzer</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Boutron</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Interpretation of health news items reported with or without spin: protocol for a prospective meta-analysis of 16 randomised controlled trials</article-title>
          <source>BMJ Open</source>
          <year>2017</year>
          <month>11</month>
          <day>17</day>
          <volume>7</volume>
          <issue>11</issue>
          <fpage>e017425</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://bmjopen.bmj.com/cgi/pmidlookup?view=long&#38;pmid=29151047"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2017-017425</pub-id>
          <pub-id pub-id-type="medline">29151047</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2017-017425</pub-id>
          <pub-id pub-id-type="pmcid">PMC5702017</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Henry</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Dormuth</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Winquist</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Carney</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Bugden</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Teare</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lévesque</surname>
              <given-names>LE</given-names>
            </name>
            <name name-style="western">
              <surname>Bérard</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Paterson</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Platt</surname>
              <given-names>RW</given-names>
            </name>
            <collab>CNODES (Canadian Network for Observational Drug Effect Studies) Investigators</collab>
          </person-group>
          <article-title>Occurrence of pregnancy and pregnancy outcomes during isotretinoin therapy</article-title>
          <source>Can Med Assoc J</source>
          <year>2016</year>
          <month>07</month>
          <day>12</day>
          <volume>188</volume>
          <issue>10</issue>
          <fpage>723</fpage>
          <lpage>30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cmaj.ca/cgi/pmidlookup?view=long&#38;pmid=27114489"/>
          </comment>
          <pub-id pub-id-type="doi">10.1503/cmaj.151243</pub-id>
          <pub-id pub-id-type="medline">27114489</pub-id>
          <pub-id pub-id-type="pii">cmaj.151243</pub-id>
          <pub-id pub-id-type="pmcid">PMC4938682</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Suissa</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Henry</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Caetano</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Dormuth</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Ernst</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Hemmelgarn</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lelorier</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Levy</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Martens</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Paterson</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Platt</surname>
              <given-names>RW</given-names>
            </name>
            <name name-style="western">
              <surname>Sketris</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Teare</surname>
              <given-names>G</given-names>
            </name>
            <collab>Canadian Network for Observational Drug Effect Studies (CNODES)</collab>
          </person-group>
          <article-title>CNODES: the Canadian Network for Observational Drug Effect Studies</article-title>
          <source>Open Med</source>
          <year>2012</year>
          <volume>6</volume>
          <issue>4</issue>
          <fpage>e134</fpage>
          <lpage>40</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23687528"/>
          </comment>
          <pub-id pub-id-type="medline">23687528</pub-id>
          <pub-id pub-id-type="pmcid">PMC3654509</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="web">
          <source>Canadian Institutes of Health Research</source>
          <year>2012</year>
          <access-date>2018-12-03</access-date>
          <comment>About DSEN <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cihr-irsc.gc.ca/e/39389.html">http://www.cihr-irsc.gc.ca/e/39389.html</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lammer</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>DT</given-names>
            </name>
            <name name-style="western">
              <surname>Hoar</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Agnish</surname>
              <given-names>ND</given-names>
            </name>
            <name name-style="western">
              <surname>Benke</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Braun</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Curry</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Fernhoff</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Grix</surname>
              <given-names>AW</given-names>
            </name>
            <name name-style="western">
              <surname>Lott</surname>
              <given-names>IT</given-names>
            </name>
          </person-group>
          <article-title>Retinoic acid embryopathy</article-title>
          <source>N Engl J Med</source>
          <year>1985</year>
          <month>10</month>
          <day>3</day>
          <volume>313</volume>
          <issue>14</issue>
          <fpage>837</fpage>
          <lpage>41</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJM198510033131401</pub-id>
          <pub-id pub-id-type="medline">3162101</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rosa</surname>
              <given-names>FW</given-names>
            </name>
          </person-group>
          <article-title>Teratogenicity of isotretinoin</article-title>
          <source>Lancet</source>
          <year>1983</year>
          <month>08</month>
          <day>27</day>
          <volume>2</volume>
          <issue>8348</issue>
          <fpage>513</fpage>
          <pub-id pub-id-type="doi">10.1016/s0140-6736(83)90538-x</pub-id>
          <pub-id pub-id-type="medline">6136666</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(83)90538-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Crijns</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Straus</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Gispen-de Wied</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>de Jong-van den Berg</surname>
              <given-names>LT</given-names>
            </name>
          </person-group>
          <article-title>Compliance with pregnancy prevention programmes of isotretinoin in Europe: a systematic review</article-title>
          <source>Br J Dermatol</source>
          <year>2011</year>
          <month>02</month>
          <volume>164</volume>
          <issue>2</issue>
          <fpage>238</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1365-2133.2010.09976.x</pub-id>
          <pub-id pub-id-type="medline">20716214</pub-id>
          <pub-id pub-id-type="pii">BJD9976</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Azoulay</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Oraichi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bérard</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Patterns and utilization of isotretinoin for acne from 1984 to 2003: is there need for concern?</article-title>
          <source>Eur J Clin Pharmacol</source>
          <year>2006</year>
          <month>08</month>
          <volume>62</volume>
          <issue>8</issue>
          <fpage>667</fpage>
          <lpage>74</lpage>
          <pub-id pub-id-type="doi">10.1007/s00228-006-0151-x</pub-id>
          <pub-id pub-id-type="medline">16791584</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Honein</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Erickson</surname>
              <given-names>JD</given-names>
            </name>
          </person-group>
          <article-title>Can we ensure the safe use of known human teratogens? Introduction of generic isotretinoin in the US as an example</article-title>
          <source>Drug Saf</source>
          <year>2004</year>
          <volume>27</volume>
          <issue>14</issue>
          <fpage>1069</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.2165/00002018-200427140-00001</pub-id>
          <pub-id pub-id-type="medline">15554743</pub-id>
          <pub-id pub-id-type="pii">27141</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cheetham</surname>
              <given-names>TC</given-names>
            </name>
            <name name-style="western">
              <surname>Wong</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Niu</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Kass</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Yoshinaga</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Sorel</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>McCombs</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Sidney</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The impact of the iPLEDGE program on isotretinoin fetal exposure in an integrated health care system</article-title>
          <source>J Am Acad Dermatol</source>
          <year>2011</year>
          <month>12</month>
          <volume>65</volume>
          <issue>6</issue>
          <fpage>1117</fpage>
          <lpage>25</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jaad.2010.09.017</pub-id>
          <pub-id pub-id-type="medline">21565419</pub-id>
          <pub-id pub-id-type="pii">S0190-9622(10)01101-1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Koren</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Nulman</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Pregnancy and isotretinoin therapy</article-title>
          <source>Can Med Assoc J</source>
          <year>2013</year>
          <month>03</month>
          <day>19</day>
          <volume>185</volume>
          <issue>5</issue>
          <fpage>411</fpage>
          <lpage>3</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.cmaj.ca/cgi/pmidlookup?view=long&#38;pmid=23296582"/>
          </comment>
          <pub-id pub-id-type="doi">10.1503/cmaj.120729</pub-id>
          <pub-id pub-id-type="medline">23296582</pub-id>
          <pub-id pub-id-type="pii">cmaj.120729</pub-id>
          <pub-id pub-id-type="pmcid">PMC3602257</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <source>CMAJ Article Metrics</source>
          <year>2019</year>
          <access-date>2019-06-21</access-date>
          <comment>Occurrence of Pregnancy and Pregnancy Outcomes During Isotretinoin Therapy <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://cmaj.altmetric.com/details/6949654/news">https://cmaj.altmetric.com/details/6949654/news</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chowdhury</surname>
              <given-names>GG</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing</article-title>
          <source>Ann Rev Info Sci Tech</source>
          <year>2005</year>
          <month>01</month>
          <day>31</day>
          <volume>37</volume>
          <issue>1</issue>
          <fpage>51</fpage>
          <lpage>89</lpage>
          <pub-id pub-id-type="doi">10.1002/aris.1440370103</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mohammadhassanzadeh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Shahriari</surname>
              <given-names>HR</given-names>
            </name>
          </person-group>
          <article-title>Prediction of user's trustworthiness in web-based social networks via text mining</article-title>
          <source>ISC Int J Inf Security</source>
          <year>2013</year>
          <volume>5</volume>
          <issue>2</issue>
          <fpage>171</fpage>
          <lpage>87</lpage>
          <pub-id pub-id-type="doi">10.22042/ISECURE.2014.5.2.5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Demner-Fushman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>McDonald</surname>
              <given-names>CJ</given-names>
            </name>
          </person-group>
          <article-title>What can natural language processing do for clinical decision support?</article-title>
          <source>J Biomed Inform</source>
          <year>2009</year>
          <month>10</month>
          <volume>42</volume>
          <issue>5</issue>
          <fpage>760</fpage>
          <lpage>72</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(09)00108-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2009.08.007</pub-id>
          <pub-id pub-id-type="medline">19683066</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(09)00108-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC2757540</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mendonça</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Haas</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shagina</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Larson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Extracting information on pneumonia in infants using natural language processing of radiology reports</article-title>
          <source>J Biomed Inform</source>
          <year>2005</year>
          <month>08</month>
          <volume>38</volume>
          <issue>4</issue>
          <fpage>314</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(05)00016-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2005.02.003</pub-id>
          <pub-id pub-id-type="medline">16084473</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(05)00016-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dublin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Baldwin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Walker</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Christensen</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Haug</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Ferraro</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Carrell</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
          </person-group>
          <article-title>Natural Language Processing to identify pneumonia from radiology reports</article-title>
          <source>Pharmacoepidemiol Drug Saf</source>
          <year>2013</year>
          <month>08</month>
          <volume>22</volume>
          <issue>8</issue>
          <fpage>834</fpage>
          <lpage>41</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/23554109"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/pds.3418</pub-id>
          <pub-id pub-id-type="medline">23554109</pub-id>
          <pub-id pub-id-type="pmcid">PMC3811072</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Knirsch</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Pablos-Mendez</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Respiratory isolation of tuberculosis patients using clinical guidelines and an automated clinical decision support system</article-title>
          <source>Infect Control Hosp Epidemiol</source>
          <year>1998</year>
          <month>02</month>
          <volume>19</volume>
          <issue>2</issue>
          <fpage>94</fpage>
          <lpage>100</lpage>
          <pub-id pub-id-type="doi">10.1086/647773</pub-id>
          <pub-id pub-id-type="medline">9510106</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Hripcsak</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Markatou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Active computerized pharmacovigilance using natural language processing, statistics, and electronic health records: a feasibility study</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2009</year>
          <volume>16</volume>
          <issue>3</issue>
          <fpage>328</fpage>
          <lpage>37</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/19261932"/>
          </comment>
          <pub-id pub-id-type="doi">10.1197/jamia.M3028</pub-id>
          <pub-id pub-id-type="medline">19261932</pub-id>
          <pub-id pub-id-type="pii">M3028</pub-id>
          <pub-id pub-id-type="pmcid">PMC2732239</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McTaggart</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nangle</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Caldwell</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Alvarez-Madrazo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Colhoun</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bennie</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Use of text-mining methods to improve efficiency in the calculation of drug exposure to support pharmacoepidemiology studies</article-title>
          <source>Int J Epidemiol</source>
          <year>2018</year>
          <month>04</month>
          <day>1</day>
          <volume>47</volume>
          <issue>2</issue>
          <fpage>617</fpage>
          <lpage>24</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/29420741"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/ije/dyx264</pub-id>
          <pub-id pub-id-type="medline">29420741</pub-id>
          <pub-id pub-id-type="pii">4840573</pub-id>
          <pub-id pub-id-type="pmcid">PMC5913611</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ahonen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Heinonen</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Klemettinen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Verkamo</surname>
              <given-names>AI</given-names>
            </name>
          </person-group>
          <source>CiteSeerX</source>
          <year>1997</year>
          <access-date>2018-12-04</access-date>
          <comment>Applying Data Mining Techniques in Text Analysis <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.104.492">http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.104.492</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Uysal</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Gunal</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The impact of preprocessing on text classification</article-title>
          <source>Inf Process Manag</source>
          <year>2014</year>
          <volume>50</volume>
          <issue>1</issue>
          <fpage>104</fpage>
          <lpage>12</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ipm.2013.08.006</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mierswa</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Wurst</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Klinkenberg</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Scholz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Euler</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>YALE: Rapid Prototyping for Complex Data Mining Tasks</article-title>
          <source>Proceedings of the 12th ACM SIGKDD international conference on Knowledge discovery and data mining</source>
          <year>2006</year>
          <conf-name>KDD'06</conf-name>
          <conf-date>August 20-23, 2006</conf-date>
          <conf-loc>Philadelphia, PA</conf-loc>
          <fpage>935</fpage>
          <lpage>40</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dl.acm.org/citation.cfm?id=1150531"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/1150402.1150531</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Ze-wen</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Research on the Construction and Filter Method of Stop-word List in Text Preprocessing</article-title>
          <source>2011 Proceedings of the Fourth International Conference on Intelligent Computation Technology and Automation</source>
          <year>2011</year>
          <conf-name>ICICTA'11</conf-name>
          <conf-date>Mar 28-29, 2011</conf-date>
          <conf-loc>Shenzhen, China</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ICICTA.2011.64</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="web">
          <source>GitHub</source>
          <year>2017</year>
          <access-date>2019-07-01</access-date>
          <comment>Stop words list <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://raw.githubusercontent.com/hassanzade/StopWords/master/StopWords.txt">https://raw.githubusercontent.com/hassanzade/StopWords/master/StopWords.txt</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>NH</given-names>
            </name>
            <name name-style="western">
              <surname>Ibrahim</surname>
              <given-names>NS</given-names>
            </name>
          </person-group>
          <article-title>Porter stemming algorithm for semantic checking</article-title>
          <year>2012</year>
          <conf-name>15th International Conference on Computer and Information Technology</conf-name>
          <conf-date>Dec 20-23, 2012</conf-date>
          <conf-loc>Chittagong, Bangladesh</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lovins</surname>
              <given-names>JB</given-names>
            </name>
          </person-group>
          <article-title>Development of a stemming algorithm</article-title>
          <source>Mech Translat Comp Linguistics</source>
          <year>1968</year>
          <volume>11</volume>
          <issue>1-2</issue>
          <fpage>22</fpage>
          <lpage>31</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pdfs.semanticscholar.org/6b38/53f08c482fe1bfbe39d656d50a8c73976f3c.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hajeer</surname>
              <given-names>SI</given-names>
            </name>
            <name name-style="western">
              <surname>Ismail</surname>
              <given-names>RM</given-names>
            </name>
            <name name-style="western">
              <surname>Badr</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Tolba</surname>
              <given-names>MF</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Hassanien</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Fouad</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Manaf</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Zamani</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ahmad</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>A new stemming algorithm for efficient information retrieval systems and web search engines</article-title>
          <source>Multimedia Forensics and Security: Foundations, Innovations, and Applications</source>
          <year>2017</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>117</fpage>
          <lpage>35</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baeza-Yates</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ribeiro-Neto</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <source>Modern Information Retrieval</source>
          <year>1999</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>ACM Press Books</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sembok</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ata</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Bakar</surname>
              <given-names>ZA</given-names>
            </name>
          </person-group>
          <article-title>A rule and template based stemming algorithm for Arabic language</article-title>
          <source>Int J Math Mod Meth Appl Sci</source>
          <year>2011</year>
          <volume>5</volume>
          <issue>5</issue>
          <fpage>974</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pdfs.semanticscholar.org/d056/3e7c0b8bc0e196971c8b202764e2f0176710.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Porter</surname>
              <given-names>MF</given-names>
            </name>
          </person-group>
          <article-title>An algorithm for suffix stripping</article-title>
          <source>Program</source>
          <year>1980</year>
          <volume>14</volume>
          <issue>3</issue>
          <fpage>130</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1108/eb046814</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Willett</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>The Porter stemming algorithm: then and now</article-title>
          <source>Program</source>
          <year>2006</year>
          <volume>40</volume>
          <issue>3</issue>
          <fpage>219</fpage>
          <lpage>23</lpage>
          <pub-id pub-id-type="doi">10.1108/00330330610681295</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Danneman</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Heimann</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <source>Social Media Mining With R</source>
          <year>2014</year>
          <publisher-loc>Birmingham, UK</publisher-loc>
          <publisher-name>Packt Publishing Ltd</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aizawa</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>An information-theoretic perspective of tf–idf measures</article-title>
          <source>Inf Process Manag</source>
          <year>2003</year>
          <month>01</month>
          <volume>39</volume>
          <issue>1</issue>
          <fpage>45</fpage>
          <lpage>65</lpage>
          <pub-id pub-id-type="doi">10.1016/S0306-4573(02)00021-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ramos</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Using tf-idf to determine word relevance in document queries</article-title>
          <source>Proceedings of the First Instructional Conference on Machine Learning</source>
          <year>2003</year>
          <conf-name>iCML'03</conf-name>
          <conf-date>December 3-8, 2003</conf-date>
          <conf-loc>Piscataway, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Yoshida</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>A comparative study of TF*IDF, LSI and multi-words for text classification</article-title>
          <source>Expert Syst Appl</source>
          <year>2011</year>
          <volume>38</volume>
          <issue>3</issue>
          <fpage>2758</fpage>
          <lpage>65</lpage>
          <pub-id pub-id-type="doi">10.1016/j.eswa.2010.08.066</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Similarity measures for text document clustering</article-title>
          <source>Proceedings of the Sixth New Zealand Computer Science Research Student Conference</source>
          <year>2008</year>
          <conf-name>NZCSRSC'08</conf-name>
          <conf-date>2008</conf-date>
          <conf-loc>Christchurch, New Zealand</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dhillon</surname>
              <given-names>IS</given-names>
            </name>
            <name name-style="western">
              <surname>Modha</surname>
              <given-names>DS</given-names>
            </name>
          </person-group>
          <article-title>Concept decompositions for large sparse text data using clustering</article-title>
          <source>Mach Learn</source>
          <year>2001</year>
          <volume>42</volume>
          <issue>1-2</issue>
          <fpage>143</fpage>
          <lpage>75</lpage>
          <pub-id pub-id-type="doi">10.1023/A:1007612920971</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Deshpande</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Vaze</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rathod</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Jarhad</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Comparative study of document similarity algorithms and clustering algorithms for sentiment analysis</article-title>
          <source>Int J Emerg Trends Technol Comput Sci</source>
          <year>2014</year>
          <volume>3</volume>
          <issue>5</issue>
          <fpage>196</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pdfs.semanticscholar.org/7b6f/936391052717edf6bb9214953e991a9ebbd5.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zepeda-Mendoza</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Resendis-Antonio</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Dubitzky</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wolkenhauer</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Yokota</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Cho</surname>
              <given-names>KH</given-names>
            </name>
          </person-group>
          <article-title>Hierarchical agglomerative clustering</article-title>
          <source>Encyclopedia of Systems Biology</source>
          <year>2013</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Janssens</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>De Moor</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Glänzel</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Hybrid clustering for validation and improvement of subject-classification schemes</article-title>
          <source>Inf Process Manag</source>
          <year>2009</year>
          <month>11</month>
          <volume>45</volume>
          <issue>6</issue>
          <fpage>683</fpage>
          <lpage>702</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ipm.2009.06.003</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Langfelder</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Horvath</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Defining clusters from a hierarchical cluster tree: the Dynamic Tree Cut package for R</article-title>
          <source>Bioinformatics</source>
          <year>2008</year>
          <month>03</month>
          <day>1</day>
          <volume>24</volume>
          <issue>5</issue>
          <fpage>719</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1093/bioinformatics/btm563</pub-id>
          <pub-id pub-id-type="medline">18024473</pub-id>
          <pub-id pub-id-type="pii">btm563</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>DuBay</surname>
              <given-names>WH</given-names>
            </name>
          </person-group>
          <source>Impact Information</source>
          <year>2004</year>
          <access-date>2019-07-02</access-date>
          <comment>The Principles of Readability <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.impact-information.com/impactinfo/readability02.pdf">http://www.impact-information.com/impactinfo/readability02.pdf</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>Institute of Medicine (US) Committee on Health Literacy</collab>
            <name name-style="western">
              <surname>Nielsen-Bohlman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Panzer</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Kindig</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <source>Health Literacy: A Prescription To End Confusion</source>
          <year>2004</year>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>National Academies Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="web">
          <source>Health Gov</source>
          <access-date>2019-07-19</access-date>
          <comment>Quick Guide to Health Literacy <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://health.gov/communication/literacy/quickguide/factsbasic.htm">https://health.gov/communication/literacy/quickguide/factsbasic.htm</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bailin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Grafstein</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The linguistic assumptions underlying readability formulae: a critique</article-title>
          <source>Lang Commun</source>
          <year>2001</year>
          <volume>21</volume>
          <issue>3</issue>
          <fpage>285</fpage>
          <lpage>301</lpage>
          <pub-id pub-id-type="doi">10.1016/S0271-5309(01)00005-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Klare</surname>
              <given-names>GR</given-names>
            </name>
          </person-group>
          <source>The Measurement of Readability</source>
          <year>1963</year>
          <publisher-loc>Ames</publisher-loc>
          <publisher-name>Iowa State University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hargis</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Readability and computer documentation</article-title>
          <source>ACM J Comput Doc</source>
          <year>2000</year>
          <month>08</month>
          <day>1</day>
          <volume>24</volume>
          <issue>3</issue>
          <fpage>122</fpage>
          <lpage>31</lpage>
          <pub-id pub-id-type="doi">10.1145/344599.344634</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McLaughlin</surname>
              <given-names>GH</given-names>
            </name>
          </person-group>
          <article-title>SMOG grading - a new readability formula</article-title>
          <source>J Reading</source>
          <year>1969</year>
          <volume>12</volume>
          <issue>8</issue>
          <fpage>639</fpage>
          <lpage>46</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ogg.osu.edu/media/documents/health_lit/WRRSMOG_Readability_Formula_G._Harry_McLaughlin__1969_.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>BD</given-names>
            </name>
          </person-group>
          <source>National Center for Farmworker Health: On-Line Library</source>
          <year>2003</year>
          <access-date>2019-08-14</access-date>
          <comment>American Medical Association <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://lib.ncfh.org/pdfs/6617.pdf">http://lib.ncfh.org/pdfs/6617.pdf</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="web">
          <source>MedlinePlus</source>
          <year>2017</year>
          <access-date>2019-07-19</access-date>
          <comment>How to Write Easy-To-Read Health Materials <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medlineplus.gov/etr.html">https://medlineplus.gov/etr.html</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eltorai</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Truntzer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Daniels</surname>
              <given-names>AH</given-names>
            </name>
          </person-group>
          <article-title>Readability of patient education materials on the American Orthopaedic Society for Sports Medicine website</article-title>
          <source>Phys Sportsmed</source>
          <year>2014</year>
          <month>11</month>
          <volume>42</volume>
          <issue>4</issue>
          <fpage>125</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.3810/psm.2014.11.2099</pub-id>
          <pub-id pub-id-type="medline">25419896</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman-Goetz</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>A systematic review of readability and comprehension instruments used for print and web-based cancer information</article-title>
          <source>Health Educ Behav</source>
          <year>2006</year>
          <month>06</month>
          <volume>33</volume>
          <issue>3</issue>
          <fpage>352</fpage>
          <lpage>73</lpage>
          <pub-id pub-id-type="doi">10.1177/1090198105277329</pub-id>
          <pub-id pub-id-type="medline">16699125</pub-id>
          <pub-id pub-id-type="pii">33/3/352</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>The problems with current readability methods and formulas: missing that usability design</article-title>
          <source>Proceedings of the 2016 IEEE International Professional Communication Conference</source>
          <year>2016</year>
          <conf-name>IPCC'16</conf-name>
          <conf-date>October 2-5, 2016</conf-date>
          <conf-loc>Austin, TX, USA</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="web">
          <source>Character Count</source>
          <year>2017</year>
          <access-date>2019-06-20</access-date>
          <comment>List of 3000 Familiar Simple Words <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://charactercounttool.com/list-of-3000-familiar-words.html">https://charactercounttool.com/list-of-3000-familiar-words.html</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Berhaupt-Glickstein</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hallman</surname>
              <given-names>WK</given-names>
            </name>
          </person-group>
          <article-title>Communicating scientific evidence in qualified health claims</article-title>
          <source>Crit Rev Food Sci Nutr</source>
          <year>2017</year>
          <month>09</month>
          <day>2</day>
          <volume>57</volume>
          <issue>13</issue>
          <fpage>2811</fpage>
          <lpage>24</lpage>
          <pub-id pub-id-type="doi">10.1080/10408398.2015.1069730</pub-id>
          <pub-id pub-id-type="medline">26558421</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Stassen</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <source>Semantic Scholar</source>
          <year>2016</year>
          <access-date>2019-08-14</access-date>
          <comment>Health Research as News in South Africa: Measuring the Quality of Health Journalism at Six Daily Newspapers <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://pdfs.semanticscholar.org/903b/88d0c57753758cdd650bb627304660c0229e.pdf">https://pdfs.semanticscholar.org/903b/88d0c57753758cdd650bb627304660c0229e.pdf</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Moynihan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Heath</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Henry</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Selling sickness: the pharmaceutical industry and disease mongering</article-title>
          <source>Br Med J</source>
          <year>2002</year>
          <month>04</month>
          <day>13</day>
          <volume>324</volume>
          <issue>7342</issue>
          <fpage>886</fpage>
          <lpage>91</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/11950740"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmj.324.7342.886</pub-id>
          <pub-id pub-id-type="medline">11950740</pub-id>
          <pub-id pub-id-type="pmcid">PMC1122833</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="web">
          <source>Kaiser Family Foundation</source>
          <year>2009</year>
          <access-date>2019-07-02</access-date>
          <comment>The State of Health Journalism in the US, March 2009 <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.kff.org/other/report/the-state-of-health-journalism-in-the/">https://www.kff.org/other/report/the-state-of-health-journalism-in-the/</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref74">
        <label>74</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hansberry</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzales</surname>
              <given-names>SF</given-names>
            </name>
            <name name-style="western">
              <surname>Baker</surname>
              <given-names>SR</given-names>
            </name>
          </person-group>
          <article-title>Are we effectively informing patients? A quantitative analysis of on-line patient education resources from the American Society of Neuroradiology</article-title>
          <source>AJNR Am J Neuroradiol</source>
          <year>2014</year>
          <month>07</month>
          <volume>35</volume>
          <issue>7</issue>
          <fpage>1270</fpage>
          <lpage>5</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.ajnr.org/cgi/pmidlookup?view=long&#38;pmid=24763420"/>
          </comment>
          <pub-id pub-id-type="doi">10.3174/ajnr.A3854</pub-id>
          <pub-id pub-id-type="medline">24763420</pub-id>
          <pub-id pub-id-type="pii">ajnr.A3854</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref75">
        <label>75</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eloy</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kasabwala</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Agarwal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hansberry</surname>
              <given-names>DR</given-names>
            </name>
            <name name-style="western">
              <surname>Baredes</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Setzen</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Readability assessment of patient education materials on major otolaryngology association websites</article-title>
          <source>Otolaryngol Head Neck Surg</source>
          <year>2012</year>
          <month>11</month>
          <volume>147</volume>
          <issue>5</issue>
          <fpage>848</fpage>
          <lpage>54</lpage>
          <pub-id pub-id-type="doi">10.1177/0194599812456152</pub-id>
          <pub-id pub-id-type="medline">22864405</pub-id>
          <pub-id pub-id-type="pii">0194599812456152</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref76">
        <label>76</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nutbeam</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Boxall</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>What influences the transfer of research into health policy and practice? Observations from England and Australia</article-title>
          <source>Public Health</source>
          <year>2008</year>
          <month>08</month>
          <volume>122</volume>
          <issue>8</issue>
          <fpage>747</fpage>
          <lpage>53</lpage>
          <pub-id pub-id-type="doi">10.1016/j.puhe.2008.04.020</pub-id>
          <pub-id pub-id-type="medline">18561966</pub-id>
          <pub-id pub-id-type="pii">S0033-3506(08)00136-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref77">
        <label>77</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sykes</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wills</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Rowlands</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Popple</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Understanding critical health literacy: a concept analysis</article-title>
          <source>BMC Public Health</source>
          <year>2013</year>
          <month>02</month>
          <day>18</day>
          <volume>13</volume>
          <fpage>150</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcpublichealth.biomedcentral.com/articles/10.1186/1471-2458-13-150"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/1471-2458-13-150</pub-id>
          <pub-id pub-id-type="medline">23419015</pub-id>
          <pub-id pub-id-type="pii">1471-2458-13-150</pub-id>
          <pub-id pub-id-type="pmcid">PMC3583748</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref78">
        <label>78</label>
        <nlm-citation citation-type="web">
          <source>DISCERN</source>
          <year>1997</year>
          <access-date>2019-06-20</access-date>
          <comment>The DISCERN Instrument <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.discern.org.uk/discern_instrument.php">http://www.discern.org.uk/discern_instrument.php</ext-link></comment>
        </nlm-citation>
      </ref>
      <ref id="ref79">
        <label>79</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bruine de Bruin</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Bostrom</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Assessing what to address in science communication</article-title>
          <source>Proc Natl Acad Sci USA</source>
          <year>2013</year>
          <month>08</month>
          <day>20</day>
          <volume>110</volume>
          <issue>Suppl 3</issue>
          <fpage>14062</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.pnas.org/cgi/pmidlookup?view=long&#38;pmid=23942122"/>
          </comment>
          <pub-id pub-id-type="doi">10.1073/pnas.1212729110</pub-id>
          <pub-id pub-id-type="medline">23942122</pub-id>
          <pub-id pub-id-type="pii">1212729110</pub-id>
          <pub-id pub-id-type="pmcid">PMC3752171</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref80">
        <label>80</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Blei</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <article-title>Probabilistic topic models</article-title>
          <source>Commun ACM</source>
          <year>2012</year>
          <volume>55</volume>
          <issue>4</issue>
          <fpage>77</fpage>
          <lpage>84</lpage>
          <pub-id pub-id-type="doi">10.1145/2133806.2133826</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref81">
        <label>81</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gamble</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Traynor</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Gruzd</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mai</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Dormuth</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Sketris</surname>
              <given-names>IS</given-names>
            </name>
          </person-group>
          <article-title>Measuring the impact of pharmacoepidemiologic research using altmetrics: a case study of a CNODES drug-safety article</article-title>
          <source>Pharmacoepidemiol Drug Saf</source>
          <year>2018</year>
          <month>03</month>
          <day>24</day>
          <pub-id pub-id-type="doi">10.1002/pds.4401</pub-id>
          <pub-id pub-id-type="medline">29575351</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
