<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v6i6e34834</article-id>
      <article-id pub-id-type="pmid">35767322</article-id>
      <article-id pub-id-type="doi">10.2196/34834</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Pretrained Transformer Language Models Versus Pretrained Word Embeddings for the Detection of Accurate Health Information on Arabic Social Media: Comparative Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Wei</surname>
            <given-names>Shanzun</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Rovetta</surname>
            <given-names>Alessandro</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zhou</surname>
            <given-names>Xinyu</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Albalawi</surname>
            <given-names>Yahya</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Computer Science and Information Systems</institution>
            <institution>University of Limerick</institution>
            <addr-line>Tierney Building</addr-line>
            <addr-line>Limerick, V94 T9PX</addr-line>
            <country>Ireland</country>
            <phone>353 61213028 ext 3724</phone>
            <email>yahalbalawi@gmail.com</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4264-6355</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Nikolov</surname>
            <given-names>Nikola S</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8022-0297</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Buckley</surname>
            <given-names>Jim</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6928-6746</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science and Information Systems</institution>
        <institution>University of Limerick</institution>
        <addr-line>Limerick</addr-line>
        <country>Ireland</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Computer and Information Sciences</institution>
        <institution>College of Arts and Science</institution>
        <institution>University of Taibah</institution>
        <addr-line>Al-Ula</addr-line>
        <country>Saudi Arabia</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>The Irish Software Research Centre, Lero</institution>
        <institution>University of Limerick</institution>
        <addr-line>Limerick</addr-line>
        <country>Ireland</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Yahya Albalawi <email>yahalbalawi@gmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>6</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>29</day>
        <month>6</month>
        <year>2022</year>
      </pub-date>
      <volume>6</volume>
      <issue>6</issue>
      <elocation-id>e34834</elocation-id>
      <history>
        <date date-type="received">
          <day>9</day>
          <month>11</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>8</day>
          <month>2</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>4</day>
          <month>4</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>21</day>
          <month>4</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Yahya Albalawi, Nikola S Nikolov, Jim Buckley. Originally published in JMIR Formative Research (https://formative.jmir.org), 29.06.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2022/6/e34834" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>In recent years, social media has become a major channel for health-related information in Saudi Arabia. Prior health informatics studies have suggested that a large proportion of health-related posts on social media are inaccurate. Given the subject matter and the scale of dissemination of such information, it is important to be able to automatically discriminate between accurate and inaccurate health-related posts in Arabic.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>The first aim of this study is to generate a data set of generic health-related tweets in Arabic, labeled as either accurate or inaccurate health information. The second aim is to leverage this data set to train a state-of-the-art deep learning model for detecting the accuracy of health-related tweets in Arabic. In particular, this study aims to train and compare the performance of multiple deep learning models that use pretrained word embeddings and transformer language models.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We used 900 health-related tweets from a previously published data set extracted between July 15, 2019, and August 31, 2019. Furthermore, we applied a pretrained model to extract an additional 900 health-related tweets from a second data set collected specifically for this study between March 1, 2019, and April 15, 2019. The 1800 tweets were labeled by 2 physicians as <italic>accurate</italic>, <italic>inaccurate</italic>, or <italic>unsure</italic>. The physicians agreed on 43.3% (779/1800) of tweets, which were thus labeled as <italic>accurate</italic> or <italic>inaccurate</italic>. A total of 9 variations of the pretrained transformer language models were then trained and validated on 79.9% (623/779 tweets) of the data set and tested on 20% (156/779 tweets) of the data set. For comparison, we also trained a bidirectional long short-term memory model with 7 different pretrained word embeddings as the input layer on the same data set. The models were compared in terms of their accuracy, precision, recall, F<sub>1</sub> score, and macroaverage of the F<sub>1</sub> score.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>We constructed a data set of labeled tweets, 38% (296/779) of which were labeled as inaccurate health information, and 62% (483/779) of which were labeled as accurate health information. We suggest that this was highly efficacious as we did not include any tweets in which the physician annotators were unsure or in disagreement. Among the investigated deep learning models, the Transformer-based Model for Arabic Language Understanding version 0.2 (AraBERTv0.2)-large model was the most accurate, with an F<sub>1</sub> score of 87%, followed by AraBERT version 2–large and AraBERTv0.2-base.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Our results indicate that the pretrained language model AraBERTv0.2 is the best model for classifying tweets as carrying either inaccurate or accurate health information. Future studies should consider applying ensemble learning to combine the best models as it may produce better results.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>social media</kwd>
        <kwd>machine learning</kwd>
        <kwd>pretrained language models</kwd>
        <kwd>bidirectional encoder representations from transformers</kwd>
        <kwd>BERT</kwd>
        <kwd>deep learning</kwd>
        <kwd>health information</kwd>
        <kwd>infodemiology</kwd>
        <kwd>tweets</kwd>
        <kwd>language model</kwd>
        <kwd>health informatics</kwd>
        <kwd>misinformation</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>In the past 2 decades, there has been a dramatic increase in the number of people who use social media (SM) to participate in discussions on various topics, such as politics [<xref ref-type="bibr" rid="ref1">1</xref>], health [<xref ref-type="bibr" rid="ref2">2</xref>], and education [<xref ref-type="bibr" rid="ref3">3</xref>]. Regarding health-related information, several recent studies from Saudi Arabia found that Twitter is the preferred SM platform for communicating and accessing medical information. For example, it was preferred by orthopedic surgeons to reply to (personal and professional) medical questions [<xref ref-type="bibr" rid="ref4">4</xref>], by dental practitioners for medical consultations [<xref ref-type="bibr" rid="ref5">5</xref>], by patients with diabetes to search for health information [<xref ref-type="bibr" rid="ref6">6</xref>], by female students at a university in Saudi Arabia to read about systemic <italic>lupus erythematosus</italic> [<xref ref-type="bibr" rid="ref7">7</xref>], and by adolescents to search for oral health information [<xref ref-type="bibr" rid="ref2">2</xref>].</p>
        <p>A significant problem with this form of communication is that there is no quality control over the medium, and most of the health information presented on Twitter seems inaccurate, as illustrated by the various studies summarized in <xref ref-type="table" rid="table1">Table 1</xref>. Indeed, multiple data science studies have used data sets of health-related communication on SM to study this phenomenon, and some studies [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>] went further to design frameworks for detecting the accuracy of health information on SM.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Summary of studies that analyzed the accuracy of health information on social media.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="110"/>
            <col width="100"/>
            <col width="90"/>
            <col width="240"/>
            <col width="100"/>
            <col width="140"/>
            <col width="110"/>
            <col width="110"/>
            <thead>
              <tr valign="top">
                <td>Studies</td>
                <td>Number of tweets or documents</td>
                <td>Sources</td>
                <td>Methods to label</td>
                <td>Language covered</td>
                <td>Percentage of the accuracy</td>
                <td>Topics covered</td>
                <td>Type of study</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Swetland et al [<xref ref-type="bibr" rid="ref11">11</xref>]</td>
                <td>358</td>
                <td>Twitter</td>
                <td>Expert votes; relabeling in cases of disagreement</td>
                <td>English</td>
                <td>25.4% inaccurate</td>
                <td>COVID-19</td>
                <td>Exploratory</td>
              </tr>
              <tr valign="top">
                <td>Albalawi et al [<xref ref-type="bibr" rid="ref12">12</xref>]</td>
                <td>109</td>
                <td>Twitter</td>
                <td>Two physicians; delete if there is a disagreement</td>
                <td>Arabic</td>
                <td>31% inaccurate</td>
                <td>General</td>
                <td>Quantitative pilot study</td>
              </tr>
              <tr valign="top">
                <td>Saeed et al [<xref ref-type="bibr" rid="ref8">8</xref>]</td>
                <td>208</td>
                <td>Twitter</td>
                <td>Expert votes; relabeling in cases of disagreement</td>
                <td>Arabic</td>
                <td>38% inaccurate</td>
                <td>Cancer</td>
                <td>ML<sup>a</sup></td>
              </tr>
              <tr valign="top">
                <td>Sharma et al [<xref ref-type="bibr" rid="ref13">13</xref>]</td>
                <td>183</td>
                <td>Facebook</td>
                <td>Two physicians; delete if there is a disagreement</td>
                <td>English</td>
                <td>12% inaccurate</td>
                <td>Zika</td>
                <td>Quantitative</td>
              </tr>
              <tr valign="top">
                <td>Alnemer et al [<xref ref-type="bibr" rid="ref14">14</xref>]</td>
                <td>625</td>
                <td>Twitter</td>
                <td>Vote if the experts do not agree</td>
                <td>Arabic</td>
                <td>50% inaccurate</td>
                <td>Only tweets from health professionals</td>
                <td>Quantitative and exploratory study</td>
              </tr>
              <tr valign="top">
                <td>Zhao et al [<xref ref-type="bibr" rid="ref10">10</xref>]</td>
                <td>5000</td>
                <td>Health forum</td>
                <td>Annotator voting; in addition, consulted an expert to validate information labeled as misleading</td>
                <td>Chinese</td>
                <td>11.4% misinformation</td>
                <td>Autism</td>
                <td>ML</td>
              </tr>
              <tr valign="top">
                <td>Sell et al [<xref ref-type="bibr" rid="ref15">15</xref>]</td>
                <td>2460</td>
                <td>Twitter</td>
                <td>Coders checked the interagreement on 200 tweets</td>
                <td>English</td>
                <td>10% inaccurate</td>
                <td>Ebola</td>
                <td>Quantitative</td>
              </tr>
              <tr valign="top">
                <td>Chew and Eysenbach [<xref ref-type="bibr" rid="ref16">16</xref>]</td>
                <td>5395</td>
                <td>Twitter</td>
                <td>Coder checked agreement on 125 tweets; unsubstantiated by the following reference standards: the CDC<sup>b</sup> and Public Health Agency of Canada for scientific claims and a panel of credible web-based news sources (eg, CNN<sup>c</sup> and BBC<sup>d</sup>) for news-related claims</td>
                <td>English</td>
                <td>4.5% inaccurate</td>
                <td>H1N1</td>
                <td>Exploratory</td>
              </tr>
              <tr valign="top">
                <td>Sicilia et al [<xref ref-type="bibr" rid="ref9">9</xref>]</td>
                <td>800</td>
                <td>Twitter</td>
                <td>Annotator’s agreement; relabeling in cases of disagreement; here, the definition for misinformation was “news items without a source”</td>
                <td>English</td>
                <td>Unknown</td>
                <td>Zika</td>
                <td>ML</td>
              </tr>
              <tr valign="top">
                <td>Kalyanam et al [<xref ref-type="bibr" rid="ref17">17</xref>]</td>
                <td>47 million</td>
                <td>Twitter</td>
                <td>Type of hashtags</td>
                <td>English</td>
                <td>25% of the analyzed tweets were speculative</td>
                <td>Ebola</td>
                <td>Quantitative</td>
              </tr>
              <tr valign="top">
                <td>Al-Rakhami and Al-Amri [<xref ref-type="bibr" rid="ref18">18</xref>]</td>
                <td>409,484</td>
                <td>Twitter; keywords</td>
                <td>Although they used coders, their definition of a rumor included lack of a source; hence, unconfirmed information was automatically classified as uncredible; in addition, tweets were classified by only 1 coder who checked interagreement on 20 tweets</td>
                <td>Not noted, but the keywords were in English</td>
                <td>70% uncredible</td>
                <td>COVID-19</td>
                <td>ML</td>
              </tr>
              <tr valign="top">
                <td>Elhadad et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td>
                <td>7486</td>
                <td>Various websites</td>
                <td>Fact-checking websites and official websites</td>
                <td>English</td>
                <td>21%</td>
                <td>COVID-19</td>
                <td>ML</td>
              </tr>
              <tr valign="top">
                <td>Seltzer et al [<xref ref-type="bibr" rid="ref20">20</xref>]</td>
                <td>500</td>
                <td>Instagram</td>
                <td>Coders’ agreement</td>
                <td>English</td>
                <td>23%</td>
                <td>Zika</td>
                <td>Exploratory</td>
              </tr>
              <tr valign="top">
                <td>Ghenai et al [<xref ref-type="bibr" rid="ref21">21</xref>]</td>
                <td>26,728</td>
                <td>Twitter</td>
                <td>Defined keywords to the extracted tweets based on rumors identified from the WHO<sup>e</sup> website; then, the coders labeled the tweets</td>
                <td>English</td>
                <td>32%</td>
                <td>Zika</td>
                <td>ML</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>ML: machine learning.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>CDC: Centers for Disease Control and Prevention.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>CNN: Cable News Network.</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>BBC: British Broadcasting Corporation.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>WHO: World Health Organization.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Previous studies have focused on specific health issues and sometimes on specific types of rumors [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. This suggests the need for a more general framework that can detect the accuracy of health information across known and previously unknown health conditions, such as during the outbreak of a previously unknown infectious disease.</p>
        <p>Given the prevalent use of Twitter for the spreading of health information in Saudi Arabia [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>], we aimed to inform the development of a new and more generic framework that is not bound to a specific disease or rumor type and detect the accuracy of a broad base of health-related tweets in Arabic.</p>
      </sec>
      <sec>
        <title>Related Work</title>
        <p>In this section, we review the methods used to label health-related tweets as either accurate or inaccurate to create labeled data sets. We also review previously proposed machine learning (ML) models for detecting the accuracy of health-related tweets, including deep learning (DL).</p>
        <sec>
          <title>Methods Used to Label Health-Related Tweets</title>
          <p>Studies addressing the accuracy of health-related tweets can be classified into 3 groups. The first group comprised studies that labeled health-related tweets according to the information they contained, regardless of the source of the information. The second group comprised studies that relied on external (fact-checking or very reputable) websites. The last group comprised studies that relied on various characteristics of the tweets or only on the source of the information to judge the accuracy of the tweets.</p>
          <p>Regarding the concepts of accuracy and misinformation, Chou et al [<xref ref-type="bibr" rid="ref25">25</xref>] defined <italic>misinformation</italic> as information that lacks scientific evidence. A more precise definition can be found in the study by Tan et al [<xref ref-type="bibr" rid="ref26">26</xref>], where the authors defined inaccurate information or misinformation as “explicitly false,” according to what would be deemed incorrect by expert consensus. In the study by Nyhan and Reifler [<xref ref-type="bibr" rid="ref27">27</xref>], the authors combined these definitions to describe misinformation or inaccurate health information as information that is not supported by clear evidence and expert opinion.</p>
          <p>Studies relying on the opinions of experts seemed to indirectly or directly use these definitions to assess accuracy; however, it should be noted that, although misinformation is inaccurate, it is not necessarily intended to be so. In contrast, disinformation is information that is intentionally deceptive [<xref ref-type="bibr" rid="ref28">28</xref>]. Examples of <italic>opinions of experts</italic> studies are included in <xref ref-type="table" rid="table1">Table 1</xref> [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. These involved labeling health-related tweets based on the opinions of health experts. The tweets were labeled as inaccurate or accurate by at least 2 experts. A third expert was typically involved when there was a disagreement between the original 2 experts: this expert cast the deciding vote for controversial tweets.</p>
          <p>Vraga and Bode [<xref ref-type="bibr" rid="ref29">29</xref>] criticized the abovementioned definition of misinformation, raising the point that there are many issues on which experts do not agree. However, they state that as long as there is more evidence supporting the information, the agreement rate between experts will increase. Taking a stricter approach, Albalawi et al [<xref ref-type="bibr" rid="ref12">12</xref>] and Sharma et al [<xref ref-type="bibr" rid="ref13">13</xref>] excluded tweets on which experts disagreed in an attempt to exclude uncertainty from their data sets. <xref ref-type="table" rid="table1">Table 1</xref> summarizes these studies.</p>
          <p>Unsurprisingly, studies that relied on expert opinion used relatively small data sets (ranging from 109 to 625 tweets) compared with studies that used other labeling methods (<xref ref-type="table" rid="table1">Table 1</xref>). Even those that used nonexperts but used manual coding (performed by nonexpert annotators) tended to work on a small sample of the data set [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref20">20</xref>].</p>
          <p>The second group comprised studies that relied on an external website, such as a fact-checking website, to label the tweets. One such example is the study by Elhadad et al [<xref ref-type="bibr" rid="ref19">19</xref>], which relied on a fact-checking website to identify misleading information. A similar method was used by Ghenai et al [<xref ref-type="bibr" rid="ref21">21</xref>], who relied on the website of the World Health Organization (WHO) to identify 6 rumors. From these rumors, they derived keywords to extract relevant tweets. The drawback of this method is that only tweets relevant to specific rumors were extracted; thus, the model was trained only on this limited number of rumors. Furthermore, these methods are highly language restricted: both studies referred to in <xref ref-type="table" rid="table1">Table 1</xref> were performed in English, as mandated by the WHO website and the fact-checking website.</p>
          <p>Other methods relied on various characteristics of the tweets or only on the source of the information without judging the actual information. For example, in the study by Kalyanam et al [<xref ref-type="bibr" rid="ref17">17</xref>], the authors identified tweets as credible if they included hashtags that indicated that they originated from noted agencies or other reliable sources, and tweets were identified as speculative if they included hashtags that implied an increase in fear, rumors, or scams.</p>
          <p>Similarly, Sicilia et al [<xref ref-type="bibr" rid="ref9">9</xref>], Al-Rakhami and Al-Amri [<xref ref-type="bibr" rid="ref18">18</xref>], and Chew and Eysenbach [<xref ref-type="bibr" rid="ref16">16</xref>] defined credible tweets as tweets that have information from a confirmed, reliable source, such as the WHO, Centers for Disease Control and Prevention, or another official health agency. This method differs from the method used by the second group mentioned previously as it first identified a tweet and then examined its source. In contrast, the methods in the second group first identified a trustworthy website and then used the information on the website to identify tweets of interest.</p>
          <p>More generally, Yin et al [<xref ref-type="bibr" rid="ref30">30</xref>] stated that a website is <italic>trustworthy</italic> if it provides correct information and suggests that information is likely to be true if it is provided by a trustworthy website. Studies that relied on trustworthy websites to identify rumors [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref21">21</xref>] seemed to follow this definition, even if they did not explicitly state it.</p>
          <p>It should be noted that based on the data in <xref ref-type="table" rid="table1">Table 1</xref>, all Arabic studies that relied only on expert opinion [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref14">14</xref>] were small scale and qualitative; therefore, it would be impossible to scale them up. Notably, the percentage of inaccurate tweets for English studies that rely on expert opinions is in the range of 10% to 25%, whereas the corresponding range for Arabic studies is 31% to 50%. This finding suggests a greater occurrence of inaccurate health-related tweets in Arabic than in English.</p>
        </sec>
        <sec>
          <title>ML Approaches</title>
          <p>Of the 14 studies reported in <xref ref-type="table" rid="table1">Table 1</xref>, which analyzed the accuracy of health-related tweets in general, 6 (43%) proceeded to train an ML model to detect the accuracy of health information, as shown in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
          <table-wrap position="float" id="table2">
            <label>Table 2</label>
            <caption>
              <p>Summary of studies that developed ML<sup>a</sup> models to detect the accuracy of health-related information.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="170"/>
              <col width="270"/>
              <col width="200"/>
              <col width="360"/>
              <thead>
                <tr valign="top">
                  <td>Study</td>
                  <td>ML approach</td>
                  <td>Results</td>
                  <td>Labeling type</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>Elhadad et al [<xref ref-type="bibr" rid="ref19">19</xref>]</td>
                  <td>Deep learning multimodel, GRU<sup>b</sup>, LSTM<sup>c</sup>, and CNN<sup>d</sup></td>
                  <td>99.99% (F<sub>1</sub> score)</td>
                  <td>Ground truth data from websites</td>
                </tr>
                <tr valign="top">
                  <td>Ghenai et al [<xref ref-type="bibr" rid="ref21">21</xref>]</td>
                  <td>Random forest</td>
                  <td>94.5% (weighted average for F<sub>1</sub> score)</td>
                  <td>Crowdsource agreement but keywords are based on 4 WHO<sup>e</sup> website-identified rumors</td>
                </tr>
                <tr valign="top">
                  <td>Al-Rakhami and Al-Amri [<xref ref-type="bibr" rid="ref18">18</xref>]</td>
                  <td>Ensemble learning and random forest+SVM<sup>f</sup></td>
                  <td>97.8% (accuracy)</td>
                  <td>Single annotator only after confirming source</td>
                </tr>
                <tr valign="top">
                  <td>Zhao et al [<xref ref-type="bibr" rid="ref10">10</xref>]</td>
                  <td>Random forest</td>
                  <td>84.4% (F<sub>1</sub> score)</td>
                  <td>Annotator vote; in addition, consulted an expert to validate misleading information</td>
                </tr>
                <tr valign="top">
                  <td>Sicilia et al [<xref ref-type="bibr" rid="ref9">9</xref>]</td>
                  <td>Random forest</td>
                  <td>69.9% (F<sub>1</sub> score)</td>
                  <td>Agreement of a health expert</td>
                </tr>
                <tr valign="top">
                  <td>Saeed et al [<xref ref-type="bibr" rid="ref8">8</xref>]</td>
                  <td>Random forest</td>
                  <td>83.5% (accuracy)</td>
                  <td>Agreement of a health expert</td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table2fn1">
                <p><sup>a</sup>ML: machine learning.</p>
              </fn>
              <fn id="table2fn2">
                <p><sup>b</sup>GRU: gated recurrent unit.</p>
              </fn>
              <fn id="table2fn3">
                <p><sup>c</sup>LSTM: long short-term memory.</p>
              </fn>
              <fn id="table2fn4">
                <p><sup>d</sup>CNN: convolutional neural network.</p>
              </fn>
              <fn id="table2fn5">
                <p><sup>e</sup>WHO: World Health Organization.</p>
              </fn>
              <fn id="table2fn6">
                <p><sup>f</sup>SVM: support vector machine.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
          <p>Studies reporting on training ML models included Elhadad et al [<xref ref-type="bibr" rid="ref19">19</xref>] and Al-Rakhami and Al-Amri [<xref ref-type="bibr" rid="ref18">18</xref>], who used ensemble learning on an English data set. Elhadad et al [<xref ref-type="bibr" rid="ref19">19</xref>] used ensemble learning that involved multiple DL architectures, and Al-Rakhami and Al-Amri [<xref ref-type="bibr" rid="ref18">18</xref>] trained ensemble models comprising traditional ML algorithms, such as support vector machine (SVM) and random forest (RF). Another similarity between these studies is the method used to identify misleading information. Elhadad et al [<xref ref-type="bibr" rid="ref19">19</xref>] built their data set by extracting ground truth data and rumors from fact-checking websites. Al-Rakhami and Al-Amri [<xref ref-type="bibr" rid="ref18">18</xref>] considered tweets credible if they have a reliable source and misleading otherwise. Both models reported a high level of accuracy (&#62;97%), as shown in <xref ref-type="table" rid="table2">Table 2</xref>.</p>
          <p>From <xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref>, it is clear that studies that relied on a fact-checking website [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref21">21</xref>] and studies that determined the accuracy of a tweet based on its source [<xref ref-type="bibr" rid="ref18">18</xref>] obtained a high level of accuracy, possibly as these models were trained on relatively large data sets.</p>
          <p>For example, Al-Rakhami and Al-Amri [<xref ref-type="bibr" rid="ref18">18</xref>] trained their model using 409,484 tweets. However, automated labeling left open the possibility of incorrect labeling, and all these studies were conducted in English.</p>
          <p>Most of the studies that developed ML models focused on outbreaks (4/6, 67% of studies). Studies that developed ML models for nonoutbreak conditions [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref10">10</xref>] obtained less accurate results compared with outbreak conditions. This might be because these nonoutbreak condition models were trained on a limited number of documents compared with the outbreak models. We also found that the level of accuracy obtained for nonoutbreak data sets was approximately 84% (<xref ref-type="table" rid="table2">Table 2</xref>). It is also notable that all of these studies trained an RF model.</p>
          <p><xref ref-type="table" rid="table2">Table 2</xref> (and our associated literature review) suggests that recent advancements in DL have not been sufficiently applied to the detection of misleading Arabic health information. In our previous work, we have shown that DL architectures using word embedding as an input layer outperform other traditional ML models, such as SVM and naive Bayes, in the detection of Arabic health-related information on SM [<xref ref-type="bibr" rid="ref31">31</xref>]; however, in this paper, we move past that to the classification of Arabic health-related tweets based on their accuracy.</p>
          <p>Word embedding is a learned representation of words in natural language processing (NLP) [<xref ref-type="bibr" rid="ref32">32</xref>]. Words with similar meanings typically have similar numbers in their vectors. The closer the words are in meaning, the shorter the distance between the 2 vectors representing them. One of the main criticisms of the word embedding approach is that it is considered context free; that is, the embedding of a word is not affected by its position in the sentence [<xref ref-type="bibr" rid="ref33">33</xref>]. Hence, it is also referred to as static word embedding. However, in practice, the meaning of a word may depend on its position in a sentence.</p>
          <p>In recent years, pretrained language models have been proven to work well for many NLP tasks, including entity recognition, language translation, and text classification [<xref ref-type="bibr" rid="ref34">34</xref>]. Unlike static word embedding techniques, such as Skip-Gram and Continuous Bag of Words, language models can learn the context of the words and thus assign different values for the words depending on their context [<xref ref-type="bibr" rid="ref33">33</xref>]. There are different types of language models, including contextual word vectors and embeddings from language models [<xref ref-type="bibr" rid="ref33">33</xref>]. One of the most popular language models is the bidirectional encoder representations from transformers (BERT), which has been proven to perform well in text classification tasks.</p>
          <p>The superiority of transformer models compared with other text classification methods is well documented, especially in the recent literature. Multiple studies have compared transformer models with other DL models [<xref ref-type="bibr" rid="ref35">35</xref>-<xref ref-type="bibr" rid="ref39">39</xref>], and the results showed that transformers outperformed the ML models, including different DL architectures and traditional ML models, such as SVMs and RF. This indicates the potential capability of transformers to better detect the accuracy of Arabic health information on SM.</p>
          <p>Therefore, in this study, we aimed to contribute to this field by developing a data set of certified accurate or inaccurate Arabic health-related tweets and investigating the ability of the BERT or pretrained word embedding model to detect the accuracy of Arabic health-related tweets across a wide range of health-related issues.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>The empirical method comprised 2 parts. The first part addressed the extraction of health-related tweets using the model proposed in our previous study [<xref ref-type="bibr" rid="ref31">31</xref>]. In that study, we used a health lexicon that focused more on general health keywords rather than specific outbreaks, as a recent study suggested that general health misinformation is more likely to spread than, for example, COVID-19 [<xref ref-type="bibr" rid="ref40">40</xref>]. In contrast, <xref ref-type="table" rid="table1">Table 1</xref> illustrates that most studies in this area focused on a specific domain or disease outbreak.</p>
        <p>The extracted health-related tweets were labeled by health experts as either accurate or inaccurate. <xref rid="figure1" ref-type="fig">Figure 1</xref> presents an overview of this portion of the study.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Overview of the process followed in labeling tweets as either accurate or inaccurate [<xref ref-type="bibr" rid="ref31">31</xref>]. ML: machine learning.</p>
          </caption>
          <graphic xlink:href="formative_v6i6e34834_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>In the second part, we propose and evaluate 2 types of trustworthiness-detecting models to automatically classify health-related tweets as either accurate or inaccurate: bidirectional long short-term memory (BLSTM) DL models and pretrained transformer language models.</p>
      </sec>
      <sec>
        <title>Building Data Sets of Trustworthy Health-Related Tweets</title>
        <p>In this study, we used 2 data sets containing health-related tweets. The first data set was the result of our previous study [<xref ref-type="bibr" rid="ref31">31</xref>].</p>
        <p>The first data set was extracted from 297,928 tweets posted between July 15 and August 31, 2019. Of these 297,928 tweets, 5000 (1.68%) were randomly sampled and labeled by 2 annotators as either <italic>health-related</italic> or <italic>not health-related</italic>. A third annotator resolved disagreements between the 2 annotators.</p>
        <p>The first data set was extracted during the summer holidays in Saudi Arabia for 45 consecutive days. To assess generality, we extracted the second data set for a different timeframe: during <italic>Hajj</italic> and <italic>Eid al Adha</italic> (Muslim holy days) and during school days between March 1 and April 15, 2019. The second set of 900 tweets was extracted using the ML methodology proposed in the same study [<xref ref-type="bibr" rid="ref31">31</xref>], as the availability of health professionals was constrained by the ongoing COVID-19 pandemic and the ML model derived in that study achieved a high-quality result (93% accuracy).</p>
        <p>The methodology proposed in the study by Albalawi et al [<xref ref-type="bibr" rid="ref31">31</xref>] comprised extracting tweets from a set of collected tweets with the help of a health lexicon and then further filtering out tweets not related to health with the help of an ML model. On the basis of the health lexicon, 217,702 tweets were extracted. Of the 217,702 tweets, we sampled 5000 (2.3%) tweets and applied the ML model to extract 900 (0.41%) health-related tweets.</p>
        <p>Finally, we added 900 tweets from the second data set to 900 tweets sampled from the first data set and had those 1800 tweets labeled as either accurate or inaccurate health information by 2 medical physicians.</p>
      </sec>
      <sec>
        <title>Labeling Accurate or Inaccurate Tweets</title>
        <p>The physicians were asked to manually label each of the 1800 health-related tweets into one of the following categories: <italic>accurate health information</italic>, <italic>inaccurate health information</italic>, and <italic>not sure about the accuracy</italic>.</p>
        <p>We followed the protocol of relying on the opinions of experts to define the accuracy of the information collected. Taking into account the points made by Vraga and Bode [<xref ref-type="bibr" rid="ref29">29</xref>], every tweet was assessed by 2 experts, and a tweet was included in the final data set for this study only if both experts agreed on its accuracy; that is, we reduced uncertainty by excluding information that was not sanctioned by all experts (indeed, we later show that between-physician reliability in this coding was limited, buttressing the need for increased certainty when using human classification, as stated by Vraga and Bode [<xref ref-type="bibr" rid="ref32">32</xref>]). The <italic>not sure</italic> option was offered to the physicians to avoid forcing them to evaluate the tweets if they did not have enough relevant health knowledge to accurately evaluate them or if the tweets were ambiguous.</p>
        <p>Although other studies invited a third annotator to resolve disagreements, our approach was stricter in reducing uncertainty in the data set by excluding tweets for which there was a disagreement between the 2 annotators. Of 1800 tweets, the 2 physicians agreed on 779 (43.3%) tweets, which were labeled as containing either accurate or inaccurate health information. The physicians disagreed on 9.1% (163/1800) of tweets. The remaining 47.7% (858/1800) of tweets were labeled as <italic>unsure</italic> by at least one physician. We dropped the tweets on which at least one of the physicians was unsure and used the remaining 779 tweets in our experiments.</p>
        <p>Although the 779 tweets constituted a relatively small data set, most of the data sets constructed in the literature based on agreements between health experts were relatively small. As shown in <xref ref-type="table" rid="table1">Table 1</xref>, the highest number of health-related tweets judged by health experts in other studies was 625 in the study by Alnemer et al [<xref ref-type="bibr" rid="ref14">14</xref>].</p>
        <p>These 779 tweets, labeled as either accurate or inaccurate, can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Please note that we only share tweet IDs and labels as the Twitter policy prevents the content of the tweets from being redistributed. These tweet IDs can be used to obtain the text of tweets using the Twitter application programming interface [<xref ref-type="bibr" rid="ref41">41</xref>].</p>
      </sec>
      <sec>
        <title>Considered DL Models</title>
        <sec>
          <title>Overview</title>
          <p>After completing the annotation of the health-related tweets as either accurate or inaccurate, we trained 16 classification models, 7 (44%) of which used a BLSTM architecture with pretrained word embeddings as their input layers, and 9 (56%) of which used a pretrained transformer language model. <xref rid="figure2" ref-type="fig">Figure 2</xref> illustrates the steps implemented during this stage. Further details are provided in the following sections.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>Overview of the process used to train and select machine learning models. BLSTM: bidirectional long short-term memory.</p>
            </caption>
            <graphic xlink:href="formative_v6i6e34834_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
        </sec>
        <sec>
          <title>The BLSTM Architecture</title>
          <p>For 44% (7/16) of the trained models, we used a BLSTM architecture with pretrained word embeddings as the input layer. Long short-term memory (LSTM) is a type of recurrent neural network that takes advantage of dependencies between parts of the input sequence and can learn these dependencies. LSTM also preserves the information of past input. The BLSTM variation differs from LSTM because of its ability to learn the dependencies between past and future elements [<xref ref-type="bibr" rid="ref42">42</xref>]. BLSTM has been found to perform well in many NLP tasks, including text classification [<xref ref-type="bibr" rid="ref43">43</xref>]. The BLSTM model begins with input and embedding layers to which a dropout layer is added, followed by a BLSTM layer with another added dropout layer [<xref ref-type="bibr" rid="ref31">31</xref>]. BLSTM has been shown to perform better than traditional ML models (SVM, naive Bayes, k-nearest neighbors, and logistic regression) and conventional neural networks in a previous study on detecting Arabic health-related tweets [<xref ref-type="bibr" rid="ref31">31</xref>]. For the input layer, we used 7 pretrained word embedding models for Arabic [<xref ref-type="bibr" rid="ref44">44</xref>-<xref ref-type="bibr" rid="ref47">47</xref>]. It should be noted that AraVec, Mazajak, and ArWordVec each come in 2 variations (Continuous Bag of Words and Skip-Gram), and the seventh model used fastText embeddings.</p>
        </sec>
        <sec>
          <title>Transformer Models</title>
          <p>BERT is a transformer language model that has shown superiority in many NLP tasks.</p>
          <p>Different Arabic pretrained language models exist, which are based on transformers that have been developed recently by the Arabic NLP community. Most of these pretrained language models were built on top of the BERT-base model. Some of them also provided a version based on BERT-large.</p>
          <p>The difference between BERT-base and BERT-large is that BERT-base uses 12 layers, a hidden size of 768, 12 heads, and approximately 136 million parameters, whereas the BERT-large model uses 24 layers, a hidden size of 1024, 16 heads, and approximately 370 million parameters [<xref ref-type="bibr" rid="ref48">48</xref>]. Not all models leverage BERT-large, as it is more difficult to train and comes with a higher computational cost than BERT-base [<xref ref-type="bibr" rid="ref49">49</xref>].</p>
          <p>Examples of pretrained Arabic language representation models that offer both base and large variants are ArabicBERT [<xref ref-type="bibr" rid="ref50">50</xref>] and Transformer-based Model for Arabic Language Understanding (AraBERT) [<xref ref-type="bibr" rid="ref51">51</xref>]. AraBERT was considered the first Arabic-specific transformer language model introduced in 2020 by Antoun et al [<xref ref-type="bibr" rid="ref51">51</xref>]. In 2021, an updated version of AraBERT was released [<xref ref-type="bibr" rid="ref52">52</xref>]. AraBERT is considered one of the best transformer language models for NLP, outperforming other models for Arabic sentiment analysis [<xref ref-type="bibr" rid="ref53">53</xref>]. AraBERT version 2 (AraBERTv2) preprocesses text using Farasa segmentation. Farasa segmentation involves breaking the words based on the prefix and suffix [<xref ref-type="bibr" rid="ref54">54</xref>], whereas AraBERT version 0.2 (AraBERTv0.2) preprocesses the text without using Farasa segmentation. In this study, we experimented with these 6 models: AraBERTv2, AraBERTv0.2, and ArabicBERT in both variants of BERT (base and large).</p>
          <p>In addition to 6 models, we also investigated 3 other state-of-the-art pretrained language models, namely QARiB [<xref ref-type="bibr" rid="ref55">55</xref>], MARBERT, and ARBERT [<xref ref-type="bibr" rid="ref56">56</xref>], which are based only on BERT-base. These models reportedly perform well on text classification tasks [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref55">55</xref>-<xref ref-type="bibr" rid="ref57">57</xref>]. <xref ref-type="table" rid="table3">Table 3</xref> summarizes the characteristics of the pretrained language models used in this study.</p>
          <table-wrap position="float" id="table3">
            <label>Table 3</label>
            <caption>
              <p>Pretrained language models.</p>
            </caption>
            <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
              <col width="170"/>
              <col width="130"/>
              <col width="290"/>
              <col width="410"/>
              <thead>
                <tr valign="top">
                  <td>Name</td>
                  <td>Basis</td>
                  <td>Size</td>
                  <td>Corpus</td>
                </tr>
              </thead>
              <tbody>
                <tr valign="top">
                  <td>ARBERT [<xref ref-type="bibr" rid="ref56">56</xref>]</td>
                  <td>BERT<sup>a</sup>-base</td>
                  <td>61 GB of MSA<sup>b</sup> text (6.5 billion tokens)</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Books and news (news and Wikipedia articles)</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>MARBERT [<xref ref-type="bibr" rid="ref56">56</xref>]</td>
                  <td>BERT-base</td>
                  <td>128 GB of text (15.6 billion tokens)</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>1 billion Arabic tweets</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>QARiB [<xref ref-type="bibr" rid="ref55">55</xref>]</td>
                  <td>BERT-base</td>
                  <td>14 billion tokens; vocabulary: 64,000</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>420 million tweets and approximately 180 million sentences of text from Arabic Giga Word, Abulkhair Arabic Corpus, and OPUS<sup>c</sup></p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>ArabicBERT [<xref ref-type="bibr" rid="ref50">50</xref>]</td>
                  <td>BERT-base and BERT-large</td>
                  <td>95 GB of text and 8.2 billion words</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>Arabic OSCAR<sup>d</sup> version, Wikipedia, and other resources</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>AraBERTv0.2<sup>e</sup> [<xref ref-type="bibr" rid="ref52">52</xref>]</td>
                  <td>BERT-base and BERT-large</td>
                  <td>77 GB, 200,095,961 lines, 8,655,948,860 words, or 82,232,988,358 characters</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>OSCAR unshuffled and filtered</p>
                      </list-item>
                      <list-item>
                        <p>Arabic Wikipedia articles</p>
                      </list-item>
                      <list-item>
                        <p>The 1.5 billion words Arabic Corpus</p>
                      </list-item>
                      <list-item>
                        <p>The OSIAN<sup>f</sup> corpus</p>
                      </list-item>
                      <list-item>
                        <p>Assafir news articles</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
                <tr valign="top">
                  <td>AraBERTv2<sup>g</sup> [<xref ref-type="bibr" rid="ref52">52</xref>]</td>
                  <td>BERT-base and BERT-large</td>
                  <td>77 GB, 200,095,961 lines, 8,655,948,860 words, or 82,232,988,358 characters</td>
                  <td>
                    <list list-type="bullet">
                      <list-item>
                        <p>OSCAR, unshuffled and filtered</p>
                      </list-item>
                      <list-item>
                        <p>Arabic Wikipedia articles</p>
                      </list-item>
                      <list-item>
                        <p>The 1.5 billion words Arabic corpus</p>
                      </list-item>
                      <list-item>
                        <p>The OSIAN corpus</p>
                      </list-item>
                      <list-item>
                        <p>Assafir news articles</p>
                      </list-item>
                    </list>
                  </td>
                </tr>
              </tbody>
            </table>
            <table-wrap-foot>
              <fn id="table3fn1">
                <p><sup>a</sup>BERT: bidirectional encoder representations from transformers.</p>
              </fn>
              <fn id="table3fn2">
                <p><sup>b</sup>MSA: Modern Standard Arabic.</p>
              </fn>
              <fn id="table3fn3">
                <p><sup>c</sup>OPUS: open parallel corpus.</p>
              </fn>
              <fn id="table3fn4">
                <p><sup>d</sup>OSCAR: Open Superlarge Crawled Aggregated corpus.</p>
              </fn>
              <fn id="table3fn5">
                <p><sup>e</sup>AraBERTv0.2: Transformer-based Model for Arabic Language Understanding version 0.2.</p>
              </fn>
              <fn id="table3fn6">
                <p><sup>f</sup>OSIAN: Open Source International Arabic News.</p>
              </fn>
              <fn id="table3fn7">
                <p><sup>g</sup>AraBERTv2: Transformer-based Model for Arabic Language Understanding version 2.</p>
              </fn>
            </table-wrap-foot>
          </table-wrap>
        </sec>
        <sec>
          <title>Evaluation Metrics</title>
          <p>The F<sub>1</sub> score, recall, precision, accuracy, and macroaverage of the F<sub>1</sub> score were used to evaluate the ML models, as detailed in <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>. The macroaveraged F<sub>1</sub> score is the averaged F<sub>1</sub> score across all classes, which are accurate and inaccurate health-related tweets [<xref ref-type="bibr" rid="ref58">58</xref>].</p>
          <boxed-text id="box1" position="float">
            <title>Metrics used to evaluate the machine learning models.</title>
            <p>
              <bold>Recall</bold>
            </p>
            <list list-type="bullet">
              <list-item>
                <p>True positives / (true positives + false negatives)</p>
              </list-item>
            </list>
            <p>
              <bold>Precision</bold>
            </p>
            <list list-type="bullet">
              <list-item>
                <p>True positives / (true positives + false positives)</p>
              </list-item>
            </list>
            <p>
              <bold>F<sub>1</sub> score</bold>
            </p>
            <list list-type="bullet">
              <list-item>
                <p>(2 × precision × recall) / (precision + recall)</p>
              </list-item>
            </list>
            <p>
              <bold>Accuracy</bold>
            </p>
            <list list-type="bullet">
              <list-item>
                <p>(true positives + true negatives) / total sample</p>
              </list-item>
            </list>
            <p>
              <bold>Macroaveraged F<sub>1</sub> score</bold>
            </p>
            <list list-type="bullet">
              <list-item>
                <p><inline-graphic xlink:href="formative_v6i6e34834_fig3.png" xlink:type="simple" mimetype="image"/><bold>(1)</bold>, where N is the number of classes</p>
              </list-item>
            </list>
          </boxed-text>
        </sec>
        <sec>
          <title>Preprocessing Data</title>
          <p>In this study, text was preprocessed following the procedure outlined by the authors of the corresponding pretrained word embedding models. Li et al [<xref ref-type="bibr" rid="ref59">59</xref>] found that this is the best text preprocessing practice when working with pretrained word embeddings. Similarly, for all pretrained word embedding models [<xref ref-type="bibr" rid="ref44">44</xref>-<xref ref-type="bibr" rid="ref47">47</xref>] and pretrained language models [<xref ref-type="bibr" rid="ref50">50</xref>,<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref56">56</xref>], we followed the steps provided by the original studies.</p>
          <p>Of the 779 tweets, we split the data set into training, validation, and test data sets in ratios of 507 tweets (65.1%) for training, 116 tweets (14.9%) for validating the model, and 156 tweets (20%) for testing.</p>
        </sec>
      </sec>
      <sec>
        <title>Ethics Approval</title>
        <p>This study did not require institutional review board approval from the Science and Engineering Research committee at the University of Limerick because ethical approval is not required for publicly available data. It should be emphasized that, during the study, any associated text that can be used to identify the authors of the tweets has been removed from the text (eg, @name, user ID).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Data Set Description</title>
        <p>The κ coefficient for all categories was 0.377, which is in fair agreement according to Cohen [<xref ref-type="bibr" rid="ref60">60</xref>]. However, the benchmark scale proposed by Fleiss et al [<xref ref-type="bibr" rid="ref61">61</xref>] to evaluate the agreement indicates that such a coefficient is poor (&#60;0.40=poor, 0.40-0.75=intermediate to good, and &#62;0.75=excellent). Given the low κ coefficients across the 3 categories, we considered only cases where both physicians were explicitly in agreement, as they were on 779 tweets from the original data sets.</p>
        <p>Of the 1021 tweets that were excluded, 874 (85.6%) were labeled <italic>not sure</italic> by at least one physician, and in the case of 147 (14.4%) tweets, the physicians disagreed regarding the accuracy of the tweets.</p>
        <p>Of the 779 tweets physicians agreed on in our data set, 296 (38%) were labeled as inaccurate and 483 (62%) were labeled as accurate. This finding is similar to the inaccuracies reported in other studies (<xref ref-type="table" rid="table1">Table 1</xref>).</p>
        <p><xref ref-type="boxed-text" rid="box2">Textbox 2</xref> presents examples of accurate and inaccurate health-related tweets. As can be seen from the tweets in the textbox, they cover a wide range of topics, including but not limited to psychology and cancer. Interestingly, in the third accurate tweet example, the difficulty for nonexperts in discerning accurate from inaccurate health information is illustrated, as advice against taking antidiarrhea drugs in the event of food poisoning is slightly counterintuitive.</p>
        <boxed-text id="box2" position="float">
          <title>Examples of inaccurate and accurate health-related tweets.</title>
          <p>
            <bold>Accurate</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>“Tomorrow enjoys the feast. and get closer to God with your sacrifice</p>
              <p>And eat but do not extravagant and feed the contented and be merciful as God has commanded you</p>
              <p>Eating too much red meat might:</p>
              <p>Raise the level of triglycerides</p>
              <p>Raise cholesterol</p>
              <p>Increase uric salt in the blood Increases gout attacks in the joints”</p>
            </list-item>
          </list>
          <list list-type="bullet">
            <list-item>
              <p>“Symptoms of social phobia</p>
              <p>Sometimes, social phobia can be accompanied by physical signs and symptoms, which may include:</p>
              <p>Flashness</p>
              <p>Rapid heart palpitations</p>
              <p>Shivering and sweating</p>
              <p>Upset stomach or nausea</p>
              <p>Difficulty catching breath</p>
              <p>Dizziness or lightheadedness</p>
              <p>Feeling like your mind has gone blank</p>
              <p>Muscle tension”</p>
            </list-item>
          </list>
          <list list-type="bullet">
            <list-item>
              <p>“In the event of food poisoning, please take care not to use antidiarrheal medicines, as they may worsen the condition”</p>
            </list-item>
            <list-item>
              <p>“Hemoglobin is a group of proteins in red blood cells whose function is to transport oxygen from the lungs to the body, return carbon dioxide from the body, and transport it to the lungs and get rid of it through breathing.</p>
            </list-item>
          </list>
          <p>Iron is an important element and enters the composition of hemoglobin, so if iron deficiency, hemoglobin decreases, and anemia occurs.”</p>
          <list list-type="bullet">
            <list-item>
              <p>“Among the ways to prevent lung cancer:</p>
              <p>Stay away from smoking</p>
              <p>Avoid passive smoking</p>
              <p>Avoid carcinogenic and radioactive materials.”</p>
            </list-item>
          </list>
          <p>
            <bold>Inaccurate</bold>
          </p>
          <list list-type="bullet">
            <list-item>
              <p>“Scientific research,</p>
              <p>The research says that Zamzam water bears the name (water), but it differs radically from water compounds, as all the waters of the world belong to the acidic compound, except for (Zamzam water).</p>
              <p>It is (alkaline!) Glory be to God. There is no other alkaline water on the face of the earth. So, when you drink it in abundance, the human body has a strong immunity against viruses!!”</p>
            </list-item>
          </list>
          <list list-type="bullet">
            <list-item>
              <p>“When Western scholars searched for the causes of mental illness, they found only two reasons (fear and sadness) fear of the future and sadness of the past, both of which are the opposite of happiness.”</p>
            </list-item>
            <list-item>
              <p>“Did you know that a 5-minute tantrum is so stressful that it weakens the immune system for more than 6 hours”</p>
            </list-item>
            <list-item>
              <p>“Cupping helps smokers to quit smoking or reduce the negative impact on the body through:</p>
              <p>Removing excess hemoglobin from the body by excreting aging red blood cells, and thus the disappearance of the pathological symptoms of high hemoglobin caused by smoking”</p>
            </list-item>
          </list>
          <list list-type="bullet">
            <list-item>
              <p>“Just a spoonful of cinnamon daily:</p>
              <p>Rich in anti-inflammatory and antioxidants</p>
              <p>Prevents all types of cancer</p>
              <p>Prevents heart disease</p>
              <p>Anti-diabetes”</p>
            </list-item>
          </list>
        </boxed-text>
        <p>Some tweets claimed the benefits of some traditional foods and spices. For example, some tweets promoted <italic>Zamzam</italic> (holy water for Muslims), claiming there was scientific research that stated that it could strengthen the human immune system; experts classified the information as inaccurate.</p>
        <p>In addition, the examples of accurate tweets presented here suggest that accurate health-related tweets tend to be more preventive in nature, a finding supported by the wider sampling of accurate tweets. As shown in <xref ref-type="boxed-text" rid="box2">Textbox 2</xref>, the accurate tweets advised users to stop eating too much red meat as it causes gout or increases cholesterol, stop smoking to prevent lung cancer, and stop taking antidiarrheal medicines in the event of food poisoning. In contrast, as noted earlier, inaccurate tweets promoted natural and alternative medicine such as cupping and drinking <italic>Zamzam</italic> water for their health benefits. An interesting example was in relation to cancer, where accurate tweets advised readers to stop smoking; however, some of the inaccurate tweets were also preventive, and they advised taking a spoonful of cinnamon to prevent all types of cancer.</p>
      </sec>
      <sec>
        <title>DL Models</title>
        <p>In terms of the comparison of models, we observed that overall, BERT models performed better than BLSTM models based on the accuracy and the F<sub>1</sub> score for both classes (when referring to the metric accuracy in this section, we will call it <italic>model accuracy</italic> to disambiguate it from the accurate or inaccurate classification). Overall, AraBERTv0.2-large performed better than all other models. Specifically, the best model was AraBERTv0.2-large (macro F<sub>1</sub> score 87%), followed by AraBERTv2-large (macro F<sub>1</sub> score 86%) and AraBERTv0.2-base (macro F<sub>1</sub> score 85%), as shown in <xref ref-type="table" rid="table4">Table 4</xref>. These overall figures mask somewhat larger, though still small, variations in the precision and recall scores of individual techniques for inaccurate and accurate tweets. For example, although AraBERTv0.2-base achieved a recall of 78% for inaccurate tweets, AraBERTv0.2-large achieved a recall of &#62;83%.</p>
        <p>The results also suggest that, in general, BERT-large models tended to be better at detecting inaccurate tweets than the BERT-base models. The large AraBERTv2, AraBERTv0.2, and ArabicBERT models performed better than their base versions at detecting inaccurate health tweets, as shown in <xref ref-type="table" rid="table4">Table 4</xref>. In contrast, the BERT-base models might be better at detecting accurate tweets, except for the AraBERTv2, whose large and base versions performed similarly.</p>
        <p>Of the pretrained word embeddings, the results in <xref ref-type="table" rid="table4">Table 4</xref> show that Mazajak Skip-Gram is the best based on <italic>model accuracy</italic> and F<sub>1</sub> score.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Comparison of the performance of machine learning models for detecting the accuracy of health-related tweets.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="310"/>
            <col width="0"/>
            <col width="120"/>
            <col width="0"/>
            <col width="120"/>
            <col width="0"/>
            <col width="120"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Model and class</td>
                <td colspan="2">Precision</td>
                <td colspan="2">Recall</td>
                <td colspan="2">F<sub>1</sub> score</td>
                <td colspan="2">Macroaverage</td>
                <td>Model accuracy</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="12">
                  <bold>AraBERTv2<sup>a</sup>-base</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inaccurate</td>
                <td colspan="2">0.804</td>
                <td colspan="2">0.7627</td>
                <td colspan="2">0.7826</td>
                <td colspan="2">0.8279</td>
                <td colspan="2">0.8397</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accurate</td>
                <td colspan="2">0.86</td>
                <td colspan="2">0.8866</td>
                <td colspan="2">0.8731</td>
                <td colspan="2">0.8279</td>
                <td colspan="2">0.8397</td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>AraBERTv2-large</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inaccurate</td>
                <td colspan="2">0.8276</td>
                <td colspan="2">0.8136<sup>b</sup></td>
                <td colspan="2">0.8205<sup>b</sup></td>
                <td colspan="2">0.8564<sup>b</sup></td>
                <td colspan="2">0.8654<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accurate</td>
                <td colspan="2">0.8878</td>
                <td colspan="2">0.8969</td>
                <td colspan="2">0.8923</td>
                <td colspan="2">0.8564<sup>b</sup></td>
                <td colspan="2">0.8654<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>AraBERTv0.2<sup>c</sup>-base</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inaccurate</td>
                <td colspan="2">0.8519</td>
                <td colspan="2">0.7797</td>
                <td colspan="2">0.8142</td>
                <td colspan="2">0.8543<sup>b</sup></td>
                <td colspan="2">0.8654<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accurate</td>
                <td colspan="2">0.8725</td>
                <td colspan="2">0.9175</td>
                <td colspan="2">0.8945</td>
                <td colspan="2">0.8543<sup>b</sup></td>
                <td colspan="2">0.8654<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>AraBERTv0.2-large</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inaccurate</td>
                <td colspan="2">0.8448</td>
                <td colspan="2">0.8305<sup>d</sup></td>
                <td colspan="2">0.8376<sup>d</sup></td>
                <td colspan="2">0.8701<sup>d</sup></td>
                <td colspan="2">0.8782<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accurate</td>
                <td colspan="2">0.898<sup>d</sup></td>
                <td colspan="2">0.9072</td>
                <td colspan="2">0.9025<sup>d</sup></td>
                <td colspan="2">0.8701<sup>d</sup></td>
                <td colspan="2">0.8782<sup>d</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>MARBERT</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inaccurate</td>
                <td colspan="2">0.7759</td>
                <td colspan="2">0.7627</td>
                <td colspan="2">0.7692</td>
                <td colspan="2">0.8154</td>
                <td colspan="2">0.8269</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accurate</td>
                <td colspan="2">0.8571</td>
                <td colspan="2">0.866</td>
                <td colspan="2">0.8615</td>
                <td colspan="2">0.8154</td>
                <td colspan="2">0.8269</td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>ARBERT</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inaccurate</td>
                <td colspan="2">0.7903</td>
                <td colspan="2">0.8305<sup>d</sup></td>
                <td colspan="2">0.8099</td>
                <td colspan="2">0.8447</td>
                <td colspan="2">0.8526</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accurate</td>
                <td colspan="2">0.8936</td>
                <td colspan="2">0.866</td>
                <td colspan="2">0.8796</td>
                <td colspan="2">0.8447</td>
                <td colspan="2">0.8526</td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>QARiB</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inaccurate</td>
                <td colspan="2">0.7797</td>
                <td colspan="2">0.7797</td>
                <td colspan="2">0.7797</td>
                <td colspan="2">0.8228</td>
                <td colspan="2">0.8333</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accurate</td>
                <td colspan="2">0.866</td>
                <td colspan="2">0.866</td>
                <td colspan="2">0.866</td>
                <td colspan="2">0.8228</td>
                <td colspan="2">0.8333</td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>ArabicBERT<sup>e</sup>-large</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inaccurate</td>
                <td colspan="2">0.8654</td>
                <td colspan="2">0.7627</td>
                <td colspan="2">0.8108</td>
                <td colspan="2">0.8532</td>
                <td colspan="2">0.8654<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accurate</td>
                <td colspan="2">0.8654</td>
                <td colspan="2">0.9278<sup>b</sup></td>
                <td colspan="2">0.8955<sup>b</sup></td>
                <td colspan="2">0.8532</td>
                <td colspan="2">0.8654<sup>b</sup></td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>ArabicBERT-base</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inaccurate</td>
                <td colspan="2">0.8913<sup>d</sup></td>
                <td colspan="2">0.6949</td>
                <td colspan="2">0.781</td>
                <td colspan="2">0.83492</td>
                <td colspan="2">0.8525</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accurate</td>
                <td colspan="2">0.8364</td>
                <td colspan="2">0.9485<sup>d</sup></td>
                <td colspan="2">0.8889</td>
                <td colspan="2">0.83492</td>
                <td colspan="2">0.8525</td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>BLSTM<sup>f</sup> Mazajak CBOW<sup>g</sup></bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inaccurate</td>
                <td colspan="2">0.7719</td>
                <td colspan="2">0.7458</td>
                <td colspan="2">0.7586</td>
                <td colspan="2">0.8079</td>
                <td colspan="2">0.8205</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accurate</td>
                <td colspan="2">0.8485</td>
                <td colspan="2">0.866</td>
                <td colspan="2">0.8571</td>
                <td colspan="2">0.8079</td>
                <td colspan="2">0.8205</td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>BLSTM Mazajak Skip-Gram</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inaccurate</td>
                <td colspan="2">0.8542</td>
                <td colspan="2">0.6949</td>
                <td colspan="2">0.7664</td>
                <td colspan="2">0.8222</td>
                <td colspan="2">0.8397</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accurate</td>
                <td colspan="2">0.8333</td>
                <td colspan="2">0.9278<sup>b</sup></td>
                <td colspan="2">0.8780</td>
                <td colspan="2">0.8222</td>
                <td colspan="2">0.8397</td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>BLSTM ArWordVec Skip-Gram</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inaccurate</td>
                <td colspan="2">0.8261</td>
                <td colspan="2">0.6441</td>
                <td colspan="2">0.7238</td>
                <td colspan="2">0.7919</td>
                <td colspan="2">0.8141</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accurate</td>
                <td colspan="2">0.8091</td>
                <td colspan="2">0.9175</td>
                <td colspan="2">0.8148</td>
                <td colspan="2">0.7919</td>
                <td colspan="2">0.8141</td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>BLSTM ArWordVec CBOW</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inaccurate</td>
                <td colspan="2">0.7925</td>
                <td colspan="2">0.7119</td>
                <td colspan="2">0.75</td>
                <td colspan="2">0.805</td>
                <td colspan="2">0.8205</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accurate</td>
                <td colspan="2">0.835</td>
                <td colspan="2">0.8866</td>
                <td colspan="2">0.86</td>
                <td colspan="2">0.805</td>
                <td colspan="2">0.8205</td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>BLSTM AraVec CBOW</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inaccurate</td>
                <td colspan="2">0.6865</td>
                <td colspan="2">0.7797</td>
                <td colspan="2">0.7302</td>
                <td colspan="2">0.7737</td>
                <td colspan="2">0.7821</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accurate</td>
                <td colspan="2">0.8571</td>
                <td colspan="2">0.866</td>
                <td colspan="2">0.8172</td>
                <td colspan="2">0.7737</td>
                <td colspan="2">0.7821</td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>BLSTM AraVec Skip-Gram</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inaccurate</td>
                <td colspan="2">0.7313</td>
                <td colspan="2">0.8305<sup>d</sup></td>
                <td colspan="2">0.7777</td>
                <td colspan="2">0.8136</td>
                <td colspan="2">0.8205</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accurate</td>
                <td colspan="2">0.8144</td>
                <td colspan="2">0.8144</td>
                <td colspan="2">0.8494</td>
                <td colspan="2">0.8136</td>
                <td colspan="2">0.8205</td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>BLSTM fastText</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Inaccurate</td>
                <td colspan="2">0.8158</td>
                <td colspan="2">0.5254</td>
                <td colspan="2">0.6392</td>
                <td colspan="2">0.7382</td>
                <td colspan="2">0.7756</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Accurate</td>
                <td colspan="2">0.7627</td>
                <td colspan="2">0.9278<sup>b</sup></td>
                <td colspan="2">0.8372</td>
                <td colspan="2">0.7382</td>
                <td colspan="2">0.7756</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>AraBERTv2: Transformer-based Model for Arabic Language Understanding version 2.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>Represents the second-best value.</p>
            </fn>
            <fn id="table4fn3">
              <p><sup>c</sup>AraBERTv0.2: Transformer-based Model for Arabic Language Understanding version 0.2.</p>
            </fn>
            <fn id="table4fn4">
              <p><sup>d</sup>Indicates the best value.</p>
            </fn>
            <fn id="table4fn5">
              <p><sup>e</sup>BERT: bidirectional encoder representations from transformers.</p>
            </fn>
            <fn id="table4fn6">
              <p><sup>f</sup>BLSTM: bidirectional long short-term memory.</p>
            </fn>
            <fn id="table4fn7">
              <p><sup>g</sup>CBOW: Continuous Bag of Words.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>As noted earlier, the examples given in the <italic>Results</italic> section showed that accurate tweets were more focused on preventive medicine, whereas inaccurate tweets were more focused on alternative and natural medicine. However, it could be argued that this is because of the keywords used in extracting and filtering the tweets or because of the selected tweet examples. Nevertheless, a previous study mentioned that the prevalence of natural alternatives and alternative medicine compared with medicine provided by the health care system [<xref ref-type="bibr" rid="ref62">62</xref>] may be harmful. To illustrate the importance of this with respect to specific patients, there was a reported case of a patient with cancer who took alternative medicine promoted on SM, which caused the hospital to temporarily stop her cancer treatment to repair the damage caused by that medicine [<xref ref-type="bibr" rid="ref63">63</xref>]. At a more general level, going forward, insights such as these could provide additional levers with which to detect inaccurate health tweets.</p>
        <p>The results of BLSTM with pretrained word embedding models (AraVec, Skip-Gram, and Mazajak) are comparable with the results of some BERT models, including MARBERT, QARiB, and ArabicBERT-large. Indeed, this has been previously reported in the literature, where MARBERT and QARiB outperformed some of the other transformer models, such as ArabicBERT and AraBERT [<xref ref-type="bibr" rid="ref55">55</xref>,<xref ref-type="bibr" rid="ref56">56</xref>]. Again, a takeaway from this is that pretrained word embeddings might outperform pretrained BERT models in this first comparative study directed at Arabic. There is no guaranteed best model between pretrained word embeddings and pretrained transformer models for this language.</p>
        <p>However, in general, the results showed the superiority of the BERT models over BLSTM with pretrained word embedding models. Overall, 19 best or second-best results were obtained by the 9 BERT-based approaches, whereas only 3 best or second-best results were obtained by the 7 pretrained word embedding models.</p>
        <p>Most models performed better at detecting accurate health tweets than inaccurate tweets. The detection rate (recall) for accurate tweets ranged from 0.9485 to 0.8144. This means that most of the models missed only approximately 5% to 19% of the accurate tweets, which is a promising result. In contrast, the detection rate for inaccurate tweets was lower and had a wider range, from 0.8305 to 0.5254, implying that the best models missed up to 17% of inaccurate tweets. This is concerning as we would like to successfully identify all inaccurate tweets, and even the best model missed 17% of them.</p>
        <p>The flip side of this is precision: how many accurate or inaccurate tweets identified by the technique are actually accurate or inaccurate. In terms of inaccurate tweets, the approaches ranged from 0.7759 to 0.8913—quite a large span, which means that if the wrong technique is chosen, approximately one-quarter of the tweets identified as inaccurate are incorrectly classified. Probably more of a concern is the number of tweets identified as accurate that are not. Similarly, here, the span ranged from 0.8913 to 0.7627, again implying that if the wrong technique is chosen, this could be problematic.</p>
        <p>Some models that had high detection rates for accurate health tweets could have low detection rates for inaccurate tweets. For example, the ArabicBERT-base and BLSTM fastText models were the best and second best at detecting accurate tweets, with success rates of 0.9485 and 0.9278, respectively. However, in detecting inaccurate tweets, BLSTM fastText had the lowest detection rate (52%) and the ArabicBERT-base model had the second-lowest detection rate (69%). In other words, a practitioner who uses the best model for identifying accurate health tweets might miss approximately 30% to 48% of inaccurate tweets.</p>
        <p>Similarly, the ARBERT and AraVec Skip-Gram models performed similarly to the AraBERTv0.2-large model in terms of precision when detecting inaccurate health-related tweets; however, these 2 models did not perform as well on the other metrics. For example, the AraVec Skip-Gram model had the second-lowest rate of <italic>model accuracy</italic> in classifying accurate tweets as inaccurate. Although the ARBERT model performed well compared with the BLSTM models, with regard to classifying accurate tweets as inaccurate, it had the third-lowest rate of <italic>model accuracy</italic> among the 9 BERT models tested in this study. In other words, the ARBERT models incorrectly classified accurate tweets as inaccurate at a higher rate than the 6 other BERT models, as shown in <xref ref-type="table" rid="table4">Table 4</xref>.</p>
        <p>Ideally, a technique would provide both high precision and high recall; however, this did not occur in the data set for either accurate or inaccurate tweets. AraBERTv0.2-large came closest in this regard, with high precision and recall for accurate tweets, the best recall for inaccurate tweets, and suboptimal precision for inaccurate tweets. Similarly, AraBERTv2-large performed quite well on accurate tweets but did not perform as well on inaccurate tweets.</p>
        <p>However, these models (AraBERTv0.2-large and AraBERTv2-large) consume relatively more resources, being based on BERT-large. Among the base models, AraBERTv0.2-base has an F<sub>1</sub> score of 0.8543, which is good, and also has a similar <italic>model accuracy</italic> to AraBERTv2-large. These models can be considered as an alternative if resources are an important consideration.</p>
        <p>Regarding the performance of pretrained word embeddings, we found that Mazajak Skip-Gram was the best. We made the same observation in our previous work on the detection of health-related tweets [<xref ref-type="bibr" rid="ref31">31</xref>].</p>
        <p>Finally, with respect to the accuracy of the best model in our study (ie, AraBERTv0.2-large), our results are satisfactory when compared with the results of previous studies [<xref ref-type="bibr" rid="ref8">8</xref>-<xref ref-type="bibr" rid="ref10">10</xref>] that make use of expert opinion. The F<sub>1</sub> score of our best model was 87%, whereas the best F<sub>1</sub> score reported in the study by Zhao et al [<xref ref-type="bibr" rid="ref10">10</xref>] was 84%, as shown in <xref ref-type="table" rid="table2">Table 2</xref>. Furthermore, although these previous studies targeted a specific health topic (such as cancer [<xref ref-type="bibr" rid="ref8">8</xref>] or autism [<xref ref-type="bibr" rid="ref10">10</xref>]), we used a data set of tweets on a wide range of health care topics, suggesting that it would be more difficult to classify our data set.</p>
        <p>It should be noted that all 3 studies with <italic>model accuracy</italic> or F<sub>1</sub> scores &#62;90% did not rely on expert opinion (<xref ref-type="table" rid="table1">Tables 1</xref> and <xref ref-type="table" rid="table2">2</xref>). In addition, 2 of these 3 studies [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>] targeted a specific outbreak condition (COVID-19), and their models were trained on a larger data set (eg, Al-Rakhami and Al-Amri [<xref ref-type="bibr" rid="ref18">18</xref>] trained their model on 409,484 tweets). For the third study [<xref ref-type="bibr" rid="ref21">21</xref>], the keywords used to extract initial tweets were derived from 6 preidentified rumors related to Zika. The size and nature of the data used to train these models might explain why they seemed to achieve better accuracy than the model proposed here. In this study, we trained a model to detect the accuracy of generic health-related information, making the approach applicable to tweets that are more or less categorical in their labeling (as illustrated in the samples in <xref ref-type="boxed-text" rid="box2">Textbox 2</xref>).</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study only considered tweets agreed upon by experts. Although this helps us reduce the uncertainty in our data set, it might be a limitation as the model is not trained or tested on tweets that are more marginal—tweets about which the experts are unsure.</p>
        <p>One of the strengths of this model is that it was trained on general health-related tweets. The accuracy of the model for each health condition or topic may vary, and future studies should evaluate the model for specific health topics.</p>
        <p>All models used here are language dependent and might not be directly applicable to other languages. However, there are BERT alternatives for many languages, and there is evidence that BERT outperforms word embedding-based models. Therefore, we believe that this model could perform similarly in other languages.</p>
        <p>Regarding the metrics used to evaluate the models, it should be noted that the F<sub>1</sub> measure has been subjected to some criticism. Although we showed the F<sub>1</sub> score for both classes (accurate and inaccurate health tweets), it should be noted that the measure gives equal importance to both classes (accurate and inaccurate health tweets). Moreover, the F<sub>1</sub> score generally does not consider true negatives in its equation [<xref ref-type="bibr" rid="ref64">64</xref>,<xref ref-type="bibr" rid="ref65">65</xref>].</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>The goal of this study was to develop and evaluate a state-of-the-art ML model for detecting the medical trustworthiness of health-related tweets in Arabic. To achieve this, we first constructed a labeled data set to train the classifiers. We then compared 2 different DL approaches for training a classification model, namely, 6 pretrained word embedding models as an input model for BLSTM and 11 pretrained transformer language models. The percentage of inaccurate health tweets in the data is approximately 38% (296/799), which is comparable with previous studies that used data sets with a number of inaccurate health-related tweets in the range of 30% to 50%. Our AraBERTv0.2-large model achieved 87.7% <italic>model accuracy</italic> on the test data set, which is satisfactory. Overall, our results clearly indicate that the AraBERTv0.2-large model outperforms the other models in detecting the medical accuracy of health-related tweets.</p>
        <p>This study established an ML model to identify the accuracy of health-related tweets in response to the proliferation of health misinformation on SM. Although misinformation detection has been researched, only 1 study was concerned with detecting the accuracy of Arabic health-related tweets, and it was only for a specific topic (cancer). Furthermore, no DL model has been evaluated in prior studies to detect the accuracy of Arabic health-related tweets. In this study, we used a more extensive data set to develop a more general model using state-of-the-art ML models that have not been implemented before for this type of problem.</p>
        <p>The potential of such work cannot be overstated. If a robust model can be built, it will allow for the detection and dissemination of accurate tweets. Similarly, this would allow for the flagging of inaccurate tweets. Both measures would significantly improve health information dissemination on Twitter. However, it should be noted that although this work will improve the situation, it will still inaccurately classify 13% of the tweets.</p>
        <p>Moreover, the examples in <xref ref-type="boxed-text" rid="box2">Textbox 2</xref> imply disparities between accurate and inaccurate information in terms of the topics covered across the data set—a trend supported by the informal sampling of that data set. Accurate tweets seem to be more preventive, whereas inaccurate health tweets seem to promote <italic>natural</italic> and alternative medicine. Thus, it might be more feasible to develop a model for detecting health topics in combination with a model for detecting the accuracy of health information, thereby improving overall accuracy.</p>
        <p>To further improve the accuracy of the developed model, ensemble learning can yield better results by combining models that perform well (ArabicBERT-large, ARBERT, and AraVec Skip-Gram). However, ArabicBERT and AraBERTv0.2 were trained on a similar corpus, as shown in <xref ref-type="table" rid="table3">Table 3</xref>. Another approach could be to combine models pretrained on different corpora, such as ArabicBERT-large and MARBERT (ArabicBERT pretrained on Wikipedia articles and news articles; MARBERT pretrained on 1 billion tweets).</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Tweet IDs with their labels used in the study.</p>
        <media xlink:href="formative_v6i6e34834_app1.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 26 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AraBERT</term>
          <def>
            <p>Transformer-based Model for Arabic Language Understanding</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AraBERTv0.2</term>
          <def>
            <p>Transformer-based Model for Arabic Language Understanding version 0.2</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">AraBERTv2</term>
          <def>
            <p>Transformer-based Model for Arabic Language Understanding version 2</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">BERT</term>
          <def>
            <p>bidirectional encoder representations from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">BLSTM</term>
          <def>
            <p>bidirectional long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">DL</term>
          <def>
            <p>deep learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">LSTM</term>
          <def>
            <p>long short-term memory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">RF</term>
          <def>
            <p>random forest</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">SM</term>
          <def>
            <p>social media</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">SVM</term>
          <def>
            <p>support vector machine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">WHO</term>
          <def>
            <p>World Health Organization</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors would like to thank all physicians who participated in the process of evaluating the health tweets in this study. This work was supported, in part, by Taibah University, Al-Ula, Saudi Arabia, and by a grant from the Science Foundation Ireland (grant 13/RC/2094).</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ott</surname>
              <given-names>BL</given-names>
            </name>
          </person-group>
          <article-title>The age of Twitter: Donald J. Trump and the politics of debasement</article-title>
          <source>Critical Stud Media Commun</source>
          <year>2016</year>
          <month>12</month>
          <day>23</day>
          <volume>34</volume>
          <issue>1</issue>
          <fpage>59</fpage>
          <lpage>68</lpage>
          <pub-id pub-id-type="doi">10.1080/15295036.2016.1266686</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>El Tantawi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bakhurji</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Ansari</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>AlSubaie</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Al Subaie</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>AlAli</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Indicators of adolescents' preference to receive oral health information using social media</article-title>
          <source>Acta Odontol Scand</source>
          <year>2019</year>
          <month>04</month>
          <volume>77</volume>
          <issue>3</issue>
          <fpage>213</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1080/00016357.2018.1536803</pub-id>
          <pub-id pub-id-type="medline">30632864</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hew</surname>
              <given-names>KF</given-names>
            </name>
          </person-group>
          <article-title>Using Twitter for education: beneficial or simply a waste of time?</article-title>
          <source>Comput Educ</source>
          <year>2017</year>
          <month>03</month>
          <volume>106</volume>
          <fpage>97</fpage>
          <lpage>118</lpage>
          <pub-id pub-id-type="doi">10.1016/j.compedu.2016.12.004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Justinia</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Alyami</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Qahtani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bashanfar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>El-Khatib</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yahya</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zagzoog</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Social media and the orthopaedic surgeon: a mixed methods study</article-title>
          <source>Acta Inform Med</source>
          <year>2019</year>
          <month>03</month>
          <volume>27</volume>
          <issue>1</issue>
          <fpage>23</fpage>
          <lpage>8</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/31213739"/>
          </comment>
          <pub-id pub-id-type="doi">10.5455/aim.2019.27.23-28</pub-id>
          <pub-id pub-id-type="medline">31213739</pub-id>
          <pub-id pub-id-type="pii">AIM-27-23</pub-id>
          <pub-id pub-id-type="pmcid">PMC6511278</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hamasha</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Alghofaili</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Obaid</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alhamdan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Alotaibi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Aleissa</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Alenazi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Alshehri</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Geevarghese</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Social media utilization among dental practitioner in Riyadh, Saudi Arabia</article-title>
          <source>Open Dentistry J</source>
          <year>2019</year>
          <month>02</month>
          <day>28</day>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>101</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.2174/1874210601913010101</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jamal</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Khan</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>AlHumud</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Duhyyim</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alrashed</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bin Shabr</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Alteraif</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Almuziri</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Househ</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Qureshi</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Association of online health information-seeking behavior and self-care activities among type 2 diabetic patients in Saudi Arabia</article-title>
          <source>J Med Internet Res</source>
          <year>2015</year>
          <month>08</month>
          <day>12</day>
          <volume>17</volume>
          <issue>8</issue>
          <fpage>e196</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2015/8/e196/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.4312</pub-id>
          <pub-id pub-id-type="medline">26268425</pub-id>
          <pub-id pub-id-type="pii">v17i8e196</pub-id>
          <pub-id pub-id-type="pmcid">PMC4642387</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Omair</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>AlOhaly</surname>
              <given-names>RY</given-names>
            </name>
            <name name-style="western">
              <surname>Alashgar</surname>
              <given-names>LM</given-names>
            </name>
          </person-group>
          <article-title>Awareness and misconceptions of female students in King Saud University on systemic lupus erythematosus</article-title>
          <source>Rheumatology (Sunnyvale)</source>
          <year>2015</year>
          <volume>05</volume>
          <issue>03</issue>
          <fpage>165</fpage>
          <pub-id pub-id-type="doi">10.4172/2161-1149.1000165</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saeed</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Yafooz</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Sarem</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hezzam</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Detecting health-related rumors on Twitter using machine learning methods</article-title>
          <source>Int J Advanced Comput Sci Application</source>
          <year>2020</year>
          <volume>11</volume>
          <issue>8</issue>
          <pub-id pub-id-type="doi">10.14569/ijacsa.2020.0110842</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sicilia</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Giudice</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pei</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Pechenizkiy</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Soda</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Health-related rumour detection on Twitter</article-title>
          <source>Proceedings of the 2017 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)</source>
          <year>2017</year>
          <conf-name>2017 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)</conf-name>
          <conf-date>Nov 13-16, 2017</conf-date>
          <conf-loc>Kansas City, MO, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/bibm.2017.8217899</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Da</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Detecting health misinformation in online health communities: incorporating behavioral features into machine learning based approaches</article-title>
          <source>Inf Process Manag</source>
          <year>2021</year>
          <month>01</month>
          <volume>58</volume>
          <issue>1</issue>
          <fpage>102390</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ipm.2020.102390</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Swetland</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Rothrock</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Andris</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Nguyen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Rothrock</surname>
              <given-names>SG</given-names>
            </name>
          </person-group>
          <article-title>Accuracy of health-related information regarding COVID-19 on Twitter during a global pandemic</article-title>
          <source>World Med Health Policy</source>
          <year>2021</year>
          <month>07</month>
          <day>29</day>
          <volume>13</volume>
          <issue>3</issue>
          <fpage>503</fpage>
          <lpage>17</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/34540337"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/wmh3.468</pub-id>
          <pub-id pub-id-type="medline">34540337</pub-id>
          <pub-id pub-id-type="pii">WMH3468</pub-id>
          <pub-id pub-id-type="pmcid">PMC8441792</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Albalawi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Nikolov</surname>
              <given-names>NS</given-names>
            </name>
            <name name-style="western">
              <surname>Buckley</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Trustworthy health-related tweets on social media in Saudi Arabia: tweet metadata analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2019</year>
          <month>10</month>
          <day>08</day>
          <volume>21</volume>
          <issue>10</issue>
          <fpage>e14731</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2019/10/e14731/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/14731</pub-id>
          <pub-id pub-id-type="medline">31596242</pub-id>
          <pub-id pub-id-type="pii">v21i10e14731</pub-id>
          <pub-id pub-id-type="pmcid">PMC6914129</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yadav</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Yadav</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ferdinand</surname>
              <given-names>KC</given-names>
            </name>
          </person-group>
          <article-title>Zika virus pandemic-analysis of Facebook as a social media health information platform</article-title>
          <source>Am J Infect Control</source>
          <year>2017</year>
          <month>03</month>
          <day>01</day>
          <volume>45</volume>
          <issue>3</issue>
          <fpage>301</fpage>
          <lpage>2</lpage>
          <pub-id pub-id-type="doi">10.1016/j.ajic.2016.08.022</pub-id>
          <pub-id pub-id-type="medline">27776823</pub-id>
          <pub-id pub-id-type="pii">S0196-6553(16)30918-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alnemer</surname>
              <given-names>KA</given-names>
            </name>
            <name name-style="western">
              <surname>Alhuzaim</surname>
              <given-names>WM</given-names>
            </name>
            <name name-style="western">
              <surname>Alnemer</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Alharbi</surname>
              <given-names>BB</given-names>
            </name>
            <name name-style="western">
              <surname>Bawazir</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Barayyan</surname>
              <given-names>OR</given-names>
            </name>
            <name name-style="western">
              <surname>Balaraj</surname>
              <given-names>FK</given-names>
            </name>
          </person-group>
          <article-title>Are health-related tweets evidence based? Review and analysis of health-related tweets on twitter</article-title>
          <source>J Med Internet Res</source>
          <year>2015</year>
          <month>10</month>
          <day>29</day>
          <volume>17</volume>
          <issue>10</issue>
          <fpage>e246</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2015/10/e246/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.4898</pub-id>
          <pub-id pub-id-type="medline">26515535</pub-id>
          <pub-id pub-id-type="pii">v17i10e246</pub-id>
          <pub-id pub-id-type="pmcid">PMC4642373</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sell</surname>
              <given-names>TK</given-names>
            </name>
            <name name-style="western">
              <surname>Hosangadi</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Trotochaud</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Misinformation and the US Ebola communication crisis: analyzing the veracity and content of social media messages related to a fear-inducing infectious disease outbreak</article-title>
          <source>BMC Public Health</source>
          <year>2020</year>
          <month>05</month>
          <day>07</day>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>550</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcpublichealth.biomedcentral.com/articles/10.1186/s12889-020-08697-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12889-020-08697-3</pub-id>
          <pub-id pub-id-type="medline">32375715</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12889-020-08697-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC7202904</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chew</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Pandemics in the age of Twitter: content analysis of Tweets during the 2009 H1N1 outbreak</article-title>
          <source>PLoS One</source>
          <year>2010</year>
          <month>11</month>
          <day>29</day>
          <volume>5</volume>
          <issue>11</issue>
          <fpage>e14118</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0014118"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0014118</pub-id>
          <pub-id pub-id-type="medline">21124761</pub-id>
          <pub-id pub-id-type="pmcid">PMC2993925</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kalyanam</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Velupillai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Doan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Conway</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lanckriet</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Facts and fabrications about Ebola: a Twitter based study</article-title>
          <source>Proceedings of the KDD ’15</source>
          <year>2015</year>
          <conf-name>KDD ’15</conf-name>
          <conf-date>Aug 10 – 13, 2015</conf-date>
          <conf-loc>Sydney, Australia</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Al-Rakhami</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Amri</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Lies kill, facts save: detecting COVID-19 misinformation in Twitter</article-title>
          <source>IEEE Access</source>
          <year>2020</year>
          <volume>8</volume>
          <fpage>155961</fpage>
          <lpage>70</lpage>
          <pub-id pub-id-type="doi">10.1109/access.2020.3019600</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elhadad</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gebali</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>An ensemble deep learning technique to detect COVID-19 misleading information</article-title>
          <source>Advances in Networked-Based Information Systems</source>
          <year>2021</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Seltzer</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Horst-Martz</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Public sentiment and discourse about Zika virus on Instagram</article-title>
          <source>Public Health</source>
          <year>2017</year>
          <month>09</month>
          <volume>150</volume>
          <fpage>170</fpage>
          <lpage>5</lpage>
          <pub-id pub-id-type="doi">10.1016/j.puhe.2017.07.015</pub-id>
          <pub-id pub-id-type="medline">28806618</pub-id>
          <pub-id pub-id-type="pii">S0033-3506(17)30244-5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ghenai</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mejova</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Catching zika fever: application of crowdsourcing and machine learning for tracking health misinformation on Twitter</article-title>
          <source>Proceedings of the 2017 IEEE International Conference on Healthcare Informatics (ICHI)</source>
          <year>2017</year>
          <conf-name>2017 IEEE International Conference on Healthcare Informatics (ICHI)</conf-name>
          <conf-date>Aug 23-26, 2017</conf-date>
          <conf-loc>Park City, UT, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1109/ichi.2017.58</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>McKee</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Torbica</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Stuckler</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Systematic literature review on the spread of health-related misinformation on social media</article-title>
          <source>Soc Sci Med</source>
          <year>2019</year>
          <month>11</month>
          <volume>240</volume>
          <fpage>112552</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S0277-9536(19)30546-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.socscimed.2019.112552</pub-id>
          <pub-id pub-id-type="medline">31561111</pub-id>
          <pub-id pub-id-type="pii">S0277-9536(19)30546-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC7117034</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alhaddad</surname>
              <given-names>MS</given-names>
            </name>
          </person-group>
          <article-title>The use of social media among Saudi residents for medicines related information</article-title>
          <source>Saudi Pharm J</source>
          <year>2018</year>
          <month>12</month>
          <volume>26</volume>
          <issue>8</issue>
          <fpage>1106</fpage>
          <lpage>11</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1319-0164(18)30123-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jsps.2018.05.021</pub-id>
          <pub-id pub-id-type="medline">30510470</pub-id>
          <pub-id pub-id-type="pii">S1319-0164(18)30123-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC6257910</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alsobayel</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Use of social media for professional development by health care professionals: a cross-sectional web-based survey</article-title>
          <source>JMIR Med Educ</source>
          <year>2016</year>
          <month>09</month>
          <day>12</day>
          <volume>2</volume>
          <issue>2</issue>
          <fpage>e15</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2016/2/e15/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/mededu.6232</pub-id>
          <pub-id pub-id-type="medline">27731855</pub-id>
          <pub-id pub-id-type="pii">v2i2e15</pub-id>
          <pub-id pub-id-type="pmcid">PMC5053809</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>WS</given-names>
            </name>
            <name name-style="western">
              <surname>Oh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>WM</given-names>
            </name>
          </person-group>
          <article-title>Addressing health-related misinformation on social media</article-title>
          <source>JAMA</source>
          <year>2018</year>
          <month>12</month>
          <day>18</day>
          <volume>320</volume>
          <issue>23</issue>
          <fpage>2417</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2018.16865</pub-id>
          <pub-id pub-id-type="medline">30428002</pub-id>
          <pub-id pub-id-type="pii">2715795</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>AS</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chae</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Exposure to health (mis)information: lagged effects on young adults' health behaviors and potential pathways</article-title>
          <source>J Commun</source>
          <year>2015</year>
          <month>07</month>
          <day>06</day>
          <volume>65</volume>
          <issue>4</issue>
          <fpage>674</fpage>
          <lpage>98</lpage>
          <pub-id pub-id-type="doi">10.1111/jcom.12163</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nyhan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Reifler</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>When corrections fail: the persistence of political misperceptions</article-title>
          <source>Polit Behav</source>
          <year>2010</year>
          <month>03</month>
          <day>30</day>
          <volume>32</volume>
          <issue>2</issue>
          <fpage>303</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1007/s11109-010-9112-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pool</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fatehi</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Akhlaghpour</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Infodemic, misinformation and disinformation in pandemics: scientific landscape and the road ahead for public health informatics research</article-title>
          <source>Stud Health Technol Inform</source>
          <year>2021</year>
          <month>05</month>
          <day>27</day>
          <volume>281</volume>
          <fpage>764</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.3233/SHTI210278</pub-id>
          <pub-id pub-id-type="medline">34042681</pub-id>
          <pub-id pub-id-type="pii">SHTI210278</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vraga</surname>
              <given-names>EK</given-names>
            </name>
            <name name-style="western">
              <surname>Bode</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Defining misinformation and understanding its bounded nature: using expertise and evidence for describing misinformation</article-title>
          <source>Polit Commun</source>
          <year>2020</year>
          <month>02</month>
          <day>06</day>
          <volume>37</volume>
          <issue>1</issue>
          <fpage>136</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.1080/10584609.2020.1716500</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yin</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Truth discovery with multiple conflicting information providers on the web</article-title>
          <source>IEEE Trans Knowl Data Eng</source>
          <year>2008</year>
          <month>06</month>
          <volume>20</volume>
          <issue>6</issue>
          <fpage>796</fpage>
          <lpage>808</lpage>
          <pub-id pub-id-type="doi">10.1109/tkde.2007.190745</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Albalawi</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Buckley</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nikolov</surname>
              <given-names>NS</given-names>
            </name>
          </person-group>
          <article-title>Investigating the impact of pre-processing techniques and pre-trained word embeddings in detecting Arabic health information on social media</article-title>
          <source>J Big Data</source>
          <year>2021</year>
          <month>07</month>
          <day>02</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>95</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/34249602"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s40537-021-00488-w</pub-id>
          <pub-id pub-id-type="medline">34249602</pub-id>
          <pub-id pub-id-type="pii">488</pub-id>
          <pub-id pub-id-type="pmcid">PMC8253467</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Corrado</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dean</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Efficient estimation of word representations in vector space</article-title>
          <source>Proceedings of Workshop at ICLR</source>
          <year>2013</year>
          <conf-name>Workshop at ICLR</conf-name>
          <conf-date>May 2-4, 2013</conf-date>
          <conf-loc>Scottsdale, Arizona, USA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ui.adsabs.harvard.edu/abs/2013arXiv1301.3781M"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Pre-trained models for natural language processing: a survey</article-title>
          <source>Sci China Technol Sci</source>
          <year>2020</year>
          <month>09</month>
          <day>15</day>
          <volume>63</volume>
          <issue>10</issue>
          <fpage>1872</fpage>
          <lpage>97</lpage>
          <pub-id pub-id-type="doi">10.1007/s11431-020-1647-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Azzouza</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Akli-Astouati</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ibrahim</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>TwitterBERT: framework for Twitter sentiment analysis based on pre-trained language model representations</article-title>
          <source>Emerging Trends in Intelligent Computing and Informatics</source>
          <year>2020</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abu Farha</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Magdy</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>A comparative study of effective approaches for Arabic sentiment analysis</article-title>
          <source>Inf Process Manag</source>
          <year>2021</year>
          <month>03</month>
          <volume>58</volume>
          <issue>2</issue>
          <fpage>102438</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ipm.2020.102438</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Al-Twairesh</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>The evolution of language models applied to emotion analysis of Arabic Tweets</article-title>
          <source>Information</source>
          <year>2021</year>
          <month>02</month>
          <day>17</day>
          <volume>12</volume>
          <issue>2</issue>
          <fpage>84</fpage>
          <pub-id pub-id-type="doi">10.3390/info12020084</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>El-Alami</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Ouatik El Alaoui</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>En Nahnahi</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Contextual semantic embeddings based on fine-tuned AraBERT model for Arabic text multi-class categorization</article-title>
          <source>J King Saud Univ Comput Inf Sci</source>
          <year>2021</year>
          <month>02</month>
          <comment>(forthcoming)</comment>
          <pub-id pub-id-type="doi">10.1016/j.jksuci.2021.02.005</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>El-Razzaz</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fakhr</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Maghraby</surname>
              <given-names>FA</given-names>
            </name>
          </person-group>
          <article-title>Arabic gloss WSD using BERT</article-title>
          <source>Appl Sci</source>
          <year>2021</year>
          <month>03</month>
          <day>13</day>
          <volume>11</volume>
          <issue>6</issue>
          <fpage>2567</fpage>
          <pub-id pub-id-type="doi">10.3390/app11062567</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gomez-Perez</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Denaux</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia-Silva</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Understanding word embeddings and language models</article-title>
          <source>A Practical Guide to Hybrid Natural Language Processing</source>
          <year>2020</year>
          <publisher-loc>Cham</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Broniatowski</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Kerchner</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Farooq</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Jamison</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Quinn</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Ayers</surname>
              <given-names>JW</given-names>
            </name>
          </person-group>
          <article-title>Twitter and Facebook posts about COVID-19 are less likely to spread misinformation compared to other health topics</article-title>
          <source>PLoS One</source>
          <year>2022</year>
          <month>01</month>
          <day>12</day>
          <volume>17</volume>
          <issue>1</issue>
          <fpage>e0261768</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0261768"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0261768</pub-id>
          <pub-id pub-id-type="medline">35020727</pub-id>
          <pub-id pub-id-type="pii">PONE-D-21-17260</pub-id>
          <pub-id pub-id-type="pmcid">PMC8754324</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <article-title>Developer agreement and policy</article-title>
          <source>Developer Platform</source>
          <access-date>2021-03-06</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://developer.twitter.com/en/developer-terms/agreement-and-policy">https://developer.twitter.com/en/developer-terms/agreement-and-policy</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Investigating LSTM for punctuation prediction</article-title>
          <source>Proceedings of the 2016 10th International Symposium on Chinese Spoken Language Processing (ISCSLP)</source>
          <year>2016</year>
          <conf-name>2016 10th International Symposium on Chinese Spoken Language Processing (ISCSLP)</conf-name>
          <conf-date>Oct 17-20, 2016</conf-date>
          <conf-loc>Tianjin, China</conf-loc>
          <pub-id pub-id-type="doi">10.1109/iscslp.2016.7918492</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gulli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kapoor</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pal</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <source>Deep Learning with TensorFlow 2 and Keras: Regression, ConvNets, GANs, RNNs, NLP, and More with TensorFlow 2 and the Keras API, 2nd Edition</source>
          <year>2019</year>
          <publisher-loc>Birmingham, United Kingdom</publisher-loc>
          <publisher-name>Packt Publishing</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Soliman</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Eissa</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>El-Beltagy</surname>
              <given-names>SR</given-names>
            </name>
          </person-group>
          <article-title>AraVec: a set of Arabic word embedding models for use in Arabic NLP</article-title>
          <source>Procedia Comput Sci</source>
          <year>2017</year>
          <volume>117</volume>
          <fpage>256</fpage>
          <lpage>65</lpage>
          <pub-id pub-id-type="doi">10.1016/j.procs.2017.10.117</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grave</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bojanowski</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Joulin</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mikolov</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Learning word vectors for 157 languages</article-title>
          <source>Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)</source>
          <year>2018</year>
          <conf-name>Eleventh International Conference on Language Resources and Evaluation (LREC 2018)</conf-name>
          <conf-date>May 7-12, 2018</conf-date>
          <conf-loc>Miyazaki, Japan</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fouad</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Mahany</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Aljohani</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Abbasi</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Hassan</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>ArWordVec: efficient word embedding models for Arabic tweets</article-title>
          <source>Soft Comput</source>
          <year>2019</year>
          <month>06</month>
          <day>26</day>
          <volume>24</volume>
          <issue>11</issue>
          <fpage>8061</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1007/s00500-019-04153-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Farha</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Magdy</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Mazajak: an online Arabic sentiment analyser</article-title>
          <source>Proceedings of the Fourth Arabic Natural Language Processing Workshop</source>
          <year>2019</year>
          <conf-name>Fourth Arabic Natural Language Processing Workshop</conf-name>
          <conf-date>Jul 28-Aug 2, 2019</conf-date>
          <conf-loc>Florence, Italy</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/w19-4621</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Devlin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Toutanova</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title>
          <source>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</source>
          <year>2019</year>
          <conf-name>2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</conf-name>
          <conf-date>Jun 2-7, 2019</conf-date>
          <conf-loc>Minneapolis, Minnesota</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/N19-1423</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Chu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>A quantitative survey of communication optimizations in distributed deep learning</article-title>
          <source>IEEE Network</source>
          <year>2021</year>
          <month>5</month>
          <volume>35</volume>
          <issue>3</issue>
          <fpage>230</fpage>
          <lpage>7</lpage>
          <pub-id pub-id-type="doi">10.1109/mnet.011.2000530</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Safaya</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Abdullatif</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yuret</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>KUISAIL at SemEval-2020 Task 12: BERT-CNN for Offensive Speech Identification in Social Media</article-title>
          <source>Proceedings of the Fourteenth Workshop on Semantic Evaluation</source>
          <year>2020</year>
          <conf-name>Proceedings of the Fourteenth Workshop on Semantic Evaluation</conf-name>
          <conf-date>Dec 12-13, 2020</conf-date>
          <conf-loc>Online</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2020.semeval-1.271</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Antoun</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Baly</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Hajj</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>AraBERT: transformer-based model for Arabic language understanding</article-title>
          <source>Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection</source>
          <year>2020</year>
          <conf-name>Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language Detection</conf-name>
          <conf-date>May, 2020</conf-date>
          <conf-loc>Marseille, France</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="web">
          <source>Hugging Face</source>
          <access-date>2021-06-15</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://huggingface.co/aubmindlab/bert-large-arabertv2">https://huggingface.co/aubmindlab/bert-large-arabertv2</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Faraj</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Faraj</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Abdullah</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>SarcasmDet at sarcasm detection task 2021 in Arabic using AraBERT pretrained model</article-title>
          <source>Proceedings of the Sixth Arabic Natural Language Processing Workshop</source>
          <year>2021</year>
          <conf-name>Proceedings of the Sixth Arabic Natural Language Processing Workshop</conf-name>
          <conf-date>Apr 19, 2021</conf-date>
          <conf-loc>Kyiv, Ukraine (Virtual)</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Darwish</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mubarak</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Farasa: a new fast and accurate Arabic word segmenter</article-title>
          <source>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC'16)</source>
          <year>2016</year>
          <conf-name>Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC'16)</conf-name>
          <conf-date>May 2016</conf-date>
          <conf-loc>Portorož, Slovenia</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abdelali</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hassan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mubarak</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Darwish</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Samih</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Pre-training BERT on Arabic tweets: practical considerations</article-title>
          <source>arXiv</source>
          <year>2021</year>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abdul-Mageed</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Elmadany</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nagoudi</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <source>Proceedings of the 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)</source>
          <year>2021</year>
          <conf-name>The 59th Annual Meeting of the Association for Computational Linguistics and the 11th International Joint Conference on Natural Language Processing</conf-name>
          <conf-date>Aug, 2021</conf-date>
          <conf-loc>Online</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/2021.acl-long.551</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abu Farha</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Magdy</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Benchmarking transformer-based language models for Arabic sentiment and sarcasm detection</article-title>
          <source>Proceedings of the Sixth Arabic Natural Language Processing Workshop</source>
          <year>2021</year>
          <conf-name>Proceedings of the Sixth Arabic Natural Language Processing Workshop</conf-name>
          <conf-date>Apr, 2021</conf-date>
          <conf-loc>Kyiv, Ukraine (Virtual)</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhenyan</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Dan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Weiping</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chunxia</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>A supervised parameter estimation method of LDA</article-title>
          <source>Proceedings of the Asia-Pacific Web Conference</source>
          <year>2015</year>
          <conf-name>Asia-Pacific Web Conference</conf-name>
          <conf-date>2015</conf-date>
          <conf-loc>Guangzhou, China</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-319-25255-1_33</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Caragea</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Caragea</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Comparison of word embeddings and sentence encodings as generalized representations for crisis Tweet classification tasks</article-title>
          <source>Proceedings of the ISCRAM Asian Pacific 2018 Conference</source>
          <year>2018</year>
          <conf-name>ISCRAM Asian Pacific 2018 Conference</conf-name>
          <conf-date>Nov, 2018</conf-date>
          <conf-loc>Wellington, New Zealand</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Weighted kappa: nominal scale agreement with provision for scaled disagreement or partial credit</article-title>
          <source>Psychol Bull</source>
          <year>1968</year>
          <month>10</month>
          <volume>70</volume>
          <issue>4</issue>
          <fpage>213</fpage>
          <lpage>20</lpage>
          <pub-id pub-id-type="doi">10.1037/h0026256</pub-id>
          <pub-id pub-id-type="medline">19673146</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="book">
          <article-title>The measurement of interrater agreement</article-title>
          <source>Statistical Methods for Rates and Proportions</source>
          <year>2003</year>
          <publisher-loc>Hoboken, New Jersey, United States</publisher-loc>
          <publisher-name>Wiley</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nastasi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bryant</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Canner</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Dredze</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Camp</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Nagarajan</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Breast cancer screening and social media: a content analysis of evidence use and guideline opinions on twitter</article-title>
          <source>J Cancer Educ</source>
          <year>2018</year>
          <month>06</month>
          <volume>33</volume>
          <issue>3</issue>
          <fpage>695</fpage>
          <lpage>702</lpage>
          <pub-id pub-id-type="doi">10.1007/s13187-017-1168-9</pub-id>
          <pub-id pub-id-type="medline">28097527</pub-id>
          <pub-id pub-id-type="pii">10.1007/s13187-017-1168-9</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yamaguchi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Shimizu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Oya</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Horio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hida</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Drug-induced liver injury in a patient with nonsmall cell lung cancer after the self-administration of fenbendazole based on social media information</article-title>
          <source>Case Rep Oncol</source>
          <year>2021</year>
          <month>6</month>
          <day>17</day>
          <volume>14</volume>
          <issue>2</issue>
          <fpage>886</fpage>
          <lpage>91</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.karger.com?DOI=10.1159/000516276"/>
          </comment>
          <pub-id pub-id-type="doi">10.1159/000516276</pub-id>
          <pub-id pub-id-type="medline">34248555</pub-id>
          <pub-id pub-id-type="pii">cro-0014-0886</pub-id>
          <pub-id pub-id-type="pmcid">PMC8255718</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Powers</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Evaluation: from precision, recall and F-measure to ROC, informedness, markedness and correlation</article-title>
          <source>J Mach Learn Technol</source>
          <year>2011</year>
          <volume>2</volume>
          <issue>1</issue>
          <fpage>37</fpage>
          <lpage>63</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bioinfopublication.org/files/articles/2_1_1_JMLT.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hand</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Christen</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>A note on using the F-measure for evaluating record linkage algorithms</article-title>
          <source>Stat Comput</source>
          <year>2017</year>
          <month>4</month>
          <day>19</day>
          <volume>28</volume>
          <issue>3</issue>
          <fpage>539</fpage>
          <lpage>47</lpage>
          <pub-id pub-id-type="doi">10.1007/s11222-017-9746-6</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
