<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i1e47814</article-id>
      <article-id pub-id-type="pmid">39423004</article-id>
      <article-id pub-id-type="doi">10.2196/47814</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Fine-Tuned Bidirectional Encoder Representations From Transformers Versus ChatGPT for Text-Based Outpatient Department Recommendation: Comparative Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Khaw</surname>
            <given-names>Wan-Fei</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Rodrigues</surname>
            <given-names>Mário</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Teles</surname>
            <given-names>Ariel</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Jo</surname>
            <given-names>Eunbeen</given-names>
          </name>
          <degrees>BA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-2468-0114</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" equal-contrib="yes">
          <name name-style="western">
            <surname>Yoo</surname>
            <given-names>Hakje</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4341-5540</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>Jong-Ho</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1309-0821</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Kim</surname>
            <given-names>Young-Min</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-6914-901X</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Song</surname>
            <given-names>Sanghoun</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff7" ref-type="aff">7</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4234-232X</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Joo</surname>
            <given-names>Hyung Joon</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Medical Informatics</institution>
            <institution>Korea University College of Medicine</institution>
            <addr-line>73, Inchon-ro</addr-line>
            <addr-line>Seoul, 02841</addr-line>
            <country>Republic of Korea</country>
            <phone>82 2 920 5445</phone>
            <email>drjoohj@gmail.com</email>
          </address>
          <xref rid="aff4" ref-type="aff">4</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1846-8464</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Medical Informatics</institution>
        <institution>Korea University College of Medicine</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Bio-Mechatronic Engineering</institution>
        <institution>Sungkyunkwan University College of Biotechnology and Bioengineering</institution>
        <addr-line>Gyeonggi</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Medical AI Research Center, Research Institute for Future Medicine</institution>
        <institution>Samsung Medical Center</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Korea University Research Institute for Medical Bigdata Science</institution>
        <institution>Korea University</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Cardiology, Cardiovascular Center</institution>
        <institution>Korea University College of Medicine</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>School of Interdisciplinary Industrial Studies</institution>
        <institution>Hanyang University</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <aff id="aff7">
        <label>7</label>
        <institution>Department of Linguistics</institution>
        <institution>Korea University</institution>
        <addr-line>Seoul</addr-line>
        <country>Republic of Korea</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Hyung Joon Joo <email>drjoohj@gmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>18</day>
        <month>10</month>
        <year>2024</year>
      </pub-date>
      <volume>8</volume>
      <elocation-id>e47814</elocation-id>
      <history>
        <date date-type="received">
          <day>2</day>
          <month>4</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>17</day>
          <month>7</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>3</day>
          <month>8</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>13</day>
          <month>8</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Eunbeen Jo, Hakje Yoo, Jong-Ho Kim, Young-Min Kim, Sanghoun Song, Hyung Joon Joo. Originally published in JMIR Formative Research (https://formative.jmir.org), 18.10.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2024/1/e47814" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Patients often struggle with determining which outpatient specialist to consult based on their symptoms. Natural language processing models in health care offer the potential to assist patients in making these decisions before visiting a hospital.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to evaluate the performance of ChatGPT in recommending medical specialties for medical questions.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We used a dataset of 31,482 medical questions, each answered by doctors and labeled with the appropriate medical specialty from the health consultation board of NAVER (NAVER Corp), a major Korean portal. This dataset includes 27 distinct medical specialty labels. We compared the performance of the fine-tuned Korean Medical bidirectional encoder representations from transformers (KM-BERT) and ChatGPT models by analyzing their ability to accurately recommend medical specialties. We categorized responses from ChatGPT into those matching the 27 predefined specialties and those that did not. Both models were evaluated using performance metrics of accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-score.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>ChatGPT demonstrated an answer avoidance rate of 6.2% but provided accurate medical specialty recommendations with explanations that elucidated the underlying pathophysiology of the patient’s symptoms. It achieved an accuracy of 0.939, precision of 0.219, recall of 0.168, and an <italic>F</italic><sub>1</sub>-score of 0.134. In contrast, the KM-BERT model, fine-tuned for the same task, outperformed ChatGPT with an accuracy of 0.977, precision of 0.570, recall of 0.652, and an <italic>F</italic><sub>1</sub>-score of 0.587.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Although ChatGPT did not surpass the fine-tuned KM-BERT model in recommending the correct medical specialties, it showcased notable advantages as a conversational artificial intelligence model. By providing detailed, contextually appropriate explanations, ChatGPT has the potential to significantly enhance patient comprehension of medical information, thereby improving the medical referral process.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>natural language processing</kwd>
        <kwd>bidirectional encoder representations from transformers</kwd>
        <kwd>large language model</kwd>
        <kwd>generative pretrained transformer</kwd>
        <kwd>medical specialty prediction</kwd>
        <kwd>quality of care</kwd>
        <kwd>health care application</kwd>
        <kwd>ChatGPT</kwd>
        <kwd>BERT</kwd>
        <kwd>AI technology</kwd>
        <kwd>conversational agent</kwd>
        <kwd>AI</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>chatbot</kwd>
        <kwd>application</kwd>
        <kwd>health care</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Natural language processing technology has the potential to transform the process of health care and further improve the quality of care [<xref ref-type="bibr" rid="ref1">1</xref>]. Among natural language processing deep learning models, transformer-based models, including bidirectional encoder representations from transformers (BERT), GPT, and XLNet, have shown excellent performance in many health care applications, such as clinical coding [<xref ref-type="bibr" rid="ref2">2</xref>], named entity recognition [<xref ref-type="bibr" rid="ref3">3</xref>], and disease prediction based on clinical notes [<xref ref-type="bibr" rid="ref4">4</xref>]. Both BERT and GPT are advanced deep learning models that use transformer architectures, but they are fundamentally different. BERT is designed for bidirectional understanding of text, while GPT is designed for generative tasks and uses a unidirectional approach [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>]. In particular, ChatGPT is a large language model (LLM) developed by OpenAI as an instance of GPT-3.5 that generates human-like text responses to a wide range of prompts and questions [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. ChatGPT performed at or near the passing threshold of 60% accuracy on the United States Medical Licensing Examination, suggesting the potential integration into clinical decision-making [<xref ref-type="bibr" rid="ref8">8</xref>]. Recently, the application of ChatGPT for general users seeking medical information has been highlighted [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>].</p>
      <p>The disparity in medical knowledge and literacy between health care professionals and the general public, often termed as information asymmetry, may inadvertently result in an inappropriate allocation of medical services due to misunderstandings or lack of awareness about health conditions [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Identifying the right outpatient specialist for their symptoms can be challenging for patients and often results in added costs and time. This is exacerbated by the current referral system, which leads to delays and increased missed clinical appointments [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref>]. Improving the process of identifying suitable medical professionals can enhance the quality of care, reduce costs, and boost patients’ satisfaction [<xref ref-type="bibr" rid="ref16">16</xref>]. To address this issue, we developed Korean Medical BERT (KM-BERT), a medical domain–specific pretrained BERT model, which was trained on a corpus of 6 million sentences from medical textbooks, health information news, and medical research papers [<xref ref-type="bibr" rid="ref17">17</xref>]. Furthermore, we developed the fine-tuned KM-BERT model capable of recommending medical specialties based on general user queries [<xref ref-type="bibr" rid="ref18">18</xref>].</p>
      <p>Comparing these models can reveal which types of tasks each model is better suited to in the health care domain. For instance, one model may excel at predicting disease outcomes based on patient notes, while the other might be better at generating human-like text for health-related chatbots. In this study, we compare the performance of ChatGPT with that of the previously developed fine-tuned KM-BERT model, in line with previous research.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Collection</title>
        <p>The previous BERT study collected 82,312 health care counsel posts from the NAVER portal, a Korean portal that provides medical questions and answers to general users [<xref ref-type="bibr" rid="ref18">18</xref>]. The dataset used in this study was drawn from that collection. The medical question involves the portal user describing their symptoms and requesting medical advice and information, which includes laboratory tests, medications, procedures, presumptive diagnoses, and recommendations for health professionals and institutions. Medical questions posted by users of the portal are reviewed and responded to by certified doctors through the portal. Each post also includes a label indicating the relevant medical specialty. The dataset consisted of questions and medical specialty label pairs. Medical specialty labels for the questions were limited to 27 clinical departments for the development of the BERT model. The original dataset was divided into a training set consisting of 50,454 data pairs and a test set comprising 31,858 data pairs. The training set was used to develop the fine-tuned KM-BERT model through 5-fold cross-validation. From the original test set, wherein data pairs were posted between July 13, 2021, and September 13, 2021, this study used 31,482 data pairs after excluding 376 due to missing data (<xref ref-type="table" rid="table1">Table 1</xref>). The medical questions posed to ChatGPT are the same as the test set (31,482 data pairs) used to evaluate the fine-tuned KM-BERT model.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>The number of test data used to measure the performance of ChatGPT and the fine-tuned Korean Medical bidirectional encoder representations from transformers (KM-BERT) model (N=31,482).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Specialty</td>
                <td>Value, n (%)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Anesthesiology</td>
                <td>1980 (6.29)</td>
              </tr>
              <tr valign="top">
                <td>Cardiac and thoracic surgery</td>
                <td>46 (0.15)</td>
              </tr>
              <tr valign="top">
                <td>Cardiology</td>
                <td>184 (0.58)</td>
              </tr>
              <tr valign="top">
                <td>Dentistry</td>
                <td>1980 (6.29)</td>
              </tr>
              <tr valign="top">
                <td>Dermatology</td>
                <td>1980 (6.29)</td>
              </tr>
              <tr valign="top">
                <td>Emergency medicine</td>
                <td>591 (1.88)</td>
              </tr>
              <tr valign="top">
                <td>Endocrinology</td>
                <td>169 (0.54)</td>
              </tr>
              <tr valign="top">
                <td>Family medicine</td>
                <td>1980 (6.29)</td>
              </tr>
              <tr valign="top">
                <td>Gastroenterology and hepatology</td>
                <td>306 (0.97)</td>
              </tr>
              <tr valign="top">
                <td>General surgery</td>
                <td>3268 (10.38)</td>
              </tr>
              <tr valign="top">
                <td>Hematology and oncology</td>
                <td>156 (0.50)</td>
              </tr>
              <tr valign="top">
                <td>Infectious diseases</td>
                <td>146 (0.46)</td>
              </tr>
              <tr valign="top">
                <td>Nephrology</td>
                <td>67 (0.21)</td>
              </tr>
              <tr valign="top">
                <td>Neurology</td>
                <td>558 (1.77)</td>
              </tr>
              <tr valign="top">
                <td>Neurosurgery</td>
                <td>1980 (6.29)</td>
              </tr>
              <tr valign="top">
                <td>Obstetrics and gynecology</td>
                <td>2644 (8.40)</td>
              </tr>
              <tr valign="top">
                <td>Ophthalmology</td>
                <td>1980 (6.29)</td>
              </tr>
              <tr valign="top">
                <td>Orthopedic surgery</td>
                <td>1980 (6.29)</td>
              </tr>
              <tr valign="top">
                <td>Otolaryngology</td>
                <td>1980 (6.29)</td>
              </tr>
              <tr valign="top">
                <td>Pediatrics</td>
                <td>389 (1.24)</td>
              </tr>
              <tr valign="top">
                <td>Plastic surgery</td>
                <td>1980 (6.29)</td>
              </tr>
              <tr valign="top">
                <td>Psychiatry</td>
                <td>500 (1.59)</td>
              </tr>
              <tr valign="top">
                <td>Pulmonology</td>
                <td>43 (0.14)</td>
              </tr>
              <tr valign="top">
                <td>Radiology</td>
                <td>422 (1.34)</td>
              </tr>
              <tr valign="top">
                <td>Rehabilitation medicine</td>
                <td>1980 (6.29)</td>
              </tr>
              <tr valign="top">
                <td>Rheumatology</td>
                <td>213 (0.68)</td>
              </tr>
              <tr valign="top">
                <td>Urology</td>
                <td>1980 (6.29)</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Generating ChatGPT Medical Specialty Recommendations for Questions</title>
        <p>ChatGPT is based on the GPT-3.5 series, and this study used the “text-davinci-003” model, the latest version of the GPT-3.5 models available from the OpenAI application programming interface service at the time of the study [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. ChatGPT has a better understanding of English than of low-resource languages [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. Therefore, the questions were translated from Korean to English using the Google Translation application programming interface [<xref ref-type="bibr" rid="ref21">21</xref>]. Previous research has also been successful in translating medical words and sentences from Korean to English [<xref ref-type="bibr" rid="ref17">17</xref>]. ChatGPT can improve question comprehension depending on the prompting strategy [<xref ref-type="bibr" rid="ref22">22</xref>]. To prompt ChatGPT to answer the questions, the question was appended with the sentence, “In this case, which clinical department in the hospital would be better? Please recommend 3 in order of priority.”</p>
        <p>The training corpus used for ChatGPT has not been publicly disclosed, but it is understood that it was trained on a vast amount of text data from multiple languages and sources, including Korean [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. However, for this study, only translated sentences were used as inputs, which means they were not part of the original training samples used to develop ChatGPT. Furthermore, the original questions were randomly cross-checked to ensure that they were not indexed on Google.</p>
      </sec>
      <sec>
        <title>Evaluating the Performance of KM-BERT and ChatGPT</title>
        <p>This study was conducted in strict accordance with the “Guidelines for Developing and Reporting Machine Learning Predictive Models in Biomedical Research” as published by JMIR [<xref ref-type="bibr" rid="ref24">24</xref>]. The performance of appropriate medical specialty recommendations for medical questions from fine-tuned KM-BERT and ChatGPT was evaluated based on the same test set and 27 medical specialty labels. A confusion matrix for the 27 specialties was created to compare the first recommendation from each model to the correct medical specialty labels and to calculate true positives, false positives, true negatives, and false negatives [<xref ref-type="bibr" rid="ref25">25</xref>]. With an imbalance of data for each medical specialty, the performance was evaluated using macro-averaging accuracy, macro-averaging precision, macro-averaging recall, and macro-averaging <italic>F</italic><sub>1</sub>-score. The last layer of the fine-tuned KM-BERT used the softmax activation function for multiclassification, and performance was measured by comparing the first predicted medical specialty to the correct medical specialty label. The responses from ChatGPT were categorized into those that corresponded to the 27 predefined specialties and those that did not. This categorization was necessary because ChatGPT provided some responses that did not fit within the 27 predefined specialties. Out of a total of 31,482 questions, ChatGPT supplied first-rank responses corresponding to the 27 medical specialties 29,534 times (93.8%), second-rank responses 21,191 times (67.3%), and third-rank responses 19,291 times (61.3%).</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This research project, including the original data collection, was approved by the institutional review board of Korea University Anam Hospital (2024AN0315).</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Medical Specialty Recommendations by ChatGPT</title>
        <p>ChatGPT was able to recommend medical specialties for 29,534 (93.8%) of the total 31,482 questions. ChatGPT declined to answer the rest of the questions (eg, “Unfortunately, I cannot answer your question as I am not a qualified medical professional and cannot provide legal advice”). The responses provided by ChatGPT covered a wide range of 1685 clinical departments, centers, clinics, hospitals, and medical specialists. However, some of the responses did not fit into the predefined 27 clinical departments, with “department of internal medicine” being a common general response. ChatGPT also provided some answers that were not classifiable, such as those relating to medical schools or hospitals that could not be categorized (eg, “Korea University College of Medicine,” “Seoul National University Bundang Hospital,” and “Johns Hopkins Hospital”). ChatGPT gave hallucinated answers relating to clinics that were not actual locations, like “K Dental Clinic” [<xref ref-type="bibr" rid="ref19">19</xref>]. Overall, 842 of the 1685 distinct responses were able to be classified into 1 of the 27 clinical departments.</p>
        <p>ChatGPT had an answer avoidance rate of 6.2% for inquiries regarding medical specialty recommendations. <xref rid="figure1" ref-type="fig">Figure 1</xref> illustrates the response avoidance rate for each department of ChatGPT. Psychiatry had the highest avoidance rate, followed by family medicine and dermatology. On the other hand, nephrology, endocrinology, and rheumatology had the lowest avoidance rates, in that order.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Answer avoidance rate of ChatGPT to the medical specialty recommendation.</p>
          </caption>
          <graphic xlink:href="formative_v8i1e47814_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Performance of ChatGPT and KM-BERT</title>
        <p>ChatGPT’s overall performance on medical specialty recommendations was lower than the fine-tuned KM-BERT model (accuracy 0.939 for ChatGPT vs 0.977 for KM-BERT, precision 0.219 for ChatGPT vs 0.570 for KM-BERT, recall 0.168 for ChatGPT vs 0.652 for KM-BERT, <italic>F</italic><sub>1</sub>-score 0.134 for ChatGPT vs 0.587 for KM-BERT). In ChatGPT, the departments with the highest <italic>F</italic><sub>1</sub>-score were otolaryngology, obstetrics and gynecology, and urology, in that order, and the departments with the lowest <italic>F</italic><sub>1</sub>-score were family medicine, rehabilitation medicine, and pulmonology (<xref rid="figure2" ref-type="fig">Figure 2</xref>).</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-score of ChatGPT and KM-BERT for each department of test set evaluation. KM-BERT: Korean Medical bidirectional encoder representations from transformers.</p>
          </caption>
          <graphic xlink:href="formative_v8i1e47814_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In the health care industry, it is crucial to provide patients with a clear justification or explanation for any artificial intelligence (AI)–based recommendations [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. The growing demand for explainable AI technology in health care is consistent with this requirement [<xref ref-type="bibr" rid="ref28">28</xref>]. ChatGPT is a significantly more advanced model than BERT in this regard [<xref ref-type="bibr" rid="ref29">29</xref>]. For instance, consider the query, “Yesterday, I sprained my back while lifting something heavy. I felt an electric current in my lower back, and when I stretched my lower back, it was a little stiff and my left leg was very numb.” While BERT can accurately suggest the most appropriate medical specialty for this query, it can only offer a rough estimation by identifying the token the model is focusing on through a heatmap, etc (<xref rid="figure3" ref-type="fig">Figure 3</xref>). In contrast, ChatGPT can deduce the fundamental pathophysiology of the patient’s primary symptoms and provide a medical specialty recommendation accompanied by an explanation of the rationale, resulting in increased credibility and acceptance of the recommendation from the user’s perspective, even if it cannot address all inquiries. This may be one of the biggest advantages of ChatGPT as a conversational language model.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Medical specialty recommendation results of ChatGPT and BERT models. Left: output information from the KM-BERT model. The BERT model reliably predicts the medical specialty based on the calculated probability. The heatmap shows the average attention for each token, which can provide insights into the model’s decision-making process. The greater the brightness, the more attention. The order of the text under the heatmap has been changed as it was translated from Korean. Right: output from ChatGPT. Based on the input information, the model infers key pathophysiology and keywords from a medical perspective to recommend the appropriate medical specialty. BERT: bidirectional encoder representations from transformers; KM-BERT: Korean Medical bidirectional encoder representations from transformers.</p>
          </caption>
          <graphic xlink:href="formative_v8i1e47814_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>While ChatGPT did not outperform the fine-tuned BERT model in recommending departments for health care services, it displayed numerous advantages as a conversational language model. The advantages of ChatGPT can be useful in the health care industry. First, ChatGPT can be applied to medical consultations to help patients understand medical information. Patients prefer to receive information that is written in plain language, particularly in health care, where there is a large amount of unfamiliar terminology [<xref ref-type="bibr" rid="ref30">30</xref>]. Enhancing the ability of individuals to understand and interpret the meaning of health information needed to make appropriate health decisions can improve the efficiency of the health care system [<xref ref-type="bibr" rid="ref31">31</xref>-<xref ref-type="bibr" rid="ref33">33</xref>]. Second, ChatGPT can assist clinicians in evaluating and diagnosing a patient’s symptoms. Patients sometimes have difficulty describing their symptoms [<xref ref-type="bibr" rid="ref34">34</xref>]. By analyzing patients’ textual descriptions, ChatGPT can provide a more specific description of their symptoms, which can help clinicians better understand their patients and provide appropriate treatment [<xref ref-type="bibr" rid="ref35">35</xref>].</p>
        <p>The relatively poor performance of ChatGPT in this exploratory study could be attributed to the fact that the data sources used for its development were general data, mainly US-based data, with relatively little medical-specific data [<xref ref-type="bibr" rid="ref20">20</xref>]. However, OpenAI has recently launched a fine-tuning service for ChatGPT, which is expected to significantly enhance its performance. Fine-tuning will be especially crucial since each country operates a different medical service system. As a result, we can anticipate the emergence of several ChatGPT variants fine-tuned for use in the health care industry in the future.</p>
        <p>Finally, while ChatGPT offers incredible possibilities, concerns about the potential for generating untrue statements are growing [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]. Because ChatGPT is a generative model, some inaccuracies are inevitable, but they can be mitigated through fine-tuning with high-quality and reliable data resources [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref37">37</xref>]. It is also essential to develop and implement algorithms that can fact-check ChatGPT’s statements [<xref ref-type="bibr" rid="ref38">38</xref>]. By addressing these limitations, we can continue to explore the exciting potential of ChatGPT, ensuring that it remains a useful tool for the future of health care.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study has several limitations. First, the training datasets used for the 2 models were entirely distinct. Despite the extensively large corpus upon which ChatGPT is trained, the KM-BERT model, due to its pretraining with a corpus specific to the medical domain, may exhibit superior performance in the task of medical specialty classification. Second, diverse prompting strategies can affect the classification performance of ChatGPT. A recent study revealed a comparative underperformance of contemporary LLMs against smaller, fine-tuned BERT models, particularly in a zero-shot setting [<xref ref-type="bibr" rid="ref39">39</xref>]. Moreover, the accuracy and <italic>F</italic><sub>1</sub>-scores of LLMs differed significantly, by upwards of 10%, contingent upon the prompting strategy that is adopted. This suggests that the application of advanced prompting methodologies, such as autogenerate prompting and chain-of-thought prompting, could potentially enhance the performance of ChatGPT in the context of this study’s task [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>]. Third, this study provides insight into the medical inference ability of ChatGPT through the medical specialty classification and a use case scenario. However, it does not extend to a quantitative evaluation of other complementary studies through objective experimentation. Notably, this study used real-world case data, not included in ChatGPT’s training phase. Another previous study has also highlighted ChatGPT’s capability to deduce medical symptoms, diagnoses, and treatments without explicit medical training [<xref ref-type="bibr" rid="ref6">6</xref>]. The impact of the additional inferred information generated by ChatGPT on users’ decision-making process and behavioral change necessitates further exploration.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In conclusion, this study highlighted the capabilities of AI models, such as fine-tuned KM-BERT and ChatGPT, in recommending medical specialties based on general user queries. The fine-tuned KM-BERT model performed better in this task, while ChatGPT showed its strengths as a conversational AI model that can provide more context-aware responses. Future studies could aim to leverage the strengths of each model to create a more comprehensive and effective system for recommending medical specialties. This could improve the health care referral process and result in better health outcomes for patients. Moreover, with the availability of fine-tuning services for ChatGPT, we can expect the development of many more specialized AI models, potentially revolutionizing the delivery of health care information to patients.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group/>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">BERT</term>
          <def>
            <p>bidirectional encoder representations from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">KM-BERT</term>
          <def>
            <p>Korean Medical bidirectional encoder representations from transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was supported by a grant of the Ministry of Science and ICT (Information and Communication Technology), Republic of Korea, under the ICT Challenge and Advanced Network of HRD program (IITP-2024-RS-2022-00156439) supervised by the Institute of Information and Communications Technology Planning and Evaluation, and a grant of the medical data–driven hospital support project through the Korea Health Information Service, funded by the Ministry of Health and Welfare, Republic of Korea.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data that support the findings of this study are available from the corresponding author, HJJ, upon reasonable request.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>HJJ conceptualized the study and contributed to the development of the methodology. EJ and HY conducted the formal analysis. EJ used the software, conducted data curation, and handled the visualization. HJJ and EJ prepared the original draft and edited the manuscript. YMK and SS provided critical feedback and significant suggestions on the initial drafts. HY assisted with the revised drafts. HJJ and JHK provided project administration. HJJ supervised the study. All authors have read and agreed to the published version of the manuscript.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Locke</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bashall</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Al-Adely</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Moore</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kitchen</surname>
              <given-names>GB</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing in medicine: A review</article-title>
          <source>Trends in Anaesthesia and Critical Care</source>
          <year>2021</year>
          <month>06</month>
          <volume>38</volume>
          <fpage>4</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1016/j.tacc.2021.02.007</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Teng</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>A review on deep neural networks for ICD coding</article-title>
          <source>IEEE Trans. Knowl. Data Eng</source>
          <year>2022</year>
          <volume>35</volume>
          <issue>5</issue>
          <fpage>4357</fpage>
          <lpage>4375</lpage>
          <pub-id pub-id-type="doi">10.1109/tkde.2022.3148267</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Natarajan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pakhomov</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Are synthetic clinical notes useful for real natural language processing tasks: a case study on clinical entity recognition</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2021</year>
          <volume>28</volume>
          <issue>10</issue>
          <fpage>2193</fpage>
          <lpage>2201</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34272955"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jamia/ocab112</pub-id>
          <pub-id pub-id-type="medline">34272955</pub-id>
          <pub-id pub-id-type="pii">6323280</pub-id>
          <pub-id pub-id-type="pmcid">PMC8449609</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Antikainen</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Linnosmaa</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Umer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Oksala</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Eskola</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>van Gils</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hernesniemi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gabbouj</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Transformers for cardiac patient mortality risk prediction from heterogeneous electronic health records</article-title>
          <source>Sci Rep</source>
          <year>2023</year>
          <month>03</month>
          <day>02</day>
          <volume>13</volume>
          <issue>1</issue>
          <fpage>3517</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-023-30657-1"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-023-30657-1</pub-id>
          <pub-id pub-id-type="medline">36864069</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-023-30657-1</pub-id>
          <pub-id pub-id-type="pmcid">PMC9978282</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Poon</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>BioGPT: generative pre-trained transformer for biomedical text generation and mining</article-title>
          <source>Brief Bioinform</source>
          <year>2022</year>
          <month>11</month>
          <day>19</day>
          <volume>23</volume>
          <issue>6</issue>
          <fpage>bbac409</fpage>
          <pub-id pub-id-type="doi">10.1093/bib/bbac409</pub-id>
          <pub-id pub-id-type="medline">36156661</pub-id>
          <pub-id pub-id-type="pii">6713511</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nath</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Marie</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ellershaw</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Korot</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Keane</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>New meaning for NLP: the trials and tribulations of natural language processing with GPT-3 in ophthalmology</article-title>
          <source>Br J Ophthalmol</source>
          <year>2022</year>
          <volume>106</volume>
          <issue>7</issue>
          <fpage>889</fpage>
          <lpage>892</lpage>
          <pub-id pub-id-type="doi">10.1136/bjophthalmol-2022-321141</pub-id>
          <pub-id pub-id-type="medline">35523534</pub-id>
          <pub-id pub-id-type="pii">bjophthalmol-2022-321141</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="web">
          <article-title>Introducing ChatGPT</article-title>
          <source>OpenAI</source>
          <year>2022</year>
          <access-date>2023-02-14</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openai.com/blog/chatgpt/">https://openai.com/blog/chatgpt/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kung</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Cheatham</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Medenilla</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sillos</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>De Leon</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Elepaño</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Madriaga</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Aggabao</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Diaz-Candido</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Maningo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Performance of ChatGPT on USMLE: potential for AI-assisted medical education using large language models</article-title>
          <source>PLOS Digit Health</source>
          <year>2023</year>
          <volume>2</volume>
          <issue>2</issue>
          <fpage>e0000198</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36812645"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pdig.0000198</pub-id>
          <pub-id pub-id-type="medline">36812645</pub-id>
          <pub-id pub-id-type="pii">PDIG-D-22-00371</pub-id>
          <pub-id pub-id-type="pmcid">PMC9931230</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Miao</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>What does ChatGPT say: the DAO from algorithmic intelligence to linguistic intelligence</article-title>
          <source>IEEE/CAA J. Autom. Sinica</source>
          <year>2023</year>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>575</fpage>
          <lpage>579</lpage>
          <pub-id pub-id-type="doi">10.1109/jas.2023.123486</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Seth</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Hunter-Smith</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Rozen</surname>
              <given-names>WM</given-names>
            </name>
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Aesthetic surgery advice and counseling from artificial intelligence: a rhinoplasty consultation with ChatGPT</article-title>
          <source>Aesthetic Plast Surg</source>
          <year>2023</year>
          <volume>47</volume>
          <issue>5</issue>
          <fpage>1985</fpage>
          <lpage>1993</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37095384"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s00266-023-03338-7</pub-id>
          <pub-id pub-id-type="medline">37095384</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00266-023-03338-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC10581928</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Seth</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Cox</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Xie</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bulloch</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Hunter-Smith</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Rozen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ross</surname>
              <given-names>RJ</given-names>
            </name>
          </person-group>
          <article-title>Evaluating Chatbot efficacy for answering frequently asked questions in plastic surgery: a ChatGPT case study focused on breast augmentation</article-title>
          <source>Aesthet Surg J</source>
          <year>2023</year>
          <volume>43</volume>
          <issue>10</issue>
          <fpage>1126</fpage>
          <lpage>1135</lpage>
          <pub-id pub-id-type="doi">10.1093/asj/sjad140</pub-id>
          <pub-id pub-id-type="medline">37158147</pub-id>
          <pub-id pub-id-type="pii">7157259</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tadayon</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Sadeqi Jabali</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Khanmohammadi</surname>
              <given-names>MT</given-names>
            </name>
            <name name-style="western">
              <surname>Rangraz Jeddi</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Information asymmetry between physicians and patients undergoing laparoscopic cholecystectomy: analysis of patients' awareness level</article-title>
          <source>J Am Med Dir Assoc</source>
          <year>2022</year>
          <volume>23</volume>
          <issue>4</issue>
          <fpage>703</fpage>
          <lpage>704</lpage>
          <pub-id pub-id-type="doi">10.1016/j.jamda.2021.12.040</pub-id>
          <pub-id pub-id-type="medline">35114112</pub-id>
          <pub-id pub-id-type="pii">S1525-8610(21)01114-2</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fabes</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Avşar</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Spiro</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fernandez</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Eilers</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Evans</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hessheimer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lorgelly</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Spiro</surname>
              <given-names>M</given-names>
            </name>
            <collab>Health Economics Survey Group</collab>
          </person-group>
          <article-title>Information asymmetry in hospitals: evidence of the lack of cost awareness in clinicians</article-title>
          <source>Appl Health Econ Health Policy</source>
          <year>2022</year>
          <volume>20</volume>
          <issue>5</issue>
          <fpage>693</fpage>
          <lpage>706</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35606636"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s40258-022-00736-x</pub-id>
          <pub-id pub-id-type="medline">35606636</pub-id>
          <pub-id pub-id-type="pii">10.1007/s40258-022-00736-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC9126693</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tong</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Han</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xue</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Development and validation of the health literacy environment scale for Chinese hospitals from patients' perspective</article-title>
          <source>Front Public Health</source>
          <year>2023</year>
          <volume>11</volume>
          <fpage>1130628</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37333562"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpubh.2023.1130628</pub-id>
          <pub-id pub-id-type="medline">37333562</pub-id>
          <pub-id pub-id-type="pmcid">PMC10273272</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brach</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Keller</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hernandez</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Baur</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Parker</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dreyer</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Schyve</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lemerise</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Schillinger</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Ten attributes of health literate health care organizations</article-title>
          <source>NAM Perspectives</source>
          <year>2012</year>
          <volume>02</volume>
          <issue>6</issue>
          <fpage>1</fpage>
          <lpage>27</lpage>
          <pub-id pub-id-type="doi">10.31478/201206a</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Champlin</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mackert</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Glowacki</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Donovan</surname>
              <given-names>EE</given-names>
            </name>
          </person-group>
          <article-title>Toward a better understanding of patient health literacy: a focus on the skills patients need to find health information</article-title>
          <source>Qual Health Res</source>
          <year>2017</year>
          <volume>27</volume>
          <issue>8</issue>
          <fpage>1160</fpage>
          <lpage>1176</lpage>
          <pub-id pub-id-type="doi">10.1177/1049732316646355</pub-id>
          <pub-id pub-id-type="medline">27179023</pub-id>
          <pub-id pub-id-type="pii">1049732316646355</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Jang</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Yum</surname>
              <given-names>YJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Joo</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>A pre-trained BERT for Korean medical natural language processing</article-title>
          <source>Sci Rep</source>
          <year>2022</year>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>13847</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41598-022-17806-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41598-022-17806-8</pub-id>
          <pub-id pub-id-type="medline">35974113</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41598-022-17806-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC9381714</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>YM</given-names>
            </name>
            <name name-style="western">
              <surname>Song</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Joo</surname>
              <given-names>HJ</given-names>
            </name>
          </person-group>
          <article-title>Predicting medical specialty from text based on a domain-specific pre-trained BERT</article-title>
          <source>Int J Med Inform</source>
          <year>2023</year>
          <volume>170</volume>
          <fpage>104956</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1386-5056(22)00270-2"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2022.104956</pub-id>
          <pub-id pub-id-type="medline">36512987</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(22)00270-2</pub-id>
          <pub-id pub-id-type="pmcid">PMC9731829</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cahyawijaya</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Dai</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Su</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wilie</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Lovenia</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ji</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chung</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Do</surname>
              <given-names>QV</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Fung</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>A multitask, multilingual, multimodal evaluation of ChatGPT on reasoning, hallucination, and interactivity</article-title>
          <source>arXiv</source>
          <comment>Preprint published online February 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2302.04023"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2302.04023</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ke</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Qiu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT: potential, prospects, and limitations</article-title>
          <source>Front Inform Technol Electron Eng</source>
          <year>2023</year>
          <volume>25</volume>
          <fpage>6</fpage>
          <lpage>11</lpage>
          <pub-id pub-id-type="doi">10.1631/fitee.2300089</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>de Vries</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Schoonvelde</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Schumacher</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>No longer lost in translation: evidence that Google translate works for comparative bag-of-words text applications</article-title>
          <source>Polit Anal</source>
          <year>2018</year>
          <volume>26</volume>
          <issue>4</issue>
          <fpage>417</fpage>
          <lpage>430</lpage>
          <pub-id pub-id-type="doi">10.1017/pan.2018.26</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Macdonald</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Adeloye</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sheikh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rudan</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Can ChatGPT draft a research article? An example of population-level vaccine effectiveness analysis</article-title>
          <source>J Glob Health</source>
          <year>2023</year>
          <month>02</month>
          <day>17</day>
          <volume>13</volume>
          <fpage>01003</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36798998"/>
          </comment>
          <pub-id pub-id-type="doi">10.7189/jogh.13.01003</pub-id>
          <pub-id pub-id-type="medline">36798998</pub-id>
          <pub-id pub-id-type="pmcid">PMC9936200</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Haleem</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Javaid</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>RP</given-names>
            </name>
          </person-group>
          <article-title>An era of ChatGPT as a significant futuristic support tool: a study on features, abilities, and challenges</article-title>
          <source>BenchCouncil Transactions on Benchmarks, Standards and Evaluations</source>
          <year>2022</year>
          <month>10</month>
          <volume>2</volume>
          <issue>4</issue>
          <fpage>100089</fpage>
          <pub-id pub-id-type="doi">10.1016/j.tbench.2023.100089</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Phung</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Tran</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rana</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Karmakar</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shilton</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yearwood</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dimitrova</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Ho</surname>
              <given-names>TB</given-names>
            </name>
            <name name-style="western">
              <surname>Venkatesh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Berk</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Guidelines for developing and reporting machine learning predictive models in biomedical research: a multidisciplinary view</article-title>
          <source>J Med Internet Res</source>
          <year>2016</year>
          <volume>18</volume>
          <issue>12</issue>
          <fpage>e323</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2016/12/e323/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.5870</pub-id>
          <pub-id pub-id-type="medline">27986644</pub-id>
          <pub-id pub-id-type="pii">v18i12e323</pub-id>
          <pub-id pub-id-type="pmcid">PMC5238707</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Krstinić</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Braović</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Šerić</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Božić-Štulić</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Multi-label classifier performance evaluation with confusion matrix</article-title>
          <source>International Conference on Soft Computing, Artificial Intelligence and Machine Learning (SAIM 2020)</source>
          <year>2020</year>
          <fpage>01</fpage>
          <lpage>14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://csitcp.com/paper/10/108csit01.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.5121/csit.2020.100801</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vellido</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>The importance of interpretability and visualization in machine learning for applications in medicine and health care</article-title>
          <source>Neural Comput &amp; Applic</source>
          <year>2019</year>
          <volume>32</volume>
          <issue>24</issue>
          <fpage>18069</fpage>
          <lpage>18083</lpage>
          <pub-id pub-id-type="doi">10.1007/s00521-019-04051-w</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Amann</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Blasimme</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vayena</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Frey</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Madai</surname>
              <given-names>VI</given-names>
            </name>
            <collab>Precise4Q consortium</collab>
          </person-group>
          <article-title>Explainability for artificial intelligence in healthcare: a multidisciplinary perspective</article-title>
          <source>BMC Med Inform Decis Mak</source>
          <year>2020</year>
          <volume>20</volume>
          <issue>1</issue>
          <fpage>310</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/s12911-020-01332-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12911-020-01332-6</pub-id>
          <pub-id pub-id-type="medline">33256715</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12911-020-01332-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC7706019</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tjoa</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Guan</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>A survey on explainable artificial intelligence (XAI): toward medical XAI</article-title>
          <source>IEEE Trans Neural Netw Learn Syst</source>
          <year>2021</year>
          <volume>32</volume>
          <issue>11</issue>
          <fpage>4793</fpage>
          <lpage>4813</lpage>
          <pub-id pub-id-type="doi">10.1109/TNNLS.2020.3027314</pub-id>
          <pub-id pub-id-type="medline">33079674</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Hsiang</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Patent claim generation by fine-tuning OpenAI GPT-2</article-title>
          <source>World Patent Information</source>
          <year>2020</year>
          <volume>62</volume>
          <fpage>101983</fpage>
          <pub-id pub-id-type="doi">10.1016/j.wpi.2020.101983</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Safeer</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Keenan</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Health literacy: the gap between physicians and patients</article-title>
          <source>Am Fam Physician</source>
          <year>2005</year>
          <volume>72</volume>
          <issue>3</issue>
          <fpage>463</fpage>
          <lpage>468</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aafp.org/link_out?pmid=16100861"/>
          </comment>
          <pub-id pub-id-type="medline">16100861</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Stocks</surname>
              <given-names>NP</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>DH</given-names>
            </name>
            <name name-style="western">
              <surname>Hill</surname>
              <given-names>CL</given-names>
            </name>
            <name name-style="western">
              <surname>Gravier</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kickbusch</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Beilby</surname>
              <given-names>JJ</given-names>
            </name>
          </person-group>
          <article-title>Health literacy—a new concept for general practice?</article-title>
          <source>Aust Fam Physician</source>
          <year>2009</year>
          <volume>38</volume>
          <issue>3</issue>
          <fpage>144</fpage>
          <lpage>147</lpage>
          <pub-id pub-id-type="medline">19283256</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kountz</surname>
              <given-names>DS</given-names>
            </name>
          </person-group>
          <article-title>Strategies for improving low health literacy</article-title>
          <source>Postgrad Med</source>
          <year>2009</year>
          <volume>121</volume>
          <issue>5</issue>
          <fpage>171</fpage>
          <lpage>177</lpage>
          <pub-id pub-id-type="doi">10.3810/pgm.2009.09.2065</pub-id>
          <pub-id pub-id-type="medline">19820287</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hironaka</surname>
              <given-names>LK</given-names>
            </name>
            <name name-style="western">
              <surname>Paasche-Orlow</surname>
              <given-names>MK</given-names>
            </name>
          </person-group>
          <article-title>The implications of health literacy on patient-provider communication</article-title>
          <source>Arch Dis Child</source>
          <year>2008</year>
          <volume>93</volume>
          <issue>5</issue>
          <fpage>428</fpage>
          <lpage>432</lpage>
          <pub-id pub-id-type="doi">10.1136/adc.2007.131516</pub-id>
          <pub-id pub-id-type="medline">17916588</pub-id>
          <pub-id pub-id-type="pii">adc.2007.131516</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Talen</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Grampp</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tucker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Schultz</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>What physicians want from their patients: identifying what makes good patient communication</article-title>
          <source>Families, Systems, &amp; Health</source>
          <year>2008</year>
          <volume>26</volume>
          <issue>1</issue>
          <fpage>58</fpage>
          <lpage>66</lpage>
          <pub-id pub-id-type="doi">10.1037/1091-7527.26.1.58</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Javaid</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Haleem</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>RP</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT for healthcare services: an emerging stage for an innovative perspective</article-title>
          <source>BenchCouncil Transactions on Benchmarks, Standards and Evaluations</source>
          <year>2023</year>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>100105</fpage>
          <pub-id pub-id-type="doi">10.1016/j.tbench.2023.100105</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alkaissi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>McFarlane</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Artificial hallucinations in ChatGPT: implications in scientific writing</article-title>
          <source>Cureus</source>
          <year>2023</year>
          <volume>15</volume>
          <issue>2</issue>
          <fpage>e35179</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36811129"/>
          </comment>
          <pub-id pub-id-type="doi">10.7759/cureus.35179</pub-id>
          <pub-id pub-id-type="medline">36811129</pub-id>
          <pub-id pub-id-type="pmcid">PMC9939079</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jiang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>FF</given-names>
            </name>
            <name name-style="western">
              <surname>Araki</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Neubig</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>How can we know what language models know?</article-title>
          <source>Transactions of the Association for Computational Linguistics</source>
          <year>2020</year>
          <volume>8</volume>
          <fpage>423</fpage>
          <lpage>438</lpage>
          <pub-id pub-id-type="doi">10.1162/tacl_a_00324</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lecler</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Duron</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Soyer</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Revolutionizing radiology with GPT-based models: current applications, future possibilities and limitations of ChatGPT</article-title>
          <source>Diagn Interv Imaging</source>
          <year>2023</year>
          <volume>104</volume>
          <issue>6</issue>
          <fpage>269</fpage>
          <lpage>274</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2211-5684(23)00027-X"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.diii.2023.02.003</pub-id>
          <pub-id pub-id-type="medline">36858933</pub-id>
          <pub-id pub-id-type="pii">S2211-5684(23)00027-X</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Thorne</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Aletras</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Scarton</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Navigating prompt complexity for zero-shot classification: a study of large language models in computational social science</article-title>
          <source>ArXiv</source>
          <volume>2305.14310</volume>
          <fpage>1</fpage>
          <lpage>14</lpage>
          <comment>Preprint posted online on March 24, 2024</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2305.14310</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arora</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Narayan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Orr</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Guha</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Bhatia</surname>
              <given-names>KS</given-names>
            </name>
          </person-group>
          <article-title>Ask me anything: a simple strategy for prompting language models</article-title>
          <source>ArXiv</source>
          <volume>2210.02441</volume>
          <fpage>1</fpage>
          <lpage>72</lpage>
          <comment>Preprint posted online on November 20, 2022</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2210.02441</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wei</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Schuurmans</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bosma</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ichter</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Chain-of-thought prompting elicits reasoning in large language models</article-title>
          <source>Advances in Neural Information Processing Systems</source>
          <year>2022</year>
          <volume>35</volume>
          <fpage>24824</fpage>
          <lpage>24837</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2201.11903"/>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
