<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i1e54633</article-id>
      <article-id pub-id-type="pmid">39083337</article-id>
      <article-id pub-id-type="doi">10.2196/54633</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>A Reliable and Accessible Caregiving Language Model (CaLM) to Support Tools for Caregivers: Development and Evaluation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Yang</surname>
            <given-names>Rui</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>So</surname>
            <given-names>Chaehan</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Parmanto</surname>
            <given-names>Bambang</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Health Information Management</institution>
            <institution>University of Pittsburgh</institution>
            <addr-line>6052 Forbes Tower</addr-line>
            <addr-line>Pittsburgh, PA, 15260</addr-line>
            <country>United States</country>
            <phone>1 412 383 6649</phone>
            <email>parmanto@pitt.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-4907-8402</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Aryoyudanta</surname>
            <given-names>Bayu</given-names>
          </name>
          <degrees>MEng</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0009-1483-7489</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Soekinto</surname>
            <given-names>Timothius Wilbert</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0006-6240-5347</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Setiawan</surname>
            <given-names>I Made Agus</given-names>
          </name>
          <degrees>MSc, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8383-8471</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Wang</surname>
            <given-names>Yuhan</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5912-8293</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Hu</surname>
            <given-names>Haomin</given-names>
          </name>
          <degrees>MS</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9200-062X</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Saptono</surname>
            <given-names>Andi</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0933-8150</ext-link>
        </contrib>
        <contrib id="contrib8" contrib-type="author">
          <name name-style="western">
            <surname>Choi</surname>
            <given-names>Yong Kyung</given-names>
          </name>
          <degrees>MPH, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7882-4358</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Health Information Management</institution>
        <institution>University of Pittsburgh</institution>
        <addr-line>Pittsburgh, PA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Bambang Parmanto <email>parmanto@pitt.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>31</day>
        <month>7</month>
        <year>2024</year>
      </pub-date>
      <volume>8</volume>
      <elocation-id>e54633</elocation-id>
      <history>
        <date date-type="received">
          <day>16</day>
          <month>11</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>9</day>
          <month>12</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>29</day>
          <month>12</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>9</day>
          <month>5</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Bambang Parmanto, Bayu Aryoyudanta, Timothius Wilbert Soekinto, I Made Agus Setiawan, Yuhan Wang, Haomin Hu, Andi Saptono, Yong Kyung Choi. Originally published in JMIR Formative Research (https://formative.jmir.org), 31.07.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2024/1/e54633" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>In the United States, 1 in 5 adults currently serves as a family caregiver for an individual with a serious illness or disability. Unlike professional caregivers, family caregivers often assume this role without formal preparation or training. Thus, there is an urgent need to enhance the capacity of family caregivers to provide quality care. Leveraging technology as an educational tool or an adjunct to care is a promising approach that has the potential to enhance the learning and caregiving capabilities of family caregivers. Large language models (LLMs) can potentially be used as a foundation technology for supporting caregivers. An LLM can be categorized as a foundation model (FM), which is a large-scale model trained on a broad data set that can be adapted to a range of different domain tasks. Despite their potential, FMs have the critical weakness of “hallucination,” where the models generate information that can be misleading or inaccurate. Information reliability is essential when language models are deployed as front-line help tools for caregivers.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to (1) develop a reliable caregiving language model (CaLM) by using FMs and a caregiving knowledge base, (2) develop an accessible CaLM using a small FM that requires fewer computing resources, and (3) evaluate the model’s performance compared with a large FM.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We developed a CaLM using the retrieval augmented generation (RAG) framework combined with FM fine-tuning for improving the quality of FM answers by grounding the model on a caregiving knowledge base. The key components of the CaLM are the caregiving knowledge base, a fine-tuned FM, and a retriever module. We used 2 small FMs as candidates for the foundation of the CaLM (LLaMA [large language model Meta AI] 2 and Falcon with 7 billion parameters) and adopted a large FM (GPT-3.5 with an estimated 175 billion parameters) as a benchmark. We developed the caregiving knowledge base by gathering various types of documents from the internet. We focused on caregivers of individuals with Alzheimer disease and related dementias. We evaluated the models’ performances using the benchmark metrics commonly used in evaluating language models and their reliability for providing accurate references with their answers.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The RAG framework improved the performance of all FMs used in this study across all measures. As expected, the large FM performed better than the small FMs across all metrics. Interestingly, the small fine-tuned FMs with RAG performed significantly better than GPT-3.5 across all metrics. The fine-tuned LLaMA 2 with a small FM performed better than GPT-3.5 (even with RAG) in returning references with the answers.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The study shows that a reliable and accessible CaLM can be developed using small FMs with a knowledge base specific to the caregiving domain.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>large language model</kwd>
        <kwd>caregiving</kwd>
        <kwd>caregiver</kwd>
        <kwd>informal care</kwd>
        <kwd>carer</kwd>
        <kwd>GPT</kwd>
        <kwd>language model</kwd>
        <kwd>LLM</kwd>
        <kwd>elderly</kwd>
        <kwd>aging</kwd>
        <kwd>ChatGPT</kwd>
        <kwd>machine learning</kwd>
        <kwd>natural language processing</kwd>
        <kwd>NLP</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>The number of family caregivers for people with complex and chronic conditions, such as dementia and other disabilities, is increasing dramatically. In 2020, an estimated 53 million adults in the United States (1 in 5) served as family caregivers, up from the estimated 43.5 million in 2015 [<xref ref-type="bibr" rid="ref1">1</xref>]. These numbers are expected to climb dramatically over the next 30 years [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Unlike professional health care providers and caregivers, family caregivers often assume this role without formal education or training [<xref ref-type="bibr" rid="ref3">3</xref>], leaving them underprepared for the complex tasks of caregiving. This lack of preparation can lead to increased stress and a sense of being overwhelmed [<xref ref-type="bibr" rid="ref4">4</xref>]. Previous studies have shown that family caregivers are at risk for poor psychological health, physical health, and quality of life; strains in family relationships; and restrictions in social and work participation [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]. There is an urgent need to enhance the capacity of caregivers, although there remain unanswered questions regarding how best to support the diverse needs of family caregivers [<xref ref-type="bibr" rid="ref9">9</xref>]. One of the key features of successful interventions in supporting caregivers is to equip caregivers with practical knowledge and skills for providing care, including knowledge about the care recipient’s condition, associated symptoms, and symptom progression, as well as skills that enable them to address the needs of the care recipient [<xref ref-type="bibr" rid="ref10">10</xref>]. 
In this context, technology can play a pivotal role in supporting caregivers as a means of delivering educational tools or serving as a supplementary aid in the caregiving process [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref14">14</xref>]. Notably, 2 of the 10 research priorities identified by the Summit on Family Caregiving focus on the use of technology for family caregiving, although technology can also be used to support the remaining 8 research priorities [<xref ref-type="bibr" rid="ref2">2</xref>].</p>
        <p>Recent advances in generative artificial intelligence (AI) [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref19">19</xref>] have resulted in the popularity and widespread use of large language models (LLMs), such as ChatGPT, captivating global interest with their ability to generate intelligent and context-aware responses to a wide spectrum of users’ questions or prompts [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref20">20</xref>-<xref ref-type="bibr" rid="ref22">22</xref>]. These LLMs fall into a category called foundation models (FMs) [<xref ref-type="bibr" rid="ref23">23</xref>]. An FM is a large-scale machine-learning model trained on diverse and comprehensive data sets. These data sets equip the FMs with the versatility to perform a wide range of tasks across different domains [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. An FM provides a base, or a foundation, on which other specific models can be built. FMs, such as OpenAI’s GPT-3 and GPT-4, are pretrained using diverse corpora of the content found across the internet. These pretrained models can serve as the basis for developing educational content as well as interactive agents, such as chatbots, to support caregivers [<xref ref-type="bibr" rid="ref24">24</xref>]. These interactive agents will be able to address common requests from caregivers, including answering questions about the care recipient’s condition, associated symptoms, and symptom progression, as well as teaching skills that enable caregivers to address the needs of the care recipient [<xref ref-type="bibr" rid="ref25">25</xref>].</p>
        <p>However, FMs often fail to answer domain-specific questions that require factual knowledge. The responses generated by these models, while impressive and convincing, can be misleading or completely wrong—a phenomenon called hallucination [<xref ref-type="bibr" rid="ref26">26</xref>]. This issue is particularly problematic because it may be inherent to LLMs even when the size gets larger, and it is a feature, not a bug [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. This means that the system cannot be fully trusted in contexts where accuracy is paramount, such as in caregiving. In such contexts, the reliability and factual accuracy of information are nonnegotiable, as they underpin decisions that have direct consequences on the health and well-being of individuals. Furthermore, aside from hallucination, even the most powerful pretrained FMs will most likely not meet the specific needs of caregivers right out of the box.</p>
        <p>Adaptations have been developed to equip FMs to meet the specific needs of particular tasks, and 3 of the most prominent adaptation methods are prompt engineering, fine-tuning, and, most recently, retrieval augmented generation (RAG) [<xref ref-type="bibr" rid="ref28">28</xref>-<xref ref-type="bibr" rid="ref30">30</xref>]. Prompt engineering is the most popular method, and its goal is to guide the model toward desirable answers. This is the simplest approach because it does not involve retraining the FM or developing a knowledge base. RAG, on the other hand, introduces additional layers that use external knowledge (data sources) to provide the context for improving the performance and relevance of the FM. It is more complex to implement than prompt engineering because it requires the development of domain knowledge. Fine-tuning is the most complex method in terms of implementation because it requires retraining the FM. By strategically combining these methods, it is possible to enhance an FM’s functionality, tailoring it to deliver more precise and useful outputs for domain-specific applications such as caregiving.</p>
      </sec>
      <sec>
        <title>Objectives</title>
        <p>The objective of this exploratory study was to develop a reliable and accessible caregiving language model (CaLM). To achieve reliability, the CaLM will use the RAG framework that employs a caregiving knowledge base to generate prompts to provide a caregiving context to any questions from users. The CaLM is further fine-tuned by retraining the FM with caregiving-related data that can train the FM to provide authoritative references and informed answers to caregiving-related questions. To achieve the goal of accessibility, the CaLM uses a small FM that can be deployed with a modest computing infrastructure in a home or a small organization. The CaLM can further be used to develop downstream technologies, such as a caregiving chatbot, aiming to support caregivers in various settings.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>CaLM Architecture</title>
        <p>The overall architecture of the CaLM is illustrated in <xref rid="figure1" ref-type="fig">Figure 1</xref>. The key components of the CaLM are the caregiving knowledge base, a fine-tuned FM, and a retriever module. The knowledge base and retriever module are part of the RAG framework [<xref ref-type="bibr" rid="ref28">28</xref>]. An interaction system can be added if the CaLM is implemented as a conversation agent such as a chatbot. The CaLM uses a RAG framework to give the FM access to information that is specific to caregiving, and it uses fine-tuning to further retrain the FM to answer questions related to caregiving. Each of the modules will be discussed further in this section.</p>
        <p>The most common interaction between caregivers and language models is an open-ended question and answer (Q-A) method [<xref ref-type="bibr" rid="ref28">28</xref>]. In a regular FM, which we call a vanilla FM, a question from caregivers is submitted to the FM, and the FM retrieves answers based on its pretrained knowledge representation. In the CaLM, a question from a user is appended by a prompt generated from the caregiving knowledge base before it is submitted to the FM. The retriever module in the CaLM retrieves semantically similar information from the caregiving knowledge base and creates prompts to accompany the user question. The FM uses the question and the prompts to get a more relevant answer than it could without the prompts.</p>
        <p>In the CaLM, the FM is further fine-tuned by retraining the FM using a Q-A training set containing common questions and answers related to caregiving. Fine-tuning can be used to impart FMs with domain-specific terminology and with personalization for a specific population. This technique can be used to train FMs in areas important to caregiving, such as empathy. The question from the caregiver, combined with prompts from RAG, is submitted to the fine-tuned FM, which subsequently provides answers that are more accurate and more knowledgeable regarding caregiving.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Caregiving language model architecture. FM: foundation model.</p>
          </caption>
          <graphic xlink:href="formative_v8i1e54633_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>FM Description</title>
        <p>LLMs, such as ChatGPT, are subsets of AI systems called FMs. FMs are designed to be general-purpose models capable of performing many different tasks and can also be adapted to a variety of tasks. As the name implies, we will use FMs as the base foundation for the CaLM. Hundreds of FMs are currently available, and a recent paper catalogs more than 75 major transformer-based models alone [<xref ref-type="bibr" rid="ref31">31</xref>]. The goal of this project is to build a reliable and accessible model in the caregiving domain; therefore, the FM needs to fulfill 2 requirements: good performance and relatively small size. In this project, we have evaluated 3 different FMs, which are summarized in <xref ref-type="table" rid="table1">Table 1</xref>. Falcon and LLaMA (large language model Meta AI) 2 were chosen because the models were at the top of the Open LLM Leaderboard [<xref ref-type="bibr" rid="ref32">32</xref>] when this study was carried out. LLaMA [<xref ref-type="bibr" rid="ref33">33</xref>] is a family of LLMs released by Meta starting in February 2023. Falcon is a family of LLMs developed by the Technology Innovation Institute (TII) based in Abu Dhabi, United Arab Emirates [<xref ref-type="bibr" rid="ref34">34</xref>].</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Foundation models used in the study.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="210"/>
            <col width="170"/>
            <col width="300"/>
            <col width="180"/>
            <col width="140"/>
            <thead>
              <tr valign="top">
                <td>Foundation model</td>
                <td>Developer</td>
                <td>Description</td>
                <td>Licensing</td>
                <td>Parameters</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>LLaMA<sup>a</sup> 2 7B</td>
                <td>Meta</td>
                <td>Large language model of Meta AI; top-ranked model</td>
                <td>Open source</td>
                <td>7 billion</td>
              </tr>
              <tr valign="top">
                <td>Falcon 7B</td>
                <td>Technology Innovation Institute, UAE</td>
                <td>Part of the Falcon LLM<sup>b</sup> family; Apache permissive license; top-ranked model</td>
                <td>Open source</td>
                <td>7 billion</td>
              </tr>
              <tr valign="top">
                <td>GPT 3.5</td>
                <td>OpenAI</td>
                <td>Generative pretrained transformer</td>
                <td>Proprietary</td>
                <td>Estimated 175 billion</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>LLaMA: large language model Meta AI.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>LLM: large language model.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>We chose FMs that are considered “small” LLMs with 7 billion parameters. If the CaLM can be developed using a “small” FM, it will make powerful AI capabilities more accessible, affordable, and versatile. The models can be deployed using regular computing infrastructure available in low-resource settings such as small community organizations and homes. The open-source FMs used in this project have permissive licensing models that can potentially be deployed in low-resource settings. We also included a “large” LLM (proprietary GPT-3.5 with an estimated 175 billion parameters owned by OpenAI [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref35">35</xref>,<xref ref-type="bibr" rid="ref36">36</xref>]) as a point of comparison to highlight the potential that small FMs have for developing the CaLM.</p>
      </sec>
      <sec>
        <title>Caregiving Corpus and Knowledge Base</title>
        <p>The knowledge base is central to the CaLM in providing and guiding the FM with a rich context of caregiving-specific information. The long-term goal of this study is to develop a CaLM for various care recipients’ conditions. In this preliminary work, we focused on developing a CaLM for caregivers of individuals with Alzheimer disease and related dementias (ADRD). The development of the caregiving knowledge base started with the development of the caregiving corpora. In this project, we developed a caregiving corpus related to ADRD by gathering a significant collection of data related to ADRD caregiving. These data included publicly available journal articles, care guidelines, and practical insights from online caregiver discussion forums. <xref ref-type="table" rid="table2">Table 2</xref> describes the data sources that were collected for the knowledge base.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Sources for the caregiving corpus.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="180"/>
            <col width="170"/>
            <col width="210"/>
            <col width="150"/>
            <col width="130"/>
            <col width="160"/>
            <thead>
              <tr valign="top">
                <td>Source </td>
                <td>Source file format</td>
                <td>Type </td>
                <td>Document extracted</td>
                <td>Number of documents (N=8568)</td>
                <td>Number of chunks (N=196,926)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>caregiver.com </td>
                <td>HTML</td>
                <td>Online caregiving forum, discussion, and tips</td>
                <td>High-quality questions and answers</td>
                <td>142 </td>
                <td>1591 </td>
              </tr>
              <tr valign="top">
                <td>agingcare.com </td>
                <td>HTML</td>
                <td>Caregiver support website</td>
                <td>Caregiving resources and questions and answers</td>
                <td>402 </td>
                <td>4169 </td>
              </tr>
              <tr valign="top">
                <td>alzconnected.org</td>
                <td>HTML</td>
                <td>Discussion forums from Alzheimer’s Association</td>
                <td>Low-technical resources in question and answer format</td>
                <td>4087 </td>
                <td>4087 </td>
              </tr>
              <tr valign="top">
                <td>deliriumnetwork.org</td>
                <td>PDF</td>
                <td>Repository of resources related to delirium</td>
                <td>High-quality literature and resources</td>
                <td>1714 </td>
                <td>72,426 </td>
              </tr>
              <tr valign="top">
                <td>PubMed </td>
                <td>PDF</td>
                <td>Database of journal articles</td>
                <td>High-quality literature and resources</td>
                <td>2195 </td>
                <td>114,383 </td>
              </tr>
              <tr valign="top">
                <td>nia.nih.gov </td>
                <td>PDF + HTML</td>
                <td>Education resources on aging and ADRD<sup>a</sup> </td>
                <td>Literature and caregiving resources</td>
                <td>9 </td>
                <td>133 </td>
              </tr>
              <tr valign="top">
                <td>alzheimers.gov </td>
                <td>HTML</td>
                <td>Education resources on aging and ADRD</td>
                <td>Literature and caregiving resources</td>
                <td>8 </td>
                <td>55 </td>
              </tr>
              <tr valign="top">
                <td>alzheimers.org.uk </td>
                <td>HTML</td>
                <td>Education resources on aging and ADRD</td>
                <td>Literature and caregiving resources</td>
                <td>7 </td>
                <td>39 </td>
              </tr>
              <tr valign="top">
                <td>alz.org </td>
                <td>HTML</td>
                <td>Education resources on aging and ADRD</td>
                <td>Literature and caregiving resources</td>
                <td>2 </td>
                <td>12 </td>
              </tr>
              <tr valign="top">
                <td>Other web sources</td>
                <td>HTML</td>
                <td>Web article </td>
                <td>Caregiving resources</td>
                <td>2 </td>
                <td>31 </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>ADRD: Alzheimer disease and related dementias.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>We selected ADRD for the proof-of-concept development of the CaLM owing to the substantial and growing impact these conditions have on the global population, especially within the aging demographic [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>]. The intricate care requirements associated with the cognitive and behavioral symptoms of ADRD present a complex challenge that caregivers must navigate, often without formal training [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. Given the progressive nature of these conditions, caregivers are in need of long-term support and strategies, underlining the importance of a dedicated resource like the CaLM. By addressing ADRD, the model can provide substantial support to a vast community of caregivers who are frequently underserved when it comes to specialized care resources. Moreover, with an extensive amount of research and guidelines already available for ADRD, there is a rich foundation upon which to build a detailed and accurate knowledge base, making it a particularly suitable focus for the initial deployment of this innovative tool.</p>
        <p>The documents we collected were crawled and downloaded from websites in various formats such as HTML, PDF, and plain text (<xref ref-type="table" rid="table2">Table 2</xref>). These raw documents then underwent several preprocessing stages before they were converted into a caregiving corpus. The preprocessing procedure included data format conversion, text cleaning, and document chunking. During data format conversion, documents in HTML and PDF were converted into either plain text markdown format or plain text. During conversion, the converted documents were cleaned using regular expression (Regex) rules to remove duplication errors and unnecessary characters, such as extra spaces, new lines, punctuation, and non-ASCII tags.</p>
        <p>The last stage of text processing was document chunking and converting the chunked documents into Document format with metadata. Document chunking broke down the documents in the caregiving corpus into smaller “chunks” with a length of 1200 characters each to fit the FM context window limitation and reduce unrelated text in the generated prompt in the RAG system. These chunks of text were then converted into a 768-dimensional dense vector using Siamese BERT-Network [<xref ref-type="bibr" rid="ref40">40</xref>] as the embedding model. We used BAAI general embedding (BGE), a recently released embedding model that was pretrained on massive data sets and multiple tasks, as a general-purpose embedding model [<xref ref-type="bibr" rid="ref41">41</xref>]. BGE was selected because it provides a good trade-off for memory usage, speed of computation, and embedding quality. These vectors were then stored in a Chroma DB vector database, and it functioned as the retriever database for the RAG system. Chroma DB was selected in this experiment for its ease of setup. It is not too strict on the data schematics and supports multiple built-in distance functions such as squared L2, inner product, and cosine similarity.</p>
      </sec>
      <sec>
        <title>Retriever Module in the CaLM: Providing Caregiving Context Prompts</title>
        <p>The retriever module in the CaLM searches for semantically related information from the caregiving knowledge base to provide a caregiving context that matches users’ questions. The related caregiving information provides enhanced context that is appended to the user’s prompt and passed to the FM. It is important to implement a retrieval mechanism that can efficiently search through the caregiving knowledge base to respond to a user’s questions. A dense passage retriever (DPR) [<xref ref-type="bibr" rid="ref42">42</xref>] is used to develop the retriever module in the CaLM. The DPR captures more complex semantic relationships between the query and the documents, leading to more accurate retrieval results.</p>
        <p>In the retriever implementation, the user’s question is converted into a vector with the same embedding model used to encode the caregiving knowledge base. The DPR uses cosine similarity as the distance function to calculate related documents in the vector space by calculating the user’s question vector and existing document vectors inside the Chroma vector database. The most related documents were limited to 3 to keep the content relevant to the user’s question, avoid FM token window limitations, and prevent degrading model generation quality [<xref ref-type="bibr" rid="ref43">43</xref>].</p>
        <p>Implementing the CaLM using a RAG framework has 2 main benefits. It ensures that the model has access to the most current reliable facts about caregiving, and it provides users with access to the model’s sources, ensuring that its claims can be checked for accuracy and can ultimately be trusted. The caregiving knowledge base is designed to be updated regularly. The knowledge base in the CaLM can be updated regularly, and the model can be retrained more frequently. Updating the knowledge base and fine-tuning the FM require fewer resources than retraining FMs. The model had access to the most current and reliable facts because of the frequency of the updates and the fine-tuning.</p>
      </sec>
      <sec>
        <title>Fine-Tuning the FM in the CaLM</title>
        <p>The CaLM uses a fine-tuned FM to synthesize an accurate answer tailored to the language and nuances of caregiving. The fine-tuned FM is an original FM that has been retrained using supervised learning on Q-A pairs related to caregiving. Fine-tuning involves retraining the FM on a caregiving domain-specific data set. Full fine-tuning that involves updating all of an FM’s parameters is less feasible because it requires large computing resources. A more practical and commonly used type of fine-tuning is called parameter-efficient fine-tuning (PEFT), which requires retraining only part of or an extra component of the pretrained FM. The most widely used PEFT technique is called LoRA (low-rank adaptation) [<xref ref-type="bibr" rid="ref44">44</xref>], which adds a small number of trainable parameters to the FM while the original model parameters remain frozen. In developing the CaLM, we used LoRA as well as the quantized technique of LoRA called QLoRA [<xref ref-type="bibr" rid="ref45">45</xref>] to improve the FM’s memory efficiency during retraining.</p>
        <p>The main benefit of using RAG in the CaLM is that by grounding an FM on a set of external verifiable facts about caregiving, the model has fewer opportunities to pull information baked into its parameters, thus reducing the chances that the model will “hallucinate” incorrect or misleading information. This is critical in achieving the goal of a reliable CaLM.</p>
      </sec>
      <sec>
        <title>Data Sets for Fine-Tuning the FM and for Model Evaluation</title>
        <p>Recent studies indicate that an FM can be made more accurate by fine-tuning it on a high-quality smaller data set [<xref ref-type="bibr" rid="ref46">46</xref>-<xref ref-type="bibr" rid="ref49">49</xref>]. In this study, we used a large data set for the RAG framework’s knowledge base and a high-quality small data set for fine-tuning the FM. The goal was to further improve the performance of the FM by retraining it using a high-quality data set tailored for caregiving Q-A so that it can respond more accurately and in a more contextually relevant way to questions related to caregiving. Fine-tuning is important particularly if we want to use a smaller and more efficient FM to achieve the goal of an accessible CaLM.</p>
        <p>The high-quality training set to fine-tune the FM was in the form of Q-A pairs, which were developed using a combination of both automatic and manual methods. Fragments of documents in the caregiving knowledge base used in the RAG approach were randomly sampled and selected, and they were used as seed context to generate questions and answers. OpenAI GPT-4 was used to generate variations of questions based on the seed contexts. These seed questions were then paired with the 3 documents in the knowledge base that were most closely related to the questions to synthesize the output answer. The pairs of Q-A data sets were subsequently curated manually by a data annotator to validate that the questions were representative from a caregiver perspective and that the answers and their associated references were correct. The data annotator was responsible for selecting which Q-A pairs were relevant to caregiving. Following the curation, the data set was deduplicated to remove duplicate data, prevent data leakage, and improve the fine-tuning process [<xref ref-type="bibr" rid="ref49">49</xref>].</p>
        <p>We constructed 581 Q-A pairs of data sets, each of which included questions and answers with references. Of these 581 pairs, we randomly selected 415 for inclusion in a training subset, reserving the remaining 66 pairs as a test subset. The 415 Q-A pairs in the caregiving training set were used for fine-tuning the FMs using supervised learning. <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> provides an example of the fine-tuning data set. The goal of the fine-tuning process is to adapt the pretrained FM to the caregiving field so that it can respond more accurately and with more contextual relevance to questions related to caregiving. Once all the components of the CaLM were developed and trained, the model was evaluated using the 66 pairs of questions that were not included in the training.</p>
        <p>We evaluated the performances of the models using the test set consisting of 66 Q-A pairs. The evaluation was conducted after the components of the RAG framework were developed and the small FMs were fine-tuned by retraining the models on the training set of 415 Q-A pairs. The training set (comprising 415 entries) and the test set (comprising 66 entries) were distinct with no overlapping questions or content. After exposing the FMs to the questions from the test set, we checked the FMs’ answers against the test set answers to determine whether the FMs were outputting accurate answers to the test questions. The performance of the FMs was measured by the similarities between the reference answers to the test set questions and the output generated by the models. In addition to similarities, we evaluated the capabilities of the models in providing references to the answers.</p>
        <p>We tested the small FMs trained in 3 different settings: vanilla, RAG, and RAG + fine-tuned. Vanilla is the original FM without adding the caregiving knowledge base. In the RAG setting, for every question in the data, additional context from the knowledge base was added before the Q-A pair was sent to the FM. In the RAG + fine-tuned setting, we followed the same procedure as with the RAG setting, except that the FM had already been retrained using the 415 Q-A training set pairs. We compared the small FMs with OpenAI GPT 3.5 as a baseline benchmark. Because GPT 3.5 is a proprietary commercial system, we were not able to control its fine-tuning variables other than providing examples. Therefore, we cannot compare it with other FMs that were retrained using the caregiving Q-A training set.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>To demonstrate a proof-of-concept for the CaLM, we generated a caregiving corpus specifically focused on ADRD. This corpus was compiled from a comprehensive set of publicly available sources, including journal articles, caregiving guidelines, and content from online caregiver discussion forums such as posts and replies. All data sources used were already in the public domain and freely accessible to the general public. Therefore, this study did not fall under the purview of institutional review board (IRB) oversight, and it was deemed exempt from IRB review by the University of Pittsburgh.</p>
        <p>Despite this exemption, we adhered to strict ethical guidelines, including the deidentification of any potentially identifiable information extracted from online sources. This precaution ensures that individual privacy is maintained and aligns with ethical research standards, particularly those concerning the use of social media data in research.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Experiment Results</title>
        <p>We applied the benchmark metrics commonly used in evaluating language models, including BLEU (Bilingual Evaluation Understudy), ROUGE (Recall-Oriented Understudy for Gisting Evaluation), CHR-F (character N-gram F-score), and BERT (Bidirectional Encoder Representations from Transformers) score. These metrics are automatic evaluations that measure the similarity of a response to a provided reference answer in the test set. The metrics are considered to have a high correlation with human judgments of quality. BLEU is the oldest and most popular metric, and it captures word-level similarities between the answer and the reference. ROUGE evaluates how well the models produce a generated answer compared to the reference answer by measuring several overlapping units such as n-gram, word sequences, and word pairs [<xref ref-type="bibr" rid="ref50">50</xref>]. The scores for the BLEU and ROUGE metrics range between 0 and 1, with 1 being a perfect score. CHR-F measures similarities at the character level, and it is scored between 0 and 100, with 100 being a perfect score. The BERT score calculates the similarity between the output and a reference using sentence representation and focuses on measuring semantic similarity. The BERT metric is scored between 0 and 1, with 1 being a perfect score.</p>
        <p>The purpose of using multiple metrics was to provide a more comprehensive intrinsic automatic evaluation and to assess whether the performances are consistent across different standards of granularity, from character to word and meaning. The performance is summarized in <xref rid="figure2" ref-type="fig">Figure 2</xref>. In addition to the general performance of the language models for providing the right answers, we evaluated their reliability by measuring their capacity to provide references with the answers. The capability of the models in providing references is summarized in <xref ref-type="table" rid="table3">Table 3</xref>.</p>
        <p>The RAG framework improved the performance of all FMs used in this study across all measures. The performances improved significantly at the character (CHR-F), word (BLEU and ROUGE), and semantic (BERT score) levels. As expected, the large FM (OpenAI GPT 3.5 with estimated 175 billion parameters) performed better than the small FMs (Falcon 7B and LLaMA 2 7B with 7 billion parameters) across all metrics. The larger FM has more parameters and is able to accommodate having more knowledge encoded into it.</p>
        <p>The RAG framework that was implemented with fine-tuning performed better than RAG-only and vanilla for the 2 small FMs. Because we could not retrain the GPT 3.5 model, we were not able to evaluate a retrained (fine-tuned) version of it. The most interesting result is that RAG + fine-tuned LLaMA 2 7B performed significantly better than vanilla GPT 3.5 across all metrics. Fine-tuned Falcon 7B also performed better than vanilla GPT 3.5 across all metrics. LLaMA 2 7B and Falcon 7B have only 7 billion parameters, while OpenAI GPT 3.5 has 175 billion parameters. This shows that a small FM with the injection of domain-specific knowledge can perform better than a much larger FM.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Benchmarks of the Falcon 7B, LLaMA 2 7B Chat, and OpenAI GPT 3.5 models in 3 different approaches: vanilla, RAG, and RAG + fine-tuned (except GPT 3.5). BERT: Bidirectional Encoder Representations from Transformers; BLEU: Bilingual Evaluation Understudy; CHR-F: character N-gram F-score; LLaMA: large language model Meta AI; RAG: retrieval augmented generation; ROUGE: Recall-Oriented Understudy for Gisting Evaluation.</p>
          </caption>
          <graphic xlink:href="formative_v8i1e54633_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Capability to return references in the answer.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="370"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <col width="0"/>
            <col width="150"/>
            <col width="150"/>
            <col width="0"/>
            <col width="0"/>
            <col width="0"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Foundation model and variant</td>
                <td colspan="2">Returning references, n </td>
                <td colspan="2">Not returning references, n</td>
                <td>Returning references, %</td>
                <td colspan="2">Correct references (human evaluation), %</td>
                <td colspan="2">
                  <break/>
                </td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="12">
                  <bold>Falcon 7B</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RAG<sup>a</sup></td>
                <td colspan="2">0 </td>
                <td colspan="2">66 </td>
                <td colspan="2">0</td>
                <td>0</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RAG + fine-tuned </td>
                <td colspan="2">61 </td>
                <td colspan="2">5 </td>
                <td colspan="2">92</td>
                <td>66</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="12">
                  <bold>LLaMA<sup>b</sup> 2 7B - Chat </bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RAG</td>
                <td colspan="2">17 </td>
                <td colspan="2">49 </td>
                <td colspan="2">26</td>
                <td>12</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RAG + fine-tuned </td>
                <td colspan="2">66 </td>
                <td colspan="2">0 </td>
                <td colspan="2">100</td>
                <td>80</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td colspan="11">
                  <bold>GPT 3.5</bold>
                </td>
                <td>
                  <break/>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>RAG GPT 3.5</td>
                <td colspan="2">46 </td>
                <td colspan="2">20 </td>
                <td colspan="2">70</td>
                <td>62</td>
                <td colspan="3">
                  <break/>
                </td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>RAG: retrieval augmented generation.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>LLaMA: large language model Meta AI.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>In addition to the general performance of the language models in providing the right answers, we also evaluated their reliability by measuring their capacity to provide accurate references with the answers. <xref ref-type="table" rid="table3">Table 3</xref> provides results on the capability of the models to return references in their answers and mentions the number of correct references. The references provided by the models were evaluated for correctness and relevance. In addition to checking whether the generated answer returned a list of references, the annotator verified that the links were correct and active. The annotator also checked each inline reference to determine if the content in the answer was part of the original document. Using the results from these checks, the data annotator decided whether each reference was correct and relevant to the answer.</p>
        <p>None of the vanilla FMs were trained to return references, and therefore, no references were provided for any questions (the results were all zeroes). LLaMA 2 7B in the RAG + fine-tuned setting provided references to all 66 answers. It performed better than GPT 3.5 with the RAG framework, which returned references in only 46 of the 66 answers (70%). Fine-tuning by retraining the FMs using the 415 Q-A pairs of the training set significantly improved the capabilities of the FMs to return references. The percentage of correct and relevant references among the models followed similar patterns, with LLaMA 2 7B performing better than GPT 3.5 (80.0% vs. 62.1%). <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref> provides examples of comparisons between the generated answers provided by the CaLM and GPT 3.5, as well as reference answers from the Q-A test set.</p>
        <p>Implementing the RAG framework in the development of the CaLM had several benefits. First, we were able to ensure that the language model had access to the most current facts. The caregiving knowledge could be updated regularly, and we could retrain the model more frequently than we would have been able to when using a new FM. Updating the knowledge base and fine-tuning FMs require fewer resources than retraining FMs. Second, the model had access to the most reliable facts because of the frequency of the updates and the fine-tuning that included references in the knowledge base and the training set. Third, users had access to the model’s information sources, ensuring that its answers could be checked for accuracy and ultimately trusted.</p>
      </sec>
      <sec>
        <title>Developing a Caregiver Chatbot Using the CaLM</title>
        <p>We developed a prototype of a caregiver chatbot that uses the CaLM as the engine. The purpose of the chatbot prototype was to show that the CaLM is accessible and can be deployed in a low-resource environment. The chatbot interface and example of the interaction are illustrated in <xref rid="figure3" ref-type="fig">Figure 3</xref>. We developed the user interface and interaction system that work with the CaLM engine. The model that powers the chatbot is small and requires approximately 4-6 GB of memory. The chatbot can be deployed on a laptop or a small server.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Conversation example of a caregiver chatbot that uses the caregiving language model.</p>
          </caption>
          <graphic xlink:href="formative_v8i1e54633_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The findings of this study showed that a domain-specific language model for caregiving can be developed by using the RAG framework. The RAG framework improved the quality of answers generated by FMs in the domain-specific caregiving field by grounding the model on a caregiving knowledge base to supplement the general knowledge already stored in the internal representation of the FM. The results were consistent across different metrics, showing that FMs that were adapted using the RAG framework enriched with a caregiving knowledge base performed better than the original vanilla FMs. Performance was further improved when the FMs were fine-tuned by retraining them using supervised learning on a specific Q-A training set related to caregiving. Fine-tuning also improved the reliability of the models by increasing their capability to provide verifiable responses through references. The results showed that a reliable CaLM can be developed by combining FMs with the RAG framework and by fine-tuning involving retraining the FMs.</p>
        <p>The study found that small FMs can perform comparably to or better than much larger FMs when they are grounded in domain-specific knowledge related to caregiving. For example, LLaMA 2 7B (7 billion parameters) with access to a caregiver knowledge base performed better than GPT 3.5 with 175 billion parameters. Fine-tuned LLaMA 2 7B also provided reliable answers by supplying references to all of the answers in the test set. Smaller FMs require less computing power to train and fewer resources to deploy once they are trained. For example, LLaMA 7B with RAG and fine-tuning can be deployed using the computing power of small servers or desktop computers. This makes it more accessible for small organizations that want to develop language models specific to the domains important to them. Other methods for developing “small” FMs that can perform almost as well as large FMs have been proposed in recent works, and an example is the Starling-7B LLM [<xref ref-type="bibr" rid="ref51">51</xref>], which uses reinforcement learning from AI feedback (RLAIF) and high-quality large training data to enhance model performance.</p>
        <p>The implications of the results show the potential for developing reliable and accessible language models in domain-specific areas by combining smaller fine-tuned FMs with the RAG framework, as evidenced by other studies [<xref ref-type="bibr" rid="ref52">52</xref>,<xref ref-type="bibr" rid="ref53">53</xref>]. The parameter sizes available for FMs will continue to grow rapidly in the coming years. For example, the latest release of LLaMA 2 is available in 3 model sizes: 7, 13, and 70 billion parameters. The computing power accessible to smaller organizations will also continue to grow. Therefore, this approach will still have potential even when the sizes of FMs increase. By using an FM size that can be supported by the infrastructure available to small organizations, we can ensure that the CaLM will remain accessible to those organizations. For example, FMs with a size of 70 billion will likely be accessible within the next few years. Using smaller FMs with RAG + fine-tuning will remain a valid approach even as large FMs grow even larger because hallucinations will still exist in the large FMs.</p>
        <p>Two main methods were used in the development of the CaLM: RAG and fine-tuning of FMs. Fine-tuning has a number of drawbacks, including the computational resources needed for full parameter fine-tuning and the risk of catastrophic forgetting, where the model loses its ability to perform well on the original task or domain after being fine-tuned with new data [<xref ref-type="bibr" rid="ref54">54</xref>]. A previous study found that the PEFT technique is effective in preventing the phenomenon known as “catastrophic forgetting” in original FM capabilities compared with the full-parameter fine-tuning technique [<xref ref-type="bibr" rid="ref55">55</xref>]. Therefore, fine-tuning with PEFT will increase model capability on the CaLM’s downstream tasks without sacrificing original FM capabilities.</p>
        <p>The CaLM is reliable and accessible and has significant potential for solving downstream tasks through the development of systems such as a chatbot for caregiving. This approach can also be used for developing systems for caregivers that are specific to the conditions of the care recipients they serve, such as for caregivers of individuals with disabilities or cancer. The contextual reference in the caregiving knowledge base can be further tailored to organizational needs or services. For example, if an organization has a service related to long-term care, the language model can be tailored using the organization’s internal documents, procedures, and guidelines. We can also tailor the answer function to the desired communication styles or education levels of the intended users.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Although the study shows the potential of the development of a reliable and accessible language model in a specific domain, such as caregiving, it has a number of limitations. The first limitation is that the knowledge base is restricted to the caregiving of individuals with ADRD. The choice to focus primarily on ADRD was driven by the significant global impact of this condition and the availability of extensive research and guidelines. However, this focus may limit the broader applicability of our findings to other caregiving contexts, potentially affecting the model’s generalizability. To address this, future work will include expansion of the caregiving corpora related to other care recipient conditions, especially chronic and complex conditions. The corpora related to issues and skills in general caregiving (irrespective of the care recipient’s condition) used in this study are also limited. We plan to expand the caregiving corpora by retrieving all publicly available documents related to caregiving. We envision the CaLM as an iterative and evolving model. By integrating data and insights from broader caregiving contexts, we aim to evolve the CaLM into a more inclusive and representative model, catering to the diverse needs and challenges encountered in caregiving.</p>
        <p>The study used quantitative metrics for evaluating the performance of the language models. Quantitative metrics show consistency across different metrics. The benefits of quantitative metrics include reduced time and cost and increased consistency compared to human evaluation. All metrics used in this study fall into the category of intrinsic metrics [<xref ref-type="bibr" rid="ref56">56</xref>]. Intrinsic metrics measure the proficiency of an LLM in generating coherent and meaningful sentences relying on language rules and patterns [<xref ref-type="bibr" rid="ref57">57</xref>]. However, these quantitative metrics are insufficient for capturing the multifaceted human perspectives and the practicalities encountered in real-world caregiving scenarios. Future research should include evaluations using extrinsic metrics that are crafted to encapsulate user experiences and the actual applicability of language models within real-world settings [<xref ref-type="bibr" rid="ref57">57</xref>]. Extrinsic metrics that are relevant to the health care and caregiving domains should measure the accuracy and reliability of information, its timeliness and relevance, and the system’s capability to provide empathetic and emotionally supportive responses [<xref ref-type="bibr" rid="ref56">56</xref>]. The next phase of research will aim to incorporate these metrics and engage real family caregivers and health care professionals to evaluate the quality of the answers. Because the CaLM will be implemented as a chatbot that engages family caregivers, assessments using psychological dimensions, such as perceived humanness, likeability, anthropomorphism, animacy, and perceived safety [<xref ref-type="bibr" rid="ref58">58</xref>], need to be included in more holistic evaluations in the future.</p>
        <p>Additionally, ethical considerations are integral to the deployment of AI in caregiving. The AI system must employ stringent data privacy measures and transparency in its decision-making processes [<xref ref-type="bibr" rid="ref59">59</xref>-<xref ref-type="bibr" rid="ref61">61</xref>]. This transparency ensures that caregivers can trust and understand the rationale behind AI-generated advice and recommendations. Moreover, addressing potential biases in the training data is crucial to ensure that the AI system provides equitable support across all user demographics. Future development of the CaLM will involve continuous engagement with stakeholders to address these ethical challenges and ensure the model’s alignment with the highest standards of responsible AI practice.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study shows promise in the development of CaLMs. It shows that the CaLM developed using the RAG framework and FM fine-tuning can provide reliable answers to user questions by retrieving accurate references. The study shows that a reliable CaLM can be developed using FMs with a knowledge base specific to the caregiving domain. It also shows that a small FM that uses a caregiving knowledge base and is retrained using caregiving Q-A sets can perform better and more reliably than a much larger FM in answering caregiving-related questions. The CaLM developed using small FMs performed better than the benchmark large FM (OpenAI GPT 3.5), which will allow the CaLM to be accessible and deployed in low-resource settings. Future work includes expanding the domain knowledge to include other conditions of care recipients to enhance utility. Furthermore, the evaluation process will be refined by engaging caregivers as end users in providing feedback, alongside insights from health care professionals and caregiving domain experts.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Examples of questions and answers for training (fine-tuning) foundation models.</p>
        <media xlink:href="formative_v8i1e54633_app1.docx" xlink:title="DOCX File , 41 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Examples of answers generated by the caregiving language model (CaLM) and GPT 3.5.</p>
        <media xlink:href="formative_v8i1e54633_app2.docx" xlink:title="DOCX File , 49 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">ADRD</term>
          <def>
            <p>Alzheimer disease and related dementias</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">BGE</term>
          <def>
            <p>BAAI general embedding</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">BLEU</term>
          <def>
            <p>Bilingual Evaluation Understudy</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">CaLM</term>
          <def>
            <p>caregiving language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">CHR-F</term>
          <def>
            <p>character N-gram F-score</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">DPR</term>
          <def>
            <p>dense passage retriever</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">FM</term>
          <def>
            <p>foundation model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">LLaMA</term>
          <def>
            <p>large language model Meta AI</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">LoRA</term>
          <def>
            <p>low-rank adaptation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">PEFT</term>
          <def>
            <p>parameter-efficient fine-tuning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb14">Q-A</term>
          <def>
            <p>question and answer</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb15">RAG</term>
          <def>
            <p>retrieval-augmented generation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb16">ROUGE</term>
          <def>
            <p>Recall-Oriented Understudy for Gisting Evaluation</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This study was supported by grants from the National Institute on Disability, Independent Living, and Rehabilitation Research (NIDILRR; grant number 90RTGE0002 National Rehabilitation Research and Training Center on Family Support and DPKT21000080 Translating mHealth Technology to a Community Service Organization Providing Long Term Services and Supports). The NIDILRR is a center within the Administration for Community Living (ACL), Department of Health and Human Services (HHS). The contents of this publication do not necessarily represent the policy of the NIDILRR, ACL, or HHS, and readers should not assume endorsement by the US Federal Government.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The data sets generated and analyzed during this study, including the caregiving corpus and knowledge base, are available from the corresponding author upon reasonable request. The prototype caregiving language model developed in this study, which focuses on Alzheimer disease and related dementias, is not publicly accessible at this time. This is due to ongoing improvements and optimizations being made to enhance its performance and reliability. Interested individuals who wish to access the model or the data sets for research purposes are encouraged to contact the corresponding author.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>BP contributed to conceptualization, methodology, data analysis, and writing of this study. BA contributed to conceptualization, methodology, programming, data curation, data analysis, and writing. TWS contributed to data curation, data cleansing, and question and answer annotation and validation. IMAS contributed to data analysis and machine learning infrastructure. YW and HH contributed to data curation. AS contributed to conceptualization and machine learning infrastructure. YKC contributed to conceptualization, methodology, writing, and editing.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>Caregiving in the United States 2020</article-title>
          <source>AARP</source>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aarp.org/pri/topics/ltss/family-caregiving/caregiving-in-the-united-states.html">https://www.aarp.org/pri/topics/ltss/family-caregiving/caregiving-in-the-united-states.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harvath</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mongoven</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bidwell</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cothran</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sexson</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Mason</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Buckwalter</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Research priorities in family caregiving: process and outcomes of a conference on family-centered care across the trajectory of serious illness</article-title>
          <source>Gerontologist</source>
          <year>2020</year>
          <month>02</month>
          <day>14</day>
          <volume>60</volume>
          <issue>Suppl 1</issue>
          <fpage>S5</fpage>
          <lpage>S13</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32057081"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/geront/gnz138</pub-id>
          <pub-id pub-id-type="medline">32057081</pub-id>
          <pub-id pub-id-type="pii">5735604</pub-id>
          <pub-id pub-id-type="pmcid">PMC7019660</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elliott</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Rivera</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Nezu</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Nezu</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Geller</surname>
              <given-names>PA</given-names>
            </name>
          </person-group>
          <article-title>Spinal cord injury</article-title>
          <source>Handbook of psychology: Health psychology</source>
          <year>2003</year>
          <publisher-loc>Hoboken, NJ</publisher-loc>
          <publisher-name>John Wiley &amp; Sons, Inc</publisher-name>
          <fpage>415</fpage>
          <lpage>435</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reinhard</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Home alone revisited: Family caregivers providing complex care</article-title>
          <source>Innov Aging</source>
          <year>2019</year>
          <volume>3</volume>
          <issue>Suppl 1</issue>
          <fpage>S747</fpage>
          <lpage>S748</lpage>
          <pub-id pub-id-type="doi">10.1093/geroni/igz038.2740</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Biegel</surname>
              <given-names>DE</given-names>
            </name>
            <name name-style="western">
              <surname>Sales</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <source>Family caregiving in chronic illness: Alzheimer's disease, cancer, heart disease, mental illness, and stroke</source>
          <year>1991</year>
          <publisher-loc>Thousand Oaks, CA</publisher-loc>
          <publisher-name>Sage Publications</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Beach</surname>
              <given-names>SR</given-names>
            </name>
          </person-group>
          <article-title>Caregiving as a risk factor for mortality: the Caregiver Health Effects Study</article-title>
          <source>JAMA</source>
          <year>1999</year>
          <month>12</month>
          <day>15</day>
          <volume>282</volume>
          <issue>23</issue>
          <fpage>2215</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.282.23.2215</pub-id>
          <pub-id pub-id-type="medline">10605972</pub-id>
          <pub-id pub-id-type="pii">joc91040</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Martire</surname>
              <given-names>LM</given-names>
            </name>
          </person-group>
          <article-title>Family caregiving of persons with dementia: prevalence, health effects, and support strategies</article-title>
          <source>The American Journal of Geriatric Psychiatry</source>
          <year>2004</year>
          <month>5</month>
          <volume>12</volume>
          <issue>3</issue>
          <fpage>240</fpage>
          <lpage>249</lpage>
          <pub-id pub-id-type="doi">10.1097/00019442-200405000-00002</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Quittner</surname>
              <given-names>AL</given-names>
            </name>
          </person-group>
          <article-title>Caregiving for children and adults with chronic conditions: Introduction to the special issue</article-title>
          <source>Health Psychology</source>
          <year>1998</year>
          <month>03</month>
          <volume>17</volume>
          <issue>2</issue>
          <fpage>107</fpage>
          <lpage>111</lpage>
          <pub-id pub-id-type="doi">10.1037/h0092707</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>Committee on Family Caregiving for Older Adults</collab>
            <collab>Board on Health Care Services</collab>
            <collab>Health and Medicine Division</collab>
            <collab>National Academies of Sciences, Engineering, and Medicine</collab>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Schulz</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Eden</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Families Caring for an Aging America</source>
          <year>2016</year>
          <publisher-loc>Washington, DC</publisher-loc>
          <publisher-name>National Academies Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harvath</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mongoven</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sexson</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bettega</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Development of competencies to strengthen support for caregivers and enhance their capacity to provide care</article-title>
          <source>Gerontol Geriatr Educ</source>
          <year>2023</year>
          <month>10</month>
          <day>02</day>
          <volume>44</volume>
          <issue>4</issue>
          <fpage>523</fpage>
          <lpage>527</lpage>
          <pub-id pub-id-type="doi">10.1080/02701960.2022.2083117</pub-id>
          <pub-id pub-id-type="medline">35670382</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lindeman</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gladstone</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Apesoa-Varano</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Technology and caregiving: emerging interventions and directions for research</article-title>
          <source>Gerontologist</source>
          <year>2020</year>
          <month>02</month>
          <day>14</day>
          <volume>60</volume>
          <issue>Suppl 1</issue>
          <fpage>S41</fpage>
          <lpage>S49</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32057082"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/geront/gnz178</pub-id>
          <pub-id pub-id-type="medline">32057082</pub-id>
          <pub-id pub-id-type="pii">5735605</pub-id>
          <pub-id pub-id-type="pmcid">PMC7019659</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Coughlin</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Lau</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Cathedral builders wanted: constructing a new vision of technology for old age</article-title>
          <source>Public Policy &amp; Aging Report</source>
          <year>2006</year>
          <month>01</month>
          <day>01</day>
          <volume>16</volume>
          <issue>1</issue>
          <fpage>4</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1093/ppar/16.1.4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Orlov</surname>
              <given-names>LM</given-names>
            </name>
          </person-group>
          <article-title>Technology for Aging in Place</article-title>
          <source>Aging in Place Technology Watch</source>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.aging.senate.gov/imo/media/doc/Orlov_5_6_152.pdf">https://www.aging.senate.gov/imo/media/doc/Orlov_5_6_152.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Miura</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Saiki</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Nakamura</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Yasuda</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Assisting personalized healthcare of elderly people: developing a rule-based virtual caregiver system using mobile chatbot</article-title>
          <source>Sensors (Basel)</source>
          <year>2022</year>
          <month>05</month>
          <day>18</day>
          <volume>22</volume>
          <issue>10</issue>
          <fpage>3829</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=s22103829"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/s22103829</pub-id>
          <pub-id pub-id-type="medline">35632238</pub-id>
          <pub-id pub-id-type="pii">s22103829</pub-id>
          <pub-id pub-id-type="pmcid">PMC9146313</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goodfellow</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Courville</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>Deep Learning</source>
          <year>2016</year>
          <publisher-loc>Cambridge, MA</publisher-loc>
          <publisher-name>MIT Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>LeCun</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hinton</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Deep learning</article-title>
          <source>Nature</source>
          <year>2015</year>
          <month>05</month>
          <day>28</day>
          <volume>521</volume>
          <issue>7553</issue>
          <fpage>436</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.1038/nature14539</pub-id>
          <pub-id pub-id-type="medline">26017442</pub-id>
          <pub-id pub-id-type="pii">nature14539</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Mann</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ryder</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Subbiah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kaplan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dhariwal</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Neelakantan</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Shyam</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Language models are few-shot learners</article-title>
          <year>2020</year>
          <conf-name>34th Conference on Neural Information Processing Systems (NeurIPS 2020)</conf-name>
          <conf-date>December 6-12, 2020</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kung</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Cheatham</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Medenilla</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sillos</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>De Leon</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Elepaño</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Madriaga</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Aggabao</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Diaz-Candido</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Maningo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Performance of ChatGPT on USMLE: Potential for AI-assisted medical education using large language models</article-title>
          <source>PLOS Digit Health</source>
          <year>2023</year>
          <month>02</month>
          <day>9</day>
          <volume>2</volume>
          <issue>2</issue>
          <fpage>e0000198</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36812645"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pdig.0000198</pub-id>
          <pub-id pub-id-type="medline">36812645</pub-id>
          <pub-id pub-id-type="pii">PDIG-D-22-00371</pub-id>
          <pub-id pub-id-type="pmcid">PMC9931230</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wornow</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Thapa</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Patel</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Steinberg</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Fleming</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pfeffer</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Fries</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>NH</given-names>
            </name>
          </person-group>
          <article-title>The shaky foundations of large language models and foundation models for electronic health records</article-title>
          <source>NPJ Digit Med</source>
          <year>2023</year>
          <month>07</month>
          <day>29</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>135</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-023-00879-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-023-00879-8</pub-id>
          <pub-id pub-id-type="medline">37516790</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-023-00879-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC10387101</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Eysenbach</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>The role of ChatGPT, generative language models, and artificial intelligence in medical education: a conversation with ChatGPT and a call for papers</article-title>
          <source>JMIR Med Educ</source>
          <year>2023</year>
          <month>03</month>
          <day>06</day>
          <volume>9</volume>
          <fpage>e46885</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2023//e46885/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/46885</pub-id>
          <pub-id pub-id-type="medline">36863937</pub-id>
          <pub-id pub-id-type="pii">v9i1e46885</pub-id>
          <pub-id pub-id-type="pmcid">PMC10028514</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gilson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Safranek</surname>
              <given-names>CW</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Socrates</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Chartash</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>How does ChatGPT perform on the United States Medical Licensing Examination (USMLE)? The implications of large language models for medical education and knowledge assessment</article-title>
          <source>JMIR Med Educ</source>
          <year>2023</year>
          <month>02</month>
          <day>08</day>
          <volume>9</volume>
          <fpage>e45312</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2023//e45312/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/45312</pub-id>
          <pub-id pub-id-type="medline">36753318</pub-id>
          <pub-id pub-id-type="pii">v9i1e45312</pub-id>
          <pub-id pub-id-type="pmcid">PMC9947764</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cascella</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Montomoli</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bellini</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Bignami</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Evaluating the feasibility of ChatGPT in healthcare: an analysis of multiple clinical and research scenarios</article-title>
          <source>J Med Syst</source>
          <year>2023</year>
          <month>03</month>
          <day>04</day>
          <volume>47</volume>
          <issue>1</issue>
          <fpage>33</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36869927"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10916-023-01925-4</pub-id>
          <pub-id pub-id-type="medline">36869927</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10916-023-01925-4</pub-id>
          <pub-id pub-id-type="pmcid">PMC9985086</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bommasani</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hudson</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Adeli</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Altman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Arora</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>von Arx</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bernstein</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Bohg</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>On the opportunities and risks of foundation models</article-title>
          <source>arXiv</source>
          <year>2021</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2108.07258">https://arxiv.org/abs/2108.07258</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ruggiano</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Brown</surname>
              <given-names>EL</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Framil Suarez</surname>
              <given-names>CV</given-names>
            </name>
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Hao</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Hristidis</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Chatbots to support people with dementia and their caregivers: systematic review of functions and quality</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>06</month>
          <day>03</day>
          <volume>23</volume>
          <issue>6</issue>
          <fpage>e25006</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/6/e25006/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/25006</pub-id>
          <pub-id pub-id-type="medline">34081019</pub-id>
          <pub-id pub-id-type="pii">v23i6e25006</pub-id>
          <pub-id pub-id-type="pmcid">PMC8212632</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fear</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gleber</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Shaping the future of older adult care: ChatGPT, advanced AI, and the transformation of clinical practice</article-title>
          <source>JMIR Aging</source>
          <year>2023</year>
          <month>09</month>
          <day>13</day>
          <volume>6</volume>
          <fpage>e51776</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://aging.jmir.org/2023//e51776/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/51776</pub-id>
          <pub-id pub-id-type="medline">37703085</pub-id>
          <pub-id pub-id-type="pii">v6i1e51776</pub-id>
          <pub-id pub-id-type="pmcid">PMC10534283</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yao</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Ning</surname>
              <given-names>KP</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>ZH</given-names>
            </name>
            <name name-style="western">
              <surname>Ning</surname>
              <given-names>MN</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>LLM Lies: Hallucinations are not bugs, but features as adversarial examples</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2310.01469">https://arxiv.org/abs/2310.01469</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>CS</given-names>
            </name>
          </person-group>
          <article-title>Hallucinations Could Blunt ChatGPT’s Success</article-title>
          <source>IEEE Spectrum</source>
          <year>2023</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://spectrum.ieee.org/ai-hallucination">https://spectrum.ieee.org/ai-hallucination</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Perez</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Piktus</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Petroni</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Karpukhin</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Küttler</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Retrieval-augmented generation for knowledge-intensive NLP tasks</article-title>
          <year>2020</year>
          <conf-name>34th Conference on Neural Information Processing Systems (NeurIPS 2020)</conf-name>
          <conf-date>December 6-12, 2020</conf-date>
          <conf-loc>Vancouver, Canada</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Marrese-Taylor</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ke</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Integrating UMLS knowledge into large language models for medical question answering</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2310.02778">https://arxiv.org/abs/2310.02778</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zakka</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Shad</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chaurasia</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dalal</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Moor</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Fong</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Phillips</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Alexander</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ashley</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Boyd</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Boyd</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hirsch</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Langlotz</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Melia</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sallam</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tullis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Vogelsong</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Cunningham</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Hiesinger</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Almanac - Retrieval-augmented language models for clinical medicine</article-title>
          <source>NEJM AI</source>
          <year>2024</year>
          <month>02</month>
          <day>25</day>
          <volume>1</volume>
          <issue>2</issue>
          <fpage>68</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/38343631"/>
          </comment>
          <pub-id pub-id-type="doi">10.1056/aioa2300068</pub-id>
          <pub-id pub-id-type="medline">38343631</pub-id>
          <pub-id pub-id-type="pmcid">PMC10857783</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Amatriain</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Sankar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bing</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bodigutla</surname>
              <given-names>PK</given-names>
            </name>
            <name name-style="western">
              <surname>Hazen</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kazi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Transformer models: an introduction and catalog</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2302.07730">https://arxiv.org/abs/2302.07730</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="web">
          <article-title>Open LLM Leaderboard</article-title>
          <source>HuggingFace</source>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Touvron</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lavril</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Izacard</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Martinet</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Lachaux</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Lacroix</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rozière</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Goyal</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>LLaMA: Open and efficient foundation language models</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2302.13971">https://arxiv.org/abs/2302.13971</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Penedo</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Malartic</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Hesslow</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Cojocaru</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Cappelli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Alobeidli</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Pannier</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Almazrouei</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Launay</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>The RefinedWeb dataset for Falcon LLM: outperforming curated corpora with web data, and web data only</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2306.01116">https://arxiv.org/abs/2306.01116</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="web">
          <article-title>Introducing ChatGPT</article-title>
          <source>OpenAI</source>
          <year>2022</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openai.com/index/chatgpt">https://openai.com/index/chatgpt</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hughes</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT: Everything you need to know about OpenAI's GPT-4 tool</article-title>
          <source>BBC Science Focus Magazine</source>
          <year>2023</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.sciencefocus.com/future-technology/gpt-3">https://www.sciencefocus.com/future-technology/gpt-3</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="web">
          <article-title>Global action plan on the public health response to dementia 2017 - 2025</article-title>
          <source>World Health Organization</source>
          <year>2017</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/publications/i/item/global-action-plan-on-the-public-health-response-to-dementia-2017---2025">https://www.who.int/publications/i/item/global-action-plan-on-the-public-health-response-to-dementia-2017---2025</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>No authors listed</collab>
          </person-group>
          <article-title>2023 Alzheimer's disease facts and figures</article-title>
          <source>Alzheimers Dement</source>
          <year>2023</year>
          <month>04</month>
          <volume>19</volume>
          <issue>4</issue>
          <fpage>1598</fpage>
          <lpage>1695</lpage>
          <pub-id pub-id-type="doi">10.1002/alz.13016</pub-id>
          <pub-id pub-id-type="medline">36918389</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brodaty</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Donkin</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Family caregivers of people with dementia</article-title>
          <source>Dialogues Clin Neurosci</source>
          <year>2022</year>
          <month>04</month>
          <day>01</day>
          <volume>11</volume>
          <issue>2</issue>
          <fpage>217</fpage>
          <lpage>228</lpage>
          <pub-id pub-id-type="doi">10.31887/dcns.2009.11.2/hbrodaty</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Reimers</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Gurevych</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Sentence-BERT: Sentence embeddings using siamese BERT-networks</article-title>
          <source>arXiv</source>
          <year>2019</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1908.10084">https://arxiv.org/abs/1908.10084</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Xiao</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Muennighoff</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Lian</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Nie</surname>
              <given-names>JY</given-names>
            </name>
          </person-group>
          <article-title>C-pack: Packaged resources to advance general Chinese embedding</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2309.07597">https://arxiv.org/abs/2309.07597</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Karpukhin</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Oğuz</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Min</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Edunov</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yih</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Dense passage retrieval for open-domain question answering</article-title>
          <source>arXiv</source>
          <year>2020</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2004.04906">https://arxiv.org/abs/2004.04906</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>NF</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hewitt</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Paranjape</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bevilacqua</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Petroni</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Lost in the middle: How language models use long contexts</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2307.03172">https://arxiv.org/abs/2307.03172</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Shen</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wallis</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Allen-Zhu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>LoRA: Low-rank adaptation of large language models</article-title>
          <source>arXiv</source>
          <year>2021</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2106.09685">https://arxiv.org/abs/2106.09685</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dettmers</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Pagnoni</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Holtzman</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zettlemoyer</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>QLoRA: Efficient finetuning of quantized LLMs</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2305.14314">https://arxiv.org/abs/2305.14314</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Iyer</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Efrat</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>LIMA: Less is more for alignment</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2305.11206">https://arxiv.org/abs/2305.11206</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kandpal</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wallace</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Raffel</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Deduplicating training data mitigates privacy risks in language models</article-title>
          <year>2022</year>
          <conf-name>39th International Conference on Machine Learning</conf-name>
          <conf-date>July 17-23, 2022</conf-date>
          <conf-loc>Baltimore, MD</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shumailov</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Shumaylov</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Gal</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Papernot</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>The curse of recursion: Training on generated data makes models forget</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2305.17493">https://arxiv.org/abs/2305.17493</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ippolito</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Nystrom</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Eck</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Callison-Burch</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Carlini</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Deduplicating training data makes language models better</article-title>
          <source>arXiv</source>
          <year>2021</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2107.06499">https://arxiv.org/abs/2107.06499</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>CY</given-names>
            </name>
          </person-group>
          <article-title>ROUGE: A package for automatic evaluation of summaries</article-title>
          <source>Text Summarization Branches Out</source>
          <year>2004</year>
          <publisher-loc>Barcelona, Spain</publisher-loc>
          <publisher-name>Association for Computational Linguistics</publisher-name>
          <fpage>74</fpage>
          <lpage>81</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Frick</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Jiao</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Starling</source>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://starling.cs.berkeley.edu/">https://starling.cs.berkeley.edu/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Borgeaud</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mensch</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffmann</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Rutherford</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Millican</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>van den Driessche</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lespiau</surname>
              <given-names>JB</given-names>
            </name>
          </person-group>
          <article-title>Improving language models by retrieving from trillions of tokens</article-title>
          <year>2022</year>
          <conf-name>39th International Conference on Machine Learning</conf-name>
          <conf-date>July 17-23, 2022</conf-date>
          <conf-loc>Baltimore, MD</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Izacard</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Lewis</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lomeli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hosseini</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Petroni</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Schick</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Dwivedi-Yu</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Joulin</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Atlas: Few-shot learning with retrieval augmented language models</article-title>
          <source>arXiv</source>
          <year>2022</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2208.03299">https://arxiv.org/abs/2208.03299</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Luo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Meng</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>An empirical study of catastrophic forgetting in large language models during continual fine-tuning</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2308.08747">https://arxiv.org/abs/2308.08747</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Lan</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>EP</given-names>
            </name>
            <name name-style="western">
              <surname>Bing</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>LLM-Adapters: An adapter family for parameter-efficient fine-tuning of large language models</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2304.01933">https://arxiv.org/abs/2304.01933</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abbasian</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Khatibi</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Azimi</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Oniani</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Abad</surname>
              <given-names>ZSH</given-names>
            </name>
            <name name-style="western">
              <surname>Thieme</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sriram</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>Z</given-names>
            </name>
          </person-group>
          <article-title>Foundation metrics for evaluating effectiveness of healthcare conversations powered by generative AI</article-title>
          <source>arXiv</source>
          <year>2023</year>
          <access-date>2023-11-16</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2309.12444">https://arxiv.org/abs/2309.12444</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Resnik</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fox</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Lappin</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of NLP Systems</article-title>
          <source>The Handbook of Computational Linguistics and Natural Language Processing</source>
          <year>2010</year>
          <publisher-loc>Hoboken, NJ</publisher-loc>
          <publisher-name>Blackwell Publishing</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bartneck</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Kulić</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Croft</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Zoghbi</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Measurement Instruments for the Anthropomorphism, Animacy, Likeability, Perceived Intelligence, and Perceived Safety of Robots</article-title>
          <source>Int J of Soc Robotics</source>
          <year>2008</year>
          <month>11</month>
          <day>20</day>
          <volume>1</volume>
          <issue>1</issue>
          <fpage>71</fpage>
          <lpage>81</lpage>
          <pub-id pub-id-type="doi">10.1007/s12369-008-0001-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mittermaier</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Raza</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Kvedar</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Bias in AI-based models for medical applications: challenges and mitigation strategies</article-title>
          <source>NPJ Digit Med</source>
          <year>2023</year>
          <month>06</month>
          <day>14</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>113</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1038/s41746-023-00858-z"/>
          </comment>
          <pub-id pub-id-type="doi">10.1038/s41746-023-00858-z</pub-id>
          <pub-id pub-id-type="medline">37311802</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41746-023-00858-z</pub-id>
          <pub-id pub-id-type="pmcid">PMC10264403</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Naik</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hameed</surname>
              <given-names>BMZ</given-names>
            </name>
            <name name-style="western">
              <surname>Shetty</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Swain</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Paul</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Aggarwal</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Ibrahim</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Patil</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Smriti</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Shetty</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rai</surname>
              <given-names>BP</given-names>
            </name>
            <name name-style="western">
              <surname>Chlosta</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Somani</surname>
              <given-names>BK</given-names>
            </name>
          </person-group>
          <article-title>Legal and ethical consideration in artificial intelligence in healthcare: who takes responsibility?</article-title>
          <source>Front Surg</source>
          <year>2022</year>
          <month>3</month>
          <day>14</day>
          <volume>9</volume>
          <fpage>862322</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35360424"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fsurg.2022.862322</pub-id>
          <pub-id pub-id-type="medline">35360424</pub-id>
          <pub-id pub-id-type="pmcid">PMC8963864</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Norori</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Hu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Aellen</surname>
              <given-names>FM</given-names>
            </name>
            <name name-style="western">
              <surname>Faraci</surname>
              <given-names>FD</given-names>
            </name>
            <name name-style="western">
              <surname>Tzovara</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Addressing bias in big data and AI for health care: A call for open science</article-title>
          <source>Patterns (N Y)</source>
          <year>2021</year>
          <month>10</month>
          <day>08</day>
          <volume>2</volume>
          <issue>10</issue>
          <fpage>100347</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://boris.unibe.ch/id/eprint/161897"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.patter.2021.100347</pub-id>
          <pub-id pub-id-type="medline">34693373</pub-id>
          <pub-id pub-id-type="pii">S2666-3899(21)00202-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC8515002</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
