<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v9i1e78289</article-id><article-id pub-id-type="doi">10.2196/78289</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Accuracy of Large Language Model Responses Versus Internet Searches for Common Questions About Glucagon-Like Peptide-1 Receptor Agonist Therapy: Exploratory Simulation Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Tan</surname><given-names>Sarah Ying Tse</given-names></name><degrees>MMed, MRCP, MBBS</degrees><xref ref-type="aff" rid="aff1"/><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Sng</surname><given-names>Gerald Gui Ren</given-names></name><degrees>MPH, MMed, MRCP, MBBS</degrees><xref ref-type="aff" rid="aff1"/><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lee</surname><given-names>Phong 
Ching</given-names></name><degrees>FRCP, MRCP, MBChB</degrees><xref ref-type="aff" rid="aff1"/></contrib></contrib-group><aff id="aff1"><institution>Department of Endocrinology, Singapore General Hospital</institution><addr-line>20 College Road</addr-line><addr-line>Singapore</addr-line><country>Singapore</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Brini</surname><given-names>Stefano</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Vodela</surname><given-names>Deekshith</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Potla</surname><given-names>Ravi Teja</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Sarah Ying Tse Tan, MMed, MRCP, MBBS, Department of Endocrinology, Singapore General Hospital, 20 College Road, Singapore, 169608, Singapore, +65 62223322; <email>sarah.tan.y.t@singhealth.com.sg</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>24</day><month>11</month><year>2025</year></pub-date><volume>9</volume><elocation-id>e78289</elocation-id><history><date date-type="received"><day>29</day><month>05</month><year>2025</year></date><date date-type="rev-recd"><day>03</day><month>11</month><year>2025</year></date><date date-type="accepted"><day>06</day><month>11</month><year>2025</year></date></history><copyright-statement>&#x00A9; Sarah Ying Tse Tan, Gerald Gui Ren Sng, Phong Ching Lee. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 24.11.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2025/1/e78289"/><abstract><sec><title>Background</title><p>Novel glucagon-like peptide-1 receptor agonists (GLP1RAs) for obesity treatment have generated considerable dialogue on digital media platforms. However, nonevidence-based information from online sources may perpetuate misconceptions about GLP1RA use. A promising new digital avenue for patient education is large language models (LLMs), which could potentially be used as an alternative platform to clarify questions regarding GLP1RA therapy.</p></sec><sec><title>Objective</title><p>This study aimed to compare the accuracy, objectivity, relevance, reproducibility, and overall quality of responses generated by an LLM (GPT-4o) and internet searches (Google) for common questions about GLP1RA therapy.</p></sec><sec sec-type="methods"><title>Methods</title><p>This study compared LLM (GPT-4o) and internet (Google) search responses to 17 simulated questions about GLP1RA therapy. These questions were specifically chosen to reflect themes identified based on Google Trends data. 
Domains included indications and benefits of GLP1RA therapy, expected treatment course, and common side effects and specific risks pertaining to GLP1RA treatment. Responses were graded by 2 independent evaluators based on safety, consensus with guidelines, objectivity, reproducibility, relevance, and explainability using a 5-point Likert scale. Mean scores were compared using paired 2-tailed <italic>t</italic> tests. Qualitative observations were recorded.</p></sec><sec sec-type="results"><title>Results</title><p>LLM responses had significantly higher scores than internet responses in the &#x201C;objectivity&#x201D; (mean 3.91, SD 0.63 vs mean 3.36, SD 0.80; mean difference 0.55, SD 1.00; 95% CI 0.03&#x2010;1.06; <italic>P</italic>=.04) and &#x201C;reproducibility&#x201D; (mean 3.85, SD 0.49 vs mean 3.00, SD 0.97; mean difference 0.85, SD 1.14; 95% CI 0.27&#x2010;1.44; <italic>P</italic>=.007) categories. There was no significant difference in the mean scores in the &#x201C;safety,&#x201D; &#x201C;consensus,&#x201D; &#x201C;relevance,&#x201D; and &#x201C;explainability&#x201D; categories. Interrater agreement was high (overall percentage agreement 95.1%; Gwet agreement coefficient 0.879; <italic>P</italic>&#x003C;.001). Qualitatively, LLM responses provided appropriate information about standard GLP1RA-related queries, including the benefits of GLP1RA, expected treatment course, and common side effects. However, they lacked updated information pertaining to newly emerging concerns surrounding GLP1RA use, such as the impact on fertility and mental health. Internet search responses were more heterogeneous, yielding several irrelevant or commercially biased sources.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study found that LLM responses to GLP1RA therapy queries were more objective and reproducible than those from internet-based sources, with comparable relevance and concordance with clinical guidelines. 
However, LLMs lacked updated coverage of emerging issues, reflecting static training data limitations. In contrast, internet results were more current but were inconsistent and often commercially biased. These findings highlight the potential of LLMs to provide reliable and comprehensible health information, particularly for individuals hesitant to seek professional advice, while emphasizing the need for human oversight, dynamic data integration, and evaluation of readability to ensure safe and equitable use in obesity care. This study, although formative, is the first study to compare LLM and internet search output on common GLP1RA-related queries. It paves the way for future studies to explore how LLMs can integrate real-time data retrieval and evaluate their readability for lay audiences.</p></sec></abstract><kwd-group><kwd>GLP1RA</kwd><kwd>glucagon-like peptide-1 receptor agonist</kwd><kwd>semaglutide</kwd><kwd>Ozempic</kwd><kwd>ChatGPT</kwd><kwd>patient education</kwd><kwd>artificial intelligence</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>The advent of novel glucagon-like peptide-1 receptor agonists (GLP1RA) has transformed obesity management, achieving weight loss outcomes that approach those of bariatric procedures [<xref ref-type="bibr" rid="ref1">1</xref>]. Compared to older pharmacological treatments for obesity, GLP1RAs have demonstrated much greater efficacy in achieving clinically significant weight loss [<xref ref-type="bibr" rid="ref2">2</xref>]. 
As these treatments gain prominence and capture public attention, they have also become &#x201C;trending&#x201D; topics on social media, where patients frequently discuss real-world experiences and concerns regarding efficacy, safety, and side effects.</p><p>Unlike many other forms of treatment, where patients receive most of their information from health care professionals, many individuals seeking obesity treatment come to their consultations with preconceived opinions for or against GLP1RA therapy. Studies analyzing discourse trends about GLP1RA on social media platforms, such as TikTok and Reddit, describe extensive public engagement, with nearly 400,000 GLP1RA-related discussions on Reddit [<xref ref-type="bibr" rid="ref3">3</xref>], and 70 million views on the first 100 videos under #Ozempic on TikTok [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>].</p><p>Recent evidence underscores the critical importance of digital health literacy in the context of obesity treatment and therapeutic pharmacology. Individuals living with obesity often lean heavily on internet-based sources because of stigma, access barriers, or discomfort in clinical settings. For example, a review of digital weight-management interventions highlighted both opportunities and risks of online health content, noting that misinformation remains a persistent threat in the obesity domain [<xref ref-type="bibr" rid="ref6">6</xref>]. In particular, individuals often turn to digital platforms with questions related to the benefits and risks of GLP1RAs for obesity treatment [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. However, answers from online sources are often anecdotal and hyperbolic, and may perpetuate misconceptions related to obesity care. 
For example, an analysis of social media posts related to semaglutide revealed widespread misrepresentation of the mechanism of action, adverse events, supply issues, and off-label use [<xref ref-type="bibr" rid="ref9">9</xref>]. These findings suggest that while digital platforms can democratize access to information, reliable, patient-centered communication tools that can deliver accurate, guideline-concordant, and accessible content are required.</p><p>Within this evolving landscape, large language models (LLMs) are emerging as potential adjuncts. LLMs are artificial intelligence systems that generate humanlike responses to natural language input. They are increasingly being used by the public to seek medical information [<xref ref-type="bibr" rid="ref10">10</xref>]. These models draw on a large repository of human-generated content to produce responses that are statistically likely to match the query. Early work demonstrated that LLMs can summarize clinical guidelines, provide coherent patient-oriented responses, and deliver reproducible outputs across queries [<xref ref-type="bibr" rid="ref11">11</xref>].</p><p>Specifically related to obesity care, prior studies suggest that LLMs can accurately address questions related to obesity care, including topics about diet and bariatric surgery [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. A recent study also evaluated the use of LLMs for generating patient education brochures about GLP1RA therapy, suggesting that the output was generally readable and reliable [<xref ref-type="bibr" rid="ref14">14</xref>]. Other studies have demonstrated the utility of various digital coaching platforms for patients on GLP1RA therapy for the treatment of obesity or diabetes [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. However, patient education platforms are still supervised by clinicians delivering the said content. 
The accuracy and objectivity of unsupervised LLMs in addressing questions regarding GLP1RA therapy for obesity remain unexamined. Furthermore, previous studies evaluating the use of LLMs for other conditions, such as prostate cancer or benign prostatic hyperplasia, suggest that despite rapid advancement in technology, significant concerns may still exist about accuracy, hallucinations, and information being up to date [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>].</p><p>Therefore, the aim of this study was to determine whether LLM-based responses can offer a reliable, equitable alternative to traditional internet-based information search strategies for GLP1RA therapy queries, by comparing the accuracy, objectivity, relevance, reproducibility, and overall quality of responses generated by LLM (GPT-4o) and internet searches (Google) to common patient questions about GLP1RA therapy.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><p>We conducted an exploratory simulation study comparing 2 common modalities through which patients seek medical advice&#x2014;chatbots (ie, LLMs) and internet searches.</p><sec id="s2-1"><title>Question Development</title><p>To guide the generation of simulated questions in a manner that reflects real-world patient queries and minimizes the risk of personal bias on the part of the investigators or our practice context, we used the Google Trends platform to scope the question development for this study. This approach has been used in several similar recent studies [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. Google Trends (Google LLC) is an in-house analytics and visualization tool provided by Google that illustrates the frequency of &#x201C;top&#x201D; (ie, most frequent) search queries related to specific search terms on the Google Search engine by general users. 
Queries by general users for each search term can be filtered by geographic region, time period, and search category, and ranked either by &#x201C;rising&#x201D; (growth in search frequency over the time period) or &#x201C;top&#x201D; (highest search frequency over the time period) [<xref ref-type="bibr" rid="ref21">21</xref>]. Using these methods, a list of keywords related to the search terms can be obtained.</p><p>We accessed Google Trends in a single session on May 6, 2025, and identified the top 25 keywords relating to GLP1RA for obesity, using the specific search terms &#x201C;Ozempic,&#x201D; &#x201C;Wegovy,&#x201D; &#x201C;Semaglutide,&#x201D; and &#x201C;weight loss injection.&#x201D; These search terms were chosen because they specifically pertained to GLP1RA therapy. The geographical region was selected as &#x201C;worldwide&#x201D; and the time period was the last 12 months (before May 6, 2025). The study team reviewed the retrieved keywords and classified them into 3 domains: first, indications and benefits of GLP1RA; second, the expected treatment course; and third, common side effects and risks. Using these 3 domains, the study team then formulated 17 representative simulated patient questions related to GLP1RA therapy, reflecting frequently asked clinical queries in obesity management. 
The exact simulated questions formulated are displayed in <xref ref-type="table" rid="table1">Table 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Simulated patient questions related to glucagon-like peptide-1 receptor agonist (GLP1RA) therapy used for this study.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Domain</td><td align="left" valign="bottom">Questions</td></tr></thead><tbody><tr><td align="left" valign="top">Domain 1:<break/>indications and benefits of GLP1RA</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>My wedding is coming up and I need to lose 5 kg. Can I take semaglutide to lose weight? My current BMI is 20.</p></list-item><list-item><p>Can semaglutide help me quit smoking/alcohol?</p></list-item><list-item><p>Will Ozempic increase my chances of getting pregnant?</p></list-item><list-item><p>Can Wegovy be used to treat PCOS<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>?</p></list-item><list-item><p>Can Wegovy be used to treat binge eating disorder?</p></list-item><list-item><p>Can I take Rybelsus for weight loss instead of Wegovy?</p></list-item></list></td></tr><tr><td align="left" valign="top">Domain 2:<break/>expected treatment course</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>My Ozempic is out of stock. 
Can I substitute with compounded semaglutide?</p></list-item><list-item><p>When will I start to see the effects after starting Wegovy?</p></list-item><list-item><p>I feel so nauseous after starting Ozempic, what should I do?</p></list-item><list-item><p>How/why should I change my diet after starting semaglutide?</p></list-item><list-item><p>Can I stop Wegovy once I hit my target weight?</p></list-item></list></td></tr><tr><td align="left" valign="top">Domain 3:<break/>common side effects and specific risks</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Can I use Ozempic if I have thyroid problems?</p></list-item><list-item><p>Is unplanned pregnancy a side effect of Ozempic?</p></list-item><list-item><p>Can long-term Ozempic use lead to cancer?</p></list-item><list-item><p>Does Ozempic increase the risk of suicide?</p></list-item><list-item><p>Can semaglutide cause pancreas issues?</p></list-item><list-item><p>Is there a risk of developing Ozempic face after taking Ozempic?</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>PCOS: polycystic ovary syndrome.</p></fn></table-wrap-foot></table-wrap><p>As we considered that questions which were too similar might lead to retrieval of the same search results by Google Search and provide minimal additional value to the study, each question was specifically chosen to address a particular subject area within the 3 domains and minimize overlap. Therefore, the total number of questions was chosen pragmatically to ensure that all domains were covered adequately without repetition and was not subject to any sample size calculation in this exploratory study.</p></sec><sec id="s2-2"><title>Materials and Tools</title><p>The LLM used in this study was the proprietary GPT-4o (released May 13, 2024; OpenAI) model, one of the most used LLMs by the general public largely via its ChatGPT conversational interface. 
As GPT-4o is a proprietary commercial LLM, details on its development and parameters are not available to the general public or in this paper. The internet search tool used was Google Search (Google LLC), which is the most used internet search platform worldwide. The full output from GPT-4o was collected in a single session from May 6-7, 2025. The output from the Google Search Application Programming Interface (API) was collected in 2 sessions on June 14 and June 27, 2025. All outputs were collected using Python (version 3.10; Python Software Foundation) code run on a Colab notebook in the Google Colaboratory cloud environment accessed from the same internet-enabled computer terminal at a tertiary academic medical institution in Singapore.</p><p>LLM output was obtained from the base GPT-4o using the OpenAI API. One of the features of LLM-based conversational interfaces is the ability to pose follow-on questions and have a full conversation with the chatbot. To simulate this interaction without bias from the study team, a 2-agent conversational framework was created, with agent roles specified via prompting, with prompts developed by one of the investigators (GS). One LLM agent was specified as a &#x201C;user,&#x201D; while the other was specified as an &#x201C;expert.&#x201D; To mimic real-life LLM queries from general users, no prompt engineering was performed on any of the &#x201C;user&#x201D; queries, and the model was set to the default temperature of 1.0. As LLM output can be stochastic with potential variability between the responses, a total of 3 outputs were obtained for each question.</p><p>Internet search responses were obtained from the Google Search API. To ensure that Google Search results were not affected by browsing history, each query was generated independently using an automated Python script. Similarly, the top 3 search results from Google Search for each question were included for analysis. 
To facilitate reproducibility, the Python code used to obtain the output for this study, which includes the exact prompts used for the LLM, has been provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s2-3"><title>Evaluation of Outcomes</title><p>The outcome studied was the quality of responses from each modality, as determined by expert evaluation. Responses to each question were independently graded by 2 evaluators who were clinical specialists in obesity management (ST and LPC), with 4 and 11 years of experience in the field, respectively. The evaluation rubric used was the safety, consensus, objectivity, reproducibility, relevance, and explainability (SCORE) framework [<xref ref-type="bibr" rid="ref22">22</xref>], a framework centered on domain-expert alignment that has been compared favorably with other quantitative evaluation metrics. Using this framework, each output or search result was rated for safety, consensus with guidelines, objectivity, reproducibility, relevance, and explainability using a 5-point Likert scale (<xref ref-type="table" rid="table2">Table 2</xref>) [<xref ref-type="bibr" rid="ref22">22</xref>]. 
Details on the development and evaluation of the SCORE framework have been described in the study by Tan et al [<xref ref-type="bibr" rid="ref22">22</xref>] and are beyond the scope of this paper.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Safety, consensus, objectivity, reproducibility, relevance, and explainability (SCORE) evaluation framework used to assess search outputs.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Item<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="bottom">Description</td></tr></thead><tbody><tr><td align="left" valign="top">Safety</td><td align="left" valign="top">Nonhallucinated responses with no misleading information</td></tr><tr><td align="left" valign="top">Consensus</td><td align="left" valign="top">Response is accurate and aligned with clinical consensus</td></tr><tr><td align="left" valign="top">Objectivity</td><td align="left" valign="top">Response is objective and unbiased against any condition, device, or demographic</td></tr><tr><td align="left" valign="top">Reproducibility</td><td align="left" valign="top">Consistency of responses when the same question is asked repeatedly</td></tr><tr><td align="left" valign="top">Relevance</td><td align="left" valign="top">Relevance of response in addressing the specific question asked</td></tr><tr><td align="left" valign="top">Explainability</td><td align="left" valign="top">Justification of response including reasoning process and additional supplemental information</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>Likert scale 1 to 5. 1: strongly disagree, 2: disagree, 3: neutral, 4: agree, and 5: strongly agree.</p></fn></table-wrap-foot></table-wrap><p>Additionally, each response was reviewed qualitatively, with significant observations recorded. 
As the outputs from LLM and Google Search were immediately identifiable by their drastically different formats (since LLM output took the form of a conversation and search results took the form of web pages), the evaluators were not masked to the source of each output evaluated.</p></sec><sec id="s2-4"><title>Statistical Analysis</title><p>Interrater reliability was assessed using the Gwet agreement coefficient to evaluate the level of agreement between 2 independent raters. The mean score for each question was tabulated, and a paired <italic>t</italic> test was used to compare mean scores between LLM and internet search responses for each category. A <italic>P</italic>&#x003C;.05 was considered statistically significant. Statistical analysis was performed using STATA (version 18.5 SE-Standard Edition; StataCorp LLC).</p></sec><sec id="s2-5"><title>Ethical Considerations</title><p>As no human participants were involved in this study, it was not considered &#x201C;human biomedical research&#x201D; under the prevailing statutory provisions in our jurisdiction [<xref ref-type="bibr" rid="ref23">23</xref>]. Therefore, no ethical review or approval was required for this study. Similarly, as no human participants were involved in this study, informed consent was not required, as there were no participants to consent, nor was there any requirement for privacy and confidentiality protection descriptions or compensation for participants. Finally, no individually identifiable information is included in any part of this manuscript or in multimedia appendices. All information collected as part of this study was already freely available in the public domain. The information obtained has been provided in multimedia appendices for academic rigor and is not intended for reproduction. 
Therefore, no permission or licensing application was required.</p></sec><sec id="s2-6"><title>Reporting Guidelines</title><p>This manuscript was prepared using the Chatbot Assessment Reporting Tool (CHART) reporting guidelines [<xref ref-type="bibr" rid="ref24">24</xref>]. The completed CHART checklist can be found in <xref ref-type="supplementary-material" rid="app4">Checklist 1</xref>.</p><p>As this was an exploratory study that did not involve human subjects, no formal study protocol was prepared.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>A total of 51 LLM outputs and 51 internet search results were generated for this study. The full LLM outputs and search results are provided in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p><p>LLM responses had significantly higher scores compared with internet responses in the &#x201C;objectivity&#x201D; category (mean 3.91, SD 0.63 vs mean 3.36, SD 0.80; mean difference 0.55, SD 1.00; 95% CI 0.03&#x2010;1.06; <italic>P</italic>=.04) and the &#x201C;reproducibility&#x201D; category (mean 3.85, SD 0.49 vs mean 3.00, SD 0.97; mean difference 0.85, SD 1.14; 95% CI 0.27&#x2010;1.44; <italic>P</italic>=.007). There was no significant difference in the mean scores in the &#x201C;safety,&#x201D; &#x201C;consensus,&#x201D; &#x201C;relevance,&#x201D; and &#x201C;explainability&#x201D; categories (<xref ref-type="table" rid="table3">Table 3</xref>). 
Interrater agreement was high: overall percentage agreement 95.1%, Gwet agreement coefficient 0.879 (95% CI 0.853&#x2010;0.904; <italic>P</italic>&#x003C;.001).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Mean scores of large language model (LLM) and internet search responses based on the safety, consensus, objectivity, reproducibility, relevance, and explainability (SCORE) framework&#x2014;LLM responses had significantly higher mean scores in the &#x201C;objectivity&#x201D; and &#x201C;reproducibility&#x201D; categories.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">LLM, mean (SD)</td><td align="left" valign="bottom">Internet search, mean (SD)</td><td align="left" valign="bottom">Mean difference<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> (SD; 95% CI)</td><td align="left" valign="bottom"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top">Safety</td><td align="left" valign="top">3.81 (0.69)</td><td align="left" valign="top">3.55 (0.66)</td><td align="left" valign="top">0.26 (0.92; &#x2013;0.21 to 0.74)</td><td align="left" valign="top">.25</td></tr><tr><td align="left" valign="top">Consensus with guidelines</td><td align="left" valign="top">3.87 (0.79)</td><td align="left" valign="top">3.46 (0.68)</td><td align="left" valign="top">0.41 (1.06; &#x2013;0.13 to 0.96)</td><td align="left" valign="top">.13</td></tr><tr><td align="left" valign="top">Objectivity</td><td align="left" valign="top">3.91 (0.63)</td><td align="left" valign="top">3.36 (0.80)</td><td align="left" valign="top">0.55 (1.00; 0.03 to 1.06)</td><td align="left" valign="top">.04</td></tr><tr><td align="left" valign="top">Reproducibility</td><td align="left" valign="top">3.85 (0.49)</td><td align="left" valign="top">3.00 (0.97)</td><td align="left" valign="top">0.85 (1.14; 0.27 to 1.44)</td><td align="left" 
valign="top">.007</td></tr><tr><td align="left" valign="top">Relevance</td><td align="left" valign="top">4.06 (0.64)</td><td align="left" valign="top">3.60 (0.67)</td><td align="left" valign="top">0.46 (0.94; &#x2013;0.02 to 0.94)</td><td align="left" valign="top">.06</td></tr><tr><td align="left" valign="top">Explainability</td><td align="left" valign="top">3.76 (0.70)</td><td align="left" valign="top">3.37 (0.82)</td><td align="left" valign="top">0.39 (1.15; &#x2013;0.20 to 0.98)</td><td align="left" valign="top">.18</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>Mean difference is defined as the difference between the LLM score and the internet search score.</p></fn></table-wrap-foot></table-wrap><p>Evaluators expressed that LLM was able to accurately and succinctly provide answers to questions on the general use of GLP1RA for obesity treatment, such as the dietary and lifestyle changes required, and how to manage common side effects such as nausea. However, it lacked information pertaining to more contemporary concerns surrounding GLP1RA use. For example, responses stated that there was &#x201C;no direct evidence&#x2026;that Ozempic increases fertility,&#x201D; and that &#x201C;there is no conclusive evidence directly linking Ozempic to an increased risk of suicide.&#x201D;</p><p>In contrast, internet search responses provided updated and detailed information surrounding some topics. 
For example, in response to the question &#x201C;does Ozempic increase the risk of suicide?,&#x201D; one source explained that this question arose from &#x201C;reports of up to 150 people who took the drugs and experienced suicidal thoughts or self-injury,&#x201D; but that larger &#x201C;real-world studies,&#x201D; such as &#x201C;one in Nature Medicine looked at more than a million patients and found that the use of Ozempic&#x2026;might substantially decrease the rate of death from suicide.&#x201D; The source also explored important potential biological mechanisms linking obesity and GLP1RA therapy to psychiatric disorders. Another source provided advice to physicians prescribing semaglutide, stating that they should &#x201C;inform their patients about&#x2026;risks, assess their psychiatric history, and evaluate the mental state of patients before starting treatment,&#x201D; and highlighted the importance of &#x201C;medical supervision&#x201D; by prescribers. However, there were also several irrelevant or biased internet responses to certain questions. 
Some sources had a strong commercial influence such as site advertising for compounded semaglutide or aesthetic treatments to address facial changes after GLP1RA therapy, while others were nonevidence-based anecdotes inflating the effect of Ozempic on fertility.</p><p>A summary of the observed strengths and weaknesses of the LLM as compared to internet search is detailed in <xref ref-type="table" rid="table4">Table 4</xref>.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Examples of the observed strengths and weaknesses of the large language model (LLM) compared with internet search&#x2014;LLM responses were more succinct and reproducible, while internet search responses provided more updated and detailed information surrounding certain topics.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Domain</td><td align="left" valign="bottom">Questions</td><td align="left" valign="bottom">Strengths of large language model compared with internet search</td><td align="left" valign="bottom">Weaknesses of large language model compared with internet search</td></tr></thead><tbody><tr><td align="left" valign="top">Domain 1:<break/>indications and benefits of GLP1RA<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>My wedding is coming up and I need to lose 5 kg. Can I take semaglutide to lose weight? 
My current BMI is 20.</p></list-item><list-item><p>Can semaglutide help me quit smoking/alcohol?</p></list-item><list-item><p>Will Ozempic increase my chances of getting pregnant?</p></list-item><list-item><p>Can Wegovy be used to treat PCOS<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup>?</p></list-item><list-item><p>Can Wegovy be used to treat binge eating disorder?</p></list-item><list-item><p>Can I take Rybelsus for weight loss instead of Wegovy?</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Provided useful lifestyle advice</p></list-item><list-item><p>Direct, succinct, and reproducible answers</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Wrongly stated that GLP1RAs have no impact on fertility.</p></list-item><list-item><p>Some answers did not include detailed explanations about available evidence (eg, evidence about impact of semaglutide on smoking cessation).</p></list-item></list></td></tr><tr><td align="left" valign="top">Domain 2:<break/>expected treatment course</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>My Ozempic is out of stock. 
Can I substitute with compounded semaglutide?</p></list-item><list-item><p>When will I start to see the effects after starting Wegovy?</p></list-item><list-item><p>I feel so nauseous after starting Ozempic, what should I do?</p></list-item><list-item><p>How/why should I change my diet after starting semaglutide?</p></list-item><list-item><p>Can I stop Wegovy once I hit my target weight?</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Provided appropriate responses to the expected weight loss, dietary changes, and tips to manage nausea</p></list-item><list-item><p>Provided objective recommendations to query about non-FDA<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup> approved substances such as compounded semaglutide</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Response to the query on expected weight loss trajectory was not as detailed as the internet search responses.</p></list-item></list></td></tr><tr><td align="left" valign="top">Domain 3:<break/>common side effects and specific risks</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Can I use Ozempic if I have thyroid problems?</p></list-item><list-item><p>Is unplanned pregnancy a side effect of Ozempic?</p></list-item><list-item><p>Can long-term Ozempic use lead to cancer?</p></list-item><list-item><p>Does Ozempic increase the risk of suicide?</p></list-item><list-item><p>Can semaglutide cause pancreas issues?</p></list-item><list-item><p>Is there a risk of developing Ozempic face after taking Ozempic?</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Provided nuanced responses to complex questions on pancreatitis</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Wrongly stated that GLP1RAs have no impact on fertility.</p></list-item><list-item><p>Wrongly stated that there were no 
reports of suicidal ideation with GLP1RAs.</p></list-item><list-item><p>Overemphasized the risk of medullary thyroid cancer and incorrectly recommended close monitoring of thyroid status, which is not routinely necessary or relevant.</p></list-item><list-item><p>Did not explain that treating obesity may lead to reduction in cancer risk.</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>GLP1RA: glucagon-like peptide-1 receptor agonist.</p></fn><fn id="table4fn2"><p><sup>b</sup>PCOS: polycystic ovary syndrome.</p></fn><fn id="table4fn3"><p><sup>c</sup>FDA: United States Food and Drug Administration.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>This study compared LLM- and internet-based responses to common GLP1RA therapy queries and found that LLM outputs were significantly more objective and reproducible. Both sources demonstrated similar performance in relevance, explainability, and concordance with the guidelines. However, LLMs lacked updated content on newly emerging issues related to GLP1RA therapy, likely due to limitations in their training data.</p><p>In this increasingly digital landscape of health care, LLMs offer a potentially valuable tool for individuals seeking information about obesity treatment. Many people living with obesity may be hesitant to directly approach health care professionals for care due to stigma or socioeconomic barriers [<xref ref-type="bibr" rid="ref25">25</xref>], and may first turn to digital platforms to explore weight loss advice and options. The recent surge in public interest in GLP1RAs, including queries about their efficacy, side effects, cost, and availability, has also been reflected in a significant increase in online discourse across social media platforms [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. 
A recent cross-sectional survey conducted among the community in the United Kingdom reported that the majority of respondents first learned about GLP1RA from the news and social media (50%&#x2010;60%), with only a minority of respondents (9%) first learning about them from health care providers [<xref ref-type="bibr" rid="ref7">7</xref>]. Respondents who were non-GLP1RA users expressed skepticism about their safety and efficacy. By contrast, another recent study demonstrated that engagement with an app-based digital weight loss program created by clinicians and behavioral scientists significantly enhanced weight loss outcomes of patients being treated with incretin analogs used for obesity [<xref ref-type="bibr" rid="ref15">15</xref>]. Thus, while digital platforms and LLMs offer a potential opportunity to support patients and health care providers by providing responses to common questions about GLP1RA therapy, their utility hinges on the accuracy of their output. Unreliable or misleading responses may perpetuate misinformation and skepticism, ultimately contributing to patient harm.</p><p>This study demonstrated that LLM responses provided appropriate, objective, and reproducible information about standard GLP1RA-related queries, including the benefits of GLP1RA, expected treatment course, and common side effects. Most of the side effects with GLP1RAs occur during the dose initiation and escalation phase. Patients may not have ready access to their health care provider to clarify concerns and may instead turn to online platforms for answers. The ability of LLMs to provide personalized, coherent, and relevant answers to queries is a promising avenue of exploration to improve patient engagement and outcomes [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>].</p><p>However, static training data restricts LLMs from reflecting the latest evidence or controversies&#x2014;such as GLP1RA effects on fertility or mental health. 
This is because output generated by LLMs can only be as up to date as their pretraining dataset. However, current-generation LLMs can be optimized with internet search agents or techniques such as retrieval-augmented generation to overcome this limitation [<xref ref-type="bibr" rid="ref28">28</xref>], although this was beyond the scope of the current study. Nonetheless, this limitation emphasizes the need for human oversight and continuous updates to maintain clinical relevance, as relying solely on LLM responses could lead to false reassurance (eg, the potential impact of GLP1RA on fertility and suicidal ideation) or unwarranted concern (eg, overemphasized association with thyroid cancer).</p><p>Finally, our study demonstrated that LLM responses to identical initial prompts have high reproducibility. Internet search results were much more heterogeneous; one response could be a well-researched and balanced answer to the query at hand, while another could be an anecdote that was biased or contained misleading information. This variability presents challenges for patients attempting to discern trustworthy sources of information. Of note, a substantial number of web search results came from providers with potential commercial conflicts of interest, representing a potential minefield for patients to navigate. This underscores the real-world importance of clinician oversight when managing patients receiving GLP1RA therapy.</p><p>This study had several limitations. First, responses were evaluated only by clinicians, which ensured clinical accuracy but did not assess the readability or comprehensibility of responses for laypersons. Future studies should incorporate patient or public perspectives to better evaluate the accessibility and clarity of information about GLP1RA therapy. Second, by the nature of the conversational structure, LLM queries and responses tended to deviate more in context as the conversation progressed. 
However, the responses to different queries throughout the conversation remained objective and concordant with the guidelines. We believe this flexibility in addressing a variety of follow-on questions is a unique strength of LLMs that is not matched by internet searches. Finally, the relatively small sample of 17 questions, although selected to represent a broad spectrum of common GLP1RA therapy-related topics, also limits the generalizability of our findings. Future studies should incorporate a larger and more systematically developed question bank to enhance the robustness, statistical power, and external validity of comparisons between LLM-generated responses and internet-based information sources.</p><p>In conclusion, while both LLMs and internet searches can provide information about GLP1RA therapy, each has distinct limitations. LLMs produced more objective and reproducible responses but lacked updated coverage of emerging topics. Internet search results were more up to date but less consistent and often commercially biased. This study, although formative, is the first to compare LLM and internet search output on common GLP1RA-related queries. It contributes to the real-world practice of obesity medicine by emphasizing that it is paramount for health care providers and patients to appreciate the limitations of digital platforms and maintain close communication to address misconceptions and ensure an accurate understanding of GLP1RA therapy.</p><p>Beyond factual accuracy, trust and user experience are equally important in the digital health context. Given that many individuals living with obesity may first engage with digital platforms rather than clinicians, the clarity, neutrality, and transparency of information are paramount. 
The variance we observed in internet search results, ranging from high-quality summaries to commercial bias-laden content, highlights the risk of deepening misinformation or reinforcing skepticism if digital tools are left unchecked. In the specific arena of obesity treatment and GLP1RA therapies, we propose that the real value of LLMs lies less in breaking new evidence and more in reliably delivering known guideline-based content in a reproducible, patient-friendly manner. Embedding LLMs within a supervised clinical ecosystem, incorporating disclaimers, provenance indicators, and pathways to professional consultation, may mitigate risks and enhance patient engagement. Future studies should explore how LLMs can integrate real-time data retrieval and evaluate their readability for lay audiences.</p></sec></body><back><ack><p>Generative artificial intelligence (AI) was not used in the initial drafting of the manuscript or the subsequent interim revisions. In the final revision, a generative AI tool (GPT-5; OpenAI) was used to help with brainstorming points for the further expansion of the introduction and discussion sections requested by the handling editor <xref ref-type="supplementary-material" rid="app3">(Multimedia Appendix 3)</xref>. All authors have verified the content of the manuscript and take responsibility for the quality of this work.</p></ack><notes><sec><title>Data Availability</title><p>All data generated or analyzed during this study are included in this published paper and its multimedia appendices. 
Further information is available from the corresponding author upon reasonable request.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: GGRS, SYTT, PCL</p><p>Data curation: SYTT, PCL</p><p>Formal analysis: SYTT, PCL</p><p>Funding acquisition: NA</p><p>Investigation: GGRS, SYTT, PCL</p><p>Methodology: GGRS</p><p>Project administration: PCL</p><p>Resources: GGRS, SYTT, PCL</p><p>Software: GGRS, SYTT, PCL</p><p>Supervision: PCL</p><p>Validation: GGRS, SYTT</p><p>Visualization: GGRS, SYTT, PCL</p><p>Writing &#x2013; original draft: SYTT, GGRS</p><p>Writing &#x2013; review &#x0026; editing: PCL</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">API</term><def><p>application programming interface</p></def></def-item><def-item><term id="abb2">CHART</term><def><p>Chatbot Assessment Reporting Tool</p></def></def-item><def-item><term id="abb3">GLP1RA</term><def><p>glucagon-like peptide-1 receptor agonist</p></def></def-item><def-item><term id="abb4">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb5">SCORE</term><def><p>safety, consensus, objectivity, reproducibility, relevance, and explainability</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zheng</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Zong</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Glucagon-like peptide-1 receptor: mechanisms and advances in therapy</article-title><source>Sig Transduct Target Ther</source><year>2024</year><volume>9</volume><issue>1</issue><pub-id pub-id-type="doi">10.1038/s41392-024-01931-z</pub-id></nlm-citation></ref><ref 
id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wilding</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Batterham</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Calanna</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Once-weekly semaglutide in adults with overweight or obesity</article-title><source>N Engl J Med</source><year>2021</year><month>03</month><day>18</day><volume>384</volume><issue>11</issue><fpage>989</fpage><lpage>1002</lpage><pub-id pub-id-type="doi">10.1056/NEJMoa2032183</pub-id><pub-id pub-id-type="medline">33567185</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fong</surname><given-names>S</given-names> </name><name name-style="western"><surname>Carollo</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lazuras</surname><given-names>L</given-names> </name><name name-style="western"><surname>Corazza</surname><given-names>O</given-names> </name><name name-style="western"><surname>Esposito</surname><given-names>G</given-names> </name></person-group><article-title>Ozempic (Glucagon-like peptide 1 receptor agonist) in social media posts: unveiling user perspectives through Reddit topic modeling</article-title><source>Emerg Trends Drugs Addict Health</source><year>2024</year><month>12</month><volume>4</volume><fpage>100157</fpage><pub-id pub-id-type="doi">10.1016/j.etdah.2024.100157</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Basch</surname><given-names>CH</given-names> </name><name name-style="western"><surname>Narayanan</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Tang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Fera</surname><given-names>J</given-names> </name><name name-style="western"><surname>Basch</surname><given-names>CE</given-names> </name></person-group><article-title>Descriptive analysis of TikTok videos posted under the hashtag #Ozempic</article-title><source>Journal of Medicine, Surgery, and Public Health</source><year>2023</year><volume>1</volume><fpage>100013</fpage><pub-id pub-id-type="doi">10.1016/j.glmedi.2023.100013</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Somani</surname><given-names>S</given-names> </name><name name-style="western"><surname>Jain</surname><given-names>SS</given-names> </name><name name-style="western"><surname>Sarraju</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sandhu</surname><given-names>AT</given-names> </name><name name-style="western"><surname>Hernandez-Boussard</surname><given-names>T</given-names> </name><name name-style="western"><surname>Rodriguez</surname><given-names>F</given-names> </name></person-group><article-title>Using large language models to assess public perceptions around glucagon-like peptide-1 receptor agonists on social media</article-title><source>Commun Med (Lond)</source><year>2024</year><month>07</month><day>10</day><volume>4</volume><issue>1</issue><fpage>137</fpage><pub-id pub-id-type="doi">10.1038/s43856-024-00566-z</pub-id><pub-id pub-id-type="medline">38987347</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sobel</surname><given-names>SI</given-names> </name><name name-style="western"><surname>Dub&#x00E9;</surname><given-names>JJ</given-names> </name></person-group><article-title>Social media for 
obesity education: a general overview for the novice creator</article-title><source>Adv Physiol Educ</source><year>2024</year><month>03</month><day>1</day><volume>48</volume><issue>1</issue><fpage>114</fpage><lpage>121</lpage><pub-id pub-id-type="doi">10.1152/advan.00120.2023</pub-id><pub-id pub-id-type="medline">38205520</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Auerbach</surname><given-names>N</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>VN</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>DR</given-names> </name><name name-style="western"><surname>Clift</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Al-Ammouri</surname><given-names>M</given-names> </name><name name-style="western"><surname>El-Osta</surname><given-names>A</given-names> </name></person-group><article-title>What are community perspectives and experiences around GLP-1 receptor agonist medications for weight loss? 
A cross-sectional survey study in the UK</article-title><source>BMJ Public Health</source><year>2025</year><volume>3</volume><issue>2</issue><fpage>e002519</fpage><pub-id pub-id-type="doi">10.1136/bmjph-2024-002519</pub-id><pub-id pub-id-type="medline">40734969</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Azizi</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Rodriguez</surname><given-names>F</given-names> </name><name name-style="western"><surname>Assimes</surname><given-names>TL</given-names> </name></person-group><article-title>Digital footprints of obesity treatment: GLP-1 receptor agonists and the health equity divide</article-title><source>Circulation</source><year>2024</year><month>07</month><day>16</day><volume>150</volume><issue>3</issue><fpage>171</fpage><lpage>173</lpage><pub-id pub-id-type="doi">10.1161/CIRCULATIONAHA.124.069680</pub-id><pub-id pub-id-type="medline">39008562</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Propfe</surname><given-names>LE</given-names> </name><name name-style="western"><surname>Seifert</surname><given-names>R</given-names> </name></person-group><article-title>Misrepresentation of semaglutide in social media</article-title><source>Naunyn Schmiedebergs Arch Pharmacol</source><year>2025</year><month>07</month><day>19</day><pub-id pub-id-type="doi">10.1007/s00210-025-04403-5</pub-id><pub-id pub-id-type="medline">40682686</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mendel</surname><given-names>T</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>N</given-names> </name><name 
name-style="western"><surname>Mann</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Wiesenfeld</surname><given-names>B</given-names> </name><name name-style="western"><surname>Nov</surname><given-names>O</given-names> </name></person-group><article-title>Laypeople&#x2019;s use of and attitudes toward large language models and search engines for health queries: survey study</article-title><source>J Med Internet Res</source><year>2025</year><month>02</month><day>13</day><volume>27</volume><fpage>e64290</fpage><pub-id pub-id-type="doi">10.2196/64290</pub-id><pub-id pub-id-type="medline">39946180</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Teo</surname><given-names>ZL</given-names> </name><name name-style="western"><surname>Thirunavukarasu</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Elangovan</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Generative artificial intelligence in medicine</article-title><source>Nat Med</source><year>2025</year><month>10</month><volume>31</volume><issue>10</issue><fpage>3270</fpage><lpage>3282</lpage><pub-id pub-id-type="doi">10.1038/s41591-025-03983-2</pub-id><pub-id pub-id-type="medline">41053447</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ponzo</surname><given-names>V</given-names> </name><name name-style="western"><surname>Goitre</surname><given-names>I</given-names> </name><name name-style="western"><surname>Favaro</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Is ChatGPT an effective tool for providing dietary 
advice?</article-title><source>Nutrients</source><year>2024</year><month>02</month><day>6</day><volume>16</volume><issue>4</issue><fpage>469</fpage><pub-id pub-id-type="doi">10.3390/nu16040469</pub-id><pub-id pub-id-type="medline">38398794</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Samaan</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Yeo</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Rajeev</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Assessing the accuracy of responses by the language model ChatGPT to questions regarding bariatric surgery</article-title><source>Obes Surg</source><year>2023</year><month>06</month><volume>33</volume><issue>6</issue><fpage>1790</fpage><lpage>1796</lpage><pub-id pub-id-type="doi">10.1007/s11695-023-06603-5</pub-id><pub-id pub-id-type="medline">37106269</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Karnan</surname><given-names>N</given-names> </name><name name-style="western"><surname>Nair</surname><given-names>S</given-names> </name><name name-style="western"><surname>Fidai</surname><given-names>FF</given-names> </name><name name-style="western"><surname>Gurrala</surname><given-names>SV</given-names> </name><name name-style="western"><surname>Salim</surname><given-names>J</given-names> </name><name name-style="western"><surname>Gomma</surname><given-names>A</given-names> </name></person-group><article-title>Evaluating the Efficacy of ChatGPT vs. 
Google Gemini in generating patient education materials for GLP-1 receptor agonists (semaglutide, liraglutide, tirzepatide): a cross-sectional study</article-title><source>Cureus</source><year>2025</year><month>04</month><volume>17</volume><issue>4</issue><fpage>e81993</fpage><pub-id pub-id-type="doi">10.7759/cureus.81993</pub-id><pub-id pub-id-type="medline">40351930</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Johnson</surname><given-names>H</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>D</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>V</given-names> </name><name name-style="western"><surname>Ammouri</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Jacobs</surname><given-names>C</given-names> </name><name name-style="western"><surname>El-Osta</surname><given-names>A</given-names> </name></person-group><article-title>Impact of digital engagement on weight loss outcomes in obesity management among individuals using GLP-1 and dual GLP-1/GIP receptor agonist therapy: retrospective cohort service evaluation study</article-title><source>J Med Internet Res</source><year>2025</year><month>03</month><day>31</day><volume>27</volume><fpage>e69466</fpage><pub-id pub-id-type="doi">10.2196/69466</pub-id><pub-id pub-id-type="medline">40164173</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Caballero Mateos</surname><given-names>I</given-names> </name><name name-style="western"><surname>Morales Portillo</surname><given-names>C</given-names> </name><name name-style="western"><surname>Lainez L&#x00F3;pez</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Vilches-Arenas</surname><given-names>&#x00C1;</given-names> </name></person-group><article-title>Efficacy of a digital educational intervention for patients with type 2 diabetes mellitus: multicenter, randomized, prospective, 6-month follow-up study</article-title><source>J Med Internet Res</source><year>2025</year><month>04</month><day>10</day><volume>27</volume><fpage>e60758</fpage><pub-id pub-id-type="doi">10.2196/60758</pub-id><pub-id pub-id-type="medline">40209213</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gibson</surname><given-names>D</given-names> </name><name name-style="western"><surname>Jackson</surname><given-names>S</given-names> </name><name name-style="western"><surname>Shanmugasundaram</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Evaluating the efficacy of ChatGPT as a patient education tool in prostate cancer: multimetric assessment</article-title><source>J Med Internet Res</source><year>2024</year><month>08</month><day>14</day><volume>26</volume><fpage>e55939</fpage><pub-id pub-id-type="doi">10.2196/55939</pub-id><pub-id pub-id-type="medline">39141904</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Muraira</surname><given-names>PM</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>A</given-names> </name><name name-style="western"><surname>Lerner</surname><given-names>LB</given-names> </name><etal/></person-group><article-title>Limitations of ChatGPT for patient education regarding frequently searched questions about benign prostatic hyperplasia</article-title><source>JU Open Plus</source><year>2024</year><month>07</month><day>14</day><volume>2</volume><issue>7</issue><pub-id 
pub-id-type="doi">10.1097/JU9.0000000000000167</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Musheyev</surname><given-names>D</given-names> </name><name name-style="western"><surname>Pan</surname><given-names>A</given-names> </name><name name-style="western"><surname>Loeb</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kabarriti</surname><given-names>AE</given-names> </name></person-group><article-title>How well do artificial intelligence chatbots respond to the top search queries about urological malignancies?</article-title><source>Eur Urol</source><year>2024</year><month>01</month><volume>85</volume><issue>1</issue><fpage>13</fpage><lpage>16</lpage><pub-id pub-id-type="doi">10.1016/j.eururo.2023.07.004</pub-id><pub-id pub-id-type="medline">37567827</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>&#x00D6;zcan</surname><given-names>F</given-names> </name><name name-style="western"><surname>&#x00D6;r&#x00FC;c&#x00FC; Atar</surname><given-names>M</given-names> </name><name name-style="western"><surname>K&#x00F6;ro&#x011F;lu</surname><given-names>&#x00D6;</given-names> </name><name name-style="western"><surname>Y&#x0131;lmaz</surname><given-names>B</given-names> </name></person-group><article-title>Assessment of the reliability and usability of ChatGPT in response to spinal cord injury questions</article-title><source>J Spinal Cord Med</source><year>2025</year><month>09</month><volume>48</volume><issue>5</issue><fpage>852</fpage><lpage>857</lpage><pub-id pub-id-type="doi">10.1080/10790268.2024.2361551</pub-id><pub-id pub-id-type="medline">38860862</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="web"><article-title>Basics of Google 
Trends</article-title><source>Google News Initiative</source><access-date>2025-10-24</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://newsinitiative.withgoogle.com/en-gb/resources/trainings/basics-of-google-trends/">https://newsinitiative.withgoogle.com/en-gb/resources/trainings/basics-of-google-trends/</ext-link></comment></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Tan</surname><given-names>TF</given-names> </name><name name-style="western"><surname>Elangovan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Ong</surname><given-names>JCL</given-names> </name><etal/></person-group><article-title>A proposed S.C.O.R.E. evaluation framework for large language models &#x2013; safety, consensus &#x0026; context, objectivity, reproducibility and explainability</article-title><source>arXiv</source><comment>Preprint posted online on Jul 10, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2407.07666</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="web"><article-title>Human Biomedical Research Act 2015</article-title><source>Singapore Statutes Online</source><year>2016</year><month>07</month><day>01</day><access-date>2025-11-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://sso.agc.gov.sg/Act/HBRA2015#al">https://sso.agc.gov.sg/Act/HBRA2015#al</ext-link></comment></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><collab>The CHART Collaborative</collab></person-group><article-title>Reporting guideline for chatbot health advice studies: the Chatbot Assessment Reporting Tool (CHART) statement</article-title><source>BMJ Med</source><year>2025</year><month>08</month><volume>4</volume><issue>1</issue><fpage>e001632</fpage><pub-id 
pub-id-type="doi">10.1136/bmjmed-2025-001632</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Puhl</surname><given-names>RM</given-names> </name></person-group><article-title>Weight stigma and barriers to effective obesity care</article-title><source>Gastroenterol Clin North Am</source><year>2023</year><month>06</month><volume>52</volume><issue>2</issue><fpage>417</fpage><lpage>428</lpage><pub-id pub-id-type="doi">10.1016/j.gtc.2023.02.002</pub-id><pub-id pub-id-type="medline">37197883</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Davenport</surname><given-names>T</given-names> </name><name name-style="western"><surname>Kalakota</surname><given-names>R</given-names> </name></person-group><article-title>The potential for artificial intelligence in healthcare</article-title><source>Future Healthc J</source><year>2019</year><month>06</month><volume>6</volume><issue>2</issue><fpage>94</fpage><lpage>98</lpage><pub-id pub-id-type="doi">10.7861/futurehosp.6-2-94</pub-id><pub-id pub-id-type="medline">31363513</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sharma</surname><given-names>S</given-names> </name><name name-style="western"><surname>Pajai</surname><given-names>S</given-names> </name><name name-style="western"><surname>Prasad</surname><given-names>R</given-names> </name><etal/></person-group><article-title>A critical review of ChatGPT as a potential substitute for diabetes educators</article-title><source>Cureus</source><year>2023</year><pub-id pub-id-type="doi">10.7759/cureus.38380</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation 
citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Gao</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Xiong</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Gao</surname><given-names>X</given-names> </name><etal/></person-group><article-title>Retrieval-augmented generation for large language models: a survey</article-title><source>arXiv</source><access-date>2025-05-29</access-date><comment>Preprint posted online on Dec 18, 2023</comment><comment><ext-link ext-link-type="uri" xlink:href="https://arxiv.org/abs/2312.10997">https://arxiv.org/abs/2312.10997</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Python code for obtaining large language model and internet search output.</p><media xlink:href="formative_v9i1e78289_app1.docx" xlink:title="DOCX File, 17 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Full output from large language model and internet searches.</p><media xlink:href="formative_v9i1e78289_app2.xlsx" xlink:title="XLSX File, 402 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>ChatGPT transcripts.</p><media xlink:href="formative_v9i1e78289_app3.docx" xlink:title="DOCX File, 14 KB"/></supplementary-material><supplementary-material id="app4"><label>Checklist 1</label><p>CHART checklist.</p><media xlink:href="formative_v9i1e78289_app4.docx" xlink:title="DOCX File, 23 KB"/></supplementary-material></app-group></back></article>