<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="letter"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v10i1e90139</article-id><article-id pub-id-type="doi">10.2196/90139</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Letter</subject></subj-group></article-categories><title-group><article-title>Retrieval-Augmented Generation Versus GPT-4o for Patient-Facing Gynecological Cancer Information: Quality Evaluation</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Pearson</surname><given-names>Stephen</given-names></name><degrees>MRES</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Reyburn</surname><given-names>Mimi</given-names></name><degrees>MEng</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Foley</surname><given-names>Conor</given-names></name><degrees>MBChB</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="fn" 
rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Finch</surname><given-names>Alison</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Bench</surname><given-names>Suzanne</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff4">4</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Bonnici</surname><given-names>Timothy</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Rose</surname><given-names>Louise</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib></contrib-group><aff id="aff1"><institution>Critical Care, University College London Hospitals NHS Foundation Trust</institution><addr-line>London</addr-line><addr-line>England</addr-line><country>United Kingdom</country></aff><aff id="aff2"><institution>University College London Hospitals NHS Foundation Trust</institution><addr-line>London</addr-line><country>United Kingdom</country></aff><aff id="aff3"><institution>Anaesthetic Department, Whittington Health NHS Trust</institution><addr-line>London</addr-line><addr-line>England</addr-line><country>United Kingdom</country></aff><aff id="aff4"><institution>School of Nursing and Midwifery, London South Bank University</institution><addr-line>London</addr-line><addr-line>England</addr-line><country>United Kingdom</country></aff><aff id="aff5"><institution>Florence Nightingale Faculty of Nursing, Midwifery and Palliative Care, 
King's College London</institution><addr-line>King's College London, Strand</addr-line><addr-line>London</addr-line><addr-line>England</addr-line><country>United Kingdom</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Bucher</surname><given-names>Amy</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Lin</surname><given-names>Kuan-Hsun</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Pasupulety</surname><given-names>Ujjwal</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Louise Rose, PhD, Florence Nightingale Faculty of Nursing, Midwifery and Palliative Care, King's College London, King's College London, Strand, London, England, WC2R2LS, United Kingdom, 44 02078365454; <email>louise.rose@kcl.ac.uk</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>all authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>24</day><month>4</month><year>2026</year></pub-date><volume>10</volume><elocation-id>e90139</elocation-id><history><date date-type="received"><day>22</day><month>12</month><year>2025</year></date><date date-type="rev-recd"><day>12</day><month>03</month><year>2026</year></date><date date-type="accepted"><day>12</day><month>03</month><year>2026</year></date></history><copyright-statement>&#x00A9; Stephen Pearson, Mimi Reyburn, Conor Foley, Alison Finch, Suzanne Bench, Timothy Bonnici, Louise Rose. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 24.4.2026. 
</copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2026/1/e90139"/><abstract><p>Retrieval-augmented generation improved overall quality scores for patient-facing gynecological cancer information mainly through better source attribution.</p></abstract><kwd-group><kwd>gynecological neoplasms</kwd><kwd>patient education as topic</kwd><kwd>health literacy</kwd><kwd>large language models</kwd><kwd>retrieval-augmented generation</kwd><kwd>readability</kwd><kwd>GPT-4o</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Health literacy, which is the ability to obtain, process and understand basic health information, varies widely. Furthermore, situational stress influences the ability to comprehend health information [<xref ref-type="bibr" rid="ref1">1</xref>]. High-quality accessible information underpins shared decision-making. Yet people with cancer frequently report information overload and unmet needs at diagnosis and during treatment [<xref ref-type="bibr" rid="ref2">2</xref>]. 
Producing timely, personalized, and readable health-related materials is human-resource intensive, yields static documents, and can result in duplicated effort and variable quality [<xref ref-type="bibr" rid="ref3">3</xref>]. Large language models (LLMs) may offer a solution through rapid generation and adaptation of text. However, LLMs may hallucinate, cite unverifiable sources, and produce materials misaligned with literacy needs [<xref ref-type="bibr" rid="ref4">4</xref>]. Retrieval augmented generation (RAG) may address these issues as it grounds LLM outputs in verified information as opposed to proprietary LLMs that rely on training data alone [<xref ref-type="bibr" rid="ref5">5</xref>].</p><p>Our objective was to compare base GPT-4o with GPT-4o enhanced with a tailored gynecological cancer knowledge base (RAG GPT-4o) in order to determine whether RAG improves the quality of AI-generated patient-facing information.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>We compared two GPT-4o configurations to answer frequently asked gynecological cancer questions compiled from patient queries and service information resources. For the RAG GPT-4o intervention arm, a retrieval layer supplied passages from a curated UK knowledge base restricted to Macmillan Cancer Support [<xref ref-type="bibr" rid="ref6">6</xref>] and The Eve Appeal [<xref ref-type="bibr" rid="ref7">7</xref>] resources. The Base GPT-4o control arm generated answers without retrieval of external sources. RAG GPT-4o ran via an application programming interface (API); Base GPT-4o ran via the web interface.</p><p>We compiled 17 questions and generated two responses per question (Base GPT-4o via ChatGPT web interface; RAG GPT-4o via API), producing 34 outputs. Prompts were held constant across configurations. Paired model outputs for each question formed the unit of comparison. 
Seven expert raters each evaluated eight paired outputs, while an LLM-judge evaluated all 17 paired outputs. API generation settings were recorded for reproducibility; equivalent sampling parameters could not be fixed in the web interface.</p></sec><sec id="s2-2"><title>Recruitment</title><p>We recruited a convenience sample of clinical nurse specialists (CNSs) via professional networks. Participants were presented with paired outputs in random order and blinded to allocation. Each participant evaluated the same eight preselected question-answer pairs, the maximum feasible within one hour based on pilot testing, using the Quality Analysis of Medical Artificial Intelligence (QAMAI) tool (six domains rated on 5-point Likert scales) [<xref ref-type="bibr" rid="ref8">8</xref>]. Readability was assessed with the Linguistic Features Toolkit (LFTK), including Flesch Reading Ease (FRE), Flesch Kincaid Grade Level (FKGL), and total words [<xref ref-type="bibr" rid="ref9">9</xref>]. An LLM-judge, implemented with RAG GPT-4o, also rated all paired outputs using the QAMAI tool. Prompt templates and model configuration details, including API settings, are provided.</p></sec><sec id="s2-3"><title>Analysis</title><p>Two-tailed paired <italic>t</italic>-tests were used to compare Base and RAG scores when the within-pair differences were approximately normally distributed; Wilcoxon signed-rank tests were used otherwise. Inter-rater agreement was assessed with intraclass correlation coefficients (ICCs 3,k); internal consistency with Cronbach &#x03B1;.</p></sec><sec id="s2-4"><title>Ethical Considerations</title><p>This study was approved by the King&#x2019;s College London Minimal Risk Research Ethics Committee (MRSU 24/25 46882). Participants received a participant information sheet, provided written informed consent, and could withdraw at any time. Privacy and confidentiality were protected throughout. 
Participants received a &#x00A3;25 voucher for participation.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>Seven UK-based CNSs participated in the study (median post-registration experience 20 years; IQR 5&#x2010;30). Data collection ran from March to May 2025. Participants rated the quality of RAG GPT-4o answers higher than those generated by base GPT-4o, with the highest mean difference in the <italic>Provision of Sources and References</italic> domain (<xref ref-type="table" rid="table1">Table 1</xref>). Inter-rater agreement for total QAMAI scores was moderate (ICC 0.65, 95% CI 0.31-0.86); internal consistency was good (Cronbach &#x03B1;=0.81). The LLM-judge rated the quality of RAG GPT-4o answers higher than those generated by base GPT-4o (<italic>P</italic>&#x003C;.001), again driven by the QAMAI <italic>Provision of Sources and References</italic> domain (<italic>P</italic>=.01) (<xref ref-type="fig" rid="figure1">Figure 1</xref>).</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Mean QAMAI (Quality Analysis of Medical Artificial Intelligence) item-level domain scores (1 to 5, higher scores indicate higher quality) for RAG GPT-4o (RAG) and Base GPT-4o (ChatGPT). Left panel (Expert ratings): domain means calculated from 7 expert raters who each scored 8 matched answer pairs. Right panel (LLM ratings): domain means calculated from a single LLM-judge that scored all 17 matched answer pairs once. 
LLM: large language model; RAG: retrieval augmented generation.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e90139_fig01.png"/></fig><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Paired analysis of QAMAI<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> and readability metrics.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">QAMAI domain</td><td align="left" valign="bottom">RAG<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> GPT-4o,<break/>mean (SD)</td><td align="left" valign="bottom">Base GPT-4o, mean (SD)</td><td align="left" valign="bottom">Mean difference<break/>(95% CI)</td><td align="left" valign="bottom"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="5">Participants: 8 question-answer pairs evaluated</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy</td><td align="left" valign="top">4.14 (0.75)</td><td align="left" valign="top">3.98 (0.75)</td><td align="left" valign="top">0.16 (&#x2212;0.05 to 0.37)</td><td align="left" valign="top">.11</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Clarity</td><td align="left" valign="top">4.16 (0.65)</td><td align="left" valign="top">4.02 (0.77)</td><td align="left" valign="top">0.14 (&#x2212;0.01 to 0.30)</td><td align="left" valign="top">.06</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Relevance</td><td align="left" valign="top">4.11 (0.73)</td><td align="left" valign="top">4.09 (0.67)</td><td align="left" valign="top">0.02 (&#x2212;0.26 to 0.23)</td><td align="left" valign="top">.78</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Completeness</td><td align="left" valign="top">3.88 (0.90)</td><td align="left" valign="top">3.84 (0.93)</td><td align="left" valign="top">0.04 (&#x2212;0.16 to 0.23)</td><td align="left" valign="top">.58</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Provision of Sources &#x0026; References</td><td align="left" valign="top">3.98 (0.94)</td><td align="left" valign="top">1.80 (1.24)</td><td align="left" valign="top">2.18 (0.92 to 3.44)</td><td align="left" valign="top">.03</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Usefulness</td><td align="left" valign="top">3.96 (0.69)</td><td align="left" valign="top">3.80 (0.64)</td><td align="left" valign="top">0.16 (&#x2212;0.10 to 0.42)</td><td align="left" valign="top">.17</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total (max=30)</td><td align="left" valign="top">24.23 (3.60)</td><td align="left" valign="top">21.54 (3.46)</td><td align="left" valign="top">2.69 (0.77 to 4.62)</td><td align="left" valign="top">.01<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td></tr><tr><td align="left" valign="top" colspan="5">LLM-Judge: 17 question-answer pairs evaluated</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy</td><td align="left" valign="top">4.82 (0.39)</td><td align="left" valign="top">4.06 (0.24)</td><td align="left" valign="top">0.76 (0.53 to 0.99)</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Clarity</td><td align="left" valign="top">4.82 (0.39)</td><td align="left" 
valign="top">4.94 (0.24)</td><td align="left" valign="top">&#x2212;0.12 (&#x2212;0.35 to 0.11)</td><td align="left" valign="top">.16</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Relevance</td><td align="left" valign="top">5.00 (0)</td><td align="left" valign="top">5.00 (0)</td><td align="left" valign="top">NA<sup><xref ref-type="table-fn" rid="table1fn7">g</xref></sup></td><td align="left" valign="top">NA<sup><xref ref-type="table-fn" rid="table1fn7">g</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Completeness</td><td align="left" valign="top">3.94 (0.24)</td><td align="left" valign="top">4.00 (0)</td><td align="left" valign="top">&#x2212;0.06 (&#x2212;0.18 to 0.06)</td><td align="left" valign="top">.32</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Provision of Sources &#x0026; References</td><td align="left" valign="top">1.71 (0.77)</td><td align="left" valign="top">1.06 (0.24)</td><td align="left" valign="top">0.65 (0.25 to 1.05)</td><td align="left" valign="top">.01</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Usefulness</td><td align="left" valign="top">4.94 (0.24)</td><td align="left" valign="top">4.71 (0.47)</td><td align="left" valign="top">0.23 (&#x2212;0.03 to 0.49)</td><td align="left" valign="top">.10</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total (max=30)</td><td align="left" valign="top">25.24 (1.35)</td><td align="left" valign="top">23.76 (0.56)</td><td align="left" valign="top">1.48 (0.76 to 2.20)</td><td align="left" valign="top">&#x003C;.001<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td></tr><tr><td align="left" 
valign="top">LFTK<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup> Domain</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top" colspan="5">17 question-answer pairs evaluated</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>FKGL<sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup></td><td align="left" valign="top">7.2 (1.85)</td><td align="left" valign="top">6.2 (1.87)</td><td align="left" valign="top">1.00 (&#x2212;0.1 to 2.2)</td><td align="left" valign="top">.08<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>FRE<sup><xref ref-type="table-fn" rid="table1fn6">f</xref></sup></td><td align="left" valign="top">67.9 (10.98)</td><td align="left" valign="top">75.2 (9.54)</td><td align="left" valign="top">&#x2212;8.2 (&#x2212;12.9 to &#x2212;1.9)</td><td align="left" valign="top">.006<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Total words</td><td align="left" valign="top">370.8 (84.27)</td><td align="left" valign="top">308.5 (67.82)</td><td align="left" valign="top">62.3 (34.6 to 90.0)</td><td align="left" valign="top">&#x003C;.001<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>QAMAI: Quality Analysis of Medical Artificial Intelligence.</p></fn><fn id="table1fn2"><p><sup>b</sup>RAG: retrieval augmented generation.</p></fn><fn id="table1fn3"><p><sup>c</sup>Paired <italic>t</italic> test; all other comparisons used the Wilcoxon signed-rank test.</p></fn><fn id="table1fn4"><p><sup>d</sup>LFTK: linguistic features 
toolkit.</p></fn><fn id="table1fn5"><p><sup>e</sup>FKGL: Flesch Kincaid Grade Level.</p></fn><fn id="table1fn6"><p><sup>f</sup>FRE: Flesch Reading Ease.</p></fn><fn id="table1fn7"><p><sup>g</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap><p>RAG GPT-4o answers were longer (<italic>P</italic>&#x003C;.001) and had a lower FRE score (<italic>P</italic>=.006), indicating more difficult-to-read text. FKGL did not differ significantly (<xref ref-type="table" rid="table1">Table 1</xref>).</p></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>Overall, experts and the LLM-judge rated both configurations highly for accuracy, clarity, relevance, and readability. RAG GPT-4o achieved higher overall QAMAI scores than Base GPT-4o, driven mainly by better provision of sources and references rather than perceived gains in accuracy or clarity. Although the LLM-judge identified differences in source attribution, it appeared to weigh provenance less than expert raters or had difficulty applying QAMAI in this domain, consistent with other studies [<xref ref-type="bibr" rid="ref10">10</xref>]. RAG GPT-4o outputs were longer and less easy to read.</p><p>Although overall QAMAI scores were high, this does not eliminate the risk of clinically important failure in patient-facing cancer information. Errors may present as confident hallucinated claims, omission of safety-critical details such as red-flag symptoms or treatment risks, or overgeneralization that fails to address question intent. Provenance-related problems are also possible, including outdated guidance and references that do not support the accompanying statements. 
Within this small sample, ratings did not suggest prominent safety-related concerns, but larger evaluations are needed to characterize the frequency and impact of these types of error, and to determine whether high rubric scores and source attribution are sufficient safeguards for deployment without clinical oversight.</p><p>Improved provenance is expected with RAG, but the marginal benefit observed here must be weighed against the overhead of curating and maintaining a knowledge base, monitoring retrieval quality, and updating content as guidance changes. Although transparent sourcing may support perceived credibility, acceptability and trust in patient-facing information are also shaped by readability, tone, length, and cognitive load. Future work should evaluate whether optimized RAG configurations and patient-centered presentation formats improve usability, comprehension, acceptability, and perceived trust without increasing cognitive burden.</p><p>These findings suggest LLMs can generate patient-facing information that clinicians may judge as accurate, relevant, and readable, and could reduce the time required to draft materials. In this study, RAG improved overall QAMAI scores primarily through source attribution rather than perceived improvements in accuracy or clarity. Because RAG outputs were longer and less easy to read, any gains in transparency should be balanced against potential impacts on comprehensibility and health literacy.</p><p>Study limitations include a small convenience sample, expert ratings restricted to eight of 17 questions, use of a single LLM and UK knowledge base, and interface asymmetry between API and web access. 
Because equivalent sampling parameters could not be fixed in the web interface, this comparison should be interpreted as pragmatic and exploratory.</p></sec></body><back><notes><sec><title>Funding</title><p>This study was supported by the National Institute for Health and Care Research (NIHR) Pre-doctoral Clinical Academic Fellowship. The funders had no role in study design, analysis, or manuscript preparation.</p></sec><sec><title>Data Availability</title><p>The datasets generated or analyzed during this study are available in the GitHub repository [<xref ref-type="bibr" rid="ref11">11</xref>].</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">API</term><def><p>application programming interface</p></def></def-item><def-item><term id="abb2">CNS</term><def><p>clinical nurse specialist</p></def></def-item><def-item><term id="abb3">FKGL</term><def><p>Flesch Kincaid Grade Level</p></def></def-item><def-item><term id="abb4">FRE</term><def><p>Flesch Reading Ease</p></def></def-item><def-item><term id="abb5">ICC</term><def><p>intraclass correlation coefficient</p></def></def-item><def-item><term id="abb6">LFTK</term><def><p>linguistic features toolkit</p></def></def-item><def-item><term id="abb7">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb8">QAMAI</term><def><p>Quality Analysis of Medical Artificial Intelligence</p></def></def-item><def-item><term id="abb9">RAG</term><def><p>retrieval augmented generation</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Berkman</surname><given-names>ND</given-names> </name><name name-style="western"><surname>Davis</surname><given-names>TC</given-names> </name><name 
name-style="western"><surname>McCormack</surname><given-names>L</given-names> </name></person-group><article-title>Health literacy: what is it?</article-title><source>J Health Commun</source><year>2010</year><volume>15 Suppl 2</volume><fpage>9</fpage><lpage>19</lpage><pub-id pub-id-type="doi">10.1080/10810730.2010.499985</pub-id><pub-id pub-id-type="medline">20845189</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Covvey</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Kamal</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Gorse</surname><given-names>EE</given-names> </name><etal/></person-group><article-title>Barriers and facilitators to shared decision-making in oncology: a systematic review of the literature</article-title><source>Support Care Cancer</source><year>2019</year><month>05</month><volume>27</volume><issue>5</issue><fpage>1613</fpage><lpage>1637</lpage><pub-id pub-id-type="doi">10.1007/s00520-019-04675-7</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Papadakos</surname><given-names>J</given-names> </name><name name-style="western"><surname>Giannopoulos</surname><given-names>E</given-names> </name><name name-style="western"><surname>Forbes</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Reinventing the wheel: the incidence and cost implication of duplication of effort in patient education materials development</article-title><source>Patient Educ Couns</source><year>2021</year><month>06</month><volume>104</volume><issue>6</issue><fpage>1398</fpage><lpage>1405</lpage><pub-id pub-id-type="doi">10.1016/j.pec.2020.11.017</pub-id><pub-id pub-id-type="medline">33257201</pub-id></nlm-citation></ref><ref 
id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ji</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>N</given-names> </name><name name-style="western"><surname>Frieske</surname><given-names>R</given-names> </name><etal/></person-group><article-title>Survey of hallucination in natural language generation</article-title><source>ACM Comput Surv</source><year>2023</year><month>12</month><day>31</day><volume>55</volume><issue>12</issue><fpage>1</fpage><lpage>38</lpage><pub-id pub-id-type="doi">10.1145/3571730</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Gupta</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ranjan</surname><given-names>R</given-names> </name><name name-style="western"><surname>Singh</surname><given-names>SN</given-names> </name></person-group><article-title>A comprehensive survey of retrieval-augmented generation (RAG): evolution, current landscape and future directions</article-title><source>arXiv</source><comment>Preprint posted online on  Oct 3, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2410.12837</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="web"><article-title>Macmillan Cancer Support</article-title><year>2025</year><access-date>2025-12-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.macmillan.org.uk/">https://www.macmillan.org.uk/</ext-link></comment></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="web"><source>The Eve Appeal</source><year>2025</year><access-date>2025-12-21</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://eveappeal.org.uk/">https://eveappeal.org.uk/</ext-link></comment></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vaira</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Lechien</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Abbate</surname><given-names>V</given-names> </name><etal/></person-group><article-title>Validation of the Quality Analysis of Medical Artificial Intelligence (QAMAI) tool: a new tool to assess the quality of health information provided by AI platforms</article-title><source>Eur Arch Otorhinolaryngol</source><year>2024</year><month>11</month><volume>281</volume><issue>11</issue><fpage>6123</fpage><lpage>6131</lpage><pub-id pub-id-type="doi">10.1007/s00405-024-08710-0</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>BW</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>JHJ</given-names> </name></person-group><article-title>Handcrafted features in computational linguistics</article-title><source>arXiv</source><comment>Preprint posted online on  May 25, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2305.15878</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Thakur</surname><given-names>AS</given-names> </name><name name-style="western"><surname>Choudhary</surname><given-names>K</given-names> </name><name name-style="western"><surname>Ramayapally</surname><given-names>VS</given-names> </name><name name-style="western"><surname>Vaidyanathan</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Hupkes</surname><given-names>D</given-names> </name></person-group><article-title>Judging the judges: evaluating alignment and vulnerabilities in LLMs-as-judges</article-title><source>arXiv</source><comment>Preprint posted online on  Aug 18, 2025</comment><pub-id pub-id-type="doi">10.48550/arXiv.2406.12624</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="web"><article-title>CDE-research-group/RAG-for-patient-information</article-title><source>GitHub</source><access-date>2026-04-14</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/CDE-Research-Group/RAG-for-Patient-Information">https://github.com/CDE-Research-Group/RAG-for-Patient-Information</ext-link></comment></nlm-citation></ref></ref-list></back></article>