<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v9i1e64986</article-id><article-id pub-id-type="doi">10.2196/64986</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Enhancing Diagnostic Accuracy of Ophthalmological Conditions With Complex Prompts in GPT-4: Comparative Analysis of Global and Low- and Middle-Income Country (LMIC)&#x2013;Specific Pathologies</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>M'gadzah</surname><given-names>Shona Alex Tapiwa</given-names></name><degrees>BSc</degrees><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>O'Malley</surname><given-names>Andrew</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/></contrib></contrib-group><aff id="aff1"><institution>School of Medicine, University of St Andrews</institution><addr-line>North Haugh</addr-line><addr-line>St Andrews</addr-line><country>United Kingdom</country></aff><contrib-group><contrib contrib-type="editor"><name 
name-style="western"><surname>Mavragani</surname><given-names>Amaryllis</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Xu</surname><given-names>Yanwu</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Wang</surname><given-names>Zhixiang</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Andrew O'Malley, PhD, School of Medicine, University of St Andrews, North Haugh, St Andrews, KY16 9TF, United Kingdom, 44 01382384210; <email>aso2@st-andrews.ac.uk</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>30</day><month>6</month><year>2025</year></pub-date><volume>9</volume><elocation-id>e64986</elocation-id><history><date date-type="received"><day>01</day><month>08</month><year>2024</year></date><date date-type="rev-recd"><day>24</day><month>03</month><year>2025</year></date><date date-type="accepted"><day>15</day><month>05</month><year>2025</year></date></history><copyright-statement>&#x00A9; Shona Alex Tapiwa M'gadzah, Andrew O'Malley. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 30.6.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2025/1/e64986"/><abstract><sec><title>Background</title><p>The global incidence of blindness has continued to increase, despite the enactment of a Global Eye Health Action Plan by the World Health Assembly. This can be attributed, in part, to an aging population, but also to the limited diagnostic resources within low- and middle-income countries (LMICs). The advent of generative artificial intelligence (AI) within health care could pose a novel solution to combating the prevalence of blindness globally.</p></sec><sec><title>Objective</title><p>The objectives of this study are to quantify the effect the addition of a complex prompt has on the diagnostic accuracy of a commercially available LLM, and to assess whether such LLMs are better or worse at diagnosing conditions that are more prevalent in LMICs.</p></sec><sec sec-type="methods"><title>Methods</title><p>Ten clinical vignettes representing globally and LMIC-prevalent ophthalmological conditions were presented to GPT-4&#x2010;0125-preview using simple and complex prompts. Diagnostic performance metrics, including sensitivity, specificity, positive predictive value (PPV), and negative predictive value (NPV), were calculated. Statistical comparison between prompts was conducted using a chi-square test of independence.</p></sec><sec sec-type="results"><title>Results</title><p>The complex prompt achieved a higher diagnostic accuracy (90.1%) compared to the simple prompt (60.4%), with a statistically significant difference (<italic>&#x03C7;</italic><sup>2</sup>=428.86; <italic>P</italic>&#x003C;.001). 
Sensitivity, specificity, PPV, and NPV were consistently improved for most conditions with the complex prompt. The simple prompt struggled with LMIC-prevalent conditions, diagnosing only 1 of 5 accurately, while the complex prompt successfully diagnosed 4 of 5.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The study established that overall, the inclusion of a complex prompt positively affected the diagnostic accuracy of GPT-4&#x2010;0125-preview, particularly for LMIC-prevalent conditions. This highlights the potential for LLMs, when appropriately tailored, to support clinicians in diverse health care settings. Future research should explore the generalizability of these findings across other models and specialties.</p></sec></abstract><kwd-group><kwd>artificial intelligence</kwd><kwd>AI</kwd><kwd>ophthalmology</kwd><kwd>clinical diagnostics</kwd><kwd>medical technology</kwd><kwd>data project</kwd><kwd>complex prompt</kwd><kwd>diagnostic accuracy</kwd><kwd>ophthalmological conditions</kwd><kwd>ophthalmological disorder</kwd><kwd>eyes</kwd><kwd>blindness</kwd><kwd>low- and middle-income countries</kwd><kwd>LMIC</kwd><kwd>low-income or middle-income economies</kwd><kwd>health care</kwd><kwd>LLMs</kwd><kwd>NLP</kwd><kwd>machine learning</kwd><kwd>statistical analysis</kwd><kwd>GPT-4</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Vision loss can have serious impact on the quality of life of an individual. In a world designed around the able-bodied population, the loss of one&#x2019;s sight can make even the most seemingly simple tasks complex. 
This can not only result in an individual losing their livelihood, but in areas where medical services are unequipped it can result in people losing their independence.</p><p>Although sight impairments are a natural consequence of growing old, an aging population has led to an increasing number of individuals experiencing moderate or worse vision impairment worldwide [<xref ref-type="bibr" rid="ref1">1</xref>]. In 2018, for the first time recorded, there were more people aged older than 65 years than those younger than 5 years of age. This trend is expected to continue over the next 4 decades when it is forecasted that in 2050 there will be more than double the number of people older than 65 years of age compared to the number of children younger than 5 years of age [<xref ref-type="bibr" rid="ref2">2</xref>]. This emphasizes the need for novel solutions that can help mitigate the growing effects that the global aging population has upon health care systems worldwide.</p><p>Within the last decade, the importance of reducing the incidence of avoidable visual impairment worldwide was renewed with the introduction of the World Health Assembly (WHA) Global Eye Health Action Plan, which aimed to reduce &#x201C;the prevalence of avoidable vision impairment by 2019 from the baseline of 2010&#x201D; [<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>In 2020, the leading cause of blindness in adults aged 50 years and older globally was cataract (45.4%). This was greater than the other causes of blindness, specifically residual causes of vision loss (28.9%), glaucoma (11%), uncorrected refractive error (6.6%), age-related macular degeneration (5.6%), and diabetic retinopathy (2.5%) [<xref ref-type="bibr" rid="ref1">1</xref>]. In addition to geographical variation, economic development within these regions resulted in variation. 
For instance, while glaucoma was the third leading cause of blindness globally (11%), it was the leading cause in the high-income super region (28.2%) [<xref ref-type="bibr" rid="ref1">1</xref>].</p><p>Another target set in the World Health Organization (WHO) Global Action Plan was a focus on the elimination of avoidable blindness within the area of child health [<xref ref-type="bibr" rid="ref3">3</xref>]. Childhood blindness can either be classified descriptively or etiologically by underlying cause [<xref ref-type="bibr" rid="ref4">4</xref>]. Although it is harder to obtain etiological data, it can provide a useful insight into the areas that require the most attention. The most affected site that resulted in blindness globally was the retina (353,000); however, like in adults, the causes of blindness varied between socioeconomic regions.</p><p>Although the incidence of childhood visual impairment and blindness globally is low compared to adult blindness [<xref ref-type="bibr" rid="ref5">5</xref>], the impact of childhood blindness is arguably greater. When the potential lifespan of a child with blindness is taken into account, the number of &#x201C;blind person years&#x201D; is the second largest following cataract for conditions starting in childhood [<xref ref-type="bibr" rid="ref4">4</xref>], highlighting the greatness of impact. Furthermore, due to the large number of potential years of blindness that a person could experience as a result of childhood blindness, the global financial cost of blindness is greater than that of adult blindness when considering loss of earning capacity [<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>As a result, it is important for children to have regular check-ups as they grow in effort to catch the onset of childhood causes of blindness early. 
In low- and middle-income countries (LMICs), primary health care workers within the community often do not have the skills required to differentiate between the causes of blindness so children with suspected eye pathologies are sent to other services for follow-up care [<xref ref-type="bibr" rid="ref5">5</xref>]. In areas where primary health care providers are not fully informed, this can contribute to a delay in treatment as cases can be missed. Moreover, there is an increasing number of children requiring specialist services and not enough capacity to meet demand [<xref ref-type="bibr" rid="ref5">5</xref>]. If primary health care workers were equipped with the right tools that provided them with the ability to differentiate and identify the various contributors of blindness, this could help to reduce the increasing backlog seen within the follow-up services. One low-cost potential solution that could assist health care workers in lower income countries is online clinical assistants powered by artificial intelligence (AI) large language models (LLMs). These clinical assistants could help clinicians to triage patients and identify the causes of their conditions in settings where secondary or tertiary specialist care is unavailable.</p><p>The advent of chatbots and AI within the field of medicine is not a new occurrence. A chatbot can be defined as &#x201C;a program that simulates a human conversation with an end user&#x201D; [<xref ref-type="bibr" rid="ref7">7</xref>]. SightBot, a research chatbot, uses both Open AI and PubMed&#x2019;s application programming interface (APIs) to restrict the information available to GPT-3.5 [<xref ref-type="bibr" rid="ref8">8</xref>]. This limits the data that the AI can access in the hopes that this will reduce &#x201C;AI hallucination&#x201D;&#x2013;the fabrication of data [<xref ref-type="bibr" rid="ref8">8</xref>]. 
BioMedLM is built upon the HuggingFace GPT model with 2.7 billion parameters and is also trained upon biomedical data from PubMed [<xref ref-type="bibr" rid="ref9">9</xref>]. However, there is limited research on AI used as ophthalmological diagnostic tools. One paper reported that ChatGPT based upon the GPT-3 architecture had similar accuracy in diagnosing patients with primary and secondary glaucoma compared with senior ophthalmology residents [<xref ref-type="bibr" rid="ref10">10</xref>]. Furthermore, when compared with the established differential diagnosis software, Isabel Pro Differential Diagnosis Generator, ChatGPT outperformed Isabel in the diagnosis of ophthalmic conditions by correctly identifying 9 of 10 cases compared to 1 of 10 by Isabel [<xref ref-type="bibr" rid="ref11">11</xref>].</p><p>The objectives of this study are to quantify the effect the addition of a complex prompt has on the diagnostic accuracy of a commercially available LLM, and to assess whether such LLMs are better or worse at diagnosing conditions that are more prevalent in LMICs.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Selection of Conditions</title><p>Ten clinical conditions were selected based on their global and LMIC-specific prevalence. These comprised 5 conditions that are mostly observed in adults, and 5 that are mostly observed in children.</p><p>Adult conditions were selected based on a recent analysis into the causes of blindness within the region [<xref ref-type="bibr" rid="ref12">12</xref>]; the top contributors of blindness were selected in order of prevalence: cataract (46%), glaucoma (14%), trachoma (5%), and diabetic retinopathy (2%). Primary open-angle glaucoma [<xref ref-type="bibr" rid="ref13">13</xref>] was selected as the leading subtype of glaucoma [<xref ref-type="bibr" rid="ref13">13</xref>]. 
Uncorrected refractive error (presbyopia) was included due to its prominence in the WHA Global Action Plan [<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>Childhood conditions were selected due to their prevalence in LMICs; the leading causes identified were corneal scarring, cataract or glaucoma, retinopathy of prematurity, and &#x201C;other,&#x201D; mainly unavoidable, pathologies [<xref ref-type="bibr" rid="ref4">4</xref>]. Corneal scarring was excluded due to its extensive causes, including measles and neonatal conjunctivitis [<xref ref-type="bibr" rid="ref4">4</xref>], which would result in an increased breadth of scope that the diagnostic criteria would be required to cover. Two additional cases were selected due to reports of their impacts elsewhere in the literature: myopia accounted for 75% of cases of refractive error in children in Ethiopia, an LMIC [<xref ref-type="bibr" rid="ref14">14</xref>], and retinoblastoma was added as, compounded with childhood blindness, it often leads to early mortality [<xref ref-type="bibr" rid="ref5">5</xref>].</p></sec><sec id="s2-2"><title>Production of Vignettes</title><p>A vignette was drafted for each of the 10 conditions. Two additional vignettes were drafted to function as control cases: one for the adult cases and another for the pediatric ones. Vignettes were produced by compiling lists of symptoms and signs for each condition from relevant BMJ Best Practice pages before being compared against the NICE Clinical Knowledge Summaries and the NHS website. From the compiled lists, symptoms were then selected for each vignette and refined with help from a qualified ophthalmologist; of the large number of symptoms and signs each condition could present with, only 3 (2 symptoms and 1 clinical sign) were selected for inclusion in the final vignette. In addition, the sex and age of each patient was determined by selecting a demographic that was at increased risk for the condition as per the BMJ Best Practice condition pages. 
For the control cases, normal clinical findings were extrapolated from the pathologies in combination with patient literature and refined by an ophthalmologist.</p><p>As GPT-4 is trained upon publicly available data, in an effort to mitigate the ability for the AI to identify the condition by matching definitions to the material it was trained upon, the symptoms for each vignette were placed into colloquially styled short sentences to emulate what chatbots might receive in practice. A complete list of conditions and their associated vignettes is presented in <xref ref-type="table" rid="table1">Table 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>The 12 clinical vignettes, for each of the 5 adult cases, 5 pediatric cases and 2 control cases.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Condition</td><td align="left" valign="bottom">Vignette</td></tr></thead><tbody><tr><td align="left" valign="top">Cataract</td><td align="left" valign="top">Male, 68 years, presenting with washed-out vision. On examination, pupil looks a bit cloudy [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>].</td></tr><tr><td align="left" valign="top">Primary open-angle glaucoma</td><td align="left" valign="top">Male, 61 years, bumping into obvious things despite good central visual acuity. On examination, optic nerve looks abnormal [<xref ref-type="bibr" rid="ref17">17</xref>].</td></tr><tr><td align="left" valign="top">Trachoma</td><td align="left" valign="top">Female, 45 years, presenting with a painful, red eye that feels gritty. On examination, eyelashes touching cornea [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>].</td></tr><tr><td align="left" valign="top">Diabetic retinopathy</td><td align="left" valign="top">Obese female, 58 years, painless loss of vision in both eyes with prominent floaters. 
On examination, difficult view of retina but red and yellow patches seen [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>].</td></tr><tr><td align="left" valign="top">Uncorrected refractive error: presbyopia</td><td align="left" valign="top">Female, 72 years, unable to thread needle or prepare food safely; no problems recognizing faces or walking around. On examination, eyes look healthy.</td></tr><tr><td align="left" valign="top">Adult control</td><td align="left" valign="top">Female, 63 years, no visual disturbance or pain. On examination, media is clear and optic nerve and retina look healthy.</td></tr><tr><td align="left" valign="top">Congenital cataract</td><td align="left" valign="top">6-month-old baby, not fixing and following faces. On examination, pupils look white and eyes wobble.</td></tr><tr><td align="left" valign="top">Congenital glaucoma</td><td align="left" valign="top">18-month-old infant, left eye looks big and waters. On examination, cornea has horizontal white lines [<xref ref-type="bibr" rid="ref5">5</xref>].</td></tr><tr><td align="left" valign="top">Retinoblastoma</td><td align="left" valign="top">4-year-old boy, right eye big, red and painful. On examination, pupil looks yellow with blood vessels [<xref ref-type="bibr" rid="ref22">22</xref>].</td></tr><tr><td align="left" valign="top">Uncorrected refractive error: myopia</td><td align="left" valign="top">12-year-old girl, cannot read blackboard but can read her books. Narrows her eyelids when looking at things. Eyes look healthy.</td></tr><tr><td align="left" valign="top">Retinopathy of prematurity</td><td align="left" valign="top">4-month-old baby, born early with low weight. Not returning silent smiles. Eyes wobbly. Scarred white membrane behind pupils [<xref ref-type="bibr" rid="ref5">5</xref>].</td></tr><tr><td align="left" valign="top">Pediatric control</td><td align="left" valign="top">6-year-old girl, sees well, comfortable white eyes. 
On examination, eyes straight, corneas shiny and healthy.</td></tr></tbody></table></table-wrap></sec><sec id="s2-3"><title>Prompt Engineering</title><p>For each condition 2 prompts were created: one simple prompt that provided basic instructions and the relevant vignette, and one complex prompt that additionally included a large quantity of proprietary information about ophthalmology. Prompt Engineering [<xref ref-type="bibr" rid="ref23">23</xref>] was used to ensure that the responses produced were concise and only contained the diagnosis. Quantifiers such as &#x201C;provide the single most likely diagnosis&#x201D; and &#x201C;provide only the name of the condition&#x201D; ensured that extra content was not included in the response that could then increase the length of the validation stage. In addition, reassurances were provided to the AI to enable it to provide a likely diagnosis without providing medical advice to a patient. The last part of the generic prompt was a statement on specificity to direct the AI not to produce a generic diagnosis.</p><p>The simple prompt was created to instruct the AI to provide a diagnosis and to provide it with the vignette and did not contain additional information. The simple prompt is included in <xref ref-type="other" rid="box1">Textbox 1</xref>.</p><boxed-text id="box1"><title>The simple prompt provided to the artificial intelligence.</title><p>I am a researcher at a university medical school I am conducting research into diagnostic accuracy of LLMs in various ophthalmological conditions in a low resource setting. 
I am NOT a patient asking for medical advice.</p><list list-type="bullet"><list-item><p>Based on this information please provide the single most likely diagnosis.</p></list-item><list-item><p>Provide only the name of the condition.</p></list-item><list-item><p>Do not provide additional context but be specific on the subtype of the condition.</p></list-item></list><p>[The clinical vignette was inserted here at the end of the generic prompt]</p></boxed-text><p>The complex prompt replicated the simple prompt but contained additional ophthalmological information amounting to 7704 tokens. The additional information was derived from the &#x201C;Atoms&#x201D; educational resource [<xref ref-type="bibr" rid="ref24">24</xref>], which is included in an AI-powered eye and ear diagnostic agent under development at University of St Andrews, United Kingdom, and additional proprietary parameters structured into 4 key sections: role specification, diagnostic logic, clinical context and constraints, and comprehensive clinical reference, each detailing specific instructions and contextual considerations for GPT-4 to function as a diagnostic tool for health care workers in LMIC settings.</p></sec><sec id="s2-4"><title>Data Collection</title><p>OpenAI model GPT-4&#x2010;0125-preview was selected for use in the study [<xref ref-type="bibr" rid="ref25">25</xref>]. This was primarily due to the minimum model requirements that the complex prompt required. At the time of writing, this model was also the latest and most capable version offered by OpenAI. Each prompt was presented to the model 100 times using OpenAI API [<xref ref-type="bibr" rid="ref26">26</xref>] and responses were deposited in a CSV file for analysis. 
To prevent the AI learning from previous interactions, each API called a new instance of the model.</p></sec><sec id="s2-5"><title>Statistical Analysis</title><p>Each response was marked &#x201C;correct&#x201D; or &#x201C;incorrect&#x201D; so for each condition sensitivity, specificity, positive predictive value (PPV), and negative predictive value (NPV) could be calculated [<xref ref-type="bibr" rid="ref27">27</xref>]. This enabled a detailed comparison of the accuracy of each prompt in diagnosing ophthalmological conditions.</p><p>The null hypothesis of the study was that the addition of a complex prompt did not alter the diagnostic accuracy of common ophthalmological conditions by GPT-4. A chi-square test of independence was conducted to compare the true positives for all conditions between the 2 prompts. This produced a <italic>P</italic> value that would determine whether the null hypothesis could be rejected or accepted.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p>The simple prompt achieved sensitivity of 1.00 for 5 of the 10 conditions: cataract, glaucoma, diabetic retinopathy, myopia and retinopathy of prematurity. It also achieved sensitivity of 0.96 for congenital glaucoma. The simple prompt struggled to identify the remainder of the conditions, including trachoma (0.00), presbyopia (0.00), congenital cataract (0.06), and retinoblastoma (0.02).</p><p>The complex prompt was able to diagnose 9 of the 10 conditions with perfect (cataract, glaucoma, trachoma, diabetic retinopathy, presbyopia, congenital glaucoma, retinoblastoma, myopia; sensitivity=1.00) or near-perfect (congenital cataract; sensitivity=0.99) accuracy. It was unable to diagnose retinopathy of prematurity (sensitivity=0.02).</p><p>Both the simple and complex prompts demonstrated high specificity (&#x003E;0.92) for every condition. 
For most conditions, high sensitivity and high specificity translated into high predictive values; however, there were some exceptions to this pattern. For trachoma and presbyopia the simple prompt demonstrated no PPV and middling (0.5) NPV. Additionally for congenital cataract and retinoblastoma, the simple prompt demonstrated middling NPV (0.52 and 0.51, respectively) due to the large number of false negative results. The complex prompt performed much better in relation to predictive values; for every condition, the PPV and NPV were 0.99&#x2010;1.00, with the exception of retinopathy of prematurity, which had a NPV of 0.51. These data are summarized in <xref ref-type="table" rid="table2">Table 2</xref> and <xref ref-type="fig" rid="figure1">Figure 1</xref>.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Diagnostic performance metrics of GPT-4&#x2010;0125-preview with simple and complex prompts.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Prompts</td><td align="left" valign="bottom">True positives, n</td><td align="left" valign="bottom">False negatives, n</td><td align="left" valign="bottom">False positives, n</td><td align="left" valign="bottom">True negatives, n</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="6">Simple prompt</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Cataract</td><td align="left" valign="top">100</td><td align="left" valign="top">0</td><td align="left" valign="top">8</td><td align="left" valign="top">92</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Glaucoma</td><td align="left" valign="top">100</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" 
valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Trachoma</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Diabetic retinopathy</td><td align="left" valign="top">100</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Presbyopia</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Congenital cataract</td><td align="left" valign="top">6</td><td align="left" valign="top">94</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Congenital glaucoma</td><td align="left" valign="top">96</td><td align="left" valign="top">4</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Retinoblastoma</td><td align="left" valign="top">2</td><td align="left" valign="top">98</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Myopia</td><td align="left" valign="top">100</td><td align="left" 
valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Retinopathy of prematurity</td><td align="left" valign="top">100</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top" colspan="6">Complex prompt</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Cataract</td><td align="left" valign="top">100</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Glaucoma</td><td align="left" valign="top">100</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Trachoma</td><td align="left" valign="top">100</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Diabetic retinopathy</td><td align="left" valign="top">100</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Presbyopia</td><td align="left" valign="top">100</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td 
align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Congenital cataract</td><td align="left" valign="top">99</td><td align="left" valign="top">1</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Congenital glaucoma</td><td align="left" valign="top">100</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Retinoblastoma</td><td align="left" valign="top">100</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Myopia</td><td align="left" valign="top">100</td><td align="left" valign="top">0</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Retinopathy of prematurity</td><td align="left" valign="top">2</td><td align="left" valign="top">98</td><td align="left" valign="top">0</td><td align="left" valign="top">100</td></tr></tbody></table></table-wrap><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Sensitivity (Sens), specificity (Spec), positive predictive values (PPVs), and negative predictive values (NPVs) for GPT-4&#x2010;0125-preview, comparing simple and complex prompts, and globally and LMIC-prevalent conditions. 
LMIC: low- and middle-income country.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e64986_fig01.png"/></fig></sec><sec id="s3-2"><title>Chi-Square Analysis</title><p>A chi-square test of independence was conducted to compare the number of true positives across all conditions between the 2 prompts (<italic>&#x03C7;</italic><sup>2</sup>=428.86; <italic>P</italic>&#x003C;.001). This result indicates a statistically significant difference in diagnostic accuracy between the 2 prompts. This finding combined with the diagnostic performance metrics demonstrates that the complex prompt was superior to the simple prompt in this test.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>The study aimed to identify whether the implementation of a complex prompt altered the diagnostic accuracy of common ophthalmological conditions by the model GPT-4&#x2010;0125-preview. Overall, the complex prompt diagnosed 90.1% of clinical conditions provided, compared to 60.4% with the simple prompt. This amounted to a statistically significant difference (<italic>&#x03C7;</italic><sup><italic>2</italic></sup>=428.86; <italic>P</italic>&#x003C;.001).</p><p>Although there was a statistically significant difference between the true positives of each prompt, both prompts were comparable in sensitivity and specificity for most conditions. However, as the sensitivity of congenital cataract, congenital glaucoma, and retinoblastoma is reduced with the simple prompt compared to the complex prompt, the majority of pathologies are likely to be missed if these conditions were presented to GPT-4&#x2010;0125-preview using the simple prompt.</p><p>Further differences between the 2 models became apparent upon analysis of the NPV. 
While the complex prompt demonstrated an NPV of 1.00 for most conditions investigated, retinopathy of prematurity was an exception, yielding an NPV of 0.51. This indicates that for prompts negatively classified for retinopathy of prematurity, there remained a 49% probability that pathology was, in fact, present.</p><p>In addition to these headline results, there appears to be an additional trend in the prompts&#x2019; responses, which becomes apparent after categorizing the 10 conditions into &#x201C;globally prevalent&#x201D; or &#x201C;LMIC prevalent&#x201D; (<xref ref-type="fig" rid="figure1">Figure 1</xref>). The simple prompt was able to identify 4 out of the 5 globally-prevalent conditions compared to only 1 of the 5 LMIC-prevalent conditions. In comparison, the complex prompt identified all globally-prevalent conditions and 4 of the 5 LMIC-prevalent conditions. The exception to this trend was retinopathy of prematurity.</p><p>This data suggests that although very accurate at diagnosing a proportion of diseases, GPT-4&#x2010;0125-preview appears to experience selection bias and its diagnostic ability could be considered Western-centric. It appears that this diagnostic &#x201C;blind spot&#x201D; can be mitigated by the addition of supplementary information in a complex prompt. This bias has been detected in other applications of generative AI in medicine, including in the range of skin tone present in images generated by Dall-E 3 and Midjourney [<xref ref-type="bibr" rid="ref28">28</xref>], and in representations of sex or gender in AI-generated patient vignettes [<xref ref-type="bibr" rid="ref29">29</xref>]. In both cases, the bias in question was mitigated by the addition of real demographic data into the relevant prompt.</p><p>A potential limitation of this study is that only a fraction of LMIC-prevalent conditions were investigated. 
In a recent study exploring blindness in children [<xref ref-type="bibr" rid="ref4">4</xref>], the category of &#x201C;others&#x201D; was the largest contributor (6150 children per 10 million) when compared to corneal scar, cataract or glaucoma and retinopathy of prematurity. Residual causes of vision loss contributed to 28.9% of blindness cases globally in adults aged 50 years and older [<xref ref-type="bibr" rid="ref1">1</xref>]. In addition, a study exploring the presentation of retinoblastoma [<xref ref-type="bibr" rid="ref30">30</xref>] identified that delayed presentation and reduced awareness are major contributors to the decreased survival rate of patients with retinoblastoma in LMICs. When awareness of particular conditions is limited, community health care workers may be uncertain about recognizing their initial clinical presentation. As such, early identification and screening programs of lesser-known conditions within LMICs are imperative to combat treatable or preventable causes of blindness at onset before they progress.</p><p>Another limitation of the study design was the use of control cases. Two controls&#x2014;one adult-oriented and one child-oriented&#x2014;were designed to represent healthy individuals without pathology. These controls enabled the calculation of specificity and predictive values by providing a basis for true negatives and false positives. While GPT-4&#x2010;0125-preview occasionally misidentified pathology in the controls, this typically did not affect the true negative counts, as the responses were still negative for the specific condition being assessed. To improve our methodology, future studies could use condition-specific prompts instead of general adult and child controls. 
The clinical vignette would be paired with a tailored question such as, &#x201C;Does this clinical vignette describe [condition]?&#x201D; This approach would likely improve the usefulness and transferability of specificity calculations.</p><p>To further quantify the effect of a complex prompt, it would be useful to examine how newer and more capable reasoning models (eg, GPT o1, o3-mini, Anthropic Claude) would perform when provided with the same clinical prompts. Although the study did not utilize older models (eg, GPT-3.5) due to their inability to handle large prompts, it would be advantageous to understand whether this model performed at a similar level of accuracy to GPT-4&#x2010;0125-preview or whether it is necessary for the newer model to be used. More generally, it is important to study the diagnostic capabilities of publicly available generative AI models to understand how their use might impact on patient care.</p><p>Furthermore, it would also be beneficial to evaluate the performance of LLMs using complex prompts against existing established diagnostic tools [<xref ref-type="bibr" rid="ref11">11</xref>], and in other specialties of medicine. A complex prompt can be created to contain other medical reference information. This diagnostic versatility could lead to a more ubiquitous and useful product, such that health care practitioners might only need to use one application in clinical practice.</p><p>This study sought to understand how complex prompts influence LLMs&#x2019; diagnostic abilities, but LLMs are capable of much more sophisticated interactions. In future, conversational agents may enhance the diagnostic process such that clinician and LLM may collaborate in real time to better understand the patient in front of them. 
This could, in theory, be evaluated at scale by deploying AI-powered simulated clinicians to interact with AI-powered diagnostic assistants; however, this could result in obvious potential issues with bias.</p></sec><sec id="s4-2"><title>Conclusions</title><p>This study demonstrates that the diagnostic accuracy of GPT-4&#x2010;0125-preview for common ophthalmological conditions can be significantly enhanced through the use of complex prompts. A complex prompt improved sensitivity, specificity, and predictive values for most conditions, outperforming the simple prompt, particularly for LMIC-prevalent conditions. Despite this, limitations such as the diagnostic bias towards globally-prevalent conditions and the challenges associated with certain LMIC-specific pathologies like retinopathy of prematurity highlight the need for further refinement.</p><p>Future research should explore the performance of other generative AI models using similar prompt designs and evaluate their utility against established diagnostic tools across various medical specialties. Expanding the use of condition-specific prompts and incorporating real-world demographic data could further enhance the diagnostic applicability of AI in diverse health care settings. By addressing these limitations, AI-powered diagnostic assistants hold the potential to support clinicians, especially in resource-constrained environments, ultimately improving global eye health outcomes.</p></sec></sec></body><back><ack><p>Dr Andrew Blaikie and William J Williams provided their support on this paper along with providing permission to use Alan within this study. The authors thank Dr Blaikie who provided support with and checked the clinical vignettes and AO who was the academic supervisor for this student project. 
Generative artificial intelligence was used as outlined in the Methods section only.</p></ack><notes><sec><title>Data Availability</title><p>The datasets generated and analyzed during this study are available in the University of St Andrews Research Portal repository [<xref ref-type="bibr" rid="ref31">31</xref>].</p></sec></notes><fn-group><fn fn-type="con"><p>AO and SATM jointly conceptualized the study and designed the methods. SATM drafted the original manuscript and AO reviewed and edited the final submission. SATM collected and analyzed the data. The project was supervised by AO.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb3">LMIC</term><def><p>low- and middle-income country</p></def></def-item><def-item><term id="abb4">NPV</term><def><p>negative predictive value</p></def></def-item><def-item><term id="abb5">PPV</term><def><p>positive predictive value</p></def></def-item><def-item><term id="abb6">WHO</term><def><p>World Health Organization</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><collab>GBD 2019 Blindness and Vision Impairment Collaborators</collab><collab>Vision Loss Expert Group of the Global Burden of Disease Study</collab></person-group><article-title>Causes of blindness and VISION impairment in 2020 and trends over 30 years, and prevalence of avoidable blindness in relation to VISION 2020: the Right to Sight: an analysis for the Global Burden of Disease Study</article-title><source>Lancet Glob Health</source><year>2021</year><month>02</month><volume>9</volume><issue>2</issue><fpage>e144</fpage><lpage>e160</lpage><pub-id 
pub-id-type="doi">10.1016/S2214-109X(20)30489-7</pub-id><pub-id pub-id-type="medline">33275949</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="book"><source>World Population Prospects 2019: Highlights</source><year>2019</year><publisher-name>United Nations</publisher-name><pub-id pub-id-type="doi">10.18356/13bf5476-en</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="report"><article-title>Universal eye health: a global action plan 2014&#x2013;2019</article-title><year>2013</year><publisher-name>World Health Organization</publisher-name></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Parikshit</surname><given-names>G</given-names> </name><name name-style="western"><surname>Clare</surname><given-names>G</given-names> </name></person-group><article-title>Blindness in children: a worldwide perspective</article-title><source>Community Eye Health</source><year>2007</year><month>06</month><volume>20</volume><issue>62</issue><fpage>32</fpage><lpage>33</lpage><pub-id pub-id-type="medline">17612696</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Courtright</surname><given-names>P</given-names> </name><name name-style="western"><surname>Hutchinson</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Lewallen</surname><given-names>S</given-names> </name></person-group><article-title>Visual impairment in children in middle- and lower-income countries</article-title><source>Arch Dis Child</source><year>2011</year><month>12</month><volume>96</volume><issue>12</issue><fpage>1129</fpage><lpage>1134</lpage><pub-id pub-id-type="doi">10.1136/archdischild-2011-300093</pub-id><pub-id 
pub-id-type="medline">21868404</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rahi</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Gilbert</surname><given-names>CE</given-names> </name><name name-style="western"><surname>Foster</surname><given-names>A</given-names> </name><name name-style="western"><surname>Minassian</surname><given-names>D</given-names> </name></person-group><article-title>Measuring the burden of childhood blindness</article-title><source>Br J Ophthalmol</source><year>1999</year><month>04</month><volume>83</volume><issue>4</issue><fpage>387</fpage><lpage>388</lpage><pub-id pub-id-type="doi">10.1136/bjo.83.4.387</pub-id><pub-id pub-id-type="medline">10434856</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="web"><article-title>What is a chatbot?</article-title><source>IBM</source><year>2024</year><access-date>2025-06-26</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.ibm.com/topics/chatbots">https://www.ibm.com/topics/chatbots</ext-link></comment></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Sanjeev</surname><given-names>S</given-names> </name></person-group><article-title>SightBot: ChatGPT-powered research insights with pubmed citations</article-title><source>Brilliantly</source><year>2023</year><access-date>2025-06-26</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.brilliantly.ai/blog/sightbot">https://www.brilliantly.ai/blog/sightbot</ext-link></comment></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Abhinav Venigalla</surname><given-names>JF</given-names> 
</name><name name-style="western"><surname>Carbin</surname><given-names>M</given-names> </name></person-group><article-title>Mosaic research</article-title><source>Databricks</source><year>2022</year><access-date>2025-06-26</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.mosaicml.com/blog/introducing-pubmed-gpt">https://www.mosaicml.com/blog/introducing-pubmed-gpt</ext-link></comment></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Delsoz</surname><given-names>M</given-names> </name><name name-style="western"><surname>Raja</surname><given-names>H</given-names> </name><name name-style="western"><surname>Madadi</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>The use of ChatGPT to assist in diagnosing glaucoma based on clinical case reports</article-title><source>Ophthalmol Ther</source><year>2023</year><month>12</month><volume>12</volume><issue>6</issue><fpage>3121</fpage><lpage>3132</lpage><pub-id pub-id-type="doi">10.1007/s40123-023-00805-x</pub-id><pub-id pub-id-type="medline">37707707</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Balas</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ing</surname><given-names>EB</given-names> </name></person-group><article-title>Conversational AI models for ophthalmic diagnosis: comparison of ChatGPT and the Isabel pro differential diagnosis generator</article-title><source>JFO Open Ophthalmology</source><year>2023</year><month>03</month><volume>1</volume><fpage>100005</fpage><pub-id pub-id-type="doi">10.1016/j.jfop.2023.100005</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Xulu-Kasaba</surname><given-names>ZN</given-names> </name><name name-style="western"><surname>Kalinda</surname><given-names>C</given-names> </name></person-group><article-title>Prevalence of blindness and its major causes in sub-Saharan Africa in 2020: a systematic review and meta-analysis</article-title><source>Br J Vis Impair</source><year>2022</year><month>09</month><volume>40</volume><issue>3</issue><fpage>563</fpage><lpage>577</lpage><pub-id pub-id-type="doi">10.1177/02646196211055924</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Quigley</surname><given-names>HA</given-names> </name><name name-style="western"><surname>Broman</surname><given-names>AT</given-names> </name></person-group><article-title>The number of people with glaucoma worldwide in 2010 and 2020</article-title><source>Br J Ophthalmol</source><year>2006</year><month>03</month><volume>90</volume><issue>3</issue><fpage>262</fpage><lpage>267</lpage><pub-id pub-id-type="doi">10.1136/bjo.2005.081224</pub-id><pub-id pub-id-type="medline">16488940</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kedir</surname><given-names>J</given-names> </name><name name-style="western"><surname>Girma</surname><given-names>A</given-names> </name></person-group><article-title>Prevalence of refractive error and visual impairment among rural school-age children of Goro District, Gurage Zone, Ethiopia</article-title><source>Ethiop J Health Sci</source><year>2014</year><month>10</month><volume>24</volume><issue>4</issue><fpage>353</fpage><lpage>358</lpage><pub-id pub-id-type="doi">10.4314/ejhs.v24i4.11</pub-id><pub-id pub-id-type="medline">25489200</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation 
citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Chang</surname><given-names>RT</given-names> </name></person-group><article-title>Cataracts</article-title><source>BMJ Best Practice</source><year>2023</year><access-date>2025-06-26</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://bestpractice.bmj.com/topics/en-gb/499">https://bestpractice.bmj.com/topics/en-gb/499</ext-link></comment></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="web"><article-title>What are the clinical features of cataracts?</article-title><source>National Institute for Health and Care Excellence</source><year>2022</year><access-date>2025-06-26</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cks.nice.org.uk/topics/glaucoma/diagnosis/ocular-hypertension-primary-open-angle-glaucoma">https://cks.nice.org.uk/topics/glaucoma/diagnosis/ocular-hypertension-primary-open-angle-glaucoma</ext-link></comment></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Amerasinghe</surname><given-names>N</given-names> </name><name name-style="western"><surname>Serov-Volach</surname><given-names>I</given-names> </name></person-group><article-title>Open-angle glaucoma</article-title><source>BMJ Best Practice</source><year>2023</year><access-date>2025-06-26</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://bestpractice.bmj.com/topics/en-gb/373">https://bestpractice.bmj.com/topics/en-gb/373</ext-link></comment></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="web"><article-title>Trachoma</article-title><source>World Health Organization</source><access-date>2025-06-26</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://www.who.int/news-room/fact-sheets/detail/trachoma">https://www.who.int/news-room/fact-sheets/detail/trachoma</ext-link></comment></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Lansingh</surname><given-names>VC</given-names> </name><name name-style="western"><surname>Callahan</surname><given-names>K</given-names> </name></person-group><article-title>Trachoma</article-title><source>BMJ Best Practice</source><year>2024</year><access-date>2025-06-26</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://bestpractice.bmj.com/topics/en-gb/958">https://bestpractice.bmj.com/topics/en-gb/958</ext-link></comment></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="web"><article-title>Diabetic retinopathy</article-title><source>NHS</source><year>2021</year><access-date>2025-06-26</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.nhs.uk/conditions/diabetic-retinopathy">https://www.nhs.uk/conditions/diabetic-retinopathy</ext-link></comment></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Dowler</surname><given-names>J</given-names> </name></person-group><article-title>Diabetic retinopathy</article-title><source>BMJ Best Practice</source><year>2024</year><access-date>2025-06-26</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://bestpractice.bmj.com/topics/en-gb/532">https://bestpractice.bmj.com/topics/en-gb/532</ext-link></comment></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Murray</surname><given-names>TG</given-names> </name><name name-style="western"><surname>Villegas</surname><given-names>VM</given-names> 
</name></person-group><article-title>Retinoblastoma</article-title><source>BMJ Best Practice</source><year>2023</year><access-date>2025-06-26</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://bestpractice.bmj.com/topics/en-gb/1055">https://bestpractice.bmj.com/topics/en-gb/1055</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mesk&#x00F3;</surname><given-names>B</given-names> </name></person-group><article-title>Prompt Engineering as an Important Emerging Skill for Medical Professionals: Tutorial</article-title><source>J Med Internet Res</source><year>2023</year><month>10</month><day>4</day><volume>25</volume><fpage>e50638</fpage><pub-id pub-id-type="doi">10.2196/50638</pub-id><pub-id pub-id-type="medline">37792434</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="web"><article-title>Atoms</article-title><source>School of Medicine at the University of St Andrews</source><year>2022</year><access-date>2026-06-11</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://medicine.st-andrews.ac.uk/arclight/teaching-materials/atoms-cards/">https://medicine.st-andrews.ac.uk/arclight/teaching-materials/atoms-cards/</ext-link></comment></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="web"><article-title>Models</article-title><source>OpenAI</source><year>2024</year><access-date>2025-06-11</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://platform.openai.com/docs/models">https://platform.openai.com/docs/models</ext-link></comment></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="web"><article-title>Openai-python</article-title><source>GitHub</source><access-date>2025-06-27</access-date><comment><ext-link ext-link-type="uri" 
xlink:href="https://github.com/openai/openai-python">https://github.com/openai/openai-python</ext-link></comment></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Trevethan</surname><given-names>R</given-names> </name></person-group><article-title>Sensitivity, specificity, and predictive values: foundations, pliabilities, and pitfalls in research and practice</article-title><source>Front Public Health</source><year>2017</year><volume>5</volume><fpage>307</fpage><pub-id pub-id-type="doi">10.3389/fpubh.2017.00307</pub-id><pub-id pub-id-type="medline">29209603</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>O&#x2019;Malley</surname><given-names>A</given-names> </name><name name-style="western"><surname>Veenhuizen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ahmed</surname><given-names>A</given-names> </name></person-group><article-title>Ensuring appropriate representation in AI-generated medical imagery: a methodological approach to address skin tone bias</article-title><source>JMIR AI</source><year>2024</year><month>11</month><day>27</day><volume>3</volume><fpage>e58275</fpage><pub-id pub-id-type="doi">10.2196/58275</pub-id><pub-id pub-id-type="medline">39602221</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Veenhuizen</surname><given-names>M</given-names> </name><name name-style="western"><surname>O&#x2019;Malley</surname><given-names>A</given-names> </name></person-group><article-title>Investigating and combating gender bias in generative large language models</article-title><access-date>2025-06-11</access-date><conf-name>The 1st International Conference on 
Artificial Intelligence in Medical Education with MAEMHS (Malaysia Association of Education in Medical and Health Science)</conf-name><conf-date>Dec 6-8, 2024</conf-date><conf-loc>Kuala Lumpur, Malaysia</conf-loc><comment><ext-link ext-link-type="uri" xlink:href="https://medicineandhealthukm.com/sites/default/files/article/2024/AI%20in%20Medical%20Education.pdf">https://medicineandhealthukm.com/sites/default/files/article/2024/AI%20in%20Medical%20Education.pdf</ext-link></comment></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zia</surname><given-names>N</given-names> </name><name name-style="western"><surname>Hamid</surname><given-names>A</given-names> </name><name name-style="western"><surname>Iftikhar</surname><given-names>S</given-names> </name><name name-style="western"><surname>Qadri</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Jangda</surname><given-names>A</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>MR</given-names> </name></person-group><article-title>Retinoblastoma presentation and survival: a four-year analysis from a tertiary care hospital</article-title><source>Pak J Med Sci</source><year>2020</year><month>01</month><volume>36</volume><issue>1</issue><fpage>S61</fpage><lpage>S66</lpage><pub-id pub-id-type="doi">10.12669/pjms.36.ICON-Suppl.1720</pub-id><pub-id pub-id-type="medline">31933609</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="report"><person-group person-group-type="author"><name name-style="western"><surname>M&#x2019;Gadzah</surname><given-names>SAT</given-names> </name><name name-style="western"><surname>O&#x2019;Malley</surname><given-names>AS</given-names> </name></person-group><article-title>Enhancing diagnostic accuracy of ophthalmological conditions with complex prompts in GPT-4: a comparative 
analysis of global and LMIC-specific pathologies (dataset)</article-title><year>2025</year><publisher-name>University of St Andrews</publisher-name></nlm-citation></ref></ref-list></back></article>