<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="letter"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v10i1e89939</article-id><article-id pub-id-type="doi">10.2196/89939</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Letter</subject></subj-group></article-categories><title-group><article-title>Prospective Evaluation of Large Language Model Integration Into a Classical Hematology Case Conference</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Kewan</surname><given-names>Tariq</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Lee</surname><given-names>Alfred I</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Van Doren</surname><given-names>Layla</given-names></name><degrees>MBA, MD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Medicine, Division of Hematology, Mayo Clinic</institution><addr-line>200 1st St 
SW</addr-line><addr-line>Rochester</addr-line><addr-line>MN</addr-line><country>United States</country></aff><aff id="aff2"><institution>Department of Medicine, Division of Hematology, Yale University</institution><addr-line>New Haven</addr-line><addr-line>CT</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Mavragani</surname><given-names>Amaryllis</given-names></name></contrib><contrib contrib-type="editor"><name name-style="western"><surname>Steenstra</surname><given-names>Ivan</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Senst</surname><given-names>Benjamin</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Cerqueira</surname><given-names>Renato</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Tariq Kewan, MD, Department of Medicine, Division of Hematology, Mayo Clinic, 200 1st St SW, Rochester, MN, 55905, United States, 1 6193896524; <email>kewan.tariq@mayo.edu</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>18</day><month>3</month><year>2026</year></pub-date><volume>10</volume><elocation-id>e89939</elocation-id><history><date date-type="received"><day>18</day><month>12</month><year>2025</year></date><date date-type="rev-recd"><day>09</day><month>02</month><year>2026</year></date><date date-type="accepted"><day>11</day><month>02</month><year>2026</year></date></history><copyright-statement>&#x00A9; Tariq Kewan, Alfred I Lee, Layla Van Doren. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 18.3.2026. 
</copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2026/1/e89939"/><abstract><p>Prospective integration of large language model tools into a classical hematology challenging-cases conference was feasible, increased clinician familiarity and interest, and was perceived as diagnostically and educationally valuable.</p></abstract><kwd-group><kwd>artificial intelligence</kwd><kwd>education</kwd><kwd>machine learning</kwd><kwd>large language model</kwd><kwd>classical hematology</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Artificial intelligence (AI) systems based on large language models (LLMs) are increasingly accessible to clinicians and trainees, yet their practical use in real-time case-based learning has not been systematically evaluated [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. Classical hematology, with its diagnostic challenges and frequent reliance on critical thinking, represents a relevant environment for early implementation. 
We conducted a prospective study to assess the integration of LLM tools into a classical hematology case conference and to evaluate user experience, perceived value, and key considerations for safe adoption.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><p>Over eight consecutive sessions, two LLMs, ChatGPT and Open Evidence AI, were incorporated into the Yale Classical Hematology Case Conference (<xref ref-type="fig" rid="figure1">Figure 1</xref>). The use of two distinct LLM platforms was intentional; Open Evidence AI was selected because it provides guaranteed, source-linked medical references, while the inclusion of ChatGPT aimed to demonstrate that different LLM platforms can be leveraged to support case-based discussions in classical hematology. Importantly, no formal performance comparison between the two platforms was conducted in this initiative. Presenters prepared structured prompts summarizing clinical presentation, laboratory data, and specific questions relevant to differential diagnosis and management for each case. These prompts were used to generate outputs that included differential diagnoses, diagnostic algorithms, rationale for additional workup, evidence-based therapeutic recommendations, and citation-supported references.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>General scheme of large language model (LLM) integration within the Classical Hematology Case Conference.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e89939_fig01.png"/></fig><p>The AI-generated content was shown during case discussions and evaluated in parallel with expert clinical reasoning. 
This created a structured format in which faculty and trainees could critically evaluate LLM output, compare it with established approaches or recommendations, and examine areas of concordance and discordance.</p></sec><sec id="s3" sec-type="methods"><title>Methods</title><sec id="s3-1"><title>Ethical Considerations</title><p>This prospective educational feasibility study involved an anonymous survey of conference participants without collection of identifiable private information. Participation was voluntary, and all data were analyzed and reported in aggregate to ensure confidentiality.</p></sec></sec><sec id="s4" sec-type="results"><title>Results</title><p>Following the intervention, 25 attendees completed a structured questionnaire (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>). Respondents were primarily faculty hematologists (n=16/25, 64%) and trainees (n=7, 28%), with a wide range of practice experience. Prior to the intervention, only 16% (n=4) reported being &#x201C;very familiar&#x201D; with AI in clinical hematology; after the intervention, 36% (n=9) reported &#x201C;a lot of familiarity,&#x201D; and none reported no familiarity. Similarly, the proportion using AI frequently or occasionally increased from 44% (n=11) preintervention to 68% (n=17) afterward. These findings suggest that even limited, structured exposure can influence clinician comfort with AI tools.</p><p>Participants generally perceived AI as valuable or somewhat valuable in the context of case discussion (n=21, 84%). The aspects rated highest were the generation of alternative diagnoses (80%) and the retrieval of relevant references (92%). These findings align with known capabilities of LLMs to rapidly provide relevant information, broaden diagnostic considerations, and provide literature support that may otherwise be time-consuming to compile during real-time discussions [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. 
In classical hematology, where cases often involve complex presentations with broad differential diagnoses, these contributions offer notable educational benefits.</p><p>Participants also identified several limitations. The most frequently reported concern was that the quality and specificity of AI output depended significantly on the structure and clarity of the input (n=15, 60%). This finding underscores the need for standardized prompting frameworks, a known issue across clinical AI applications. Respondents also noted that AI-generated treatment suggestions were sometimes insufficiently tailored to the individual clinical scenario (52%) and that occasional incomplete or irrelevant outputs were generated for both diagnoses and management options (52%). These observations reinforce the importance of clinician oversight and illustrate the current inability of LLMs to independently interpret patient-specific detailed clinical scenarios [<xref ref-type="bibr" rid="ref6">6</xref>].</p><p>Importantly, nearly all respondents (n=23, 92%) believed that AI should function exclusively in an adjunctive capacity, supporting rather than replacing clinician judgment. Only one participant indicated that AI added little or no value to the case discussions. These perspectives mirror broader concerns in the medical community regarding reliability, safety, and the need for human supervision in clinical applications of AI [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>].</p></sec><sec id="s5" sec-type="discussion"><title>Discussion</title><p>Our findings demonstrate that prospective integration of LLM tools into a classical hematology case conference is both feasible and acceptable to clinicians. The experience increased familiarity with AI systems, encouraged early adoption, and was perceived as valuable in enhancing diagnostic evaluation and reference retrieval. 
Importantly, the intervention created a structured environment for examining limitations of AI, including prompt dependency, generalization, and incomplete reasoning pathways, thus reinforcing the need for careful oversight and transparency.</p><p>This early experience suggests several considerations for future implementations. First, structured prompt templates may improve output reliability and consistency. Second, AI integration may be best positioned as an on-demand component rather than a continuous or self-supervised feature of case presentations. Third, further prospective studies should evaluate diagnostic accuracy, effects on clinical decision-making, potential biases, and implications for trainee education. Finally, developing practical guidelines for human-AI integration is essential as educational and clinical environments adopt these tools [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>].</p><p>In summary, our prospective feasibility study provides early evidence that integrating LLM-based tools into clinical case conferences enhances educational value and increases clinician familiarity with AI. These findings support continued, supervised exploration of AI-assisted case-based learning within hematology and other medical specialties.</p></sec></body><back><ack><p>The authors declare the use of generative AI in the research and writing process. According to the GAIDeT taxonomy (2025), the following tasks were delegated to GAI tools under full human supervision: Proofreading and editing. Responsibility for the final manuscript lies entirely with the authors. 
GAI tools are not listed as authors and do not bear responsibility for the final outcomes.</p></ack><notes><sec><title>Funding</title><p>The authors declared no financial support was received for this work.</p></sec></notes><fn-group><fn fn-type="con"><p>TK conceived the study, designed the study protocol and survey, edited and distributed the survey, collected and analyzed the data, drafted the first version of the manuscript, and presented the clinical cases in a timely manner during the conference. AL contributed to study conception and design, survey development, facilitated survey implementation, assisted with data analysis and interpretation, contributed to drafting and revising the manuscript, and presented the large language model outputs to conference attendees. LVD contributed to study conception and design, survey development and implementation, data collection and interpretation, and contributed to drafting and revising the manuscript.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">LLM</term><def><p>large language model</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lucas</surname><given-names>HC</given-names> </name><name name-style="western"><surname>Upperman</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Robinson</surname><given-names>JR</given-names> </name></person-group><article-title>A systematic review of large language models and their implications in medical education</article-title><source>Med Educ</source><year>2024</year><month>11</month><volume>58</volume><issue>11</issue><fpage>1276</fpage><lpage>1285</lpage><pub-id 
pub-id-type="doi">10.1111/medu.15402</pub-id><pub-id pub-id-type="medline">38639098</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sallam</surname><given-names>M</given-names> </name></person-group><article-title>ChatGPT utility in healthcare education, research, and practice: systematic review on the promising perspectives and valid concerns</article-title><source>Healthcare (Basel)</source><year>2023</year><month>03</month><day>19</day><volume>11</volume><issue>6</issue><fpage>887</fpage><pub-id pub-id-type="doi">10.3390/healthcare11060887</pub-id><pub-id pub-id-type="medline">36981544</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thirunavukarasu</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>DSJ</given-names> </name><name name-style="western"><surname>Elangovan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Gutierrez</surname><given-names>L</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>TF</given-names> </name><name name-style="western"><surname>Ting</surname><given-names>DSW</given-names> </name></person-group><article-title>Large language models in medicine</article-title><source>Nat Med</source><year>2023</year><month>08</month><volume>29</volume><issue>8</issue><fpage>1930</fpage><lpage>1940</lpage><pub-id pub-id-type="doi">10.1038/s41591-023-02448-8</pub-id><pub-id pub-id-type="medline">37460753</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Qiu</surname><given-names>P</given-names> </name><name 
name-style="western"><surname>Wu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Quantifying the reasoning abilities of LLMs on clinical cases</article-title><source>Nat Commun</source><year>2025</year><month>11</month><day>6</day><volume>16</volume><issue>1</issue><fpage>9799</fpage><pub-id pub-id-type="doi">10.1038/s41467-025-64769-1</pub-id><pub-id pub-id-type="medline">41198657</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Rebitschek</surname><given-names>FG</given-names> </name><name name-style="western"><surname>Carella</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kohlrausch-Pazin</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zitzmann</surname><given-names>M</given-names> </name><name name-style="western"><surname>Steckelberg</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wilhelm</surname><given-names>C</given-names> </name></person-group><article-title>Evaluating evidence-based health information from generative AI using a cross-sectional study with laypeople seeking screening information</article-title><source>NPJ Digit Med</source><year>2025</year><month>06</month><day>9</day><volume>8</volume><issue>1</issue><fpage>343</fpage><pub-id pub-id-type="doi">10.1038/s41746-025-01752-6</pub-id><pub-id pub-id-type="medline">40490558</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Asgari</surname><given-names>E</given-names> </name><name name-style="western"><surname>Monta&#x00F1;a-Brown</surname><given-names>N</given-names> </name><name 
name-style="western"><surname>Dubois</surname><given-names>M</given-names> </name><etal/></person-group><article-title>A framework to assess clinical safety and hallucination rates of LLMs for medical text summarisation</article-title><source>NPJ Digit Med</source><year>2025</year><month>05</month><day>13</day><volume>8</volume><issue>1</issue><fpage>274</fpage><pub-id pub-id-type="doi">10.1038/s41746-025-01670-7</pub-id><pub-id pub-id-type="medline">40360677</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Al-Nusair</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lanino</surname><given-names>L</given-names> </name><name name-style="western"><surname>Durmaz</surname><given-names>A</given-names> </name><name name-style="western"><surname>Porta</surname><given-names>MGD</given-names> </name><name name-style="western"><surname>Zeidan</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Kewan</surname><given-names>T</given-names> </name></person-group><article-title>Artificial intelligence in myeloid malignancies: clinical applications of machine learning in myelodysplastic syndromes and acute myeloid leukemia</article-title><source>Blood Rev</source><year>2025</year><month>11</month><volume>74</volume><fpage>101340</fpage><pub-id pub-id-type="doi">10.1016/j.blre.2025.101340</pub-id><pub-id pub-id-type="medline">41109825</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ong</surname><given-names>JCL</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>SYH</given-names> </name><name name-style="western"><surname>William</surname><given-names>W</given-names> </name><etal/></person-group><article-title>Ethical and regulatory 
challenges of large language models in medicine</article-title><source>Lancet Digit Health</source><year>2024</year><month>06</month><volume>6</volume><issue>6</issue><fpage>e428</fpage><lpage>e432</lpage><pub-id pub-id-type="doi">10.1016/S2589-7500(24)00061-X</pub-id><pub-id pub-id-type="medline">38658283</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Weidener</surname><given-names>L</given-names> </name><name name-style="western"><surname>Fischer</surname><given-names>M</given-names> </name></person-group><article-title>Teaching AI ethics in medical education: a scoping review of current literature and practices</article-title><source>Perspect Med Educ</source><year>2023</year><volume>12</volume><issue>1</issue><fpage>399</fpage><lpage>410</lpage><pub-id pub-id-type="doi">10.5334/pme.954</pub-id><pub-id pub-id-type="medline">37868075</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Structured questionnaire.</p><media xlink:href="formative_v10i1e89939_app1.pdf" xlink:title="PDF File, 522 KB"/></supplementary-material></app-group></back></article>