<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i1e80752</article-id>
      <article-id pub-id-type="pmid">41183323</article-id>
      <article-id pub-id-type="doi">10.2196/80752</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Quality Assessment of Large Language Model–Generated Medical Dialogue for Clinical Vignettes: Evaluation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Al-Agil</surname>
            <given-names>Mohammad</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Roshani</surname>
            <given-names>Mohammad Amin</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Mohamed Shaffi</surname>
            <given-names>Shamnad</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Banerjee</surname>
            <given-names>Somnath</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Liang</surname>
            <given-names>Xiaolong</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Yanagita</surname>
            <given-names>Yasutaka</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution/>
            <institution>Department of General Medicine</institution>
            <institution>Chiba University Hospital, Chiba, Japan</institution>
            <addr-line>1-8-1, Inohana, Chuo-ku</addr-line>
            <addr-line>Chiba, 260-8677</addr-line>
            <country>Japan</country>
            <phone>81 43 222 7171 ext 6438</phone>
            <fax>81 43 224 4758</fax>
            <email>y.yanagita@gmail.com</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-9213-8247</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Yokokawa</surname>
            <given-names>Daiki</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0944-8664</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Ihara</surname>
            <given-names>Shiichi</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5081-9327</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Yoshida</surname>
            <given-names>Ryo</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0002-3128-4133</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Okano</surname>
            <given-names>Yoshihide</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0007-0582-776X</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Uehara</surname>
            <given-names>Takanori</given-names>
          </name>
          <degrees>MD, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-5086-5799</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of General Medicine</institution>
        <institution>Chiba University Hospital, Chiba, Japan</institution>
        <addr-line>Chiba</addr-line>
        <country>Japan</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Yasutaka Yanagita <email>y.yanagita@gmail.com</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>3</day>
        <month>11</month>
        <year>2025</year>
      </pub-date>
      <volume>9</volume>
      <elocation-id>e80752</elocation-id>
      <history>
        <date date-type="received">
          <day>16</day>
          <month>7</month>
          <year>2025</year>
        </date>
        <date date-type="rev-request">
          <day>8</day>
          <month>9</month>
          <year>2025</year>
        </date>
        <date date-type="rev-recd">
          <day>18</day>
          <month>9</month>
          <year>2025</year>
        </date>
        <date date-type="accepted">
          <day>15</day>
          <month>10</month>
          <year>2025</year>
        </date>
      </history>
      <copyright-statement>©Yasutaka Yanagita, Daiki Yokokawa, Shiichi Ihara, Ryo Yoshida, Yoshihide Okano, Takanori Uehara. Originally published in JMIR Formative Research (https://formative.jmir.org), 03.11.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2025/1/e80752" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Traditional clinical vignettes, though widely used in medical education, often focus on prototypical presentations; require substantial time and effort to develop; and fail to represent patient diversity, the complexity of clinical conditions, patients’ perspectives, and the dynamic nature of physician-patient interactions.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to evaluate the quality of Japanese-language physician-patient dialogues produced by generative artificial intelligence (AI), focusing on their medical accuracy and overall appropriateness as medical interviews.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We created an AI prompt that included a specific clinical history and instructed the model to simulate a cooperative patient responding to the physician’s questions to generate a physician-patient dialogue. The target diseases were those covered by the Japanese National Medical Licensing Examination. Each dialogue consisted of 25 turns by the physician and 25 by the patient, reflecting the typical volume of conversation in Japanese outpatient settings. Three internists independently evaluated each generated dialogue using a 7-point Likert scale across 6 criteria: coherence of the conversation, medical accuracy of the patient’s responses, medical accuracy of the physician’s responses, content of the medical history, communication skills, and professionalism. In addition, a composite score for each dialogue was calculated as the overall mean of these 6 criteria. Each dialogue was also examined for the presence of 5 essential clinical components commonly included in medical interviews: chief concern and clinical course since onset, physical findings, test results, diagnosis, and treatment course. A dialogue was considered to include a component only if all 3 evaluators independently confirmed its presence.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The mean composite score was 5.7 (SD 1.0), indicating high overall quality. Mean scores for each criterion were as follows: coherence of the conversation, 5.9 (SD 0.9); medical accuracy of the patient’s responses, 6.0 (SD 0.9); medical accuracy of the physician’s responses, 5.6 (SD 1.1); content of medical history taking, 5.9 (SD 0.9); communication skills, 5.6 (SD 0.9); and professionalism, 5.5 (SD 1.1). Among the 5 clinical components assessed in each dialogue across 47 clinical cases, chief concern and clinical course were included in all 47 (100%) cases, physical findings in 15 (32%) cases, test results in 27 (57%) cases, diagnosis in 45 (96%) cases, and treatment course in 0 (0%) cases.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>While physician oversight remains essential, it is feasible to efficiently create AI-generated educational materials for medical education that overcome the limitations of traditional clinical vignettes. This approach may reduce time and financial burdens, enhancing opportunities to practice clinical interviewing in settings that closely mirror real-world encounters.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>ChatGPT</kwd>
        <kwd>clinical vignettes</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>generative AI</kwd>
        <kwd>medical education</kwd>
        <kwd>physician-patient dialogue</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Artificial intelligence (AI) is critical for the advancement of medicine. In particular, generative AI, exemplified by large language models (LLMs), has the potential to fundamentally transform health care. Tools such as ChatGPT, developed by OpenAI, possess advanced conversational capabilities and broad applicability [<xref ref-type="bibr" rid="ref1">1</xref>], with an increasing number of applications in the medical field. From the perspective of conversational capabilities, generative AI–based chatbots can provide rehabilitation guidance and mental health support to patients and assist health care professionals in patient management [<xref ref-type="bibr" rid="ref2">2</xref>]. Generative AI is versatile enough to have passed the Japanese National Medical Licensing Examination [<xref ref-type="bibr" rid="ref3">3</xref>]. Moreover, LLMs are now used to enter patients’ medical histories and verify diagnostic accuracy in English and Japanese [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>], indicating that the scope of generative AI applications is expected to expand further in the future.</p>
      <p>The application of generative AI in medical education has attracted increasing interest. The use of generative AI to produce physician-patient dialogues has the potential to replicate disease-specific communication encountered in clinical practice and serve as a valuable educational resource for medical students.</p>
      <p>A comparable and widely used educational tool in medical education research is the <italic>clinical vignette</italic> [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]. Vignettes are paper-based instructional resources that present a patient’s medical history and key clinical information in an organized written format. They are considered highly effective in helping medical students understand disease concepts, symptomatology, and treatment pathways [<xref ref-type="bibr" rid="ref9">9</xref>].</p>
      <p>Exposure to various vignettes can deepen students’ clinical understanding. However, traditional vignettes often lack diversity in case scenarios and tend to focus narrowly on diagnostic information, thereby failing to capture the complexity, ambiguity, and uncertainty that characterize real-world clinical practice [<xref ref-type="bibr" rid="ref10">10</xref>]. In addition, the development of vignettes requires substantial time and effort. As clinical vignettes are generally written from the health care provider’s perspective, they inherently fail to capture the patient’s viewpoint and the dynamics of physician-patient interaction. Using generative AI such as ChatGPT to create dialogue-based educational materials that incorporate these missing elements is considered a promising approach. These educational materials offer several advantages. First, ChatGPT can generate a wide variety of dialogues tailored to different diseases and clinical scenarios, enabling repeated practice. Its use can reduce both the time and financial costs associated with producing educational content. Moreover, medical students can engage in clinical reasoning by organizing information while considering multiple differential diagnoses and can practice effective physician-patient communication, as would be required in actual clinical settings.</p>
      <p>AI-generated dialogues incorporate the medical information necessary for diagnostic decision-making along with a variety of interactions that closely resemble those encountered in real clinical settings. This enables medical students to develop information-processing skills in authentic, context-rich scenarios. Through this process, students are expected to acquire the ability to identify clinically relevant information and develop a broader range of clinical competencies, including medical interviewing techniques, patient management skills, and effective communication strategies.</p>
      <p>This study examined the feasibility of generating physician-patient dialogues using generative AI and evaluated the quality of the generated dialogues. Generative AI has already demonstrated the ability to generate differential diagnosis lists, illness scripts, and clinical vignettes based on medical information [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>]. Accordingly, it is considered feasible to generate physician-patient dialogues with a high level of accuracy using generative AI.</p>
      <p>In this study, we assessed the quality of dialogues generated in Japanese medical interviews, including their medical accuracy, and investigated their potential utility as educational materials in the context of medical education. Demonstrating the feasibility of easily generating diverse physician-patient dialogues using generative AI makes it possible to efficiently produce a large volume of educational content for use in medical education under the supervision of physicians with medical expertise. The appropriate application of AI technology is expected to increase the opportunities for medical students to engage with more practical and interactive content, thereby enhancing the effectiveness of medical education.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>Physician-patient dialogues were generated using an LLM and were subjected to cross-sectional evaluation. To ensure relevance to medical education, 47 target diseases were selected to represent diseases that medical students were expected to learn. The selection was based on the content of the Japanese National Medical Licensing Examination [<xref ref-type="bibr" rid="ref13">13</xref>] and finalized through discussion between a board-certified internist (YY) and a board-certified family physician (DY; <xref ref-type="boxed-text" rid="box1">Textbox 1</xref>). To evaluate the generated dialogues, we recruited 3 Japanese internists (SI, RY, and YO) who are actively involved in diagnostic practice at a university hospital and regularly manage a wide range of diagnostically challenging cases. These internists assessed the quality of the dialogues.</p>
        <boxed-text id="box1" position="float">
          <title>List of 47 diseases used to generate and evaluate artificial intelligence–generated physician-patient dialogues.</title>
          <p>
            <bold>Diseases</bold>
          </p>
          <list list-type="order">
            <list-item>
              <p>Gastroesophageal reflux disease</p>
            </list-item>
            <list-item>
              <p>Functional dyspepsia</p>
            </list-item>
            <list-item>
              <p>Esophageal achalasia</p>
            </list-item>
            <list-item>
              <p>Crohn disease</p>
            </list-item>
            <list-item>
              <p>Appendicitis</p>
            </list-item>
            <list-item>
              <p>Pheochromocytoma</p>
            </list-item>
            <list-item>
              <p>Primary aldosteronism</p>
            </list-item>
            <list-item>
              <p>Cushing syndrome</p>
            </list-item>
            <list-item>
              <p>Hashimoto disease</p>
            </list-item>
            <list-item>
              <p>Subacute thyroiditis</p>
            </list-item>
            <list-item>
              <p>Graves disease</p>
            </list-item>
            <list-item>
              <p>Depression</p>
            </list-item>
            <list-item>
              <p>Panic disorder</p>
            </list-item>
            <list-item>
              <p>Gout</p>
            </list-item>
            <list-item>
              <p>Pneumothorax</p>
            </list-item>
            <list-item>
              <p>Myasthenia gravis</p>
            </list-item>
            <list-item>
              <p>Trigeminal neuralgia</p>
            </list-item>
            <list-item>
              <p>Parkinson disease</p>
            </list-item>
            <list-item>
              <p>Alzheimer disease</p>
            </list-item>
            <list-item>
              <p>Benign paroxysmal positional vertigo</p>
            </list-item>
            <list-item>
              <p>Migraine</p>
            </list-item>
            <list-item>
              <p>Cluster headache</p>
            </list-item>
            <list-item>
              <p>Neurocardiogenic syncope</p>
            </list-item>
            <list-item>
              <p>Intervertebral disk herniation</p>
            </list-item>
            <list-item>
              <p>Insomnia</p>
            </list-item>
            <list-item>
              <p>Bronchial asthma</p>
            </list-item>
            <list-item>
              <p>Acute eosinophilic pneumonia</p>
            </list-item>
            <list-item>
              <p>Rheumatoid arthritis</p>
            </list-item>
            <list-item>
              <p>Systemic lupus erythematosus</p>
            </list-item>
            <list-item>
              <p>Sjögren syndrome</p>
            </list-item>
            <list-item>
              <p>Measles</p>
            </list-item>
            <list-item>
              <p>Fibromyalgia</p>
            </list-item>
            <list-item>
              <p>Gallstone attack</p>
            </list-item>
            <list-item>
              <p>Acute cholecystitis</p>
            </list-item>
            <list-item>
              <p>Unstable angina</p>
            </list-item>
            <list-item>
              <p>COVID-19</p>
            </list-item>
            <list-item>
              <p>Lumbar spinal stenosis</p>
            </list-item>
            <list-item>
              <p>Thromboangiitis obliterans</p>
            </list-item>
            <list-item>
              <p>Infectious mononucleosis</p>
            </list-item>
            <list-item>
              <p>Streptococcal pharyngitis</p>
            </list-item>
            <list-item>
              <p>Hepatitis</p>
            </list-item>
            <list-item>
              <p>Transient ischemic attack</p>
            </list-item>
            <list-item>
              <p>Iron deficiency anemia</p>
            </list-item>
            <list-item>
              <p>Heatstroke</p>
            </list-item>
            <list-item>
              <p>Acute pericarditis</p>
            </list-item>
            <list-item>
              <p>Chronic obstructive pulmonary disease</p>
            </list-item>
            <list-item>
              <p>Lung cancer</p>
            </list-item>
          </list>
        </boxed-text>
      </sec>
      <sec>
        <title>Production Environment</title>
        <p>The analysis was performed on a terminal running Ubuntu (version 20.04.2 Long-Term Support; Canonical). The central processing unit was an AMD EPYC 7402P 24-core processor (Advanced Micro Devices, Inc), and the system had 256 GB of main memory. An NVIDIA A100 graphics processing unit (NVIDIA Corp) with 40 GB of RAM was used for the computations. Python (version 3.6.9) and the OpenAI Python library (version 0.27.8; OpenAI) were used. The application programming interface used to generate the dialogues was the latest one available at the start of the study, gpt-4o-2024-11-20 [<xref ref-type="bibr" rid="ref14">14</xref>], and the dialogues were generated on November 22, 2024.</p>
      </sec>
      <sec>
        <title>Prompts to Be Entered Into the LLM</title>
        <p>Referring to previous studies that used AI to generate vignettes and illness scripts [<xref ref-type="bibr" rid="ref12">12</xref>], concise instructions specifying the desired output conditions were provided to the LLM as prompts (<xref rid="figure1" ref-type="fig">Figure 1</xref>). To enhance the completeness and coherence of the generated dialogues, the prompts included conditions such as the user supplying a specific vignette before the generation, refraining from using medical terminology, and responding cooperatively to the physician’s questions. The dialogue length was determined with reference to the average volume of conversation in typical outpatient consultations (approximately 5-10 min) [<xref ref-type="bibr" rid="ref15">15</xref>], and the LLM was instructed to generate 25 turns each from the physician and the patient.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>A portion of the prompt used to generate physician-patient dialogues in Japanese using generative artificial intelligence (Top: Japanese, Bottom: English).</p>
          </caption>
          <graphic xlink:href="formative_v9i1e80752_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Evaluation of Dialogue</title>
        <p>The presence of 5 key components commonly included in medical interviews was assessed: (1) chief concern and clinical course since onset, (2) physical findings, (3) test results, (4) diagnosis, and (5) treatment course. Three physicians independently reviewed each dialogue, and a dialogue was considered to contain a given component only if all 3 physicians confirmed its inclusion.</p>
        <p>Next, criteria for assessing the quality of the medical interviews were developed. Six evaluation criteria were selected through discussions between a board-certified internist (YY) and a board-certified family physician (DY), with reference to the evaluation domains of the Mini-Clinical Evaluation Exercise (Mini-CEX) [<xref ref-type="bibr" rid="ref16">16</xref>]: (1) coherence of the conversation, (2) medical accuracy of the patient’s statements, (3) medical accuracy of the physician’s statements, (4) quality of the physician’s history taking, (5) communication skills, and (6) professionalism.</p>
        <p>Criterion 1, coherence of the conversation, was evaluated as a linguistic criterion, focusing on the smoothness of the interaction between the physician and patient, the presence of inconsistencies, grammatical or typographical errors, and the logical relationship of the dialogue. Criteria 2 and 3, medical accuracy of the patient’s and physician’s statements, respectively, were assessed as clinical criteria by evaluating their consistency with the known clinical features of the respective diseases. Criterion 4, history taking, was assessed based on whether the physician elicited information regarding symptom characteristics and exacerbating or relieving factors. Criteria 5 and 6, communication skills and professionalism, respectively, were evaluated by assessing whether the physician explored the patient’s explanatory model and demonstrated respect, compassion, and empathy toward the patient (<xref ref-type="table" rid="table1">Table 1</xref>).</p>
        <p>Following prior studies [<xref ref-type="bibr" rid="ref12">12</xref>], we specifically employed 3 Japanese physicians affiliated with a university hospital, each of whom was involved in the supervision and education of medical students and residents, to evaluate the generated dialogues. Each of the 6 evaluation criteria was rated on a 7-point Likert scale, based on the perceived educational usefulness of the dialogue for medical students. The scale was defined as follows: 1=not applicable at all or not useful—major overall revision required, 2=low usefulness—multiple major revisions needed, 3=limited usefulness—some valuable content but substantial revisions necessary, 4=moderate usefulness—both strengths and several areas for improvement, 5=generally useful—some revisions or adjustments desirable, 6=high usefulness—only minor adjustments possibly needed, and 7=extremely useful and complete—no further revisions required.</p>
        <p>For each dialogue, the score of each evaluation criterion was calculated as the average of the ratings of the 3 evaluators. The composite score for the dialogue was then derived by averaging the scores across all 6 evaluation criteria and interpreted using the same 7-point Likert scale.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Evaluation criteria and definitions used to assess artificial intelligence–generated physician-patient dialogues in medical interviews.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="250"/>
            <col width="750"/>
            <thead>
              <tr valign="top">
                <td>Evaluation criteria</td>
                <td>Evaluation details</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Coherence of the conversation</td>
                <td>Assessed whether the dialogue between the physician and patient proceeded smoothly, with accurate grammar, no typographical or spelling errors, and overall linguistic clarity</td>
              </tr>
              <tr valign="top">
                <td>Medical accuracy of the patient’s statements</td>
                <td>Evaluated whether the patient’s utterances accurately reflected the typical onset patterns, symptoms, and clinical course associated with the relevant disease</td>
              </tr>
              <tr valign="top">
                <td>Medical accuracy of the physician’s statements</td>
                <td>Assessed whether the physician’s explanations and other statements were medically accurate and aligned with established clinical knowledge</td>
              </tr>
              <tr valign="top">
                <td>Quality of the physician’s history taking</td>
                <td>Evaluated whether the physician asked about essential elements of the current illness, including symptom location, characteristics, severity, temporal course, contextual factors, aggravating and relieving factors, associated symptoms, and the patient’s response to the symptoms</td>
              </tr>
              <tr valign="top">
                <td>Communication skills</td>
                <td>Assessed whether the physician conducted the interview in a way that facilitated open communication, explored the patient’s explanatory model and psychosocial context, and confirmed the patient’s understanding of the information discussed</td>
              </tr>
              <tr valign="top">
                <td>Professionalism</td>
                <td>Evaluated whether the physician demonstrated respect, compassion, and empathy toward the patient and whether a trusting therapeutic relationship was established</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study did not involve human or animal participants, and therefore, ethics approval was not required.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>Using the gpt-4o-2024-11-20 model, physician-patient dialogues were generated for 47 clinical cases (<xref ref-type="table" rid="table2">Table 2</xref>). Among the 47 generated dialogues, clinical component 1, chief concern and clinical course since onset, was present in all 47 (100%) cases; clinical component 4, diagnosis, was included in 45 (96%) cases, and in each of these cases, the model accurately output the specified disease name, as instructed. In contrast, clinical component 2, physical findings, was included in 15 (32%) cases; clinical component 3, test results, was included in 27 (57%) cases; and clinical component 5, treatment course, was not included in any of the cases (0%). Regarding the quality of the medical interviews, the average score was 5.9 (SD 0.9) for coherence of the conversation, 6.0 (SD 0.9) for medical accuracy of the patient’s statements, and 5.6 (SD 1.1) for medical accuracy of the physician’s statements. The average score was 5.9 (SD 0.9) for quality of the physician’s history taking, 5.6 (SD 0.9) for communication skills, and 5.5 (SD 1.1) for professionalism. The overall composite score, calculated as the mean of the 6 evaluation criteria, was 5.7 (SD 1.0).</p>
      <p>A focused discussion was conducted among 5 physicians—2 specialists (YY and DY) and 3 evaluators (SI, RY, and YO)—centered on dialogues that were subject to point deductions to identify and clarify the specific issues present in the lower-rated dialogues. The results of this analysis are summarized in <xref ref-type="table" rid="table3">Table 3</xref>. For reference, one dialogue with a perfect average score of 7 and another that received a lower average score of 4 are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> and <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>, respectively.</p>
      <table-wrap position="float" id="table2">
        <label>Table 2</label>
        <caption>
          <p>Average scores for each of the 6 evaluation criteria used to assess artificial intelligence–generated physician-patient dialogues.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="600"/>
          <col width="400"/>
          <thead>
            <tr valign="top">
              <td>Evaluation criteria</td>
              <td>Average score (SD)</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Coherence of the conversation</td>
              <td>5.9 (0.9)</td>
            </tr>
            <tr valign="top">
              <td>Medical accuracy of the patient’s statements</td>
              <td>6.0 (0.9)</td>
            </tr>
            <tr valign="top">
              <td>Medical accuracy of the physician’s statements</td>
              <td>5.6 (1.1)</td>
            </tr>
            <tr valign="top">
              <td>Quality of the physician’s history taking</td>
              <td>5.9 (0.9)</td>
            </tr>
            <tr valign="top">
              <td>Communication skills</td>
              <td>5.6 (0.9)</td>
            </tr>
            <tr valign="top">
              <td>Professionalism</td>
              <td>5.5 (1.1)</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
      <table-wrap position="float" id="table3">
        <label>Table 3</label>
        <caption>
          <p>Problems identified for each evaluation criterion based on expert review of lower-rated artificial intelligence–generated physician-patient dialogues.</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="300"/>
          <col width="700"/>
          <thead>
            <tr valign="top">
              <td>Evaluation criteria</td>
              <td>Problems</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Coherence of the conversation</td>
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>Responses to patient questions were omitted.</p>
                  </list-item>
                  <list-item>
                    <p>Although some expressions were unnatural, overall coherence was maintained.</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>Medical accuracy of the patient’s statements</td>
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>Typical responses regarding aggravating and relieving factors were not provided.</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>Medical accuracy of the physician’s statements</td>
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>A diagnosis was rendered, despite the interview being insufficient.</p>
                  </list-item>
                  <list-item>
                    <p>Diagnoses were finalized based on test results that were never mentioned as having been performed, indicating inappropriate or unjustified diagnostic reasoning.</p>
                  </list-item>
                  <list-item>
                    <p>The physician incorrectly stated that an incurable disease could be cured.</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>Quality of the physician’s history taking</td>
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>Redundant questions were asked regarding information that had already been provided.</p>
                  </list-item>
                  <list-item>
                    <p>When multiple symptoms were present, it was unclear which symptom’s aggravating or relieving factors were being discussed.</p>
                  </list-item>
                  <list-item>
                    <p>The structure and content of the medical interview were inadequate.</p>
                  </list-item>
                  <list-item>
                    <p>Additional questioning was conducted about symptoms not initially reported by the patient.</p>
                  </list-item>
                  <list-item>
                    <p>Unnecessary blood tests were included and described without justification.</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>Communication skills</td>
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>No attempts were made to confirm the patient’s understanding of the information provided.</p>
                  </list-item>
                  <list-item>
                    <p>The patient’s explanatory model was rarely elicited.</p>
                  </list-item>
                  <list-item>
                    <p>The patient’s response lacked logical progression or flow.</p>
                  </list-item>
                </list>
              </td>
            </tr>
            <tr valign="top">
              <td>Professionalism</td>
              <td>
                <list list-type="bullet">
                  <list-item>
                    <p>No appropriate responses were provided following expressions of anxiety.</p>
                  </list-item>
                  <list-item>
                    <p>The overall focus of the dialogue was limited to diagnostic questioning, with few utterances directed toward building rapport or fostering a therapeutic relationship.</p>
                  </list-item>
                </list>
              </td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>This study aimed to generate physician-patient dialogue using generative AI and evaluate the prompt designs, the resulting outputs, and the overall quality of the dialogues. We created 47 dialogues based on diseases from the Japanese National Medical Licensing Examination and evaluated them using 6 criteria related to clinical communication. The dialogues consistently included the chief concern and clinical course and demonstrated high medical accuracy and coherence, while areas such as professionalism and inclusion of treatment information were less consistently addressed. The overall composite score was 5.7 (SD 1.0), indicating general usefulness with minor revisions required.</p>
        <p>The analysis first focused on the outputs generated for 47 clinical cases and the corresponding prompt designs that produced them. Examination of the medical content essential for a clinical interview revealed that all dialogues included descriptions of the chief concern and clinical course since onset and that accurate diagnostic labels were presented in 45 (96%) of the 47 cases. In contrast, none of the dialogues included information regarding the treatment course. In designing the prompts, the number of dialogue turns was set at 25 for each speaker (50 in total), considering the average consultation time in Japanese outpatient settings, which is approximately 5 minutes. As an exploratory extension, we tested longer dialogues (50 and 100 turns per speaker). Although brief mentions of treatment began to appear, these dialogues became increasingly verbose, with redundant differential diagnoses and questioning. This indicated a decline in dialogue quality and educational effectiveness, highlighting the need to identify an optimal dialogue length in prompt design. Although the generation of information related to the treatment course may be improved through more sophisticated prompt engineering [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>] or future updates to generative AI models [<xref ref-type="bibr" rid="ref19">19</xref>], prior studies have demonstrated that diagnostic reasoning and treatment management involve fundamentally distinct cognitive processes [<xref ref-type="bibr" rid="ref20">20</xref>]. Therefore, separating these processes into distinct dialogue scenarios may be more educationally effective than integrating them into a single prompt.</p>
        <p>The quality of the generated dialogues, produced in Japanese, was evaluated based on 6 criteria. Regarding criterion 1, coherence of the conversation, the dialogues were generally smooth and grammatically correct. This reflects the high-level natural language processing capabilities of generative AI and suggests its potential to replicate physician-patient interactions at a basic level of linguistic fidelity within the context of Japanese-language medical interviews. Regarding criterion 2, medical accuracy of the patient’s statements, although there were instances in which the patient did not provide clear responses to the physician’s questions, particularly in certain disease contexts, such ambiguity may reflect the nature of real-world clinical encounters. From an educational perspective, these instances may be valuable for simulating authentic dialogue dynamics. Criterion 3, medical accuracy of the physician’s statements, received comparatively lower ratings than other criteria. This may be attributable to the perceived need for greater domain-specific precision, particularly when formulating clinical questions and providing medically accurate explanations to patients. This finding aligns with previous research on the limitations of generative AI in clinical reasoning [<xref ref-type="bibr" rid="ref12">12</xref>]. Regarding criterion 4, quality of the physician’s history taking, in actual clinical practice, once the probability of a particular disease increases based on the patient’s narrative, physicians typically engage in further in-depth inquiry. Generative AI appears to struggle with appropriately weighing clinical information and identifying which elements warrant further exploration; thus, at present, AI may be limited in its capacity to conduct history taking guided by probabilistic diagnostic reasoning. 
Regarding criterion 5, communication skills, the dialogues showed limited use of verbal cues such as acknowledgments or responses that convey an understanding of the patient’s statements, and expressions of empathy toward patients’ concerns were insufficient. In addition, there were few attempts to elicit a patient’s explanatory model, which is a critical step toward building rapport. Prompt design that encourages empathic and interactive responses may help address these limitations in future development. Regarding criterion 6, professionalism, although no ethically inappropriate expressions were identified, the dialogues generally lacked explicit demonstrations of patient-centered attitudes, respect, or empathic concern.</p>
        <p>It is important to note that, in face-to-face medical interviews, nonverbal cues, such as facial expressions and nodding, play a substantial role in promoting patient satisfaction and emotional attunement [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>], and the absence of such elements constitutes a fundamental limitation of text-based dialogue evaluations. Moreover, a small number of dialogues contained clinically inappropriate content, such as failure to provide justification for a diagnosis or incorrect assertion that a typically incurable disease could be cured. However, no ethically problematic or professionally inappropriate statements were identified in the dataset. Taken together, these results, along with the overall composite score of 5.7 (SD 1.0), suggest that, while physician oversight and revision are essential, generative AI–based dialogues may serve as a valuable educational resource for teaching clinical communication skills to medical students and early-stage trainees.</p>
        <p>Because of their underlying architecture, LLMs exhibit inherent output variability, and identical prompts do not consistently yield identical responses. Although this randomness poses challenges in terms of reproducibility and control, it offers opportunities for prompt-based modulation, whereby carefully designed prompts can elicit diverse and contextually appropriate outputs [<xref ref-type="bibr" rid="ref23">23</xref>]. In this respect, the ability to generate a wide range of physician-patient dialogues represents a particularly compelling and pedagogically valuable feature.</p>
        <p>In this study, to facilitate the acquisition of fundamental structures in history taking, the AI was instructed to assume the role of a cooperative patient who provided clear and responsive answers. However, the simulated patient did not need to be restricted to cooperative profiles. It is also feasible to generate dialogues featuring patients with diverse communicative behaviors, such as those who are angry, uncommunicative, or unable to articulate clearly because of their underlying health conditions.</p>
        <p>On the basis of our findings, we suggest that physician-patient dialogues generated by generative AI can be developed into high-quality educational materials with relatively minimal effort, provided that particular attention is paid to the medical accuracy of the physician’s utterances and that supervising physicians revise the content as necessary. Given the capability of generative AI to rapidly produce medically relevant content, its application in medical education and clinical practice is expected to expand further in the coming years [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. As demonstrated in previous studies, when generative AI is used in medical education [<xref ref-type="bibr" rid="ref26">26</xref>], the ability of instructors to review and modify the generated content enables its use as a supplementary instructional resource. Considering the capacity of the model to generate dialogues tailored to a wide range of clinical scenarios, this approach, when used with appropriate oversight, could offer a highly flexible and scalable educational tool.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study had 3 primary limitations. First, the version of the generative AI used in this study was gpt-4o-2024-11-20, and the evaluation was based on the outputs generated on November 13, 2024. As the performance of generative AI models may evolve with future updates, periodic reevaluation is necessary. Moreover, as multiple LLMs are available, the quality and characteristics of dialogues generated by other models may differ from those evaluated in this study.</p>
        <p>Second, there is currently no standardized method for prompt construction when interacting with generative AI, and the content of the input prompt can significantly influence the quality and nature of the output. In this study, the extent to which variations in the input conditions affect the generated dialogues was not examined systematically. Further investigations are warranted to clarify the impact of prompt design on output quality.</p>
        <p>Third, this study was conducted in Japanese, and all prompts and generated dialogues were also in Japanese. Although prior research suggests a strong potential for adaptation to other languages, further validation is warranted.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>In this study, physician-patient dialogues were generated using an LLM, that is, generative AI. These findings indicate that, although physician supervision remains essential, such dialogues can be easily developed into materials suitable for use in medical education. In particular, dialogues that incorporate the patient’s perspective and the interactive elements of physician-patient communication could provide practical learning experiences that are difficult to achieve with conventional educational resources, suggesting a wide range of possible applications in medical education.</p>
        <p>Moreover, the use of generative AI enables the efficient and large-scale creation of diverse educational content. This represents a significant advantage in terms of reducing the substantial time and effort traditionally required to develop medical instructional materials. By using this approach, medical students are expected to have more opportunities to learn in contexts that closely resemble actual clinical practice, thereby facilitating the acquisition of more practical medical interview skills.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Dialogue with a perfect average score (case 21: migraine).</p>
        <media xlink:href="formative_v9i1e80752_app1.docx" xlink:title="DOCX File , 18 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Dialogue with a lower average score of 4 (case 27: acute eosinophilic pneumonia).</p>
        <media xlink:href="formative_v9i1e80752_app2.docx" xlink:title="DOCX File , 17 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">Mini-CEX</term>
          <def>
            <p>Mini-Clinical Evaluation Exercise</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>No external financial support or grants were received from any public, commercial, or not-for-profit entities for the research, authorship, or publication of this paper.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The datasets generated or analyzed during this study are available from the corresponding author on reasonable request.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>YY, DY, and TU designed and coordinated the study. YY and DY carried out data analysis and interpretation. YY, DY, and TU drafted the manuscript. SI, RY, YO, and TU revised the manuscript for important intellectual content. All authors read and approved the final manuscript and take responsibility for all aspects of the work, ensuring that any questions regarding accuracy or integrity are appropriately investigated and resolved. This work was supported by JSPS KAKENHI (grant JP25K20499).</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>He</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>The potential applications and challenges of ChatGPT in the medical field</article-title>
          <source>Int J Gen Med</source>
          <year>2024</year>
          <month>03</month>
          <volume>17</volume>
          <fpage>817</fpage>
          <lpage>26</lpage>
          <pub-id pub-id-type="doi">10.2147/ijgm.s456659</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Laymouna</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Lessard</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Schuster</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Engler</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lebouché</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Roles, users, benefits, and limitations of chatbots in health care: rapid review</article-title>
          <source>J Med Internet Res</source>
          <year>2024</year>
          <month>07</month>
          <day>23</day>
          <volume>26</volume>
          <fpage>e56930</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2024/1/e56930/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/56930</pub-id>
          <pub-id pub-id-type="medline">39042446</pub-id>
          <pub-id pub-id-type="pii">v26i1e56930</pub-id>
          <pub-id pub-id-type="pmcid">PMC11303905</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yanagita</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yokokawa</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Uchida</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tawara</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ikusaka</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Accuracy of ChatGPT on medical questions in the national medical licensing examination in Japan: evaluation study</article-title>
          <source>JMIR Form Res</source>
          <year>2023</year>
          <month>10</month>
          <day>13</day>
          <volume>7</volume>
          <fpage>e48023</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://formative.jmir.org/2023/1/e48023/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/48023</pub-id>
          <pub-id pub-id-type="medline">37831496</pub-id>
          <pub-id pub-id-type="pii">v7i1e48023</pub-id>
          <pub-id pub-id-type="pmcid">PMC10612006</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fukuzawa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Yanagita</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yokokawa</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Uchida</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yamashita</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Shikino</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Tsukamoto</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Noda</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Uehara</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ikusaka</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Importance of patient history in artificial intelligence-assisted medical diagnosis: comparison study</article-title>
          <source>JMIR Med Educ</source>
          <year>2024</year>
          <month>04</month>
          <day>08</day>
          <volume>10</volume>
          <fpage>e52674</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2024/1/e52674/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/52674</pub-id>
          <pub-id pub-id-type="medline">38602313</pub-id>
          <pub-id pub-id-type="pii">v10i1e52674</pub-id>
          <pub-id pub-id-type="pmcid">PMC11024399</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kanjee</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Crowe</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Rodman</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Accuracy of a generative artificial intelligence model in a complex diagnostic challenge</article-title>
          <source>JAMA</source>
          <year>2023</year>
          <month>07</month>
          <day>03</day>
          <volume>330</volume>
          <issue>1</issue>
          <fpage>78</fpage>
          <lpage>80</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37318797"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jama.2023.8288</pub-id>
          <pub-id pub-id-type="medline">37318797</pub-id>
          <pub-id pub-id-type="pii">2806457</pub-id>
          <pub-id pub-id-type="pmcid">PMC10273128</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Coşkun</surname>
              <given-names>Ö</given-names>
            </name>
            <name name-style="western">
              <surname>Kıyak</surname>
              <given-names>YS</given-names>
            </name>
            <name name-style="western">
              <surname>Budakoğlu</surname>
              <given-names>Iİ</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT to generate clinical vignettes for teaching and multiple-choice questions for assessment: a randomized controlled experiment</article-title>
          <source>Med Teach</source>
          <year>2025</year>
          <month>02</month>
          <volume>47</volume>
          <issue>2</issue>
          <fpage>268</fpage>
          <lpage>74</lpage>
          <pub-id pub-id-type="doi">10.1080/0142159X.2024.2327477</pub-id>
          <pub-id pub-id-type="medline">38478902</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tremblay</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Turcotte</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Touati</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Poder</surname>
              <given-names>TG</given-names>
            </name>
            <name name-style="western">
              <surname>Kilpatrick</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bilodeau</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Richard</surname>
              <given-names>PO</given-names>
            </name>
            <name name-style="western">
              <surname>Lessard</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Giordano</surname>
              <given-names>É</given-names>
            </name>
          </person-group>
          <article-title>Development and use of research vignettes to collect qualitative data from healthcare professionals: a scoping review</article-title>
          <source>BMJ Open</source>
          <year>2022</year>
          <month>01</month>
          <day>31</day>
          <volume>12</volume>
          <issue>1</issue>
          <fpage>e057095</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopen.bmj.com/lookup/pmidlookup?view=long&#38;pmid=35105654"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjopen-2021-057095</pub-id>
          <pub-id pub-id-type="medline">35105654</pub-id>
          <pub-id pub-id-type="pii">bmjopen-2021-057095</pub-id>
          <pub-id pub-id-type="pmcid">PMC8804653</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wofford</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Exploring the educational value of clinical vignettes from the Society of General Internal Medicine national meeting in the internal medicine clerkship: a pilot study</article-title>
          <source>J Gen Intern Med</source>
          <year>2006</year>
          <month>11</month>
          <volume>21</volume>
          <issue>11</issue>
          <fpage>1195</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/17026730"/>
          </comment>
          <pub-id pub-id-type="doi">10.1111/j.1525-1497.2006.00596.x</pub-id>
          <pub-id pub-id-type="medline">17026730</pub-id>
          <pub-id pub-id-type="pii">JGI596</pub-id>
          <pub-id pub-id-type="pmcid">PMC1831647</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Trullàs</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Blay</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sarri</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Pujol</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Effectiveness of problem-based learning methodology in undergraduate medical education: a scoping review</article-title>
          <source>BMC Med Educ</source>
          <year>2022</year>
          <month>02</month>
          <day>17</day>
          <volume>22</volume>
          <issue>1</issue>
          <fpage>104</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmededuc.biomedcentral.com/articles/10.1186/s12909-022-03154-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12909-022-03154-8</pub-id>
          <pub-id pub-id-type="medline">35177063</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12909-022-03154-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC8851721</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Evans</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Keeley</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Blossom</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Amaro</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Stough</surname>
              <given-names>CO</given-names>
            </name>
            <name name-style="western">
              <surname>Canter</surname>
              <given-names>KS</given-names>
            </name>
            <name name-style="western">
              <surname>Robles</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Reed</surname>
              <given-names>GM</given-names>
            </name>
          </person-group>
          <article-title>Vignette methodologies for studying clinicians' decision-making: validity, utility, and application in ICD-11 field studies</article-title>
          <source>Int J Clin Health Psychol</source>
          <year>2015</year>
          <volume>15</volume>
          <issue>2</issue>
          <fpage>160</fpage>
          <lpage>70</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1697-2600(14)00066-0"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.ijchp.2014.12.001</pub-id>
          <pub-id pub-id-type="medline">30487833</pub-id>
          <pub-id pub-id-type="pii">S1697-2600(14)00066-0</pub-id>
          <pub-id pub-id-type="pmcid">PMC6224682</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yanagita</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yokokawa</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Fukuzawa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Uchida</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Uehara</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ikusaka</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Expert assessment of ChatGPT's ability to generate illness scripts: an evaluative study</article-title>
          <source>BMC Med Educ</source>
          <year>2024</year>
          <month>05</month>
          <day>15</day>
          <volume>24</volume>
          <issue>1</issue>
          <fpage>536</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmededuc.biomedcentral.com/articles/10.1186/s12909-024-05534-8"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12909-024-05534-8</pub-id>
          <pub-id pub-id-type="medline">38750546</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12909-024-05534-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC11095028</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yanagita</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yokokawa</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Uchida</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Uehara</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Ikusaka</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Can AI-generated clinical vignettes in Japanese be used medically and linguistically?</article-title>
          <source>J Gen Intern Med</source>
          <year>2024</year>
          <month>12</month>
          <volume>39</volume>
          <issue>16</issue>
          <fpage>3282</fpage>
          <lpage>9</lpage>
          <pub-id pub-id-type="doi">10.1007/s11606-024-09031-y</pub-id>
          <pub-id pub-id-type="medline">39313665</pub-id>
          <pub-id pub-id-type="pii">10.1007/s11606-024-09031-y</pub-id>
          <pub-id pub-id-type="pmcid">PMC11618267</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="web">
          <article-title>Questions and answers for the 118th National Medical Examination</article-title>
          <source>Ministry of Health, Labour, and Welfare Japan</source>
          <year>2024</year>
          <access-date>2025-10-27</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mhlw.go.jp/seisakunitsuite/bunya/kenkou_iryou/iryou/topics/tp240424-01.html">https://www.mhlw.go.jp/seisakunitsuite/bunya/kenkou_iryou/iryou/topics/tp240424-01.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="web">
          <article-title>Model OpenAI API</article-title>
          <source>OpenAI Platform</source>
          <access-date>2024-12-05</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://platform.openai.com/docs/models#gpt-4o">https://platform.openai.com/docs/models#gpt-4o</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Silverman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kurtz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Draper</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Skills for Communicating with Patients</source>
          <year>2013</year>
          <publisher-loc>Boca Raton, FL</publisher-loc>
          <publisher-name>CRC Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Martinsen</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Espeland</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Berg</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>Samstad</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lillebo</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Slørdahl</surname>
              <given-names>TS</given-names>
            </name>
          </person-group>
          <article-title>Examining the educational impact of the mini-CEX: a randomised controlled study</article-title>
          <source>BMC Med Educ</source>
          <year>2021</year>
          <month>04</month>
          <day>21</day>
          <volume>21</volume>
          <issue>1</issue>
          <fpage>228</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmcmededuc.biomedcentral.com/articles/10.1186/s12909-021-02670-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12909-021-02670-3</pub-id>
          <pub-id pub-id-type="medline">33882913</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12909-021-02670-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC8061047</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Meskó</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Prompt engineering as an important emerging skill for medical professionals: tutorial</article-title>
          <source>J Med Internet Res</source>
          <year>2023</year>
          <month>10</month>
          <day>04</day>
          <volume>25</volume>
          <fpage>e50638</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2023/1/e50638/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/50638</pub-id>
          <pub-id pub-id-type="medline">37792434</pub-id>
          <pub-id pub-id-type="pii">v25i1e50638</pub-id>
          <pub-id pub-id-type="pmcid">PMC10585440</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Yuan</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Dong</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Di</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Prompting frameworks for large language models: a survey</article-title>
          <source>ArXiv</source>
          <comment>Preprint posted online on November 21, 2023</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.48550/arXiv.2311.12785"/>
          </comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2311.12785</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thirunavukarasu</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ting</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Elangovan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Gutierrez</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>TF</given-names>
            </name>
            <name name-style="western">
              <surname>Ting</surname>
              <given-names>DS</given-names>
            </name>
          </person-group>
          <article-title>Large language models in medicine</article-title>
          <source>Nat Med</source>
          <year>2023</year>
          <month>08</month>
          <volume>29</volume>
          <issue>8</issue>
          <fpage>1930</fpage>
          <lpage>40</lpage>
          <pub-id pub-id-type="doi">10.1038/s41591-023-02448-8</pub-id>
          <pub-id pub-id-type="medline">37460753</pub-id>
          <pub-id pub-id-type="pii">10.1038/s41591-023-02448-8</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cook</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Sherbino</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Durning</surname>
              <given-names>SJ</given-names>
            </name>
          </person-group>
          <article-title>Management reasoning: beyond the diagnosis</article-title>
          <source>JAMA</source>
          <year>2018</year>
          <month>06</month>
          <day>12</day>
          <volume>319</volume>
          <issue>22</issue>
          <fpage>2267</fpage>
          <lpage>8</lpage>
          <pub-id pub-id-type="doi">10.1001/jama.2018.4385</pub-id>
          <pub-id pub-id-type="medline">29800012</pub-id>
          <pub-id pub-id-type="pii">2681495</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Collins</surname>
              <given-names>LG</given-names>
            </name>
            <name name-style="western">
              <surname>Schrimmer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Diamond</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Burke</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Evaluating verbal and non-verbal communication skills, in an ethnogeriatric OSCE</article-title>
          <source>Patient Educ Couns</source>
          <year>2011</year>
          <month>05</month>
          <volume>83</volume>
          <issue>2</issue>
          <fpage>158</fpage>
          <lpage>62</lpage>
          <pub-id pub-id-type="doi">10.1016/j.pec.2010.05.012</pub-id>
          <pub-id pub-id-type="medline">20561763</pub-id>
          <pub-id pub-id-type="pii">S0738-3991(10)00304-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Little</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Everitt</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Gashi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bikker</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mercer</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Verbal and non-verbal behaviour and patient perception of communication in primary care: an observational study</article-title>
          <source>Br J Gen Pract</source>
          <year>2015</year>
          <month>05</month>
          <day>25</day>
          <volume>65</volume>
          <issue>635</issue>
          <fpage>e357</fpage>
          <lpage>65</lpage>
          <pub-id pub-id-type="doi">10.3399/bjgp15x685249</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lyu</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zapadka</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Ponnatapura</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Niu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Myers</surname>
              <given-names>KJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Whitlow</surname>
              <given-names>CT</given-names>
            </name>
          </person-group>
          <article-title>Translating radiology reports into plain language using ChatGPT and GPT-4 with prompt learning: results, limitations, and potential</article-title>
          <source>Vis Comput Ind Biomed Art</source>
          <year>2023</year>
          <month>05</month>
          <day>18</day>
          <volume>6</volume>
          <issue>1</issue>
          <fpage>9</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37198498"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s42492-023-00136-5</pub-id>
          <pub-id pub-id-type="medline">37198498</pub-id>
          <pub-id pub-id-type="pii">10.1186/s42492-023-00136-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC10192466</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Karabacak</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ozkara</surname>
              <given-names>BB</given-names>
            </name>
            <name name-style="western">
              <surname>Margetis</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wintermark</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Bisdas</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The advent of generative language models in medical education</article-title>
          <source>JMIR Med Educ</source>
          <year>2023</year>
          <month>06</month>
          <day>06</day>
          <volume>9</volume>
          <fpage>e48163</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2023/1/e48163/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/48163</pub-id>
          <pub-id pub-id-type="medline">37279048</pub-id>
          <pub-id pub-id-type="pii">v9i1e48163</pub-id>
          <pub-id pub-id-type="pmcid">PMC10282912</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Qin</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>AI in medical education: medical student perception, curriculum recommendations and design suggestions</article-title>
          <source>BMC Med Educ</source>
          <year>2023</year>
          <month>11</month>
          <day>09</day>
          <volume>23</volume>
          <issue>1</issue>
          <fpage>852</fpage>
          <pub-id pub-id-type="doi">10.1186/s12909-023-04700-8</pub-id>
          <pub-id pub-id-type="medline">37946176</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12909-023-04700-8</pub-id>
          <pub-id pub-id-type="pmcid">PMC10637014</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Han</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Battaglia</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Udaiyar</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Fooks</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Terlecky</surname>
              <given-names>SR</given-names>
            </name>
          </person-group>
          <article-title>An explorative assessment of ChatGPT as an aid in medical education: use it with caution</article-title>
          <source>Med Teach</source>
          <year>2024</year>
          <month>05</month>
          <volume>46</volume>
          <issue>5</issue>
          <fpage>657</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.1080/0142159X.2023.2271159</pub-id>
          <pub-id pub-id-type="medline">37862566</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
