<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v9i1e65726</article-id><article-id pub-id-type="doi">10.2196/65726</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Using a Hybrid of AI and Template-Based Method in Automatic Item Generation to Create Multiple-Choice Questions in Medical Education: Hybrid AIG</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>K&#x0131;yak</surname><given-names>Yavuz Selim</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Kononowicz</surname><given-names>Andrzej A</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Medical Education and Informatics, Faculty of Medicine, Gazi University</institution><addr-line>Ankara</addr-line><country>Turkey</country></aff><aff id="aff2"><institution>Department of Bioinformatics and 
Telemedicine, Jagiellonian University Medical College</institution><addr-line>Medyczna Str 7</addr-line><addr-line>Krak&#x00F3;w</addr-line><country>Poland</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Mavragani</surname><given-names>Amaryllis</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Montazerolghaem</surname><given-names>Ahmadreza</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Alshiekh</surname><given-names>Mona</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Andrzej A Kononowicz, PhD, Department of Bioinformatics and Telemedicine, Jagiellonian University Medical College, Medyczna Str 7, Krak&#x00F3;w, 30-688, Poland, 48 12 3476908; <email>andrzej.kononowicz@uj.edu.pl</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>4</day><month>4</month><year>2025</year></pub-date><volume>9</volume><elocation-id>e65726</elocation-id><history><date date-type="received"><day>23</day><month>08</month><year>2024</year></date><date date-type="rev-recd"><day>16</day><month>03</month><year>2025</year></date><date date-type="accepted"><day>18</day><month>03</month><year>2025</year></date></history><copyright-statement>&#x00A9; Yavuz Selim K&#x0131;yak, Andrzej A Kononowicz. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 4.4.2025. 
</copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2025/1/e65726"/><abstract><sec><title>Background</title><p>Template-based automatic item generation (AIG) is more efficient than traditional item writing but it still heavily relies on expert effort in model development. While nontemplate-based AIG, leveraging artificial intelligence (AI), offers efficiency, it faces accuracy challenges. Medical education, a field that relies heavily on both formative and summative assessments with multiple choice questions, is in dire need of AI-based support for the efficient automatic generation of items.</p></sec><sec><title>Objective</title><p>We aimed to propose a hybrid AIG to demonstrate whether it is possible to generate item templates using AI in the field of medical education.</p></sec><sec sec-type="methods"><title>Methods</title><p>This is a mixed-methods methodological study with proof-of-concept elements. We propose the hybrid AIG method as a structured series of interactions between a human subject matter expert and AI, designed as a collaborative authoring effort. 
The method leverages AI to generate item models (templates) and cognitive models to combine the advantages of the two AIG approaches. To demonstrate how to create item models using hybrid AIG, we used 2 medical multiple-choice questions: one on respiratory infections in adults and another on acute allergic reactions in the pediatric population.</p></sec><sec sec-type="results"><title>Results</title><p>The hybrid AIG method we propose consists of 7 steps. The first 5 steps are performed by an expert in a customized AI environment. These involve providing a parent item, identifying elements for manipulation, selecting options and assigning values to elements, and generating the cognitive model. After a final expert review (Step 6), the content in the template can be used for item generation through traditional (non-AI) software (Step 7). We showed that AI is capable of generating item templates for AIG under the control of a human expert in only 10 minutes. Leveraging AI in template development made it less challenging.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The hybrid AIG method transcends the traditional template-based approach by marrying the &#x201C;art&#x201D; that comes from AI as a &#x201C;black box&#x201D; with the &#x201C;science&#x201D; of algorithmic generation under the oversight of an expert as a &#x201C;marriage registrar&#x201D;. 
It not only capitalizes on the strengths of both approaches but also mitigates their weaknesses, offering a human-AI collaboration to increase efficiency in medical education.</p></sec></abstract><kwd-group><kwd>automatic item generation</kwd><kwd>ChatGPT</kwd><kwd>artificial intelligence</kwd><kwd>large language models</kwd><kwd>medical education</kwd><kwd>AI</kwd><kwd>hybrid</kwd><kwd>template-based method</kwd><kwd>hybrid AIG</kwd><kwd>mixed-method</kwd><kwd>multiple-choice question</kwd><kwd>multiple-choice</kwd><kwd>human-AI collaboration</kwd><kwd>human-AI</kwd><kwd>algorithm</kwd><kwd>expert</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Multiple-choice items are integral to written assessment in medical education, serving as a useful tool for assessing a wide range of knowledge and skills. Their common use spans from evaluating factual knowledge to clinical reasoning and decision-making in various domains [<xref ref-type="bibr" rid="ref1">1</xref>]. This assessment format supports high-volume testing with the added advantage of automated scoring to enhance the efficiency of examinations in medical education.</p><p>The traditional way of writing multiple-choice items, characterized by manual development processes, presents significant challenges in scalability. This challenge stems from the intensive time and effort required to create and review each question. This laborious process, which demands expertise and resources, faces a bottleneck in scaling up to meet the demand for a vast quantity of quality items. 
The challenge is particularly pronounced in medical education, where only a progress test administration in a year requires having 2400 multiple-choice items [<xref ref-type="bibr" rid="ref2">2</xref>], showing the inefficiency of traditional methods in satisfying the needs of question banks in medical schools.</p><p>Template-based automatic item generation (AIG) is a scalable method used in educational assessment that leverages predefined templates to systematically produce multiple-choice items with the help of software [<xref ref-type="bibr" rid="ref3">3</xref>]. It has been implemented in 7 languages [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref6">6</xref>] and different health professions [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref9">9</xref>]. It consists of 3 sequential stages: development of a cognitive model, development of an item model (template), and using software for the rapid generation of hundreds of items [<xref ref-type="bibr" rid="ref10">10</xref>]. Item models developed based on cognitive models are structured by subject matter experts to include variables and rules for item generation, allowing for a more efficient creation of consistent questions. This approach enhances the scalability of item development and review compared with traditional item writing [<xref ref-type="bibr" rid="ref3">3</xref>], addressing the demand for high-quality assessment materials across various educational domains. Template-based AIG, while generating high-quality multiple-choice items efficiently [<xref ref-type="bibr" rid="ref11">11</xref>-<xref ref-type="bibr" rid="ref15">15</xref>], still heavily relies on expert effort in development of cognitive models and item models. 
Although it depends on the content area and the expert&#x2019;s skills, a study reported that development of a cognitive model and an item model can take hours for a subject matter expert [<xref ref-type="bibr" rid="ref10">10</xref>]. Furthermore, this development process necessitates high levels of extraneous cognitive load due to high element interactivity [<xref ref-type="bibr" rid="ref16">16</xref>], which requires experts to deal with several components simultaneously. Therefore, &#x201C;creating cognitive models for AIG is challenging&#x201D; [<xref ref-type="bibr" rid="ref3">3</xref>] and it &#x201C;requires a lot of practice&#x201D; for experts [<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>On the other hand, nontemplate-based AIG, which &#x201C;can be guided by the syntactic, semantic, or sequential structure of a text&#x201D; [<xref ref-type="bibr" rid="ref3">3</xref>], is an approach that leverages natural language processing to generate assessment items without relying on predefined templates. Unlike the template-based method, this method uses the ability of artificial intelligence (AI) to generate content dynamically, for example, using ChatGPT, which is an AI-based chatbot developed by OpenAI, for creating items based on specific topics or learning outcomes provided by users [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref21">21</xref>]. This approach allows for the generation of diverse and complex questions in seconds, offering flexibility and efficiency in item development. However, this AI-driven approach struggles with issues of inaccuracy and inconsistency [<xref ref-type="bibr" rid="ref18">18</xref>], especially when good prompting strategies [<xref ref-type="bibr" rid="ref22">22</xref>] are not used [<xref ref-type="bibr" rid="ref23">23</xref>]. 
In AI-driven item generation, such as with ChatGPT, these issues often emerge due to the model&#x2019;s reliance on its training data, which may not always align perfectly with the specific objectives intended by educators. For example, an AI might generate content that includes incorrect information, such as asserting that &#x201C;the human heart only has two chambers&#x201D; [<xref ref-type="bibr" rid="ref24">24</xref>], or misinterpret the complexity level required for a medical education context. Furthermore, the &#x201C;black box&#x201D; [<xref ref-type="bibr" rid="ref25">25</xref>] nature of these AI models complicates diagnosing and correcting these errors within the AI mechanism, as it is challenging to trace back how the AI arrived at a particular output. Therefore, it raises concerns regarding validity, reliability, transparency, fairness, and equity [<xref ref-type="bibr" rid="ref26">26</xref>], and the process still requires subject matter experts to review and revise each generated question [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref27">27</xref>]. Although it is more efficient than traditional item writing, the need to review each generated question remains a source of inefficiency.</p><p>As outlined above, recent advancements in AIG have offered efficiency, yet each method&#x2014;template-based and nontemplate-based&#x2014;brings its own set of limitations. The gap, therefore, lies in the need for a method that merges the structured efficiency of template-based AIG with the content generation capabilities of AI-driven, nontemplate-based approaches. This convergence could potentially address the pressing need for tools that augment the capabilities of medical educators in test development. 
We are aware of the interdependence of social (human) and technical elements within an organization [<xref ref-type="bibr" rid="ref28">28</xref>], advocating for the design of systems that concurrently optimize both human and technological components to achieve effective outcomes. In AIG, this can be interpreted as the need to harmonize the collaboration of a subject matter expert and AI tools working together on developing item and cognitive models.</p><p>In this paper, we propose a hybrid AIG method that uses AI to generate an item model (template) and a cognitive model for applying the item template in a template-based item generation process. This method not only capitalizes on the strengths of both approaches but also mitigates their respective weaknesses, offering a novel human-AI collaboration to increase AIG efficiency in medical education.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Process</title><p>This is a proof-of-concept study. Drawing on existing guidance on prompt engineering [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref30">30</xref>] and our experience in building human-AI dialogues [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref31">31</xref>], we developed a series of flipped interactions through a series of iterative refinements. In this approach, the AI initiates the conversation and prompts the human expert to provide the necessary data, enabling a collaborative process for generating and refining item templates. The AI shoulders the significant cognitive load involved in template development, thereby reducing the cognitive burden on experts, allowing them to focus on deciding which elements of the question are essential for assessing students&#x2019; knowledge.</p><p>We used GPT Builder (OpenAI), a platform for customizing ChatGPT to the user&#x2019;s needs [<xref ref-type="bibr" rid="ref32">32</xref>], to train our Custom GPT. 
Since we conducted this study in February 2024, the Custom GPT worked based on the frontier model at that time, GPT-4.</p><p>We will present our approach in a manner analogous to how Gierl et al [<xref ref-type="bibr" rid="ref10">10</xref>] described the template-based AIG. To illustrate the process, we used two items: one from Gierl et al&#x2019;s work [<xref ref-type="bibr" rid="ref3">3</xref>], and another from a multiple-choice question (MCQ) similar to an item in the Turkish National Medical Specialty Exam, TUS (2021/1, clinical question number 58). The reason for choosing Gierl et al&#x2019;s [<xref ref-type="bibr" rid="ref3">3</xref>] item is that it allows readers to compare it with the existing item model developed by a subject matter expert. The reason for choosing a TUS item is that Gierl et al&#x2019;s [<xref ref-type="bibr" rid="ref3">3</xref>] item model has likely already been processed by ChatGPT, so we also aimed to focus on an item that had not been modelled before.</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>This study did not involve human subjects, patient data, or personal identifiers, and therefore did not require ethical approval. No informed consent was necessary as no human participants were involved. The study is based on the demonstration of a methodological approach using AI for automatic item generation.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Hybrid AIG and Prompts</title><p>The Hybrid AIG consists of 7 steps, with the last 2 steps carried out outside the AI environment. The AI environment requires a specialized GPT trained for generating item models and cognitive models. 
The Custom GPT we developed is titled &#x201C;Item Model Maker for AIG&#x201D; and is accessible online (<ext-link ext-link-type="uri" xlink:href="https://chat.openai.com/g/g-ISoiQOLyv-item-model-maker-for-aig">https://chat.openai.com/g/g-ISoiQOLyv-item-model-maker-for-aig</ext-link>) [<xref ref-type="bibr" rid="ref33">33</xref>].</p><p>In addition, the prompt we used is provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p></sec><sec id="s3-2"><title>Step 1: Providing a Parent Item</title><p>The starting point for developing both the item model and the cognitive model in the hybrid AIG is a parent item, reflecting the approach used in the template-based method. It functions as a prototype for generating new questions that follow the template&#x2019;s structure.</p><p>In the first step of the hybrid AIG, the expert provides a parent item to AI. Although it is not a requirement, it would be better if the parent item has been chosen from well-performing items in an exam (eg, a published item from a past medical licensure examination). This could mitigate the possible problems that can stem from the lack of quality in the parent item. If an expert does not have a parent item, they can use AI tools, such as Case-Based MCQ Generator [<xref ref-type="bibr" rid="ref20">20</xref>], to generate an MCQ as a parent item.</p><p>The purpose of generating MCQs in medical education is for a human expert to assess the students&#x2019; skills and knowledge and to evaluate whether they have grasped the nuances of the information presented, aiming for outcomes that are consistent with expert reasoning. The parent item includes the necessary components and embedded possible cognitive models required for this purpose, as it is approved and provided by the expert. It serves as the well-established basis and acts as a seed for generating a new template. 
In the subsequent steps, AI will primarily handle the cognitive work in using one of the possible underlying cognitive models in the parent item to generate a template. The expert&#x2019;s role will be to monitor and ensure the AI maintains a consistent cognitive model in its output.</p><p>We present below the parent items provided to AI, adopting them from the sources described in the Methods section.</p><p>Gierl et al&#x2019;s [<xref ref-type="bibr" rid="ref3">3</xref>] item:</p><p>&#x201C;A 22-year-old female sees her doctor and reports that she&#x2019;s been experiencing a mild cough and slight body aches that have developed over a few days.</p><p>Upon examination, she presents with an oral temperature of 37&#x00B0;C. What is the most likely diagnosis?</p><list list-type="alpha-upper"><list-item><p>Hay fever</p></list-item><list-item><p>Ear infection</p></list-item><list-item><p>Common cold</p></list-item><list-item><p>Acute sinusitis</p></list-item><list-item><p>Seasonal influenza&#x201D;</p></list-item></list><p>TUS Item:</p><p>&#x201C;A fifteen-year-old girl, who became ill during her physical education class after lunch, is brought to the infirmary complaining of coughing, shortness of breath, dizziness, facial swelling and redness, as well as itching in her hands and feet. During the physical examination, the patient appears anxious, with a blood pressure reading of 80/50 mmHg, periorbital edema, and wheezing upon auscultation. 
Which of the following is the most likely diagnosis for this patient?</p><list list-type="alpha-upper"><list-item><p>Exercise-induced asthma</p></list-item><list-item><p>Cholinergic urticaria</p></list-item><list-item><p>Pulmonary embolism</p></list-item><list-item><p>Anaphylaxis</p></list-item><list-item><p>Vasovagal syncope&#x201D;</p></list-item></list></sec><sec id="s3-3"><title>Step 2: Identification of Elements for Manipulation</title><p>In the next step, the expert identifies words or phrases in the parent item for variable manipulation.</p><p>In the identification process, the expert can, preferably, consider the importance of the content in the case that is more influential in deciding what the correct answer is. <xref ref-type="table" rid="table1">Table 1</xref> shows AI&#x2019;s response and what the expert chose as the elements for manipulation.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Identification of elements for manipulation.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Who speaks</td><td align="left" valign="bottom">Gierl et al&#x2019;s [<xref ref-type="bibr" rid="ref3">3</xref>] item</td><td align="left" valign="bottom">TUS item</td></tr></thead><tbody><tr><td align="left" valign="top">AI<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Thank you for sharing the parent item. Could you please identify words or phrases in the question that you would like to manipulate for variable creation? These can include demographic details, symptoms, examination findings, or any other elements that could vary across different versions of this question.</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Based on the parent item you&#x2019;ve provided, let&#x2019;s proceed to the next step. 
Could you please identify words or phrases in the scenario and question that you would like to have as variable elements for manipulation? These could be specific symptoms, the patient&#x2019;s demographics, the setting, or any other details you think should be variable.</p></list-item></list></td></tr><tr><td align="left" valign="top">Expert</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Element 1: mild cough</p></list-item><list-item><p>Element 2: slight body aches</p></list-item><list-item><p>Element 3: over a few days</p></list-item><list-item><p>Element 4: 37&#x00B0;C</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Element 1: during her physical education class after lunch</p></list-item><list-item><p>Element 2: coughing, shortness of breath, dizziness, facial swelling and redness</p></list-item><list-item><p>Element 3: a blood pressure reading of 80/50 mm Hg, periorbital edema, and wheezing upon auscultation</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>AI: artificial intelligence.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-4"><title>Step 3: Replacement of Elements and Choosing Options</title><p>In this step, AI replaces the words and phrases received from the expert with the elements in brackets, then the expert reviews the output, and prompts AI to make necessary changes, if any. Subsequently, the expert chooses 2 or more options from the options in the parent item. The AI will use them to define the possible values of elements. 
<xref ref-type="table" rid="table2">Table 2</xref> shows the replaced version and the expert&#x2019;s choice among the options.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Replacement of elements and choosing options.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Who speaks</td><td align="left" valign="bottom">Gierl et al&#x2019;s [<xref ref-type="bibr" rid="ref3">3</xref>] item</td><td align="left" valign="bottom">TUS item</td></tr></thead><tbody><tr><td align="left" valign="top">AI<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="top">Based on the elements you&#x2019;ve identified, here&#x2019;s how the parent item could be modified with these variables in brackets:<break/>&#x201C;A 22-year-old female sees her doctor and reports that she&#x2019;s been experiencing a [Element 1] and [Element 2] that have developed [Element 3]. Upon examination, she presents with an oral temperature of [Element 4]. What is the most likely diagnosis?<break/>A. Hay fever<break/>B. Ear infection<break/>C. Common cold<break/>D. Acute sinusitis<break/>E. Seasonal influenza&#x201D;<break/>Could you please choose two or more options from the parent item that the variables will be focused on? This will help in offering variables for each element that significantly impact the correct answer according to medical sources and guidelines.</td><td align="left" valign="top">Great, I&#x2019;ll incorporate these elements as variables into the parent item. Here&#x2019;s how it will look:<break/>&#x201C;A fifteen-year-old girl, who became ill [Element 1], is brought to the infirmary complaining of [Element 2], as well as itching in her hands and feet. During the physical examination, the patient appears anxious, with [Element3]. Which of the following is the most likely diagnosis for this patient?<break/>A. Exercise-induced asthma<break/>B. Cholinergic urticaria<break/>C. 
Pulmonary embolism<break/>D. Anaphylaxis<break/>E. Vasovagal syncope&#x201D;<break/>Now, could you please choose two or more options from the answer choices that you&#x2019;d like the variables to significantly impact? This will help in designing the variables for each element to ensure they align closely with the medical scenarios corresponding to the chosen options.</td></tr><tr><td align="left" valign="top">Expert</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Common cold</p></list-item><list-item><p>Seasonal influenza</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Cholinergic urticaria</p></list-item><list-item><p>Anaphylaxis</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>AI: artificial intelligence.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-5"><title>Step 4: Offering Values in Elements</title><p>At this stage, AI suggests possible values for each element that can impact the correct answer. Then, the expert reviews the output and makes necessary changes, if any, by providing prompts. <xref ref-type="table" rid="table3">Table 3</xref> shows AI&#x2019;s response offering values. It is important to note that &#x201C;values&#x201D; were presented as &#x201C;variables&#x201D; by AI in the example because it did not work well when we called them &#x201C;values&#x201D; (eg, it was prone to offer numbers rather than words). 
It may not be a problem in newer versions.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Artificial intelligence&#x2019;s (AI&#x2019;s) offer for values.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Who speaks</td><td align="left" valign="bottom">Gierl et al&#x2019;s [<xref ref-type="bibr" rid="ref3">3</xref>] item</td><td align="left" valign="bottom">TUS item</td></tr></thead><tbody><tr><td align="left" valign="top">AI</td><td align="left" valign="top">Thank you for choosing &#x201C;Common cold&#x201D; and &#x201C;Seasonal influenza&#x201D; as the focus. Based on these options, I&#x2019;ll offer variables for each element that significantly impact the correct answer, considering medical sources and guidelines:<break/>Element 1: Cough<list list-type="bullet"><list-item><p>Variable 1: Persistent dry cough</p></list-item><list-item><p>Variable 2: Mild cough</p></list-item><list-item><p>Variable 3: Severe hacking cough</p></list-item></list><break/>Element 2: Body aches<list list-type="bullet"><list-item><p>Variable 1: Slight body aches</p></list-item><list-item><p>Variable 2: Moderate body aches</p></list-item><list-item><p>Variable 3: Severe muscle pains</p></list-item></list><break/>Element 3: Duration of symptoms<list list-type="bullet"><list-item><p>Variable 1: Suddenly over a few hours</p></list-item><list-item><p>Variable 2: Over a few days</p></list-item><list-item><p>Variable 3: Gradually over more than a week</p></list-item></list><break/>Element 4: Oral temperature<list list-type="bullet"><list-item><p>Variable 1: 37&#x00B0;C (normal)</p></list-item><list-item><p>Variable 2: 37.5&#x00B0;C to 38&#x00B0;C (mild fever)</p></list-item><list-item><p>Variable 3: Over 38&#x00B0;C (fever)</p></list-item></list><break/>Do these variables seem appropriate for creating different versions of the MCQ<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> item focused on 
distinguishing between the common cold and seasonal influenza? If you need any adjustments or have additional suggestions, please let me know!</td><td align="left" valign="top">Thank you for choosing options B (Cholinergic urticaria) and D (Anaphylaxis). Based on these selections, I&#x2019;ll offer variables for each element that significantly impact the correct answer considering the chosen options. Here are the proposed variables:<break/>Element 1: Timing of symptoms<list list-type="bullet"><list-item><p>During her physical education class after lunch.</p></list-item><list-item><p>Immediately after taking a hot shower.</p></list-item><list-item><p>While sitting in a hot classroom.</p></list-item><list-item><p>After exposure to cold wind post-exercise.</p></list-item><list-item><p>During a stressful exam.</p></list-item></list><break/>Element 2: Symptoms<list list-type="bullet"><list-item><p>Coughing, shortness of breath, dizziness, facial swelling and redness.</p></list-item><list-item><p>Generalized hives, sweating, and palpitations</p></list-item><list-item><p>Localized itching and redness following sweating.</p></list-item><list-item><p>Throat tightness, difficulty swallowing, and a sense of impending doom.</p></list-item><list-item><p>Flushing, headache, and nausea after eating specific foods.</p></list-item></list><break/>Element 3: Clinical findings<list list-type="bullet"><list-item><p>A blood pressure reading of 80/50 mm Hg, periorbital edema, and wheezing upon auscultation.</p></list-item><list-item><p>Tachycardia with a blood pressure reading of 100/70 mmHg, and diffuse hives.</p></list-item><list-item><p>Normal blood pressure, with localized swelling and erythema.</p></list-item><list-item><p>Rapid drop in blood pressure upon exposure to an allergen, with angioedema.</p></list-item><list-item><p>Elevated blood pressure due to anxiety, with mild facial edema.</p></list-item></list><break/>Please review these variables and let me know if they are appropriate or if you would 
like any modifications.</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>MCQ: multiple-choice question.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-6"><title>Step 5: Generating the Cognitive Model</title><p>In the following step, AI determines the constraints based on values (called &#x201C;variables&#x201D; by AI) approved by the expert, in order to create the cognitive model. Then, the expert reviews the output and prompts AI for necessary changes, if any. The presence of the expert is to validate the output, as in the previous steps.</p><p>Compared with the cognitive model in the traditional template-based method, our hybrid method adopts a simpler approach. By providing constraints, it allows the expert to determine what items will be generated and what the correct option is in each item. <xref ref-type="table" rid="table4">Tables 4</xref> and <xref ref-type="table" rid="table5">5</xref> present the cognitive models. Although the content needs small adjustments that can be easily carried out by the expert (eg, the expert may ask ChatGPT to remove &#x201C;37.5&#x00B0;C to 38&#x00B0;C&#x201D; to make the correct answers more evident), we did not make any changes, so as to present ChatGPT&#x2019;s original output and demonstrate that it is able to provide a useful initial cognitive model for further revisions. 
It is also possible, due to the nature of GPT models, that the output might differ in formatting, as visible in the tables.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Cognitive model generated by artificial intelligence (AI) for Gierl et al&#x2019;s [<xref ref-type="bibr" rid="ref3">3</xref>] item.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Element (variable)</td><td align="left" valign="bottom">Constraints (supported options)</td></tr></thead><tbody><tr><td align="left" valign="top">Cough</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Persistent dry cough (seasonal influenza)</p></list-item><list-item><p>Mild cough (common cold)</p></list-item><list-item><p>Severe hacking cough (seasonal influenza)</p></list-item></list></td></tr><tr><td align="left" valign="top">Body aches</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Slight body aches (common cold)</p></list-item><list-item><p>Moderate body aches (seasonal influenza)</p></list-item><list-item><p>Severe muscle pains (seasonal influenza)</p></list-item></list></td></tr><tr><td align="left" valign="top">Duration of symptoms</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Suddenly over a few hours (seasonal influenza)</p></list-item><list-item><p>Over a few days (common cold)</p></list-item><list-item><p>Gradually over more than a week (common cold)</p></list-item></list></td></tr><tr><td align="left" valign="top">Oral temperature</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>37&#x00B0;C (normal) (common cold)</p></list-item><list-item><p>37.5&#x00B0;C to 38&#x00B0;C (mild fever) (seasonal influenza)</p></list-item><list-item><p>Over 38&#x00B0;C (fever) (seasonal influenza)</p></list-item></list></td></tr></tbody></table></table-wrap><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Cognitive model 
generated by artificial intelligence (AI) for TUS item.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Variable element</td><td align="left" valign="bottom">Constraints (supported options)</td></tr></thead><tbody><tr><td align="left" valign="top">Timing of symptoms</td><td align="left" valign="top">B and D</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>During her physical education class after lunch.</td><td align="left" valign="top">D</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Immediately after taking a hot shower.</td><td align="left" valign="top">B</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>While sitting in a hot classroom.</td><td align="left" valign="top">B</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>After exposure to cold wind postexercise.</td><td align="left" valign="top">D</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>During a stressful exam.</td><td align="left" valign="top">B</td></tr><tr><td align="left" valign="top">Symptoms</td><td align="left" valign="top">B and D</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Coughing, shortness of breath, dizziness, and facial swelling and redness.</td><td align="left" valign="top">D</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Generalized hives, sweating, and palpitations.</td><td align="left" valign="top">B and D</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Localized itching and redness following sweating.</td><td align="left" valign="top">B</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Throat tightness, difficulty swallowing, and a sense of impending doom.</td><td align="left" valign="top">D</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Flushing, headache, and nausea after eating specific foods.</td><td align="left" valign="top">D</td></tr><tr><td align="left" valign="top">Clinical findings</td><td align="left" valign="top">B and D</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>A blood pressure reading of 80/50 mmHg, periorbital edema, and wheezing upon auscultation.</td><td align="left" valign="top">D</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Tachycardia with a blood pressure reading of 100/70 mmHg, and diffuse hives.</td><td align="left" valign="top">B and D</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Normal blood pressure, with localized swelling and erythema.</td><td align="left" valign="top">B</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Rapid drop in blood pressure upon exposure to an allergen, with angioedema.</td><td align="left" valign="top">D</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Elevated blood pressure due to anxiety, with mild facial edema.</td><td align="left" valign="top">B</td></tr></tbody></table></table-wrap><p>These 5 steps have been completed in less 
than 10 minutes for each model. The whole process within the AI environment can be viewed by accessing the following public pages of the human-AI conversations: Gierl et al&#x2019;s [<xref ref-type="bibr" rid="ref3">3</xref>] item [<xref ref-type="bibr" rid="ref34">34</xref>] and TUS item [<xref ref-type="bibr" rid="ref35">35</xref>].</p></sec><sec id="s3-7"><title>Step 6: Final Review by the Expert(s)</title><p>During this phase, the expert carries out a final review of the item model and cognitive model provided by AI, preferably with other experts.</p><p>In the previous steps, in order to keep the demonstration simple and because there was no significant inaccuracy in the AI-generated content, the expert did not demand any additional changes during the process in the AI environment. However, in the hybrid method, the 5 steps within the AI environment should be actively monitored by the expert, and if necessary, the expert should input prompts to make changes because AI is always prone to providing inaccurate content and deviating from a consistent template. Expert oversight, and involvement if necessary, is essential for creating high-quality item models and cognitive models.</p><p>Following the first 5 steps, which can be completed in less than 10 minutes, the expert should carry out one more round of review for the item model and the cognitive model generated through human-AI collaboration. It would be better if the expert conducts this review together with other experts to make sure that there is no inaccuracy, inconsistency, or inappropriate way of presentation. The expert should keep in mind that content generated by AI, in this case ChatGPT, is generated through a large language model, so it could &#x201C;hallucinate&#x201D; [<xref ref-type="bibr" rid="ref36">36</xref>] some inaccurate information and relationships that are harmful to the output quality. 
Apart from that, in this step, the expert may prefer to add more elements and variables, such as age and gender, in a way that does not change the correct answers, in order to increase the number of items.</p></sec><sec id="s3-8"><title>Step 7: Item Generation Using a Non-AI Software</title><p>Finally, the expert inputs the final version of the item template and the constraints into a traditional template-based AIG tool (software without AI involvement), and then the software algorithmically produces multiple-choice items based on the input provided by the expert. It is crucial to emphasize that the expert must input the content accurately, as traditional software cannot handle inconsistent types of input, unlike AI in the previous steps. There is no difference between the traditional template-based method (stage 3) [<xref ref-type="bibr" rid="ref10">10</xref>] and our hybrid method (step 7) in this regard. As in the template-based method [<xref ref-type="bibr" rid="ref10">10</xref>], hybrid AIG also allows the software to generate hundreds of consistent items based on a single item model.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>In this study, we used AI to generate item models and cognitive models, which can then be used in template-based AIG to generate multiple-choice items. We demonstrated that AI is capable of providing AIG templates for this purpose under the control of a human expert. Leveraging AI in template development has significantly reduced the time investment from hours [<xref ref-type="bibr" rid="ref10">10</xref>] to less than 10 minutes, and provided a smoother experience for experts in this challenging task [<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>In our hybrid AIG method, the cognitive work that had to be carried out by experts in the past [<xref ref-type="bibr" rid="ref10">10</xref>] is now shared with AI. 
It switches the role of experts from &#x201C;the creators of item-cognitive models from scratch&#x201D; to &#x201C;the reviewers of AI-generated content,&#x201D; which brings an important efficiency to AIG without sacrificing consistency and accuracy. Our hybrid AIG method transcends the traditional template-based approach by marrying the &#x201C;art&#x201D; that comes from AI as a &#x201C;black box&#x201D; [<xref ref-type="bibr" rid="ref25">25</xref>] with the &#x201C;science&#x201D; of algorithmic generation [<xref ref-type="bibr" rid="ref10">10</xref>] under the oversight of an expert as a &#x201C;marriage registrar.&#x201D; Practically, this balanced fusion under human guidance reduces the extraneous cognitive load [<xref ref-type="bibr" rid="ref16">16</xref>] on experts by allocating the burdensome tasks to AI in order to enhance human efficiency and allow experts to concentrate on refining and validating the AI-generated content.</p><p>Similar to our approach, a recent study successfully incorporated a large language model into the process of developing reading comprehension items [<xref ref-type="bibr" rid="ref37">37</xref>]. Although that study addresses a critical issue in item development for a non&#x2013;health care setting, its direct application to medical education is challenging due to the inherent complexities of health professions education. Furthermore, this approach integrates AI only into generating unique sentences based on rules imposed by experts, leaving the essential cognitive work dependent on expert input, which remains inefficient for medical education. In our hybrid method, we use AI not only for generating unique sentences but also for the development of item models and cognitive models as a whole, hence transforming the role of experts from the main &#x201C;cognitive workers&#x201D; to reviewers. This shift reduces cognitive effort for experts while maintaining their essential contribution to accurate and consistent items. 
Considering the importance of clarity and constraints in the templates, we can still suggest that it is possible and desirable to create specifications and instructions using artificial intelligence. Our research demonstrated that even minimal human oversight can be sufficient for using AI in the creation of specifications and instructions, particularly in challenging domains such as medical education, which suggests even greater possibility for less complex tasks like reading comprehension. By dismissing the potential of AI in this regard and labeling it as &#x201C;impossible,&#x201D; humans might inadvertently limit AI&#x2019;s capacity to enhance efficiency in the cognitive work that needs to be done. Thus, we propose leveraging AI more effectively rather than relegating it to a lesser role.</p><p>While noting the improved efficiency of the proposed hybrid method for cognitive tasks, we emphasize the importance of rigorous human oversight, and consequently, accountability for automatically generated content. As demonstrated in a study by Zack et al [<xref ref-type="bibr" rid="ref38">38</xref>], even a state-of-the-art large language model can still be prone to perpetuating racial and gender bias. Adding to the complexity, such bias may not be visible at the level of a single question item or template, but may emerge as an effect of prolonged use of generative AI tools. The role of human examiners is to be aware of such risks and to implement bias mitigation strategies at different steps and levels of the AI process to prevent the injection of harmful stereotypes into the assessment of students&#x2019; skills.</p><p>Our study has some limitations. Although the templates generated by AI showed promising results, replicability depends on the consistency of the AI model, which is GPT-4 in this case. In addition, other AI models such as GPT-4o (OpenAI), Claude (Anthropic), Gemini (Google), Llama (Meta), and Command R+ (Cohere) could lead to different outputs. 
While our study demonstrated that a hybrid AIG is possible, future research should explore this further by using different parent items across various settings to generate MCQs. As this is a proof-of-concept study, there is a lack of empirical evidence supporting the efficacy of the proposed hybrid AIG method, no qualitative reviews to assess the generated items&#x2019; quality, and a lack of quantitative item analysis since the items were not tested on medical students. However, the study is still valuable because it has shown for the first time that generating plausible, and possibly useful, item templates using AI is possible in medical education. A recent study has provided empirical evidence, demonstrating that experts correctly identified the answers in MCQs generated by using hybrid AIG [<xref ref-type="bibr" rid="ref39">39</xref>]. In future studies, we plan to generate more items using these templates and investigate their effectiveness using qualitative and quantitative methods. Moreover, a direct comparison of traditional template-based AIG and hybrid AIG could provide valuable evidence for effectiveness and efficiency. Another limitation is that we generated simple templates. There are multilayered templates for AIG [<xref ref-type="bibr" rid="ref3">3</xref>], which require relatively complex structures and might require us to use different custom AIs for this purpose.</p><p>In conclusion, the hybrid AIG is a promising novel method that leverages AI in the development of templates for template-based AIG and transforms the traditional role of experts from creators to reviewers. This shift can significantly reduce the cognitive burden on experts and streamline the item generation process while ensuring high-quality outcomes. 
We recommend piloting and improving the hybrid AIG in high-demand settings of increasing importance to investigate and improve its efficiency and quality benefits.</p></sec></body><back><ack><p>This work has been supported by T&#x00DC;B&#x0130;TAK (The Scientific and Technological Research Council of T&#x00FC;rkiye) under the 2219 program for YSK and by internal research funds at Jagiellonian University Medical College under the N41/DBS/000332 program for AK. Generative AI, as a research tool, was used in the preparation of this manuscript for multiple-choice question template generation, which has been transparently reported in the methods and results sections of the article. In addition, generative AI was used for language correction. The authors have reviewed, edited, and verified all content. The authors take full responsibility for the integrity and validity of the final work.</p></ack><notes><sec><title>Data Availability</title><p>All data generated or analyzed during this study are included in this published article.</p></sec></notes><fn-group><fn fn-type="con"><p>YSK contributed to conceptualization, methodology, writing the original draft, data curation, writing-review and editing, and funding acquisition. 
AK contributed to conceptualization, methodology, writing-review and editing, and funding acquisition.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">AIG</term><def><p>automatic item generation</p></def></def-item><def-item><term id="abb3">MCQ</term><def><p>multiple-choice question</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pugh</surname><given-names>D</given-names> </name><name name-style="western"><surname>De Champlain</surname><given-names>A</given-names> </name><name name-style="western"><surname>Touchie</surname><given-names>C</given-names> </name></person-group><article-title>Plus &#x00E7;a change, plus c&#x2019;est pareil: Making a continued case for the use of MCQs in medical education</article-title><source>Med Teach</source><year>2019</year><month>05</month><day>4</day><volume>41</volume><issue>5</issue><fpage>569</fpage><lpage>577</lpage><pub-id pub-id-type="doi">10.1080/0142159X.2018.1505035</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wrigley</surname><given-names>W</given-names> </name><name name-style="western"><surname>van der Vleuten</surname><given-names>CPM</given-names> </name><name name-style="western"><surname>Freeman</surname><given-names>A</given-names> </name><name name-style="western"><surname>Muijtjens</surname><given-names>A</given-names> </name></person-group><article-title>A systemic framework for the progress test: strengths, constraints and issues: AMEE Guide No. 
71</article-title><source>Med Teach</source><year>2012</year><volume>34</volume><issue>9</issue><fpage>683</fpage><lpage>697</lpage><pub-id pub-id-type="doi">10.3109/0142159X.2012.704437</pub-id><pub-id pub-id-type="medline">22905655</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Gierl</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Lai</surname><given-names>H</given-names> </name><name name-style="western"><surname>Tanygin</surname><given-names>V</given-names> </name></person-group><source>Advanced Methods in Automatic Item Generation</source><year>2021</year><edition>1</edition><publisher-name>Routledge</publisher-name><pub-id pub-id-type="doi">10.4324/9781003025634-1</pub-id><pub-id pub-id-type="other">978-1-00-302563-4</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kiyak</surname><given-names>YS</given-names> </name><name name-style="western"><surname>Budako&#x011F;lu</surname><given-names>I&#x0130;</given-names> </name><name name-style="western"><surname>Co&#x015F;kun</surname><given-names>&#x00D6;</given-names> </name><name name-style="western"><surname>Koyun</surname><given-names>E</given-names> </name></person-group><article-title>The first automatic item generation in Turkish for assessment of clinical reasoning in medical education</article-title><source>T&#x0131;p E&#x011F;itimi D&#x00FC;nyas&#x0131;</source><year>2023</year><month>03</month><volume>22</volume><issue>66</issue><fpage>72</fpage><lpage>90</lpage><pub-id pub-id-type="doi">10.25282/ted.1225814</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kiyak</surname><given-names>YS</given-names> 
</name><name name-style="western"><surname>Co&#x015F;kun</surname><given-names>&#x00D6;</given-names> </name><name name-style="western"><surname>Budako&#x011F;lu</surname><given-names>I&#x0130;</given-names> </name><name name-style="western"><surname>Uluo&#x011F;lu</surname><given-names>C</given-names> </name></person-group><article-title>Psychometric analysis of the first Turkish multiple-choice questions generated using automatic item generation method in medical education</article-title><source>T&#x0131;p E&#x011F;itimi D&#x00FC;nyas&#x0131;</source><year>2023</year><volume>22</volume><issue>68</issue><fpage>154</fpage><lpage>161</lpage><pub-id pub-id-type="doi">10.25282/ted.1376840</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>K&#x0131;yak</surname><given-names>YS</given-names> </name><name name-style="western"><surname>Kononowicz</surname><given-names>AA</given-names> </name><name name-style="western"><surname>G&#x00F3;rski</surname><given-names>S</given-names> </name></person-group><article-title>Multilingual template-based automatic item generation for medical education supported by generative artificial intelligence models ChatGPT and Claude</article-title><source>Bio-Algorithms Med-Syst</source><year>2024</year><month>12</month><day>23</day><volume>20</volume><issue>1</issue><fpage>81</fpage><lpage>89</lpage><pub-id pub-id-type="doi">10.5604/01.3001.0054.9192</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Leslie</surname><given-names>T</given-names> </name><name name-style="western"><surname>Gierl</surname><given-names>MJ</given-names> </name></person-group><article-title>Using automatic item generation to create multiple-choice questions for pharmacy assessment</article-title><source>Am J Pharm 
Educ</source><year>2023</year><month>10</month><volume>87</volume><issue>10</issue><fpage>100081</fpage><pub-id pub-id-type="doi">10.1016/j.ajpe.2023.100081</pub-id><pub-id pub-id-type="medline">37852684</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lai</surname><given-names>H</given-names> </name><name name-style="western"><surname>Gierl</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Byrne</surname><given-names>BE</given-names> </name><name name-style="western"><surname>Spielman</surname><given-names>AI</given-names> </name><name name-style="western"><surname>Waldschmidt</surname><given-names>DM</given-names> </name></person-group><article-title>Three modeling applications to promote automatic item generation for examinations in dentistry</article-title><source>J Dent Educ</source><year>2016</year><month>03</month><volume>80</volume><issue>3</issue><fpage>339</fpage><lpage>347</lpage><pub-id pub-id-type="doi">10.1002/j.0022-0337.2016.80.3.tb06090.x</pub-id><pub-id pub-id-type="medline">26933110</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Falc&#x00E3;o</surname><given-names>F</given-names> </name><name name-style="western"><surname>Costa</surname><given-names>P</given-names> </name><name name-style="western"><surname>P&#x00EA;go</surname><given-names>JM</given-names> </name></person-group><article-title>Feasibility assurance: a review of automatic item generation in medical assessment</article-title><source>Adv in Health Sci Educ</source><year>2022</year><month>05</month><volume>27</volume><issue>2</issue><fpage>405</fpage><lpage>425</lpage><pub-id pub-id-type="doi">10.1007/s10459-022-10092-z</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gierl</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Lai</surname><given-names>H</given-names> </name><name name-style="western"><surname>Turner</surname><given-names>SR</given-names> </name></person-group><article-title>Using automatic item generation to create multiple-choice test items</article-title><source>Med Educ</source><year>2012</year><month>08</month><volume>46</volume><issue>8</issue><fpage>757</fpage><lpage>765</lpage><pub-id pub-id-type="doi">10.1111/j.1365-2923.2012.04289.x</pub-id><pub-id pub-id-type="medline">22803753</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kosh</surname><given-names>AE</given-names> </name><name name-style="western"><surname>Simpson</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Bickel</surname><given-names>L</given-names> </name><name name-style="western"><surname>Kellogg</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sanford&#x2010;Moore</surname><given-names>E</given-names> </name></person-group><article-title>A cost&#x2013;benefit analysis of automatic item generation</article-title><source>Educational Measurement</source><year>2019</year><month>03</month><volume>38</volume><issue>1</issue><fpage>48</fpage><lpage>53</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://onlinelibrary.wiley.com/toc/17453992/38/1">https://onlinelibrary.wiley.com/toc/17453992/38/1</ext-link></comment><pub-id pub-id-type="doi">10.1111/emip.12237</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pugh</surname><given-names>D</given-names> </name><name 
name-style="western"><surname>De Champlain</surname><given-names>A</given-names> </name><name name-style="western"><surname>Gierl</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lai</surname><given-names>H</given-names> </name><name name-style="western"><surname>Touchie</surname><given-names>C</given-names> </name></person-group><article-title>Using cognitive models to develop quality multiple-choice questions</article-title><source>Med Teach</source><year>2016</year><month>08</month><volume>38</volume><issue>8</issue><fpage>838</fpage><lpage>843</lpage><pub-id pub-id-type="doi">10.3109/0142159X.2016.1150989</pub-id><pub-id pub-id-type="medline">26998566</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gierl</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Lai</surname><given-names>H</given-names> </name><name name-style="western"><surname>Pugh</surname><given-names>D</given-names> </name><name name-style="western"><surname>Touchie</surname><given-names>C</given-names> </name><name name-style="western"><surname>Boulais</surname><given-names>AP</given-names> </name><name name-style="western"><surname>De Champlain</surname><given-names>A</given-names> </name></person-group><article-title>Evaluating the psychometric characteristics of generated multiple-choice test items</article-title><source>Applied Measurement in Education</source><year>2016</year><month>07</month><day>2</day><volume>29</volume><issue>3</issue><fpage>196</fpage><lpage>210</lpage><pub-id pub-id-type="doi">10.1080/08957347.2016.1171768</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pugh</surname><given-names>D</given-names> </name><name name-style="western"><surname>De 
Champlain</surname><given-names>A</given-names> </name><name name-style="western"><surname>Gierl</surname><given-names>M</given-names> </name><name name-style="western"><surname>Lai</surname><given-names>H</given-names> </name><name name-style="western"><surname>Touchie</surname><given-names>C</given-names> </name></person-group><article-title>Can automated item generation be used to develop high quality MCQs that assess application of knowledge?</article-title><source>RPTEL</source><year>2020</year><month>12</month><volume>15</volume><issue>1</issue><fpage>12</fpage><pub-id pub-id-type="doi">10.1186/s41039-020-00134-8</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gierl</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Lai</surname><given-names>H</given-names> </name></person-group><article-title>Evaluating the quality of medical multiple-choice items created with automated processes</article-title><source>Med Educ</source><year>2013</year><month>07</month><volume>47</volume><issue>7</issue><fpage>726</fpage><lpage>733</lpage><pub-id pub-id-type="doi">10.1111/medu.12202</pub-id><pub-id pub-id-type="medline">23746162</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>van Merri&#x00EB;nboer</surname><given-names>JJG</given-names> </name><name name-style="western"><surname>Sweller</surname><given-names>J</given-names> </name></person-group><article-title>Cognitive load theory in health professional education: design principles and strategies</article-title><source>Med Educ</source><year>2010</year><month>01</month><volume>44</volume><issue>1</issue><fpage>85</fpage><lpage>93</lpage><pub-id pub-id-type="doi">10.1111/j.1365-2923.2009.03498.x</pub-id><pub-id 
pub-id-type="medline">20078759</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>K&#x0131;yak</surname><given-names>YS</given-names> </name></person-group><article-title>A ChatGPT prompt for writing case-based multiple-choice questions</article-title><source>Rev Esp Edu Med</source><year>2023</year><volume>4</volume><issue>3</issue><fpage>98</fpage><lpage>103</lpage><pub-id pub-id-type="doi">10.6018/edumed.587451</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zuckerman</surname><given-names>M</given-names> </name><name name-style="western"><surname>Flood</surname><given-names>R</given-names> </name><name name-style="western"><surname>Tan</surname><given-names>RJB</given-names> </name><etal/></person-group><article-title>ChatGPT for assessment writing</article-title><source>Med Teach</source><year>2023</year><month>11</month><volume>45</volume><issue>11</issue><fpage>1224</fpage><lpage>1227</lpage><pub-id pub-id-type="doi">10.1080/0142159X.2023.2249239</pub-id><pub-id pub-id-type="medline">37789636</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Laupichler</surname><given-names>MC</given-names> </name><name name-style="western"><surname>Rother</surname><given-names>JF</given-names> </name><name name-style="western"><surname>Grunwald Kadow</surname><given-names>IC</given-names> </name><name name-style="western"><surname>Ahmadi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Raupach</surname><given-names>T</given-names> </name></person-group><article-title>Large language models in medical education: comparing ChatGPT- to human-generated exam 
questions</article-title><source>Acad Med</source><year>2024</year><month>05</month><day>1</day><volume>99</volume><issue>5</issue><fpage>508</fpage><lpage>512</lpage><pub-id pub-id-type="doi">10.1097/ACM.0000000000005626</pub-id><pub-id pub-id-type="medline">38166323</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>K&#x0131;yak</surname><given-names>YS</given-names> </name><name name-style="western"><surname>Kononowicz</surname><given-names>AA</given-names> </name></person-group><article-title>Case-based MCQ generator: A custom ChatGPT based on published prompts in the literature for automatic item generation</article-title><source>Med Teach</source><year>2024</year><month>08</month><volume>46</volume><issue>8</issue><fpage>1018</fpage><lpage>1020</lpage><pub-id pub-id-type="doi">10.1080/0142159X.2024.2314723</pub-id><pub-id pub-id-type="medline">38340312</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>K&#x0131;yak</surname><given-names>YS</given-names> </name><name name-style="western"><surname>Co&#x015F;kun</surname><given-names>&#x00D6;</given-names> </name><name name-style="western"><surname>Budako&#x011F;lu</surname><given-names>I&#x0130;</given-names> </name><name name-style="western"><surname>Uluo&#x011F;lu</surname><given-names>C</given-names> </name></person-group><article-title>ChatGPT for generating multiple-choice questions: Evidence on the use of artificial intelligence in automatic item generation for a rational pharmacotherapy exam</article-title><source>Eur J Clin Pharmacol</source><year>2024</year><month>05</month><volume>80</volume><issue>5</issue><fpage>729</fpage><lpage>735</lpage><pub-id pub-id-type="doi">10.1007/s00228-024-03649-x</pub-id><pub-id 
pub-id-type="medline">38353690</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Indran</surname><given-names>IR</given-names> </name><name name-style="western"><surname>Paranthaman</surname><given-names>P</given-names> </name><name name-style="western"><surname>Gupta</surname><given-names>N</given-names> </name><name name-style="western"><surname>Mustafa</surname><given-names>N</given-names> </name></person-group><article-title>Twelve tips to leverage AI for efficient and effective medical question generation: A guide for educators using Chat GPT</article-title><source>Med Teach</source><year>2024</year><month>08</month><volume>46</volume><issue>8</issue><fpage>1021</fpage><lpage>1026</lpage><pub-id pub-id-type="doi">10.1080/0142159X.2023.2294703</pub-id><pub-id pub-id-type="medline">38146711</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ngo</surname><given-names>A</given-names> </name><name name-style="western"><surname>Gupta</surname><given-names>S</given-names> </name><name name-style="western"><surname>Perrine</surname><given-names>O</given-names> </name><name name-style="western"><surname>Reddy</surname><given-names>R</given-names> </name><name name-style="western"><surname>Ershadi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Remick</surname><given-names>D</given-names> </name></person-group><article-title>ChatGPT 3.5 fails to write appropriate multiple choice practice exam questions</article-title><source>Acad Pathol</source><year>2024</year><volume>11</volume><issue>1</issue><fpage>100099</fpage><pub-id pub-id-type="doi">10.1016/j.acpath.2023.100099</pub-id><pub-id pub-id-type="medline">38162414</pub-id></nlm-citation></ref><ref 
id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lee</surname><given-names>H</given-names> </name></person-group><article-title>The rise of ChatGPT: exploring its potential in medical education</article-title><source>Anatomical Sciences Ed</source><year>2024</year><volume>17</volume><issue>5</issue><fpage>926</fpage><lpage>931</lpage><pub-id pub-id-type="doi">10.1002/ase.2270</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>von Eschenbach</surname><given-names>WJ</given-names> </name></person-group><article-title>Transparency and the black box problem: why we do not trust AI</article-title><source>Philos Technol</source><year>2021</year><month>12</month><volume>34</volume><issue>4</issue><fpage>1607</fpage><lpage>1622</lpage><pub-id pub-id-type="doi">10.1007/s13347-021-00477-0</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>Bulut</surname><given-names>O</given-names> </name><name name-style="western"><surname>Beiting-Parrish</surname><given-names>M</given-names> </name><name name-style="western"><surname>Casabianca</surname><given-names>JM</given-names> </name></person-group><article-title>The rise of artificial intelligence in educational measurement: opportunities and ethical challenges</article-title><source>arXiv</source><comment>Preprint posted online on Jun 27, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2406.18900</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Han</surname><given-names>Z</given-names> </name><name 
name-style="western"><surname>Battaglia</surname><given-names>F</given-names> </name><name name-style="western"><surname>Udaiyar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Fooks</surname><given-names>A</given-names> </name><name name-style="western"><surname>Terlecky</surname><given-names>SR</given-names> </name></person-group><article-title>An explorative assessment of ChatGPT as an aid in medical education: Use it with caution</article-title><source>Med Teach</source><year>2024</year><month>05</month><volume>46</volume><issue>5</issue><fpage>657</fpage><lpage>664</lpage><pub-id pub-id-type="doi">10.1080/0142159X.2023.2271159</pub-id><pub-id pub-id-type="medline">37862566</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Appelbaum</surname><given-names>SH</given-names> </name></person-group><article-title>Socio&#x2010;technical systems theory: an intervention strategy for organizational development</article-title><source>Manag Decis</source><year>1997</year><month>08</month><volume>35</volume><issue>6</issue><fpage>452</fpage><lpage>463</lpage><pub-id pub-id-type="doi">10.1108/00251749710173823</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mesko</surname><given-names>B</given-names> </name></person-group><article-title>The ChatGPT (generative artificial intelligence) revolution has made artificial intelligence approachable for medical professionals</article-title><source>J Med Internet Res</source><year>2023</year><month>06</month><day>22</day><volume>25</volume><fpage>e48392</fpage><pub-id pub-id-type="doi">10.2196/48392</pub-id><pub-id pub-id-type="medline">37347508</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation 
citation-type="preprint"><person-group person-group-type="author"><name name-style="western"><surname>White</surname><given-names>J</given-names> </name><name name-style="western"><surname>Fu</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Hays</surname><given-names>S</given-names> </name><etal/></person-group><article-title>A prompt pattern catalog to enhance prompt engineering with ChatGPT</article-title><source>arXiv</source><comment>Preprint posted online on Feb 21, 2023</comment><pub-id pub-id-type="doi">10.48550/arXiv.2302.11382</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>K&#x0131;yak</surname><given-names>YS</given-names> </name></person-group><article-title>Beginner-level tips for medical educators: guidance on selection, prompt engineering, and the use of artificial intelligence chatbots</article-title><source>Med Sci Educ</source><year>2024</year><month>12</month><volume>34</volume><issue>6</issue><fpage>1571</fpage><lpage>1576</lpage><pub-id pub-id-type="doi">10.1007/s40670-024-02146-1</pub-id><pub-id pub-id-type="medline">39758489</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Masters</surname><given-names>K</given-names> </name><name name-style="western"><surname>Benjamin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Agrawal</surname><given-names>A</given-names> </name><name name-style="western"><surname>MacNeill</surname><given-names>H</given-names> </name><name name-style="western"><surname>Pillow</surname><given-names>MT</given-names> </name><name name-style="western"><surname>Mehta</surname><given-names>N</given-names> </name></person-group><article-title>Twelve tips on creating and using custom GPTs to enhance health 
professions education</article-title><source>Med Teach</source><year>2024</year><month>06</month><volume>46</volume><issue>6</issue><fpage>752</fpage><lpage>756</lpage><pub-id pub-id-type="doi">10.1080/0142159X.2024.2305365</pub-id><pub-id pub-id-type="medline">38285894</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="web"><article-title>Item Model Maker for AIG</article-title><source>ChatGPT</source><access-date>2025-04-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://chatgpt.com/g/g-ISoiQOLyv-item-model-maker-for-aig">https://chatgpt.com/g/g-ISoiQOLyv-item-model-maker-for-aig</ext-link></comment></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="web"><article-title>Item model 1</article-title><source>ChatGPT</source><access-date>2025-04-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://chatgpt.com/share/1c975655-6bc4-4a16-a082-efad2030a5e2">https://chatgpt.com/share/1c975655-6bc4-4a16-a082-efad2030a5e2</ext-link></comment></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="web"><article-title>Item model 2</article-title><source>ChatGPT</source><access-date>2025-04-01</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://chatgpt.com/share/7e093fef-8d43-4b8f-bd15-a00bdf4c25c5">https://chatgpt.com/share/7e093fef-8d43-4b8f-bd15-a00bdf4c25c5</ext-link></comment></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Masters</surname><given-names>K</given-names> </name></person-group><article-title>Medical Teacher&#x2019;s first ChatGPT&#x2019;s referencing hallucinations: lessons for editors, reviewers, and teachers</article-title><source>Med Teach</source><year>2023</year><month>07</month><volume>45</volume><issue>7</issue><fpage>673</fpage><lpage>675</lpage><pub-id 
pub-id-type="doi">10.1080/0142159X.2023.2208731</pub-id><pub-id pub-id-type="medline">37183932</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sayin</surname><given-names>A</given-names> </name><name name-style="western"><surname>Gierl</surname><given-names>M</given-names> </name></person-group><article-title>Using OpenAI GPT to generate reading comprehension items</article-title><source>Educational Measurement</source><year>2024</year><month>03</month><volume>43</volume><issue>1</issue><fpage>5</fpage><lpage>18</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://onlinelibrary.wiley.com/toc/17453992/43/1">https://onlinelibrary.wiley.com/toc/17453992/43/1</ext-link></comment><pub-id pub-id-type="doi">10.1111/emip.12590</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zack</surname><given-names>T</given-names> </name><name name-style="western"><surname>Lehman</surname><given-names>E</given-names> </name><name name-style="western"><surname>Suzgun</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Assessing the potential of GPT-4 to perpetuate racial and gender biases in health care: a model evaluation study</article-title><source>Lancet Digit Health</source><year>2024</year><month>01</month><volume>6</volume><issue>1</issue><fpage>e12</fpage><lpage>e22</lpage><pub-id pub-id-type="doi">10.1016/S2589-7500(23)00225-X</pub-id><pub-id pub-id-type="medline">38123252</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>K&#x0131;yak</surname><given-names>YS</given-names> </name><name name-style="western"><surname>Emekli</surname><given-names>E</given-names> 
</name><name name-style="western"><surname>Co&#x015F;kun</surname><given-names>&#x00D6;</given-names> </name><name name-style="western"><surname>Budako&#x011F;lu</surname><given-names>I&#x0130;</given-names> </name></person-group><article-title>Keeping humans in the loop efficiently by generating question templates instead of questions using AI: Validity evidence on Hybrid AIG</article-title><source>Med Teach</source><year>2025</year><month>04</month><volume>47</volume><issue>4</issue><fpage>744</fpage><lpage>747</lpage><pub-id pub-id-type="doi">10.1080/0142159X.2024.2430360</pub-id><pub-id pub-id-type="medline">39604119</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>The prompt that has been used in the custom GPT.</p><media xlink:href="formative_v9i1e65726_app1.docx" xlink:title="DOCX File, 16 KB"/></supplementary-material></app-group></back></article>