<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article article-type="research-article" dtd-version="2.0" xmlns:xlink="http://www.w3.org/1999/xlink">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v8i1e46800</article-id>
      <article-id pub-id-type="pmid">39115919</article-id>
      <article-id pub-id-type="doi">10.2196/46800</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Assessing ChatGPT’s Capability for Multiple Choice Questions Using RaschOnline: Observational Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Lai</surname>
            <given-names>ChungLiang</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Chaudhry</surname>
            <given-names>Beenish</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Mungoli</surname>
            <given-names>Neelesh</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Chow</surname>
            <given-names>Julie Chi</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-3150-4917</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Cheng</surname>
            <given-names>Teng Yun</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0001-0641-5405</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Chien</surname>
            <given-names>Tsair-Wei</given-names>
          </name>
          <degrees>MBA</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1329-0679</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Chou</surname>
            <given-names>Willy</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff5" ref-type="aff">5</xref>
          <address>
            <institution>Department of Physical Medicine and Rehabilitation</institution>
            <institution>Chi Mei Medical Center</institution>
            <addr-line>No. 901, Chung Hwa Road</addr-line>
            <addr-line>Yung Kung District</addr-line>
            <addr-line>Tainan, 710</addr-line>
            <country>Taiwan</country>
            <phone>886 937399106</phone>
            <email>smilewilly@mail.chimei.org.tw</email>
          </address>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-1132-9341</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Pediatrics</institution>
        <institution>Chi Mei Medical Center</institution>
        <addr-line>Tainan</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Pediatrics</institution>
        <institution>School of Medicine, College of Medicine</institution>
        <institution>Chung Shan Medical University</institution>
        <addr-line>Taichung</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Emergency Medicine</institution>
        <institution>Chi Mei Medical Center</institution>
        <addr-line>Tainan</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Statistics</institution>
        <institution>Coding Data Analytics</institution>
        <addr-line>Tainan</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Physical Medicine and Rehabilitation</institution>
        <institution>Chi Mei Medical Center</institution>
        <addr-line>Tainan</addr-line>
        <country>Taiwan</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Department of Leisure and Sports Management</institution>
        <institution>Far East University</institution>
        <addr-line>Tainan</addr-line>
        <country>Taiwan</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Willy Chou <email>smilewilly@mail.chimei.org.tw</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2024</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>8</day>
        <month>8</month>
        <year>2024</year>
      </pub-date>
      <volume>8</volume>
      <elocation-id>e46800</elocation-id>
      <history>
        <date date-type="received">
          <day>25</day>
          <month>2</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>15</day>
          <month>6</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>3</day>
          <month>7</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>31</day>
          <month>7</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Julie Chi Chow, Teng Yun Cheng, Tsair-Wei Chien, Willy Chou. Originally published in JMIR Formative Research (https://formative.jmir.org), 08.08.2024.</copyright-statement>
      <copyright-year>2024</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2024/1/e46800" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>ChatGPT (OpenAI), a state-of-the-art large language model, has exhibited remarkable performance in various specialized applications. Despite the growing popularity and efficacy of artificial intelligence, there is a scarcity of studies that assess ChatGPT’s competence in addressing multiple-choice questions (MCQs) using KIDMAP of Rasch analysis—a website tool used to evaluate ChatGPT’s performance in MCQ answering.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aims to (1) showcase the utility of the website (Rasch analysis, specifically RaschOnline), and (2) determine the grade achieved by ChatGPT when compared to a normal sample.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The capability of ChatGPT was evaluated using 10 items from the English tests conducted for Taiwan college entrance examinations in 2023. Under a Rasch model, the responses of 300 students with normally distributed abilities were simulated to compete with ChatGPT’s responses. RaschOnline was used to generate 5 visual presentations, including item difficulties, differential item functioning, item characteristic curve, Wright map, and KIDMAP, to address the research objectives.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The findings revealed the following: (1) the difficulty of the 10 items increased in a monotonic pattern from easier to harder, represented by logits (–2.43, –1.78, –1.48, –0.64, –0.1, 0.33, 0.59, 1.34, 1.7, and 2.47); (2) evidence of differential item functioning was observed between gender groups for item 5 (<italic>P</italic>=.04); (3) item 5 displayed a good fit to the Rasch model (<italic>P</italic>=.61); (4) all items demonstrated a satisfactory fit to the Rasch model, indicated by Infit mean square errors below the threshold of 1.5; (5) no significant difference was found in the measures obtained between gender groups (<italic>P</italic>=.83); (6) a significant difference was observed among ability grades (<italic>P</italic>&lt;.001); and (7) ChatGPT’s capability was graded as A, surpassing grades B to E.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>By using RaschOnline, this study provides evidence that ChatGPT possesses the ability to achieve a grade A when compared to a normal sample. It exhibits excellent proficiency in answering MCQs from the English tests conducted in 2023 for the Taiwan college entrance examinations.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>RaschOnline</kwd>
        <kwd>ChatGPT</kwd>
        <kwd>multiple choice questions</kwd>
        <kwd>differential item functioning</kwd>
        <kwd>Wright map</kwd>
        <kwd>KIDMAP</kwd>
        <kwd>website tool</kwd>
        <kwd>evaluation tool</kwd>
        <kwd>tool</kwd>
        <kwd>application</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>scoring</kwd>
        <kwd>testing</kwd>
        <kwd>college</kwd>
        <kwd>students</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>ChatGPT is an advanced language model, which stands for Chat Generative Pretrained Transformer [<xref ref-type="bibr" rid="ref1">1</xref>]. Its primary function is to generate text that mimics human language based on a given prompt or context [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. This state-of-the-art model has been trained using an extensive amount of text data available on the internet, enabling it to understand and produce text on a diverse range of subjects and in various language styles [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref5">5</xref>].</p>
        <p>ChatGPT is a highly versatile language model that has found numerous applications [<xref ref-type="bibr" rid="ref1">1</xref>]. One such significant use is text generation, which could revolutionize content creation, including academic publications [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. With the ever-growing sophistication of language models, such as ChatGPT, differentiating between text produced by humans and that generated by artificial intelligence (AI) will become increasingly difficult [<xref ref-type="bibr" rid="ref3">3</xref>]. ChatGPT can respond to user prompts to perform a variety of tasks, such as answering questions, composing essays, writing poems and love letters, generating computer code, and even creating business plans. Furthermore, it can also solve complex problems, including those in math or physics, among other fields [<xref ref-type="bibr" rid="ref6">6</xref>-<xref ref-type="bibr" rid="ref8">8</xref>].</p>
      </sec>
      <sec>
        <title>Assessing ChatGPT’s Capacity for Multiple-Choice Questions</title>
        <p>Korn and Kelly [<xref ref-type="bibr" rid="ref9">9</xref>] have raised serious doubts about the reliability and fairness of ChatGPT, echoing concerns voiced in the popular press regarding the chatbot’s tendency to disseminate misinformation. The authors caution that ChatGPT may not always provide accurate information [<xref ref-type="bibr" rid="ref9">9</xref>], and there are fears that it could be manipulated to spread false information [<xref ref-type="bibr" rid="ref10">10</xref>] or produce “deepfakes” [<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p>Research on medical question answering has previously evaluated ChatGPT’s performance on specific tasks [<xref ref-type="bibr" rid="ref12">12</xref>]. For example, Jin et al [<xref ref-type="bibr" rid="ref13">13</xref>] achieved 68.1% accuracy in answering yes-or-no questions from PubMed abstracts, while ChatGPT performed with accuracy rates of 64.4% and 57.8% on 2 data sets from the United States Medical Licensing Examination (USMLE) [<xref ref-type="bibr" rid="ref12">12</xref>]. ChatGPT also achieved high scores on breast cancer screening prompts [<xref ref-type="bibr" rid="ref14">14</xref>] and Kawasaki disease prompts [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref21">21</xref>]. A total of 2 pediatricians’ assessments indicated that ChatGPT’s overall performance corresponded to a grade of C in a range from A to E, with average scores of –0.89 logits and 0.90 logits (=log odds), respectively [<xref ref-type="bibr" rid="ref22">22</xref>].</p>
        <p>Recent research findings indicate that ChatGPT has shown remarkable precision in answering questions related to the US Certified Public Accountant exam and the US bar examination [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. Additionally, in the field of medicine, ChatGPT has met the required standards for the USMLE [<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. While there are still obstacles to overcome when applying ChatGPT to clinical medicine [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref28">28</xref>], it has demonstrated satisfactory performance in English examinations [<xref ref-type="bibr" rid="ref29">29</xref>]. However, Ha and Yaneva [<xref ref-type="bibr" rid="ref30">30</xref>] reported low accuracy rates for medical multiple-choice questions (MCQs). In this study, we were motivated to determine ChatGPT’s grade (eg, A, B, C, or D) in answering MCQs against the study [<xref ref-type="bibr" rid="ref22">22</xref>] with low accuracy for MCQs.</p>
      </sec>
      <sec>
        <title>Rasch Model Applied to This Study</title>
        <p>In ChatGPT, there are 2 types of prompts: MCQs [<xref ref-type="bibr" rid="ref30">30</xref>] and open-ended (OE) [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. The OE format of ChatGPT is more subjective than the MCQs. MCQs can be objectively evaluated by observing the correct and incorrect answers to each item. The Rasch model [<xref ref-type="bibr" rid="ref31">31</xref>] is suitable for analyzing dichotomous responses (ie, correct and incorrect answers). Otherwise, the Rasch rating scale model (RSM) [<xref ref-type="bibr" rid="ref32">32</xref>] can be applied. Nonetheless, a study using Rasch analysis to examine the capability of ChatGPT has not yet been published in the literature. Therefore, it is necessary to demonstrate the use of Rasch analysis in assessing ChatGPT’s capability based on MCQs with correct and incorrect answers (ie, dichotomous responses in Rasch analysis).</p>
      </sec>
      <sec>
        <title>Features of Rasch Analysis</title>
        <sec>
          <title>Overview</title>
          <p>Rasch analysis is a statistical method that evaluates the performance of individuals on tests or assessments. By applying this technique to ChatGPT [<xref ref-type="bibr" rid="ref1">1</xref>], researchers can assess the quality of its responses and pinpoint areas that may require improvement [<xref ref-type="bibr" rid="ref33">33</xref>]. Below are some of the features of Rasch analysis that can be applied to evaluate the performance of ChatGPT.</p>
        </sec>
        <sec>
          <title>Item Difficulty</title>
          <p>Rasch analysis can provide valuable insights into the difficulty level of each prompt or question presented to ChatGPT. This information can be used to pinpoint areas where ChatGPT may face challenges (such as when presented with difficult questions) or perform well (such as when presented with easier questions) [<xref ref-type="bibr" rid="ref34">34</xref>].</p>
        </sec>
        <sec>
          <title>Person Ability</title>
          <p>By analyzing ChatGPT’s responses to prompts or questions, Rasch analysis can measure its ability level. This evaluation can offer valuable information about ChatGPT’s overall performance and highlight areas where enhancements may be necessary [<xref ref-type="bibr" rid="ref34">34</xref>].</p>
        </sec>
        <sec>
          <title>Item Fit Statistics</title>
          <p>Item fit statistics are generated through Rasch analysis to evaluate the degree to which each prompt or question aligns with the overall model. This analysis can be used to identify items that require revision or removal from the assessment [<xref ref-type="bibr" rid="ref35">35</xref>-<xref ref-type="bibr" rid="ref37">37</xref>].</p>
        </sec>
        <sec>
          <title>Differential Item Functioning</title>
          <p>Differential item functioning (DIF) can be identified by Rasch analysis when different groups of individuals (such as males and females or individuals from diverse cultural backgrounds) respond differently to the same item [<xref ref-type="bibr" rid="ref38">38</xref>]; for example, a specific item may be preferred by men or women based on DIF analysis. By detecting DIF, Rasch analysis can flag potentially biased items and facilitate the improvement of assessment fairness [<xref ref-type="bibr" rid="ref39">39</xref>].</p>
          <p>A total of 5 visualizations are frequently applied to present item features and person measures, including the distribution of item difficulties (DID) [<xref ref-type="bibr" rid="ref33">33</xref>], DIF [<xref ref-type="bibr" rid="ref38">38</xref>], item characteristic curve (ICC) [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>], Wright map (namely, item-person map) [<xref ref-type="bibr" rid="ref33">33</xref>], and KIDMAP [<xref ref-type="bibr" rid="ref42">42</xref>]. A forest plot [<xref ref-type="bibr" rid="ref43">43</xref>] can be used to integrate DID and DIF for a better understanding of item characteristics.</p>
        </sec>
      </sec>
      <sec>
        <title>Study Aims</title>
        <p>The study objectives were to (1) demonstrate the use of web-based Rasch analysis (namely, RaschOnline [<xref ref-type="bibr" rid="ref44">44</xref>]) and (2) determine ChatGPT’s grade compared to a normal sample.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Source</title>
        <p>In this study, 300 simulated participants responded to 10 items from Taiwan college entrance examinations for the year 2023 (<xref ref-type="table" rid="table1">Table 1</xref> and <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) with 2-response categories [<xref ref-type="bibr" rid="ref45">45</xref>] (eg, 0 and 1 for incorrect and correct answers) and were analyzed according to item difficulty (with a logit unit from –2.5 to 2.5; eg, –2.43, –1.78, –1.48, –0.64, –0.1, 0.33, 0.59, 1.34, 1.7, and 2.47 logits) in the Rasch model based on the normal distribution of person measures (<xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>); see MP4 video [<xref ref-type="bibr" rid="ref46">46</xref>] and the approach of simulation generation [<xref ref-type="bibr" rid="ref47">47</xref>] in RaschOnline [<xref ref-type="bibr" rid="ref44">44</xref>] about the way to conduct this study.</p>
        <p>Each item in <xref ref-type="table" rid="table1">Table 1</xref> was prompted. Answers from ChatGPT were gathered and scored on a binary scale with 301 people answering the 10 items (<xref ref-type="table" rid="table1">Table 1</xref>).</p>
        <p>The 301 simulated participants were randomly divided into 2 groups based on gender. There were 5 grades assigned based on the person measures (eg, &gt;3.0, &gt;1.5, &gt;–1.5, &gt;–3.0, and ≤–3.0 logits).</p>
        <p>As a final step, the 301 individuals (including the ChatGPT301 student) were analyzed using RaschOnline software [<xref ref-type="bibr" rid="ref44">44</xref>].</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>The 10 items used for examining ChatGPT’s capability<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="80"/>
            <col width="90"/>
            <col width="830"/>
            <thead>
              <tr valign="top">
                <td>Answer</td>
                <td>Number</td>
                <td>Item</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>A</td>
                <td>1</td>
                <td>The bus driver often complains about chewing gum found under passenger seats because it is () and very hard to remove. (A) sticky, (B) greasy, (C) clumsy, (D) mighty</td>
              </tr>
              <tr valign="top">
                <td>C</td>
                <td>2</td>
                <td>Jesse is a talented model. He can easily adopt an elegant () for a camera shoot. (A) clap, (B) toss, (C) pose, (D) snap</td>
              </tr>
              <tr valign="top">
                <td>C</td>
                <td>3</td>
                <td>To draw her family tree, Mary tried to trace her () back to their arrival in North America. (A) siblings, (B) commuters, (C) ancestors, (D) instructor</td>
              </tr>
              <tr valign="top">
                <td>B</td>
                <td>4</td>
                <td>Upon the super typhoon warning, Nancy rushed to the supermarket—only to find the shelves almost () and the stock nearly gone. (A) blank, (B) bare, (C) hollow, (D) queer</td>
              </tr>
              <tr valign="top">
                <td>D</td>
                <td>5</td>
                <td>Even though Jack said “Sorry!” to me in person, I did not feel any () in his apology. (A) liability, (B) generosity, (C) integrity, (D) sincerity</td>
              </tr>
              <tr valign="top">
                <td>D</td>
                <td>6</td>
                <td>My grandfather has astonishing powers of (). He can still vividly describe his first day at school as a child. (A) resolve, (B) faction, (C) privilege, (D) recall</td>
              </tr>
              <tr valign="top">
                <td>B</td>
                <td>7</td>
                <td>Recent research has found lots of evidence to () the drug company’s claims about its “miracle” tablets for curing cancer. (A) provoke, (B) counter, (C) expose, (D) convert</td>
              </tr>
              <tr valign="top">
                <td>A</td>
                <td>8</td>
                <td>Corrupt officials and misguided policies have () the country’s economy and burdened its people with enormous foreign debts. (A) crippled, (B) accelerated, (C) rendered, (D) ventured</td>
              </tr>
              <tr valign="top">
                <td>A</td>
                <td>9</td>
                <td>As a record number of fans showed up for the baseball final, the highways around the stadium were () with traffic all day. (A) choked, (B) disturbed, (C) enclosed, (D) injected</td>
              </tr>
              <tr valign="top">
                <td>D</td>
                <td>10</td>
                <td>Studies show that the () unbiased media are in fact often deeply influenced by political ideology. (A) undoubtedly, (B) roughly, (C) understandably, (D) supposedly</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>The prompt to ChatGPT is described as “which is the correct word to fill in the blank in the sentence following: item content” (see MP4 video [<xref ref-type="bibr" rid="ref46">46</xref>] about the way to conduct this study).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study compares the accuracy of ChatGPT on an English test with simulated students’ answers to the same test; it is important to understand that this kind of research does not involve direct interaction with human participants. The focus here is on the performance of the AI model, and the “subjects” are essentially the algorithms themselves. There is no risk of physical, emotional, or psychological harm to human individuals, and there is no collection of personally identifiable information or any sensitive data from humans. Therefore, this study falls within the category of research that is exempt from institutional review board review according to the guidelines provided by the Taiwan Ministry of Health and Welfare.</p>
      </sec>
      <sec>
        <title>Rasch Analysis of Item Features and Person Responses Using RaschOnline</title>
        <p>RaschOnline [<xref ref-type="bibr" rid="ref44">44</xref>] based on the Rasch RSM [<xref ref-type="bibr" rid="ref32">32</xref>] was used to analyze the data. The polytomous data can therefore be analyzed using RaschOnline [<xref ref-type="bibr" rid="ref44">44</xref>].</p>
        <p>In the Rasch model, the probability and SE of the person estimate can be expressed as equations 1 and 2:</p>
        <disp-formula>
          <graphic xlink:href="formative_v8i1e46800_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <disp-formula>
          <graphic xlink:href="formative_v8i1e46800_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>where <italic>θ</italic> and <italic>δ</italic> are defined as person ability and item difficulty, respectively. <italic>L</italic> is the item length. <inline-graphic xlink:href="formative_v8i1e46800_fig13.png" xlink:type="simple" mimetype="image"/> is the first-order derivative for person <italic>n</italic> with ability <italic>θ</italic> on item <italic>i</italic> in equation 1; <italic>P<sub>i</sub></italic>(<italic>θ</italic>) is identical to equation 1; <italic>Q<sub>i</sub></italic>(<italic>θ</italic>) refers to equation 3, as shown below:</p>
        <disp-formula>
          <graphic xlink:href="formative_v8i1e46800_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>The processes of the first-order derivative for <inline-graphic xlink:href="formative_v8i1e46800_fig14.png" xlink:type="simple" mimetype="image"/> in equation 2 are described below:</p>
        <disp-formula>
          <graphic xlink:href="formative_v8i1e46800_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>Equation 2 can then be extended to equation 5, indicating that person SE is associated with the inverse of its total variances across all items.</p>
        <disp-formula>
          <graphic xlink:href="formative_v8i1e46800_fig11.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>The processes of the first-order derivative for variance (denoted by Var<italic><sub>ni</sub></italic>) on (<italic>θ<sub>n</sub></italic>–<italic>δ<sub>i</sub></italic>) can also be described based on equation 4 and are shown below:</p>
        <disp-formula>
          <graphic xlink:href="formative_v8i1e46800_fig12.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>If (<italic>e</italic><sup>(</sup><italic><sup>θn</sup></italic><sup>–</sup><italic><sup>δi</sup></italic><sup>)</sup>) is replaced with <inline-graphic xlink:href="formative_v8i1e46800_fig15.png" xlink:type="simple" mimetype="image"/>, the variance for person <italic>n</italic> on item <italic>i</italic> adaptive to the RSM equals the result in equation 6 [<xref ref-type="bibr" rid="ref48">48</xref>]. Through the Newton-Raphson iteration method [<xref ref-type="bibr" rid="ref49">49</xref>] and the person estimate and <inline-graphic xlink:href="formative_v8i1e46800_fig14.png" xlink:type="simple" mimetype="image"/> in equations 1 and 5, RaschOnline [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref50">50</xref>] was programmed and developed.</p>
        <p>To visualize item features and individual measures, several visualizations are commonly used, such as DID [<xref ref-type="bibr" rid="ref33">33</xref>], DIF [<xref ref-type="bibr" rid="ref38">38</xref>], ICC [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>], Wright map [<xref ref-type="bibr" rid="ref51">51</xref>], and KIDMAP [<xref ref-type="bibr" rid="ref42">42</xref>].</p>
        <p>The method of drawing these visualizations refers to the manual of RaschOnline [<xref ref-type="bibr" rid="ref44">44</xref>] and <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref> (how to conduct this study).</p>
      </sec>
      <sec>
        <title>Two Tasks Required to Achieve the Study Goals</title>
        <sec>
          <title>Demonstrate the Use of RaschOnline (Task 1)</title>
          <p>Rasch analysis was used to observe item features and person responses (eg, the determination of grade in ChatGPT performance [<xref ref-type="bibr" rid="ref22">22</xref>]), and some significant terms in Rasch analysis are defined: (1) DIF [<xref ref-type="bibr" rid="ref38">38</xref>] analysis was performed to examine whether there are items in favor of a specific group (eg, female or male); for example, a specific item might be easier for female (or male) respondents. Details about DIF are in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>; (2) the ICC [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>] is a plot of the probability of the examinee answering a question correctly against his or her underlying abilities on the trait being measured [<xref ref-type="bibr" rid="ref33">33</xref>]. The ICC is based on item response theory: the curve is bounded between 0 and 1, monotonically increases, and is commonly referred to as a logistic function. There is a characteristic curve for each item in a test; (3) Wright map [<xref ref-type="bibr" rid="ref51">51</xref>] with groups was used to display sample distributions of groups compared to the overall sample of item difficulties and person performance abilities with a log-odds (=logit) unit on a common equal-interval continuum. ANOVA was performed to examine differences in measures between groups (eg, female and male); (4) in the KIDMAP [<xref ref-type="bibr" rid="ref42">42</xref>], individual person performance is assessed using the <italic>z</italic> score ([observed–expected]÷SD) across items. The <italic>z</italic> scores of items outside the upper limit (&gt;2.0) indicate that the observed responses are significantly higher than those expected, whereas <italic>z</italic> scores below the lower limit (&lt;–2.0) indicate responses that are unexpectedly lower than those expected based on the individual’s ability.</p>
          <p>In task 1, the first study goal of demonstrating the use of RaschOnline [<xref ref-type="bibr" rid="ref44">44</xref>] would be achieved.</p>
        </sec>
        <sec>
          <title>Determine ChatGPT’s Grade Against a Normal Sample (Task 2)</title>
          <p>Using Rasch analysis, the capability of ChatGPT301 to answer 10-item MCQs from Taiwan college entrance examinations for the year 2023 (<xref ref-type="table" rid="table1">Table 1</xref> and <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) can be assessed.</p>
          <p>In task 2, the second study goal of determining ChatGPT’s grade compared to a normal sample would be achieved.</p>
        </sec>
      </sec>
      <sec>
        <title>Statistical Tools and Data Analysis</title>
        <p>SPSS Statistics (version 22.0; IBM Corp) for Windows and MedCalc (version 9.5.0.0; MedCalc Software) for Windows were used to help perform Rasch analysis. Type I errors were set at a significance level of 0.05.</p>
        <p>The 5 visualizations include DID [<xref ref-type="bibr" rid="ref33">33</xref>], DIF [<xref ref-type="bibr" rid="ref38">38</xref>], ICC [<xref ref-type="bibr" rid="ref40">40</xref>,<xref ref-type="bibr" rid="ref41">41</xref>], Wright map [<xref ref-type="bibr" rid="ref51">51</xref>], and KIDMAP [<xref ref-type="bibr" rid="ref42">42</xref>] in tasks 1 and 2 of this study.</p>
        <p>Details about how to conduct this study can be found in the link (MP4) provided in references [<xref ref-type="bibr" rid="ref46">46</xref>,<xref ref-type="bibr" rid="ref49">49</xref>] and in <xref rid="figure1" ref-type="fig">Figure 1</xref> (eg, copy and paste data into the box, select visual display, and click on submit icon to draw website visual representations).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>How to execute RaschOnline with the example of KIDMAP (note: (1) data are copied and pasted to the box frame; (2) visual presentation is selected; (3) submission icon is clicked to generate results). (A) Data entry; (B) Data display.</p>
          </caption>
          <graphic xlink:href="formative_v8i1e46800_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Demonstrate the Use of RaschOnline (Task 1)</title>
        <p>The DID is shown in <xref rid="figure2" ref-type="fig">Figure 2</xref>. All 10 items fit Rasch rather well (ie, Infit meansquares [MNSQs] of all items less than 1.5, as shown in the first column of <xref rid="figure2" ref-type="fig">Figure 2</xref>). The reason for fitting the Rasch model is that all data were simulated under the Rasch RSM model. It is stated that item difficulties are from the easiest (left) to the hardest (right) and refer to the summation scores: the easy items will have a higher summation score.</p>
        <p>The items in <xref rid="figure3" ref-type="fig">Figure 3</xref> are all DIF-free except item 5, which has a slight DIF (<italic>P</italic>=.04). The reason for this is that all responses are generated using a Rasch RSM model, and the gender groups are randomly assigned to each simulated participant.</p>
        <p>The ICCs for item 5 are shown in <xref rid="figure4" ref-type="fig">Figure 4</xref>. There is a slight deviation from the expected scores in stratum B. Nonetheless, item 5 is still fitted to the Rasch model, with <italic>P</italic>=.61, based on chi-square fit statistics [<xref ref-type="bibr" rid="ref35">35</xref>].</p>
        <p>The first study goal of the demonstration of RaschOnline has been achieved.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Distribution of item difficulties used in this study.</p>
          </caption>
          <graphic xlink:href="formative_v8i1e46800_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>DIF analysis of the 10 items in this study (note: item 5 exhibits a small DIF effect with <italic>P</italic>=.04&lt;.05). DIF: differential item functioning; SMD: standardized mean difference.</p>
          </caption>
          <graphic xlink:href="formative_v8i1e46800_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>ICC of item 5 fits the Rasch model (<italic>P</italic>=.61). ICC: item characteristic curve.</p>
          </caption>
          <graphic xlink:href="formative_v8i1e46800_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Determine ChatGPT’s Grade Against a Normal Sample (Task 2)</title>
        <p>The Wright map in <xref rid="figure5" ref-type="fig">Figure 5</xref> illustrates several findings. First, item difficulties are arranged from harder to easier on the right panel. Second, the middle panel displays person measures distributed from high to low abilities. Third, the left panel shows the display of person measures in groups. Fourth, the bottom panel indicates that there is no significant difference in measures between the 2 groups of males and females (<italic>P</italic>=.85), but a significant difference was found among strata. Finally, based on the grade criteria of person measures (eg, from A to E), GPT301, with measures of 4.66 logits, is classified as grade A, indicating excellent performance in answering 10 items from Taiwan college entrance examinations for the year 2023 (<xref ref-type="table" rid="table1">Table 1</xref> and <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>) when compared to the normal sample generated by responses under the Rasch model.</p>
        <p>According to <xref rid="figure6" ref-type="fig">Figure 6</xref>, the responses of GPT301 are expected within the upper and lower limits (ie, <italic>z</italic> score in item <italic>i</italic> = (observed – expected)/(SD) of item <italic>i</italic>&lt;2.0). The Outfit MNSQs are smaller than 2.0, which indicates that no aberrant responses exist in items [<xref ref-type="bibr" rid="ref52">52</xref>] (ie, person responses are consistent with Rasch’s expectations). This is because GPT301 has 100% correct answers to the 10 items from Taiwan college entrance examinations for the year 2023 (<xref ref-type="table" rid="table1">Table 1</xref> and <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <p>Accordingly, this study confirms the second goal of determining that the ChatGPT’s grade is A when compared to a normal sample.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>Features of the study sample on Wright map (note: no difference in measures between gender groups was found). (A) Wright map; (B) Ability comparison of gender; (C) Ability comparison of grade. </p>
          </caption>
          <graphic xlink:href="formative_v8i1e46800_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Performance of GPT301 shown on KIDMAP (note: expected scores are vertical with red fonts in the middle and observed scores are vertical with black fonts in the middle). MNSQ: meansquare.</p>
          </caption>
          <graphic xlink:href="formative_v8i1e46800_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Website Dashboards Shown on Google Maps</title>
        <p>For readers who wish to manipulate the dashboards independently, QR codes are provided in the figures (or at links [<xref ref-type="bibr" rid="ref53">53</xref>-<xref ref-type="bibr" rid="ref57">57</xref>]).</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>The study findings showed that the 10 items displayed progressively more difficulty from easiest to hardest, as indicated by their respective logit scores (–2.43, –1.78, –1.48, –0.64, –0.1, 0.33, 0.59, 1.34, 1.7, and 2.47). Item 5 exhibited DIF between gender groups, with a <italic>P</italic> value of .04. However, item 5 still fits the Rasch model reasonably well, with a <italic>P</italic> value of .61. All items were deemed to fit the Rasch model since their Infit MNSQs were below the threshold of 1.5. There was no significant difference in measures obtained between male and female participants (<italic>P</italic>=.83), but there was a significant difference among ability grades (<italic>P</italic>&lt;.001). Finally, based on its performance, ChatGPT received a grade of A, surpassing grades B to E in other counterparts.</p>
        <p>Accordingly, two objectives have been achieved: (1) to demonstrate the use of website Rasch analysis (namely, RaschOnline [<xref ref-type="bibr" rid="ref44">44</xref>]) and (2) to determine the ChatGPT’s grade compared to a normal sample.</p>
      </sec>
      <sec>
        <title>What This Knowledge Adds to What We Already Knew</title>
        <p>ChatGPT has demonstrated accuracy across various data sets such as answering yes-or-no questions from PubMed abstracts, questions on the USMLE, and breast cancer screening and select-all-that-apply prompts [<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref58">58</xref>]. While Ha and Yaneva [<xref ref-type="bibr" rid="ref30">30</xref>] reported low accuracy rates for MCQs, this study found that GPT301 exhibited high accuracy rates for MCQs in 10 items from the 2023 Taiwan college entrance examinations (<xref ref-type="table" rid="table1">Table 1</xref> and <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>).</p>
        <p>According to a study [<xref ref-type="bibr" rid="ref12">12</xref>], ChatGPT’s performance on the USMLE exceeded a 60% threshold and demonstrated the ability to achieve a passing score equivalent to that of a third-year medical student.</p>
        <p>On the other hand, ChatGPT was assessed on all 3 sections of the USMLE: step 1, step 2CK, and step 3 [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref59">59</xref>]. The study findings revealed that ChatGPT achieved or nearly achieved the passing threshold for all 3 examinations without requiring any specialized training or reinforcement.</p>
        <p>Past research on medical question answering has predominantly focused on assessing model performance on specific tasks [<xref ref-type="bibr" rid="ref58">58</xref>]. ChatGPT was rated as a grade of A minor for answering prompts related to Kawasaki disease [<xref ref-type="bibr" rid="ref3">3</xref>].</p>
        <p>A study [<xref ref-type="bibr" rid="ref60">60</xref>] found that ChatGPT and other assistants hold great potential as useful tools for both patients and health care providers, as they are capable of handling a broad range of assessments from basic fact-based questions to complex clinical queries. Compared to Google’s feature snippet, ChatGPT was able to provide interpretable responses that minimized the risk of causing undue alarm. However, given the nascent stage of this technology, it is crucial for regulators and health care professionals to collaborate in establishing minimum quality standards and educating patients about the limitations of AI assistants [<xref ref-type="bibr" rid="ref61">61</xref>]. As we consider the transformative impact of these advancements on medical education and research, it is important to recognize the potential benefits and drawbacks of this technology [<xref ref-type="bibr" rid="ref62">62</xref>].</p>
        <p>In terms of accuracy, GPT-4 demonstrated superior performance compared to GPT-3.5, particularly in handling general, clinical, and clinical sentence questions [<xref ref-type="bibr" rid="ref5">5</xref>]. Moreover, GPT-4 successfully met the passing criteria for the Japanese Medical Licensing Examination, affirming its dependability in clinical reasoning and medical knowledge, even in non-English languages [<xref ref-type="bibr" rid="ref5">5</xref>].</p>
        <p>Korn and Kelly [<xref ref-type="bibr" rid="ref9">9</xref>] have raised concerns about ChatGPT’s reliability and fairness, in line with reports in the popular press regarding misinformation issues. The authors caution that ChatGPT may not always provide accurate information, and there are fears that it could be manipulated to spread false information [<xref ref-type="bibr" rid="ref10">10</xref>] or produce “deepfakes” [<xref ref-type="bibr" rid="ref11">11</xref>].</p>
        <p>Based on the results of this study, ChatGPT can provide answers to MCQs with an excellent level of accuracy and consistency across the 10 prompts provided. The study suggests that ChatGPT can be a valuable tool for MCQs in English language tests. However, it is essential to exercise caution when using ChatGPT for other forms of English language tests.</p>
        <p>Several computer programs, such as WINSTEPS [<xref ref-type="bibr" rid="ref63">63</xref>], Quest [<xref ref-type="bibr" rid="ref64">64</xref>], ConQuest [<xref ref-type="bibr" rid="ref65">65</xref>], RUMM2030 [<xref ref-type="bibr" rid="ref66">66</xref>], WINMIRA [<xref ref-type="bibr" rid="ref67">67</xref>], LPCM-Win [<xref ref-type="bibr" rid="ref68">68</xref>], and R-language Rasch software [<xref ref-type="bibr" rid="ref69">69</xref>], have been developed to calibrate item and person parameters in Rasch models. However, none of these software packages provide a website Rasch analysis technique that is easily accessible to users and allows for the creation of visual graphs (such as the Wright map, KIDMAP, category probability curves, student outfit plots, and DIF detection), which are commonly used in Rasch analysis.</p>
        <p>The website reports generated by RaschOnline provide estimations that are equivalent to those obtained using joint maximum likelihood estimation in WINSTEPS [<xref ref-type="bibr" rid="ref63">63</xref>]. These estimations are similar to, but more accurate than, those obtained in a previous study [<xref ref-type="bibr" rid="ref70">70</xref>], which relied on copying and pasting data instead of directly uploading it to the website.</p>
        <p>To generate visual graphs, the Rasch model parameters must first be obtained. Then, it is necessary to assess whether the data set meets the requirements for invariant measurement, as depicted in <xref rid="figure2" ref-type="fig">Figures 2</xref>, 4, and 5. Additionally, DIF detection is a crucial aspect of Rasch analysis [<xref ref-type="bibr" rid="ref38">38</xref>,<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref71">71</xref>-<xref ref-type="bibr" rid="ref73">73</xref>], as shown in <xref rid="figure3" ref-type="fig">Figure 3</xref>. Providing website access to test results is vital for teachers and students, as demonstrated by the RaschOnline platform [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref50">50</xref>] in this study.</p>
      </sec>
      <sec>
        <title>The Strengths and Features of This Study</title>
        <p>In this study, the capacity of ChatGPT was evaluated. The study compared ChatGPT’s responses to 10 items of MCQs using the Wright Map and KIDMAP to compare ChatGPT’s ability with other simulated participants in Rasch analysis.</p>
        <p>The study found that ChatGPT has the potential to improve the English learning process, and it demonstrated the feasibility of using ChatGPT for other types of participants (eg, patients) with symptoms commonly encountered in clinical settings.</p>
        <p>According to this study, (1) ChatGPT has an excellent level of ability to answer MCQs in English examinations, and (2) the effectiveness of ChatGPT is determined by a grade A with 4.66 logits. We suggest that the methods and visualizations used in this study can be replicated in future research using RaschOnline [<xref ref-type="bibr" rid="ref34">34</xref>].</p>
        <p>The distinct features of this study include the following: (1) the data were analyzed using RaschOnline [<xref ref-type="bibr" rid="ref44">44</xref>], a tool based on Rasch RSM. This enabled the use of visualizations, such as Wright Map with groups, DIF using forest plots, and KIDMAP, to display item features and person responses. These visualizations had not been previously demonstrated in the literature and can be accessed on RaschOnline for more information and demonstrations; (2) using objective measurement through Rasch analysis to analyze responses, ChatGPT has demonstrated a high level of proficiency in answering MCQs; (3) the efficacy of ChatGPT has been established; however, future evaluations of ChatGPT’s performance on open-ended questions must be conducted with caution due to potential bias in judges’ leniency and severity.</p>
      </sec>
      <sec>
        <title>Limitations and Directions for Future Studies</title>
        <p>This study has certain limitations that may motivate further research. The first concern is that the data were generated using Rasch simulation responses, as shown in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>, based on the Rasch model [<xref ref-type="bibr" rid="ref21">21</xref>]. The real and simulated responses to the 10 items were compared.</p>
        <p>Second, RaschOnline [<xref ref-type="bibr" rid="ref34">34</xref>] has clearly been shown to be applicable in use [<xref ref-type="bibr" rid="ref25">25</xref>] rather than traditional professional statistical software (eg, WINSTEPS [<xref ref-type="bibr" rid="ref63">63</xref>], Quest [<xref ref-type="bibr" rid="ref64">64</xref>], ConQuest [<xref ref-type="bibr" rid="ref65">65</xref>], RUMM2030 [<xref ref-type="bibr" rid="ref66">66</xref>], WINMIRA [<xref ref-type="bibr" rid="ref67">67</xref>], LPCM-Win [<xref ref-type="bibr" rid="ref68">68</xref>], and R-language Rasch software [<xref ref-type="bibr" rid="ref69">69</xref>]), and further research should be conducted to determine whether the visualizations generated using Google Maps in RaschOnline are more straightforward and easier to use for general researchers.</p>
        <p>Third, on the basis of the study sample size (n=300 in this study), it is not possible to draw reliable and valid conclusions. For a reliable and accurate assessment, there is a need for a larger sample size in future research.</p>
        <p>Fourth, in this study, only 10 items were used. A test or assessment that contains more items will be more reliable. To assess ChatGPT’s ability, more items will be needed in the future.</p>
        <p>Fifth, in the case of an OE assessment [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref14">14</xref>], ChatGPT’s ability is dependent upon the judge’s leniency and severity. The results of the OE assessment reveal that the 2 judges have distinctly different attitudes toward the responses provided by the ChatGPT. The conditions of leniency and severity in the assessment of ChatGPT should be stricter in the future.</p>
        <p>Finally, although AI technologies, such as ChatGPT, have demonstrated their potential in assisting medical decision-making in certain domains [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref58">58</xref>], such as identifying particular ailments or interpreting medical images, they are not sufficiently advanced to replace physicians in intricate diagnoses or treatment planning. Nevertheless, as technology advances, it is conceivable that AI may play a more prominent role in health care decision-making in the future.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This paper evaluates the effectiveness of ChatGPT in answering MCQs using Rasch analysis. The study used RaschOnline to assess ChatGPT’s capabilities and compared its performance to a normal sample.</p>
        <p>The findings of this study reveal that ChatGPT’s ability to answer MCQs is graded as A, indicating excellent performance. The study showcases the use of website Rasch analysis and highlights ChatGPT’s remarkable proficiency in addressing English test MCQs for the year 2023 on Taiwan college entrance examinations.</p>
        <p>While AI technologies have displayed promising potential in assisting medical decision-making, they are not yet advanced enough to replace medical doctors in complex diagnoses or treatment planning. However, with the continuous evolution of technology, AI has the potential to play an increasingly significant role in health care decision-making in the future.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Ten items from Taiwan college entrance examinations for the year 2023.</p>
        <media xlink:href="formative_v8i1e46800_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 398 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Data used in this study.</p>
        <media xlink:href="formative_v8i1e46800_app2.txt" xlink:title="TXT File , 8 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>How to conduct this study.</p>
        <media xlink:href="formative_v8i1e46800_app3.pdf" xlink:title="PDF File  (Adobe PDF File), 898 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">DID</term>
          <def>
            <p>distribution of item difficulty</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">DIF</term>
          <def>
            <p>differential item functioning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">ICC</term>
          <def>
            <p>item characteristic curve</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">MCQ</term>
          <def>
            <p>multiple-choice question</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">MNSQ</term>
          <def>
            <p>meansquare</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">OE</term>
          <def>
            <p>open-ended</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">RSM</term>
          <def>
            <p>rating scale model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">USMLE</term>
          <def>
            <p>United States Medical Licensing Examination</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We thank AJE (American Journal Experts) for the English language review of this manuscript.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>All data generated or analyzed during this study are included in this published article and its Multimedia Appendices.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>JCC conceived and designed the study. TYC, TWC, and WC performed the statistical analyses and oversaw the recruiting of study participants. JCC and WC contributed to the idea. WC helped design the study and collected information, and JCC interpreted the data. TWC monitored the research. All authors read and approved the final article.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <source>Introducing ChatGPT</source>
          <access-date>2022-11-30</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://openai.com/blog/chatgpt/">https://openai.com/blog/chatgpt/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Biswas</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT and the future of medical writing</article-title>
          <source>Radiology</source>
          <year>2023</year>
          <volume>307</volume>
          <issue>2</issue>
          <fpage>e223312</fpage>
          <pub-id pub-id-type="doi">10.1148/radiol.223312</pub-id>
          <pub-id pub-id-type="medline">36728748</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Curtis</surname>
              <given-names>NC</given-names>
            </name>
            <collab>ChatGPT</collab>
          </person-group>
          <article-title>To ChatGPT or not to ChatGPT? The impact of artificial intelligence on academic publishing</article-title>
          <source>Pediatr Infect Dis J</source>
          <year>2023</year>
          <volume>42</volume>
          <issue>4</issue>
          <fpage>275</fpage>
          <pub-id pub-id-type="doi">10.1097/INF.0000000000003852</pub-id>
          <pub-id pub-id-type="medline">36757192</pub-id>
          <pub-id pub-id-type="pii">00006454-202304000-00001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Harsha</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>McKinney</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Carignan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Horvitz</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Capabilities of GPT-4 on medical challenge problems</article-title>
          <source>ArXiv. Preprint posted online on March 20, 2023</source>
          <year>2023</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2303.13375</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Takagi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Watari</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Erabi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sakaguchi</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Performance of GPT-3.5 and GPT-4 on the Japanese medical licensing examination: comparison study</article-title>
          <source>JMIR Med Educ</source>
          <year>2023</year>
          <volume>9</volume>
          <fpage>e48002</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2023/1/e48002/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/48002</pub-id>
          <pub-id pub-id-type="medline">37384388</pub-id>
          <pub-id pub-id-type="pii">v9i1e48002</pub-id>
          <pub-id pub-id-type="pmcid">PMC10365615</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Macdonald</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Adeloye</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sheikh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Rudan</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Can ChatGPT draft a research article? An example of population-level vaccine effectiveness analysis</article-title>
          <source>J Glob Health</source>
          <year>2023</year>
          <volume>13</volume>
          <fpage>01003</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36798998"/>
          </comment>
          <pub-id pub-id-type="doi">10.7189/jogh.13.01003</pub-id>
          <pub-id pub-id-type="medline">36798998</pub-id>
          <pub-id pub-id-type="pmcid">PMC9936200</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lubowitz</surname>
              <given-names>JH</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT, an artificial intelligence chatbot, is impacting medical literature</article-title>
          <source>Arthroscopy</source>
          <year>2023</year>
          <volume>39</volume>
          <issue>5</issue>
          <fpage>1121</fpage>
          <lpage>1122</lpage>
          <pub-id pub-id-type="doi">10.1016/j.arthro.2023.01.015</pub-id>
          <pub-id pub-id-type="medline">36797148</pub-id>
          <pub-id pub-id-type="pii">S0749-8063(23)00033-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="web">
          <article-title>The new chatbots could change the world. can you trust them?</article-title>
          <source>New York Times</source>
          <access-date>2022-02-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nytimes.com/2022/12/10/technology/ai-chat-bot-chatgpt.html?smid=nytcore-ios-share&amp;referringSource=articleShare">https://www.nytimes.com/2022/12/10/technology/ai-chat-bot-chatgpt.html?smid=nytcore-ios-share&amp;referringSource=articleShare</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Korn</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kelly</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>New York City public schools ban access to AI tool that could help students cheat</article-title>
          <source>CNN Business News</source>
          <access-date>2023-02-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.cnn.com/2023/01/05/tech/chatgpt-nyc-school-ban/index.html">https://www.cnn.com/2023/01/05/tech/chatgpt-nyc-school-ban/index.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="web">
          <article-title>A new era of AI blooms even amid the tech gloom</article-title>
          <source>New York Times</source>
          <access-date>2023-02-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nytimes.com/2023/01/07/technology/generative-ai-chatgpt-investments.html?smid=nytcore-ios-share&amp;referringSource=articleShare">https://www.nytimes.com/2023/01/07/technology/generative-ai-chatgpt-investments.html?smid=nytcore-ios-share&amp;referringSource=articleShare</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="web">
          <article-title>Did a fourth grader write this? Or the new chatbot?</article-title>
          <source>New York Times</source>
          <access-date>2023-02-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nytimes.com/interactive/2022/12/26/upshot/chatgpt-child-essays.html">https://www.nytimes.com/interactive/2022/12/26/upshot/chatgpt-child-essays.html</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="web">
          <article-title>How A.I. could be weaponized to spread disinformation</article-title>
          <source>New York Times</source>
          <access-date>2023-02-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.nytimes.com/interactive/2019/06/07/technology/ai-text-disinformation.html?action=click&amp;module=RelatedLinks&amp;pgtype=Article">https://www.nytimes.com/interactive/2019/06/07/technology/ai-text-disinformation.html?action=click&amp;module=RelatedLinks&amp;pgtype=Article</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jin</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Dhingra</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Cohen</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>PubMedQA: a dataset for biomedical research question answering</article-title>
          <source>ArXiv. Preprint posted online on September 13, 2019</source>
          <year>2019</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.1909.06146</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gilson</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Safranek</surname>
              <given-names>CW</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Socrates</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Chi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Chartash</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>How does ChatGPT perform on the United States Medical Licensing Examination (USMLE)? The implications of large language models for medical education and knowledge assessment</article-title>
          <source>JMIR Med Educ</source>
          <year>2023</year>
          <volume>9</volume>
          <fpage>e45312</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2023/1/e45312/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/45312</pub-id>
          <pub-id pub-id-type="medline">36753318</pub-id>
          <pub-id pub-id-type="pii">v9i1e45312</pub-id>
          <pub-id pub-id-type="pmcid">PMC9947764</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rao</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kamineni</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lie</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Succi</surname>
              <given-names>MD</given-names>
            </name>
          </person-group>
          <article-title>Evaluating ChatGPT as an adjunct for radiologic decision-making</article-title>
          <source>medRxiv</source>
          <year>2023</year>
          <fpage>23285399</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1101/2023.02.02.23285399"/>
          </comment>
          <pub-id pub-id-type="doi">10.1101/2023.02.02.23285399</pub-id>
          <pub-id pub-id-type="medline">36798292</pub-id>
          <pub-id pub-id-type="pii">2023.02.02.23285399</pub-id>
          <pub-id pub-id-type="pmcid">PMC9934725</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Feng</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Ban</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Medication analysis and pharmaceutical care for a child with Kawasaki disease: a case report and review of the literature</article-title>
          <source>Medicine (Baltimore)</source>
          <year>2023</year>
          <volume>102</volume>
          <issue>1</issue>
          <fpage>e32488</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36607867"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/MD.0000000000032488</pub-id>
          <pub-id pub-id-type="medline">36607867</pub-id>
          <pub-id pub-id-type="pii">00005792-202301060-00022</pub-id>
          <pub-id pub-id-type="pmcid">PMC9829272</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Ding</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Retrospective analysis of clinical characteristics and related influencing factors of Kawasaki disease</article-title>
          <source>Medicine (Baltimore)</source>
          <year>2022</year>
          <volume>101</volume>
          <issue>52</issue>
          <fpage>e32430</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36596080"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/MD.0000000000032430</pub-id>
          <pub-id pub-id-type="medline">36596080</pub-id>
          <pub-id pub-id-type="pii">00005792-202212300-00112</pub-id>
          <pub-id pub-id-type="pmcid">PMC9803503</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Choi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kim</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Min</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Integrative treatment of herbal medicine with western medicine on coronary artery lesions in children with Kawasaki disease</article-title>
          <source>Medicine (Baltimore)</source>
          <year>2022</year>
          <volume>101</volume>
          <issue>7</issue>
          <fpage>e28802</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35363167"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/MD.0000000000028802</pub-id>
          <pub-id pub-id-type="medline">35363167</pub-id>
          <pub-id pub-id-type="pii">00005792-202202180-00009</pub-id>
          <pub-id pub-id-type="pmcid">PMC9281920</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Neonatal Kawasaki disease: case report and literature review</article-title>
          <source>Medicine (Baltimore)</source>
          <year>2021</year>
          <volume>100</volume>
          <issue>7</issue>
          <fpage>e24624</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33607798"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/MD.0000000000024624</pub-id>
          <pub-id pub-id-type="medline">33607798</pub-id>
          <pub-id pub-id-type="pii">00005792-202102190-00037</pub-id>
          <pub-id pub-id-type="pmcid">PMC7899894</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Yue</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Diagnostic significance of circulating miRNAs in Kawasaki disease in China: current evidence based on a meta-analysis</article-title>
          <source>Medicine (Baltimore)</source>
          <year>2021</year>
          <volume>100</volume>
          <issue>6</issue>
          <fpage>e24174</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33578520"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/MD.0000000000024174</pub-id>
          <pub-id pub-id-type="medline">33578520</pub-id>
          <pub-id pub-id-type="pii">00005792-202102120-00013</pub-id>
          <pub-id pub-id-type="pmcid">PMC7886432</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Curtis</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <source>Examples of ChatGPT responses (generated in Jan 2023)</source>
          <access-date>2023-02-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://links.lww.com/INF/E931">http://links.lww.com/INF/E931</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kao</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chow</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Assessing ChatGPT's capacity for clinical decision support in pediatrics: a comparative study with pediatricians using KIDMAP of Rasch analysis</article-title>
          <source>Medicine (Baltimore)</source>
          <year>2023</year>
          <volume>102</volume>
          <issue>25</issue>
          <fpage>e34068</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37352054"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/MD.0000000000034068</pub-id>
          <pub-id pub-id-type="medline">37352054</pub-id>
          <pub-id pub-id-type="pii">00005792-202306230-00035</pub-id>
          <pub-id pub-id-type="pmcid">PMC10289633</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bommarito</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bommarito</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Katz</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Katz</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <article-title>GPT as knowledge worker: a zero-shot evaluation of (AI)CPA capabilities</article-title>
          <source>ArXiv. Preprint posted online on January 11, 2023</source>
          <year>2023</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2301.04408</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bommarito</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>Katz</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <article-title>GPT takes the bar exam</article-title>
          <source>ArXiv. Preprint posted online on December 29, 2022</source>
          <year>2022</year>
          <pub-id pub-id-type="doi">10.48550/arXiv.2212.14402</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kung</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Cheatham</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Medenilla</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sillos</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>De Leon</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Elepaño</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Madriaga</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Aggabao</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Diaz-Candido</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Maningo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tseng</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Performance of ChatGPT on USMLE: potential for AI-assisted medical education using large language models</article-title>
          <source>PLOS Digit Health</source>
          <year>2023</year>
          <volume>2</volume>
          <issue>2</issue>
          <fpage>e0000198</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36812645"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pdig.0000198</pub-id>
          <pub-id pub-id-type="medline">36812645</pub-id>
          <pub-id pub-id-type="pii">PDIG-D-22-00371</pub-id>
          <pub-id pub-id-type="pmcid">PMC9931230</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sallam</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT utility in healthcare education, research, and practice: systematic review on the promising perspectives and valid concerns</article-title>
          <source>Healthcare (Basel)</source>
          <year>2023</year>
          <volume>11</volume>
          <issue>6</issue>
          <fpage>887</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=healthcare11060887"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/healthcare11060887</pub-id>
          <pub-id pub-id-type="medline">36981544</pub-id>
          <pub-id pub-id-type="pii">healthcare11060887</pub-id>
          <pub-id pub-id-type="pmcid">PMC10048148</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Dada</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Puladi</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kleesiek</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Egger</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT in healthcare: a taxonomy and systematic review</article-title>
          <source>Comput Methods Programs Biomed</source>
          <year>2024</year>
          <month>03</month>
          <volume>245</volume>
          <fpage>108013</fpage>
          <lpage>30</lpage>
          <pub-id pub-id-type="doi">10.1016/j.cmpb.2024.108013</pub-id>
          <pub-id pub-id-type="medline">38262126</pub-id>
          <pub-id pub-id-type="pii">S0169-2607(24)00008-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Bubeck</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Petro</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Benefits, limits, and risks of GPT-4 as an AI chatbot for medicine</article-title>
          <source>N Engl J Med</source>
          <year>2023</year>
          <volume>388</volume>
          <issue>13</issue>
          <fpage>1233</fpage>
          <lpage>1239</lpage>
          <pub-id pub-id-type="doi">10.1056/NEJMsr2214184</pub-id>
          <pub-id pub-id-type="medline">36988602</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thirunavukarasu</surname>
              <given-names>AJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hassan</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mahmood</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sanghera</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Barzangi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>El Mukashfi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shah</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Trialling a large language model (ChatGPT) in general practice with the applied knowledge test: observational study demonstrating opportunities and limitations in primary care</article-title>
          <source>JMIR Med Educ</source>
          <year>2023</year>
          <volume>9</volume>
          <fpage>e46599</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mededu.jmir.org/2023/1/e46599/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/46599</pub-id>
          <pub-id pub-id-type="medline">37083633</pub-id>
          <pub-id pub-id-type="pii">v9i1e46599</pub-id>
          <pub-id pub-id-type="pmcid">PMC10163403</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ha</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Yaneva</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>Automatic question answering for medical MCQs: can it go further than information retrieval?</article-title>
          <year>2019</year>
          <conf-name>Proceedings of the International Conference on Recent Advances in Natural Language Processing (RANLP 2019)</conf-name>
          <conf-date>September, 2019</conf-date>
          <conf-loc>Varna, Bulgaria</conf-loc>
          <fpage>418</fpage>
          <lpage>422</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rasch</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <source>Probabilistic Models for Some Intelligence and Attainment Tests</source>
          <year>1960</year>
          <publisher-loc>Denmark</publisher-loc>
          <publisher-name>Danmarks Paedagogiske Institut</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Andrich</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>A rating formulation for ordered response categories</article-title>
          <source>Psychometrika</source>
          <year>1978</year>
          <volume>43</volume>
          <issue>4</issue>
          <fpage>561</fpage>
          <lpage>573</lpage>
          <pub-id pub-id-type="doi">10.1007/bf02293814</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bond</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yan</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Heene</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Applying the Rasch model</article-title>
          <source>Fundamental Measurement in the Human Sciences</source>
          <year>2020</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>Routledge</publisher-name>
          <fpage>1</fpage>
          <lpage>376</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>BD</given-names>
            </name>
            <name name-style="western">
              <surname>Stone</surname>
              <given-names>MH</given-names>
            </name>
          </person-group>
          <source>Best Test Design: Rasch Measurement</source>
          <year>1979</year>
          <publisher-loc>Chicago</publisher-loc>
          <publisher-name>MESA Press</publisher-name>
          <fpage>1</fpage>
          <lpage>240</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Müller</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Item fit statistics for Rasch analysis: can we trust them?</article-title>
          <source>J Stat Distrib App</source>
          <year>2020</year>
          <volume>7</volume>
          <issue>1</issue>
          <fpage>5</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1186/s40488-020-00108-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s40488-020-00108-7</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Linacre</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>An all-purpose person fit statistic</article-title>
          <source>Rasch Meas Trans</source>
          <year>1997</year>
          <volume>11</volume>
          <issue>3</issue>
          <fpage>582</fpage>
          <lpage>583</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.rasch.org/rmt/rmt113n.htm"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>EV</given-names>
            </name>
          </person-group>
          <article-title>Detecting and evaluating the impact of multidimensionality using item fit statistics and principal component analysis of residuals</article-title>
          <source>J Appl Meas</source>
          <year>2002</year>
          <volume>3</volume>
          <issue>2</issue>
          <fpage>205</fpage>
          <lpage>231</lpage>
          <pub-id pub-id-type="medline">12011501</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Holland</surname>
              <given-names>PW</given-names>
            </name>
            <name name-style="western">
              <surname>Wainer</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <source>Differential Item Functioning</source>
          <year>1993</year>
          <publisher-loc>Hillsdale, NJ</publisher-loc>
          <publisher-name>Lawrence Erlbaum Associates, Inc</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Embretson</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Reise</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <source>Item Response Theory for Psychologists</source>
          <year>2000</year>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>Psychology Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pakhare</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Nair</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Chouhan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pandey</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Kokane</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Data-driven monitoring in community based management of children with severely acute malnutrition (SAM) using psychometric techniques: an operational framework</article-title>
          <source>Cureus</source>
          <year>2021</year>
          <volume>13</volume>
          <issue>10</issue>
          <fpage>e18589</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34760426"/>
          </comment>
          <pub-id pub-id-type="doi">10.7759/cureus.18589</pub-id>
          <pub-id pub-id-type="medline">34760426</pub-id>
          <pub-id pub-id-type="pmcid">PMC8572322</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Vincent</surname>
              <given-names>JI</given-names>
            </name>
            <name name-style="western">
              <surname>MacDermid</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Evaluation of the structural validity of the work instability scale using the Rasch model</article-title>
          <source>Arch Rehabil Res Clin Transl</source>
          <year>2021</year>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>100103</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S2590-1095(21)00003-3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.arrct.2021.100103</pub-id>
          <pub-id pub-id-type="medline">33778476</pub-id>
          <pub-id pub-id-type="pii">S2590-1095(21)00003-3</pub-id>
          <pub-id pub-id-type="pmcid">PMC7984990</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Masters</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Rasch KIDMAP - A history</article-title>
          <source>Rasch Meas Trans</source>
          <year>1994</year>
          <volume>8</volume>
          <issue>2</issue>
          <fpage>366</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.rasch.org/rmt/rmt82k.htm"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yeh</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Comparing the similarity and differences in MeSH terms associated with spine-specific journals using the forest plot: a bibliometric analysis</article-title>
          <source>Medicine (Baltimore)</source>
          <year>2022</year>
          <volume>101</volume>
          <issue>44</issue>
          <fpage>e31441</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36343077"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/MD.0000000000031441</pub-id>
          <pub-id pub-id-type="medline">36343077</pub-id>
          <pub-id pub-id-type="pii">00005792-202211040-00060</pub-id>
          <pub-id pub-id-type="pmcid">PMC9646558</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>TW</given-names>
            </name>
            <name name-style="western">
              <surname>Tam</surname>
              <given-names>HP</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>WC</given-names>
            </name>
          </person-group>
          <source>RaschOnline based on Rasch rating scale model</source>
          <access-date>2023-01-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://raschonline.healthup.org.tw">http://raschonline.healthup.org.tw</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Linacre</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>How to simulate Rasch data</article-title>
          <source>Rasch Meas Trans</source>
          <year>2007</year>
          <volume>21</volume>
          <issue>3</issue>
          <fpage>1125</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.rasch.org/rmt/rmt213a.htm"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>TW</given-names>
            </name>
          </person-group>
          <source>How to conduct this study</source>
          <access-date>2023-02-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.youtube.com/watch?v=Juikq-96LA0">https://www.youtube.com/watch?v=Juikq-96LA0</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>TW</given-names>
            </name>
          </person-group>
          <source>How to generate simulation data in RaschOnline</source>
          <access-date>2023-07-03</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.healthup.org.tw/raschonline/forstudents.htm#section-14">https://www.healthup.org.tw/raschonline/forstudents.htm#section-14</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lai</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Web-based skin cancer assessment and classification using machine learning and mobile computerized adaptive testing in a Rasch model: development study</article-title>
          <source>JMIR Med Inform</source>
          <year>2022</year>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>e33006</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://medinform.jmir.org/2022/3/e33006/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/33006</pub-id>
          <pub-id pub-id-type="medline">35262505</pub-id>
          <pub-id pub-id-type="pii">v10i3e33006</pub-id>
          <pub-id pub-id-type="pmcid">PMC9282670</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Nadkarni</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Niu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>TW</given-names>
            </name>
          </person-group>
          <article-title>A note on the Newton–Raphson iteration method in the Rasch model</article-title>
          <source>Rasch Meas Trans</source>
          <year>2022</year>
          <volume>35</volume>
          <issue>1</issue>
          <fpage>1851</fpage>
          <lpage>1856</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.rasch.org/rmt/rmt351.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>TW</given-names>
            </name>
          </person-group>
          <source>The manual of Raschonline</source>
          <access-date>2023-01-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.healthup.org.tw/raschonline/forstudents.htm">https://www.healthup.org.tw/raschonline/forstudents.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Some notes on the term: “Wright Map”</article-title>
          <source>Rasch Meas Trans</source>
          <year>2011</year>
          <volume>25</volume>
          <fpage>1331</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.rasch.org/rmt/rmt253b.htm"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Linacre</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <article-title>Optimizing rating scale category effectiveness</article-title>
          <source>J Appl Meas</source>
          <year>2002</year>
          <volume>3</volume>
          <issue>1</issue>
          <fpage>85</fpage>
          <lpage>106</lpage>
          <pub-id pub-id-type="medline">11997586</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>TW</given-names>
            </name>
          </person-group>
          <source>Figure 2 in this study</source>
          <access-date>2023-02-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.healthup.org.tw/gps/jmirgptitem.htm">https://www.healthup.org.tw/gps/jmirgptitem.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>TW</given-names>
            </name>
          </person-group>
          <source>Figure 3 in this study</source>
          <access-date>2023-02-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.healthup.org.tw/gps/jmirgptdif.htm">https://www.healthup.org.tw/gps/jmirgptdif.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>TW</given-names>
            </name>
          </person-group>
          <source>Figure 4 in this study</source>
          <access-date>2023-02-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.healthup.org.tw/gps/jmirgpticc.htm">https://www.healthup.org.tw/gps/jmirgpticc.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>TW</given-names>
            </name>
          </person-group>
          <source>Figure 5 in this study</source>
          <access-date>2023-02-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.healthup.org.tw/gps/jmirgptwright.htm">https://www.healthup.org.tw/gps/jmirgptwright.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <source>Figure 6 in this study</source>
          <access-date>2023-02-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.healthup.org.tw/gps/jmirgptkidmap.htm">https://www.healthup.org.tw/gps/jmirgptkidmap.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="web">
          <article-title>China, a pioneer in regulating algorithms, turns its focus to deepfakes</article-title>
          <source>WSJ</source>
          <access-date>2023-02-21</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.wsj.com/articles/china-a-pioneer-in-regulating-algorithms-turns-its-focus-to-deepfakes-11673149283?mod=Searchresults_pos1&amp;page=1">https://www.wsj.com/articles/china-a-pioneer-in-regulating-algorithms-turns-its-focus-to-deepfakes-11673149283?mod=Searchresults_pos1&amp;page=1</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mbakwe</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Lourentzou</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Celi</surname>
              <given-names>LA</given-names>
            </name>
            <name name-style="western">
              <surname>Mechanic</surname>
              <given-names>OJ</given-names>
            </name>
            <name name-style="western">
              <surname>Dagan</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>ChatGPT passing USMLE shines a spotlight on the flaws of medical education</article-title>
          <source>PLOS Digit Health</source>
          <year>2023</year>
          <volume>2</volume>
          <issue>2</issue>
          <fpage>e0000205</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36812618"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pdig.0000205</pub-id>
          <pub-id pub-id-type="medline">36812618</pub-id>
          <pub-id pub-id-type="pii">PDIG-D-23-00027</pub-id>
          <pub-id pub-id-type="pmcid">PMC9931307</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hopkins</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Logan</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Kichenadasse</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Sorich</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Artificial intelligence chatbots will revolutionize how cancer patients access information: ChatGPT represents a paradigm-shift</article-title>
          <source>JNCI Cancer Spectr</source>
          <year>2023</year>
          <volume>7</volume>
          <issue>2</issue>
          <fpage>10</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36808255"/>
          </comment>
          <pub-id pub-id-type="doi">10.1093/jncics/pkad010</pub-id>
          <pub-id pub-id-type="pii">7049531</pub-id>
          <pub-id pub-id-type="pmcid">PMC10013638</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arif</surname>
              <given-names>TB</given-names>
            </name>
            <name name-style="western">
              <surname>Munaf</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Ul-Haque</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>The future of medical education and research: is ChatGPT a blessing or blight in disguise?</article-title>
          <source>Med Educ Online</source>
          <year>2023</year>
          <volume>28</volume>
          <issue>1</issue>
          <fpage>2181052</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/36809073"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/10872981.2023.2181052</pub-id>
          <pub-id pub-id-type="medline">36809073</pub-id>
          <pub-id pub-id-type="pmcid">PMC9946299</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Item parameter recovery, standard error estimates, and fit statistics of the winsteps program for the family of Rasch models</article-title>
          <source>Educ Psychol Meas</source>
          <year>2005</year>
          <volume>65</volume>
          <issue>3</issue>
          <fpage>376</fpage>
          <lpage>404</lpage>
          <pub-id pub-id-type="doi">10.1177/0013164404268673</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Linacre</surname>
              <given-names>JM</given-names>
            </name>
          </person-group>
          <source>WINSTEPS Rasch Software</source>
          <access-date>2023-01-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.winsteps.com/winsteps.htm">https://www.winsteps.com/winsteps.htm</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Khoo</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Quest: The interactive test analysis system</article-title>
          <source>Australian Council for Educational Research</source>
          <access-date>2023-01-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://research.acer.edu.au/measurement/3/">https://research.acer.edu.au/measurement/3/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Adams</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Cloney</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>MR</given-names>
            </name>
          </person-group>
          <article-title>ACER ConQuest: Generalized item response modeling software (Version 5) Computer software</article-title>
          <source>Australian Council for Educational Research</source>
          <access-date>2023-01-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.acer.org/in/conquest">https://www.acer.org/in/conquest</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Andrich</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <source>Rasch measurement tools for research and education</source>
          <access-date>2023-01-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.rummlab.com.au/">https://www.rummlab.com.au/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Von Davier</surname>
              <given-names>DM</given-names>
            </name>
          </person-group>
          <article-title>WINMIRA—a program system for analyses with the Rasch-model, with the latent class analysis and with the mixed-Rasch model</article-title>
          <source>Institute for Science Education</source>
          <access-date>2023-01-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://www.von-davier.com">http://www.von-davier.com</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fischer</surname>
              <given-names>GH</given-names>
            </name>
          </person-group>
          <article-title>LPCM-WIN computer software</article-title>
          <source>Assessment Systems Corp</source>
          <access-date>2023-01-11</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.winsteps.com/a/Linacre-estimation-methods.pdf">https://www.winsteps.com/a/Linacre-estimation-methods.pdf</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Tam</surname>
              <given-names>HP</given-names>
            </name>
            <name name-style="western">
              <surname>Jen</surname>
              <given-names>TH</given-names>
            </name>
          </person-group>
          <article-title>Educational measurement for applied researchers</article-title>
          <source>Theory Into Practice</source>
          <year>2017</year>
          <publisher-loc>Singapore</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>HM</given-names>
            </name>
            <name name-style="western">
              <surname>Shao</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Chien</surname>
              <given-names>TW</given-names>
            </name>
          </person-group>
          <article-title>Student's performance is shown on Google Maps using online Rasch analysis</article-title>
          <source>J Appl Meas</source>
          <year>2020</year>
          <volume>21</volume>
          <issue>2</issue>
          <fpage>1</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://scholar.google.com/scholar_lookup?journal=J+Appl+Meas&amp;title=Student%E2%80%99s+performance+is+shown+on+Google+Maps+using+online+Rasch+analysis.&amp;author=HM+Wu&amp;author=Y+Shao&amp;author=TW+Chien&amp;volume=21&amp;publication_year=2020&amp;pages=1-10&amp;pmid=32129766&amp;"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Engelhard</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Invariant measurement</article-title>
          <source>Using Rasch Models in the Social, Behavioral, and Health Sciences</source>
          <year>2013</year>
          <publisher-loc>England, UK</publisher-loc>
          <publisher-name>Routledge</publisher-name>
          <fpage>167</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Wilson</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Assessment of differential item functioning in testlet-based items using the Rasch testlet model</article-title>
          <source>Educ Psychol Meas</source>
          <year>2005</year>
          <volume>65</volume>
          <issue>4</issue>
          <fpage>549</fpage>
          <lpage>576</lpage>
          <pub-id pub-id-type="doi">10.1177/0013164404268677</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref73">
        <label>73</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Shih</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>The DIF-Free-Then-DIF Strategy for the Assessment of Differential Item Functioning</article-title>
          <source>Educ Psychol Meas</source>
          <year>2012</year>
          <month>01</month>
          <day>04</day>
          <volume>72</volume>
          <issue>4</issue>
          <fpage>687</fpage>
          <lpage>708</lpage>
          <pub-id pub-id-type="doi">10.1177/0013164411426157</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
