<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v9i1e71358</article-id><article-id pub-id-type="doi">10.2196/71358</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Assessment of Recommendations Provided to Athletes Regarding Sleep Education by GPT-4o and Google Gemini: Comparative Evaluation Study</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Masur</surname><given-names>Lukas</given-names></name><degrees>MSci</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Driller</surname><given-names>Matthew</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Suppiah</surname><given-names>Haresh</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Matzka</surname><given-names>Manuel</given-names></name><degrees>PhD</degrees><xref 
ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Sperlich</surname><given-names>Billy</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>D&#x00FC;king</surname><given-names>Peter</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Department of Sports Science and Movement Pedagogy, Technische Universit&#x00E4;t Braunschweig</institution><addr-line>Pockelsstra&#x00DF;e 11</addr-line><addr-line>Braunschweig</addr-line><country>Germany</country></aff><aff id="aff2"><institution>School of Allied Health, Human Services, and Sport, La Trobe University</institution><addr-line>Melbourne</addr-line><country>Australia</country></aff><aff id="aff3"><institution>Department of Sports Science, Integrative and Experimental Exercise Science &#x0026; Training, University of W&#x00FC;rzburg</institution><addr-line>W&#x00FC;rzburg</addr-line><country>Germany</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Mavragani</surname><given-names>Amaryllis</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Schmidt</surname><given-names>Marcus</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Haghayegh</surname><given-names>Shahab</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Peter D&#x00FC;king, PhD, Department of Sports Science and Movement Pedagogy, Technische Universit&#x00E4;t Braunschweig, Pockelsstra&#x00DF;e 11, Braunschweig, 38106, Germany, 49 531 391 3432; <email>Peter.dueking@tu-braunschweig.de</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date 
pub-type="epub"><day>8</day><month>7</month><year>2025</year></pub-date><volume>9</volume><elocation-id>e71358</elocation-id><history><date date-type="received"><day>16</day><month>01</month><year>2025</year></date><date date-type="rev-recd"><day>06</day><month>03</month><year>2025</year></date><date date-type="accepted"><day>11</day><month>03</month><year>2025</year></date></history><copyright-statement>&#x00A9; Lukas Masur, Matthew Driller, Haresh Suppiah, Manuel Matzka, Billy Sperlich, Peter D&#x00FC;king. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 8.7.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2025/1/e71358"/><abstract><sec><title>Background</title><p>Inadequate sleep is prevalent among athletes, affecting adaptation to training and performance. 
While education on factors influencing sleep can improve sleep behaviors, large language models (LLMs) may offer a scalable approach to provide sleep education to athletes.</p></sec><sec><title>Objective</title><p>This study aims (1) to investigate the quality of sleep recommendations generated by publicly available LLMs, as evaluated by experienced raters, and (2) to determine whether evaluation results vary with information input granularity.</p></sec><sec sec-type="methods"><title>Methods</title><p>Two prompts with differing information input granularity (low and high) were created for 2 use cases and inserted into ChatGPT-4o (GPT-4o) and Google Gemini, resulting in 8 different recommendations. Experienced raters (n=13) evaluated the recommendations on a 1&#x2010;5 Likert scale, based on 10 sleep criteria derived from recent literature. A Friedman test with Bonferroni correction was performed to test for significant differences in all rated items between the sleep recommendations. Significance level was set to <italic>P</italic>&#x003C;.05. Fleiss &#x03BA; was calculated to assess interrater reliability.</p></sec><sec sec-type="results"><title>Results</title><p>The overall interrater reliability using Fleiss &#x03BA; indicated a fair agreement of 0.280 (range between 0.183 and 0.296). The highest summary rating was achieved by GPT-4o using high input information granularity, with 8 ratings &#x003E;3 (tendency toward good), 3 ratings equal to 3 (neutral), and 2 ratings &#x003C;3 (tendency toward bad). GPT-4o outperformed Google Gemini in 9 of 10 criteria (<italic>P</italic>&#x003C;.001 to <italic>P</italic>=.04). Recommendations generated with high input granularity received significantly higher ratings than those with low granularity across both LLMs and use cases (<italic>P</italic>&#x003C;.001 to <italic>P</italic>=.049).
High input granularity leads to significantly higher ratings in items pertaining to the used scientific sources (<italic>P</italic>&#x003C;.001), irrespective of the analyzed LLM.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>Both LLMs exhibit limitations, neglecting vital criteria of sleep education. Sleep recommendations by GPT-4o and Google Gemini were evaluated as suboptimal, with GPT-4o achieving higher overall ratings. However, both LLMs demonstrated improved recommendations with higher information input granularity, emphasizing the need for specificity and a thorough review of outputs to securely implement artificial intelligence technologies into sleep education.</p></sec></abstract><kwd-group><kwd>individualization</kwd><kwd>personalization</kwd><kwd>artificial intelligence</kwd><kwd>education</kwd><kwd>recovery</kwd><kwd>monitoring</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Sleep is essential for the health and well-being of individuals across all age groups, as it supports cognitive function, mood, mental health, as well as cardiovascular, cerebrovascular, and metabolic health [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref2">2</xref>]. Short-term sleep deprivation, chronic sleep restriction, circadian misalignment, and untreated sleep disorders can significantly harm physical health, mental health, and mood [<xref ref-type="bibr" rid="ref1">1</xref>,<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>].</p><p>One population frequently experiencing inadequate sleep or poor sleep quality is athletes [<xref ref-type="bibr" rid="ref5">5</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. 
Reasons for poor sleep may stem from multiple sport and nonsport factors including high training loads, long-haul travel, early morning training, family commitments, lifestyle choices including diet, or work or study commitments [<xref ref-type="bibr" rid="ref6">6</xref>]. While it is beyond the scope of this paper to dive into the effects of inadequate sleep in detail, we refer the reader to existing papers on this topic [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. Briefly, athletes&#x2019; sleep is considered a primary mechanism facilitating both psychological and physiological recovery [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. Poor sleep may be detrimental for athletes, with negative impacts on mental well-being, cognition, learning and memory consolidation, growth and repair of cells, glucose metabolism, and immune responses (eg, the resistance to respiratory infection) [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>].</p><p>To improve aspects of sleep, a first step is to educate athletes and staff on the negative effects of poor sleep and factors affecting sleep [<xref ref-type="bibr" rid="ref6">6</xref>], and there is evidence that sleep education improves the sleep behavior of team sport athletes [<xref ref-type="bibr" rid="ref13">13</xref>]. Such sleep education might include (1) education of athletes on potential factors negatively impacting sleep [<xref ref-type="bibr" rid="ref14">14</xref>] and (2) education on reducing the impact of factors negatively impacting sleep.</p><p>It was reported that even in elite athlete cohorts, there is a lack of sleep knowledge [<xref ref-type="bibr" rid="ref15">15</xref>], and consequently, there is a need for sleep education. 
For this, artificial intelligence and, more specifically, publicly available large language models (LLMs) might offer a scalable solution. By simulating human-like conversations, LLMs leverage deep learning to process information and generate nuanced responses. LLMs such as ChatGPT (OpenAI) are rapidly gaining popularity among the general population [<xref ref-type="bibr" rid="ref16">16</xref>] as well as in various scientific domains such as medical research and education [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref20">20</xref>], health care [<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref23">23</xref>], or nutrition [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. Thereby, it is likely that individuals turn to LLMs to receive responses to questions they face, for example, regarding sleep. However, it is currently unknown if recommendations regarding sleep generated by publicly available LLMs are appropriate and in line with recent scientific evidence and thus suitable for a specific athlete. Here, we aim (1) to investigate sleep recommendations provided to athletes generated by different publicly available LLMs as evaluated by experienced raters and (2) to investigate if sleep recommendations differ depending on the provided information by the user.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>General Design</title><p>To evaluate sleep recommendations provided by LLMs, we followed methodologies of similar papers in the medical field [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref29">29</xref>] or in the exercise science literature [<xref ref-type="bibr" rid="ref30">30</xref>-<xref ref-type="bibr" rid="ref33">33</xref>] and adjusted these methodologies to the aim of our research. 
<xref ref-type="fig" rid="figure1">Figure 1</xref> depicts the experimental workflow of the study.</p><p>For this, we (1) define a specific use case, (2) define criteria of relevance for sleep education in this use case, (3) define information input into publicly available LLMs, and (4) involve experienced raters in the topic of sleep within the athletic population to rate outcomes of LLM-generated responses.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Experimental workflow of the study.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e71358_fig01.png"/></fig></sec><sec id="s2-2"><title>Ethical Considerations</title><p>The ethics committee of the Faculty of Exercise Science and Training at the University of W&#x00FC;rzburg approved the study (reference: EV2025/5-0606). Raters were informed about procedures and gave their consent to participate in the study. No compensation was given to the raters. The ratings were deidentified after they were received.</p></sec><sec id="s2-3"><title>Definition of 2 Use Cases</title><sec id="s2-3-1"><title>Use Case 1: Male Tier 4 Soccer Player</title><p>For use case 1, we define a healthy, 23-year-old male tier 4, elite soccer player [<xref ref-type="bibr" rid="ref34">34</xref>] who trains 5 times a week and has 1&#x2010;2 competitive games per week at a national level. The soccer player has experienced poor sleep for approximately a year, and the sleep disturbance is worse closer to important games. The individual in our use case has no formal education on, for example, factors affecting sleep or on the effectiveness of countermeasures to improve sleep and no access to experienced and educated personnel who could educate him on sleep.
We define that the major reasons for impaired sleep are high training loads, arousal the night before competition, and unfamiliar sleeping environments in the case of away games.</p></sec><sec id="s2-3-2"><title>Use Case 2: Female Tier 3 Marathon Runner</title><p>For use case 2, we define a healthy, 25-year-old female tier 3, highly trained or national-level marathon runner [<xref ref-type="bibr" rid="ref34">34</xref>] who runs around 100 km per week following a pyramidal training intensity distribution [<xref ref-type="bibr" rid="ref35">35</xref>]. The female runner competes at the national level. In addition to her running training, the runner is enrolled in a master&#x2019;s-level program at university.</p><p>The female runner has experienced poor sleep since entering the master&#x2019;s level at university (approximately 6 months ago), and sleep is compromised especially during examination periods, which also affects running training and performance. The individual in our use case has no formal education on, for example, factors affecting sleep or on the effectiveness of countermeasures to improve sleep and no access to experienced and educated personnel who could educate her on sleep. We define that the major reasons for impaired sleep are training loads and disturbances stemming from university obligations.</p><p>In line with the aims of this research, we challenge the publicly available LLMs (1) to identify the reasons for sleep disturbances and (2) to give evidence-based guidance on how to reduce the impact of factors negatively impacting sleep.</p></sec></sec><sec id="s2-4"><title>Criteria of Relevance for Sleep Education</title><p>There are many sport and nonsport factors that impact the sleep of athletes.
However, factors influencing sleep that are most commonly mentioned in the literature include high training loads [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref14">14</xref>], unfamiliar sleeping environments [<xref ref-type="bibr" rid="ref6">6</xref>], early morning or late evening training or competition [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref14">14</xref>], arousal the night before competition or the night after competition [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref14">14</xref>], circadian rhythm disruption (eg, due to long-haul travel) [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref14">14</xref>], family commitments [<xref ref-type="bibr" rid="ref6">6</xref>], lifestyle choices such as coffee drinking or supplement use containing caffeine [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref14">14</xref>], and use of electronic devices (eg, smartphone use) [<xref ref-type="bibr" rid="ref14">14</xref>].</p></sec><sec id="s2-5"><title>Prompts Inserted Into the LLMs</title><p>Given the chatbot nature of LLMs, we assume that the input provided by individuals seeking sleep education will vary, like any other conversation. Depending on factors such as previous knowledge about sleep or personal experiences, we assume that some individuals may provide minimal information, while others may be more detailed. To accommodate this diversity in the input information, we developed 2 distinct input information scenarios for both our use cases. Scenario 1 resembles an individual who inserts little to no information and only asks superficial check-backs once a response is given by LLMs. Scenario 2 resembles an individual who inserts more information and asks more detailed check-backs once a response is given by LLMs. 
The complete conversations with LLMs are available in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendices 1</xref> and <xref ref-type="supplementary-material" rid="app2">2</xref>. Prompts of the scenarios were designed by the authors who are frequently in conversations with athletes on aspects of sleep.</p><p>For use case 1, the initial prompts were as follows:</p><p>Scenario 1:</p><disp-quote><p>I feel tired in the morning and after waking up. I do not know why. This is especially worse close to soccer games. Can you give me advice on how to improve my sleep?</p></disp-quote><p>Scenario 2:</p><disp-quote><p>I am a 23 year old male highly trained/national level soccer player. I train 5 times a week and plus a game per week. I feel tired due to poor sleep at night since approximately a year. Especially the night before a game I sleep poorly. Can you give me advice on factors which might affect my sleep and how I can reduce the impact of these factors? Use only scientific literature and specifically, for each advice, state this literature and provide a reference list at the end.</p></disp-quote><p>For use case 2, the initial prompts were as follows:</p><p>Scenario 1:</p><disp-quote><p>I feel tired in the morning and after waking up. I do not know why. Maybe it is due to my running training or due to entering master level at university. Can you give me advice on how to improve my sleep?</p></disp-quote><p>Scenario 2:</p><disp-quote><p>I am a 25 year old female tier 3, highly trained/national level marathon runner player and I run approx. 100km per week with a pyramidal intensity distribution. I feel tired in the morning since approximately entering my master course at university. Can you give me advice on factors which might affect my sleep and how I can reduce the impact of these factors? 
Use only scientific literature and specifically, for each advice, state this literature and provide a reference list at the end.</p></disp-quote><p>Here, we used GPT-4o and Google Gemini Advanced without any use of plug-ins, as both are publicly available and thereby can be used by individuals who seek sleep education. Prompts were inserted on May 15, 2024.</p></sec><sec id="s2-6"><title>Raters</title><p>We reached out to well-educated and experienced raters on sleep and athletes to assess the provided recommendations on the outlined aspects relevant for sleep education on a 1 to 5 Likert scale. A total of 13 experienced raters (age range: 28&#x2010;42 years; n=6 with a PhD, n=7 with a master&#x2019;s degree in human physiology or sleep science), who had worked with athletes on matters of sleep for an average of 9 (SD 7) years and reported a mean sleep research experience of 4 (SD 6) years, participated in this study.</p></sec><sec id="s2-7"><title>Statistical Analysis</title><p>We calculated descriptive statistics for the Likert scores on all rated items for each question. To test for significant differences in all rated items between the sleep recommendations, a Friedman test with Bonferroni correction was performed. Significance level was set to <italic>P</italic>&#x003C;.05. Fleiss &#x03BA; was calculated to assess interrater reliability [<xref ref-type="bibr" rid="ref36">36</xref>]. Interpretation of Fleiss &#x03BA; results was conducted according to the classification by Landis and Koch [<xref ref-type="bibr" rid="ref37">37</xref>]. Fleiss &#x03BA; values were interpreted as follows: a value of 0.00-0.20 as &#x201C;slight,&#x201D; 0.21-0.40 as &#x201C;fair,&#x201D; 0.41-0.60 as &#x201C;moderate,&#x201D; 0.61-0.80 as &#x201C;substantial,&#x201D; and &#x003E;0.80 as &#x201C;almost perfect&#x201D; [<xref ref-type="bibr" rid="ref37">37</xref>].
All statistical analyses were performed in SPSS (version 28; IBM Corp).</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p><xref ref-type="table" rid="table1">Table 1</xref> represents Fleiss &#x03BA; values for the different LLMs and use cases. The analysis of Fleiss &#x03BA; indicated a fair agreement of the overall interrater reliability (0.280), while results ranged between 0.183 and 0.296 (<xref ref-type="table" rid="table1">Table 1</xref>).</p><p>Descriptive statistics of the evaluated sleep recommendations are presented in <xref ref-type="table" rid="table2">Table 2</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Fleiss &#x03BA; results.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Large language model, use case, and scenario</td><td align="left" valign="bottom">Fleiss &#x03BA;</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="3">Google Gemini</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Use case 1&#x2014;scenario 1 (Gem_C1-S1)</td><td align="char" char="." valign="top">0.270</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Use case 1&#x2014;scenario 2 (Gem_C1-S2)</td><td align="char" char="." valign="top">0.198</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Use case 2&#x2014;scenario 1 (Gem_C2-S1)</td><td align="char" char="." valign="top">0.296</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Use case 2&#x2014;scenario 2 (Gem_C2-S2)</td><td align="char" char="." valign="top">0.183</td></tr><tr><td align="left" valign="top" colspan="3">GPT-4o</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Use case 1&#x2014;scenario 1 (GPT_C1-S1)</td><td align="char" char="." 
valign="top">0.258</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Use case 1&#x2014;scenario 2 (GPT_C1-S2)</td><td align="char" char="." valign="top">0.264</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Use case 2&#x2014;scenario 1 (GPT_C2-S1)</td><td align="char" char="." valign="top">0.256</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Use case 2&#x2014;scenario 2 (GPT_C2-S2)</td><td align="char" char="." valign="top">0.229</td></tr></tbody></table></table-wrap><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Descriptive analysis of Likert-scale ratings<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup>.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top" colspan="2">Relevant aspects when deploying sleep recommendations</td><td align="left" valign="top">Gem_C1-S1<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="top">GPT_C1-S1<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">Gem_C1-S2<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">GPT_C1-S2<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">Gem_C2-S1<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup></td><td align="left" valign="top">GPT_C2-S1<sup><xref ref-type="table-fn" rid="table2fn7">g</xref></sup></td><td align="left" valign="top">Gem_C2-S2<sup><xref ref-type="table-fn" rid="table2fn8">h</xref></sup></td><td align="left" valign="top">GPT_C2-S2<sup><xref ref-type="table-fn" rid="table2fn9">i</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top" colspan="10">General aspects, median (IQR)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Overall training plan</td><td align="left" valign="top">3 (3-3)</td><td align="left" 
valign="top">3 (3-4)</td><td align="left" valign="top">3 (3-4)</td><td align="left" valign="top">4 (4-4)</td><td align="left" valign="top">3 (3-4)</td><td align="left" valign="top">3 (3-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (3.75-4)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Training load</td><td align="left" valign="top">0 (0-3)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">3 (0-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">3 (2-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (4-5)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Unfamiliar sleeping environments</td><td align="left" valign="top">0 (0-3)</td><td align="left" valign="top">2 (0-3)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">3 (0-4)</td><td align="left" valign="top">3 (0-3)</td><td align="left" valign="top">3 (0-4)</td><td align="left" valign="top">3 (0-4)</td><td align="left" valign="top">3 (0-4)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Early morning or late evening training or competition</td><td align="left" valign="top">3 (0-4)</td><td align="left" valign="top">3 (3-3)</td><td align="left" valign="top">4 (4-4)</td><td align="left" valign="top">4 (4-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">3 (0-4)</td><td align="left" valign="top">0 (0-4)</td><td align="left" valign="top">4 (3-4)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Arousal the night before competition or training</td><td align="left" valign="top">0 (0-3)</td><td align="left" valign="top">3 (3-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">3 (3-4)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">0 (0-0)</td><td 
align="left" valign="top">0 (0-0)</td><td align="left" valign="top">0 (0-4)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Circadian rhythm disruptions or consistency of sleep schedule</td><td align="left" valign="top">3 (3-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (4-4)</td><td align="left" valign="top">4 (3-5)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">3 (3-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">3 (3-4)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Family commitments</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">0 (0-0)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Lifestyle choices</td><td align="left" valign="top">3 (0-3)</td><td align="left" valign="top">3 (0-4)</td><td align="left" valign="top">2 (0-3)</td><td align="left" valign="top">0 (0-3)</td><td align="left" valign="top">3 (2-4)</td><td align="left" valign="top">3.5 (2.25-4)</td><td align="left" valign="top">3 (0-3)</td><td align="left" valign="top">3 (3-3)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Use of electronic devices</td><td align="left" valign="top">3 (3-3.25)</td><td align="left" valign="top">4 (3-5)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">4 (4-5)</td><td align="left" valign="top">3 (0-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (3-5)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Nutrition</td><td align="left" valign="top">3 (3-3)</td><td align="left" 
valign="top">4 (3-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">3 (3-5)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">3 (3-4)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (3-5)</td></tr><tr><td align="left" valign="top" colspan="10">Summary rating, n (%)</td></tr><tr><td align="left" valign="top"/><td align="char" char="." valign="top">&#x003C;3</td><td align="left" valign="top">4 (40)</td><td align="left" valign="top">3 (30)</td><td align="left" valign="top">3 (30)</td><td align="left" valign="top">2 (20)</td><td align="left" valign="top">2 (20)</td><td align="left" valign="top">2 (20)</td><td align="left" valign="top">3 (30)</td><td align="left" valign="top">2 (20)</td></tr><tr><td align="left" valign="top"/><td align="char" char="." valign="top">3</td><td align="left" valign="top">6 (60)</td><td align="left" valign="top">4 (40)</td><td align="left" valign="top">2 (20)</td><td align="left" valign="top">3 (30)</td><td align="left" valign="top">5 (50)</td><td align="left" valign="top">5 (50)</td><td align="left" valign="top">2 (20)</td><td align="left" valign="top">3 (30)</td></tr><tr><td align="left" valign="top"/><td align="char" char="."
valign="top">&#x003E;3</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">3 (30)</td><td align="left" valign="top">5 (50)</td><td align="left" valign="top">5 (50)</td><td align="left" valign="top">3 (30)</td><td align="left" valign="top">3 (30)</td><td align="left" valign="top">5 (50)</td><td align="left" valign="top">5 (50)</td></tr><tr><td align="left" valign="top" colspan="10">Scientific sources, median (IQR)</td></tr><tr><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">Is the real and existing literature stated (no &#x201C;fake citations&#x201D;)?</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">4 (2-4)</td><td align="left" valign="top">5 (5-5)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">5 (5-5)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Appropriateness of provided scientific evidence</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (4-4)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">4 (4-5)</td><td align="left" valign="top">4 (4-5)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Quality of provided scientific evidence in this specific context</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (4-4)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">0 (0-0)</td><td align="left" valign="top">4 (3-4)</td><td align="left" valign="top">4 (4-4)</td></tr><tr><td align="left" valign="top" colspan="10">Summary rating, n (%)</td></tr><tr><td align="left" valign="top"/><td 
align="char" char="." valign="top">&#x003C;3</td><td align="left" valign="top">3 (100)</td><td align="left" valign="top">3 (100)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">3 (100)</td><td align="left" valign="top">3 (100)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td></tr><tr><td align="left" valign="top"/><td align="char" char="." valign="top">3</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">1 (33.3)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td></tr><tr><td align="left" valign="top"/><td align="char" char="." valign="top">&#x003E;3</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">3 (100)</td><td align="left" valign="top">2 (66.7)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">0 (0)</td><td align="left" valign="top">3 (100)</td><td align="left" valign="top">3 (100)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>Results range from 1=bad to 5=good with 0=not applicable.</p></fn><fn id="table2fn2"><p><sup>b</sup>Gem_C1-S1: Google Gemini, use case 1&#x2014;scenario 1.</p></fn><fn id="table2fn3"><p><sup>c</sup>GPT_C1-S1: ChatGPT, use case 1&#x2014;scenario 1.</p></fn><fn id="table2fn4"><p><sup>d</sup>Gem_C1-S2: Google Gemini, use case 1&#x2014;scenario 2.</p></fn><fn id="table2fn5"><p><sup>e</sup>GPT_C1-S2: ChatGPT, use case 1&#x2014;scenario 2.</p></fn><fn id="table2fn6"><p><sup>f</sup>Gem_C2-S1: Google Gemini, use case 2&#x2014;scenario 1.</p></fn><fn id="table2fn7"><p><sup>g</sup>GPT_C2-S1: ChatGPT, use case 2&#x2014;scenario 1.</p></fn><fn id="table2fn8"><p><sup>h</sup>Gem_C2-S2: Google Gemini, use case 2&#x2014;scenario 
2.</p></fn><fn id="table2fn9"><p><sup>i</sup>GPT_C2-S2: ChatGPT, use case 2&#x2014;scenario 2.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-2"><title>Differences Regarding Input Information Granularity and Between Google Gemini and GPT-4o</title><p>Results for significance testing regarding different input information granularities and differences between Google Gemini and GPT-4o are presented in <xref ref-type="table" rid="table3">Table 3</xref>.</p><p>Significance testing of the comparison between identical prompts across different LLMs shows that GPT-4o attained significantly higher Likert-scale scores in 9 of the 10 significant differences found across the criteria of relevance for sleep education (<italic>P</italic>&#x003C;.001 to <italic>P</italic>=.04). Higher input information granularity, independent of the LLM used, exhibits significantly higher Likert-scale ratings in 28 of 52 criteria items of relevance for sleep education (<italic>P</italic>&#x003C;.001 to <italic>P</italic>=.049).</p><p>In this paper, we compare the output of different LLMs when the same information was inserted. 
We do not show comparisons of different LLMs and different information input (eg, Gem_C1-S1 vs GPT_C1-S2) but provide these to the interested reader in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Results of the significance testing comparing sleep recommendations: between Google Gemini and GPT-4o for the same input information granularity and within Google Gemini or GPT-4o for different input information granularity.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" colspan="2">Relevant aspects when deploying sleep recommendations</td><td align="left" valign="bottom" colspan="4">Significance testing (<italic>P</italic> value) (Google Gemini versus GPT-4o; same prompt, different LLM)<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="bottom" colspan="4">Significance testing (<italic>P</italic> value) (input information granularity; different prompt, same LLM)</td></tr><tr><td align="left" valign="top" colspan="2"/><td align="left" valign="top">Gem_C1-S1<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> versus GPT_C1-S1<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">Gem_C1-S2<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup> versus GPT_C1-S2<sup><xref ref-type="table-fn" rid="table3fn5">e</xref></sup></td><td align="left" valign="top">Gem_C2-S1<sup><xref ref-type="table-fn" rid="table3fn6">f</xref></sup> versus GPT_C2-S1<sup><xref ref-type="table-fn" rid="table3fn7">g</xref></sup></td><td align="left" valign="top">Gem_C2-S2<sup><xref ref-type="table-fn" rid="table3fn8">h</xref></sup> versus GPT_C2-S2<sup><xref ref-type="table-fn" rid="table3fn9">i</xref></sup></td><td align="left" valign="top">Gem_C1-S1 versus Gem_C1-S2</td><td align="left" valign="top">GPT_C1-S1 versus GPT_C1-S2</td><td 
align="left" valign="top">Gem_C2-S1 versus Gem_C2-S2</td><td align="left" valign="top">GPT_C2-S1 versus GPT_C2-S2</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="10">General aspects</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Overall training plan</td><td align="left" valign="top">.92</td><td align="left" valign="top"><italic>.02</italic></td><td align="left" valign="top">.16</td><td align="left" valign="top">.07</td><td align="left" valign="top">.47</td><td align="left" valign="top"><italic>.003</italic></td><td align="left" valign="top">.38</td><td align="left" valign="top"><italic>.005</italic></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Training load</td><td align="left" valign="top">.19</td><td align="left" valign="top">.21</td><td align="left" valign="top">.06</td><td align="left" valign="top">.21</td><td align="left" valign="top"><italic>.003</italic></td><td align="left" valign="top"><italic>&#x003C;.001</italic></td><td align="left" valign="top"><italic>.005</italic></td><td align="left" valign="top"><italic>.03</italic></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Unfamiliar sleeping environments</td><td align="left" valign="top">.63</td><td align="left" valign="top">.21</td><td align="left" valign="top">.76</td><td align="left" valign="top">.72</td><td align="left" valign="top"><italic>.006</italic></td><td align="left" valign="top">.28</td><td align="left" valign="top">.15</td><td align="left" valign="top">.37</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Early morning or late evening training or competition</td><td align="left" valign="top">.88</td><td align="left" valign="top">.88</td><td align="left" valign="top">.08</td><td align="left" valign="top"><italic>.004</italic></td><td align="left" valign="top"><italic>.01</italic></td><td align="left" valign="top"><italic>.01</italic></td><td align="left" 
valign="top"><italic>.009</italic></td><td align="left" valign="top"><italic>.04</italic></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Arousal the night before competition or training</td><td align="left" valign="top"><italic>.005</italic></td><td align="left" valign="top">.85</td><td align="left" valign="top">.11</td><td align="left" valign="top"><italic>.02</italic></td><td align="left" valign="top"><italic>.001</italic></td><td align="left" valign="top">.54</td><td align="left" valign="top">.14</td><td align="left" valign="top">.35</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Circadian rhythm disruptions or consistency of sleep schedule</td><td align="left" valign="top">.16</td><td align="left" valign="top">.96</td><td align="left" valign="top">.43</td><td align="left" valign="top">.96</td><td align="left" valign="top"><italic>.01</italic></td><td align="left" valign="top">.25</td><td align="left" valign="top">.71</td><td align="left" valign="top">.71</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Family commitments</td><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top"><italic>.049</italic></td><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top"><italic>.049</italic></td><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x003E;.99</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Lifestyle choices</td><td align="left" valign="top">.74</td><td align="left" valign="top">.96</td><td align="left" valign="top">.74</td><td align="left" valign="top">.55</td><td align="left" valign="top">.37</td><td align="left" valign="top">.21</td><td align="left" valign="top">.28</td><td align="left" valign="top">.41</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Use of electronic 
devices</td><td align="left" valign="top">.07</td><td align="left" valign="top"><italic>&#x003C;.001</italic></td><td align="left" valign="top">.27</td><td align="left" valign="top">.38</td><td align="left" valign="top"><italic>&#x003C;.001</italic></td><td align="left" valign="top">.22</td><td align="left" valign="top">.48</td><td align="left" valign="top">.64</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Nutrition</td><td align="left" valign="top"><italic>.03</italic></td><td align="left" valign="top">.70</td><td align="left" valign="top">.96</td><td align="left" valign="top">.28</td><td align="left" valign="top"><italic>.046</italic></td><td align="left" valign="top">.83</td><td align="left" valign="top">.74</td><td align="left" valign="top">.14</td></tr><tr><td align="left" valign="top" colspan="10">Scientific sources</td></tr><tr><td align="left" valign="top">&#x2003;</td><td align="left" valign="top">Is real and existing literature stated (no &#x201C;fake citations&#x201D;)?</td><td align="left" valign="top">.62</td><td align="left" valign="top"><italic>&#x003C;.001</italic></td><td align="left" valign="top">.22</td><td align="left" valign="top"><italic>&#x003C;.001</italic></td><td align="left" valign="top"><italic>&#x003C;.001</italic></td><td align="left" valign="top"><italic>&#x003C;.001</italic></td><td align="left" valign="top"><italic>&#x003C;.001</italic></td><td align="left" valign="top"><italic>&#x003C;.001</italic></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Appropriateness of provided scientific evidence</td><td align="left" valign="top">.92</td><td align="left" valign="top">.52</td><td align="left" valign="top">.17</td><td align="left" valign="top">.40</td><td align="left" valign="top"><italic>&#x003C;.001</italic></td><td align="left" valign="top"><italic>&#x003C;.001</italic></td><td align="left" valign="top"><italic>&#x003C;.001</italic></td><td align="left" 
valign="top"><italic>&#x003C;.001</italic></td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Quality of provided scientific evidence in this specific context</td><td align="left" valign="top"><italic>.04</italic></td><td align="left" valign="top">.72</td><td align="left" valign="top">.25</td><td align="left" valign="top">.08</td><td align="left" valign="top"><italic>&#x003C;.001</italic></td><td align="left" valign="top"><italic>&#x003C;.001</italic></td><td align="left" valign="top"><italic>&#x003C;.001</italic></td><td align="left" valign="top"><italic>&#x003C;.001</italic></td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>LLM: large language model.</p></fn><fn id="table3fn2"><p><sup>b</sup>Gem_C1-S1: Google Gemini, use case 1&#x2014;scenario 1.</p></fn><fn id="table3fn3"><p><sup>c</sup>GPT_C1-S1: ChatGPT, use case 1&#x2014;scenario 1.</p></fn><fn id="table3fn4"><p><sup>d</sup>Gem_C1-S2: Google Gemini, use case 1&#x2014;scenario 2.</p></fn><fn id="table3fn5"><p><sup>e</sup>GPT_C1-S2: ChatGPT, use case 1&#x2014;scenario 2.</p></fn><fn id="table3fn6"><p><sup>f</sup>Gem_C2-S1: Google Gemini, use case 2&#x2014;scenario 1.</p></fn><fn id="table3fn7"><p><sup>g</sup>GPT_C2-S1: ChatGPT, use case 2&#x2014;scenario 1.</p></fn><fn id="table3fn8"><p><sup>h</sup>Gem_C2-S2: Google Gemini, use case 2&#x2014;scenario 2.</p></fn><fn id="table3fn9"><p><sup>i</sup>GPT_C2-S2: ChatGPT, use case 2&#x2014;scenario 2.</p></fn></table-wrap-foot></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>We aimed (1) to investigate the quality of sleep recommendations provided to athletes generated by different publicly available LLMs as evaluated by experienced raters and (2) to investigate if the quality of sleep recommendations differs depending on the provided information by the user.</p><p>Our main results are as follows:</p><list 
list-type="bullet"><list-item><p>The highest Likert-scale rating was achieved by GPT-4o using the prompt with high input granularity (use case 2, scenario 2) with 8 ratings &#x003E;3 (tendency toward good), 3 ratings equal to 3 (neutral), and 2 ratings &#x003C;3 (tendency toward bad) on a 1&#x2010;5 Likert scale. This indicates that even the highest-ranked recommendations provided by the herein investigated LLMs are not optimal.</p></list-item><list-item><p>Sleep recommendations by GPT-4o received higher Likert-scale ratings compared to those by Google Gemini (9 of 10 significant differences, with <italic>P</italic>&#x003C;.001 to <italic>P</italic>=.04). This suggests a tendency that GPT-4o outperforms Google Gemini in the investigated sleep deficiency scenarios.</p></list-item><list-item><p>Quality of sleep recommendations improves with higher input information granularity (significantly higher Likert-scale ratings in 28 of 52 criteria items of relevance for sleep education; <italic>P</italic>&#x003C;.001 to <italic>P</italic>=.049); however, some criteria of relevance for sleep education were partly or completely omitted, irrespective of the input information granularity.</p></list-item></list></sec><sec id="s4-2"><title>Ratings of Generated Recommendations Regarding Sleep</title><p>Our results indicate that sleep recommendations of publicly available LLMs are not rated optimally, even when inserting a prompt with a high input information granularity. Although prompting GPT-4o with high input information granularity (use case 2, scenario 2) gained the highest Likert-scale ratings by the experienced raters (n=8 &#x003E;3 Likert-scale rating), the summarized ratings demonstrate that the sleep recommendations exhibit deficiencies (n=3 ratings equal to 3 [neutral]; n=2 ratings &#x003C;3 [tendency toward bad]).</p><p>The insufficiency in providing optimal recommendations aligns with previous studies assessing LLMs in other research fields. 
For example, a systematic review and meta-analysis on ChatGPT&#x2019;s performance in answering medical questions showed that ChatGPT has an overall accuracy of 56%, suggesting potential but inadequacy for independent clinical decision-making [<xref ref-type="bibr" rid="ref38">38</xref>]. Research in the field of nutrition reported inappropriate recommendations generated by ChatGPT, indicating that personalized dietary recommendations by ChatGPT involve unpredictable errors [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref39">39</xref>]. Therefore, LLMs should not be relied on to provide current nutritional advice without nutrition professionals [<xref ref-type="bibr" rid="ref32">32</xref>,<xref ref-type="bibr" rid="ref39">39</xref>].</p><p>Our results revealed further limitations pertaining to the negligence of criteria that are relevant for sleep education by the LLMs. In particular, the criterion &#x201C;family commitments&#x201D; did not exceed a Likert-scale median of 0 (IQR 0-0) across all LLMs and levels of input information granularities. Regarding the evaluation of all LLMs and input information granularities, &#x201C;arousal the night before competition&#x201D; attained a median of 0 (IQR 0-0 to 0-4) in 5 of 8 Likert-scale ratings, while &#x201C;lifestyle&#x201D; gained a maximum median of 3.5 (IQR 2.25-4), reflecting a neutral rating. These criteria were highlighted by recent research to influence both sleep quality and quantity [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. Nastasi et al [<xref ref-type="bibr" rid="ref40">40</xref>] reported similar results in assessing ChatGPT&#x2019;s ability to provide appropriate responses to medical questions within care contexts. 
While the authors noted appropriate responses corresponding to clinical guidelines, they indicated insufficient recommendations in regard to personalized medical advice, especially neglecting social factors [<xref ref-type="bibr" rid="ref40">40</xref>].</p><p>Collectively, our results reveal that tailored sleep recommendations generated by GPT-4o and Google Gemini exhibit deficiencies according to received Likert-scale ratings by experienced raters, particularly in neglecting relevant criteria for sleep education. Therefore, sleep recommendations by LLMs should be carefully reviewed by a qualified coach or sleep professional before being applied in sleep educational settings.</p></sec><sec id="s4-3"><title>Differences in Ratings of GPT-4o and Google Gemini</title><p>Our results demonstrate that sleep recommendations by GPT-4o generally were rated higher compared to those by Google Gemini. Of the 10 significant differences between the 2 LLMs in Likert-scale ratings, 9 favored GPT-4o (<italic>P</italic>&#x003C;.001 to <italic>P</italic>=.04). Although the remaining scenarios (n=42) did not display significant results, our findings suggest a better quality of recommendations for sleep education by GPT-4o compared to Google Gemini.</p><p>Similar to our work, different authors compared different publicly available LLMs in different scenarios. For example, G&#x00FC;nay et al [<xref ref-type="bibr" rid="ref41">41</xref>] assessed GPT-4o and Google Gemini on 40 electrocardiogram cases and their responses to the most likely diagnosis. The results revealed that GPT-4o achieved higher accuracy compared to Gemini in electrocardiogram diagnostics. Carl&#x00E0; et al [<xref ref-type="bibr" rid="ref42">42</xref>] evaluated ChatGPT and Google Gemini on 4 retinal detachment cases related to planned surgeries and revealed that ChatGPT received higher ratings for accuracy and precision compared to Gemini. 
Hieronimus et al [<xref ref-type="bibr" rid="ref43">43</xref>] analyzed the completeness and accuracy of dietary reference intake in meal plans for different dietary patterns created by ChatGPT and Google Bard (subsequently rebranded Gemini) and revealed higher quality in the response of ChatGPT. In hand surgery, it was shown that Google Gemini outperformed ChatGPT in classifying injuries, while ChatGPT provided more sensitive recommendations regarding surgical interventions [<xref ref-type="bibr" rid="ref44">44</xref>].</p><p>Collectively, it appears that different LLMs differ in output quality, irrespective of the use case, and there seems to be a tendency that versions of ChatGPT outperform Google Gemini (which is in line with the results of our study), even though such statements need further investigation.</p></sec><sec id="s4-4"><title>Differences in Quality Regarding Prompt Information Granularity</title><p>Our results indicate that higher input granularity leads to better-rated sleep recommendations, independent of the LLM. In the combined results of both use cases and LLMs, scenario 2 (higher input information granularity) received significantly higher Likert-scale ratings in 28 of 52 criteria items of relevance for sleep education (<italic>P</italic>&#x003C;.001 to <italic>P</italic>=.049) compared to scenario 1 (low input information granularity), while scenario 1 attained higher Likert-scale ratings in 2 of 52 items (<italic>P</italic>=.001 to <italic>P</italic>=.009) compared to scenario 2.</p><p>Our finding is in line with research examining LLMs in other contexts [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref45">45</xref>-<xref ref-type="bibr" rid="ref48">48</xref>]. 
For example, Kunze et al [<xref ref-type="bibr" rid="ref48">48</xref>] inserted 20 knee complaints necessitating triage into ChatGPT-4 and investigated the accuracy and suitability rated by orthopedic sports medicine physicians. The authors stated that when providing additional input information, the accuracy of information output by ChatGPT-4 improved, particularly with enhancements in conservative management, surgical approaches, and related treatments [<xref ref-type="bibr" rid="ref48">48</xref>].</p><p>In the context of endurance sports, D&#x00FC;king et al [<xref ref-type="bibr" rid="ref30">30</xref>] provided 3 prompts with different levels of input information granularities to ChatGPT, resulting in 3 training plans aimed to improve running performance. Following an evaluation of these training plans by coaching experts, the authors indicated an increased quality with more input information provided [<xref ref-type="bibr" rid="ref30">30</xref>].</p><p>Collectively, when using LLMs, it seems important for users to input a sufficient amount of information into LLMs to improve output quality, at least for the herein investigated scenario.</p><p>An additional point of interest is the quality and appropriateness of the scientific sources. Our results demonstrate that more input information leads to significantly higher ratings in items pertaining to the used scientific sources (<italic>P</italic>&#x003C;.001), irrespective of the analyzed LLM. While higher input information granularity leads to improved sleep recommendations, the higher quality and appropriateness of the scientific sources may also contribute to these improvements. 
Since our analysis did not involve a differentiation between the effect of input information granularity and the use of scientific resources, future research should investigate both factors separately.</p><p>Conclusively, higher input information granularity leads to improved sleep recommendations and might be influenced by higher quality and appropriateness of the scientific sources. When supplying LLMs for sleep education, coaches and athletes should be highly aware of the appropriate input granularity and conduct a thorough review of the received output, potentially in consultation with an individual with strong experience in the field of sleep and athletic populations.</p></sec><sec id="s4-5"><title>Strengths, Limitations, and Future Research</title><p>Strengths of our study include the assessment of different publicly available LLMs (ie, GPT-4o and Google Gemini) in different use cases and with different input information granularity, allowing a detailed analysis of provided sleep recommendations to the athletic population. Another strength of our study is the involvement of experienced raters evaluating the recommendations of LLMs.</p><p>Our results are limited to GPT-4o and Google Gemini on the versions available on May 15, 2024. As LLMs show fast developments (eg, by being able to search the internet), transferring our results to newer versions should be performed with caution. Due to such fast developments, it appears necessary to develop assessment methods or frameworks to evaluate the quality of LLMs in different scenarios to inform practitioners of the quality of currently available LLMs. Additionally, our results are, strictly speaking, only valid for the herein tested prompts, and other prompts might yield different results.</p><p>Despite the fact that all raters were experienced or well-educated in the field of sleep, the interrater reliability of our study ranges between slight and fair agreements (0.183 to 0.296). 
The tendency toward low interrater reliability is in line with previous research [<xref ref-type="bibr" rid="ref30">30</xref>]. It may indicate that no single approach is universally optimal or perfect regarding athlete sleep recommendations. This suggests that different experts might rate the recommendations provided by LLMs in the respective use case differently, for example, based on personal preferences or personal experiences. While our results hold practical insights for athletes seeking sleep recommendations from LLMs (eg, to insert detailed information), individual sleep coaches might disagree with the recommendations provided by LLMs.</p><p>It seems important to note that even though it was shown that sleep education approaches can result in enhanced sleep behavior [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref49">49</xref>-<xref ref-type="bibr" rid="ref51">51</xref>], to the best of our knowledge, there is no scientific evidence available that sleep recommendations generated by LLMs improve aspects of sleep in the athletic population. Future studies should evaluate the effectiveness of sleep education provided by publicly available LLMs to improve aspects of sleep.</p></sec><sec id="s4-6"><title>Conclusions</title><p>Our study indicates that sleep recommendations generated by GPT-4o or Google Gemini are not rated optimally, independently of the level of input information granularity. However, our results demonstrate that GPT-4o provides better sleep recommendations to athletes compared to Google Gemini and that sleep recommendations improved with more detailed input information for both herein investigated LLMs. 
Collectively, for LLMs to be used in practice, it seems essential to insert detailed information into LLMs and to thoroughly review the provided sleep recommendations for athletic populations.</p></sec></sec></body><back><ack><p>The authors acknowledge the use of large language models for assisting with grammar and language checks during the final preparation of this manuscript. The authors checked the final manuscript and approved it for publication. JMIR Publications has provided article processing fee (APF) support for the publication of this paper. The authors also acknowledge funding for the open-access publication fees by TU Braunschweig.</p></ack><notes><sec><title>Data Availability</title><p>The datasets generated and analyzed during this study are available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">GPT-4o</term><def><p>ChatGPT-4o</p></def></def-item><def-item><term id="abb2">LLM</term><def><p>large language model</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ramar</surname><given-names>K</given-names> </name><name name-style="western"><surname>Malhotra</surname><given-names>RK</given-names> </name><name name-style="western"><surname>Carden</surname><given-names>KA</given-names> </name><etal/></person-group><article-title>Sleep is essential to health: an American Academy of Sleep Medicine position statement</article-title><source>J Clin Sleep Med</source><year>2021</year><month>10</month><day>1</day><volume>17</volume><issue>10</issue><fpage>2115</fpage><lpage>2119</lpage><pub-id pub-id-type="doi">10.5664/jcsm.9476</pub-id><pub-id pub-id-type="medline">34170250</pub-id></nlm-citation></ref><ref 
id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Agarwal</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Gonzales</surname><given-names>R</given-names> </name><name name-style="western"><surname>Scott</surname><given-names>K</given-names> </name><name name-style="western"><surname>Merchant</surname><given-names>R</given-names> </name></person-group><article-title>Investigating the feasibility of using a wearable device to measure physiologic health data in emergency nurses and residents: observational cohort study</article-title><source>JMIR Form Res</source><year>2024</year><month>02</month><day>22</day><volume>8</volume><fpage>e51569</fpage><pub-id pub-id-type="doi">10.2196/51569</pub-id><pub-id pub-id-type="medline">38386373</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xu</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Chung</surname><given-names>K</given-names> </name><name name-style="western"><surname>Schulz</surname><given-names>P</given-names> </name><name name-style="western"><surname>Gottlieb</surname><given-names>A</given-names> </name></person-group><article-title>Prediction of mild cognitive impairment status: pilot study of machine learning models based on longitudinal data from fitness trackers</article-title><source>JMIR Form Res</source><year>2024</year><month>07</month><day>18</day><volume>8</volume><fpage>e55575</fpage><pub-id pub-id-type="doi">10.2196/55575</pub-id><pub-id pub-id-type="medline">39024003</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Knowlden</surname><given-names>AP</given-names> </name><name name-style="western"><surname>Winchester</surname><given-names>LJ</given-names> </name><name name-style="western"><surname>MacDonald</surname><given-names>HV</given-names> </name><name name-style="western"><surname>Geyer</surname><given-names>JD</given-names> </name><name name-style="western"><surname>Higginbotham</surname><given-names>JC</given-names> </name></person-group><article-title>Associations among cardiometabolic risk factors, sleep duration, and obstructive sleep apnea in a southeastern US rural community: cross-sectional analysis from the SLUMBRx-PONS Study</article-title><source>JMIR Form Res</source><year>2024</year><month>11</month><day>8</day><volume>8</volume><fpage>e54792</fpage><pub-id pub-id-type="doi">10.2196/54792</pub-id><pub-id pub-id-type="medline">39514856</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gupta</surname><given-names>L</given-names> </name><name name-style="western"><surname>Morgan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Gilchrist</surname><given-names>S</given-names> </name></person-group><article-title>Does elite sport degrade sleep quality? 
A systematic review</article-title><source>Sports Med</source><year>2017</year><month>07</month><volume>47</volume><issue>7</issue><fpage>1317</fpage><lpage>1333</lpage><pub-id pub-id-type="doi">10.1007/s40279-016-0650-6</pub-id><pub-id pub-id-type="medline">27900583</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Walsh</surname><given-names>NP</given-names> </name><name name-style="western"><surname>Halson</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Sargent</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Sleep and the athlete: narrative review and 2021 expert consensus recommendations</article-title><source>Br J Sports Med</source><year>2021</year><month>04</month><volume>55</volume><issue>7</issue><fpage>356</fpage><lpage>368</lpage><pub-id pub-id-type="doi">10.1136/bjsports-2020-102025</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Driller</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Suppiah</surname><given-names>H</given-names> </name><name name-style="western"><surname>Rogerson</surname><given-names>D</given-names> </name><name name-style="western"><surname>Ruddock</surname><given-names>A</given-names> </name><name name-style="western"><surname>James</surname><given-names>L</given-names> </name><name name-style="western"><surname>Virgile</surname><given-names>A</given-names> </name></person-group><article-title>Investigating the sleep habits in individual and team-sport athletes using the Athlete Sleep Behavior Questionnaire and the Pittsburgh Sleep Quality Index</article-title><source>Sleep Sci</source><year>2022</year><volume>15</volume><issue>1</issue><fpage>112</fpage><lpage>117</lpage><pub-id 
pub-id-type="doi">10.5935/1984-0063.20210031</pub-id><pub-id pub-id-type="medline">35662975</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Halson</surname><given-names>SL</given-names> </name></person-group><article-title>Sleep in elite athletes and nutritional interventions to enhance sleep</article-title><source>Sports Med</source><year>2014</year><month>05</month><volume>44</volume><issue>Suppl 1</issue><fpage>S13</fpage><lpage>S23</lpage><pub-id pub-id-type="doi">10.1007/s40279-014-0147-0</pub-id><pub-id pub-id-type="medline">24791913</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cunha</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Costa</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Marques</surname><given-names>EA</given-names> </name><name name-style="western"><surname>Brito</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lastella</surname><given-names>M</given-names> </name><name name-style="western"><surname>Figueiredo</surname><given-names>P</given-names> </name></person-group><article-title>The impact of sleep interventions on athletic performance: a systematic review</article-title><source>Sports Med Open</source><year>2023</year><month>07</month><day>18</day><volume>9</volume><issue>1</issue><fpage>58</fpage><pub-id pub-id-type="doi">10.1186/s40798-023-00599-z</pub-id><pub-id pub-id-type="medline">37462808</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Venter</surname><given-names>RE</given-names> </name></person-group><article-title>Perceptions of team athletes on the 
importance of recovery modalities</article-title><source>Eur J Sport Sci</source><year>2014</year><volume>14</volume><issue>Suppl 1</issue><fpage>S69</fpage><lpage>S76</lpage><pub-id pub-id-type="doi">10.1080/17461391.2011.643924</pub-id><pub-id pub-id-type="medline">24444246</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Czeisler</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Klerman</surname><given-names>EB</given-names> </name></person-group><article-title>Circadian and sleep-dependent regulation of hormone release in humans</article-title><source>Recent Prog Horm Res</source><year>1999</year><volume>54</volume><fpage>97</fpage><lpage>130</lpage><pub-id pub-id-type="medline">10548874</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fullagar</surname><given-names>HHK</given-names> </name><name name-style="western"><surname>Skorski</surname><given-names>S</given-names> </name><name name-style="western"><surname>Duffield</surname><given-names>R</given-names> </name><name name-style="western"><surname>Hammes</surname><given-names>D</given-names> </name><name name-style="western"><surname>Coutts</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Meyer</surname><given-names>T</given-names> </name></person-group><article-title>Sleep and athletic performance: the effects of sleep loss on exercise performance, and physiological and cognitive responses to exercise</article-title><source>Sports Med</source><year>2015</year><month>02</month><volume>45</volume><issue>2</issue><fpage>161</fpage><lpage>186</lpage><pub-id pub-id-type="doi">10.1007/s40279-014-0260-0</pub-id><pub-id pub-id-type="medline">25315456</pub-id></nlm-citation></ref><ref 
id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Caia</surname><given-names>J</given-names> </name><name name-style="western"><surname>Scott</surname><given-names>TJ</given-names> </name><name name-style="western"><surname>Halson</surname><given-names>SL</given-names> </name><name name-style="western"><surname>Kelly</surname><given-names>VG</given-names> </name></person-group><article-title>The influence of sleep hygiene education on sleep in professional rugby league athletes</article-title><source>Sleep Health</source><year>2018</year><month>08</month><volume>4</volume><issue>4</issue><fpage>364</fpage><lpage>368</lpage><pub-id pub-id-type="doi">10.1016/j.sleh.2018.05.002</pub-id><pub-id pub-id-type="medline">30031530</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cook</surname><given-names>JD</given-names> </name><name name-style="western"><surname>Charest</surname><given-names>J</given-names> </name></person-group><article-title>Sleep and performance in professional athletes</article-title><source>Curr Sleep Med Rep</source><year>2023</year><volume>9</volume><issue>1</issue><fpage>56</fpage><lpage>81</lpage><pub-id pub-id-type="doi">10.1007/s40675-022-00243-4</pub-id><pub-id pub-id-type="medline">36683842</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Miles</surname><given-names>KH</given-names> </name><name name-style="western"><surname>Clark</surname><given-names>B</given-names> </name><name name-style="western"><surname>Fowler</surname><given-names>PM</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Pumpa</surname><given-names>KL</given-names> </name></person-group><article-title>Sleep practices implemented by team sport coaches and sports science support staff: a potential avenue to improve athlete sleep?</article-title><source>J Sci Med Sport</source><year>2019</year><month>07</month><volume>22</volume><issue>7</issue><fpage>748</fpage><lpage>752</lpage><pub-id pub-id-type="doi">10.1016/j.jsams.2019.01.008</pub-id><pub-id pub-id-type="medline">30685228</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Salah</surname><given-names>M</given-names> </name><name name-style="western"><surname>Alhalbusi</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ismail</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Abdelfattah</surname><given-names>F</given-names> </name></person-group><article-title>Chatting with ChatGPT: decoding the mind of Chatbot users and unveiling the intricate connections between user perception, trust and stereotype perception on self-esteem and psychological well-being</article-title><source>Curr Psychol</source><year>2024</year><month>03</month><volume>43</volume><issue>9</issue><fpage>7843</fpage><lpage>7858</lpage><pub-id pub-id-type="doi">10.1007/s12144-023-04989-0</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sallam</surname><given-names>M</given-names> </name></person-group><article-title>ChatGPT utility in healthcare education, research, and practice: systematic review on the promising perspectives and valid concerns</article-title><source>Healthcare (Basel)</source><year>2023</year><month>03</month><day>19</day><volume>11</volume><issue>6</issue><fpage>887</fpage><pub-id 
pub-id-type="doi">10.3390/healthcare11060887</pub-id><pub-id pub-id-type="medline">36981544</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Roos</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kasapovic</surname><given-names>A</given-names> </name><name name-style="western"><surname>Jansen</surname><given-names>T</given-names> </name><name name-style="western"><surname>Kaczmarczyk</surname><given-names>R</given-names> </name></person-group><article-title>Artificial intelligence in medical education: comparative analysis of ChatGPT, Bing, and medical students in Germany</article-title><source>JMIR Med Educ</source><year>2023</year><month>09</month><day>4</day><volume>9</volume><fpage>e46482</fpage><pub-id pub-id-type="doi">10.2196/46482</pub-id><pub-id pub-id-type="medline">37665620</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dagli</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Oettl</surname><given-names>FC</given-names> </name><name name-style="western"><surname>Gujral</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Clinical accuracy, relevance, clarity, and emotional sensitivity of large language models to surgical patient questions: cross-sectional study</article-title><source>JMIR Form Res</source><year>2024</year><month>06</month><day>7</day><volume>8</volume><fpage>e56165</fpage><pub-id pub-id-type="doi">10.2196/56165</pub-id><pub-id pub-id-type="medline">38848553</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Skryd</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Lawrence</surname><given-names>K</given-names> </name></person-group><article-title>ChatGPT as a tool for medical education and clinical decision-making on the wards: case study</article-title><source>JMIR Form Res</source><year>2024</year><month>05</month><day>8</day><volume>8</volume><fpage>e51346</fpage><pub-id pub-id-type="doi">10.2196/51346</pub-id><pub-id pub-id-type="medline">38717811</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Sheng</surname><given-names>J</given-names> </name></person-group><article-title>Assessing ChatGPT as a medical consultation assistant for chronic hepatitis B: cross-language study of English and Chinese</article-title><source>JMIR Med Inform</source><year>2024</year><month>08</month><day>8</day><volume>12</volume><fpage>e56426</fpage><pub-id pub-id-type="doi">10.2196/56426</pub-id><pub-id pub-id-type="medline">39115930</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>J</given-names> </name><name name-style="western"><surname>Vajravelu</surname><given-names>BN</given-names> </name></person-group><article-title>Assessing the current limitations of large language models in advancing health care education</article-title><source>JMIR Form Res</source><year>2025</year><month>01</month><day>16</day><volume>9</volume><fpage>e51319</fpage><pub-id pub-id-type="doi">10.2196/51319</pub-id><pub-id pub-id-type="medline">39819585</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Yang</surname><given-names>X</given-names> </name><name name-style="western"><surname>Li</surname><given-names>G</given-names> </name></person-group><article-title>Psychological and behavioral insights from social media users: natural language processing-based quantitative study on mental well-being</article-title><source>JMIR Form Res</source><year>2025</year><month>01</month><day>20</day><volume>9</volume><fpage>e60286</fpage><pub-id pub-id-type="doi">10.2196/60286</pub-id><pub-id pub-id-type="medline">39832365</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zheng</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Shen</surname><given-names>J</given-names> </name><name name-style="western"><surname>An</surname><given-names>R</given-names> </name></person-group><article-title>Artificial intelligence applications to measure food and nutrient intakes: scoping review</article-title><source>J Med Internet Res</source><year>2024</year><month>11</month><day>28</day><volume>26</volume><fpage>e54557</fpage><pub-id pub-id-type="doi">10.2196/54557</pub-id><pub-id pub-id-type="medline">39608003</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tagi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hamada</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Shan</surname><given-names>X</given-names> </name><etal/></person-group><article-title>A food intake estimation system using an artificial intelligence-based model for estimating leftover hospital liquid food in clinical environments: 
development and validation study</article-title><source>JMIR Form Res</source><year>2024</year><month>11</month><day>5</day><volume>8</volume><fpage>e55218</fpage><pub-id pub-id-type="doi">10.2196/55218</pub-id><pub-id pub-id-type="medline">39500491</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ayers</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Poliak</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dredze</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Comparing physician and artificial intelligence chatbot responses to patient questions posted to a public social media forum</article-title><source>JAMA Intern Med</source><year>2023</year><month>06</month><day>1</day><volume>183</volume><issue>6</issue><fpage>589</fpage><pub-id pub-id-type="doi">10.1001/jamainternmed.2023.1838</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lukac</surname><given-names>S</given-names> </name><name name-style="western"><surname>Dayan</surname><given-names>D</given-names> </name><name name-style="western"><surname>Fink</surname><given-names>V</given-names> </name><etal/></person-group><article-title>Evaluating ChatGPT as an adjunct for the multidisciplinary tumor board decision-making in primary breast cancer cases</article-title><source>Arch Gynecol Obstet</source><year>2023</year><month>12</month><volume>308</volume><issue>6</issue><fpage>1831</fpage><lpage>1844</lpage><pub-id pub-id-type="doi">10.1007/s00404-023-07130-5</pub-id><pub-id pub-id-type="medline">37458761</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Seth</surname><given-names>I</given-names> </name><name name-style="western"><surname>Cox</surname><given-names>A</given-names> </name><name name-style="western"><surname>Xie</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Evaluating chatbot efficacy for answering frequently asked questions in plastic surgery: a ChatGPT case study focused on breast augmentation</article-title><source>Aesthet Surg J</source><year>2023</year><month>09</month><day>14</day><volume>43</volume><issue>10</issue><fpage>1126</fpage><lpage>1135</lpage><pub-id pub-id-type="doi">10.1093/asj/sjad140</pub-id><pub-id pub-id-type="medline">37158147</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Samaan</surname><given-names>JS</given-names> </name><name name-style="western"><surname>Yeo</surname><given-names>YH</given-names> </name><name name-style="western"><surname>Rajeev</surname><given-names>N</given-names> </name><etal/></person-group><article-title>Assessing the accuracy of responses by the language model ChatGPT to questions regarding bariatric surgery</article-title><source>Obes Surg</source><year>2023</year><month>06</month><volume>33</volume><issue>6</issue><fpage>1790</fpage><lpage>1796</lpage><pub-id pub-id-type="doi">10.1007/s11695-023-06603-5</pub-id><pub-id pub-id-type="medline">37106269</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>D&#x00FC;king</surname><given-names>P</given-names> </name><name name-style="western"><surname>Sperlich</surname><given-names>B</given-names> </name><name name-style="western"><surname>Voigt</surname><given-names>L</given-names> </name><name name-style="western"><surname>Van Hooren</surname><given-names>B</given-names> </name><name 
name-style="western"><surname>Zanini</surname><given-names>M</given-names> </name><name name-style="western"><surname>Zinner</surname><given-names>C</given-names> </name></person-group><article-title>ChatGPT generated training plans for runners are not rated optimal by coaching experts, but increase in quality with additional input information</article-title><source>J Sports Sci Med</source><year>2024</year><month>03</month><volume>23</volume><issue>1</issue><fpage>56</fpage><lpage>72</lpage><pub-id pub-id-type="doi">10.52082/jssm.2024.56</pub-id><pub-id pub-id-type="medline">38455449</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dergaa</surname><given-names>I</given-names> </name><name name-style="western"><surname>Ben Saad</surname><given-names>H</given-names> </name><name name-style="western"><surname>El Omri</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Using artificial intelligence for exercise prescription in personalised health promotion: a critical evaluation of OpenAI&#x2019;s GPT-4 model</article-title><source>Biol Sport</source><year>2024</year><month>03</month><volume>41</volume><issue>2</issue><fpage>221</fpage><lpage>241</lpage><pub-id pub-id-type="doi">10.5114/biolsport.2024.133661</pub-id><pub-id pub-id-type="medline">38524814</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dergaa</surname><given-names>I</given-names> </name><name name-style="western"><surname>Ben Saad</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ghouili</surname><given-names>H</given-names> </name><etal/></person-group><article-title>Evaluating the applicability and appropriateness of ChatGPT as a source for tailored nutrition advice: a multi-scenario 
study</article-title><source>NAJM</source><year>2024</year><volume>2</volume><issue>1</issue><fpage>1</fpage><lpage>16</lpage><pub-id pub-id-type="doi">10.61838/kman.najm.2.1.1</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Havers</surname><given-names>T</given-names> </name><name name-style="western"><surname>Masur</surname><given-names>L</given-names> </name><name name-style="western"><surname>Isenmann</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Reproducibility and quality of hypertrophy-related training plans generated by GPT-4 and Google Gemini as evaluated by coaching experts</article-title><source>Biol Sport</source><year>2025</year><month>04</month><volume>42</volume><issue>2</issue><fpage>289</fpage><lpage>329</lpage><pub-id pub-id-type="doi">10.5114/biolsport.2025.145911</pub-id><pub-id pub-id-type="medline">40182716</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McKay</surname><given-names>AKA</given-names> </name><name name-style="western"><surname>Stellingwerff</surname><given-names>T</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>ES</given-names> </name><etal/></person-group><article-title>Defining training and performance caliber: a participant classification framework</article-title><source>Int J Sports Physiol Perform</source><year>2022</year><month>02</month><day>1</day><volume>17</volume><issue>2</issue><fpage>317</fpage><lpage>331</lpage><pub-id pub-id-type="doi">10.1123/ijspp.2021-0451</pub-id><pub-id pub-id-type="medline">34965513</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Casado</surname><given-names>A</given-names> </name><name name-style="western"><surname>Gonz&#x00E1;lez-Moh&#x00ED;no</surname><given-names>F</given-names> </name><name name-style="western"><surname>Gonz&#x00E1;lez-Rav&#x00E9;</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Foster</surname><given-names>C</given-names> </name></person-group><article-title>Training periodization, methods, intensity distribution, and volume in highly trained and elite distance runners: a systematic review</article-title><source>Int J Sports Physiol Perform</source><year>2022</year><volume>17</volume><issue>6</issue><fpage>820</fpage><lpage>833</lpage><pub-id pub-id-type="doi">10.1123/ijspp.2021-0435</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fleiss</surname><given-names>JL</given-names> </name></person-group><article-title>Measuring nominal scale agreement among many raters</article-title><source>Psychol Bull</source><year>1971</year><volume>76</volume><issue>5</issue><fpage>378</fpage><lpage>382</lpage><pub-id pub-id-type="doi">10.1037/h0031619</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Landis</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Koch</surname><given-names>GG</given-names> </name></person-group><article-title>The measurement of observer agreement for categorical data</article-title><source>Biometrics</source><year>1977</year><month>03</month><volume>33</volume><issue>1</issue><fpage>159</fpage><pub-id pub-id-type="doi">10.2307/2529310</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Wei</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Yao</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Cui</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wei</surname><given-names>B</given-names> </name><name name-style="western"><surname>Jin</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>X</given-names> </name></person-group><article-title>Evaluation of ChatGPT-generated medical responses: a systematic review and meta-analysis</article-title><source>J Biomed Inform</source><year>2024</year><month>03</month><volume>151</volume><fpage>104620</fpage><pub-id pub-id-type="doi">10.1016/j.jbi.2024.104620</pub-id><pub-id pub-id-type="medline">38462064</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Agne</surname><given-names>I</given-names> </name><name name-style="western"><surname>Gedrich</surname><given-names>K</given-names> </name></person-group><article-title>Personalized dietary recommendations for obese individuals&#x2014;a comparison of ChatGPT and the Food4Me algorithm</article-title><source>Clin Nutr Open Sci</source><year>2024</year><month>08</month><volume>56</volume><fpage>192</fpage><lpage>201</lpage><pub-id pub-id-type="doi">10.1016/j.nutos.2024.06.001</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nastasi</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Courtright</surname><given-names>KR</given-names> </name><name name-style="western"><surname>Halpern</surname><given-names>SD</given-names> </name><name name-style="western"><surname>Weissman</surname><given-names>GE</given-names> 
</name></person-group><article-title>A vignette-based evaluation of ChatGPT&#x2019;s ability to provide appropriate and equitable medical advice across care contexts</article-title><source>Sci Rep</source><year>2023</year><month>10</month><day>19</day><volume>13</volume><issue>1</issue><fpage>17885</fpage><pub-id pub-id-type="doi">10.1038/s41598-023-45223-y</pub-id><pub-id pub-id-type="medline">37857839</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>G&#x00FC;nay</surname><given-names>S</given-names> </name><name name-style="western"><surname>&#x00D6;zt&#x00FC;rk</surname><given-names>A</given-names> </name><name name-style="western"><surname>Yi&#x011F;it</surname><given-names>Y</given-names> </name></person-group><article-title>The accuracy of Gemini, GPT-4, and GPT-4o in ECG analysis: a comparison with cardiologists and emergency medicine specialists</article-title><source>Am J Emerg Med</source><year>2024</year><month>10</month><volume>84</volume><fpage>68</fpage><lpage>73</lpage><pub-id pub-id-type="doi">10.1016/j.ajem.2024.07.043</pub-id><pub-id pub-id-type="medline">39096711</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Carl&#x00E0;</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Gambini</surname><given-names>G</given-names> </name><name name-style="western"><surname>Baldascino</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Exploring AI-chatbots&#x2019; capability to suggest surgical planning in ophthalmology: ChatGPT versus Google Gemini analysis of retinal detachment cases</article-title><source>Br J 
Ophthalmol</source><year>2024</year><month>09</month><day>20</day><volume>108</volume><issue>10</issue><fpage>1457</fpage><lpage>1469</lpage><pub-id pub-id-type="doi">10.1136/bjo-2023-325143</pub-id><pub-id pub-id-type="medline">38448201</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hieronimus</surname><given-names>B</given-names> </name><name name-style="western"><surname>Hammann</surname><given-names>S</given-names> </name><name name-style="western"><surname>Podszun</surname><given-names>MC</given-names> </name></person-group><article-title>Can the AI tools ChatGPT and Bard generate energy, macro- and micro-nutrient sufficient meal plans for different dietary patterns?</article-title><source>Nutr Res</source><year>2024</year><month>08</month><volume>128</volume><fpage>105</fpage><lpage>114</lpage><pub-id pub-id-type="doi">10.1016/j.nutres.2024.07.002</pub-id><pub-id pub-id-type="medline">39102765</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pressman</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Borna</surname><given-names>S</given-names> </name><name name-style="western"><surname>Gomez-Cabello</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Haider</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Forte</surname><given-names>AJ</given-names> </name></person-group><article-title>AI in hand surgery: assessing large language models in the classification and management of hand injuries</article-title><source>J Clin Med</source><year>2024</year><month>05</month><day>11</day><volume>13</volume><issue>10</issue><fpage>2832</fpage><pub-id pub-id-type="doi">10.3390/jcm13102832</pub-id><pub-id 
pub-id-type="medline">38792374</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dergaa</surname><given-names>I</given-names> </name><name name-style="western"><surname>Fekih-Romdhane</surname><given-names>F</given-names> </name><name name-style="western"><surname>Hallit</surname><given-names>S</given-names> </name><etal/></person-group><article-title>ChatGPT is not ready yet for use in providing mental health assessment and interventions</article-title><source>Front Psychiatry</source><year>2023</year><volume>14</volume><fpage>1277756</fpage><pub-id pub-id-type="doi">10.3389/fpsyt.2023.1277756</pub-id><pub-id pub-id-type="medline">38239905</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Washif</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Pagaduan</surname><given-names>J</given-names> </name><name name-style="western"><surname>James</surname><given-names>C</given-names> </name><name name-style="western"><surname>Dergaa</surname><given-names>I</given-names> </name><name name-style="western"><surname>Beaven</surname><given-names>CM</given-names> </name></person-group><article-title>Artificial intelligence in sport: exploring the potential of using ChatGPT in resistance training prescription</article-title><source>Biol Sport</source><year>2024</year><month>03</month><volume>41</volume><issue>2</issue><fpage>209</fpage><lpage>220</lpage><pub-id pub-id-type="doi">10.5114/biolsport.2024.132987</pub-id><pub-id pub-id-type="medline">38524820</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zaleski</surname><given-names>AL</given-names> </name><name 
name-style="western"><surname>Berkowsky</surname><given-names>R</given-names> </name><name name-style="western"><surname>Craig</surname><given-names>KJT</given-names> </name><name name-style="western"><surname>Pescatello</surname><given-names>LS</given-names> </name></person-group><article-title>Comprehensiveness, accuracy, and readability of exercise recommendations provided by an AI-based chatbot: mixed methods study</article-title><source>JMIR Med Educ</source><year>2024</year><month>01</month><day>11</day><volume>10</volume><fpage>e51308</fpage><pub-id pub-id-type="doi">10.2196/51308</pub-id><pub-id pub-id-type="medline">38206661</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kunze</surname><given-names>KN</given-names> </name><name name-style="western"><surname>Varady</surname><given-names>NH</given-names> </name><name name-style="western"><surname>Mazzucco</surname><given-names>M</given-names> </name><etal/></person-group><article-title>The large language model ChatGPT-4 exhibits excellent triage capabilities and diagnostic performance for patients presenting with various causes of knee pain</article-title><source>Arthroscopy</source><year>2025</year><month>05</month><volume>41</volume><issue>5</issue><fpage>1438</fpage><lpage>1447</lpage><pub-id pub-id-type="doi">10.1016/j.arthro.2024.06.021</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>O&#x2019;Donnell</surname><given-names>S</given-names> </name><name name-style="western"><surname>Driller</surname><given-names>MW</given-names> </name></person-group><article-title>Sleep-hygiene education improves sleep indices in elite female athletes</article-title><source>Int J Exerc 
Sci</source><year>2017</year><volume>10</volume><issue>4</issue><fpage>522</fpage><lpage>530</lpage><pub-id pub-id-type="doi">10.70252/DNOL2901</pub-id><pub-id pub-id-type="medline">28674597</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fullagar</surname><given-names>HHK</given-names> </name><name name-style="western"><surname>Skorski</surname><given-names>S</given-names> </name><name name-style="western"><surname>Duffield</surname><given-names>R</given-names> </name><name name-style="western"><surname>Julian</surname><given-names>R</given-names> </name><name name-style="western"><surname>Bartlett</surname><given-names>J</given-names> </name><name name-style="western"><surname>Meyer</surname><given-names>T</given-names> </name></person-group><article-title>Impaired sleep and recovery after night matches in elite football players</article-title><source>J Sports Sci</source><year>2016</year><month>07</month><volume>34</volume><issue>14</issue><fpage>1333</fpage><lpage>1339</lpage><pub-id pub-id-type="doi">10.1080/02640414.2015.1135249</pub-id><pub-id pub-id-type="medline">26750446</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Driller</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Lastella</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sharp</surname><given-names>AP</given-names> </name></person-group><article-title>Individualized sleep education improves subjective and objective sleep indices in elite cricket athletes: a pilot study</article-title><source>J Sports Sci</source><year>2019</year><month>09</month><volume>37</volume><issue>17</issue><fpage>2121</fpage><lpage>2125</lpage><pub-id 
pub-id-type="doi">10.1080/02640414.2019.1616900</pub-id><pub-id pub-id-type="medline">31076021</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Conversation with GPT-4o.</p><media xlink:href="formative_v9i1e71358_app1.docx" xlink:title="DOCX File, 23 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Conversation with Google Gemini.</p><media xlink:href="formative_v9i1e71358_app2.docx" xlink:title="DOCX File, 28 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Results of the significance testing comparing recommendations generated by Google Gemini and GPT-4o in response to different input information granularity.</p><media xlink:href="formative_v9i1e71358_app3.docx" xlink:title="DOCX File, 15 KB"/></supplementary-material></app-group></back></article>