<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i1e90037</article-id>
      <article-id pub-id-type="pmid">42303247</article-id>
      <article-id pub-id-type="doi">10.2196/90037</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Learning Gain and User Experience of AI Avatar–Based and Human-Presented Explainer Videos: Prospective Randomized Crossover Feasibility Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Law</surname>
            <given-names>Stephanie</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Wang</surname>
            <given-names>Xin</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Suen</surname>
            <given-names>Richard</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Reichert</surname>
            <given-names>Mike</given-names>
          </name>
          <degrees>B.Eng</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0001-7740-2433</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Jungmann</surname>
            <given-names>Thorsten</given-names>
          </name>
          <degrees>Dr.-Ing.</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6420-3104</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>von Jan</surname>
            <given-names>Ute</given-names>
          </name>
          <degrees>Dr. rer. biol. hum.</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-9225-593X</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Albrecht</surname>
            <given-names>Urs-Vito</given-names>
          </name>
          <degrees>MD, DrPH, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Digital Medicine</institution>
            <institution>Medical School OWL</institution>
            <institution>Bielefeld University</institution>
            <addr-line>Universitätsstraße 25</addr-line>
            <addr-line>Bielefeld, North Rhine-Westphalia, 33615</addr-line>
            <country>Germany</country>
            <phone>49 521 106 ext 86714</phone>
            <email>urs-vito.albrecht@uni-bielefeld.de</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8989-6696</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Digital Medicine</institution>
        <institution>Medical School OWL</institution>
        <institution>Bielefeld University</institution>
        <addr-line>Bielefeld, North Rhine-Westphalia</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Faculty of Engineering and Mathematics</institution>
        <institution>Hochschule Bielefeld</institution>
        <addr-line>Bielefeld, North Rhine-Westphalia</addr-line>
        <country>Germany</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Peter L Reichertz Institute for Medical Informatics of TU Braunschweig and Hannover Medical School</institution>
        <institution>Hannover Medical School</institution>
        <addr-line>Hannover, Lower Saxony</addr-line>
        <country>Germany</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Urs-Vito Albrecht <email>urs-vito.albrecht@uni-bielefeld.de</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2026</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>16</day>
        <month>6</month>
        <year>2026</year>
      </pub-date>
      <volume>10</volume>
      <elocation-id>e90037</elocation-id>
      <history>
        <date date-type="received">
          <day>22</day>
          <month>12</month>
          <year>2025</year>
        </date>
        <date date-type="rev-request">
          <day>17</day>
          <month>3</month>
          <year>2026</year>
        </date>
        <date date-type="rev-recd">
          <day>14</day>
          <month>5</month>
          <year>2026</year>
        </date>
        <date date-type="accepted">
          <day>14</day>
          <month>5</month>
          <year>2026</year>
        </date>
      </history>
      <copyright-statement>©Mike Reichert, Thorsten Jungmann, Ute von Jan, Urs-Vito Albrecht. Originally published in JMIR Formative Research (https://formative.jmir.org), 16.06.2026.</copyright-statement>
      <copyright-year>2026</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2026/1/e90037" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Explainer videos are widely used in higher education. With the increasing availability of artificial intelligence (AI)–generated avatars, it remains unclear whether the presentation format—human presenter vs AI avatar—affects learning outcomes and user experience, especially in technologically complex fields.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to assess the feasibility of a randomized crossover design to investigate learning gain and user experience associated with content-identical explainer videos delivered by either an AI-generated avatar or a human presenter. Exploratory analyses examined the potential differences between the presentation formats.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>An observer-blinded, prospective randomized crossover feasibility study was conducted with 13 undergraduate engineering students. Participants viewed 2 content-identical explainer videos on fuel cell technology presented by either an AI-generated avatar or a human presenter in a randomized sequence. Learning gains were recorded using a 7-item knowledge test administered at baseline and after the first and second video presentations. User experience was assessed after each video by using the AttrakDiff2 questionnaire. Because there was no washout period and the instructional material was identical in both videos, the second learning phase was vulnerable to carryover and test-retest effects. Consequently, analyses of learning outcomes focused on the initial phase, whereas user experience was examined through pooled comparisons across both conditions.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Both presentation formats were associated with substantial short-term learning gains. The difference in the learning gain between the AI avatar and human presenter videos was not statistically significant (median newly correct items 5, IQR 3-5.5 vs 4.5, IQR 2.5-5; <italic>P</italic>=.51; <italic>Z</italic>=0.66; <italic>r</italic>=0.183). In contrast, user experience ratings were consistently higher for the human-presented video across all AttrakDiff2 dimensions, with small to medium effect sizes. The AI avatar presentation was generally perceived as neutral.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study shows that investigating AI-based explainer videos vs those using a human presenter in classroom settings is feasible and highlights methodological challenges, particularly those related to crossover designs involving content-identical materials. In this small exploratory sample, no significant differences in short-term learning gains were detected between different presentation formats. Nonetheless, participants clearly preferred human presenters in terms of user experience. These results should not be seen as proof of equivalence but rather as a foundation for future research with larger sample sizes, improved study designs, and more sensitive outcome measures.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>artificial intelligence avatars</kwd>
        <kwd>AI avatars</kwd>
        <kwd>explainer videos</kwd>
        <kwd>learning gain</kwd>
        <kwd>user experience</kwd>
        <kwd>higher education</kwd>
        <kwd>crossover study</kwd>
        <kwd>feasibility study</kwd>
        <kwd>digital learning</kwd>
        <kwd>artificial intelligence</kwd>
        <kwd>AI</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Theoretical Background</title>
        <p>Explainer videos are vital in digital education, conveying concepts through audiovisual means [<xref ref-type="bibr" rid="ref1">1</xref>]. Their importance grows with the rise of online learning, complementing text-based materials, especially in self-directed learning. Platforms such as YouTube offer vast, diverse content, but quality varies, and algorithms often prioritize engagement over educational value [<xref ref-type="bibr" rid="ref2">2</xref>]. Empirical work comparing artificial intelligence (AI)–generated avatar presenters with human presenters in educational videos has only recently emerged. A small but identifiable body of randomized and quasi-experimental studies has compared both formats and converges on the finding that short-term learning gains are broadly comparable across formats, whereas user experience (UX) ratings and video engagement tend to favor human presenters [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. A recent rapid review of AI-generated instructional videos synthesizing 15 primary studies from 2023 to 2025 supports this pattern [<xref ref-type="bibr" rid="ref8">8</xref>]. However, the entirety of this experimental evidence has so far been generated either on subject matter that is broadly cross-disciplinary (general science, technology, engineering, and mathematics introductions and educational technology) or on nontechnical or generic content. No randomized study to date has examined AI avatar–based explainer videos with electrical engineering undergraduate students using domain-authentic content such as fuel cells, circuitry, or signal processing. This is a notable gap because electrical engineering explainer videos are visually dense and schematic-centered, introducing a distinct split-attention situation between the presenter and the static technical visualization. 
There is even eye-tracking evidence from Nugroho et al [<xref ref-type="bibr" rid="ref9">9</xref>] suggesting that avatar presence may be more favorable than physical teacher presence for cognitive load balance in such settings, but this has not been tested in electrical engineering. Therefore, this feasibility study addresses a specific disciplinary, visualization-related, and UX gap rather than a general absence of evidence on AI-generated educational media.</p>
        <p>The effectiveness of explainer videos can be understood through established learning theories. Dual-coding theory suggests that information is better retained when processed through both verbal and visual channels, thereby creating complementary memory traces [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref12">12</xref>]. Similarly, the cognitive theory of multimedia learning posits that learners process auditory, verbal, visual, and pictorial information in separate but capacity-limited channels [<xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref13">13</xref>]. Meaningful learning occurs when learners actively select, organize, and integrate information across these channels. Therefore, multimedia instruction is most effective when it follows principles such as coherence, signaling, and temporal and spatial contiguity [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>]. As explainer videos combine narration with visual elements, they are well suited to applying these principles. However, their effectiveness may vary depending on presentation format, including whether a human presenter or an AI-generated avatar delivers content. Cognitive load theory further differentiates the demands of multimedia learning by distinguishing intrinsic, extraneous, and germane cognitive load [<xref ref-type="bibr" rid="ref15">15</xref>]. In domains such as electrical engineering, intrinsic load is often high because topics such as fuel cells involve complex processes, multiple subsystems, and specialized terminology. Therefore, effective instruction should minimize unnecessary extraneous load and support schema construction [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. 
Presenter modality may influence this balance: human presenters may facilitate comprehension through natural nonverbal cues, whereas AI avatars may introduce additional perceptual or interpretive demands. Beyond cognitive processing, social and experiential factors also shape the impact of educational videos. Social agency theory suggests that social cues—such as a human voice, gaze, and expressive behavior—can enhance engagement and promote active processing by simulating interpersonal interaction [<xref ref-type="bibr" rid="ref17">17</xref>]. AI avatars represent a form of mediated social presence. Although they can approximate humanlike behavior, perceived artificiality and the uncanny valley effect may reduce acceptance and trust [<xref ref-type="bibr" rid="ref18">18</xref>]. UX provides an additional perspective on how learners perceive instructional media. According to the framework by Hassenzahl [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>], perceived quality includes pragmatic quality (PQ; eg, clarity and usability) and hedonic quality (eg, stimulation and identification). In educational contexts, PQ directly supports comprehension, whereas hedonic quality may influence engagement and motivation [<xref ref-type="bibr" rid="ref21">21</xref>]. However, excessive stimulation may also increase cognitive load if it is not aligned with instructional goals. Taken together, the effectiveness of explainer videos depends on the interaction of cognitive design, social signaling, and UX. This is particularly relevant in technically demanding domains such as electrical engineering, where instructional clarity and learner engagement must be balanced. 
While AI-generated avatars offer advantages in scalability and flexibility [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>], it remains unclear whether they can match the cognitive and experiential qualities of human presenters, particularly in technically demanding domains.</p>
      </sec>
      <sec>
        <title>Study Aim</title>
        <p>The primary aim of this study was to assess the feasibility of a randomized crossover design for evaluating AI avatar–based vs human-presented explainer videos in a classroom setting. Exploratory analyses examined potential differences in learning gain and UX between presentation formats. The findings are intended to inform the design of future larger-scale studies on AI-based instructional media in higher education.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Study Design</title>
        <p>This study investigated whether the mode of content presentation in an educational explainer video—via either an AI-generated avatar or a human presenter—affects learning gain and subjective UX among undergraduate engineering students. This study used an observer-blinded, noninterventional, noninvasive prospective randomized crossover design similar to that used in previous studies comparing conventional, book-based learning with learning via a mobile augmented reality app on smartphones [<xref ref-type="bibr" rid="ref24">24</xref>,<xref ref-type="bibr" rid="ref25">25</xref>]. This study was designed as an exploratory feasibility study with a primary focus on descriptive analysis and pattern detection rather than confirmatory hypothesis testing. Three knowledge measurements were conducted: baseline (LZ<sub>0</sub>), after the first video (LZ<sub>1</sub>), and after the second video (LZ<sub>2</sub>) following crossover. The crossover structure was chosen for feasibility reasons because it allowed each participant to experience both presentation formats within a small sample and enabled the collection of both learning and UX data for each format in a classroom setting.</p>
      </sec>
      <sec>
        <title>Participants and Randomization</title>
        <p>Participants were recruited from a defined sample population of undergraduate students in the second semester of the industrial engineering program (electronics module) at Hochschule Bielefeld – University of Applied Sciences and Arts. Recruitment took place during regular course sessions in June 2024, and all eligible students present at this time were invited to participate. Participation was voluntary, and no incentives were offered. Formal nonresponse tracking was not conducted; however, all students who agreed to participate were included in the study. Participants received written study information in advance, and consent was implied by completion of the questionnaires. Participants were randomly assigned to 1 of 2 groups (A or B) and tested in separate rooms to minimize cross-group influence. A facilitator not otherwise involved with the study supervised each group in accordance with a standardized protocol. The sample size was determined pragmatically based on the availability of students within the course setting and the exploratory nature of this feasibility study. No a priori sample size calculation was performed.</p>
      </sec>
      <sec>
        <title>Study Procedure</title>
        <p>At baseline (t<sub>0</sub>), participants generated an anonymous personal code to allow for longitudinal matching of questionnaires. Demographic data (age and gender) were recorded, and baseline knowledge was assessed using a 7-item knowledge test. Group A first watched the explainer video presented by an AI-generated avatar, whereas group B first watched a video with identical content but presented by a human speaker. Immediately afterward (t<sub>1</sub>), knowledge was reassessed (LZ<sub>1</sub>), and UX was measured (UX<sub>1</sub>). Subsequently, presentation formats were crossed over. Group A viewed the human-presented video, and group B viewed the AI avatar video. After the second video (t<sub>2</sub>), learning gain (LZ<sub>2</sub>) and UX (UX<sub>2</sub>) were again assessed (<xref rid="figure1" ref-type="fig">Figure 1</xref>).</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Schematic overview of the randomized crossover study procedure. After providing consent and completing the baseline assessment (LZ<sub>0</sub>), the participants were randomized to group A or group B. In learning phase 1, group A viewed the artificial intelligence (AI) avatar explainer video, and group B viewed the human-presented explainer video, followed by posttest and user experience (UX) assessments (LZ<sub>1</sub>+UX<sub>1</sub>; t<sub>1</sub>). In learning phase 2, presentation formats were crossed over, followed by the second posttest and UX assessment (LZ<sub>2</sub>+UX<sub>2</sub>; t<sub>2</sub>). Approximate durations of each study step are indicated along the bottom of the figure.</p>
          </caption>
          <graphic xlink:href="formative_v10i1e90037_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Development of the Explainer Videos</title>
        <p>A single explainer video (<xref rid="figure2" ref-type="fig">Figure 2</xref>) introducing fundamental concepts of fuel cell technology was developed and used in 2 presentation variants. Content structure, learning objectives, visual layout, narration text, scene sequence, duration (5 minutes 25 seconds), and audio quality were kept identical across both versions. The AI-based video was created using the Synthesia platform (Synthesia Ltd), selecting a male avatar with a neutral professional appearance. The video was generated using static background slides and synthetic speech. The human-presented version was recorded in the Hochschule Bielefeld – University of Applied Sciences and Arts learning laboratory using a green screen setup, teleprompter, external microphones (Rode Wireless ME), and a smartphone camera (iPhone 14 Pro; Apple Inc). Postproduction included audio optimization and replacing the green screen with background slides identical to those used in the AI version. Video editing was performed using DaVinci Resolve (version 18.6; Blackmagic Design). The goal was to ensure that the only systematic difference between the videos was the presenter type.</p>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>Screenshots from the explainer videos illustrating the structure and operating principle of a proton exchange membrane fuel cell. The facial features are blurred in the screenshots to protect identifying features; the blurring is not part of the videos themselves. The same instructional content and visual background are shown in both versions: (A) explainer video presented by an artificial intelligence–generated avatar and (B) explainer video presented by a human instructor.</p>
          </caption>
          <graphic xlink:href="formative_v10i1e90037_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Learning Gain Assessment</title>
        <p>Knowledge was assessed using a self-developed 7-item questionnaire administered at all 3 measurement points (baseline, after video 1, and after video 2). Items were derived directly from the explainer video’s learning objectives and formulated as true-or-false statements with an additional “I don’t know” option. Each correct answer was worth 1 point, resulting in a score between 0 and 7. Learning gain was defined as an increase of at least 1 correctly answered item between 2 consecutive measurement points. A learning regression was defined as a change from a previously correct answer to an incorrect response. The knowledge test was developed specifically for this study and directly aligned with the explainer video’s predefined learning objectives. Each item was designed to assess a key concept presented in the instructional material, ensuring content coverage of the central topics. Given the exploratory, feasibility-focused design, the instrument was intentionally designed as a brief, low-burden assessment to capture immediate learning effects in the classroom. Formal validation procedures, such as expert review, pilot-testing, or psychometric evaluation (eg, internal consistency), were not conducted.</p>
      </sec>
      <sec>
        <title>UX Assessment</title>
        <p>UX was measured using the standardized AttrakDiff2 questionnaire based on the UX model by Hassenzahl [<xref ref-type="bibr" rid="ref26">26</xref>]. The instrument was administered after each video (UX<sub>1</sub> and UX<sub>2</sub>). AttrakDiff2 consists of 28 semantic differential items rated on a 7-point scale and yields 4 scale scores: PQ, hedonic quality—stimulation (HQ-S), hedonic quality—identification (HQ-I), and overall attractiveness (ATT). The instrument captures both task-oriented and experiential aspects of product perception. Missing values in the AttrakDiff2 questionnaire were not imputed. Scale scores were calculated using the available items for each participant provided that sufficient data were present.</p>
      </sec>
      <sec>
        <title>Analytical Approach</title>
        <p>Because of the small sample size and the ordinal nature of the data, analyses were mainly descriptive and exploratory. Learning gain analyses included all 3 assessment points (LZ<sub>0</sub>, LZ<sub>1</sub>, and LZ<sub>2</sub>). Between-group differences in learning gain were assessed using 2-sided Mann-Whitney <italic>U</italic> tests at a significance level of α=.05, and effect sizes were calculated as <italic>r</italic> = <italic>Z</italic>/√N. Within-group changes between assessment points were evaluated using Wilcoxon signed-rank tests. Because the crossover design used identical instructional material without a washout period, the second learning phase (LZ<sub>1</sub> to LZ<sub>2</sub>) was considered vulnerable to test-retest and carryover effects. Therefore, the primary comparative analysis focused on the first study period (LZ<sub>0</sub> to LZ<sub>1</sub>). Analyses involving LZ<sub>2</sub> were conducted descriptively and are reported in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> because the second exposure could not be interpreted independently of prior content exposure. For UX outcomes, potential ordering effects were first examined by comparing randomized sequence groups. As no statistically significant ordering effects were detected in this exploratory sample, UX ratings from UX<sub>1</sub> and UX<sub>2</sub> were pooled by presentation format for the primary analysis. Phase-specific analyses were additionally conducted as sensitivity analyses and are reported in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. Given the crossover design and repeated measurements, more advanced methods would ordinarily be appropriate to account for within-subject dependencies and potential carryover effects. However, due to the limited sample size, analyses were restricted to simple nonparametric methods.</p>
        <p>All statistical calculations were performed using R (version 4.5.0; R Foundation for Statistical Computing) [<xref ref-type="bibr" rid="ref27">27</xref>] in combination with the <italic>coin</italic> package (version 1.4-3) [<xref ref-type="bibr" rid="ref28">28</xref>].</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study was conducted following the ethical principles outlined in the Declaration of Helsinki. The Ethics Committee of the Ärztekammer Westfalen-Lippe and the University of Münster, Germany, granted ethics approval (reference number: 2024-346-f-N). All participants were informed about the study’s objectives, procedures, and data handling before taking part, and informed consent was obtained before enrollment in the study. Participation was voluntary, and participants could withdraw at any time without consequences. All data were collected anonymously using participant codes created by the participants themselves, ensuring that no personally identifiable information was recorded. Data were handled and stored in accordance with applicable data protection laws to maintain confidentiality and privacy. Participants received neither financial nor material compensation for their involvement in the study.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Overview</title>
        <p>Feasibility outcomes were assessed across 4 key domains: recruitment, retention, data completeness, and procedural implementation. During regular course sessions, 13 students were approached and screened for eligibility. All were eligible and agreed to participate, resulting in 100% (13/13) enrollment and participation. Once enrolled, all participants completed the study, resulting in a 100% (13/13) retention rate. Study procedures—including randomization, video presentation, and repeated measurements—were implemented as described above. Overall, the study procedures were feasible in terms of implementation, timing, and participant adherence. Of the 13 students recruited for the study, 4 (30.8%) were female (mean age 21.5, SD 3.0 years), and 9 (69.2%) were male (mean age 21.8, SD 3.3 years). The 13 participants were randomized into study groups A (n=7, 53.8%) and B (n=6, 46.2%) and completed the study with no dropouts and complete data for the primary outcome. There were minor missing responses in the AttrakDiff2 questionnaire, as detailed below. In total, 23 items were missing across all participants, primarily affecting the dimensions of HQ-S and HQ-I. However, the number of missing values was limited and did not interfere with scale score calculation. Thus, all 13 participants were included in the final analysis.</p>
      </sec>
      <sec>
        <title>Learning Gain (Primary Outcome; Per-Group Learning Gains)</title>
        <p>Knowledge scores increased from LZ<sub>0</sub> to LZ<sub>1</sub> in both randomized groups, indicating substantial short-term learning within each group after the first video presentation (<xref ref-type="table" rid="table1">Table 1</xref> and <xref rid="figure3" ref-type="fig">Figures 3</xref> and <xref rid="figure4" ref-type="fig">4</xref>). The main learning analysis indicates that both the AI avatar and human-presented videos were associated with substantial short-term knowledge gains from the pre- to posttest time point, with no statistically detectable between-group difference in the magnitude of gain in this small feasibility sample.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Relative distribution of the response categories (correct answer, incorrect answer, “I don’t know,” and learning regression from correct to incorrect) at baseline (LZ<sub>0</sub>) and after the first explainer video (LZ<sub>1</sub>)<sup>a</sup>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="30"/>
            <col width="380"/>
            <col width="0"/>
            <col width="180"/>
            <col width="0"/>
            <col width="180"/>
            <col width="0"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td colspan="3">Category</td>
                <td colspan="2">LZ<sub>0</sub>, n (%)</td>
                <td colspan="2">LZ<sub>1</sub>, n (%)</td>
                <td>Change (percentage points; LZ<sub>0</sub> to LZ<sub>1</sub>)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td colspan="8">
                  <bold>Group A (n=49)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Incorrect</td>
                <td colspan="2">4 (8.2)</td>
                <td colspan="2">4 (8.2)</td>
                <td colspan="2">+0.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>I don’t know</td>
                <td colspan="2">38 (77.6)</td>
                <td colspan="2">8 (16.3)</td>
                <td colspan="2">−61.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Correct</td>
                <td colspan="2">7 (14.3)</td>
                <td colspan="2">37 (75.5)</td>
                <td colspan="2">+61.2</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Regression: correct to incorrect</td>
                <td colspan="2">—<sup>b</sup></td>
                <td colspan="2">0 (0)</td>
                <td colspan="2">+0.0</td>
              </tr>
              <tr valign="top">
                <td colspan="8">
                  <bold>Group B (n=42)</bold>
                </td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Incorrect</td>
                <td colspan="2">8 (19.0)</td>
                <td colspan="2">2 (4.8)</td>
                <td colspan="2">−14.3</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>I don’t know</td>
                <td colspan="2">18 (42.9)</td>
                <td colspan="2">2 (4.8)</td>
                <td colspan="2">−38.1</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Correct</td>
                <td colspan="2">16 (38.1)</td>
                <td colspan="2">37 (88.1)</td>
                <td colspan="2">+50.0</td>
              </tr>
              <tr valign="top">
                <td>
                  <break/>
                </td>
                <td>Regression: correct to incorrect</td>
                <td colspan="2">—</td>
                <td colspan="2">1 (2.4)</td>
                <td colspan="2">+2.4</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>The N values of 49 and 42 correspond to the number of participants (group A: n=7; group B: n=6) multiplied by the number of questions (7 questions per assessment), whereas the n values represent the number of answers for the respective category and assessment (aggregated over all questions).</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>Not applicable.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Individual learning trajectories from baseline (LZ<sub>0</sub>) to the first posttest time point (LZ<sub>1</sub>). In group B, participant P1 showed no learning gain because all questions were answered correctly at both measurement points. AI: artificial intelligence.</p>
          </caption>
          <graphic xlink:href="formative_v10i1e90037_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Item-level response patterns for groups A and B across measurement points. Matrices display individual responses (participants P1-P7) to the 7 knowledge items (A1-A7) at baseline (LZ<sub>0</sub>) and after the first explainer video (LZ<sub>1</sub>).</p>
          </caption>
          <graphic xlink:href="formative_v10i1e90037_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>At baseline, both groups showed limited prior knowledge, although group B started at a somewhat higher level than group A (<xref rid="figure3" ref-type="fig">Figure 3</xref>). From LZ<sub>0</sub> to LZ<sub>1</sub>, both groups demonstrated clear descriptive learning gains after viewing the first explainer video. This is shown descriptively in <xref rid="figure4" ref-type="fig">Figure 4</xref>, which illustrates the progression of responses between baseline and the first posttest time point and highlights learning gains and regressions at the individual and item level. Testing supported the descriptive findings with respect to the learning gains (LZ<sub>1</sub> – LZ<sub>0</sub>) between the AI avatar group (7/13, 53.8%) and the human presenter group (6/13, 46.2%): there was no significant difference in learning success between the 2 formats (<italic>P</italic>=.51; <italic>Z</italic>=0.66; <italic>r</italic>=0.183). Although UX ratings differed between presentation formats, both formats were associated with comparable short-term learning outcomes. This interpretation was further supported by the number of newly correct items (questions answered incorrectly or marked as “I don’t know” at baseline that were answered correctly at LZ<sub>1</sub>): group A (AI avatar video first) showed a median of 5 (IQR 3-5.5) newly correct items, whereas group B (human presenter video first) showed a median of 4.5 (IQR 2.5-5) newly correct items. Within-group analyses showed that knowledge scores increased from LZ<sub>0</sub> to LZ<sub>1</sub> in both randomized groups, indicating significant short-term learning within each group after the first video presentation. For group A (AI avatar video first), median scores improved from 1 (IQR 0-1.5) at baseline to 5 (IQR 5-6) at the posttest time point LZ<sub>1</sub> (<italic>P</italic>=.02; <italic>Z</italic>=−2.379; <italic>r</italic>=0.636; large effect). 
Group B (human presenter video first) also showed a substantial increase in median scores from 2.5 (IQR 0-3.75) to 6 (IQR 6-6.75), and again, this was statistically significant (<italic>P</italic>=.03; <italic>Z</italic>=−2.114; <italic>r</italic>=0.610; large effect).</p>
      </sec>
      <sec>
        <title>UX (Secondary Outcome; Evaluation of Sequence Effects)</title>
        <p>No statistically significant sequence effects related to the initial presentation mode (group A: AI avatar video first vs group B: human presenter video first) were observed across the AttrakDiff2 dimensions (<italic>P</italic>&#62;.05 in all cases; <italic>r</italic>&#60;0.15 in all cases; <xref ref-type="table" rid="table2">Table 2</xref>).</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Sequence effects based on a between-group comparison; group A was initially shown the artificial intelligence avatar–based presenter followed by the human presenter, whereas for group B, this order was reversed.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="170"/>
            <col width="90"/>
            <col width="100"/>
            <col width="90"/>
            <col width="130"/>
            <col width="220"/>
            <col width="200"/>
            <thead>
              <tr valign="top">
                <td>Quality dimension</td>
                <td><italic>Z</italic> score</td>
                <td><italic>P</italic> value</td>
                <td>
                  <italic>r</italic>
                </td>
                <td>Magnitude</td>
                <td>Group A, median (IQR)</td>
                <td>Group B, median (IQR)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>ATT<sup>a</sup></td>
                <td>−0.794</td>
                <td>.43</td>
                <td>0.059</td>
                <td>Small</td>
                <td>0.0 (−1 to 1.75)</td>
                <td>0 (−1 to 1)</td>
              </tr>
              <tr valign="top">
                <td>HQ-I<sup>b</sup></td>
                <td>1.381</td>
                <td>.17</td>
                <td>0.104</td>
                <td>Small</td>
                <td>1.0 (0 to 1.5)</td>
                <td>0 (−1 to 1)</td>
              </tr>
              <tr valign="top">
                <td>HQ-S<sup>c</sup></td>
                <td>−0.394</td>
                <td>.69</td>
                <td>0.029</td>
                <td>Small</td>
                <td>0.0 (−1 to 1)</td>
                <td>0 (−1 to 1)</td>
              </tr>
              <tr valign="top">
                <td>PQ<sup>d</sup></td>
                <td>−0.817</td>
                <td>.41</td>
                <td>0.062</td>
                <td>Small</td>
                <td>0.5 (−1 to 2)</td>
                <td>1 (0 to 2)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>ATT: overall attractiveness.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>HQ-I: hedonic quality—identification.</p>
            </fn>
            <fn id="table2fn3">
              <p><sup>c</sup>HQ-S: hedonic quality—stimulation.</p>
            </fn>
            <fn id="table2fn4">
              <p><sup>d</sup>PQ: pragmatic quality.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <p>Following the pooled analysis, the human-presented video was rated significantly more favorably than the AI avatar presentation across all AttrakDiff2 dimensions, with small to medium effect sizes (<xref ref-type="table" rid="table3">Table 3</xref>). Across all pooled item ratings, the overall comparison between presentation formats remained statistically significant (<italic>P</italic>&#60;.001; <italic>Z</italic>=−6.48; <italic>r</italic>=0.243).</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Comparison of pooled AttrakDiff2 ratings between artificial intelligence avatar and human-presented videos across the 4 quality dimensions.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="210"/>
            <col width="200"/>
            <col width="210"/>
            <col width="380"/>
            <thead>
              <tr valign="top">
                <td>Quality dimension</td>
                <td><italic>P</italic> value</td>
                <td><italic>Z</italic> score</td>
                <td><italic>r</italic> (effect size)</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>ATT<sup>a</sup></td>
                <td>&#60;.001</td>
                <td>−5.409</td>
                <td>0.403 (medium)</td>
              </tr>
              <tr valign="top">
                <td>HQ-I<sup>b</sup></td>
                <td>&#60;.001</td>
                <td>−5.224</td>
                <td>0.393 (medium)</td>
              </tr>
              <tr valign="top">
                <td>HQ-S<sup>c</sup></td>
                <td>.003</td>
                <td>2.986</td>
                <td>0.223 (small)</td>
              </tr>
              <tr valign="top">
                <td>PQ<sup>d</sup></td>
                <td>&#60;.001</td>
                <td>−5.019</td>
                <td>0.379 (medium)</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>ATT: overall attractiveness.</p>
            </fn>
            <fn id="table3fn2">
              <p><sup>b</sup>HQ-I: hedonic quality—identification.</p>
            </fn>
            <fn id="table3fn3">
              <p><sup>c</sup>HQ-S: hedonic quality—stimulation.</p>
            </fn>
            <fn id="table3fn4">
              <p><sup>d</sup>PQ: pragmatic quality.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Overview of Presentation Mode–Related Effects (AI vs Human)</title>
        <p>The AttrakDiff2 portfolio plot (<xref rid="figure5" ref-type="fig">Figure 5</xref>) summarizes the character of both presentation modes independent of sequence. The AI avatar presentation mode is categorized as neutral, indicating that, while it was perceived as functional, it failed to deliver a distinctively positive hedonic or emotional experience. In contrast, the human presenter is positioned in the “desired” (high-quality) quadrant. Although this placement is relatively near the border of the neutral zone, it signifies a more balanced and successful integration of usability and user appeal. Additionally, the confidence rectangles for the 2 modes do not overlap. This lack of intersection provides visual confirmation of the statistical divergence between the 2 conditions presented above: the human presenter is perceived as superior in its overall product character compared to the AI avatar.</p>
        <fig id="figure5" position="float">
          <label>Figure 5</label>
          <caption>
            <p>AttrakDiff2 portfolio plot showing pooled presentation format ratings across the user experience assessments (UX<sub>1</sub> and UX<sub>2</sub>). AI: artificial intelligence; HQ: hedonic quality; HQ-I: hedonic quality—identification; HQ-S: hedonic quality—stimulation; PQ: pragmatic quality.</p>
          </caption>
          <graphic xlink:href="formative_v10i1e90037_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Detailed Semantic Differential</title>
        <p>The detailed semantic differential (<xref rid="figure6" ref-type="fig">Figure 6</xref>) highlights specific adjective pairs where the modes diverged. Notably, for PQ, in contrast to the human presenter, the AI avatar was perceived as rather technical, whereas for HQ-S, it was perceived as novel (with the human presenter being rated as more ordinary) and not quite as engaging as the human presenter (who received neutral ratings on the scale from dull to captivating). However, the results for the other HQ-S items were rather mixed. Across PQ, HQ-I, and ATT, with few exceptions, the human presenter consistently received better ratings than the AI avatar presenter. This possibly explains the smaller effect size for HQ-S compared with the results for the other 3 qualities (with medium effects being observed).</p>
        <fig id="figure6" position="float">
          <label>Figure 6</label>
          <caption>
            <p>Detailed semantic differential for comparing artificial intelligence (AI) avatar and human presenter–based evaluations from the AttrakDiff2 questionnaire, showing the mean ratings per adjective pair based on the pooled data for user experience assessments UX<sub>1</sub> and UX<sub>2</sub>. ATT: overall attractiveness; HQ-I: hedonic quality—identification; HQ-S: hedonic quality—stimulation; PQ: pragmatic quality.</p>
          </caption>
          <graphic xlink:href="formative_v10i1e90037_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Feasibility Findings</title>
        <p>This study examined how the format of an explainer video—whether presented by an AI-generated avatar or a human presenter—affected short-term learning and UX for undergraduate engineering students. Additionally, it aimed to test the feasibility of using a randomized crossover design in a classroom environment. The results offer a nuanced understanding of both learning outcomes and the overall experience, providing valuable insights into the use of AI-driven instructional media in higher education.</p>
        <p>The primary objective of this study was to assess the feasibility of the study design and procedures. Recruitment within a regular classroom setting was successful, with full participation and no dropouts. The randomized crossover procedure, including repeated measurements and format switching within a single session, was logistically implementable and could be carried out as planned. Data completeness was high for the primary outcome, and only a few responses were missing in the UX questionnaire. Overall, the findings suggest that the study procedures were effective for recruitment, implementation, and data collection.</p>
      </sec>
      <sec>
        <title>Learning Gain</title>
        <p>The findings are broadly consistent with theories suggesting that well-structured instructional content may reduce the influence of presentation modality on immediate learning outcomes [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref29">29</xref>].</p>
        <p>Both presentation formats resulted in notable short-term learning improvements from baseline to the initial posttest time point (LZ<sub>0</sub> to LZ<sub>1</sub>). Analyses within groups revealed significant gains in knowledge scores for both the AI avatar and human presenter conditions, demonstrating the effectiveness of the instructional materials regardless of the study format. Importantly, there was no significant difference in learning gains between the 2 formats. This outcome supports existing research showing comparable learning results between AI-generated and human instructors [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref7">7</xref>]. A recent rapid review synthesizing 15 primary studies similarly concluded that short-term learning performance tends to be broadly equivalent across presentation formats despite differences in user perception [<xref ref-type="bibr" rid="ref8">8</xref>]. From a theoretical standpoint, this pattern aligns with the cognitive theory of multimedia learning and cognitive load theory, which highlight that learning outcomes depend primarily on the quality of information processing and the effective management of cognitive load rather than on superficial presentation features [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref16">16</xref>]. As long as instructional content is well structured and aligned with multimedia design principles, different presenter modalities may have only a limited impact on immediate knowledge acquisition [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref29">29</xref>]. Several factors should be taken into account when interpreting these findings. Differences in baseline knowledge between groups may have affected the observed learning gains. Additionally, the knowledge test consisted of only 7 dichotomous items and exhibited ceiling effects after the initial exposure, limiting its sensitivity. 
Given the small sample size and the exploratory nature of the study, the lack of statistically significant differences should not be seen as evidence of equivalence.</p>
      </sec>
      <sec>
        <title>UX Findings</title>
        <p>The results from the UX assessments showed notable, statistically significant differences between the presentation formats, contrasting with the learning outcomes. Participants rated the human-presented video more favorably across all AttrakDiff2 dimensions compared to the AI avatar–based video. The higher scores for PQ indicate that the human presentation was perceived more favorably. This advantage may be due to natural speech patterns, facial expressions, and nonverbal cues, which likely enhance understanding and lessen perceived cognitive load [<xref ref-type="bibr" rid="ref30">30</xref>]. Furthermore, higher ratings for hedonic quality—specifically, HQ-I and ATT—suggested that the human presenter was perceived as more relatable and socially engaging. These results align with social agency theory, which suggests that humanlike social cues—such as natural voice, eye contact, and expressive gestures—can increase learner engagement and promote a sense of interpersonal interaction [<xref ref-type="bibr" rid="ref17">17</xref>]. Although AI avatars can mimic certain social cues to some degree, their perceived artificiality may diminish feelings of social presence and identification, especially when subtle mismatches in expression or timing occur [<xref ref-type="bibr" rid="ref31">31</xref>]. Interestingly, differences in hedonic stimulation were smaller and, in some cases, slightly favored the AI avatar, which could reflect perceived novelty or technological interest. However, this did not translate into higher ATT, indicating that novelty alone cannot compensate for reduced social and pragmatic qualities. This pattern aligns with previous research showing that human presenters are typically rated higher in engagement, perceived quality, and emotional connection even when learning outcomes are similar [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. 
Although no statistically significant sequence effects were detected in the UX data, caution is warranted in interpreting the results. The small sample size and limited statistical power mean that the absence of clear order effects does not necessarily confirm that they were not present. Subtle carryover or contrast effects might have gone undetected. As a result, combining UX ratings across both measurement points is a practical choice but also introduces some limitations. The lack of significant sequence effects should not be viewed as definitive proof that pooling entirely eliminates order-related biases. While pooling enhances the robustness of comparisons between presentation formats, it may also mask phase-specific effects or interactions between presentation order and user perceptions. Future research with larger sample sizes and designs that explicitly address order and carryover effects is necessary to determine whether pooled analyses truly reflect differences attributable to presentation mode.</p>
      </sec>
      <sec>
        <title>Integration and Implications</title>
        <p>A key finding of this study is the observed dissociation between learning outcomes and UX. Although both presentation formats resulted in similar short-term learning gains, participants clearly favored the human presenter from a UX perspective. This difference can be understood through theoretical frameworks that differentiate between cognitive and experiential facets of learning. Cognitive theories focus on efficient information processing as the main factor influencing learning outcomes, whereas UX models emphasize the importance of pragmatic and hedonic qualities in subjective evaluation and engagement [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. The results suggest that a more positive UX does not necessarily lead to greater immediate learning gains. However, UX might still be crucial for longer-term educational factors such as motivation, sustained attention, and ongoing use of learning materials—elements not captured within this short-term investigation. Practically speaking, these findings indicate that AI-generated avatars can effectively deliver instructional content, especially in settings where scalability and resource efficiency are priorities. At the same time, the consistent preference for human presenters underscores the ongoing significance of social and perceptual factors in digital learning environments. This suggests that, while AI avatars may be suitable for standardized or large-scale educational applications, they might be less effective in contexts where engagement, identification, and perceived quality are key to successful learning. Overall, the findings align with existing research showing comparable short-term learning outcomes for both formats alongside a clear advantage in UX for human presenters [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref8">8</xref>]. 
Our study further extends this research into the domain of electrical engineering—a technically demanding field with high cognitive load and complex visual representations—thereby contributing to an emerging body of evidence in such specialized contexts.</p>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>Several limitations should be acknowledged. The most significant one is the small sample size (N=13), which affects statistical power and the generalizability of the findings.</p>
        <p>A key methodological limitation of this feasibility study relates to the crossover design without a washout period and the use of identical instructional content across both phases. These conditions introduced substantial risk of test-retest and carryover effects, particularly for the second learning measurement (LZ<sub>2</sub>), thereby limiting its interpretability as an independent estimate of presentation effects. Similarly, UX ratings may have been influenced by comparison or contrast effects between the first and second presentation formats. Although no statistically significant sequence effects were detected in the pooled UX analysis, the small sample size limits confidence in ruling out subtle order-related influences. Consequently, while the crossover design was useful for feasibility purposes, it was suboptimal for isolating presentation format effects on learning and UX.</p>
        <p>Finally, this study was conducted in a specific educational setting, which may limit the generalizability of the results to broader contexts.</p>
      </sec>
      <sec>
        <title>Future Work</title>
        <p>As this was an initial exploratory feasibility study, the findings offer preliminary insights but cannot support definitive conclusions about comparative effectiveness. Several avenues for future research arise from the results and the identified methodological limitations. First, larger, adequately powered studies are needed to detect small to moderate effects, especially in UX outcomes, where consistent differences were observed. Future research should be based on prestudy power calculations derived from the effect sizes observed in this study. Second, the study design warrants refinement. Due to carryover and test-retest effects noted in the crossover design, future work should consider parallel-group designs, incorporate longer washout periods, and use instructional materials that are similar but not identical to better attribute effects to presentation format. Third, assessment methods for learning outcomes should be enhanced. The brief dichotomous knowledge test used provides limited sensitivity and may suffer from ceiling effects. Future studies should use more nuanced assessment tools such as multiple-choice questions with plausible distractors, transfer tasks, open-ended questions, and delayed posttests to evaluate retention over time. Fourth, research should expand beyond immediate learning outcomes to include motivational and affective factors such as engagement, perceived credibility, trust in AI-generated presenters, and long-term acceptance. Considering the discrepancy observed between learning outcomes and UX, these elements may significantly influence the educational impact of AI-based instructional formats over time. 
Finally, future investigations should examine a wider range of contexts, including various disciplines, different levels of prior knowledge, and variations in avatar design—such as realism, voice, and gender—to identify the conditions under which AI-generated presenters can be most effectively integrated into educational practice.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study shows that implementing a randomized crossover procedure in a classroom setting is feasible, although it also reveals notable methodological limitations when applied to content-identical instructional materials. Both AI avatar–based and human-presented videos resulted in substantial short-term learning improvements, with no statistically significant differences observed between the 2 formats. However, UX consistently favored the human presenter across all measured aspects. These results indicate that while AI avatars can effectively facilitate knowledge acquisition, they currently fall short of human presenters in perceived quality and user satisfaction. As this study was exploratory in nature, further research involving larger sample sizes, refined measurement tools, and optimized experimental designs is necessary to better understand how presentation format influences digital learning outcomes.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Secondary and sensitivity analyses.</p>
        <media xlink:href="formative_v10i1e90037_app1.pdf" xlink:title="PDF File  (Adobe PDF File), 193 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">ATT</term>
          <def>
            <p>overall attractiveness</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">HQ-I</term>
          <def>
            <p>hedonic quality—identification</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">HQ-S</term>
          <def>
            <p>hedonic quality—stimulation</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">PQ</term>
          <def>
            <p>pragmatic quality</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">UX</term>
          <def>
            <p>user experience</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>Generative artificial intelligence (AI) tools were used in a limited capacity during manuscript preparation solely to assist with the translation of short passages from German to English (DeepL; DeepL SE [<xref ref-type="bibr" rid="ref33">33</xref>]) and the refinement of selected text passages (Grammarly; Superhuman Platform Inc [<xref ref-type="bibr" rid="ref34">34</xref>]). All AI-assisted outputs were carefully reviewed, revised, and validated by the authors to ensure accuracy, clarity, and consistency with the intended scientific content. No generative AI tools were used for study design, data analysis, interpretation of results, or formulation of scientific conclusions.</p>
    </ack>
    <notes>
      <title>Data Availability</title>
      <p>The videos that were used, as well as the datasets generated or analyzed during this study, are available from the corresponding author on reasonable request. However, the complete datasets are not publicly available due to restrictions as they contain information that could compromise the privacy of research participants.</p>
    </notes>
    <notes>
      <title>Funding</title>
      <p>The authors acknowledge support for the publication costs from the Open Access Publication Fund of Bielefeld University and the German Research Foundation.</p>
    </notes>
    <fn-group>
      <fn fn-type="con">
        <p>MR contributed to conceptualization, methodology, data curation, formal analysis, and drafting of the original manuscript, as well as review and editing. TJ contributed to conceptualization, methodology, and review and editing. UvJ contributed to conceptualization, methodology, visualization, supervision, and review and editing. UVA contributed to methodology, supervision, and review and editing.</p>
      </fn>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tenberg</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <source>Didaktische Erklärvideos: Ein Praxis-Handbuch</source>
          <year>2021</year>
          <publisher-loc>Stuttgart, Germany</publisher-loc>
          <publisher-name>Franz Steiner Verlag</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dorgerloh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wolf</surname>
              <given-names>KD</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Dorgerloh</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Wolf</surname>
              <given-names>KD</given-names>
            </name>
          </person-group>
          <article-title>Wie verändern Erklärvideos Bildungsprozesse? Die neue Erklär- und Lernkultur</article-title>
          <source>Lehren und Lernen mit Tutorials und Erklärvideos</source>
          <year>2020</year>
          <publisher-loc>Weinheim, Germany</publisher-loc>
          <publisher-name>Beltz</publisher-name>
          <fpage>7</fpage>
          <lpage>11</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Leiker</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gyllen</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Eldesouky</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Cukurova</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Generative AI for learning: investigating the potential of learning videos with synthetic virtual instructors</article-title>
          <source>Proceedings of the 24th International Conference on Artificial Intelligence in Education</source>
          <year>2023</year>
          <conf-name>AIED 2023</conf-name>
          <conf-date>July 3-7, 2023</conf-date>
          <conf-loc>Tokyo, Japan</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-031-36336-8_81</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Li</surname>
              <given-names>ZR</given-names>
            </name>
            <name name-style="western">
              <surname>Barry</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Cukurova</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Adult learners recall and recognition performance and affective feedback when learning from an AI-generated synthetic video</article-title>
          <source>arXiv. Preprint posted online on November 28, 2024</source>
          <pub-id pub-id-type="doi">10.48550/arXiv.2412.10384</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Winslow</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>AI-generated versus human-recorded lecture videos: a comparative analysis of learning outcomes</article-title>
          <source>Proceedings of the 2025 Hawaii University International Conferences</source>
          <year>2025</year>
          <conf-name>HUIC 2025</conf-name>
          <conf-date>June 11-13, 2025</conf-date>
          <conf-loc>Honolulu, HI</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://huichawaii.org/wp-content/uploads/2025/07/Winslow-Joe-2025-HUIC-STEM.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Netland</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>von Dzengelevski</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Tesch</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kwasnitschka</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Comparing human-made and AI-generated teaching videos: an experimental study on learning effects</article-title>
          <source>Comput Educ</source>
          <year>2025</year>
          <month>01</month>
          <volume>224</volume>
          <fpage>105164</fpage>
          <pub-id pub-id-type="doi">10.1016/j.compedu.2024.105164</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arkün-Kocadere</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Çağlar Özhan</surname>
              <given-names>Ş</given-names>
            </name>
          </person-group>
          <article-title>Video lectures with AI-generated instructors: low video engagement, same performance as human instructors</article-title>
          <source>Int Rev Res Open Distrib Learn</source>
          <year>2024</year>
          <volume>25</volume>
          <issue>3</issue>
          <fpage>350</fpage>
          <lpage>69</lpage>
          <pub-id pub-id-type="doi">10.19173/irrodl.v25i3.7815</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hai</surname>
              <given-names>TT</given-names>
            </name>
            <name name-style="western">
              <surname>Mai</surname>
              <given-names>DT</given-names>
            </name>
            <name name-style="western">
              <surname>Hanh</surname>
              <given-names>NV</given-names>
            </name>
          </person-group>
          <article-title>A rapid review of using AI-generated instructional videos in higher education</article-title>
          <source>Front Comput Sci</source>
          <year>2026</year>
          <month>01</month>
          <day>06</day>
          <volume>7</volume>
          <fpage>1721093</fpage>
          <pub-id pub-id-type="doi">10.3389/fcomp.2025.1721093</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nugroho</surname>
              <given-names>YS</given-names>
            </name>
            <name name-style="western">
              <surname>Bourguet</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Soyel</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Mareschal</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Avatars vs. video presence: effects of instructor presence on cognitive load in video-based learning</article-title>
          <source>Electron J E Learn</source>
          <year>2025</year>
          <month>07</month>
          <day>30</day>
          <volume>23</volume>
          <issue>3</issue>
          <fpage>79</fpage>
          <lpage>91</lpage>
          <pub-id pub-id-type="doi">10.34190/ejel.23.3.3964</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paivio</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Chapter 2 - imagery and language</article-title>
          <source>Imagery: Current Cognitive Approaches</source>
          <year>1971</year>
          <publisher-loc>Cambridge, MA</publisher-loc>
          <publisher-name>Academic Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Schmidt-Borcherding</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Zur Lernpsychologie von Erklärvideos: Theoretische Grundlagen</article-title>
          <source>Lehren und Lernen mit Tutorials und Erklärvideos</source>
          <year>2020</year>
          <publisher-loc>Weinheim, Germany</publisher-loc>
          <publisher-name>Beltz</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Paivio</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <source>Mental Representations: A Dual Coding Approach</source>
          <year>1986</year>
          <publisher-loc>Oxford, UK</publisher-loc>
          <publisher-name>Oxford University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mayer</surname>
              <given-names>RE</given-names>
            </name>
          </person-group>
          <source>Multimedia Learning</source>
          <year>2009</year>
          <publisher-loc>Cambridge, UK</publisher-loc>
          <publisher-name>Cambridge University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mayer</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Fiorella</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Principles for reducing extraneous processing in multimedia learning: coherence, signaling, redundancy, spatial contiguity, and temporal contiguity principles</article-title>
          <source>The Cambridge Handbook of Multimedia Learning</source>
          <year>2014</year>
          <publisher-loc>Cambridge, UK</publisher-loc>
          <publisher-name>Cambridge University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sweller</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Cognitive load during problem solving: effects on learning</article-title>
          <source>Cogn Sci</source>
          <year>1988</year>
          <month>04</month>
          <volume>12</volume>
          <issue>2</issue>
          <fpage>257</fpage>
          <lpage>85</lpage>
          <pub-id pub-id-type="doi">10.1207/s15516709cog1202_4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sweller</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ayres</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kalyuga</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <source>Cognitive Load Theory</source>
          <year>2011</year>
          <publisher-loc>New York, NY</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mayer</surname>
              <given-names>RE</given-names>
            </name>
          </person-group>
          <article-title>Principles based on social cues in multimedia learning: personalization, voice, image, and embodiment principles</article-title>
          <source>The Cambridge Handbook of Multimedia Learning</source>
          <year>2014</year>
          <publisher-loc>Cambridge, UK</publisher-loc>
          <publisher-name>Cambridge University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mori</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>MacDorman</surname>
              <given-names>KF</given-names>
            </name>
            <name name-style="western">
              <surname>Kageki</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>The uncanny valley [from the field]</article-title>
          <source>IEEE Robot Autom Mag</source>
          <year>2012</year>
          <month>06</month>
          <volume>19</volume>
          <issue>2</issue>
          <fpage>98</fpage>
          <lpage>100</lpage>
          <pub-id pub-id-type="doi">10.1109/MRA.2012.2192811</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hassenzahl</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Burmester</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Koller</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Brau</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Diefenbach</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Hassenzahl</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Koller</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Peissner</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Röse</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Der User Experience (UX) auf der Spur: Zum Einsatz von www.attrakdiff.de</article-title>
          <source>Usability Professionals 2008</source>
          <year>2008</year>
          <publisher-loc>Stuttgart, Germany</publisher-loc>
          <publisher-name>German Chapter der Usability Professionals Association</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hassenzahl</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Blythe</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Overbeeke</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Monk</surname>
              <given-names>AF</given-names>
            </name>
            <name name-style="western">
              <surname>Wright</surname>
              <given-names>PC</given-names>
            </name>
          </person-group>
          <article-title>The thing and I: understanding the relationship between user and product</article-title>
          <source>Funology</source>
          <year>2003</year>
          <publisher-loc>Dordrecht, The Netherlands</publisher-loc>
          <publisher-name>Springer</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hassenzahl</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Burmester</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Koller</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Szwillus</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ziegler</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>AttrakDiff: Ein Fragebogen zur Messung wahrgenommener hedonischer und pragmatischer Qualität</article-title>
          <source>Mensch &#38; Computer 2003</source>
          <year>2003</year>
          <publisher-loc>Wiesbaden, Germany</publisher-loc>
          <publisher-name>Vieweg+Teubner Verlag</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kay</surname>
              <given-names>RH</given-names>
            </name>
          </person-group>
          <article-title>Exploring the use of video podcasts in education: a comprehensive review of the literature</article-title>
          <source>Comput Hum Behav</source>
          <year>2012</year>
          <month>05</month>
          <volume>28</volume>
          <issue>3</issue>
          <fpage>820</fpage>
          <lpage>31</lpage>
          <pub-id pub-id-type="doi">10.1016/j.chb.2012.01.011</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Brame</surname>
              <given-names>CJ</given-names>
            </name>
          </person-group>
          <article-title>Effective educational videos: principles and guidelines for maximizing student learning from video content</article-title>
          <source>CBE Life Sci Educ</source>
          <year>2016</year>
          <volume>15</volume>
          <issue>4</issue>
          <fpage>es6</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27789532"/>
          </comment>
          <pub-id pub-id-type="doi">10.1187/cbe.16-03-0125</pub-id>
          <pub-id pub-id-type="medline">27789532</pub-id>
          <pub-id pub-id-type="pii">15/4/es6</pub-id>
          <pub-id pub-id-type="pmcid">PMC5132380</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Noll</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>von Jan</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Raap</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Albrecht</surname>
              <given-names>UV</given-names>
            </name>
          </person-group>
          <article-title>Mobile augmented reality as a feature for self-oriented, blended learning in medicine: randomized controlled trial</article-title>
          <source>JMIR Mhealth Uhealth</source>
          <year>2017</year>
          <month>09</month>
          <day>14</day>
          <volume>5</volume>
          <issue>9</issue>
          <fpage>e139</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mhealth.jmir.org/2017/9/e139/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/mhealth.7943</pub-id>
          <pub-id pub-id-type="medline">28912113</pub-id>
          <pub-id pub-id-type="pii">v5i9e139</pub-id>
          <pub-id pub-id-type="pmcid">PMC5620455</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Albrecht</surname>
              <given-names>UV</given-names>
            </name>
            <name name-style="western">
              <surname>Folta-Schoofs</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Behrends</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>von Jan</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>Effects of mobile augmented reality learning compared to textbook learning on medical students: randomized controlled pilot study</article-title>
          <source>J Med Internet Res</source>
          <year>2013</year>
          <month>08</month>
          <day>20</day>
          <volume>15</volume>
          <issue>8</issue>
          <fpage>e182</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2013/8/e182/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.2497</pub-id>
          <pub-id pub-id-type="medline">23963306</pub-id>
          <pub-id pub-id-type="pii">v15i8e182</pub-id>
          <pub-id pub-id-type="pmcid">PMC3758026</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hassenzahl</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Burmester</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Koller</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <source>AttrakDiff: Ein Fragebogen zur Messung wahrgenommener hedonischer und pragmatischer Qualität</source>
          <year>2003</year>
          <publisher-loc>Wiesbaden, Germany</publisher-loc>
          <publisher-name>Vieweg+Teubner Verlag</publisher-name>
          <fpage>E</fpage>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <collab>R Core Team</collab>
          </person-group>
          <article-title>R: a language and environment for statistical computing</article-title>
          <source>R Foundation for Statistical Computing</source>
          <year>2024</year>
          <access-date>2026-05-26</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.R-project.org/">https://www.R-project.org/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hothorn</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hornik</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>van de Wiel</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Zeileis</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>A lego system for conditional inference</article-title>
          <source>Am Stat</source>
          <year>2006</year>
          <volume>60</volume>
          <issue>3</issue>
          <fpage>257</fpage>
          <lpage>63</lpage>
          <pub-id pub-id-type="doi">10.1198/000313006x118430</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mayer</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Moreno</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Nine ways to reduce cognitive load in multimedia learning</article-title>
          <source>Educ Psychol</source>
          <year>2003</year>
          <volume>38</volume>
          <issue>1</issue>
          <fpage>43</fpage>
          <lpage>52</lpage>
          <pub-id pub-id-type="doi">10.1207/s15326985ep3801_6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Biocca</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Harms</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Burgoon</surname>
              <given-names>JK</given-names>
            </name>
          </person-group>
          <article-title>Toward a more robust theory and measure of social presence: review and suggested criteria</article-title>
          <source>Presence Teleoperators Virtual Environ</source>
          <year>2003</year>
          <month>10</month>
          <day>01</day>
          <volume>12</volume>
          <issue>5</issue>
          <fpage>456</fpage>
          <lpage>80</lpage>
          <pub-id pub-id-type="doi">10.1162/105474603322761270</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nowak</surname>
              <given-names>KL</given-names>
            </name>
            <name name-style="western">
              <surname>Biocca</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>The effect of the agency and anthropomorphism on users' sense of telepresence, copresence, and social presence in virtual environments</article-title>
          <source>Presence Teleoperators Virtual Environ</source>
          <year>2003</year>
          <month>10</month>
          <volume>12</volume>
          <issue>5</issue>
          <fpage>481</fpage>
          <lpage>94</lpage>
          <pub-id pub-id-type="doi">10.1162/105474603322761289</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hassenzahl</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>User experience (UX): towards an experiential perspective on product quality</article-title>
          <source>Proceedings of the 20th Conference on l'Interaction Homme-Machine</source>
          <year>2008</year>
          <conf-name>IHM '08</conf-name>
          <conf-date>September 2-5, 2008</conf-date>
          <conf-loc>Metz, France</conf-loc>
          <pub-id pub-id-type="doi">10.1145/1512714.1512717</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="web">
          <article-title>AI solutions that help you get work done</article-title>
          <source>DeepL</source>
          <access-date>2026-03-29</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.deepl.com/en">https://www.deepl.com/en</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <source>Grammarly</source>
          <access-date>2026-03-25</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.grammarly.com/">https://www.grammarly.com/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
