<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v9i1e46390</article-id>
      <article-id pub-id-type="pmid">39832353</article-id>
      <article-id pub-id-type="doi">10.2196/46390</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Developing a Machine Learning–Based Automated Patient Engagement Estimator for Telehealth: Algorithm Development and Validation Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Marshall</surname>
            <given-names>Robert</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Huang</surname>
            <given-names>Taicheng</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes" equal-contrib="yes">
          <name name-style="western">
            <surname>Guhan</surname>
            <given-names>Pooja</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Computer Science</institution>
            <institution>University of Maryland</institution>
            <addr-line>8125 Paint Branch Dr</addr-line>
            <addr-line>College Park, MD, 20742</addr-line>
            <country>United States</country>
            <fax>1 (301) 405 6707</fax>
            <phone>1 2406309133</phone>
            <email>pguhan@umd.edu</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1551-8163</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Awasthi</surname>
            <given-names>Naman</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6036-076X</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>McDonald</surname>
            <given-names>Kathryn</given-names>
          </name>
          <degrees>BS</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0009-0007-0589-4946</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Bussell</surname>
            <given-names>Kristin</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3128-0178</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Reeves</surname>
            <given-names>Gloria</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8070-673X</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Manocha</surname>
            <given-names>Dinesh</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-7047-9801</ext-link>
        </contrib>
        <contrib id="contrib7" contrib-type="author">
          <name name-style="western">
            <surname>Bera</surname>
            <given-names>Aniket</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0182-6985</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Computer Science</institution>
        <institution>University of Maryland</institution>
        <addr-line>College Park, MD</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Psychiatry, Child and Adolescent Division</institution>
        <institution>University of Maryland</institution>
        <addr-line>Baltimore, MD</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>School of Nursing</institution>
        <institution>University of Maryland</institution>
        <addr-line>Baltimore, MD</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Department of Computer Science</institution>
        <institution>Purdue University</institution>
        <addr-line>West Lafayette, IN</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Pooja Guhan <email>pguhan@umd.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2025</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>20</day>
        <month>1</month>
        <year>2025</year>
      </pub-date>
      <volume>9</volume>
      <elocation-id>e46390</elocation-id>
      <history>
        <date date-type="received">
          <day>9</day>
          <month>2</month>
          <year>2023</year>
        </date>
        <date date-type="rev-request">
          <day>13</day>
          <month>4</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>30</day>
          <month>6</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>3</day>
          <month>9</month>
          <year>2024</year>
        </date>
      </history>
      <copyright-statement>©Pooja Guhan, Naman Awasthi, Kathryn McDonald, Kristin Bussell, Gloria Reeves, Dinesh Manocha, Aniket Bera. Originally published in JMIR Formative Research (https://formative.jmir.org), 20.01.2025.</copyright-statement>
      <copyright-year>2025</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2025/1/e46390" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Patient engagement is a critical but challenging public health priority in behavioral health care. During telehealth sessions, health care providers need to rely predominantly on verbal strategies rather than typical nonverbal cues to effectively engage patients. Hence, the typical patient engagement behaviors are now different, and health care provider training on telehealth patient engagement is unavailable or quite limited. Therefore, we explore the application of machine learning for estimating patient engagement. This can assist psychotherapists in the development of a therapeutic relationship with the patient and enhance patient engagement in the treatment of mental health conditions during tele–mental health sessions.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This study aimed to examine the ability of machine learning models to estimate patient engagement levels during a tele–mental health session and understand whether the machine learning approach could support therapeutic engagement between the client and psychotherapist.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>We proposed a multimodal learning-based approach. We uniquely leveraged latent vectors corresponding to affective and cognitive features frequently used in psychology literature to understand a person’s level of engagement. Given the labeled data constraints that exist in health care, we explored a semisupervised learning solution. To support the development of similar technologies for telehealth, we also plan to release a dataset called Multimodal Engagement Detection in Clinical Analysis (MEDICA). This dataset includes 1229 video clips, each lasting 3 seconds. In addition, we present experiments conducted on this dataset, along with real-world tests that demonstrate the effectiveness of our method.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Our algorithm reports a 40% improvement in root mean square error over state-of-the-art methods for engagement estimation. In our real-world tests on 438 video clips from psychotherapy sessions with 20 patients, in comparison to prior methods, positive correlations were observed between psychotherapists’ Working Alliance Inventory scores and our mean and median engagement level estimates. This indicates the potential of the proposed model to present patient engagement estimations that align well with the engagement measures used by psychotherapists.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>Patient engagement has been identified as being important to improve therapeutic alliance. However, limited research has been conducted to measure this in a telehealth setting, where the therapist lacks conventional cues to make a confident assessment. The algorithm developed is an attempt to model person-oriented engagement modeling theories within machine learning frameworks to estimate the level of engagement of the patient accurately and reliably in telehealth. The results are encouraging and emphasize the value of combining psychology and machine learning to understand patient engagement. Further testing in the real-world setting is necessary to fully assess its usefulness in helping therapists gauge patient engagement during online sessions. However, the proposed approach and the creation of the new dataset, MEDICA, open avenues for future research and the development of impactful tools for telehealth.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>machine learning</kwd>
        <kwd>mental health</kwd>
        <kwd>telehealth</kwd>
        <kwd>engagement detection</kwd>
        <kwd>patient engagement</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Overview</title>
        <p>The World Health Organization defines mental health as a “state of well-being” that allows a person to lead a fulfilling and productive life and contribute to society [<xref ref-type="bibr" rid="ref1">1</xref>]. In addition, the World Health Organization estimates that one-fourth of the adult population is affected by a mental disorder [<xref ref-type="bibr" rid="ref2">2</xref>]. However, there are only approximately 9 psychiatrists per 100,000 people in higher-income countries and only approximately 0.1 psychiatrists for every 1,000,000 people in lower-income countries [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>]. As stress and pressure continue to increase, leading to poor mental health outcomes, the need for improved tele–mental health care has become critical. Tele–mental health care is the process of providing psychotherapy remotely, typically using Health Insurance Portability and Accountability Act (HIPAA)–compliant videoconferencing [<xref ref-type="bibr" rid="ref5">5</xref>]. It offers an effective means of accessing mental health services and treatment, transcending geographical and cultural boundaries worldwide, and helps address the chronic shortage of psychotherapists. These services not only remove practical barriers to care, such as transportation, but also offer affordability and direct access to qualified therapists. Therefore, there has been an upward trend in the demand for such services [<xref ref-type="bibr" rid="ref6">6</xref>]. Despite these undeniable benefits, this emerging treatment modality raises new challenges in patient engagement compared to in-person care. By “engagement,” we refer to the connection between a therapist and patient, characterized by a sense of basic trust and a willingness or interest to collaborate. This connection is essential for the therapeutic process and fosters the development of a strong therapeutic relationship.</p>
      </sec>
      <sec>
        <title>Background</title>
        <p>Patient engagement is a critical priority in behavioral health care as it involves establishing effective rapport between health care providers and patients. In the context of mental health, patient engagement is an indicator of a successful therapy session. This multidimensional concept [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>] encompasses the interaction of cognitive and emotion-related components of the patient’s psychological state. However, one of the challenges in fully understanding and evaluating patient engagement lies in the difficulty of establishing a standardized measurement approach. Patient engagement is a complex and dynamic construct that defies a one-size-fits-all measurement. The diverse factors influencing patient engagement, such as individual characteristics, and health care contexts make it challenging to develop a universally accepted standard of measuring its effectiveness. Traditional quantitative metrics (appointment attendance rates, treatment adherence, and patient satisfaction) often fall short in capturing the depth and richness of patient engagement. While they can provide numerical data, they may overlook the subjective experiences of the interaction and do not necessarily reflect health care provider-patient alliance in treatment. In contrast, qualitative methods such as patient narratives and feedback provide valuable insights but lack scalability, objectivity, and uniformity across different health systems. Therefore, to address these limitations, machine learning techniques are being explored to complement and enhance these traditional methods by leveraging computational power, scalability, and ability to identify patterns, relationships, and insights that may not be otherwise apparent. 
Some prior works in engagement detection have focused on using a single modality, such as facial expressions [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>], speech [<xref ref-type="bibr" rid="ref11">11</xref>], body posture [<xref ref-type="bibr" rid="ref12">12</xref>], gaze direction [<xref ref-type="bibr" rid="ref13">13</xref>], or head pose [<xref ref-type="bibr" rid="ref14">14</xref>], to detect engagement. Combining different modalities has been observed to improve engagement detection accuracy [<xref ref-type="bibr" rid="ref15">15</xref>-<xref ref-type="bibr" rid="ref17">17</xref>]. The authors expanded the work of Stanford PBL Lab’s eRing [<xref ref-type="bibr" rid="ref18">18</xref>] by including information streams such as facial expressions, voice, and other biometric data. Monkaresi et al [<xref ref-type="bibr" rid="ref19">19</xref>] proposed an approach to detect engagement levels in students during a writing task by not only making use of facial features but also features obtained from remote video-based detection of heart rate. The dataset used was generated by the authors, and they used self-reports instead of external annotation for classification purposes. Chang et al [<xref ref-type="bibr" rid="ref20">20</xref>] made use of facial expressions as well as body posture for detecting engagement in learners. Fedotov et al [<xref ref-type="bibr" rid="ref21">21</xref>] proposed the use of audio, facial, and body pose features to detect engagement and disengagement for an in-the-wild dataset that had been created using videos obtained from uncontrolled environments or situations. The distribution of class labels in this dataset is not balanced.</p>
        <p>Despite the existence of a variety of such algorithms to perform engagement detection, the results obtained from these approaches (particularly single modality–based approaches) could be misleading in a telehealth setting due to factors such as camera position and resistant or guarded clients. In telehealth appointments, therapists have limited visual data available to them, as they can only view the patient’s face rather than their full body. Asking the patient to adjust their position to gain a better view of their body during telehealth appointments is a potential solution for therapists. However, this approach may lead to communication difficulties, as it can impact the patient’s proximity to the microphone, potentially affecting the quality of audio communication between the patient and the therapist. Therefore, therapists must rely more on verbal communication strategies to engage patients than in in-person care because they cannot use typical nonverbal cues to convey interest and be responsive to the patient (eg, a handshake at the beginning of a session, adjusting the distance between the patient and health care provider by moving a chair closer or further away, and observing a patient’s response to questions while maintaining eye contact). It is also more difficult for therapists to convey attentiveness because eye contact requires the therapist to look at a camera rather than observe or look at a person. They may also have limited training on telehealth patient engagement, leading to a lack of guidance on measuring patient engagement. Therefore, this highlights the need for a system that can provide feedback on patient engagement by just using the data that are easily accessible, namely, text, audio, and face visuals. Engagement is critical for both retention of patients in care as well as effectiveness of mental health treatment. 
Developing such a system will make it possible to enhance the quality of tele–mental health and ultimately improve patient outcomes.</p>
      </sec>
      <sec>
        <title>Objective</title>
        <p>Taking all this into consideration, we propose and explore a machine learning–based approach that takes visual, audio, and text data as input and estimates the engagement level of a patient with mental health problems through a regression-based approach. We demonstrate the effectiveness of our method in tele–mental health sessions and provide a new dataset called Multimodal Engagement Detection in Clinical Analysis (MEDICA) to advance mental health research by understanding patient engagement levels during therapy sessions. MEDICA consists of 1229 short video clips from mock mental health therapy sessions used by medical schools in their psychiatry teaching curriculum. To the best of our knowledge, MEDICA is the first multimodal dataset that focuses on mental health research and consists of annotations useful for understanding the psychological state of patients with mental health problems during a therapy session.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Datasets</title>
        <sec>
          <title>MEDICA Dataset</title>
          <sec>
            <title>Overview</title>
            <p>Engagement is an overloaded term, and the definition varies with the application, making it difficult and expensive to collect, annotate, and analyze such data. As a result, we find too few multimodal-based engagement detection datasets currently available for us to use. Our problem statement revolves specifically around detecting patient engagement during a tele–mental health session. In such a setting, the only information we can work with includes the patient’s face and speech (audio and text). There exist datasets such as Carnegie Mellon University Multimodal Corpus of Sentiment Intensity (CMU-MOSI) [<xref ref-type="bibr" rid="ref22">22</xref>], Carnegie Mellon University Multimodal Opinion Sentiment and Emotion Intensity (CMU-MOSEI) [<xref ref-type="bibr" rid="ref23">23</xref>], and Stanford Emotional Narratives Dataset (SEND) [<xref ref-type="bibr" rid="ref24">24</xref>] that capture such settings. However, they are not specifically for engagement detection. Given the lack of a dataset that allows researchers to use multimodal features (video, text, and audio) for engagement, we propose MEDICA, a novel dataset developed specifically to cater to engagement detection using tele–mental health session videos. To use these data to address a broader range of issues related to mental health, we also include labels pertaining to stress and emotions.</p>
            <p>To the best of our knowledge, this dataset is one of the first publicly available datasets specifically catering to multimodal research in patient engagement in mental health. <xref ref-type="table" rid="table1">Table 1</xref> presents a comparison between MEDICA and other related datasets. Despite the rise in telehealth services and the poor patient-to-therapist ratios for individuals with mental health problems, there are no datasets that even try modeling telehealth sessions to give the community an opportunity to innovate and develop new technologies. MEDICA is a humble attempt by us to kick-start interesting research opportunities. The MEDICA dataset is unique in that it contains multimodal data from multiple sensors including speech and video. These data have been annotated with the engagement levels exhibited by each participant at different points in time.</p>
            <table-wrap position="float" id="table1">
              <label>Table 1</label>
              <caption>
                <p>Comparison of the Multimodal Engagement Detection in Clinical Analysis (MEDICA) dataset with other related datasets<sup>a</sup>.</p>
              </caption>
              <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
                <col width="160"/>
                <col width="80"/>
                <col width="130"/>
                <col width="80"/>
                <col width="120"/>
                <col width="180"/>
                <col width="250"/>
                <thead>
                  <tr valign="top">
                    <td>Dataset name</td>
                    <td>Samples</td>
                    <td>Unique speakers</td>
                    <td>Modes</td>
                    <td>Emotion labels</td>
                    <td>Engagement information</td>
                    <td>Other mental health cues</td>
                  </tr>
                </thead>
                <tbody>
                  <tr valign="top">
                    <td>RECOLA<sup>b</sup> [<xref ref-type="bibr" rid="ref25">25</xref>]</td>
                    <td>3400</td>
                    <td>27</td>
                    <td>{v,a}</td>
                    <td>Yes</td>
                    <td>No</td>
                    <td>Physiological (electrocardiogram and electrothermal activity)</td>
                  </tr>
                  <tr valign="top">
                    <td>CMU MOSEAS<sup>c</sup> [<xref ref-type="bibr" rid="ref26">26</xref>]</td>
                    <td>715</td>
                    <td>Multiple</td>
                    <td>{v,a,t}</td>
                    <td>Yes</td>
                    <td>No</td>
                    <td>—<sup>d</sup></td>
                  </tr>
                  <tr valign="top">
                    <td>CMU-MOSI<sup>e</sup> [<xref ref-type="bibr" rid="ref22">22</xref>]</td>
                    <td>2199</td>
                    <td>Multiple</td>
                    <td>{v,a,t}</td>
                    <td>Yes</td>
                    <td>No</td>
                    <td>—</td>
                  </tr>
                  <tr valign="top">
                    <td>CMU-MOSEI<sup>f</sup> [<xref ref-type="bibr" rid="ref23">23</xref>]</td>
                    <td>3228</td>
                    <td>1000</td>
                    <td>{v,a,t}</td>
                    <td>Yes</td>
                    <td>No</td>
                    <td>—</td>
                  </tr>
                  <tr valign="top">
                    <td>SEND<sup>g</sup> [<xref ref-type="bibr" rid="ref24">24</xref>]</td>
                    <td>193</td>
                    <td>49</td>
                    <td>{v,a,t}</td>
                    <td>Yes</td>
                    <td>No</td>
                    <td>—</td>
                  </tr>
                  <tr valign="top">
                    <td>DAiSEE<sup>h</sup> [<xref ref-type="bibr" rid="ref27">27</xref>]</td>
                    <td>9068</td>
                    <td>112</td>
                    <td>{v}</td>
                    <td>No</td>
                    <td>Yes</td>
                    <td>—</td>
                  </tr>
                  <tr valign="top">
                    <td>HBCU<sup>i</sup> [<xref ref-type="bibr" rid="ref9">9</xref>]</td>
                    <td>120</td>
                    <td>34</td>
                    <td>{v}</td>
                    <td>No</td>
                    <td>Yes</td>
                    <td>—</td>
                  </tr>
                  <tr valign="top">
                    <td>In the wild [<xref ref-type="bibr" rid="ref28">28</xref>]</td>
                    <td>195</td>
                    <td>78</td>
                    <td>{v}</td>
                    <td>No</td>
                    <td>Yes</td>
                    <td>—</td>
                  </tr>
                  <tr valign="top">
                    <td>SDMATH [<xref ref-type="bibr" rid="ref29">29</xref>]</td>
                    <td>20</td>
                    <td>20</td>
                    <td>{v,a}</td>
                    <td>Yes</td>
                    <td>Yes</td>
                    <td>—</td>
                  </tr>
                  <tr valign="top">
                    <td>MEDICA</td>
                    <td>1229<sup>j</sup></td>
                    <td>13</td>
                    <td>{v,a,t}</td>
                    <td>Yes</td>
                    <td>Yes</td>
                    <td>Hesitation, stress, and attentiveness</td>
                  </tr>
                </tbody>
              </table>
              <table-wrap-foot>
                <fn id="table1fn1">
                  <p><sup>a</sup>Modes indicate the subset of modalities present from visual (v), audio (a), and text (t).</p>
                </fn>
                <fn id="table1fn2">
                  <p><sup>b</sup>RECOLA: Remote Collaborative and Affective.</p>
                </fn>
                <fn id="table1fn3">
                  <p><sup>c</sup>CMU MOSEAS: Carnegie Mellon University Multimodal Opinion Sentiment, Emotions and Attributes.</p>
                </fn>
                <fn id="table1fn4">
                  <p><sup>d</sup>Not applicable.</p>
                </fn>
                <fn id="table1fn5">
                  <p><sup>e</sup>CMU-MOSI: Carnegie Mellon University Multimodal Corpus of Sentiment Intensity.</p>
                </fn>
                <fn id="table1fn6">
                  <p><sup>f</sup>CMU-MOSEI: Carnegie Mellon University Multimodal Opinion Sentiment and Emotion Intensity.</p>
                </fn>
                <fn id="table1fn7">
                  <p><sup>g</sup>SEND: Stanford Emotional Narratives Dataset.</p>
                </fn>
                <fn id="table1fn8">
                  <p><sup>h</sup>DAiSEE: Dataset for Affective States in E-Environments.</p>
                </fn>
                <fn id="table1fn9">
                  <p><sup>i</sup>HBCU: historically black college and university.</p>
                </fn>
                <fn id="table1fn10">
                  <p><sup>j</sup>Current status of the dataset. The size of the dataset will be increased.</p>
                </fn>
              </table-wrap-foot>
            </table-wrap>
          </sec>
          <sec>
            <title>Acquisition</title>
            <p>MEDICA has been developed by collecting publicly available mock therapy session videos created by different psychiatry medical schools for training their students. The patients in these videos are being advised for depression, social anxiety, and posttraumatic stress disorder. We collected 13 videos, each having a duration of approximately 20 to 30 minutes. We limit the videos to the setup where both the therapist and the patient are not visible together in the same frame. In addition, we take only those videos where there is only 1 patient. Each video has a unique English-speaking patient. The therapists in these videos are actual therapists, while the patients are trained actors who have been directed to act and react in a particular way. <xref rid="figure1" ref-type="fig">Figure 1</xref> shows the snapshots taken from various video clips in the MEDICA dataset.</p>
            <fig id="figure1" position="float">
              <label>Figure 1</label>
              <caption>
                <p>Examples from the Multimodal Engagement Detection in Clinical Analysis (MEDICA) dataset created for mental health research. This dataset has been created using publicly available videos that are usually used for training purposes by different medical schools. The faces of the individuals have been blurred to protect their identity.</p>
              </caption>
              <graphic xlink:href="formative_v9i1e46390_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
            </fig>
          </sec>
          <sec>
            <title>Processing and Annotation</title>
            <p>Our primary focus was to create a dataset that captures the behavior of patients with mental health problems during their sessions. Therefore, we selected only portions of the videos where only the patient was visible in the frames, which appeared at various intervals and durations throughout the recordings. We took these scattered clips and divided them into smaller clips of 3 seconds each, resulting in a dataset of size 1229. We use Moviepy and speech-recognition libraries to extract audio and text from the video clips, respectively. Each video was annotated for attentiveness, stress, and engagement, which were scored on a Likert scale ranging from –3 to 3; hesitation was a binary target variable (yes or no). Humans tend to have multiple emotions with varying intensities while expressing their thoughts and feelings. Therefore, the videos have been labeled for 8 emotions related to mental health: happy, sad, irritated, neutral, anxious, embarrassed, scared, and surprised. This will enable us to develop systems capable of understanding the various interacting emotions of the users.</p>
          </sec>
        </sec>
        <sec>
          <title>Real-World Data</title>
          <p>The videos collected to develop MEDICA were created by the psychiatry schools in controlled settings. By controlled, we mean that the therapists in these videos were real, while the patients were trained method actors who had been instructed in a very detailed manner regarding what to expect and how they should react. While these videos were approved by a team of psychotherapists as being inclusive of potential scenarios and patient reactions, the work would still be meaningless if not tested in the wild, that is, in settings where we have real patients who have not been instructed what to do or how to behave during the session with their real therapist. To accomplish this, we collaborated with 8 child psychotherapists. These psychotherapists are individuals who were trained and licensed as psychotherapists for both children and adults. The administrative staff identified patients that were scheduled for sessions with any of the participating therapists and provided the caregiver contact information. Our research assistant contacted eligible caregivers and provided information related to the purpose, potential risks and benefits, and responsibilities of study participation. They were informed of the study protocol that involved recording 1 to 2 telemental sessions to test our proposed approach. A total of 20 caregivers provided consent to participate in the study. They were provided with instructions on the setup of the equipment that we provided to ensure a clean recording. “Clean” refers to a recording executed with a camera of good quality with appropriate lighting conditions. The equipment included a smartphone with a good quality camera, a ring light with a stand to ensure that the session was recorded in a well-lit environment, and a hot spot internet connection to ensure that the session occurred smoothly without any network glitches. They were also given the assurance regarding preserving the confidentiality of the data being collected. 
All video recordings were labeled only by study number, with all identifiable information removed and stored in a double password–protected database. Participants were also informed that any facial images other than the caregiver who participated in the session would be deidentified (using methods such as blurring, etc). These steps were taken to ensure that only the caregiver’s level of engagement was measured and analyzed. In addition, this step protected the confidentiality of nonparticipants who did not provide consent.</p>
          <p>On average, each of these sessions lasted approximately 20 minutes. The demographics of the caregivers who participated in our real-world experiments can be found in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. The entire data collection process can be divided into three parts:</p>
          <list list-type="order">
            <list-item>
              <p>Before each tele–mental health session of a caregiver with their therapist, a research assistant helped the caregiver with setting up the equipment to record their session. The assistant also ensured that the caregivers were comfortable using the equipment.</p>
            </list-item>
            <list-item>
              <p>During the session, we made sure that the tele–mental health session proceeded in the same manner as a typical session. The caregivers were requested to use the equipment provided if possible, but there were no restrictions imposed by us on either the therapist or the caregiver regarding how the session should be conducted. After the presession process, the research assistant would log off. Therefore, during the session, it would be just the therapist and the caregiver having a conversation. No one else from the study would be a part of it. The only thing different about this session was that the caregiver was being recorded using the smartphone given to them. We did not record the therapist.</p>
            </list-item>
            <list-item>
              <p>After the session was complete, a research assistant guided the participant regarding the steps to stop the recording and save the data collected.</p>
            </list-item>
          </list>
          <p>Upon completion of the session, the therapist scored the quality of the collaborative relationship (therapeutic alliance) established between them during the session with the Working Alliance Inventory (WAI) [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. It is used for both child and adult therapy and is a widely studied therapeutic alliance measure. The WAI was modeled on the theoretical work conducted by Bordin [<xref ref-type="bibr" rid="ref32">32</xref>]. It captures 3 dimensions of the alliance: bond, task, and goals. Extensive tests showed 12 items per dimension to be the minimum length for effective representations of the inventory. A composite score is computed based on these 12 items for each of the sessions conducted. Henceforth, we refer to this score as the WAI score. The WAI scores can range from 12 to 84 [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. <xref rid="figure2" ref-type="fig">Figure 2</xref> shows frames from a few of the video recordings collected.</p>
          <fig id="figure2" position="float">
            <label>Figure 2</label>
            <caption>
              <p>A few frames from the real-world videos we collected. The faces have been blurred here to protect the identity of the patients. However, the consent of the patients was taken to use their unblurred faces as input to our model.</p>
            </caption>
            <graphic xlink:href="formative_v9i1e46390_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
          </fig>
          <p>While efforts were taken to ensure clean video recordings, we still could not use 7 out of the 20 videos collected for any type of analysis. The reasons for it ranged from the video being extremely shaky due to the continuous movement of some participants during their session to not having the permissions to process and analyze videos where the child spoke for the entire session instead of the caregiver. The remaining videos were processed in the same way as MEDICA. At the end of data processing, we obtained approximately 438 clips, each lasting 3 seconds. Unlike MEDICA where we had annotations for every 3 seconds, we had only a single WAI score for the entire session. Therefore, all the 3-second clips obtained from a session were annotated with the WAI score obtained for the session. Any analysis conducted on these data treated all the 438 clips independently.</p>
        </sec>
      </sec>
      <sec>
        <title>Proposed Model Design</title>
        <p><xref rid="figure3" ref-type="fig">Figure 3</xref> presents an overview of our engagement estimation model. It has 2 key components: <italic>multimodal feature extractor</italic> and <italic>semisupervised learning network</italic>. We discuss each of them in detail in the subsequent sections.</p>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>Overall block diagram of the proposed architecture. The features obtained from the 2 modalities, cognitive and affective, are combined to form hT (real) of shape 655×1. The generator takes in random noise (Gaussian) and tries to generate a fake feature map hfakeT that looks similar to hT (real). The discriminator tries to distinguish between hT and hfakeT. The yellow bars in the discriminator and generator refer to linear layers with activation function as leaky rectified linear activation unit. The shape of the linear layers used has been written on the bars. The number of linear layers used can be changed depending upon the dataset size and task at hand.</p>
          </caption>
          <graphic xlink:href="formative_v9i1e46390_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <sec>
          <title>Multimodal Feature Extractor</title>
          <sec>
            <title>Overview</title>
            <p>Multimodal feature extractor in general refers to a system or algorithm that extracts and combines features from multiple modalities, such as audio, video, text, and other types of data, to provide a more comprehensive representation of input data. The goal of multimodal feature extraction is to capture complementary information from different sources, which can lead to better performance in various tasks. In our model, the multimodal feature extractor is used to comprehend the psychological state of the patient with mental health problems. We take inspiration from literature in psychology and psychiatry to build an extractor that can enable the learning network to recognize, understand, and evaluate engagement as close as possible to that of a psychotherapist. Therefore, we proposed a multicomponent approach to build the feature extractor and introduce 2 modules, namely, cognitive and affective states, to capture the different cues used by psychotherapists to assess their patients. As mentioned earlier, the visual data available are limited. Therefore, apart from the visuals, we also need to make use of verbal information, that is, understanding the speaking style of the person (audio) and what the person said (text).</p>
            <p>Therefore, given this information corresponding to a participant, the multimodal feature extractor outputs a feature vector <italic>h<sub>T</sub></italic>. <italic>h<sub>T</sub></italic> is obtained after concatenating the feature vectors <italic>h<sub>C</sub></italic> and <italic>h<sub>A</sub></italic> obtained from the cognitive and affective state modalities (components of the multimodal feature extractor), respectively. We now discuss the cognitive and affective state modalities in more detail in the subsequent sections.</p>
          </sec>
          <sec>
            <title>Cognitive State Modality</title>
            <p>The cognitive state involves comprehending complex concepts and issues and acquiring difficult skills. It conveys deep (rather than surface level) processing of information, where the person gains a critical or higher-order understanding of the subject matter and solves challenging problems.</p>
            <p>Psychotherapists usually measure and evaluate the cognitive state of the person using the mental status examination, typically conducted via in-person interviews or self-evaluations, to gauge memory, thinking, and the extent of understanding of the topic of discussion. There has been a lot of work around determining biomarkers for detecting signs of a person’s cognitive state. However, these methods are either offline or fail to consider various essential perceptual indicators. Recently, there has been a lot of work around using speech as a potential biomarker for detecting cognitive decline. For instance, stress negatively affects the cognitive functions of a person, and this can be easily detected using speech signals. Moreover, speech-based methods are attractive because they are nonintrusive, inexpensive, and can be done in real time. Four distinct features have proven to be highly effective in identifying signs of cognitive impairment and are increasingly utilized to detect conditions like Alzheimer’s and Parkinson’s diseases. These features include glottal (<italic>f<sub>g</sub></italic>), prosody (<italic>f<sub>pr</sub></italic>), phonation (<italic>f<sub>ph</sub></italic>), and articulation measures (<italic>f<sub>ar</sub></italic>), each offering unique insights into speech characteristics associated with cognitive decline.</p>
            <p>Glottal features are acoustic measures used to characterize speech under stress. The glottal is the space between the vocal cords in the larynx, and the opening and closing of the vocal cords create sound during speech. During periods of stress, there is an aberration in the amount of tension applied in the opening (abduction) and closing (adduction) of the vocal cords. Prosody features characterize the speaker’s intonation and speaking styles. Under this feature, we analyze variables such as timing, pitch, and loudness during the production of speech. Phonation refers to the production of vocal sounds by the vibration of the vocal cords. In individuals with cognitive decline, phonation is often characterized by bowing and inadequate closure of the vocal cords. These changes can lead to instability and irregularity in the vibration of the vocal cords, which in turn affects the production of speech. They are analyzed in terms of features related to perturbation measures, such as jitter (temporal perturbations of the fundamental frequency), shimmer (temporal perturbation of the amplitude of the signal), amplitude perturbation quotient, and pitch perturbation quotient. Apart from these, the degree of unvoiced is also included. Articulation features refer to the movements of the lips, tongue, and jaw during speech production. Reduced amplitude and velocity of these movements can impact speech intelligibility. The analysis of articulation is primarily based on the first 2 vocal formants F1 and F2, which correspond to the frequency of the sound waves produced by the vocal tract.</p>
            <p>For a given audio input, we extract these features using <italic>librosa</italic> [<xref ref-type="bibr" rid="ref35">35</xref>] and <italic>praat</italic> [<xref ref-type="bibr" rid="ref36">36</xref>] libraries. Therefore, we define features corresponding to cognitive state as the concatenation of these 4 audio features. Therefore, cognitive state features</p>
            <disp-formula>h<sub>C</sub> = concat (f<sub>g</sub>, f<sub>pr</sub>, f<sub>ph</sub>, f<sub>ar</sub>) <bold>(1)</bold>
          </disp-formula>
          </sec>
          <sec>
            <title>Affective State Modality</title>
            <p>The affective state encompasses emotional reactions such as excitement, boredom, curiosity, and anger. The range of affective expressions will vary based on individual demographic factors (eg, age), cultural backgrounds or norms, and mental health symptoms.</p>
            <p>To understand the affective state, we check if there exists any inconsistency between the emotions perceived and the statement the person made. Balomenos et al [<xref ref-type="bibr" rid="ref37">37</xref>] and Porter and ten Brinke [<xref ref-type="bibr" rid="ref38">38</xref>] suggest that when different modalities are modeled and projected onto a common space, they should point to similar affective cues; otherwise, the incongruity suggests distraction, deception, etc. In other words, if E1, E2, and E3 represent the emotions perceived individually from what the patient said (text), the way they said it or sounded (audio), and how they looked or expressed (visuals), respectively, then the patient would be considered engaged if E1, E2, and E3 are similar; otherwise, they would be considered disengaged. Therefore, we adopt pretrained emotion recognition models to extract affective features corresponding to audio, visuals, and text from each video sample separately.</p>
            <p>Audio features (<italic>f<sub>a</sub></italic>) are extracted using mel-frequency cepstral coefficients (MFCC) from audio clips available in the dataset. MFCC is a commonly used audio feature extraction technique in signal processing and machine learning. It is based on the concept of the mel scale, which is a nonlinear transformation of frequency that is more perceptually relevant to human hearing. By applying MFCC to audio signals, we can extract features that capture important aspects of the sound, such as pitch, timbre, and spectral shape. These features are often used in speech and music analysis as well as in various machine learning applications, including emotion recognition and speaker identification. In our work, the extracted MFCC features from audio are then passed as input to a multilayer perceptron network trained for emotion recognition in speech, specifically using the data available in the Crowd-source Emotional Multimodal Actors Dataset (CREMA-D) to extract affective features from the audio data. Through this method, we obtained a feature vector f<sub>a</sub>, for each audio clip, which provides valuable insights into the emotional characteristics of the speech.</p>
            <p>Visual features (<italic>f<sub>v</sub></italic>) correspond to the affective features extracted from video frames to understand the emotions expressed through facial cues of the patient. We used a deep learning architecture called Visual Geometry Group-B (VGG-B), which has been pretrained as described in the study by Arriaga et al [<xref ref-type="bibr" rid="ref39">39</xref>], to extract the affective features. We modified the output dimensions of the second last layer of the network to produce a feature vector <italic>f<sub>v</sub></italic> of length 100.</p>
            <p>Text (<italic>f<sub>t</sub></italic>), characterizing the emotions, sentiment, and other affective cues present in the text, is obtained using a Bidirectional Encoder Representations from Transformers (BERT)–based model, which has been pretrained on the GoEmotions dataset [<xref ref-type="bibr" rid="ref40">40</xref>]. The Bidirectional Encoder Representations from Transformers model works by processing the text input through a multilayer neural network that is trained to predict the context of each word based on its surrounding words. This allows the model to capture the complex relationships between words and their meanings, which are essential for understanding the affective context of the text.</p>
            <p>Therefore, we represent the affective state of the patient as a concatenation of <italic>f<sub>a</sub></italic>, <italic>f<sub>v</sub></italic>, and <italic>f<sub>t</sub></italic>. Hence, affective state features</p>
            <disp-formula>h<sub>A</sub>= concat (f<sub>a</sub>, f<sub>v</sub>, f<sub>t</sub>) <bold>(2)</bold>
          </disp-formula>
            <p>Therefore, the overall output of the multimodal feature extractor is a feature vector <italic>h<sub>T</sub></italic>, which can be defined as</p>
            <disp-formula>h<sub>T</sub> = concat (h<sub>C</sub>, h<sub>A</sub>)</disp-formula>
            <p>The learning network discussed in the next section makes use of this.</p>
          </sec>
        </sec>
      </sec>
      <sec>
        <title>Learning Network</title>
        <p>In this section, we discuss the second part of our proposed model, that is, the semisupervised learning network. Different machine learning techniques can be explored to solve the task of predicting the level of engagement. However, obtaining a large amount of high-quality labeled data to train a robust model for predicting patient engagement is inevitably laborious and requires expert medical knowledge. Considering that unlabeled data are relatively easy to collect, we propose a semisupervised learning–based solution. Semisupervised learning enables us to deploy machine learning systems in real-life applications (eg, image search [<xref ref-type="bibr" rid="ref41">41</xref>], speech analysis [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>], and natural language processing) where we have few labeled data samples and a lot of unlabeled data. Some prior works have also explored semisupervised learning for engagement detection in nonmedical domains. One of the earliest studies in this area is by Alyuz et al [<xref ref-type="bibr" rid="ref44">44</xref>], where they consider the development of an engagement detection system, more specifically emotional or affective engagement of students, in a semisupervised fashion to personalize systems such as intelligent tutoring systems according to their needs. Nezami et al [<xref ref-type="bibr" rid="ref45">45</xref>] conducted experiments to detect user engagement using a facial feature–based semisupervised model. Most state-of-the-art semisupervised learning methods use generative adversarial nets (GANs) [<xref ref-type="bibr" rid="ref46">46</xref>].</p>
        <p>GANs are a class of machine learning models and typically have 2 neural networks competing with each other to generate more accurate predictions. These 2 neural networks are referred to as the generator and the discriminator. The generator’s goal is to artificially manufacture outputs (ie, fake data) that could easily be mistaken as real data. The goal of the discriminator is to identify the real from the artificially generated data. In trying to generate high-quality outputs, the generator learns to capture the different possible variations in the input variables, and therefore, the data manifold well. This is extremely helpful when we may not be able to access data containing a wide variety of similar engagement-related cues visible across different patients. In general, the training process of GANs involves a dynamic interplay and competition between the generator and discriminator networks. Both the generator and discriminator networks improve their abilities iteratively. The generator aims to generate more realistic samples, while the discriminator aims to become better at distinguishing between real and generated (artificial) data. The eventual goal of the training process is for the generator and discriminator to reach Nash equilibrium [<xref ref-type="bibr" rid="ref47">47</xref>]. Once the equilibrium is achieved, no player (generator or discriminator) has any incentive to deviate toward a more optimal position, that is, no player has a better strategy given the choices of the other player.</p>
        <p>Taking all this into consideration, we propose using a multimodal semisupervised GAN as our learning network for regressing on the levels of patient engagement. This type of machine learning model combines multiple modalities (in this case, cognitive and affective modalities) to generate realistic samples (or features) that can fool the discriminator. The term <italic>semisupervised</italic> refers to the network’s ability to use a limited amount of labeled data alongside a larger amount of unlabeled data to enhance its accuracy. Such a network will generalize better and be more robust compared to previously defined semisupervised learning approaches. It is different from the semisupervised GAN framework SR-GAN proposed by Olmschenk et al [<xref ref-type="bibr" rid="ref48">48</xref>] in 2 main ways. First, our network is multimodal, whereas SR-GAN is not. Second, unlike the SR-GAN generator, which focuses on generating realistic images, our GAN’s generator is designed to create realistic feature maps that mimic the multimodal feature vector <italic>h<sub>T</sub></italic>, derived from the fusion of cognitive and affective state modalities (as discussed in the previous section). The generator takes Gaussian noise as input and produces a synthetic feature vector <italic>h<sub>fakeT</sub></italic>, aiming to deceive the discriminator into classifying it as <italic>h<sub>T</sub></italic> (real). The discriminator’s role, in turn, is to distinguish <italic>h<sub>fakeT</sub></italic> as a fake representation and correctly identify <italic>h<sub>T</sub></italic> as the genuine feature vector.</p>
        <p>Overall, our multimodal semisupervised GAN can therefore be divided into 3 parts: multimodal feature extractor, generator, and discriminator.</p>
        <p>For the purpose of training, we make use of the following four standard loss functions, namely, labeled loss (<italic>L<sub>lab</sub></italic>), unlabeled loss (<italic>L<sub>un</sub></italic>), fake loss (<italic>L<sub>fake</sub></italic>), and generator loss (<italic>L<sub>gen</sub></italic>):</p>
        <p>1. <italic>L<sub>lab</sub></italic>—this loss is similar to typical supervised learning regression losses, where the goal is to assess the network’s ability to estimate the engagement level accurately, as close as possible to the ground truth (ie, the actual engagement level annotated for the data sample). Therefore, we compute the mean squared error of model output (<inline-graphic xlink:href="formative_v9i1e46390_fig5.png" xlink:type="simple" mimetype="image"/>, ie, predicted level of engagement) with ground truth (<italic>y<sub>t</sub></italic>, ie, actual level of engagement).</p>
        <disp-formula>
          <graphic xlink:href="formative_v9i1e46390_fig10.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>2. <italic>L<sub>un</sub></italic>—in semisupervised GANs, the labeled dataset contains a small portion of data with labels, while most of the data are unlabeled. The goal of the L<sub>un</sub> loss function is to improve the performance of the generator by minimizing the distance between the feature spaces of the labeled and unlabeled datasets. This helps to ensure that the generator produces feature maps that are consistent with both labeled and unlabeled data, thereby improving the generalization performance of the model. By minimizing the distance between the feature spaces of labeled and unlabeled datasets, the model can effectively learn from both labeled and unlabeled data, which is particularly useful in cases where labeled data is scarce.</p>
        <disp-formula>
          <graphic xlink:href="formative_v9i1e46390_fig6.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>3. <italic>L<sub>fake</sub></italic>—it is a loss function used to update the discriminator network. The objective of this loss is to maximize the distance between the feature space of unlabeled dataset and fake features generated by the generator. This enables the discriminator to distinguish the real and fake data features better.</p>
        <disp-formula>
          <graphic xlink:href="formative_v9i1e46390_fig7.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>4. <italic>L<sub>gen</sub></italic>—the generator network’s objective is to produce fake feature vectors <italic>h<sub>fakeT</sub></italic> that can be passed off as real feature vectors <italic>h<sub>T</sub></italic>: L<sub>gen</sub> updates the generator network to do just that. L<sub>gen</sub> encourages the generator to learn the underlying distribution of unlabeled data features (real) and generate fake features that match the feature statistics of the real features.</p>
        <disp-formula>
          <graphic xlink:href="formative_v9i1e46390_fig8.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>Similar to SR-GAN, we also make use of a gradient penalty (P) to keep the gradient of the discriminator in check, which helps convergence. The gradient penalty is calculated with respect to a randomly chosen point on the convex manifold connecting the unlabeled feature vector <italic>h<sub>T</sub></italic> to the fake feature vector <italic>h<sub>fakeT</sub></italic>. We compute it as</p>
        <disp-formula>
          <graphic xlink:href="formative_v9i1e46390_fig9.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </disp-formula>
        <p>Here, <italic>p<sub>interpolate</sub></italic> examples are generated by <italic>αp<sub>unlabeled</sub> + (1 – α)p<sub>fake</sub></italic> for <italic>α ~ U (0,1)</italic>.</p>
        <p><italic>U (0,1)</italic> represents a uniform distribution over the range from 0 to 1.</p>
        <p>The overall loss function used for training the network is:</p>
        <disp-formula>L = L<sub>lab</sub> + L<sub>un</sub> + L<sub>fake</sub> + L<sub>gen</sub> + λP <bold>(8)</bold>
        </disp-formula>
      </sec>
      <sec>
        <title>Overall Pipeline</title>
        <p>In this section, we present an overview of how the various components of our proposed approach interact to estimate the engagement level of a patient with mental health problems during a tele–mental health session. We provide insights into the working mechanism of our model and shed light on the underlying processes involved in engagement level estimation in a multimodal setting.</p>
        <p>As depicted in <xref rid="figure3" ref-type="fig">Figure 3</xref>, the proposed model involves data preprocessing, which includes extracting video frames, audio signal, and text data from the input video clip. To capture engagement dynamics over small time scales, we analyze engagement at the microlevel, considering video clips of a few seconds duration. This aligns with the person-oriented analysis suggested by Sinatra et al [<xref ref-type="bibr" rid="ref49">49</xref>]. Specifically, for the MEDICA dataset and real-world data, we consider video clips of 3 seconds duration. All samples, labeled and unlabeled, from the dataset are treated as <italic>real</italic> data for the semisupervised GAN. The multimodal feature extractor (consisting of the cognitive and affective state modalities) is used to obtain the <italic>h<sub>T</sub></italic> features from the preprocessed visual, audio, and text data. <italic>h<sub>T</sub></italic> is considered real. In parallel, the generator network creates <italic>h<sub>fakeT</sub></italic> (ie, fake <italic>h<sub>T</sub></italic>) features using gaussian noise as input. The labeled and unlabeled <italic>h<sub>T</sub></italic> features obtained from the multimodal feature extractor and the <italic>h<sub>fakeT</sub></italic> features obtained from the generator are then input to the discriminator network. In addition to distinguishing between real (<italic>h<sub>T</sub></italic>) and fake (<italic>h<sub>fakeT</sub></italic>) features, the discriminator network outputs the engagement level of the patient with mental health problems as a continuous value.</p>
        <p>The subsequent sections outline the different studies being conducted to verify our approach.</p>
      </sec>
      <sec>
        <title>Study 1: Testing Our Proposed Approach on MEDICA</title>
        <p>The purpose of the first study is to test the applicability of our approach to estimate the levels of engagement exhibited by the participants present in the videos. The test was conducted on the MEDICA dataset. In addition, we compared the performance of the proposed model against other similar frameworks to understand its effectiveness. To test the models, we partitioned the dataset in a ratio of 70:10:20 for training, testing, and validation. Motivated by recent works in clinical psychotherapy [<xref ref-type="bibr" rid="ref50">50</xref>-<xref ref-type="bibr" rid="ref52">52</xref>], we used the standard evaluation metric of root mean square error (RMSE) to evaluate our approach. The smaller the RMSE value, the better the performance of the model for the engagement estimation task.</p>
      </sec>
      <sec>
        <title>Study 2: Ablation Studies</title>
        <p>The proposed model design involves the introduction of 2 modalities, namely, affective and cognitive. In this study, we explored the contribution of these 2 modalities for the purpose of engagement detection. We aimed to test the effectiveness of the model with and without these modalities. Therefore, we ran the overall pipeline described earlier by removing either one of the modalities corresponding to affective and cognitive states and reported our findings. The tests were conducted on the MEDICA dataset. Similar to our previous study, we performed the evaluation using RMSE.</p>
      </sec>
      <sec>
        <title>Study 3: Analysis on Real-World Data</title>
        <p>Evaluating the model only on MEDICA is not sufficient to prove the usability of the model in the real-world setting. Therefore, this study aimed to test our approach on data collected from real-world scenarios. The study involved testing the proposed model that has been trained on MEDICA using real-world data. We also compared the performance of our proposed model against other baselines explored in study 1 on the real-world data. Apart from testing our model’s usability for the real world, we also tested its capability to perform on unseen real-world data, which is different from the data (MEDICA) it has been trained on. Contrary to previous experiments where we compared the performance on engagement estimation values, in this test, we will study the correlation of the model-estimated engagement values with the WAI score. This will help us to also test whether the scores estimated by our model are clinically useful for therapists.</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>Both the MEDICA dataset and the real-world data collection processes have been approved by the institutional review boards (#HP-00092966) at both the University of Maryland College Park’s computer science department and University of Maryland Baltimore medical school.</p>
        <p>Participation of the caregivers while collecting the real-world data was completely voluntary. They could withdraw anytime without any negative consequences. They were informed about the purpose of the study and the analysis that would be conducted using the data collected.</p>
        <p>The therapist and the participating caregiver were given the option of allowing us to store videotaped recordings for development of future studies or to request to have all videotape sessions destroyed after a 2-year period. The video recordings were stored in our laboratory databases and subject to strict University of Maryland privacy protocol, including encryption and password protection. While the research is ongoing, only select project personnel with internal clearance can have access to the data. In addition, we ensured that the data are untampered using standard cryptographic hash functions. During the video storage process, we deidentified any facial images besides the health care provider or caregiver who are recorded (eg, sibling interrupts session) so that they are not recognizable on the recorded video. All data related to enrollment, demographics, and questionnaires will be stored in an electronic database at the University of Maryland and will be double password protected with only select research members having access. The participating caregiver was given an individual ID number, and their data were deidentified to ensure it could not be linked back to them.</p>
        <p>While the participating caregiver may not directly benefit from the study, they received 1-month free internet hot spot and a Fitbit, which were provided as part of the real-world study equipment. They were compensated with a US $20 gift card after returning the equipment.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>The semisupervised models were trained on an NVIDIA GeForce GTX 1080 Ti GPU with a batch size of 512 and a learning rate of 0.0001 for 50 epochs.</p>
      <sec>
        <title>Study 1: Testing Our Proposed Approach on MEDICA</title>
        <p>The purpose of the first study is to demonstrate the ability of our model to estimate the level of engagement exhibited by the patient in the video. This study was performed on the MEDICA dataset. As our proposed methodology leverages a semisupervised approach, we extracted labeled samples from MEDICA and unlabeled samples from the CMU-MOSEI dataset. After preprocessing, we extracted 12,854 unlabeled data points from CMU-MOSEI. We split the 1229 labeled data points from MEDICA into 70:10:20 for training, validation, and testing, respectively. Therefore, the split of the labeled training data to unlabeled training data points was 860:12,854. In addition, the ground truth engagement levels corresponding to the training data points in the MEDICA dataset were normalized to fall within a range of 0 and 1. We compared our model with the following state-of-the-art methods for engagement detection:</p>
        <p>1. Kaur et al [<xref ref-type="bibr" rid="ref28">28</xref>] used a deep multiple instance learning–based framework for detecting engagement in students. They extracted local binary pattern on three orthogonal planes (LBP-TOP) features from the facial video segments and performed linear regression using a deep neural network to estimate the engagement scores.</p>
        <p>2. Nezami et al [<xref ref-type="bibr" rid="ref45">45</xref>] performed a semisupervised engagement detection using a semisupervised support vector machine.</p>
        <p>In addition to being state-of-the-art, these methods can be used in a telehealth setting like ours. We used the publicly available implementation proposed by Kaur et al [<xref ref-type="bibr" rid="ref28">28</xref>] and trained the entire model on MEDICA. Nezami et al [<xref ref-type="bibr" rid="ref45">45</xref>] do not have a publicly available implementation. We reproduced the method to the best of our understanding.</p>
        <p><xref ref-type="table" rid="table2">Table 2</xref> summarizes the RMSE values obtained for the methods described by Kaur et al [<xref ref-type="bibr" rid="ref28">28</xref>] and Nezami et al [<xref ref-type="bibr" rid="ref45">45</xref>], as well as for our approach. We observed an improvement of at least 40%. Our approach is one of the first methods of engagement estimation built on the principles of psychotherapy. The modules used, specifically cognitive and affective states, help the overall model to effectively mimic the ways a psychotherapist perceives the patient’s level of engagement. Similar to psychotherapists, these modules also look for specific engagement-related cues exhibited by the patient in the video.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>Comparison of RMSE values obtained from various methods for estimating levels of engagement on the Multimodal Engagement Detection in Clinical Analysis dataset.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="500"/>
            <col width="500"/>
            <thead>
              <tr valign="top">
                <td>Method</td>
                <td>RMSE<sup>a</sup> for engagement</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Kaur et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td>
                <td>0.96</td>
              </tr>
              <tr valign="top">
                <td>Nezami et al [<xref ref-type="bibr" rid="ref45">45</xref>]</td>
                <td>0.17</td>
              </tr>
              <tr valign="top">
                <td>Our approach</td>
                <td>0.10</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>RMSE: root mean square error.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Study 2: Ablation Studies</title>
        <p>To show the importance of the different components (affective and cognitive) used in our approach, we ran our method on MEDICA by removing either one of the modules corresponding to affective or cognitive states and reported our findings. <xref ref-type="table" rid="table3">Table 3</xref> summarizes the results obtained from the ablation experiments. We observed that the ablated model (ie, only using affective [A] or cognitive [C] modules) did not perform as well as the model that includes both modules. To understand and verify the contribution of these modules further, we leveraged the other labels (stress, hesitation, and attention) available in MEDICA and performed regression tasks using our proposed architecture on all of them. We observed that mode C performed better when predicting stress and hesitation values. Mode A performed better in estimating a patient’s level of attentiveness. These results agree with our understanding of cognitive state and affective state. Therefore, the combination of affective and cognitive state modes helps in efficiently predicting the engagement level of the patient.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Ablation experiments on the Multimodal Engagement Detection in Clinical Analysis dataset. We ran our proposed model using only 1 modality at a time (either affective or cognitive) and compared the performance with that of using both modalities together.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="260"/>
            <col width="200"/>
            <col width="180"/>
            <col width="180"/>
            <col width="180"/>
            <thead>
              <tr valign="top">
                <td>Modality</td>
                <td>RMSE<sup>a</sup> for engagement</td>
                <td>RMSE for stress</td>
                <td>RMSE for hesitation</td>
                <td>RMSE for attention</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Affective</td>
                <td>0.24</td>
                <td>0.15</td>
                <td>0.146</td>
                <td>0.07</td>
              </tr>
              <tr valign="top">
                <td>Cognitive</td>
                <td>0.3</td>
                <td>0.13</td>
                <td>0.16</td>
                <td>0.08</td>
              </tr>
              <tr valign="top">
                <td>Affective + cognitive (our approach)</td>
                <td>0.10</td>
                <td>0.12</td>
                <td>0.14</td>
                <td>0.1</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table3fn1">
              <p><sup>a</sup>RMSE: root mean square error.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Study 3: Analysis on Real-World Data</title>
        <p>Our model that has been trained for estimating engagement levels using the MEDICA dataset was tested on the processed real-world data. WAI scoring is based on certain observations the therapist makes during the session with the patient. The score obtained from our model was different from the WAI score, but we claim that similar to WAI, our estimates also captured the engagement levels of the patient well. If this is indeed the case, then both WAI and our estimates should be correlated. As discussed earlier, a single WAI score is reported by the therapist (health care provider) for the entire session. Unlike WAI, which reports a single score per session, our method, along with those proposed by Kaur et al [<xref ref-type="bibr" rid="ref28">28</xref>] and Nezami et al [<xref ref-type="bibr" rid="ref45">45</xref>], performs microanalysis, generating engagement estimates at multiple time points within a session. To enable a fair comparison, we calculated the mean of the engagement estimates across each session for all methods. The mean engagement estimates across sessions were as follows: Kaur et al [<xref ref-type="bibr" rid="ref28">28</xref>] had a mean of 0.50 (SD: 7.82e-17), Nezami et al [<xref ref-type="bibr" rid="ref45">45</xref>] had a mean of 0.64 (SD: 0.017), and our approach had a mean of 0.65 (SD: 0.006). We subsequently examined each of their correlations with the corresponding WAI scores across sessions. Given that WAI scores and engagement estimates are measured on different scales, WAI scores were normalized to a 0-1 range for consistency with the engagement values from the methods. We found that Kaur et al [<xref ref-type="bibr" rid="ref28">28</xref>] obtained a correlation score of –0.03, Nezami et al [<xref ref-type="bibr" rid="ref45">45</xref>] obtained a score of –0.24 and our approach obtained a much better score of 0.38. 
We plot the engagement level estimations obtained for each session against its corresponding WAI score, as shown in <xref rid="figure4" ref-type="fig">Figure 4</xref>. The plot corresponding to our approach shows a noticeably better alignment with the WAI score trends compared to other methods [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref45">45</xref>].</p>
        <fig id="figure4" position="float">
          <label>Figure 4</label>
          <caption>
            <p>Scatterplot between the engagement level estimations obtained from each method for each session: (A) the plot between engagement level estimations obtained from our approach and the Working Alliance Inventory (WAI) scores, (B) the plot between the engagement level estimations obtained by Kaur et al [<xref ref-type="bibr" rid="ref28">28</xref>] for each session and the WAI scores, (C) the plot between the engagement level estimations for each session obtained by Nezami et al [<xref ref-type="bibr" rid="ref45">45</xref>] and the WAI scores.</p>
          </caption>
          <graphic xlink:href="formative_v9i1e46390_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
        <p>Additionally, instead of just taking the mean, we also took the median of the engagement level estimates available at different instances of the sessions. The average median of the engagement estimates across sessions for different methods were as follows: Kaur et al [<xref ref-type="bibr" rid="ref28">28</xref>] had an average median of 0.50 (IQR 0.0), Nezami et al [<xref ref-type="bibr" rid="ref45">45</xref>] had an average median of 0.64 (IQR 0.02), and our approach had an average median of 0.65 (IQR 0.007).</p>
        <p>We then examined the median of the engagement estimates for each session, obtained using different methods, and assessed their correlation with the WAI scores. In this case, Kaur et al [<xref ref-type="bibr" rid="ref28">28</xref>] obtained a correlation score of 0, Nezami et al [<xref ref-type="bibr" rid="ref45">45</xref>] obtained a score of –0.18, and our approach obtained a relatively high score of 0.40. These findings reinforce that our model captures WAI patterns more effectively than prior methods. Its stronger positive correlation with the WAI scores indicates its capability to better estimate engagement levels during therapy sessions.</p>
        <p>The conceptual model of our proposed approach is also supported by the theoretical work by Bordin [<xref ref-type="bibr" rid="ref32">32</xref>]. According to this theory, the therapist-provider alliance is driven by 3 factors: bond, agreement on goals, and agreement on tasks; these factors align well with the features identified in this work. While bond would correspond with the affective state, goals and task agreement correspond with the cognitive state. The merit of the approach by Bordin [<xref ref-type="bibr" rid="ref32">32</xref>] is that it has been used for child and adult therapy, and it is one of the more widely studied therapeutic alliance measures. Therefore, it is no surprise that our approach can work well to provide an estimate of engagement levels in a tele–mental health session.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>In this study, we developed a machine learning–based model that integrates features related to cognitive and affective psychological states to predict patient engagement levels during tele–mental health sessions. The input to our proposed algorithm is the visual, audio, and text data available, while the output is the engagement level of the patient. Our investigation demonstrated the significant promise of our proposed method, achieving an average improvement of 40% in RMSE for predicting engagement levels on the newly introduced MEDICA dataset compared to prior works. In real-world settings, our method provided patient engagement estimates that closely aligned with the trends observed in WAI scores assigned by therapists based on their sessions [<xref ref-type="bibr" rid="ref30">30</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. This study highlights the potential of our multimodal engagement estimation algorithm to enhance the quality of telehealth services and ultimately improve patient outcomes.</p>
      </sec>
      <sec>
        <title>Comparison With Prior Work</title>
        <p>Patient engagement is recognized as a pivotal determinant of a successful therapy session, particularly in the realm of mental health care [<xref ref-type="bibr" rid="ref53">53</xref>]. Current psychiatric literature [<xref ref-type="bibr" rid="ref54">54</xref>-<xref ref-type="bibr" rid="ref56">56</xref>] has extensively examined a range of strategies and methodologies designed to foster [<xref ref-type="bibr" rid="ref57">57</xref>] and sustain patient engagement during therapy sessions. These strategies are crucial for achieving optimal therapeutic outcomes. There has been a growing interest in applying artificial intelligence (AI) to mental health [<xref ref-type="bibr" rid="ref58">58</xref>-<xref ref-type="bibr" rid="ref62">62</xref>], with innovative approaches being explored to enhance patient care. However, despite the proliferation of AI-based methods in mental health applications, there remains a noticeable gap in research specifically focused on understanding and quantifying engagement. This gap is even more evident in the context of tele–mental health [<xref ref-type="bibr" rid="ref63">63</xref>]. The transition from traditional in-person therapy to digital platforms has introduced unique challenges, as conventional indicators of engagement, such as body language [<xref ref-type="bibr" rid="ref64">64</xref>], eye contact [<xref ref-type="bibr" rid="ref65">65</xref>], and physical presence, may be less discernible or entirely absent in web-based settings.</p>
        <p>Our research addresses this critical gap by pioneering an approach that estimates patient engagement specifically in the tele–mental health context. This work is the first of its kind to target engagement estimation in this setting, offering a novel perspective that has largely been overlooked in previous studies. While the broader concept of engagement has been explored by AI in various domains, existing methods [<xref ref-type="bibr" rid="ref28">28</xref>,<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref66">66</xref>] have not been adapted or tested within the unique demands of mental health care, particularly in a remote or web-based environment. From an algorithmic standpoint, previous works have explored both unimodal and multimodal learning-based networks to understand engagement. Unimodal approaches focus on individual modalities, such as speech, body posture, biometric data (eg, heart rate variability and skin conductance) [<xref ref-type="bibr" rid="ref67">67</xref>,<xref ref-type="bibr" rid="ref68">68</xref>], gaze direction [<xref ref-type="bibr" rid="ref69">69</xref>], and head pose [<xref ref-type="bibr" rid="ref70">70</xref>]. Multimodal approaches [<xref ref-type="bibr" rid="ref71">71</xref>] integrate various signals, leveraging the complementary strengths of each modality to provide a more holistic understanding of engagement. In contrast to these works, our research focuses on using the types of information that are readily available during a tele–mental health session, specifically audio, video, and text data. This approach is designed to be practical and easily implementable in real-world telehealth settings. This also ensures that the engagement estimation process is minimally disruptive to the therapeutic experience during the telehealth session. We validated our approach using our newly introduced dataset, MEDICA. In addition, we tested it on data collected from real-world therapy sessions. 
Our method achieved state-of-the-art performance in both cases.</p>
      </sec>
      <sec>
        <title>Strengths and Implications</title>
        <p>In this work, we explored the multidimensional and temporally dynamic nature of patient engagement [<xref ref-type="bibr" rid="ref72">72</xref>]. Our method estimates engagement at regular short intervals. These characteristics of our approach align perfectly with the person-oriented analysis discussed by Sinatra et al [<xref ref-type="bibr" rid="ref49">49</xref>]. We released a new dataset, MEDICA, to enhance mental health research, specifically toward understanding the engagement levels of patients attending therapy sessions. To the best of our knowledge, there is no other multimodal dataset that caters specifically to the needs of the mental health–based research. In addition, while there are some image-based [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref28">28</xref>] or sensor information–based datasets, there is no dataset that addresses the possibility of exploring engagement detection using visual, audio, and text modalities. We verified the usefulness of our machine learning method using 3 studies. Study 1 demonstrates the ability of our model to estimate the engagement level exhibited by the patient in video. It was done to understand the usefulness of the proposed model in comparison to other existing engagement estimation methods such as those by Nezami et al [<xref ref-type="bibr" rid="ref45">45</xref>] and Kaur et al [<xref ref-type="bibr" rid="ref28">28</xref>]. Our approach reported an RMSE [<xref ref-type="bibr" rid="ref50">50</xref>-<xref ref-type="bibr" rid="ref52">52</xref>] of 0.1, which on average is 40% less than the RMSE reported by other methods (<xref ref-type="table" rid="table2">Table 2</xref>). Our study is one of the first to estimate engagement specifically for patients with mental health problems in a tele–mental health setting. 
The modalities used, specifically cognitive and affective states, propel the overall algorithm to mimic the way a psychotherapist perceives the patient’s level of engagement. As part of study 2, we were interested to understand the contribution of the 2 modalities (cognitive and affective states). The experiments conducted as part of this study are also termed as <italic>ablation</italic> as it involves running the network pipeline using only 1 of the 2 modalities available at a time. We leveraged the annotations in MEDICA on stress, hesitation, and attention to dig deeper regarding the contribution of the cognitive and affective state modalities. We ran the same ablation experiments on not only patient engagement but also stress, hesitation, and attention estimation. While the combination of affective and cognitive states gives better results (RMSE of 0.1), it was interesting to note that the observations obtained supported the theoretical discussion about the 2 states. Cognitive state modality was able to understand stress (RMSE of 0.13) better. Affective state that was built on the concept of capturing distraction using emotion inconsistency between visual, audio, and text data helped relate better with the understanding of attention (RMSE of 0.07). These results agree with our understanding of cognitive state and affective state discussed earlier.</p>
        <p>WAI is a popular measure of patient engagement used by many mental health care providers. Therefore, in study 3, we wished to understand and check if the values being estimated by our method related well with the trends observed in WAI. We did so by computing the correlation scores between WAI and the values estimated by our proposed model. In addition, to test the effectiveness of our approach, we also compared the correlation scores obtained between WAI and patient engagement values estimated by other existing methods. Positive values are preferred because they indicate similarity in distribution behavior. Negative values would indicate that the trends observed in WAI and the engagement values predicted are different and opposite. The results (<xref ref-type="fig" rid="figure4">Figure 4</xref>) revealed that our model was able to capture the trends in WAI much better than other engagement estimation methods, reinforcing its clinical relevance. The correlation strength observed between WAI and our method’s estimated engagement values is better than other approaches, but it also indicates the need for further investigation. The trend is positive, suggesting that we have successfully modeled engagement to some extent. Further analysis will provide opportunities to enhance and strengthen the relationship between WAI and the engagement values estimated by machine learning.</p>
      </sec>
      <sec>
        <title>Limitations and Future Work</title>
        <p>One of the primary limitations of the proposed algorithm is that engagement predictions may not be optimal in case of occlusions or missing modalities, data corruption due to low internet bandwidth, and integration of wearable devices with our model. We recognize that this situation is likely to occur in a tele–mental health session and plan to incorporate solutions for it in our future work. In addition, we aim to explore ways to make the predictions more explainable, allowing psychotherapists to receive evidence-based suggestions to support their final decisions.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>Telehealth behavioral services, delivered via videoconferencing systems, have emerged as a cost-effective, dependable, and secure option for mental health treatment, particularly in recent times. However, gauging patient engagement, a critical mental health care standard, remains challenging in telehealth due to the absence of nonverbal cues and sensory data such as heart rate. To address this, we propose a novel multimodal semisupervised GAN that leverages affective and cognitive features from psychology to estimate engagement levels using visuals, audio, and text from video calls. This approach can significantly enhance social interactions and assist psychotherapists during tele–mental health sessions. Engagement is typically assessed through patient reports, which are susceptible to response bias, and current measures such as “show rate” and “patient satisfaction” do not accurately reflect the health care provider–patient alliance. Our model demonstrates its effectiveness on both our newly introduced dataset called MEDICA as well as real-world data. Given the lack of systematic engagement measurement and the limited training for health care providers on telehealth engagement, our proposed system offers a promising solution to these challenges, promoting better patient-health care provider interactions and making telehealth more effective.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Demographic information on the real-world experiment participants.</p>
        <media xlink:href="formative_v9i1e46390_app1.docx" xlink:title="DOCX File , 14 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AI</term>
          <def>
            <p>artificial intelligence</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">CMU-MOSEI</term>
          <def>
            <p>Carnegie Mellon University Multimodal Opinion Sentiment and Emotion Intensity</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">CMU-MOSI</term>
          <def>
            <p>Carnegie Mellon University Multimodal Corpus of Sentiment Intensity</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">GAN</term>
          <def>
            <p>generative adversarial net</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">HIPAA</term>
          <def>
            <p>Health Insurance Portability and Accountability Act</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">MEDICA</term>
          <def>
            <p>Multimodal Engagement Detection in Clinical Analysis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">MFCC</term>
          <def>
            <p>mel-frequency cepstral coefficients</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">RMSE</term>
          <def>
            <p>root mean square error</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">WAI</term>
          <def>
            <p>Working Alliance Inventory</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb10">CREMA-D</term>
          <def>
            <p>Crowd-source Emotional Multimodal Actors Dataset</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb11">VGG-B</term>
          <def>
            <p>Visual Geometry Group-B</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb12">BERT</term>
          <def>
            <p>Bidirectional Encoder Representations from Transformers</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb13">LBP-TOP</term>
          <def>
            <p>local binary pattern on three orthogonal planes</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>This research was fully supported by the MPower grant.</p>
    </ack>
    <notes>
      <title>Data Availability</title>
      <p>The datasets generated during and analyzed during this study are available from the corresponding author on reasonable request.</p>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="web">
          <article-title>WHO highlights urgent need to transform mental health and mental health care</article-title>
          <source>World Health Organization</source>
          <year>2022</year>
          <month>6</month>
          <day>17</day>
          <access-date>2024-11-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.who.int/news/item/17-06-2022-who-highlights-urgent-need-to-transform-mental-health-and-mental-health-care">https://www.who.int/news/item/17-06-2022-who-highlights-urgent-need-to-transform-mental-health-and-mental-health-care</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="web">
          <article-title>The world health report 2001: mental health; new understanding, new hope</article-title>
          <source>World Health Organization</source>
          <year>2001</year>
          <access-date>2024-11-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://iris.who.int/handle/10665/42390">https://iris.who.int/handle/10665/42390</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Murray</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Vos</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Lozano</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Naghavi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Flaxman</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Michaud</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Ezzati</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shibuya</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Salomon</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Abdalla</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Aboyans</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Abraham</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Ackerman</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Aggarwal</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ahn</surname>
              <given-names>SY</given-names>
            </name>
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Alvarado</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>HR</given-names>
            </name>
            <name name-style="western">
              <surname>Anderson</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Andrews</surname>
              <given-names>KG</given-names>
            </name>
            <name name-style="western">
              <surname>Atkinson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Baddour</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Bahalim</surname>
              <given-names>AN</given-names>
            </name>
            <name name-style="western">
              <surname>Barker-Collo</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Barrero</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Bartels</surname>
              <given-names>DH</given-names>
            </name>
            <name name-style="western">
              <surname>Basáñez</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Baxter</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bell</surname>
              <given-names>ML</given-names>
            </name>
            <name name-style="western">
              <surname>Benjamin</surname>
              <given-names>EJ</given-names>
            </name>
            <name name-style="western">
              <surname>Bennett</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Bernabé</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Bhalla</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Bhandari</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bikbov</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bin Abdulhak</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Birbeck</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Black</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Blencowe</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Blore</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Blyth</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Bolliger</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Bonaventure</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Boufous</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Bourne</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Boussinesq</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Braithwaite</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Brayne</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bridgett</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Brooker</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Brooks</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Brugha</surname>
              <given-names>TS</given-names>
            </name>
            <name name-style="western">
              <surname>Bryan-Hancock</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Bucello</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Buchbinder</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Buckle</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Budke</surname>
              <given-names>CM</given-names>
            </name>
            <name name-style="western">
              <surname>Burch</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Burney</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Burstein</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Calabria</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Canter</surname>
              <given-names>CE</given-names>
            </name>
            <name name-style="western">
              <surname>Carabin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Carapetis</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Carmona</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cella</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Charlson</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Cheng</surname>
              <given-names>AT</given-names>
            </name>
            <name name-style="western">
              <surname>Chou</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Chugh</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Coffeng</surname>
              <given-names>LE</given-names>
            </name>
            <name name-style="western">
              <surname>Colan</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Colquhoun</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Colson</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Condon</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Connor</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Cooper</surname>
              <given-names>LT</given-names>
            </name>
            <name name-style="western">
              <surname>Corriere</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cortinovis</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>de Vaccaro</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Couser</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Cowie</surname>
              <given-names>BC</given-names>
            </name>
            <name name-style="western">
              <surname>Criqui</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Cross</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dabhadkar</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Dahiya</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dahodwala</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Damsere-Derry</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Danaei</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Davis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>De Leo</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Degenhardt</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Dellavalle</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Delossantos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Denenberg</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Derrett</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Des Jarlais</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Dharmaratne</surname>
              <given-names>SD</given-names>
            </name>
            <name name-style="western">
              <surname>Dherani</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Diaz-Torne</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dolk</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Dorsey</surname>
              <given-names>ER</given-names>
            </name>
            <name name-style="western">
              <surname>Driscoll</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Duber</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Ebel</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Edmond</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Elbaz</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ali</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Erskine</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Erwin</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Espindola</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Ewoigbokhan</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Farzadfar</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Feigin</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Felson</surname>
              <given-names>DT</given-names>
            </name>
            <name name-style="western">
              <surname>Ferrari</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ferri</surname>
              <given-names>CP</given-names>
            </name>
            <name name-style="western">
              <surname>Fèvre</surname>
              <given-names>EM</given-names>
            </name>
            <name name-style="western">
              <surname>Finucane</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>Flaxman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Flood</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Foreman</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Forouzanfar</surname>
              <given-names>MH</given-names>
            </name>
            <name name-style="western">
              <surname>Fowkes</surname>
              <given-names>FG</given-names>
            </name>
            <name name-style="western">
              <surname>Fransen</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Freeman</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Gabbe</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Gabriel</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Gakidou</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Ganatra</surname>
              <given-names>HA</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Gaspari</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Gillum</surname>
              <given-names>RF</given-names>
            </name>
            <name name-style="western">
              <surname>Gmel</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gonzalez-Medina</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gosselin</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Grainger</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Grant</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Groeger</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Guillemin</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Gunnell</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Haagsma</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hagan</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Halasa</surname>
              <given-names>YA</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Haring</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Haro</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Harrison</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Havmoeller</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hay</surname>
              <given-names>RJ</given-names>
            </name>
            <name name-style="western">
              <surname>Higashi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hill</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hoen</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Hotez</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Hoy</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Huang</surname>
              <given-names>JJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ibeanusi</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Jacobsen</surname>
              <given-names>KH</given-names>
            </name>
            <name name-style="western">
              <surname>James</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Jarvis</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Jasrasaria</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Jayaraman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Johns</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Jonas</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Karthikeyan</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Kassebaum</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Kawakami</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Keren</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Khoo</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>King</surname>
              <given-names>CH</given-names>
            </name>
            <name name-style="western">
              <surname>Knowlton</surname>
              <given-names>LM</given-names>
            </name>
            <name name-style="western">
              <surname>Kobusingye</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Koranteng</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Krishnamurthi</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Laden</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Lalloo</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Laslett</surname>
              <given-names>LL</given-names>
            </name>
            <name name-style="western">
              <surname>Lathlean</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Leasher</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>YY</given-names>
            </name>
            <name name-style="western">
              <surname>Leigh</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Levinson</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lim</surname>
              <given-names>SS</given-names>
            </name>
            <name name-style="western">
              <surname>Limb</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>JK</given-names>
            </name>
            <name name-style="western">
              <surname>Lipnick</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lipshultz</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Loane</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ohno</surname>
              <given-names>SL</given-names>
            </name>
            <name name-style="western">
              <surname>Lyons</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mabweijano</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>MacIntyre</surname>
              <given-names>MF</given-names>
            </name>
            <name name-style="western">
              <surname>Malekzadeh</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mallinger</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Manivannan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Marcenes</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>March</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Margolis</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Marks</surname>
              <given-names>GB</given-names>
            </name>
            <name name-style="western">
              <surname>Marks</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Matsumori</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Matzopoulos</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Mayosi</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>McAnulty</surname>
              <given-names>JH</given-names>
            </name>
            <name name-style="western">
              <surname>McDermott</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>McGill</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>McGrath</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Medina-Mora</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Meltzer</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Mensah</surname>
              <given-names>GA</given-names>
            </name>
            <name name-style="western">
              <surname>Merriman</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Meyer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Miglioli</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Miller</surname>
              <given-names>TR</given-names>
            </name>
            <name name-style="western">
              <surname>Mitchell</surname>
              <given-names>PB</given-names>
            </name>
            <name name-style="western">
              <surname>Mock</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mocumbi</surname>
              <given-names>AO</given-names>
            </name>
            <name name-style="western">
              <surname>Moffitt</surname>
              <given-names>TE</given-names>
            </name>
            <name name-style="western">
              <surname>Mokdad</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Monasta</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Montico</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moradi-Lakeh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Moran</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Morawska</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mori</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Murdoch</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Mwaniki</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Naidoo</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Nair</surname>
              <given-names>MN</given-names>
            </name>
            <name name-style="western">
              <surname>Naldi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Narayan</surname>
              <given-names>KMV</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>PK</given-names>
            </name>
            <name name-style="western">
              <surname>Nelson</surname>
              <given-names>RG</given-names>
            </name>
            <name name-style="western">
              <surname>Nevitt</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Newton</surname>
              <given-names>CR</given-names>
            </name>
            <name name-style="western">
              <surname>Nolte</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Norman</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Norman</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>O'Donnell</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>O'Hanlon</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Olives</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Omer</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Ortblad</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Osborne</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Ozgediz</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Page</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pahari</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Pandian</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Rivero</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Patten</surname>
              <given-names>SB</given-names>
            </name>
            <name name-style="western">
              <surname>Pearce</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Padilla</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Perez-Ruiz</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Perico</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Pesudovs</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Phillips</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Phillips</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Pierce</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Pion</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Polanczyk</surname>
              <given-names>GV</given-names>
            </name>
            <name name-style="western">
              <surname>Polinder</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pope</surname>
              <given-names>CA</given-names>
            </name>
            <name name-style="western">
              <surname>Popova</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Porrini</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Pourmalek</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Prince</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pullan</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Ramaiah</surname>
              <given-names>KD</given-names>
            </name>
            <name name-style="western">
              <surname>Ranganathan</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Razavi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Regan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Rehm</surname>
              <given-names>JT</given-names>
            </name>
            <name name-style="western">
              <surname>Rein</surname>
              <given-names>DB</given-names>
            </name>
            <name name-style="western">
              <surname>Remuzzi</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Richardson</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Rivara</surname>
              <given-names>FP</given-names>
            </name>
            <name name-style="western">
              <surname>Roberts</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Robinson</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>De Leòn</surname>
              <given-names>FR</given-names>
            </name>
            <name name-style="western">
              <surname>Ronfani</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Room</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenfeld</surname>
              <given-names>LC</given-names>
            </name>
            <name name-style="western">
              <surname>Rushton</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sacco</surname>
              <given-names>RL</given-names>
            </name>
            <name name-style="western">
              <surname>Saha</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Sampson</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Sanchez-Riera</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Sanman</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Schwebel</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Scott</surname>
              <given-names>JG</given-names>
            </name>
            <name name-style="western">
              <surname>Segui-Gomez</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Shahraz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Shepard</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Shin</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Shivakoti</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Singleton</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sleet</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Sliwa</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Smith</surname>
              <given-names>JL</given-names>
            </name>
            <name name-style="western">
              <surname>Stapelberg</surname>
              <given-names>NJ</given-names>
            </name>
            <name name-style="western">
              <surname>Steer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Steiner</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Stolk</surname>
              <given-names>WA</given-names>
            </name>
            <name name-style="western">
              <surname>Stovner</surname>
              <given-names>LJ</given-names>
            </name>
            <name name-style="western">
              <surname>Sudfeld</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Syed</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Tamburlini</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Tavakkoli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>HR</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>WJ</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Thomson</surname>
              <given-names>WM</given-names>
            </name>
            <name name-style="western">
              <surname>Thurston</surname>
              <given-names>GD</given-names>
            </name>
            <name name-style="western">
              <surname>Tleyjeh</surname>
              <given-names>IM</given-names>
            </name>
            <name name-style="western">
              <surname>Tonelli</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Towbin</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Truelsen</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tsilimbaris</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Ubeda</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Undurraga</surname>
              <given-names>EA</given-names>
            </name>
            <name name-style="western">
              <surname>van der Werf</surname>
              <given-names>MJ</given-names>
            </name>
            <name name-style="western">
              <surname>van Os</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Vavilala</surname>
              <given-names>MS</given-names>
            </name>
            <name name-style="western">
              <surname>Venketasubramanian</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Watt</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Weatherall</surname>
              <given-names>DJ</given-names>
            </name>
            <name name-style="western">
              <surname>Weinstock</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Weintraub</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Weisskopf</surname>
              <given-names>MG</given-names>
            </name>
            <name name-style="western">
              <surname>Weissman</surname>
              <given-names>MM</given-names>
            </name>
            <name name-style="western">
              <surname>White</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>Whiteford</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Wiebe</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Wiersma</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Wilkinson</surname>
              <given-names>JD</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>HC</given-names>
            </name>
            <name name-style="western">
              <surname>Williams</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Witt</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Wolfe</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Woolf</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>Wulf</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yeh</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Zaidi</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zonies</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Lopez</surname>
              <given-names>AD</given-names>
            </name>
            <name name-style="western">
              <surname>AlMazroa</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Memish</surname>
              <given-names>ZA</given-names>
            </name>
          </person-group>
          <article-title>Disability-adjusted life years (DALYs) for 291 diseases and injuries in 21 regions, 1990-2010: a systematic analysis for the Global Burden of Disease Study 2010</article-title>
          <source>Lancet</source>
          <year>2012</year>
          <month>12</month>
          <day>15</day>
          <volume>380</volume>
          <issue>9859</issue>
          <fpage>2197</fpage>
          <lpage>223</lpage>
          <pub-id pub-id-type="doi">10.1016/S0140-6736(12)61689-4</pub-id>
          <pub-id pub-id-type="medline">23245608</pub-id>
          <pub-id pub-id-type="pii">S0140-6736(12)61689-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Oladeji</surname>
              <given-names>BD</given-names>
            </name>
            <name name-style="western">
              <surname>Gureje</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Brain drain: a challenge to global mental health</article-title>
          <source>BJPsych Int</source>
          <year>2016</year>
          <month>08</month>
          <volume>13</volume>
          <issue>3</issue>
          <fpage>61</fpage>
          <lpage>3</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/29093905"/>
          </comment>
          <pub-id pub-id-type="doi">10.1192/s2056474000001240</pub-id>
          <pub-id pub-id-type="medline">29093905</pub-id>
          <pub-id pub-id-type="pii">S2056474000001240</pub-id>
          <pub-id pub-id-type="pmcid">PMC5618877</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="web">
          <article-title>Telemental health/providers</article-title>
          <source>Anxiety &#38; Depression Association of America</source>
          <access-date>2024-11-12</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://adaa.org/find-help/treatment-help/telemental-health-providers">https://adaa.org/find-help/treatment-help/telemental-health-providers</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Greenwood</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Krzyzaniak</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Peiris</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Clark</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Scott</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Cardona</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Griffith</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Glasziou</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Telehealth versus face-to-face psychotherapy for less common mental health conditions: systematic review and meta-analysis of randomized controlled trials</article-title>
          <source>JMIR Ment Health</source>
          <year>2022</year>
          <month>03</month>
          <day>11</day>
          <volume>9</volume>
          <issue>3</issue>
          <fpage>e31780</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mental.jmir.org/2022/3/e31780/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/31780</pub-id>
          <pub-id pub-id-type="medline">35275081</pub-id>
          <pub-id pub-id-type="pii">v9i3e31780</pub-id>
          <pub-id pub-id-type="pmcid">PMC8956990</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kelders</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>van Zyl</surname>
              <given-names>LE</given-names>
            </name>
            <name name-style="western">
              <surname>Ludden</surname>
              <given-names>GD</given-names>
            </name>
          </person-group>
          <article-title>The concept and components of engagement in different domains applied to eHealth: a systematic scoping review</article-title>
          <source>Front Psychol</source>
          <year>2020</year>
          <volume>11</volume>
          <fpage>926</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/32536888"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpsyg.2020.00926</pub-id>
          <pub-id pub-id-type="medline">32536888</pub-id>
          <pub-id pub-id-type="pmcid">PMC7266981</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yardley</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Spring</surname>
              <given-names>BJ</given-names>
            </name>
            <name name-style="western">
              <surname>Riper</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Morrison</surname>
              <given-names>LG</given-names>
            </name>
            <name name-style="western">
              <surname>Crane</surname>
              <given-names>DH</given-names>
            </name>
            <name name-style="western">
              <surname>Curtis</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Merchant</surname>
              <given-names>GC</given-names>
            </name>
            <name name-style="western">
              <surname>Naughton</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Blandford</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Understanding and promoting effective engagement with digital behavior change interventions</article-title>
          <source>Am J Prev Med</source>
          <year>2016</year>
          <month>11</month>
          <volume>51</volume>
          <issue>5</issue>
          <fpage>833</fpage>
          <lpage>42</lpage>
          <pub-id pub-id-type="doi">10.1016/j.amepre.2016.06.015</pub-id>
          <pub-id pub-id-type="medline">27745683</pub-id>
          <pub-id pub-id-type="pii">S0749-3797(16)30243-4</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Whitehill</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Serpell</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>YC</given-names>
            </name>
            <name name-style="western">
              <surname>Foster</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Movellan</surname>
              <given-names>JR</given-names>
            </name>
          </person-group>
          <article-title>The faces of engagement: automatic recognition of student engagement from facial expressions</article-title>
          <source>IEEE Trans Affective Comput</source>
          <year>2014</year>
          <month>1</month>
          <day>1</day>
          <volume>5</volume>
          <issue>1</issue>
          <fpage>86</fpage>
          <lpage>98</lpage>
          <pub-id pub-id-type="doi">10.1109/taffc.2014.2316163</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Murshed</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Dewan</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Lin</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Wen</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Engagement detection in e-learning environments using convolutional neural networks</article-title>
          <source>Proceedings of the IEEE International Conference on Dependable, Autonomic and Secure Computing, International Conference on Pervasive Intelligence and Computing, International Conference on Cloud and Big Data Computing, International Conference on Cyber Science and Technology Congress (DASC/PiCom/CBDCom/CyberSciTech)</source>
          <year>2019</year>
          <conf-name>DASC/PiCom/CBDCom/CyberSciTech 2019</conf-name>
          <conf-date>August 5-8, 2019</conf-date>
          <conf-loc>Fukuoka, Japan</conf-loc>
          <pub-id pub-id-type="doi">10.1109/dasc/picom/cbdcom/cyberscitech.2019.00028</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Aoki</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Woodruff</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Detecting user engagement in everyday conversations</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on October 13, 2004</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/cs/0410027"/>
          </comment>
          <pub-id pub-id-type="doi">10.21437/interspeech.2004-327</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sanghvi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Castellano</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Leite</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Pereira</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>McOwan</surname>
              <given-names>PW</given-names>
            </name>
            <name name-style="western">
              <surname>Paiva</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Automatic analysis of affective postures and body motion to detect engagement with a game companion</article-title>
          <source>Proceedings of the 6th International Conference on Human-Robot Interaction</source>
          <year>2011</year>
          <conf-name>HRI '11</conf-name>
          <conf-date>March 6-9, 2011</conf-date>
          <conf-loc>Lausanne, Switzerland</conf-loc>
          <pub-id pub-id-type="doi">10.1145/1957656.1957781</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nakano</surname>
              <given-names>YI</given-names>
            </name>
            <name name-style="western">
              <surname>Ishii</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Estimating user's engagement from eye-gaze behaviors in human-agent conversations</article-title>
          <source>Proceedings of the 15th International Conference on Intelligent User Interfaces</source>
          <year>2010</year>
          <conf-name>IUI '10</conf-name>
          <conf-date>February 7-10, 2010</conf-date>
          <conf-loc>Hong Kong, China</conf-loc>
          <pub-id pub-id-type="doi">10.1145/1719970.1719990</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sharma</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Joshi</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Gautam</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Maharjan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Khanal</surname>
              <given-names>SR</given-names>
            </name>
            <name name-style="western">
              <surname>Reis</surname>
              <given-names>MC</given-names>
            </name>
            <name name-style="western">
              <surname>Barroso</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>de Jesus Filipe</surname>
              <given-names>VM</given-names>
            </name>
          </person-group>
          <article-title>Student engagement detection using emotion analysis, eye tracking and head movement with machine learning</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on September 18, 2019</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1909.12913"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/978-3-031-22918-3_5</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Psaltis</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Apostolakis</surname>
              <given-names>KC</given-names>
            </name>
            <name name-style="western">
              <surname>Dimitropoulos</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Daras</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Multimodal student engagement recognition in prosocial games</article-title>
          <source>IEEE Trans Games</source>
          <year>2018</year>
          <month>9</month>
          <volume>10</volume>
          <issue>3</issue>
          <fpage>292</fpage>
          <lpage>303</lpage>
          <pub-id pub-id-type="doi">10.1109/tciaig.2017.2743341</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Grafsgaard</surname>
              <given-names>JF</given-names>
            </name>
            <name name-style="western">
              <surname>Wiggins</surname>
              <given-names>JB</given-names>
            </name>
            <name name-style="western">
              <surname>Boyer</surname>
              <given-names>KE</given-names>
            </name>
            <name name-style="western">
              <surname>Wiebe</surname>
              <given-names>EN</given-names>
            </name>
            <name name-style="western">
              <surname>Lester</surname>
              <given-names>JC</given-names>
            </name>
          </person-group>
          <article-title>Embodied affect in tutorial dialogue: student gesture and posture</article-title>
          <source>Proceedings of the 16th International Conference on Artificial Intelligence in Education</source>
          <year>2013</year>
          <conf-name>AIED 2013</conf-name>
          <conf-date>July 9-13, 2013</conf-date>
          <conf-loc>Memphis, TN</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-642-39112-5_1</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aslan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cataltepe</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Dundar</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Esme</surname>
              <given-names>AA</given-names>
            </name>
            <name name-style="western">
              <surname>Ferens</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Learner engagement measurement and classification in 1:1 learning</article-title>
          <source>Proceedings of the 13th International Conference on Machine Learning and Applications</source>
          <year>2014</year>
          <conf-name>ICMLA 2014</conf-name>
          <conf-date>December 3-6, 2014</conf-date>
          <conf-loc>Detroit, MI</conf-loc>
          <pub-id pub-id-type="doi">10.1109/icmla.2014.111</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ma</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fruchter</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <source>eRing: Body Motion Engagement Detection and Feedback in Global Teams</source>
          <year>2015</year>
          <month>4</month>
          <publisher-loc>Stanford, CA</publisher-loc>
          <publisher-name>Stanford University</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Monkaresi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Bosch</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Calvo</surname>
              <given-names>RA</given-names>
            </name>
            <name name-style="western">
              <surname>D'Mello</surname>
              <given-names>SK</given-names>
            </name>
          </person-group>
          <article-title>Automated detection of engagement using video-based estimation of facial expressions and heart rate</article-title>
          <source>IEEE Trans Affective Comput</source>
          <year>2017</year>
          <month>1</month>
          <day>1</day>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>15</fpage>
          <lpage>28</lpage>
          <pub-id pub-id-type="doi">10.1109/taffc.2016.2515084</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>An ensemble model using face and body tracking for engagement detection</article-title>
          <source>Proceedings of the 20th ACM International Conference on Multimodal Interaction</source>
          <year>2018</year>
          <conf-name>ICMI '18</conf-name>
          <conf-date>October 16-20, 2018</conf-date>
          <conf-loc>Boulder, CO</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3242969.3264986</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fedotov</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Perepelkina</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Kazimirova</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Konstantinova</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Minker</surname>
              <given-names>W</given-names>
            </name>
          </person-group>
          <article-title>Multimodal approach to engagement and disengagement detection with highly imbalanced in-the-wild data</article-title>
          <source>Proceedings of the Workshop on Modeling Cognitive Processes from Multimodal Data</source>
          <year>2018</year>
          <conf-name>MCPMD '18</conf-name>
          <conf-date>October 16, 2018</conf-date>
          <conf-loc>Boulder, CO</conf-loc>
          <pub-id pub-id-type="doi">10.1145/3279810.3279842</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zadeh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zellers</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Pincus</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Morency</surname>
              <given-names>LP</given-names>
            </name>
          </person-group>
          <article-title>MOSI: multimodal corpus of sentiment intensity and subjectivity analysis in online opinion videos</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on June 20, 2016</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1606.06259"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zadeh</surname>
              <given-names>AB</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>PP</given-names>
            </name>
            <name name-style="western">
              <surname>Poria</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cambria</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Morency</surname>
              <given-names>LP</given-names>
            </name>
          </person-group>
          <article-title>Multimodal language analysis in the wild: CMU-MOSEI dataset and interpretable dynamic fusion graph</article-title>
          <source>Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics</source>
          <year>2018</year>
          <conf-name>ACL 2018</conf-name>
          <conf-date>July 15-20, 2018</conf-date>
          <conf-loc>Melbourne, Australia</conf-loc>
          <pub-id pub-id-type="doi">10.18653/v1/p18-1208</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ong</surname>
              <given-names>DC</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Zhi-Xuan</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Reddan</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Kahhale</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Mattek</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zaki</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Modeling emotion in complex stories: the Stanford Emotional Narratives Dataset</article-title>
          <source>IEEE Trans Affect Comput</source>
          <year>2021</year>
          <month>7</month>
          <day>1</day>
          <volume>12</volume>
          <issue>3</issue>
          <fpage>579</fpage>
          <lpage>94</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34484569"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/taffc.2019.2955949</pub-id>
          <pub-id pub-id-type="medline">34484569</pub-id>
          <pub-id pub-id-type="pmcid">PMC8414991</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ringeval</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Sonderegger</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Sauer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Lalanne</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Introducing the RECOLA multimodal corpus of remote collaborative and affective interactions</article-title>
          <source>Proceedings of the 10th IEEE International Conference and Workshops on Automatic Face and Gesture Recognition (FG)</source>
          <year>2013</year>
          <conf-name>FG 2013</conf-name>
          <conf-date>April 22-26, 2013</conf-date>
          <conf-loc>Shanghai, China</conf-loc>
          <pub-id pub-id-type="doi">10.1109/fg.2013.6553805</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zadeh</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cao</surname>
              <given-names>YS</given-names>
            </name>
            <name name-style="western">
              <surname>Hessner</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>PP</given-names>
            </name>
            <name name-style="western">
              <surname>Poria</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Morency</surname>
              <given-names>LP</given-names>
            </name>
          </person-group>
          <article-title>CMU-MOSEAS: a multimodal language dataset for Spanish, Portuguese, German and French</article-title>
          <source>Proc Conf Empir Methods Nat Lang Process</source>
          <year>2020</year>
          <month>11</month>
          <volume>2020</volume>
          <fpage>1801</fpage>
          <lpage>12</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33969362"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2020.emnlp-main.141</pub-id>
          <pub-id pub-id-type="medline">33969362</pub-id>
          <pub-id pub-id-type="pmcid">PMC8106386</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gupta</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>D'Cunha</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Awasthi</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Balasubramanian</surname>
              <given-names>V</given-names>
            </name>
          </person-group>
          <article-title>DAiSEE: towards user engagement recognition in the wild</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on September 7, 2016</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1609.01885"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kaur</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mustafa</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mehta</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Dhall</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Prediction and localization of student engagement in the wild</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on April 3, 2018</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1804.00858"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/dicta.2018.8615851</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sathyanarayana</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Satzoda</surname>
              <given-names>RK</given-names>
            </name>
            <name name-style="western">
              <surname>Carini</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Salamanca</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Reilly</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Towards automated understanding of student-tutor interactions using visual deictic gestures</article-title>
          <source>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops</source>
          <year>2014</year>
          <conf-name>CVPRW 2014</conf-name>
          <conf-date>June 23-28, 2014</conf-date>
          <conf-loc>Columbus, OH</conf-loc>
          <pub-id pub-id-type="doi">10.1109/cvprw.2014.77</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Berger</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Boettcher</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Caspar</surname>
              <given-names>F</given-names>
            </name>
          </person-group>
          <article-title>Internet-based guided self-help for several anxiety disorders: a randomized controlled trial comparing a tailored with a standardized disorder-specific approach</article-title>
          <source>Psychotherapy (Chic)</source>
          <year>2014</year>
          <month>06</month>
          <volume>51</volume>
          <issue>2</issue>
          <fpage>207</fpage>
          <lpage>19</lpage>
          <pub-id pub-id-type="doi">10.1037/a0032527</pub-id>
          <pub-id pub-id-type="medline">24041199</pub-id>
          <pub-id pub-id-type="pii">2013-32697-001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gómez Penedo</surname>
              <given-names>JM</given-names>
            </name>
            <name name-style="western">
              <surname>Berger</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Grosse Holtforth</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Krieger</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Schröder</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hohagen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Meyer</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Moritz</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>JP</given-names>
            </name>
          </person-group>
          <article-title>The Working Alliance Inventory for guided Internet interventions (WAI-I)</article-title>
          <source>J Clin Psychol</source>
          <year>2020</year>
          <month>06</month>
          <volume>76</volume>
          <issue>6</issue>
          <fpage>973</fpage>
          <lpage>86</lpage>
          <pub-id pub-id-type="doi">10.1002/jclp.22823</pub-id>
          <pub-id pub-id-type="medline">31240727</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bordin</surname>
              <given-names>ES</given-names>
            </name>
          </person-group>
          <article-title>The generalizability of the psychoanalytic concept of the working alliance</article-title>
          <source>Psychother Theory Res Pract</source>
          <year>1979</year>
          <volume>16</volume>
          <issue>3</issue>
          <fpage>252</fpage>
          <lpage>60</lpage>
          <pub-id pub-id-type="doi">10.1037/h0085885</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Herrero</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Vara</surname>
              <given-names>MD</given-names>
            </name>
            <name name-style="western">
              <surname>Miragall</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Botella</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>García-Palacios</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Riper</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Kleiboer</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Baños</surname>
              <given-names>RM</given-names>
            </name>
          </person-group>
          <article-title>Working alliance inventory for online interventions-short form (WAI-TECH-SF): the role of the therapeutic alliance between patient and online program in therapeutic outcomes</article-title>
          <source>Int J Environ Res Public Health</source>
          <year>2020</year>
          <month>08</month>
          <day>25</day>
          <volume>17</volume>
          <issue>17</issue>
          <fpage>6169</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.mdpi.com/resolver?pii=ijerph17176169"/>
          </comment>
          <pub-id pub-id-type="doi">10.3390/ijerph17176169</pub-id>
          <pub-id pub-id-type="medline">32854381</pub-id>
          <pub-id pub-id-type="pii">ijerph17176169</pub-id>
          <pub-id pub-id-type="pmcid">PMC7503297</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Shinno</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Matsumoto</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hamatani</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Inaba</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ozawa</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kawasaki</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Ikai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Sutoh</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hayashi</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Shimizu</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Feasibility of guided internet-based cognitive behavioral therapy for panic disorder and social anxiety disorder in Japan: pilot single-arm trial</article-title>
          <source>JMIR Form Res</source>
          <year>2024</year>
          <month>02</month>
          <day>29</day>
          <volume>8</volume>
          <fpage>e53659</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://formative.jmir.org/2024//e53659/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/53659</pub-id>
          <pub-id pub-id-type="medline">38421717</pub-id>
          <pub-id pub-id-type="pii">v8i1e53659</pub-id>
          <pub-id pub-id-type="pmcid">PMC10940979</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>McFee</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Raffel</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Liang</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ellis</surname>
              <given-names>DP</given-names>
            </name>
            <name name-style="western">
              <surname>McVicar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Battenberg</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Nieto</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>librosa: audio and music signal analysis in python</article-title>
          <source>Proceedings of the 14th Python in Science Conference</source>
          <year>2015</year>
          <conf-name>SciPy 2015</conf-name>
          <conf-date>July 6-12, 2015</conf-date>
          <conf-loc>Austin, TX</conf-loc>
          <pub-id pub-id-type="doi">10.25080/majora-7b98e3ed-003</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Magdin</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Sulka</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Tomanová</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Vozár</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Voice analysis using PRAAT software and classification of user emotional state</article-title>
          <source>Int J Interact Multimed Artif Intell</source>
          <year>2019</year>
          <volume>5</volume>
          <issue>6</issue>
          <fpage>33</fpage>
          <lpage>42</lpage>
          <pub-id pub-id-type="doi">10.9781/ijimai.2019.03.004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Balomenos</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Raouzaiou</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ioannou</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Drosopoulos</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Karpouzis</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Kollias</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Emotion analysis in man-machine interaction systems</article-title>
          <source>Proceedings of the First International Workshop on Machine Learning for Multimodal Interaction</source>
          <year>2004</year>
          <conf-name>MLMI 2004</conf-name>
          <conf-date>June 21-23, 2004</conf-date>
          <conf-loc>Martigny, Switzerland</conf-loc>
          <pub-id pub-id-type="doi">10.1007/978-3-540-30568-2_27</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Porter</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>ten Brinke</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Reading between the lies: identifying concealed and falsified emotions in universal facial expressions</article-title>
          <source>Psychol Sci</source>
          <year>2008</year>
          <month>05</month>
          <volume>19</volume>
          <issue>5</issue>
          <fpage>508</fpage>
          <lpage>14</lpage>
          <pub-id pub-id-type="doi">10.1111/j.1467-9280.2008.02116.x</pub-id>
          <pub-id pub-id-type="medline">18466413</pub-id>
          <pub-id pub-id-type="pii">PSCI2116</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Arriaga</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Valdenegro-Toro</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Plöger</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Real-time convolutional neural networks for emotion and gender classification</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on October 20, 2017</comment>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Demszky</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Movshovitz-Attias</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ko</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Cowen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nemade</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Ravi</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>GoEmotions: a dataset of fine-grained emotions</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on May 1, 2020</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/2005.00547"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.372</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lu</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Xia</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zeng</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Semi-supervised learning for image retrieval using support vector machines</article-title>
          <source>Proceedings of the Second International Symposium on Neural Networks</source>
          <year>2005</year>
          <conf-name>ISNN 2005</conf-name>
          <conf-date>May 30-June 1, 2005</conf-date>
          <conf-loc>Chongqing, China</conf-loc>
          <pub-id pub-id-type="doi">10.1007/11427391_108</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Varadarajan</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Deng</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Acero</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Active learning and semi-supervised learning for speech recognition: a unified framework using the global entropy reduction maximization criterion</article-title>
          <source>Comput Speech Lang</source>
          <year>2010</year>
          <month>7</month>
          <volume>24</volume>
          <issue>3</issue>
          <fpage>433</fpage>
          <lpage>44</lpage>
          <pub-id pub-id-type="doi">10.1016/j.csl.2009.03.004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Kirchhoff</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Graph-based semi-supervised learning for phone and segment classification</article-title>
          <source>Proceedings of the INTERSPEECH 2013</source>
          <year>2013</year>
          <conf-name>INTERSPEECH 2013</conf-name>
          <conf-date>August 25-29, 2013</conf-date>
          <conf-loc>Lyon, France</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.isca-archive.org/interspeech_2013/liu13e_interspeech.html"/>
          </comment>
          <pub-id pub-id-type="doi">10.21437/interspeech.2013-453</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alyuz</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Okur</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Oktay</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Genc</surname>
              <given-names>U</given-names>
            </name>
            <name name-style="western">
              <surname>Aslan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mete</surname>
              <given-names>SE</given-names>
            </name>
            <name name-style="western">
              <surname>Arnrich</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Esme</surname>
              <given-names>AA</given-names>
            </name>
          </person-group>
          <article-title>Semi-supervised model personalization for improved detection of learner's emotional engagement</article-title>
          <source>Proceedings of the 18th ACM International Conference on Multimodal Interaction</source>
          <year>2016</year>
          <conf-name>ICMI '16</conf-name>
          <conf-date>November 12-16, 2016</conf-date>
          <conf-loc>Tokyo, Japan</conf-loc>
          <pub-id pub-id-type="doi">10.1145/2993148.2993166</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Nezami</surname>
              <given-names>OM</given-names>
            </name>
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Hamey</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Semi-supervised detection of student engagement</article-title>
          <source>Proceedings of the PACIS 2017</source>
          <year>2017</year>
          <conf-name>PACIS 2017</conf-name>
          <conf-date>July 16-20, 2017</conf-date>
          <conf-loc>Langkawi, Malaysia</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goodfellow</surname>
              <given-names>IJ</given-names>
            </name>
            <name name-style="western">
              <surname>Pouget-Abadie</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mirza</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Warde-Farley</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Ozair</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Courville</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bengio</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>Generative adversarial nets</article-title>
          <source>Proceedings of the 27th International Conference on Neural Information Processing Systems - Volume 2</source>
          <year>2014</year>
          <conf-name>NIPS'14</conf-name>
          <conf-date>December 8-13, 2014</conf-date>
          <conf-loc>Montreal, QC</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saeed</surname>
              <given-names>AQ</given-names>
            </name>
            <name name-style="western">
              <surname>Sheikh Abdullah</surname>
              <given-names>SN</given-names>
            </name>
            <name name-style="western">
              <surname>Che-Hamzah</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Abdul Ghani</surname>
              <given-names>AT</given-names>
            </name>
          </person-group>
          <article-title>Accuracy of using generative adversarial networks for glaucoma detection: systematic review and bibliometric analysis</article-title>
          <source>J Med Internet Res</source>
          <year>2021</year>
          <month>09</month>
          <day>21</day>
          <volume>23</volume>
          <issue>9</issue>
          <fpage>e27414</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2021/9/e27414/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/27414</pub-id>
          <pub-id pub-id-type="medline">34236992</pub-id>
          <pub-id pub-id-type="pii">v23i9e27414</pub-id>
          <pub-id pub-id-type="pmcid">PMC8493455</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Olmschenk</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Zhu</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Tang</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Generalizing semi-supervised generative adversarial networks to regression using feature contrasting</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online on November 27, 2018</comment>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/abs/1811.11269#:~:text=This%20method%20avoids%20potential%20biases,understanding%20of%20how%20GANs%20function"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sinatra</surname>
              <given-names>GM</given-names>
            </name>
            <name name-style="western">
              <surname>Heddy</surname>
              <given-names>BC</given-names>
            </name>
            <name name-style="western">
              <surname>Lombardi</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>The challenges of defining and measuring student engagement in science</article-title>
          <source>Educ Psychol</source>
          <year>2015</year>
          <month>02</month>
          <day>27</day>
          <volume>50</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>13</lpage>
          <pub-id pub-id-type="doi">10.1080/00461520.2014.1002924</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Békés</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Aafjes-van Doorn</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Zilcha-Mano</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Prout</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Psychotherapists' acceptance of telepsychotherapy during the COVID-19 pandemic: a machine learning approach</article-title>
          <source>Clin Psychol Psychother</source>
          <year>2021</year>
          <month>11</month>
          <volume>28</volume>
          <issue>6</issue>
          <fpage>1403</fpage>
          <lpage>15</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/34723404"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/cpp.2682</pub-id>
          <pub-id pub-id-type="medline">34723404</pub-id>
          <pub-id pub-id-type="pmcid">PMC8652775</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Vail</surname>
              <given-names>AK</given-names>
            </name>
            <name name-style="western">
              <surname>Girard</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Bylsma</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Cohn</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Fournier</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Swartz</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Goals, tasks, and bonds: toward the computational assessment of therapist versus client perception of working alliance</article-title>
          <source>Proceedings of the 16th IEEE International Conference on Automatic Face and Gesture Recognition</source>
          <year>2021</year>
          <conf-name>FG 2021</conf-name>
          <conf-date>December 15-18, 2021</conf-date>
          <conf-loc>Jodhpur, India</conf-loc>
          <pub-id pub-id-type="doi">10.1109/fg52635.2021.9667021</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Tracey</surname>
              <given-names>TJ</given-names>
            </name>
            <name name-style="western">
              <surname>Kokotovic</surname>
              <given-names>AM</given-names>
            </name>
          </person-group>
          <article-title>Factor structure of the Working Alliance Inventory</article-title>
          <source>Psychol Assess J Consult Clin Psychol</source>
          <year>1989</year>
          <month>09</month>
          <volume>1</volume>
          <issue>3</issue>
          <fpage>207</fpage>
          <lpage>10</lpage>
          <pub-id pub-id-type="doi">10.1037/1040-3590.1.3.207</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ewalds Mulliez</surname>
              <given-names>AP</given-names>
            </name>
            <name name-style="western">
              <surname>Pomey</surname>
              <given-names>MP</given-names>
            </name>
            <name name-style="western">
              <surname>Bordeleau</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Desbiens</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Pelletier</surname>
              <given-names>JF</given-names>
            </name>
          </person-group>
          <article-title>A voice for the patients: evaluation of the implementation of a strategic organizational committee for patient engagement in mental health</article-title>
          <source>PLoS One</source>
          <year>2018</year>
          <month>10</month>
          <day>24</day>
          <volume>13</volume>
          <issue>10</issue>
          <fpage>e0205173</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pone.0205173"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pone.0205173</pub-id>
          <pub-id pub-id-type="medline">30356239</pub-id>
          <pub-id pub-id-type="pii">PONE-D-18-01482</pub-id>
          <pub-id pub-id-type="pmcid">PMC6200221</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sagen</surname>
              <given-names>JS</given-names>
            </name>
            <name name-style="western">
              <surname>Smedslund</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Simonsen</surname>
              <given-names>AE</given-names>
            </name>
            <name name-style="western">
              <surname>Habberstad</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Kjeken</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Dagfinrud</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Moe</surname>
              <given-names>RH</given-names>
            </name>
          </person-group>
          <article-title>Patient engagement in the development and delivery of healthcare services: a systematic scoping review</article-title>
          <source>BMJ Open Qual</source>
          <year>2023</year>
          <month>06</month>
          <day>27</day>
          <volume>12</volume>
          <issue>2</issue>
          <fpage>e002309</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmjopenquality.bmj.com/lookup/pmidlookup?view=long&#38;pmid=37369560"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/bmjoq-2023-002309</pub-id>
          <pub-id pub-id-type="medline">37369560</pub-id>
          <pub-id pub-id-type="pii">bmjoq-2023-002309</pub-id>
          <pub-id pub-id-type="pmcid">PMC10577732</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref55">
        <label>55</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Khosravi</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Azar</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Factors influencing patient engagement in mental health chatbots: a thematic analysis of findings from a systematic review of reviews</article-title>
          <source>Digit Health</source>
          <year>2024</year>
          <month>04</month>
          <day>22</day>
          <volume>10</volume>
          <fpage>20552076241247983</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://journals.sagepub.com/doi/abs/10.1177/20552076241247983?url_ver=Z39.88-2003&#38;rfr_id=ori:rid:crossref.org&#38;rfr_dat=cr_pub++0pubmed"/>
          </comment>
          <pub-id pub-id-type="doi">10.1177/20552076241247983</pub-id>
          <pub-id pub-id-type="medline">38655378</pub-id>
          <pub-id pub-id-type="pii">10.1177_20552076241247983</pub-id>
          <pub-id pub-id-type="pmcid">PMC11036914</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref56">
        <label>56</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Remmert</surname>
              <given-names>JE</given-names>
            </name>
            <name name-style="western">
              <surname>Destefano</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chinman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Oslin</surname>
              <given-names>DW</given-names>
            </name>
            <name name-style="western">
              <surname>Mavandadi</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Development of a peer-delivered primary care intervention to improve veteran mental health treatment engagement</article-title>
          <source>Psychol Serv</source>
          <year>2024</year>
          <month>08</month>
          <day>01</day>
          <pub-id pub-id-type="doi">10.1037/ser0000883</pub-id>
          <pub-id pub-id-type="medline">39088007</pub-id>
          <pub-id pub-id-type="pii">2025-09515-001</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref57">
        <label>57</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jørgensen</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lerbæk</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Frederiksen</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Watson</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Karlsson</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Patient participation in mental health care - perspectives of healthcare professionals and patients: a scoping review</article-title>
          <source>Issues Ment Health Nurs</source>
          <year>2024</year>
          <month>08</month>
          <day>20</day>
          <volume>45</volume>
          <issue>8</issue>
          <fpage>794</fpage>
          <lpage>810</lpage>
          <pub-id pub-id-type="doi">10.1080/01612840.2024.2358931</pub-id>
          <pub-id pub-id-type="medline">38900284</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref58">
        <label>58</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elyoseph</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Levkovich</surname>
              <given-names>I</given-names>
            </name>
          </person-group>
          <article-title>Comparing the perspectives of generative AI, mental health experts, and the general public on schizophrenia recovery: case vignette study</article-title>
          <source>JMIR Ment Health</source>
          <year>2024</year>
          <month>03</month>
          <day>18</day>
          <volume>11</volume>
          <fpage>e53043</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://mental.jmir.org/2024/1/e53043/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/53043</pub-id>
          <pub-id pub-id-type="medline">38533615</pub-id>
          <pub-id pub-id-type="pii">v11i1e53043</pub-id>
          <pub-id pub-id-type="pmcid">PMC11004608</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref59">
        <label>59</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Elyoseph</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Levkovich</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Shinan-Altman</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Assessing prognosis in depression: comparing perspectives of AI models, mental health professionals and the general public</article-title>
          <source>Fam Med Community Health</source>
          <year>2024</year>
          <month>01</month>
          <day>09</day>
          <volume>12</volume>
          <issue>Suppl 1</issue>
          <fpage>e002583</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://fmch.bmj.com/lookup/pmidlookup?view=long&#38;pmid=38199604"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/fmch-2023-002583</pub-id>
          <pub-id pub-id-type="medline">38199604</pub-id>
          <pub-id pub-id-type="pii">fmch-2023-002583</pub-id>
          <pub-id pub-id-type="pmcid">PMC10806564</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref60">
        <label>60</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Thieme</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Hanratty</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Lyons</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Palacios</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Marques</surname>
              <given-names>RF</given-names>
            </name>
            <name name-style="western">
              <surname>Morrison</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Doherty</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Designing human-centered AI for mental health: developing clinically relevant applications for online CBT treatment</article-title>
          <source>ACM Trans Comput Hum Interact</source>
          <year>2023</year>
          <month>03</month>
          <day>17</day>
          <volume>30</volume>
          <issue>2</issue>
          <fpage>1</fpage>
          <lpage>50</lpage>
          <pub-id pub-id-type="doi">10.1145/3564752</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref61">
        <label>61</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rollwage</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Habicht</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Juechems</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Carrington</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Viswanathan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Stylianou</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hauser</surname>
              <given-names>TU</given-names>
            </name>
            <name name-style="western">
              <surname>Harper</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Using conversational AI to facilitate mental health assessments and improve clinical efficiency within psychotherapy services: real-world observational study</article-title>
          <source>JMIR AI</source>
          <year>2023</year>
          <month>12</month>
          <day>13</day>
          <volume>2</volume>
          <fpage>e44358</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://ai.jmir.org/2023/1/e44358/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/44358</pub-id>
          <pub-id pub-id-type="medline">38875569</pub-id>
          <pub-id pub-id-type="pii">v2i1e44358</pub-id>
          <pub-id pub-id-type="pmcid">PMC11041479</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref62">
        <label>62</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Roy</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Khandelwal</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Goswami</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dolbir</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Malekar</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Sheth</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Demo alleviate: demonstrating artificial intelligence enabled virtual assistance for telehealth: the mental health case</article-title>
          <source>Proc AAAI Conf Artif Intell</source>
          <year>2023</year>
          <month>06</month>
          <day>26</day>
          <volume>37</volume>
          <issue>13</issue>
          <fpage>16479</fpage>
          <lpage>81</lpage>
          <pub-id pub-id-type="doi">10.1609/aaai.v37i13.27085</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref63">
        <label>63</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Siegel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zuo</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Moghaddamcharkari</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>McIntyre</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Rosenblat</surname>
              <given-names>JD</given-names>
            </name>
          </person-group>
          <article-title>Barriers, benefits and interventions for improving the delivery of telemental health services during the coronavirus disease 2019 pandemic: a systematic review</article-title>
          <source>Curr Opin Psychiatry</source>
          <year>2021</year>
          <month>07</month>
          <day>01</day>
          <volume>34</volume>
          <issue>4</issue>
          <fpage>434</fpage>
          <lpage>43</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/33928918"/>
          </comment>
          <pub-id pub-id-type="doi">10.1097/YCO.0000000000000714</pub-id>
          <pub-id pub-id-type="medline">33928918</pub-id>
          <pub-id pub-id-type="pii">00001504-202107000-00016</pub-id>
          <pub-id pub-id-type="pmcid">PMC8183246</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref64">
        <label>64</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Foley</surname>
              <given-names>GN</given-names>
            </name>
            <name name-style="western">
              <surname>Gentile</surname>
              <given-names>JP</given-names>
            </name>
          </person-group>
          <article-title>Nonverbal communication in psychotherapy</article-title>
          <source>Psychiatry (Edgmont)</source>
          <year>2010</year>
          <month>06</month>
          <volume>7</volume>
          <issue>6</issue>
          <fpage>38</fpage>
          <lpage>44</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/20622944"/>
          </comment>
          <pub-id pub-id-type="medline">20622944</pub-id>
          <pub-id pub-id-type="pmcid">PMC2898840</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref65">
        <label>65</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mitzkovitz</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Dowd</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Cothran</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Musil</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>The eyes have it: psychotherapy in the era of masks</article-title>
          <source>J Clin Psychol Med Settings</source>
          <year>2022</year>
          <month>12</month>
          <day>03</day>
          <volume>29</volume>
          <issue>4</issue>
          <fpage>886</fpage>
          <lpage>97</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/35118604"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s10880-022-09856-x</pub-id>
          <pub-id pub-id-type="medline">35118604</pub-id>
          <pub-id pub-id-type="pii">10.1007/s10880-022-09856-x</pub-id>
          <pub-id pub-id-type="pmcid">PMC8812949</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref66">
        <label>66</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>DeVito Dabbs</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Thomas</surname>
              <given-names>TH</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Donovan</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Measuring engagement in provider-guided digital health interventions with a conceptual and analytical framework using nurse WRITE as an exemplar: exploratory study with an iterative approach</article-title>
          <source>JMIR Form Res</source>
          <year>2024</year>
          <month>07</month>
          <day>22</day>
          <volume>8</volume>
          <fpage>e57529</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://formative.jmir.org/2024/1/e57529/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/57529</pub-id>
          <pub-id pub-id-type="medline">39037757</pub-id>
          <pub-id pub-id-type="pii">v8i1e57529</pub-id>
          <pub-id pub-id-type="pmcid">PMC11301115</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref67">
        <label>67</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fragueiro</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Debroize</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>Bannier</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Cury</surname>
              <given-names>C</given-names>
            </name>
          </person-group>
          <article-title>Eye-tracking and skin conductance to monitor task engagement during neurofeedback sessions</article-title>
          <source>Proceedings of the 9th Graz Brain Computer Interface Conference</source>
          <year>2024</year>
          <conf-name>BCI 2024</conf-name>
          <conf-date>September 9-12, 2024</conf-date>
          <conf-loc>Graz, Austria</conf-loc>
        </nlm-citation>
      </ref>
      <ref id="ref68">
        <label>68</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ventura</surname>
              <given-names>RB</given-names>
            </name>
            <name name-style="western">
              <surname>Porfiri</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Galvanic skin response as a measure of engagement during play in virtual reality</article-title>
          <source>Proceedings of the ASME 2020 Dynamic Systems and Control Conference</source>
          <year>2020</year>
          <conf-name>ASME 2020</conf-name>
          <conf-date>October 5-7, 2020</conf-date>
          <conf-loc>Virtual Event</conf-loc>
          <pub-id pub-id-type="doi">10.1115/DSCC2020-3177</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref69">
        <label>69</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Varma</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Washington</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Chrisman</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Kline</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Leblanc</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Paskov</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Stockham</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Jung</surname>
              <given-names>JY</given-names>
            </name>
            <name name-style="western">
              <surname>Sun</surname>
              <given-names>MW</given-names>
            </name>
            <name name-style="western">
              <surname>Wall</surname>
              <given-names>DP</given-names>
            </name>
          </person-group>
          <article-title>Identification of social engagement indicators associated with autism spectrum disorder using a game-based mobile app: comparative study of gaze fixation and visual scanning methods</article-title>
          <source>J Med Internet Res</source>
          <year>2022</year>
          <month>02</month>
          <day>15</day>
          <volume>24</volume>
          <issue>2</issue>
          <fpage>e31830</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2022/2/e31830/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/31830</pub-id>
          <pub-id pub-id-type="medline">35166683</pub-id>
          <pub-id pub-id-type="pii">v24i2e31830</pub-id>
          <pub-id pub-id-type="pmcid">PMC8889483</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref70">
        <label>70</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Goldberg</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Sümer</surname>
              <given-names>Ö</given-names>
            </name>
            <name name-style="western">
              <surname>Stürmer</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Wagner</surname>
              <given-names>W</given-names>
            </name>
            <name name-style="western">
              <surname>Göllner</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Gerjets</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Kasneci</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Trautwein</surname>
              <given-names>U</given-names>
            </name>
          </person-group>
          <article-title>Attentive or not? Toward a machine learning approach to assessing students’ visible engagement in classroom instruction</article-title>
          <source>Educ Psychol Rev</source>
          <year>2019</year>
          <month>12</month>
          <day>18</day>
          <volume>33</volume>
          <fpage>27</fpage>
          <lpage>49</lpage>
          <pub-id pub-id-type="doi">10.1007/s10648-019-09514-z</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref71">
        <label>71</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Booth</surname>
              <given-names>BM</given-names>
            </name>
            <name name-style="western">
              <surname>Bosch</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>D’Mello</surname>
              <given-names>SK</given-names>
            </name>
          </person-group>
          <article-title>Engagement detection and its applications in learning: a tutorial and selective review</article-title>
          <source>Proc IEEE</source>
          <year>2023</year>
          <month>10</month>
          <volume>111</volume>
          <issue>10</issue>
          <fpage>1398</fpage>
          <lpage>422</lpage>
          <pub-id pub-id-type="doi">10.1109/jproc.2023.3309560</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref72">
        <label>72</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bernard</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>de Ossorno Garcia</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Salhi</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>John</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>DelPozo-Banos</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Patterns of engagement in a digital mental health service during COVID-19: a cohort study for children and young people</article-title>
          <source>Front Psychiatry</source>
          <year>2023</year>
          <month>07</month>
          <day>27</day>
          <volume>14</volume>
          <fpage>1143272</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/37575580"/>
          </comment>
          <pub-id pub-id-type="doi">10.3389/fpsyt.2023.1143272</pub-id>
          <pub-id pub-id-type="medline">37575580</pub-id>
          <pub-id pub-id-type="pmcid">PMC10415812</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
