<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v10i1e90644</article-id><article-id pub-id-type="doi">10.2196/90644</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>User Experience and Early Clinical Outcomes of a Mental Wellness Chatbot for Depression and Anxiety: Pilot Evaluation Mixed Methods Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Graupensperger</surname><given-names>Scott</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Ward</surname><given-names>Emily J</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Baum</surname><given-names>Graham</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Bentley</surname><given-names>Kate 
H</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Dworkin</surname><given-names>Emily R</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Brown</surname><given-names>Millard</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Chekroud</surname><given-names>Adam</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hawrilenko</surname><given-names>Matt</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib></contrib-group><aff id="aff1"><institution>Spring Health</institution><addr-line>60 Madison Ave</addr-line><addr-line>New York</addr-line><addr-line>NY</addr-line><country>United States</country></aff><aff id="aff2"><institution>Psychiatry, School of Medicine, Yale University</institution><addr-line>New Haven</addr-line><addr-line>CT</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Steenstra</surname><given-names>Ivan</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Herbener</surname><given-names>Arthur</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Fujita</surname><given-names>Junichi</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Scott Graupensperger, PhD, Spring Health, 60 Madison Ave, New York, NY, 10010, United States, 1 855-629-0554; 
<email>scott.graupensperger@springhealth.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>14</day><month>4</month><year>2026</year></pub-date><volume>10</volume><elocation-id>e90644</elocation-id><history><date date-type="received"><day>31</day><month>12</month><year>2025</year></date><date date-type="rev-recd"><day>27</day><month>02</month><year>2026</year></date><date date-type="accepted"><day>27</day><month>02</month><year>2026</year></date></history><copyright-statement>&#x00A9; Scott Graupensperger, Emily J Ward, Graham Baum, Kate H Bentley, Emily R Dworkin, Millard Brown, Adam Chekroud, Matt Hawrilenko. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 14.4.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2026/1/e90644"/><abstract><sec><title>Background</title><p>Artificial intelligence&#x2013;powered conversational agents (ie, chatbots) are increasingly popular outlets for users seeking psychological support, yet little is known about how users experience early-stage prototypes or which therapeutic processes contribute to clinical improvement. A transparent evaluation of emerging chatbot prototypes is needed to clarify if, how, and why artificial intelligence companions work and to guide their continued development.</p></sec><sec><title>Objective</title><p>This mixed methods pilot study evaluated user experience, acceptability, and preliminary clinical signals for an early-stage mental wellness chatbot. We also examined whether baseline symptom severity moderated clinical improvement.</p></sec><sec sec-type="methods"><title>Methods</title><p>Three sequential cohorts (n=125) completed a 2-week, incentivized chatbot exposure (approximately 60 min per week). Participants provided first-impression ratings, qualitative feedback, and pre&#x2013;post assessments of depressive symptoms (PHQ-8 [Patient Health Questionnaire-8]), anxiety symptoms (GAD-7 [Generalized Anxiety Disorder-7]), psychological distress, well-being, and loneliness. Statistical models estimated symptom change and tested interactions with baseline symptom severity. 
Mixed methods analysis integrated quantitative outcomes with large language model&#x2013;assisted qualitative content analysis of open-ended responses.</p></sec><sec sec-type="results"><title>Results</title><p>Participants described the chatbot as accessible, easy to use, and emotionally validating, while citing limitations in personalization and conversational depth. Qualitative responses consistently highlighted early therapeutic processes such as emotional validation, goal setting, and perceived attunement. Regression models showed significant pre&#x2013;post reductions in depressive (Hedges <italic>g</italic>=&#x2013;0.32) and anxiety (<italic>g</italic>=&#x2013;0.32) symptoms, alongside modest improvements in distress and well-being. Baseline severity moderated improvement, with marginal effects indicating larger predicted reductions at higher PHQ-8 and GAD-7 baseline scores (eg, PHQ-8=15: <italic>g</italic>=&#x2013;0.84; GAD-7=15: <italic>g</italic>=&#x2013;0.62).</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This pilot provides a comprehensive view of early chatbot development and suggests promising user experiences and preliminary symptom improvements under structured pilot conditions. By integrating experiential and exploratory clinical data, the study identifies candidate process targets to inform ongoing refinement. 
Findings support continued development and demonstrate procedural feasibility for progression to larger, longer-term trials evaluating engagement and clinical outcomes under more naturalistic conditions.</p></sec></abstract><kwd-group><kwd>digital health</kwd><kwd>artificial intelligence</kwd><kwd>large language models</kwd><kwd>therapeutic alliance</kwd><kwd>mental health</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Mental health conditions such as depression and anxiety remain among the leading causes of disability worldwide, yet access to timely, evidence-based care continues to fall short of population need [<xref ref-type="bibr" rid="ref1">1</xref>]. Structural barriers such as high costs, shortages of trained clinicians, long waitlists, geographic disparities, and perceived stigma limit the reach of traditional psychotherapy and contribute to persistent treatment gaps [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>]. As a result, scalable digital solutions have become a major focus for innovation, with particular attention to platforms that can deliver immediate, flexible, and low-threshold support outside of, or as a supplement to, formal clinical settings.</p><p>Large language model (LLM)&#x2013;based chatbots are rapidly evolving in the digital mental health space [<xref ref-type="bibr" rid="ref4">4</xref>]. As reviewed in recent narrative and systematic syntheses of artificial intelligence (AI) mental health chatbots [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>], these chatbots offer the possibility of responsive, personalized conversational support delivered at scale, with emerging studies suggesting they may help reduce mental health symptoms, increase engagement, and broaden access to care [<xref ref-type="bibr" rid="ref7">7</xref>-<xref ref-type="bibr" rid="ref13">13</xref>]. 
Yet, some researchers have been critical of diving straight into clinical trials [<xref ref-type="bibr" rid="ref14">14</xref>], as the development of these products has largely outpaced the empirical evidence base, with key questions remaining about how therapeutic processes unfold within chatbot interactions, how users perceive safety and trust, and whether these tools function best as stand-alone interventions or complements to clinician-delivered care [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>].</p><p>To date, studies assessing chatbots as mental health tools provide limited transparency into how prototypes were designed, refined, and evaluated for safety. As a result, little is known about how users actually experience early-stage chatbots, such as how safe, helpful, or trustworthy they feel, what facilitates or impedes engagement, and how these factors affect the impact of the chatbot. This gap is consequential, as understanding users&#x2019; initial experiences with mental wellness chatbot prototypes is essential for responsible development and real-world effectiveness. Indeed, prior research on digital mental health interventions shows that early impressions such as perceived safety, personalization, and feeling understood are strongly associated with subsequent engagement, adherence, and indicators of therapeutic response [<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>]. These experiential factors also shape trust and therapeutic alliance, which are central determinants of whether users continue to engage long enough to experience meaningful benefits [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>]. 
However, recent theoretical work suggests that alliance and other change processes in chatbot-based interventions may not operate identically to those in human psychotherapy, given that chatbots lack human therapists&#x2019; ontological and sociocultural status [<xref ref-type="bibr" rid="ref21">21</xref>]. This further highlights the need to empirically examine how users experience and interpret interactions with early-stage mental wellness chatbots.</p><p>Whereas several recent studies have focused on symptom outcomes in randomized trials [<xref ref-type="bibr" rid="ref5">5</xref>] or evaluated chatbot performance in controlled vignette-based simulations [<xref ref-type="bibr" rid="ref22">22</xref>], fewer investigations have examined early-stage prototype development using mixed methods approaches that integrate user experience, safety workflows, and preliminary clinical signals. As a result, there remains limited visibility into how conversational design choices, early user impressions, and structured protocols shape engagement and short-term outcomes. Because complex, multidimensional user experiences are best captured using human-centered, mixed methods that combine quantitative usability and symptom metrics with qualitative insight into how users interpret and experience emerging AI tools [<xref ref-type="bibr" rid="ref23">23</xref>], early-stage evaluations benefit from integrating both forms of evidence. This study provides a comprehensive assessment of an early-stage chatbot prototype, explicitly linking user experience and exploratory clinical outcomes to inform responsible refinement and progression to larger trials.</p></sec><sec id="s1-2"><title>This Study</title><p>Situated within digital mental health evaluation research, this pilot study used mixed methods to evaluate an early-stage LLM&#x2013;based mental wellness chatbot designed to support adults experiencing at least mild anxiety or depression. 
Across 3 successive cohorts, participants interacted with the chatbot over a 2-week period, providing both quantitative and qualitative feedback on usability, acceptability, and perceived therapeutic value. We also explored pre&#x2013;post changes in mental health symptoms as a preliminary gauge for a potential therapeutic signal. In the absence of randomization or a comparison arm, early clinical signals from chatbot prototypes should be interpreted as exploratory rather than as evidence of efficacy [<xref ref-type="bibr" rid="ref24">24</xref>]. By integrating experiential and early clinical outcome data within a structured pilot framework, this study contributes prototype-stage evidence to inform responsible refinement, optimization, and future controlled evaluation of mental wellness chatbots. Accordingly, the aim of this study was to conduct a comprehensive mixed methods evaluation of a generative LLM&#x2013;based mental wellness chatbot, examining user experience, safety monitoring feasibility, and exploratory clinical signals to inform responsible prototype refinement and future controlled trials.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview of the AI Chatbot</title><p>The LLM&#x2013;powered mental wellness chatbot evaluated in this study was developed to provide brief emotional support for adults experiencing mild to moderate anxiety or depressive symptoms. 
Responses were guided by evidence-based principles drawn from supportive and cognitive-behavioral therapy frameworks, enabling the chatbot to respond empathically to user input while encouraging adaptive coping and reflection.</p><p>Rather than delivering a structured, manualized treatment protocol, the chatbot facilitates supportive mental wellness conversations informed by evidence-based common factors, including emotional validation and reflective listening, collaborative clarification of goals, and brief coping or activation prompts (eg, grounding or perspective-taking exercises) [<xref ref-type="bibr" rid="ref25">25</xref>]. These processes were intentionally scaffolded through system-level instructions emphasizing emotional safety, nondirectiveness, and avoidance of clinical diagnosis or prescriptive advice. The app functioned as a stand-alone iOS application with a cloud-based backend hosted by Amazon Web Services. Additional details regarding the therapeutic framework, AI architecture, and real-time safety classifier are provided in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref37">37</xref>].</p></sec><sec id="s2-2"><title>Study Procedures</title><p>The study was conducted from September to November 2025. Participants were recruited through dscout (dscout Inc), a web-based research platform with a large opt-in panel of adults across the United States that is focused on user experience research. Eligible participants were adults aged &#x2265;21 years who reported at least mild symptoms of anxiety or depression at the time of screening. 
Individuals who endorsed suicidal or self-harm thoughts in the past 2 weeks (ie, score of &#x2265;1 on the ninth item of the Patient Health Questionnaire-9 [PHQ-9]) or serious mental illness, as well as those currently receiving psychotherapy, were excluded from this early-stage pilot test of an app that has not yet been evaluated for individuals with complex clinical needs. Additional screening and demographic balance procedures are described in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref37">37</xref>]. Enrollment occurred in 3 successive cohorts to allow iterative refinement of the chatbot prototype between cohorts. After completing eligibility screening, participants were invited to complete a baseline survey assessing demographics, clinical symptomatology (eg, depressive and anxiety symptoms), and attitudes toward mental health therapy and chatbots for emotional support. After baseline, participants were instructed to download the prototype application via TestFlight&#x2014;Apple&#x2019;s official beta-testing platform for iOS. Participants were instructed to use the chatbot for at least 60 minutes per week during the 2-week study period. The first use of the chatbot triggered a first impressions survey and open-ended qualitative prompts focused on initial user experience feedback. One week following the app download, participants completed a brief follow-up survey and qualitative prompts focused on usability and acceptability. Finally, the 2-week follow-up survey reassessed the clinical metrics and attitudes toward mental wellness chatbots originally assessed at baseline. 
All survey instruments/measures and interview materials are described in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref37">37</xref>].</p><p>Chatbot interactions were continuously monitored in real time by an automated, transcript-based safety classifier that was calibrated to high sensitivity due to the early stage of chatbot testing. The classifier detects participant responses indicating potential harm to self, to others, or from others (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref37">37</xref>] provides details). All flagged responses triggered a real-time alert to the study clinicians, who reviewed each flagged transcript within 2 hours to determine whether phone-based outreach for a safety check or emergency intervention was warranted.</p></sec><sec id="s2-3"><title>Participants</title><p>The screening survey was hosted on dscout and distributed to its active panel of participants. As required by dscout, the study description made clear that this was a 2-week digital mental health and well-being study involving the use of a mobile app. The screener stopped participants as soon as they failed an eligibility criterion to reduce the burden of continuing to ask unnecessary items once ineligibility was established. Because exclusions occurred dynamically throughout the screener, precise counts for every individual exclusion step cannot be calculated.</p><p>A detailed description of eligibility screening and a participant flow diagram are provided in Figure S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref37">37</xref>]. 
In short, 3406 individuals completed the screening, and 184 (5.4%) were eligible, with the most common exclusions being living in an ineligible state, not meeting the minimum symptom severity threshold (PHQ-2 or Generalized Anxiety Disorder-2 [GAD-2] score &#x2265;3), or being currently engaged in some form of mental health care. Of the eligible pool, 125 were ultimately enrolled. Consistent with the aims of pilot-stage testing, this sample was not designed to represent individuals with acute or high-risk clinical presentations. The final sample represented 32 different US states; participants ranged in age from 21 to 67 (mean 34.6, SD 9.9) years, and 80 (64%) identified as women. While most reported being White (74/125, 59.2%), 19 (15.2%) were Black or African American, 13 (10.4%) were Asian or Asian American, 10 (8%) were multiple races, and 9 (7.2%) reported another race or preferred not to answer. Regarding ethnicity, 13 (10.4%) were Hispanic/Latinx. Most were employed (91/125, 72.8%), while 17 (13.6%) listed their employment status as currently a student, and 17 (13.6%) were not employed. More than half (70/125, 56%) reported some form of prior mental health care. While 89 (74.2%) had previously used an AI chatbot, only 30 (25.2%) had used one for mental health support or information.</p></sec><sec id="s2-4"><title>Ethical Considerations</title><p>This study was approved by Pearl IRB (Indianapolis, IN; Study# 2025&#x2010;0463), an independent institutional review board. All procedures were conducted in accordance with the ethical standards of the 1964 Helsinki Declaration and its later amendments. Consent was obtained electronically from all participants prior to participation. Usage was incentivized at US $50 per week for those who met the 60-minute threshold and US $30 for those who exceeded 30 minutes but did not reach 60 minutes. Including compensation for survey and interview completion, participants could earn up to US $275 in total. 
All quantitative and qualitative data were deidentified prior to analysis and stored on secure, access-restricted servers. The secure LLM (GPT-4o-mini) instance used for qualitative analysis did not retain participant data or use it for model training. Only authorized members of the research team had access to study data.</p></sec><sec id="s2-5"><title>Qualitative Analytic Approach</title><p>Open-ended written responses and transcribed video responses were analyzed using a structured, LLM-assisted qualitative content analysis approach designed to characterize recurring feedback patterns within qualitative data. LLM-assisted content analysis is an emergent qualitative technique that leverages LLMs&#x2019; strengths in pattern recognition and linguistic synthesis [<xref ref-type="bibr" rid="ref38">38</xref>]. An inductive approach was appropriate given that our goal for this analysis was purely descriptive: that is, we aimed to characterize emergent themes rather than to apply prespecified theories or codes to the data [<xref ref-type="bibr" rid="ref39">39</xref>]. The LLM was prompted to extract candidate themes related to user experience (both positive and negative) and to provide illustrative quotes directly drawn from participant responses. Themes were generated separately for each cohort to explore potential shifts as the chatbot prototype evolved across iterative refinements.</p><p>In keeping with qualitative rigor standards [<xref ref-type="bibr" rid="ref40">40</xref>], several strategies were used to enhance trustworthiness. A reflexive approach was adopted to account for potential bias arising from the nonneutrality of the LLM and the researchers&#x2019; positionality as employees of the company developing the chatbot. To increase dependability, the analytic procedure was repeated independently 3 times using identical prompts, and theme labels and descriptions were compared across runs to assess stability and consistency in structure. 
To enhance credibility [<xref ref-type="bibr" rid="ref40">40</xref>], the researchers reviewed LLM-generated themes and quotations to ensure that quotes were coherent, nonredundant, and accurately reflected the described pattern. To ensure confirmability, we retained only themes that were consistently reproduced across model runs and deemed coherent in relation to the dataset as a whole. Transferability was supported by a detailed description of the study context, participant characteristics, and prototype-stage setting, allowing readers to evaluate applicability to similar contexts.</p></sec><sec id="s2-6"><title>Statistical Analyses</title><p>The goal of this user experience pilot trial was to recruit a sufficiently large sample to characterize usability and acceptability, gather rich user feedback, and obtain preliminary estimates of symptom change to inform the design and justification of future full-scale trials; accordingly, it was not statistically powered to detect clinical effects. First, descriptive statistics were used to summarize responses to the first-impression survey and 1-week follow-up surveys, including items assessing perceived emotional safety, trust, sense of being understood, professionalism, personalization, acceptability, and perceived advantages relative to a human therapist. Mean item ratings and stacked bar distributions were calculated using all available data.</p><p>The PHQ-8 and GAD-7 were analyzed separately to capture domain-specific symptom change and also combined into a Patient Health Questionnaire Anxiety and Depression Scale (PHQ-ADS) composite representing overall symptom burden [<xref ref-type="bibr" rid="ref26">26</xref>], as this is a common metric used in digital mental health studies [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. The composite was included to facilitate interpretation of global symptom change rather than to introduce a distinct confirmatory endpoint. 
Consistent with the developmental aims of this pilot, all symptom outcomes were interpreted as exploratory indicators to inform future trial design.</p><p>Clinical outcome analyses were restricted to participants with at least mild symptoms at baseline for the given outcome. For depressive symptoms, models included only participants with PHQ-8 scores &#x2265;5 at baseline; for anxiety symptoms, only those with baseline GAD-7 scores &#x2265;5 were included. Models estimating total symptoms (PHQ-ADS) and secondary outcomes were estimated among participants who had either PHQ-8 or GAD-7 &#x2265;5 at baseline (not necessarily both). Changes in clinical outcomes from baseline to the 2-week follow-up were estimated using linear mixed-effects models with a random intercept for participant, with time (follow-up vs baseline) as the main predictor and demographic covariates and cohort indicators included as fixed effects. Exploratory regression models tested whether first-impression ratings of the chatbot predicted 1-week therapeutic alliance and acceptability scores, adjusting for covariates and cohort. 
Finally, Pearson correlations examined associations between each first-impression rating and residualized symptom change scores (ie, calculated by regressing follow-up symptom scores on baseline severity and using the residuals as indices of symptom change independent of baseline levels), providing preliminary evidence for experiential predictors of improvement.</p><p>This study was reported in accordance with the CONSORT (Consolidated Standards of Reporting Trials) extension for pilot and feasibility trials (<xref ref-type="supplementary-material" rid="app2">Checklist 1</xref>).</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Study Protocol Feasibility Metrics</title><sec id="s3-1-1"><title>Overview</title><p>The following metrics describe the feasibility of the structured, incentivized study protocol rather than naturalistic engagement patterns. Because app use was incentivized to encourage adequate prototype evaluation, retention and usage statistics should be interpreted as indicators of procedural feasibility informing next-stage trials, not as estimates of voluntary real-world uptake. Retention was strong under the incentivized, time-limited study protocol (96% at 1 week; 95% at 2 weeks). We nevertheless examined correlates of attrition using logistic regression and found that dropout was unrelated to age, gender, education, employment, household income, baseline symptom levels, or baseline attitudes toward mental wellness chatbots. Across the 3 enrollment cohorts, there were no significant baseline differences in clinical severity.</p><p>Over the 2-week intervention period (defined as the first 16 days following activation to accommodate minor variation in start timing), participants completed a median of 7 sessions, with 79.6% completing at least 5 sessions and 30.1% completing at least 10 sessions. 
Participants used the chatbot on 7.4 distinct days, on average (SD 2.8), indicating that engagement was generally distributed across the exposure period rather than all at once. Across sessions, participants generated a median of 157 talk-turns (ie, message exchanges), indicating sustained conversational engagement with the chatbot. Adherence to the study protocol was high, with 75.2% of participants reaching the recommended exposure (&#x2265;120 total minutes of use) and 87.6% completing at least 90 minutes (median 141.9 minutes). Older age was significantly associated with greater total minutes of app use; no other baseline demographic or attitudinal variables were related to minutes of use or total talk turns with the chatbot. Notably, household income was not significantly associated with total minutes of use (Spearman &#x03C1;=&#x2212;0.06, <italic>P</italic>=.57) or with likelihood of reaching the incentivized exposure threshold (odds ratio 1.00, 95% CI 1.00-1.00; <italic>P</italic>=.91).</p></sec><sec id="s3-1-2"><title>Safety Monitoring</title><p>During the 2-week exposure period, 10 (8.0%) participants triggered a real-time safety alert (no participants triggered more than one). Of these 10 alerts, 5 were flagged for potential risk of harm to self, 2 for harm to others, and 3 for harm from others. All transcripts flagged by the real-time safety classifier were reviewed by a study clinician within 2 hours, and none met the study threshold for phone-based safety outreach (eg, indicators of acute or imminent risk). The &#x201C;harm to self&#x201D; alerts referenced concerns about death in other contexts (eg, panic attacks, loved ones) or risk factors (eg, feeling like a burden), but no suicidal thoughts. 
&#x201C;Harm to others&#x201D; alerts reflected third-party nonviolent contextual behaviors or hypothetical situations, and &#x201C;harm from others&#x201D; alerts reflected general interpersonal conflict or precautionary safety concerns.</p></sec></sec><sec id="s3-2"><title>Qualitative User Experience Findings</title><p>Consistent with the early-stage, developmental focus of this pilot, qualitative analyses were exploratory and used primarily to identify actionable areas for product refinement. Across the 3 cohorts, participants described the chatbot as accessible, easy to use, and nonjudgmental, but limited by repetitive and impersonal responses. Qualitative content analysis identified three primary domains of experience: (1) accessibility and usability, (2) emotional validation and reflection, and (3) limitations in personalization and conversational depth.</p><p>Participants across all cohorts described the chatbot as a convenient outlet for emotional expression and appreciated being able to engage at any time and fit sessions flexibly into their routines. As one participant explained, &#x201C;The chatbot was able to express my feelings without any judgment, and it&#x2019;s there whenever I needed to talk.&#x201D; [Cohort 1]. Others highlighted the app&#x2019;s intuitive interface and customizable sessions, calling it &#x201C;super easy to use. To set up... super easy. It may be the simplest app I&#x2019;ve ever used to actually get in and do that.&#x201D; [Cohort 2] and appreciating that &#x201C;it was handy. I could use it at any point in time 24/7.&#x201D; [Cohort 3]. These qualities made the chatbot particularly appealing as a low-barrier, on-demand support tool.</p><p>Across cohorts, participants also valued the chatbot&#x2019;s ability to validate emotions and encourage self-reflection. 
Many described feeling heard and supported when the chatbot acknowledged their emotions, with one noting that it &#x201C;did a good job validating my feelings and helping me remember that others feel the same way&#x201D; [Cohort 1]. Later users reported that it facilitated goal setting and simple coping strategies, such as reminders to take breaks or practice breathing exercises: &#x201C;It helped me set a goal, did a grounding exercise, and gave me tools to get started&#x201D; [Cohort 3]. These elements contributed to perceptions of the chatbot as supportive, though somewhat limited in depth.</p><p>Despite these strengths, the chatbot&#x2019;s repetitiveness and lack of personalization emerged as key pain points. Participants frequently described conversations as &#x201C;robotic&#x201D; or &#x201C;circular,&#x201D; with limited responsiveness to context. One Cohort 1 user noted, &#x201C;It just kept asking the same questions and using the same exact language,&#x201D; while a Cohort 2 participant remarked, &#x201C;Every response was starting with my name... it seems very unnatural and stiff.&#x201D; Others lamented that the chatbot failed to pivot or offer specific advice: &#x201C;It kept asking me the same questions... whereas a human could pivot the conversation&#x201D; [Cohort 3]. Feedback on session structure also shifted across cohorts. Cohort 1 users requested more guidance around session length, which informed the introduction of timed sessions; however, Cohorts 2 and 3 reported occasional frustration when session cut-offs truncated ongoing discussion.</p><p>Across cohorts, core themes were consistent: participants valued accessibility and emotional validation but critiqued repetitive and impersonal responses. Cohort-level differences primarily reflected emphasis on emotional safety in Cohort 1, usability and goal-setting in Cohort 2, and coping tools and session design in Cohort 3. 
As the prototype evolved across cohorts, feedback appeared to shift from general impressions to more specific critiques of functionality and conversational adaptability. Overall, participants saw clear potential in the chatbot while emphasizing that improvements in personalization, conversational nuance, and responsiveness will be essential for enhancing its value.</p></sec><sec id="s3-3"><title>Usability of the Chatbot App</title><p>Usability, assessed using an adapted Intervention Usability Scale (IUS) [<xref ref-type="bibr" rid="ref27">27</xref>] specific to the mental wellness chatbot (0&#x2010;100 scale), was generally favorable. Participants reported a mean IUS score of 75.0 (SD 13.6). Although the IUS does not have established percentile norms, scores in the mid-70s are interpreted as &#x201C;good&#x201D; usability on the original System Usability Scale [<xref ref-type="bibr" rid="ref43">43</xref>]. For context, the mean usability rating in this study was higher than that reported for the motivational interviewing intervention in the original IUS validation paper (68.7), suggesting that the chatbot app was, on the whole, experienced as usable by participants. Household income was not correlated with usability scores (<italic>r</italic>=0.07, <italic>P</italic>=.47).</p></sec><sec id="s3-4"><title>User Experience and Acceptability Ratings</title><p>Participants generally reported moderately positive first impressions of the chatbot&#x2019;s emotional safety, trustworthiness, sense of being heard and understood, and professionalism, with mean ratings hovering slightly above the scale midpoint on most items (<xref ref-type="fig" rid="figure1">Figure 1A</xref>). However, a substantial minority of participants endorsed neutral or negative response options, particularly for items related to feeling deeply understood (&#x2248;40%) and perceived personalization of responses (&#x2248;65%), indicating focal opportunities for improvement. 
At the 1-week follow-up, perceptions of emotional safety, comfort, and professionalism were generally stable (<xref ref-type="fig" rid="figure1">Figure 1B</xref>), although ratings of response personalization declined relative to first impressions.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Panel (A) displays ratings from the first-impression survey completed immediately after initial interaction with the chatbot; Panel (B) displays ratings from the 1-week follow-up survey.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e90644_fig01.png"/></fig><p>Acceptability ratings at one week reflected a similar pattern of mixed strengths and areas to improve upon (<xref ref-type="fig" rid="figure2">Figure 2A</xref>). Most participants agreed or strongly agreed that they trusted the chatbot to respond appropriately and understood how the app was intended to help. However, broader indicators of acceptability were notably lower. Fewer than 10% strongly agreed that they liked interacting with the app, and fewer than half agreed or strongly agreed overall. Perceived helpfulness was also modest, with only &#x2248;40% agreeing that the chatbot&#x2019;s responses were helpful for their mental health. Ratings of self-efficacy also highlight an area for improvement, with fewer than half agreeing that they were able to use the app consistently and correctly. 
Household income was not related to first impression ratings (<italic>r</italic>=0.05, <italic>P</italic>=.59) or overall acceptability (<italic>r</italic>=&#x2013;0.09, <italic>P</italic>=.37).</p><p>Therapeutic alliance ratings at 1-week follow-up were moderate across bond, task agreement, and goal agreement subdimensions, with mean item scores just narrowly above the scale midpoint (Figure S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref37">37</xref>]). Compared with human therapists, participants generally viewed the chatbot as more accessible, less stigmatizing, and easier to open up to and be honest with (<xref ref-type="fig" rid="figure2">Figure 2B</xref>). Taken together, these findings indicate that while the chatbot was generally viewed as safe, respectful, and understandable, with some key advantages over traditional therapy settings, participants&#x2019; experiences of enjoyment, perceived helpfulness, confidence in use, and personalization were more variable and less positive, on average. Household income was not related to perceived therapeutic alliance (<italic>r</italic>=&#x2212;0.11, <italic>P</italic>=.28).</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Panel (A) displays acceptability ratings of the chatbot; Panel (B) displays ratings of perceived advantages of the chatbot relative to traditional, human therapy.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e90644_fig02.png"/></fig></sec><sec id="s3-5"><title>Clinical Outcomes</title><sec id="s3-5-1"><title>Overview</title><p>Descriptive estimates of clinical outcomes at baseline and 2-week follow-up are shown in <xref ref-type="table" rid="table1">Table 1</xref>, and regression model results are shown in <xref ref-type="table" rid="table2">Table 2</xref>. 
Mixed-effects models showed a significant reduction in depressive symptoms from baseline to 2-week follow-up, corresponding to a small-to-moderate standardized effect size (Hedges <italic>g</italic> computed using the baseline SD [ie, Glass&#x2019;s &#x0394;]; <italic>g</italic>=&#x2013;0.32, 95% CI &#x2013;0.52 to &#x2013;0.11). Anxiety symptoms also decreased significantly, with a similar standardized effect magnitude (<italic>g</italic>=&#x2013;0.32, 95% CI &#x2013;0.50 to &#x2013;0.14). Total-symptom models, including all participants with either PHQ-8 or GAD-7 scores &#x003E;4, indicated significant overall improvement across the 2-week period (<italic>g</italic>=&#x2013;0.26, 95% CI &#x2013;0.42 to &#x2013;0.09). Household income was not correlated with residualized change in total symptoms (<italic>r</italic>=&#x2013;0.14, <italic>P</italic>=.16).</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Descriptive statistics for clinical outcomes at baseline and follow-up. The analytic sample for assessing depressive and anxiety symptoms included those with baseline patient health questionnaire (PHQ) or generalized anxiety disorder (GAD) scores &#x003E;4, respectively. 
All other outcomes included participants with either PHQ or GAD &#x003E;4 at baseline.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Measure/scale</td><td align="left" valign="bottom" colspan="2">Baseline</td><td align="left" valign="bottom" colspan="2">Two-week follow-up</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">N</td><td align="left" valign="bottom">Mean (SD)</td><td align="left" valign="bottom">N</td><td align="left" valign="bottom">Mean (SD)</td></tr></thead><tbody><tr><td align="left" valign="top">Depressive symptoms (PHQ-8)</td><td align="left" valign="top">96</td><td align="left" valign="top">10.14 (4.10)</td><td align="left" valign="top">92</td><td align="left" valign="top">8.84 (4.39)</td></tr><tr><td align="left" valign="top">Anxiety symptoms (GAD-7)</td><td align="left" valign="top">107</td><td align="left" valign="top">10.38 (4.10)</td><td align="left" valign="top">102</td><td align="left" valign="top">9.18 (4.73)</td></tr><tr><td align="left" valign="top">Total symptoms (PHQ+GAD)</td><td align="left" valign="top">116</td><td align="left" valign="top">18.85 (7.77)</td><td align="left" valign="top">111</td><td align="left" valign="top">16.94 (8.24)</td></tr><tr><td align="left" valign="top">Well-being (WHO-5)<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="top">116</td><td align="left" valign="top">9.43 (4.34)</td><td align="left" valign="top">110</td><td align="left" valign="top">10.26 (4.82)</td></tr><tr><td align="left" valign="top">Psychological distress (K6)</td><td align="left" valign="top">116</td><td align="left" valign="top">7.38 (3.68)</td><td align="left" valign="top">110</td><td align="left" valign="top">6.59 (3.85)</td></tr><tr><td align="left" valign="top">Loneliness (UCLA-3)<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td><td align="left" valign="top">114</td><td align="left" 
valign="top">2.30 (1.72)</td><td align="left" valign="top">109</td><td align="left" valign="top">2.15 (1.99)</td></tr><tr><td align="left" valign="top">Chatbot attitudes (APOI<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup>)</td><td align="left" valign="top">116</td><td align="left" valign="top">49.38 (9.34)</td><td align="left" valign="top">111</td><td align="left" valign="top">48.54 (11.02)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>WHO-5: World Health Organization-5 Well-Being Index.</p></fn><fn id="table1fn2"><p><sup>b</sup>UCLA-3: University of California, Los Angeles 3-Item Loneliness Scale.</p></fn><fn id="table1fn3"><p><sup>c</sup>APOI: Attitudes Towards Psychological Online Interventions.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Longitudinal mixed-effects models predicting depressive, anxiety, and total symptom scores by time (baseline vs 2-week follow-up).</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Covariate</td><td align="left" valign="bottom" colspan="3">Depressive symptoms (PHQ-8)<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> (n=96)</td><td align="left" valign="bottom" colspan="3">Anxiety symptoms (GAD-7)<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> (n=107)</td><td align="left" valign="bottom" colspan="3">Total symptoms (PHQ-ADS)<sup><xref ref-type="table-fn" rid="table2fn3">c</xref>,<xref ref-type="table-fn" rid="table2fn4">d</xref></sup> (n=116)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><italic>&#x03B2;</italic></td><td align="left" valign="top">SE</td><td align="left" valign="top"><italic>P</italic> value</td><td align="left" valign="top"><italic>&#x03B2;</italic></td><td align="left" valign="top">SE</td><td align="left" valign="top"><italic>P</italic> value</td><td align="left" valign="top"><italic>&#x03B2;</italic></td><td align="left" 
valign="top">SE</td><td align="left" valign="top"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top">Time (follow-up vs baseline)</td><td align="left" valign="top">&#x2212;1.30<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">0.44<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">.003<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">&#x2212;1.31<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">0.38<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">.001<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">&#x2212;2.01<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">0.67<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">.003<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td></tr><tr><td align="left" valign="top">Cohort 2 (vs cohort 1)</td><td align="left" valign="top">0.34</td><td align="left" valign="top">0.93</td><td align="left" valign="top">.718</td><td align="left" valign="top">&#x2212;0.25</td><td align="left" valign="top">0.96</td><td align="left" valign="top">.79</td><td align="left" valign="top">&#x2212;0.23</td><td align="left" valign="top">1.66</td><td align="left" valign="top">.89</td></tr><tr><td align="left" valign="top">Cohort 3 (vs cohort 1)</td><td align="left" valign="top">&#x2212;0.31</td><td align="left" valign="top">0.89</td><td align="left" valign="top">.73</td><td align="left" valign="top">&#x2212;1.1</td><td align="left" valign="top">0.93</td><td align="left" valign="top">.24</td><td align="left" valign="top">&#x2212;2.11</td><td align="left" valign="top">1.61</td><td align="left" valign="top">.19</td></tr><tr><td align="left" 
valign="top">Age</td><td align="left" valign="top">0.00</td><td align="left" valign="top">0.04</td><td align="left" valign="top">.95</td><td align="left" valign="top">0.05</td><td align="left" valign="top">0.05</td><td align="left" valign="top">.29</td><td align="left" valign="top">0.05</td><td align="left" valign="top">0.07</td><td align="left" valign="top">.51</td></tr><tr><td align="left" valign="top">Woman (vs man)</td><td align="left" valign="top">&#x2212;0.50</td><td align="left" valign="top">0.78</td><td align="left" valign="top">.52</td><td align="left" valign="top">0.11</td><td align="left" valign="top">0.82</td><td align="left" valign="top">.90</td><td align="left" valign="top">&#x2212;0.93</td><td align="left" valign="top">1.43</td><td align="left" valign="top">.52</td></tr><tr><td align="left" valign="top">College degree (vs no degree)</td><td align="left" valign="top">&#x2212;1.04</td><td align="left" valign="top">0.97</td><td align="left" valign="top">.28</td><td align="left" valign="top">0.3</td><td align="left" valign="top">1.01</td><td align="left" valign="top">.76</td><td align="left" valign="top">&#x2212;1.52</td><td align="left" valign="top">1.77</td><td align="left" valign="top">.39</td></tr><tr><td align="left" valign="top">Postgraduate degree (vs no degree)</td><td align="left" valign="top">&#x2212;1.82</td><td align="left" valign="top">1.08</td><td align="left" valign="top">.09</td><td align="left" valign="top">&#x2212;1.92</td><td align="left" valign="top">1.11</td><td align="left" valign="top">.08</td><td align="left" valign="top">&#x2212;4.25<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">1.95<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">.03<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td></tr><tr><td align="left" valign="top">Unemployed (vs employed)</td><td align="left" valign="top">1.32</td><td align="left" 
valign="top">1.09</td><td align="left" valign="top">.22</td><td align="left" valign="top">&#x2212;1.69</td><td align="left" valign="top">1.13</td><td align="left" valign="top">.14</td><td align="left" valign="top">&#x2212;1.31</td><td align="left" valign="top">1.94</td><td align="left" valign="top">.50</td></tr><tr><td align="left" valign="top">Current student (vs employed)</td><td align="left" valign="top">&#x2212;1.07</td><td align="left" valign="top">1.31</td><td align="left" valign="top">.41</td><td align="left" valign="top">&#x2212;0.01</td><td align="left" valign="top">1.28</td><td align="left" valign="top">&#x003E;.99</td><td align="left" valign="top">&#x2212;1.32</td><td align="left" valign="top">2.14</td><td align="left" valign="top">.54</td></tr><tr><td align="left" valign="top">Asian/Asian American (vs White)</td><td align="left" valign="top">&#x2212;1.09</td><td align="left" valign="top">1.19</td><td align="left" valign="top">.36</td><td align="left" valign="top">0.85</td><td align="left" valign="top">1.25</td><td align="left" valign="top">.50</td><td align="left" valign="top">0.04</td><td align="left" valign="top">2.14</td><td align="left" valign="top">.99</td></tr><tr><td align="left" valign="top">Black/African American (vs White)</td><td align="left" valign="top">2.30</td><td align="left" valign="top">1.28</td><td align="left" valign="top">.07</td><td align="left" valign="top">0.09</td><td align="left" valign="top">1.33</td><td align="left" valign="top">.94</td><td align="left" valign="top">1.75</td><td align="left" valign="top">2.17</td><td align="left" valign="top">.42</td></tr><tr><td align="left" valign="top">Multiple races (vs White)</td><td align="left" valign="top">1.68</td><td align="left" valign="top">1.78</td><td align="left" valign="top">.35</td><td align="left" valign="top">2.43</td><td align="left" valign="top">1.77</td><td align="left" valign="top">.17</td><td align="left" valign="top">6.22<sup><xref ref-type="table-fn" 
rid="table2fn5">e</xref></sup></td><td align="left" valign="top">3.12<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">.047<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td></tr><tr><td align="left" valign="top">Another race (vs White)</td><td align="left" valign="top">5.22</td><td align="left" valign="top">3.14</td><td align="left" valign="top">.10</td><td align="left" valign="top">2.75</td><td align="left" valign="top">3.29</td><td align="left" valign="top">.40</td><td align="left" valign="top">8.30</td><td align="left" valign="top">5.78</td><td align="left" valign="top">.15</td></tr><tr><td align="left" valign="top">Hispanic (vs Non-Hispanic)</td><td align="left" valign="top">4.79</td><td align="left" valign="top">2.74</td><td align="left" valign="top">.08</td><td align="left" valign="top">4.02</td><td align="left" valign="top">2.89</td><td align="left" valign="top">.16</td><td align="left" valign="top">9.88</td><td align="left" valign="top">5.18</td><td align="left" valign="top">.056</td></tr><tr><td align="left" valign="top">Interaction models<sup><xref ref-type="table-fn" rid="table2fn6">f</xref></sup></td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Time &#x00D7; baseline symptom levels</td><td align="left" valign="top">&#x2212;0.64<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">0.17<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">&#x003C;.001<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" 
valign="top">&#x2212;0.27<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">0.09<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">.001<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">&#x2212;0.35<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">0.08<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top">&#x003C;.001<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>Participants in the depressive symptoms model had to have a baseline PHQ score &#x003E;4.</p></fn><fn id="table2fn2"><p><sup>b</sup>Participants in the anxiety symptoms model had to have a baseline GAD score &#x003E;4.</p></fn><fn id="table2fn3"><p><sup>c</sup>Participants in the total symptoms model had to have either PHQ or GAD score &#x003E;4 at baseline.</p></fn><fn id="table2fn4"><p><sup>d</sup>PHQ-ADS: Patient Health Questionnaire Anxiety and Depression Scale.</p></fn><fn id="table2fn5"><p><sup>e</sup>Statistically significant estimates.</p></fn><fn id="table2fn6"><p><sup>f</sup>Interaction models were each tested separately, controlling for the same covariates as the main effects models above. Inverse interaction coefficients indicate that participants with higher baseline symptoms showed greater reductions from baseline to follow-up (ie, larger improvements).</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-5-2"><title>Baseline Symptomatology as a Moderator</title><p>Across outcomes, participants with higher baseline symptom severity demonstrated larger improvements over the 2-week period, reflected by significant baseline-severity&#x00D7;time interactions (<xref ref-type="table" rid="table2">Table 2</xref>). 
To probe these interactions, we estimated the marginal intervention effect across a continuum of plausible baseline values. These estimates are model-based and should be interpreted cautiously, as they reflect greater statistical uncertainty at the upper end of the baseline severity distribution, where observations were relatively sparse.</p><p>For depressive symptoms, the magnitude of improvement increased steadily as baseline PHQ-8 severity increased (<xref ref-type="fig" rid="figure3">Figure 3A</xref>). At a baseline PHQ-8 score of 10, the predicted effect size was <italic>g</italic>=&#x2013;0.29, consistent with a modest reduction. At baseline scores of 12.5 (<italic>g</italic>=&#x2013;0.57) and 15 (<italic>g</italic>=&#x2013;0.84), the corresponding predicted effects indicate medium to large improvements. At higher baseline severity (PHQ-8=17.5), the estimated improvement was <italic>g</italic>=&#x2013;1.12, reflecting a very large effect. A similar pattern emerged for anxiety symptoms (<xref ref-type="fig" rid="figure3">Figure 3B</xref>). At a baseline GAD-7 score of 10, the predicted effect size was <italic>g</italic>=&#x2013;0.29. This effect size increased at baseline scores of 12.5 (<italic>g</italic>=&#x2013;0.46) and 15 (<italic>g</italic>=&#x2013;0.62). At higher baseline severity (GAD-7=17.5), the model predicted a larger improvement (<italic>g</italic>=&#x2013;0.78).</p><p>These marginal-effect curves show that while effect size magnitudes were modest at lower baseline symptom levels, participants with moderate to severe depressive or anxiety symptoms demonstrated substantially greater reductions over the 2-week exposure period. 
These results indicate that baseline severity meaningfully moderated clinical change, with higher-severity individuals showing the largest predicted reductions.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Curves show the model-estimated marginal effect of time (follow-up vs baseline) from mixed-effects models with a time&#x00D7;baseline interaction. Shaded regions indicate 95% CIs based on the delta method. GAD: generalized anxiety disorder; PHQ: patient health questionnaire.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e90644_fig03.png"/></fig></sec><sec id="s3-5-3"><title>Secondary Clinical Outcomes</title><p>For secondary outcomes (<xref ref-type="table" rid="table3">Table 3</xref>), well-being increased (<italic>g</italic>=0.19, 95% CI 0.03-0.36) and psychological distress decreased (<italic>g</italic>=&#x2013;0.21, 95% CI &#x2013;0.38 to &#x2013;0.04), with both effect sizes in the small-to-moderate range. Loneliness showed a small, nonsignificant reduction (<italic>g</italic>=&#x2013;0.11, 95% CI &#x2013;0.26 to 0.04). Attitudes toward mental wellness chatbots did not shift significantly across the exposure period, although the direction of change suggested slightly less favorable attitudes at follow-up (<italic>g</italic>=&#x2013;0.09, 95% CI &#x2013;0.24 to 0.05).</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Longitudinal mixed-effects models predicting secondary clinical outcomes by time (baseline vs 2-week follow-up). 
Participants in these models had to have either a patient health questionnaire (PHQ) or generalized anxiety disorder (GAD) score &#x003E;4 at baseline.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Covariate</td><td align="left" valign="bottom" colspan="3">Well-being (WHO-5) (n=116)</td><td align="left" valign="bottom" colspan="3">Psychological distress (K6) (n=116)</td><td align="left" valign="bottom" colspan="3">Loneliness (UCLA-3)<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> (n=116)</td><td align="left" valign="bottom" colspan="3">APOI<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> &#x2013; Therapy Chatbot (n=116)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><italic>&#x03B2;</italic></td><td align="left" valign="top">SE</td><td align="left" valign="top"><italic>P</italic> value</td><td align="left" valign="top"><italic>&#x03B2;</italic></td><td align="left" valign="top">SE</td><td align="left" valign="top"><italic>P</italic> value</td><td align="left" valign="top"><italic>&#x03B2;</italic></td><td align="left" valign="top">SE</td><td align="left" valign="top"><italic>P</italic> value</td><td align="left" valign="top"><italic>&#x03B2;</italic></td><td align="left" valign="top">SE</td><td align="left" valign="top"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top">Time (follow-up vs baseline)</td><td align="left" valign="top">0.85<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">0.37<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">.02<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">&#x2212;0.77<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">0.32<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" 
valign="top">.02<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">&#x2212;0.19</td><td align="left" valign="top">0.13</td><td align="left" valign="top">.15</td><td align="left" valign="top">&#x2212;0.88</td><td align="left" valign="top">0.68</td><td align="left" valign="top">.19</td></tr><tr><td align="left" valign="top">Cohort 2 (vs cohort 1)</td><td align="left" valign="top">&#x2212;0.19</td><td align="left" valign="top">0.99</td><td align="left" valign="top">.84</td><td align="left" valign="top">&#x2212;0.37</td><td align="left" valign="top">0.78</td><td align="left" valign="top">.64</td><td align="left" valign="top">&#x2212;0.29</td><td align="left" valign="top">0.39</td><td align="left" valign="top">.46</td><td align="left" valign="top">3.32</td><td align="left" valign="top">2.18</td><td align="left" valign="top">.13</td></tr><tr><td align="left" valign="top">Cohort 3 (vs cohort 1)</td><td align="left" valign="top">0.31</td><td align="left" valign="top">0.96</td><td align="left" valign="top">.74</td><td align="left" valign="top">&#x2212;0.25</td><td align="left" valign="top">0.76</td><td align="left" valign="top">.74</td><td align="left" valign="top">&#x2212;0.55</td><td align="left" valign="top">0.38</td><td align="left" valign="top">.15</td><td align="left" valign="top">0.67</td><td align="left" valign="top">2.11</td><td align="left" valign="top">.75</td></tr><tr><td align="left" valign="top">Age</td><td align="left" valign="top">&#x2212;0.04</td><td align="left" valign="top">0.04</td><td align="left" valign="top">.31</td><td align="left" valign="top">0.00</td><td align="left" valign="top">0.03</td><td align="left" valign="top">.89</td><td align="left" valign="top">0.01</td><td align="left" valign="top">0.02</td><td align="left" valign="top">.42</td><td align="left" valign="top">0.02</td><td align="left" valign="top">0.10</td><td align="left" valign="top">.82</td></tr><tr><td align="left" valign="top">Woman (vs 
man)</td><td align="left" valign="top">&#x2212;1.07</td><td align="left" valign="top">0.85</td><td align="left" valign="top">.21</td><td align="left" valign="top">&#x2212;1.17</td><td align="left" valign="top">0.67</td><td align="left" valign="top">.08</td><td align="left" valign="top">&#x2212;0.27</td><td align="left" valign="top">0.34</td><td align="left" valign="top">.42</td><td align="left" valign="top">&#x2212;0.17</td><td align="left" valign="top">1.88</td><td align="left" valign="top">.93</td></tr><tr><td align="left" valign="top">College degree (vs no degree)</td><td align="left" valign="top">&#x2212;0.25</td><td align="left" valign="top">1.06</td><td align="left" valign="top">.81</td><td align="left" valign="top">&#x2212;0.86</td><td align="left" valign="top">0.83</td><td align="left" valign="top">.30</td><td align="left" valign="top">&#x2212;0.26</td><td align="left" valign="top">0.42</td><td align="left" valign="top">.54</td><td align="left" valign="top">&#x2212;0.95</td><td align="left" valign="top">2.33</td><td align="left" valign="top">.68</td></tr><tr><td align="left" valign="top">Postgraduate degree (vs no degree)</td><td align="left" valign="top">0.79</td><td align="left" valign="top">1.16</td><td align="left" valign="top">.50</td><td align="left" valign="top">&#x2212;1.80<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">0.92<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">.049<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">&#x2212;0.51</td><td align="left" valign="top">0.46</td><td align="left" valign="top">.27</td><td align="left" valign="top">&#x2212;5.00</td><td align="left" valign="top">2.56</td><td align="left" valign="top">.05</td></tr><tr><td align="left" valign="top">Unemployed (vs employed)</td><td align="left" valign="top">&#x2212;0.85</td><td align="left" valign="top">1.16</td><td align="left" 
valign="top">.46</td><td align="left" valign="top">&#x2212;0.94</td><td align="left" valign="top">0.91</td><td align="left" valign="top">.31</td><td align="left" valign="top">0.10</td><td align="left" valign="top">0.45</td><td align="left" valign="top">.83</td><td align="left" valign="top">&#x2212;0.38</td><td align="left" valign="top">2.54</td><td align="left" valign="top">.88</td></tr><tr><td align="left" valign="top">Current student (vs employed)</td><td align="left" valign="top">&#x2212;1.00</td><td align="left" valign="top">1.27</td><td align="left" valign="top">.43</td><td align="left" valign="top">&#x2212;0.35</td><td align="left" valign="top">1.00</td><td align="left" valign="top">.73</td><td align="left" valign="top">0.67</td><td align="left" valign="top">0.50</td><td align="left" valign="top">.18</td><td align="left" valign="top">&#x2212;1.97</td><td align="left" valign="top">2.80</td><td align="left" valign="top">.48</td></tr><tr><td align="left" valign="top">Asian/Asian American (vs White)</td><td align="left" valign="top">0.33</td><td align="left" valign="top">1.27</td><td align="left" valign="top">.79</td><td align="left" valign="top">0.97</td><td align="left" valign="top">1.01</td><td align="left" valign="top">.33</td><td align="left" valign="top">0.64</td><td align="left" valign="top">0.50</td><td align="left" valign="top">.21</td><td align="left" valign="top">&#x2212;0.31</td><td align="left" valign="top">2.81</td><td align="left" valign="top">.91</td></tr><tr><td align="left" valign="top">Black/African American (vs White)</td><td align="left" valign="top">0.47</td><td align="left" valign="top">1.29</td><td align="left" valign="top">.72</td><td align="left" valign="top">0.11</td><td align="left" valign="top">1.02</td><td align="left" valign="top">.91</td><td align="left" valign="top">1.13<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">0.51<sup><xref ref-type="table-fn" 
rid="table3fn3">c</xref></sup></td><td align="left" valign="top">.03<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">1.86</td><td align="left" valign="top">2.84</td><td align="left" valign="top">.51</td></tr><tr><td align="left" valign="top">Multiple races (vs White)</td><td align="left" valign="top">&#x2212;1.29</td><td align="left" valign="top">1.86</td><td align="left" valign="top">.49</td><td align="left" valign="top">2.39</td><td align="left" valign="top">1.47</td><td align="left" valign="top">.10</td><td align="left" valign="top">0.20</td><td align="left" valign="top">0.74</td><td align="left" valign="top">.79</td><td align="left" valign="top">&#x2212;2.83</td><td align="left" valign="top">4.07</td><td align="left" valign="top">.49</td></tr><tr><td align="left" valign="top">Another race (vs White)</td><td align="left" valign="top">2.29</td><td align="left" valign="top">3.45</td><td align="left" valign="top">.51</td><td align="left" valign="top">4.85</td><td align="left" valign="top">2.72</td><td align="left" valign="top">.07</td><td align="left" valign="top">0.09</td><td align="left" valign="top">1.36</td><td align="left" valign="top">.95</td><td align="left" valign="top">9.29</td><td align="left" valign="top">7.57</td><td align="left" valign="top">.22</td></tr><tr><td align="left" valign="top">Hispanic (vs Non-Hispanic)</td><td align="left" valign="top">0.79</td><td align="left" valign="top">3.08</td><td align="left" valign="top">.80</td><td align="left" valign="top">5.21<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">2.43<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">.03<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="top">1.12</td><td align="left" valign="top">1.22</td><td align="left" valign="top">.36</td><td align="left" valign="top">2.39</td><td align="left" 
valign="top">6.78</td><td align="left" valign="top">.72</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>UCLA-3: University of California, Los Angeles 3-Item Loneliness Scale.</p></fn><fn id="table3fn2"><p><sup>b</sup>APOI: Attitudes Towards Psychological Online Interventions.</p></fn><fn id="table3fn3"><p><sup>c</sup>Statistically significant estimates.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-5-4"><title>Associations Between Usage and Symptom Change</title><p>Using residualized change scores adjusted for baseline symptom severity, Pearson correlations indicated that associations between chatbot usage metrics and total symptom change (PHQ-ADS) were small and not statistically significant (<italic>r</italic>=&#x2212;0.13 to &#x2212;0.08; Figure S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref37">37</xref>]). However, all correlations trended in the expected direction: participants who used the app more (more minutes, sessions, talk-turns, or days active) tended to show slightly greater symptom improvement. 
Given the structured, incentivized exposure window, variability in usage was intentionally constrained, limiting the ability to observe naturalistic dose&#x2013;response associations.</p></sec><sec id="s3-5-5"><title>Associations Between First Impressions and Symptom Change</title><p>Pearson correlations between first-impression ratings and residualized symptom change scores showed that 3 early experiential perceptions were significantly associated with greater improvement in total symptoms (PHQ-ADS; Figure S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref37">37</xref>]): feeling that the chatbot understood participants&#x2019; concerns and needs (<italic>r</italic>=&#x2212;0.22, 95% CI &#x2212;0.39 to &#x2212;0.04), perceiving the chatbot as professional (<italic>r</italic>=&#x2212;0.25, 95% CI &#x2212;0.42 to &#x2212;0.07), and rating the chatbot&#x2019;s responses as personalized (<italic>r</italic>=&#x2212;0.29, 95% CI &#x2212;0.45 to &#x2212;0.10). Correlations with emotional safety, trust, comfort sharing, and feeling heard or validated were small and nonsignificant.</p></sec></sec><sec id="s3-6"><title>Experiential Predictors of Therapeutic Alliance and Acceptability</title><p>Exploratory models examined which specific experiences, assessed at the 1-week follow-up, were associated with therapeutic alliance and acceptability (<xref ref-type="table" rid="table4">Table 4</xref>). Although the trust item was removed due to multicollinearity issues (ie, Variance Inflation Factor &#x003E;4), several experiences emerged as relevant correlates. Participants who felt more emotionally safe, heard/validated, and understood by the chatbot reported significantly higher therapeutic alliance scores and greater overall acceptability. 
Additionally, perceived personalization of the chatbot was significantly related to therapeutic alliance and acceptability, though with a slightly smaller effect. Taken together, participants&#x2019; sense of emotional safety and validation from the chatbot were the strongest experiential correlates of therapeutic alliance and acceptability.</p><p>These models also revealed notable demographic patterns. Relative to White participants, those identifying as Black, Hispanic, or multiracial/another race reported significantly higher acceptability, and Black participants also reported higher alliance. Additionally, participants in Cohort 2 (and for alliance, Cohort 3) reported higher ratings than those in Cohort 1, possibly reflecting iterations made to the chatbot based on the first cohort&#x2019;s feedback.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Experiential predictors of therapeutic alliance and acceptability scores. Participants in these models had to have either a patient health questionnaire (PHQ) or generalized anxiety disorder (GAD) score &#x003E;4 at baseline. 
These 2 outcomes, therapeutic alliance and acceptability scores, are correlated <italic>r</italic>=0.81, <italic>P</italic>&#x003C;.001.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Covariate</td><td align="left" valign="bottom" colspan="3">Therapeutic alliance</td><td align="left" valign="bottom" colspan="3">Acceptability scores</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top"><italic>&#x03B2;</italic></td><td align="left" valign="top">SE</td><td align="left" valign="top"><italic>P</italic> value</td><td align="left" valign="top"><italic>&#x03B2;</italic></td><td align="left" valign="top">SE</td><td align="left" valign="top"><italic>P</italic> value</td></tr></thead><tbody><tr><td align="left" valign="top">Felt emotionally safe using the chatbot</td><td align="left" valign="top">0.46<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.18<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">.01<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.19<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.08<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">.02<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td></tr><tr><td align="left" valign="top">Felt comfortable sharing with the chatbot</td><td align="left" valign="top">&#x2212;0.33</td><td align="left" valign="top">0.20</td><td align="left" valign="top">.10</td><td align="left" valign="top">0.00</td><td align="left" valign="top">0.09</td><td align="left" valign="top">.96</td></tr><tr><td align="left" valign="top">Felt heard and validated by the chatbot</td><td align="left" valign="top">0.64<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.16<sup><xref 
ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">&#x003C;.001<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.17<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.07<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">.02<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td></tr><tr><td align="left" valign="top">Felt the chatbot understood concerns and needs</td><td align="left" valign="top">0.38<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.16<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">.02<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.29<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.07<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">&#x003C;.001<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td></tr><tr><td align="left" valign="top">Felt the chatbot was professional</td><td align="left" valign="top">0.30</td><td align="left" valign="top">0.21</td><td align="left" valign="top">.15</td><td align="left" valign="top">0.08</td><td align="left" valign="top">0.09</td><td align="left" valign="top">.37</td></tr><tr><td align="left" valign="top">Felt the chatbot responses were personalized</td><td align="left" valign="top">0.26<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.13<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">.04<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.12<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.05<sup><xref 
ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">.03<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td></tr><tr><td align="left" valign="top">Cohort 2 (vs cohort 1)</td><td align="left" valign="top">0.99<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.31<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">.002<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.36<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.14<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">.009<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td></tr><tr><td align="left" valign="top">Cohort 3 (vs cohort 1)</td><td align="left" valign="top">0.97<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.31<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">.002<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.21</td><td align="left" valign="top">0.13</td><td align="left" valign="top">.12</td></tr><tr><td align="left" valign="top">Age</td><td align="left" valign="top">0.00</td><td align="left" valign="top">0.01</td><td align="left" valign="top">.89</td><td align="left" valign="top">0.00</td><td align="left" valign="top">0.01</td><td align="left" valign="top">.53</td></tr><tr><td align="left" valign="top">Woman (vs man)</td><td align="left" valign="top">0.37</td><td align="left" valign="top">0.27</td><td align="left" valign="top">.16</td><td align="left" valign="top">0.03</td><td align="left" valign="top">0.12</td><td align="left" valign="top">.79</td></tr><tr><td align="left" valign="top">College degree (vs no degree)</td><td align="left" valign="top">&#x2212;0.63</td><td 
align="left" valign="top">0.33</td><td align="left" valign="top">.06</td><td align="left" valign="top">&#x2212;0.21</td><td align="left" valign="top">0.15</td><td align="left" valign="top">.14</td></tr><tr><td align="left" valign="top">Postgraduate degree (vs no degree)</td><td align="left" valign="top">&#x2212;0.12</td><td align="left" valign="top">0.37</td><td align="left" valign="top">.75</td><td align="left" valign="top">&#x2212;0.18</td><td align="left" valign="top">0.16</td><td align="left" valign="top">.26</td></tr><tr><td align="left" valign="top">Unemployed (vs employed)</td><td align="left" valign="top">0.48</td><td align="left" valign="top">0.37</td><td align="left" valign="top">.20</td><td align="left" valign="top">0.41<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.16<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">.01<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td></tr><tr><td align="left" valign="top">Current student (vs employed)</td><td align="left" valign="top">&#x2212;0.77</td><td align="left" valign="top">0.43</td><td align="left" valign="top">.08</td><td align="left" valign="top">&#x2212;0.20</td><td align="left" valign="top">0.19</td><td align="left" valign="top">.29</td></tr><tr><td align="left" valign="top">Asian/Asian American (vs White)</td><td align="left" valign="top">0.63</td><td align="left" valign="top">0.42</td><td align="left" valign="top">.13</td><td align="left" valign="top">0.16</td><td align="left" valign="top">0.18</td><td align="left" valign="top">.38</td></tr><tr><td align="left" valign="top">Black/African American (vs White)</td><td align="left" valign="top">1.20<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.40<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">.004<sup><xref ref-type="table-fn" 
rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.44<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.18<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">.01<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td></tr><tr><td align="left" valign="top">Multiple races (vs White)</td><td align="left" valign="top">1.05</td><td align="left" valign="top">0.57</td><td align="left" valign="top">.07</td><td align="left" valign="top">0.54<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.25<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">.03<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td></tr><tr><td align="left" valign="top">Another race (vs White)</td><td align="left" valign="top">1.52</td><td align="left" valign="top">1.05</td><td align="left" valign="top">.15</td><td align="left" valign="top">1.00<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.46<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">.03<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td></tr><tr><td align="left" valign="top">Hispanic (vs Non-Hispanic)</td><td align="left" valign="top">1.21</td><td align="left" valign="top">0.93</td><td align="left" valign="top">.20</td><td align="left" valign="top">1.09<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">0.41<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="top">.009<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>Statistically significant estimates.</p></fn></table-wrap-foot></table-wrap></sec></sec><sec id="s4" 
sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Summary of Findings</title><p>This mixed methods pilot evaluated (1) user experience and acceptability, (2) feasibility of safety monitoring, and (3) an exploratory clinical signal for an early-stage generative AI mental wellness chatbot. Under an incentivized, time-limited protocol, participants reported the chatbot as accessible and emotionally validating while noting limitations in personalization and conversational depth. Safety workflows were feasible: 8% of participants triggered an automated safety alert, all were reviewed within 2 hours, and none required outreach under prespecified criteria. Models showed small-to-moderate pre&#x2013;post reductions in depressive and anxiety symptoms, with larger predicted improvements among participants with higher baseline severity, though model-predicted effects at the upper end should be interpreted cautiously given sparser data among those with elevated baseline symptoms. Together, findings support procedural feasibility and identify candidate therapeutic targets to inform ongoing refinement and future controlled trials.</p></sec><sec id="s4-2"><title>Interpretation and Comparison With Prior Work</title><sec id="s4-2-1"><title>Clinical Signal</title><p>Participants demonstrated measurable improvements in depressive and anxiety symptoms over the 2-week exposure period, despite the brief, incentivized format and the early developmental stage of the prototype. Small-to-moderate improvements were also observed in psychological distress and well-being, suggesting that even limited engagement with the chatbot produced detectable short-term clinical benefits. 
These findings are notable given the relatively mild symptom severity of the enrolled sample and the modest exposure (&#x2248;60 min per week).</p><p>Baseline symptomatology moderation analyses provide critical context for understanding these effects in comparison to other recently published chatbot studies. Participants with more severe baseline symptoms experienced substantially greater improvement, with standardized effect sizes increasing sharply with more elevated baseline severity. This pattern indicates that the chatbot prototype&#x2019;s clinical impact is not uniform but may vary as a function of initial symptom burden; however, because of the restricted risk profile of this pilot sample, moderation patterns should be interpreted cautiously, especially at the higher end, where data were sparse and CIs were wider. Indeed, the modest average effects observed in the full sample are partly a function of the sample&#x2019;s relatively low baseline severity, not necessarily a limitation of the chatbot itself.</p><p>This has direct relevance for interpreting our findings within the burgeoning literature on mental wellness chatbots. Recent trials of more mature systems, such as Therabot [<xref ref-type="bibr" rid="ref9">9</xref>] and Ash [<xref ref-type="bibr" rid="ref44">44</xref>], have reported larger effect sizes than the main effects estimated herein. However, these trials deliberately recruited participants with moderate to severe symptoms, required higher-frequency, longer-duration exposure, and used different statistical modeling approaches to estimate effect sizes. Because symptom severity is a strong determinant of effect magnitude, studies that enroll more symptomatic participants naturally provide more opportunity for measurable change [<xref ref-type="bibr" rid="ref45">45</xref>]. Our moderation results reinforce this point, as higher baseline symptoms were associated with substantially larger predicted improvements. 
Thus, while direct comparisons to more established chatbots are not yet appropriate given differences in product maturity, engagement requirements, and sample symptom levels, our findings suggest that this early-stage prototype has the potential to produce clinically meaningful effects when used by individuals with greater initial symptom severity. We interpret this evidence as proof of concept, clarifying both the potential of the nascent system and the likely improvement as the product becomes more refined and is tested in samples with higher baseline symptomatology.</p></sec><sec id="s4-2-2"><title>User Experience and Design Considerations</title><p>The convergent quantitative and qualitative findings highlight both the prototype&#x2019;s strengths and the most critical areas for refinement. Participants frequently characterized the chatbot as accessible, nonjudgmental, and easy to use. These positive qualities strongly aligned with the goal of AI-supported mental health tools to lower barriers to care [<xref ref-type="bibr" rid="ref46">46</xref>]. Users&#x2019; impressions also aligned with high usability ratings and moderate therapeutic alliance scores, indicating that participants were able to form a basic sense of responsiveness and support with the tool. Importantly, alliance in AI-mediated interactions is conceptually distinct from the relational bond cultivated with human therapists and should not be interpreted using the same benchmarks [<xref ref-type="bibr" rid="ref47">47</xref>]. As others in this area have argued [<xref ref-type="bibr" rid="ref48">48</xref>], alliance plays a more modest and indirect mechanistic role in digital interventions. 
In this context, the moderate alliance observed presently likely represents a realistic upper bound for early-stage prototypes, but is nevertheless a key focus for forthcoming product development.</p><p>A core design tradeoff in AI tools is that stronger safety and content-guardrail constraints often limit the natural fluidity of conversation, shaping how users experience and evaluate the system. Integration of the qualitative and quantitative findings revealed consistent limitations in personalization, conversational depth, and perceived helpfulness. Some participants described the interaction pattern as repetitive, formulaic, or context-insensitive, which directly aligns with lower quantitative ratings on items related to personalization and understanding. Acceptability ratings also revealed mixed experiences: trust and perceived appropriateness were strong, whereas enjoyment, perceived helpfulness, and self-efficacy in using the chatbot correctly were more variable. Notably, attitudes toward mental wellness chatbots overall became slightly less favorable from baseline to follow-up, suggesting that initial optimism or curiosity may have surpassed the practical realities of interacting with an early-stage prototype, which is a pattern frequently reported in digital mental health when user expectations exceed current product capabilities [<xref ref-type="bibr" rid="ref49">49</xref>,<xref ref-type="bibr" rid="ref50">50</xref>].</p><p>Participants often expressed expecting immediate advice capable of resolving distress within a single interaction. Although understandable, such expectations run counter to core therapeutic principles. Evidence-based psychotherapy discourages providing directive advice at the outset of treatment, before adequate case understanding has been established [<xref ref-type="bibr" rid="ref51">51</xref>], and even the most effective interventions require ongoing practice and reflection rather than instant resolution. 
The present mental wellness chatbot emphasizes clarifying questions and reflective prompts that can feel repetitive or circular but are designed to mirror therapy processes [<xref ref-type="bibr" rid="ref25">25</xref>]. In contrast, general-purpose AI systems (eg, ChatGPT) often provide rapid, directive answers that may feel more responsive yet do not align with therapeutic process fidelity. When user expectations for immediate relief exceed what a clinically guided system can reasonably provide, perceived helpfulness may decline, emphasizing the need to set clear expectations about the capabilities and purpose of early-stage mental wellness chatbots.</p><p>Unexpectedly, demographic patterns also offer important implications for chatbot-based mental well-being tools. Black, Hispanic, and multiracial participants reported higher acceptability than White participants, and Black participants also reported higher alliance. These findings are exploratory and should be interpreted cautiously, particularly given sample size considerations and the structured, incentivized design of the study. While chatbot-based interactions may represent a potentially lower-barrier modality for some groups who face disproportionate barriers in traditional mental health systems, further investigation in larger and more diverse samples under naturalistic conditions is required before drawing conclusions regarding equity implications.</p></sec><sec id="s4-2-3"><title>Therapeutic Signal and Early Mechanisms of Change</title><p>Consistent with theory on early therapeutic processes, several first-impression ratings, including feeling understood, perceiving professionalism, and experiencing personalization, were significantly associated with greater symptom improvement. These associations are consistent with participant narratives describing moments of emotional validation and goal-setting support. 
Importantly, these associations emerged despite the prototype&#x2019;s limited personalization, suggesting that even brief experiences of attunement or perceived competence may exert disproportionate influence on therapeutic expectancies and engagement.</p><p>Conversely, the absence of dose&#x2013;response relationships between usage metrics and symptom change is likely attributable to the structure of the pilot study itself. Engagement was incentivized, compressed, and highly uniform across participants, limiting the natural variability needed to detect behavioral predictors of improvement. Qualitative reports were consistent with this interpretation: while some participants sought emotional support, others acknowledged using the app primarily to meet compensation thresholds. Longer, more naturalistic trials (ie, flexible use) are likely to provide a more accurate test of organic engagement&#x2013;outcome relationships.</p></sec><sec id="s4-2-4"><title>Safety Monitoring</title><p>Finally, the safety monitoring procedures tested in this pilot support the feasibility of this clinician-supervised mental wellness chatbot. In this study, 8% of participants triggered at least one automated safety alert, and all alerts were promptly reviewed by a clinician. Consistent with the deliberately high-sensitivity safety classifier and nonacute sample, none met the prespecified threshold for phone-based outreach (eg, acute or imminent risk). This pattern suggests that real-time safety guardrails can successfully surface potentially concerning content while keeping the volume of urgent follow-ups manageable, which is encouraging for larger human-in-the-loop trials. 
Nonetheless, further testing among higher-risk samples with longer exposure periods is needed to determine how the workload associated with safety monitoring would differ when higher-risk users (eg, those with suicidal thinking) are not excluded.</p></sec></sec><sec id="s4-3"><title>Implications for Early-Stage Development of Mental Well-Being Chatbots</title><p>Collectively, these mixed methods findings suggest that even a minimally refined generative AI&#x2013;based chatbot can deliver a safe and accessible user experience that may be associated with modest short-term improvements in mental health symptoms under pilot conditions. The iterative cohort design highlights the value of rapid user feedback loops for refining conversational flow, session pacing, and safety monitoring protocols. As LLM-based systems continue to evolve, improvements in contextual coherence, personalization, and adaptive responsiveness may strengthen user experience and engagement, but these possibilities require evaluation under more rigorous, controlled conditions.</p><p>As chatbot systems are iteratively refined, tracking theory-consistent mediators, such as experiential and behavioral change processes, across product versions will be critical to determining whether design modifications meaningfully enhance user experience and clinical signal. Longer-term randomized studies under more naturalistic conditions are also needed to evaluate durability and to examine how chatbots may operate within integrated care pathways; for example, as a pretherapy intake tool, a between-session reinforcement, a maintenance tool following treatment, or a monitored entry point that escalates to human clinicians when symptom trajectories fail to improve or begin to deteriorate [<xref ref-type="bibr" rid="ref52">52</xref>]. 
Finally, future work should continue to explore the balance between usability and therapeutic rigor, as highly engaging conversational interfaces must also preserve core therapeutic processes that may not always prioritize immediate gratification.</p></sec><sec id="s4-4"><title>Limitations</title><p>Regarding limitations, the brief, 2-week exposure period limits conclusions about durability, particularly because outcomes were measured during or immediately after active use. Such designs may capture proximal engagement effects rather than sustained change [<xref ref-type="bibr" rid="ref14">14</xref>]. However, this timeframe was appropriate for a pilot focused on usability, acceptability, and early clinical signals, and it enabled rapid prototype iteration. The single-arm pre&#x2013;post design also raises the possibility of regression to the mean. In the absence of a randomized comparison group, improvements cannot be attributed definitively to the chatbot intervention, yet the magnitude of observed improvements&#x2014;especially at higher baseline severity&#x2014;appears larger than short-term changes typically reported in waitlist-controlled meta-analyses [<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref54">54</xref>].</p><p>Participants were recruited from a convenience panel familiar with providing user-experience feedback on digital apps (dscout), which may limit generalizability, but provided high compliance and rich qualitative feedback appropriate for early-stage evaluation. Relatedly, pilot sampling purposefully excluded individuals with acute risk and those currently in treatment, consistent with the nascent state of this chatbot app. Engagement during this pilot was supported by usage-contingent incentives to ensure adequate exposure for prototype evaluation. 
This structure was appropriate for assessing usability, safety monitoring procedures, and early experiential processes, but engagement metrics should be interpreted as reflecting the feasibility of the study protocol rather than voluntary real-world uptake. While such incentives are common in early prototyping research, future effectiveness trials should evaluate voluntary, real-world engagement. Moreover, incentive structures may have influenced not only engagement patterns but also self-reported experiential and clinical outcomes, introducing potential response bias and limiting the internal validity of these self-reported measures. Although exploratory analyses did not detect significant associations between household income and self-reported experiential or clinical outcomes, income data were missing for 13.6% of participants, which may introduce selection bias if nonresponse was systematic.</p><p>Finally, the qualitative aspects relied on LLM-assisted content analysis to identify recurring experiential patterns rather than on researcher-led reflexive thematic analysis. Although theme stability was assessed across multiple independent model runs and outputs were reviewed for coherence, this approach did not incorporate traditional qualitative features such as prolonged coding immersion, formal reflexivity procedures, or independent human coding. Accordingly, qualitative findings should be interpreted as descriptive and exploratory, appropriate to the formative aims of this pilot study.</p></sec><sec id="s4-5"><title>Conclusions</title><p>This pilot user experience study establishes critical feasibility and proof of concept for an early-stage mental well-being chatbot that can deliver meaningful and safe usability and measurable short-term symptom improvement, even in a mildly symptomatic sample. 
Transparent documentation of iterative development, combined with mixed methods insights and severity-based marginal effects, highlights both promising preliminary findings and the design targets most likely to enhance future engagement and clinical impact. These findings guide continued refinement and set the stage for rigorous, longer-term trials evaluating sustained engagement, clinical durability, and performance in more symptomatic samples.</p><p>Beyond this prototype, the study demonstrates a practical evaluation template for early-stage LLM mental health tools that combines user experience, safety workflow feasibility, and exploratory clinical outcomes with explicit caution about efficacy inference. As mental wellness chatbots continue to develop, transparent reporting of design constraints, safety monitoring, and user-perceived therapeutic processes will be critical for responsible iteration and for interpreting early clinical signals prior to larger randomized trials and real-world deployment.</p></sec></sec></body><back><ack><p>The authors wish to acknowledge the ongoing efforts by Elizabeth Shaffer, Alexa Varsavsky, Chase Hall, and their team at Thoughtful AI. In addition to the use of artificial intelligence that was reported within the study, the authors used ChatGPT (OpenAI) during the research and writing process to assist with debugging statistical code and to suggest wording changes aimed at improving clarity and reducing word count. No data, quantitative analyses, figures, or images were generated by artificial intelligence.</p></ack><notes><sec><title>Funding</title><p>This study was supported by internal resources from Spring Health, including employee time and company infrastructure. 
No external funding was received.</p></sec><sec><title>Data Availability</title><p>Deidentified quantitative data in their final form used for analyses will only be made available on reasonable request based upon compliance with legal, regulatory, confidentiality, and patient privacy requirements. Transcript and qualitative data will not be publicly available.</p></sec></notes><fn-group><fn fn-type="con"><p>SG contributed to conceptualization, methodology, formal analysis, and writing of the original draft, as well as reviewing and editing the manuscript. EJW contributed to methodology, formal analysis, and reviewing and editing the manuscript. GB and ERD contributed to project administration, investigation, and reviewing and editing the manuscript. KHB contributed to the investigation and writing of the original draft, as well as reviewing and editing the manuscript. MB and AC contributed to conceptualization, supervision, and reviewing and editing the manuscript. MH contributed to conceptualization, methodology, supervision, writing of the original draft, and reviewing and editing the manuscript.</p></fn><fn fn-type="conflict"><p>SG, MH, EJW, GB, ERD, KHB, MB, and AC reported being employed by and holding equity in Spring Care Inc outside the submitted work. In addition, AC reported being the lead inventor on 3 patent submissions relating to treatment for major depressive disorder (US Patent and Trademark Office number Y0087.70116US00 and provisional application numbers 62/491 660 and 62/629 041) outside the submitted work. 
Finally, AC reported holding equity in Carbon Health Technologies Inc, Wheel Health Inc, Parallel Technologies Inc, Healthie Inc, and UnitedHealthcare; and providing unpaid advisory services to health care technology startups outside the submitted work.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">CONSORT</term><def><p>Consolidated Standards of Reporting Trials</p></def></def-item><def-item><term id="abb3">GAD</term><def><p>generalized anxiety disorder</p></def></def-item><def-item><term id="abb4">IUS</term><def><p>Intervention Usability Scale</p></def></def-item><def-item><term id="abb5">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb6">PHQ</term><def><p>patient health questionnaire</p></def></def-item><def-item><term id="abb7">PHQ-ADS</term><def><p>Patient Health Questionnaire Anxiety and Depression Scale</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>World mental health today: latest data</article-title><source>World Health Organization</source><year>2025</year><access-date>2025-11-19</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.who.int/publications/i/item/9789240113817">https://www.who.int/publications/i/item/9789240113817</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Brohan</surname><given-names>E</given-names> </name><name name-style="western"><surname>Chowdhary</surname><given-names>N</given-names> </name><name name-style="western"><surname>Dua</surname><given-names>T</given-names> </name><etal/></person-group><article-title>The WHO Mental Health Gap Action Programme for mental, neurological, and substance use 
conditions: the new and updated guideline recommendations</article-title><source>Lancet Psychiatry</source><year>2024</year><month>02</month><volume>11</volume><issue>2</issue><fpage>155</fpage><lpage>158</lpage><pub-id pub-id-type="doi">10.1016/S2215-0366(23)00370-X</pub-id><pub-id pub-id-type="medline">37980915</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Thornicroft</surname><given-names>G</given-names> </name><name name-style="western"><surname>Sunkel</surname><given-names>C</given-names> </name><name name-style="western"><surname>Alikhon Aliev</surname><given-names>A</given-names> </name><etal/></person-group><article-title>The Lancet Commission on ending stigma and discrimination in mental health</article-title><source>Lancet</source><year>2022</year><month>10</month><day>22</day><volume>400</volume><issue>10361</issue><fpage>1438</fpage><lpage>1480</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(22)01470-2</pub-id><pub-id pub-id-type="medline">36223799</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Balcombe</surname><given-names>L</given-names> </name></person-group><article-title>AI chatbots in mental health care: integrative review of challenges and solutions</article-title><source>Public Health Healthcare</source><comment>Preprint posted online on 2025</comment><pub-id pub-id-type="doi">10.20944/preprints202509.1893.v1</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bodner</surname><given-names>R</given-names> </name><name name-style="western"><surname>Lim</surname><given-names>K</given-names> </name><name 
name-style="western"><surname>Schneider</surname><given-names>R</given-names> </name><name name-style="western"><surname>Torous</surname><given-names>J</given-names> </name></person-group><article-title>Efficacy and risks of artificial intelligence chatbots for anxiety and depression: a narrative review of recent clinical studies</article-title><source>Curr Opin Psychiatry</source><year>2026</year><month>01</month><day>1</day><volume>39</volume><issue>1</issue><fpage>19</fpage><lpage>25</lpage><pub-id pub-id-type="doi">10.1097/YCO.0000000000001048</pub-id><pub-id pub-id-type="medline">41198140</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hua</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Siddals</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Charting the evolution of artificial intelligence mental health chatbots from rule-based systems to large language models: a systematic review</article-title><source>World Psychiatry</source><year>2025</year><month>10</month><volume>24</volume><issue>3</issue><fpage>383</fpage><lpage>394</lpage><pub-id pub-id-type="doi">10.1002/wps.21352</pub-id><pub-id pub-id-type="medline">40948070</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>C</given-names> </name><name name-style="western"><surname>Lam</surname><given-names>KT</given-names> </name><name name-style="western"><surname>Yip</surname><given-names>KM</given-names> </name><etal/></person-group><article-title>Comparison of an AI chatbot with a nurse hotline in reducing anxiety and depression levels in the general population: pilot randomized 
controlled trial</article-title><source>JMIR Hum Factors</source><year>2025</year><month>03</month><day>6</day><volume>12</volume><fpage>e65785</fpage><pub-id pub-id-type="doi">10.2196/65785</pub-id><pub-id pub-id-type="medline">40048637</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Habicht</surname><given-names>J</given-names> </name><name name-style="western"><surname>Dina</surname><given-names>LM</given-names> </name><name name-style="western"><surname>McFadyen</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Generative AI-enabled therapy support tool for improved clinical outcomes and patient engagement in group therapy: real-world observational study</article-title><source>J Med Internet Res</source><year>2025</year><month>03</month><day>10</day><volume>27</volume><issue>1</issue><fpage>e60435</fpage><pub-id pub-id-type="doi">10.2196/60435</pub-id><pub-id pub-id-type="medline">40063074</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Heinz</surname><given-names>MV</given-names> </name><name name-style="western"><surname>Mackin</surname><given-names>DM</given-names> </name><name name-style="western"><surname>Trudeau</surname><given-names>BM</given-names> </name><etal/></person-group><article-title>Randomized trial of a generative AI chatbot for mental health treatment</article-title><source>NEJM AI</source><year>2025</year><month>03</month><day>27</day><volume>2</volume><issue>4</issue><pub-id pub-id-type="doi">10.1056/AIoa2400802</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Lee</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Therapeutic potential of social chatbots in alleviating loneliness and social anxiety: quasi-experimental mixed methods study</article-title><source>J Med Internet Res</source><year>2025</year><month>01</month><day>14</day><volume>27</volume><issue>1</issue><fpage>e65589</fpage><pub-id pub-id-type="doi">10.2196/65589</pub-id><pub-id pub-id-type="medline">39808786</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Manole</surname><given-names>A</given-names> </name><name name-style="western"><surname>C&#x00E2;rciumaru</surname><given-names>R</given-names> </name><name name-style="western"><surname>Br&#x00EE;nza&#x0219;</surname><given-names>R</given-names> </name><name name-style="western"><surname>Manole</surname><given-names>F</given-names> </name></person-group><article-title>Harnessing AI in anxiety management: a chatbot-based intervention for personalized mental health support</article-title><source>Information</source><year>2024</year><month>12</month><day>2</day><volume>15</volume><issue>12</issue><fpage>768</fpage><pub-id pub-id-type="doi">10.3390/info15120768</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ma</surname><given-names>T</given-names> </name></person-group><article-title>Depression intervention using AI chatbots with social cues: a randomized trial of effectiveness</article-title><source>J Affect Disord</source><year>2025</year><month>11</month><day>15</day><volume>389</volume><fpage>119760</fpage><pub-id 
pub-id-type="doi">10.1016/j.jad.2025.119760</pub-id><pub-id pub-id-type="medline">40562106</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yeh</surname><given-names>PL</given-names> </name><name name-style="western"><surname>Kuo</surname><given-names>WC</given-names> </name><name name-style="western"><surname>Tseng</surname><given-names>BL</given-names> </name><name name-style="western"><surname>Sung</surname><given-names>YH</given-names> </name></person-group><article-title>Does the AI-driven chatbot work? effectiveness of the Woebot app in reducing anxiety and depression in group counseling courses and student acceptance of technological aids</article-title><source>Curr Psychol</source><year>2025</year><month>05</month><volume>44</volume><issue>9</issue><fpage>8133</fpage><lpage>8145</lpage><pub-id pub-id-type="doi">10.1007/s12144-025-07359-0</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Heckman</surname><given-names>TG</given-names> </name><name name-style="western"><surname>Markowitz</surname><given-names>JC</given-names> </name><name name-style="western"><surname>Heckman</surname><given-names>BD</given-names> </name></person-group><article-title>A generative AI chatbot for mental health treatment: a step in the right direction?</article-title><source>NEJM AI</source><year>2025</year><month>08</month><day>28</day><volume>2</volume><issue>9</issue><pub-id pub-id-type="doi">10.1056/AIp2500453</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Herbener</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Klincewicz</surname><given-names>M</given-names> 
</name><name name-style="western"><surname>Frank</surname><given-names>L</given-names> </name><name name-style="western"><surname>Damholdt</surname><given-names>MF</given-names> </name></person-group><article-title>A critical discussion of strategies and ramifications of implementing conversational agents in mental healthcare</article-title><source>Comput Hum Behav Artif Hum</source><year>2025</year><month>08</month><volume>5</volume><fpage>100182</fpage><pub-id pub-id-type="doi">10.1016/j.chbah.2025.100182</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Herbener</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Klincewicz</surname><given-names>M</given-names> </name><name name-style="western"><surname>Damholdt</surname><given-names>MF</given-names> </name></person-group><article-title>A narrative review of the active ingredients in psychotherapy delivered by conversational agents</article-title><source>Comput Hum Behav Rep</source><year>2024</year><month>05</month><volume>14</volume><fpage>100401</fpage><pub-id pub-id-type="doi">10.1016/j.chbr.2024.100401</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mair</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Hashim</surname><given-names>J</given-names> </name><name name-style="western"><surname>Thai</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Understanding and overcoming barriers to digital health adoption: a patient and public involvement study</article-title><source>Transl Behav Med</source><year>2025</year><month>01</month><day>16</day><volume>15</volume><issue>1</issue><fpage>ibaf010</fpage><pub-id 
pub-id-type="doi">10.1093/tbm/ibaf010</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Torous</surname><given-names>J</given-names> </name><name name-style="western"><surname>Fuller-Tyszkiewicz</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Uptake, adherence, and attrition in clinical trials of depression and anxiety apps: a systematic review and meta-analysis</article-title><source>JAMA Psychiatry</source><year>2026</year><month>01</month><day>1</day><volume>83</volume><issue>1</issue><fpage>43</fpage><lpage>50</lpage><pub-id pub-id-type="doi">10.1001/jamapsychiatry.2025.3439</pub-id><pub-id pub-id-type="medline">41259035</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wagner</surname><given-names>E</given-names> </name><name name-style="western"><surname>Torous</surname><given-names>J</given-names> </name></person-group><article-title>Patient and provider insights on the design and engagement of digital health recommendation systems</article-title><source>J Technol Behav Sci</source><year>2025</year><month>06</month><day>20</day><pub-id pub-id-type="doi">10.1007/s41347-025-00522-1</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Malouin-Lachance</surname><given-names>A</given-names> </name><name name-style="western"><surname>Capolupo</surname><given-names>J</given-names> </name><name name-style="western"><surname>Laplante</surname><given-names>C</given-names> </name><name name-style="western"><surname>Hudon</surname><given-names>A</given-names> 
</name></person-group><article-title>Does the digital therapeutic alliance exist? integrative review</article-title><source>JMIR Ment Health</source><year>2025</year><month>02</month><day>7</day><volume>12</volume><fpage>e69294</fpage><pub-id pub-id-type="doi">10.2196/69294</pub-id><pub-id pub-id-type="medline">39924298</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Herbener</surname><given-names>AB</given-names> </name><name name-style="western"><surname>Damholdt</surname><given-names>MF</given-names> </name></person-group><article-title>A theoretical framework of the processes of change in mental health interventions delivered by artificial therapists</article-title><source>J Psychol AI</source><year>2025</year><month>12</month><day>31</day><volume>1</volume><issue>1</issue><pub-id pub-id-type="doi">10.1080/29974100.2025.2590495</pub-id><pub-id pub-id-type="medline">2590495</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ayers</surname><given-names>JW</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Poliak</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Evaluating artificial intelligence responses to public health questions</article-title><source>JAMA Netw Open</source><year>2023</year><month>06</month><day>1</day><volume>6</volume><issue>6</issue><fpage>e2317517</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2023.17517</pub-id><pub-id pub-id-type="medline">37285160</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Fleming</surname><given-names>W</given-names> </name><name name-style="western"><surname>Coutts</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pochard</surname><given-names>D</given-names> </name><name name-style="western"><surname>Trivedi</surname><given-names>D</given-names> </name><name name-style="western"><surname>Sanderson</surname><given-names>K</given-names> </name></person-group><article-title>Human-centered design and digital transformation of mental health services</article-title><source>JMIR Hum Factors</source><year>2025</year><month>08</month><day>11</day><volume>12</volume><fpage>e66040</fpage><pub-id pub-id-type="doi">10.2196/66040</pub-id><pub-id pub-id-type="medline">40789172</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Torous</surname><given-names>J</given-names> </name><name name-style="western"><surname>Topol</surname><given-names>EJ</given-names> </name></person-group><article-title>Assessing generative artificial intelligence for mental health</article-title><source>Lancet</source><year>2025</year><month>06</month><day>11</day><volume>406</volume><issue>10504</issue><fpage>683</fpage><pub-id pub-id-type="doi">10.1016/S0140-6736(25)01237-1</pub-id><pub-id pub-id-type="medline">40516569</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cuijpers</surname><given-names>P</given-names> </name><name name-style="western"><surname>Reijnders</surname><given-names>M</given-names> </name><name name-style="western"><surname>Huibers</surname><given-names>MJH</given-names> </name></person-group><article-title>The role of common factors in psychotherapy outcomes</article-title><source>Annu Rev Clin 
Psychol</source><year>2019</year><month>05</month><day>7</day><volume>15</volume><issue>1</issue><fpage>207</fpage><lpage>231</lpage><pub-id pub-id-type="doi">10.1146/annurev-clinpsy-050718-095424</pub-id><pub-id pub-id-type="medline">30550721</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kroenke</surname><given-names>K</given-names> </name><name name-style="western"><surname>Baye</surname><given-names>F</given-names> </name><name name-style="western"><surname>Lourens</surname><given-names>SG</given-names> </name></person-group><article-title>Comparative validity and responsiveness of PHQ-ADS and other composite anxiety-depression measures</article-title><source>J Affect Disord</source><year>2019</year><month>03</month><day>1</day><volume>246</volume><fpage>437</fpage><lpage>443</lpage><pub-id pub-id-type="doi">10.1016/j.jad.2018.12.098</pub-id><pub-id pub-id-type="medline">30599366</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lyon</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Pullmann</surname><given-names>MD</given-names> </name><name name-style="western"><surname>Jacobson</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Assessing the usability of complex psychosocial interventions: the intervention usability scale</article-title><source>Implement Res Pract</source><year>2021</year><volume>2</volume><fpage>2633489520987828</fpage><pub-id pub-id-type="doi">10.1177/2633489520987828</pub-id><pub-id pub-id-type="medline">35601889</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Posner</surname><given-names>K</given-names> </name><name name-style="western"><surname>Brown</surname><given-names>GK</given-names> </name><name name-style="western"><surname>Stanley</surname><given-names>B</given-names> </name><etal/></person-group><article-title>The Columbia-suicide severity rating scale: initial validity and internal consistency findings from three multisite studies with adolescents and adults</article-title><source>Am J Psychiatry</source><year>2011</year><month>12</month><volume>168</volume><issue>12</issue><fpage>1266</fpage><lpage>1277</lpage><pub-id pub-id-type="doi">10.1176/appi.ajp.2011.10111704</pub-id><pub-id pub-id-type="medline">22193671</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kroenke</surname><given-names>K</given-names> </name><name name-style="western"><surname>Spitzer</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Williams</surname><given-names>JBW</given-names> </name><name name-style="western"><surname>L&#x00F6;we</surname><given-names>B</given-names> </name></person-group><article-title>An ultra-brief screening scale for anxiety and depression: the PHQ-4</article-title><source>Psychosomatics</source><year>2009</year><month>11</month><day>1</day><volume>50</volume><issue>6</issue><fpage>613</fpage><lpage>621</lpage><pub-id pub-id-type="doi">10.1176/appi.psy.50.6.613</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kroenke</surname><given-names>K</given-names> </name><name name-style="western"><surname>Spitzer</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Williams</surname><given-names>JBW</given-names> </name></person-group><article-title>The PHQ-9: validity of a brief 
depression severity measure</article-title><source>J Gen Intern Med</source><year>2001</year><month>09</month><volume>16</volume><issue>9</issue><fpage>606</fpage><lpage>613</lpage><pub-id pub-id-type="doi">10.1046/j.1525-1497.2001.016009606.x</pub-id><pub-id pub-id-type="medline">11556941</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Levis</surname><given-names>B</given-names> </name><name name-style="western"><surname>Riehm</surname><given-names>KE</given-names> </name><etal/></person-group><article-title>Equivalency of the diagnostic accuracy of the PHQ-8 and PHQ-9: a systematic review and individual participant data meta-analysis</article-title><source>Psychol Med</source><year>2020</year><month>06</month><volume>50</volume><issue>8</issue><fpage>1368</fpage><lpage>1380</lpage><pub-id pub-id-type="doi">10.1017/S0033291719001314</pub-id><pub-id pub-id-type="medline">31298180</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Spitzer</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Kroenke</surname><given-names>K</given-names> </name><name name-style="western"><surname>Williams</surname><given-names>JBW</given-names> </name><name name-style="western"><surname>L&#x00F6;we</surname><given-names>B</given-names> </name></person-group><article-title>A brief measure for assessing generalized anxiety disorder: the GAD-7</article-title><source>Arch Intern Med</source><year>2006</year><month>05</month><day>22</day><volume>166</volume><issue>10</issue><fpage>1092</fpage><lpage>1097</lpage><pub-id pub-id-type="doi">10.1001/archinte.166.10.1092</pub-id><pub-id 
pub-id-type="medline">16717171</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bech</surname><given-names>P</given-names> </name><name name-style="western"><surname>Gudex</surname><given-names>C</given-names> </name><name name-style="western"><surname>Johansen</surname><given-names>KS</given-names> </name></person-group><article-title>The WHO (Ten) Well-Being Index: validation in diabetes</article-title><source>Psychother Psychosom</source><year>1996</year><volume>65</volume><issue>4</issue><fpage>183</fpage><lpage>190</lpage><pub-id pub-id-type="doi">10.1159/000289073</pub-id><pub-id pub-id-type="medline">8843498</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kessler</surname><given-names>RC</given-names> </name><name name-style="western"><surname>Andrews</surname><given-names>G</given-names> </name><name name-style="western"><surname>Colpe</surname><given-names>LJ</given-names> </name><etal/></person-group><article-title>Short screening scales to monitor population prevalences and trends in non-specific psychological distress</article-title><source>Psychol Med</source><year>2002</year><month>08</month><volume>32</volume><issue>6</issue><fpage>959</fpage><lpage>976</lpage><pub-id pub-id-type="doi">10.1017/s0033291702006074</pub-id><pub-id pub-id-type="medline">12214795</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hughes</surname><given-names>ME</given-names> </name><name name-style="western"><surname>Waite</surname><given-names>LJ</given-names> </name><name name-style="western"><surname>Hawkley</surname><given-names>LC</given-names> </name><name 
name-style="western"><surname>Cacioppo</surname><given-names>JT</given-names> </name></person-group><article-title>A short scale for measuring loneliness in large surveys: results from two population-based studies</article-title><source>Res Aging</source><year>2004</year><volume>26</volume><issue>6</issue><fpage>655</fpage><lpage>672</lpage><pub-id pub-id-type="doi">10.1177/0164027504268574</pub-id><pub-id pub-id-type="medline">18504506</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schr&#x00F6;der</surname><given-names>J</given-names> </name><name name-style="western"><surname>Sautier</surname><given-names>L</given-names> </name><name name-style="western"><surname>Kriston</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Development of a questionnaire measuring attitudes towards psychological online interventions&#x2013;the APOI</article-title><source>J Affect Disord</source><year>2015</year><month>11</month><day>15</day><volume>187</volume><fpage>136</fpage><lpage>141</lpage><pub-id pub-id-type="doi">10.1016/j.jad.2015.08.044</pub-id><pub-id pub-id-type="medline">26331687</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sekhon</surname><given-names>M</given-names> </name><name name-style="western"><surname>Cartwright</surname><given-names>M</given-names> </name><name name-style="western"><surname>Francis</surname><given-names>JJ</given-names> </name></person-group><article-title>Acceptability of healthcare interventions: an overview of reviews and development of a theoretical framework</article-title><source>BMC Health Serv Res</source><year>2017</year><month>01</month><day>26</day><volume>17</volume><issue>1</issue><fpage>88</fpage><pub-id 
pub-id-type="doi">10.1186/s12913-017-2031-8</pub-id><pub-id pub-id-type="medline">28126032</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hayes</surname><given-names>AS</given-names> </name></person-group><article-title>&#x201C;Conversing&#x201D; with qualitative data: enhancing qualitative research through large language models (LLMs)</article-title><source>Int J Qual Methods</source><year>2025</year><month>04</month><volume>24</volume><fpage>16094069251322346</fpage><pub-id pub-id-type="doi">10.1177/16094069251322346</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Miles</surname><given-names>MB</given-names> </name><name name-style="western"><surname>Huberman</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Salda&#x00F1;a</surname><given-names>J</given-names> </name></person-group><source>Qualitative Data Analysis: A Methods Sourcebook</source><year>2020</year><publisher-name>Sage Publications</publisher-name></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Guba</surname><given-names>EG</given-names> </name><name name-style="western"><surname>Lincoln</surname><given-names>YS</given-names> </name></person-group><source>Fourth Generation Evaluation</source><year>1989</year><publisher-name>SAGE</publisher-name></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nos&#x00E8;</surname><given-names>M</given-names> </name><name name-style="western"><surname>Muriago</surname><given-names>G</given-names> </name><name 
name-style="western"><surname>Turrini</surname><given-names>G</given-names> </name><etal/></person-group><article-title>Effectiveness of a self-guided digital intervention for mental health and psychological well-being in university students: pre- and postintervention study</article-title><source>J Med Internet Res</source><year>2025</year><month>08</month><day>19</day><volume>27</volume><fpage>e69031</fpage><pub-id pub-id-type="doi">10.2196/69031</pub-id><pub-id pub-id-type="medline">40829124</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cheng</surname><given-names>AL</given-names> </name><name name-style="western"><surname>Abraham</surname><given-names>J</given-names> </name><name name-style="western"><surname>Hartz</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Laber</surname><given-names>EB</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>JP</given-names> </name></person-group><article-title>Evaluating and optimizing just-in-time adaptive interventions in a digital mental health intervention (Wysa for chronic pain) for middle-aged and older adults with chronic pain: protocol for a series of randomized trials</article-title><source>JMIR Res Protoc</source><year>2025</year><month>09</month><day>17</day><volume>14</volume><fpage>e77532</fpage><pub-id pub-id-type="doi">10.2196/77532</pub-id><pub-id pub-id-type="medline">40961490</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bangor</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kortum</surname><given-names>PT</given-names> </name><name name-style="western"><surname>Miller</surname><given-names>JT</given-names> </name></person-group><article-title>An 
empirical evaluation of the system usability scale</article-title><source>Int J Hum Comput Interact</source><year>2008</year><month>07</month><day>29</day><volume>24</volume><issue>6</issue><fpage>574</fpage><lpage>594</lpage><pub-id pub-id-type="doi">10.1080/10447310802205776</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Hull</surname><given-names>TD</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>L</given-names> </name><name name-style="western"><surname>Arean</surname><given-names>PA</given-names> </name><name name-style="western"><surname>Malgaroli</surname><given-names>M</given-names> </name></person-group><article-title>Generative AI purpose-built for social and mental health: a real-world pilot</article-title><source>arXiv</source><comment>Preprint posted online on Nov 12, 2025</comment><pub-id pub-id-type="doi">10.48550/arXiv.2511.11689</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cuijpers</surname><given-names>P</given-names> </name><name name-style="western"><surname>Harrer</surname><given-names>M</given-names> </name><name name-style="western"><surname>Miguel</surname><given-names>C</given-names> </name><name name-style="western"><surname>Karyotaki</surname><given-names>E</given-names> </name><name name-style="western"><surname>Papola</surname><given-names>D</given-names> </name></person-group><article-title>Does baseline severity interact with the effects of psychotherapy for depression? 
a meta-analytic review</article-title><source>J Affect Disord</source><year>2026</year><month>04</month><day>15</day><volume>399</volume><fpage>121106</fpage><pub-id pub-id-type="doi">10.1016/j.jad.2025.121106</pub-id><pub-id pub-id-type="medline">41490834</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Asim</surname><given-names>M</given-names> </name><name name-style="western"><surname>Yousaf</surname><given-names>HF</given-names> </name><name name-style="western"><surname>Nasir</surname><given-names>U</given-names> </name><name name-style="western"><surname>Farooq</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Yousaf</surname><given-names>F</given-names> </name></person-group><article-title>The influence of AI-powered chatbots on reducing mental health care barriers in rural areas</article-title><source>Dialogue Soc Sci Rev DSSR</source><year>2025</year><month>01</month><day>14</day><access-date>2026-04-08</access-date><volume>3</volume><issue>1</issue><fpage>664</fpage><lpage>677</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://dialoguesreview.com/index.php/2/article/view/108">https://dialoguesreview.com/index.php/2/article/view/108</ext-link></comment></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tong</surname><given-names>F</given-names> </name><name name-style="western"><surname>Lederman</surname><given-names>R</given-names> </name><name name-style="western"><surname>D&#x2019;Alfonso</surname><given-names>S</given-names> </name><name name-style="western"><surname>Berry</surname><given-names>K</given-names> </name><name name-style="western"><surname>Bucci</surname><given-names>S</given-names> </name></person-group><article-title>Digital therapeutic alliance 
with fully automated mental health smartphone apps: a narrative review</article-title><source>Front Psychiatry</source><year>2022</year><volume>13</volume><fpage>819623</fpage><pub-id pub-id-type="doi">10.3389/fpsyt.2022.819623</pub-id><pub-id pub-id-type="medline">35815030</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tremain</surname><given-names>H</given-names> </name><name name-style="western"><surname>McEnery</surname><given-names>C</given-names> </name><name name-style="western"><surname>Fletcher</surname><given-names>K</given-names> </name><name name-style="western"><surname>Murray</surname><given-names>G</given-names> </name></person-group><article-title>The therapeutic alliance in digital mental health interventions for serious mental illnesses: narrative review</article-title><source>JMIR Ment Health</source><year>2020</year><month>08</month><day>7</day><volume>7</volume><issue>8</issue><fpage>e17204</fpage><pub-id pub-id-type="doi">10.2196/17204</pub-id><pub-id pub-id-type="medline">32763881</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Musiat</surname><given-names>P</given-names> </name><name name-style="western"><surname>Goldstone</surname><given-names>P</given-names> </name><name name-style="western"><surname>Tarrier</surname><given-names>N</given-names> </name></person-group><article-title>Understanding the acceptability of e-mental health--attitudes and expectations towards computerised self-help treatments for mental health problems</article-title><source>BMC Psychiatry</source><year>2014</year><month>04</month><day>11</day><volume>14</volume><fpage>109</fpage><pub-id pub-id-type="doi">10.1186/1471-244X-14-109</pub-id><pub-id pub-id-type="medline">24725765</pub-id></nlm-citation></ref><ref 
id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pourrazavi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Azimi</surname><given-names>S</given-names> </name><name name-style="western"><surname>Fakhari</surname><given-names>A</given-names> </name><name name-style="western"><surname>Barzegar</surname><given-names>H</given-names> </name><name name-style="western"><surname>Farahbakhsh</surname><given-names>M</given-names> </name></person-group><article-title>Preferences and expectations of end-users from a mental health educational portal: a qualitative study</article-title><source>Health Promot Perspect</source><year>2024</year><volume>14</volume><issue>3</issue><fpage>297</fpage><lpage>303</lpage><pub-id pub-id-type="doi">10.34172/hpp.43077</pub-id><pub-id pub-id-type="medline">39633623</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hill</surname><given-names>CE</given-names> </name><name name-style="western"><surname>Knox</surname><given-names>S</given-names> </name><name name-style="western"><surname>Duan</surname><given-names>C</given-names> </name></person-group><article-title>Psychotherapist advice, suggestions, recommendations: a research review</article-title><source>Psychotherapy (Chic)</source><year>2023</year><month>09</month><volume>60</volume><issue>3</issue><fpage>295</fpage><lpage>305</lpage><pub-id pub-id-type="doi">10.1037/pst0000476</pub-id><pub-id pub-id-type="medline">36931814</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Graupensperger</surname><given-names>S</given-names> </name><name name-style="western"><surname>Brown</surname><given-names>M</given-names> 
</name><name name-style="western"><surname>Chekroud</surname><given-names>A</given-names> </name><etal/></person-group><article-title>AI-enabled continuous care features in real-world psychotherapy: treatment engagement and clinical outcomes</article-title><source>medRxiv</source><comment>Preprint posted online on Feb 25, 2026</comment><pub-id pub-id-type="doi">10.64898/2026.01.30.26345238</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Scott</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Bisby</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Heriseanu</surname><given-names>AI</given-names> </name><etal/></person-group><article-title>Understanding the untreated course of anxiety disorders in treatment-seeking samples: a systematic review and meta-analysis</article-title><source>J Anxiety Disord</source><year>2022</year><month>06</month><volume>89</volume><fpage>102590</fpage><pub-id pub-id-type="doi">10.1016/j.janxdis.2022.102590</pub-id><pub-id pub-id-type="medline">35689850</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tong</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Ho</surname><given-names>FS</given-names> </name><name name-style="western"><surname>Chu</surname><given-names>OH</given-names> </name><name name-style="western"><surname>Mak</surname><given-names>WW</given-names> </name></person-group><article-title>Time-dependent changes in depressive symptoms among control participants in digital-based psychological intervention studies: meta-analysis of randomized controlled trials</article-title><source>J Med Internet 
Res</source><year>2023</year><month>04</month><day>12</day><volume>25</volume><issue>1</issue><fpage>e39029</fpage><pub-id pub-id-type="doi">10.2196/39029</pub-id><pub-id pub-id-type="medline">37043276</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Additional figures and information.</p><media xlink:href="formative_v10i1e90644_app1.docx" xlink:title="DOCX File, 4095 KB"/></supplementary-material><supplementary-material id="app2"><label>Checklist 1</label><p>CONSORT checklist.</p><media xlink:href="formative_v10i1e90644_app2.docx" xlink:title="DOCX File, 27 KB"/></supplementary-material></app-group></back></article>