<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="letter"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v10i1e85906</article-id><article-id pub-id-type="doi">10.2196/85906</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Letter</subject></subj-group></article-categories><title-group><article-title>Machine Learning&#x2013;Based Audiovisual Phenotyping for Measuring Communication, Shared Decision-Making, and Trust</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Khaikin</surname><given-names>Shely</given-names></name><degrees>BA</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Tiruvadi</surname><given-names>Vineet</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Brooks</surname><given-names>Jeffrey</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Baird</surname><given-names>Alice</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Grela-Mpoko</surname><given-names>Anne-Catherine</given-names></name><degrees>BS, MPH</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hoffman</surname><given-names>Lindsey</given-names></name><degrees>BS</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Crossley</surname><given-names>Jadyn</given-names></name><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Leasy</surname><given-names>Menachem</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Fineman</surname><given-names>Jaime</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Savoy</surname><given-names>Margot</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff6">6</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Igarabuza</surname><given-names>Laura</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Paranjape</surname><given-names>Anuradha</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff7">7</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Foo</surname><given-names>Cheryl YS</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff8">8</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Birnbaum</surname><given-names>Michael L</given-names></name><degrees>MD</degrees><xref ref-type="aff" rid="aff9">9</xref><xref ref-type="aff" rid="aff10">10</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Zisman-Ilani</surname><given-names>Yaara</given-names></name><degrees>MA, PhD</degrees><xref ref-type="aff" rid="aff11">11</xref><xref ref-type="aff" rid="aff12">12</xref><xref ref-type="aff" rid="aff13">13</xref></contrib></contrib-group><aff id="aff1"><institution>Shared Decision Making Laboratory, Temple University</institution><addr-line>Philadelphia</addr-line><addr-line>PA</addr-line><country>United States</country></aff><aff id="aff2"><institution>Harvard University</institution><addr-line>Cambridge</addr-line><addr-line>MA</addr-line><country>United States</country></aff><aff id="aff3"><institution>Hume AI</institution><addr-line>New York</addr-line><addr-line>NY</addr-line><country>United States</country></aff><aff id="aff4"><institution>Department of Clinical Family and Community Medicine, Lewis Katz School of Medicine, Temple University</institution><addr-line>Philadelphia</addr-line><addr-line>PA</addr-line><country>United States</country></aff><aff id="aff5"><institution>Department of Clinical Medicine, Lewis Katz School of Medicine, Temple University</institution><addr-line>Philadelphia</addr-line><addr-line>PA</addr-line><country>United States</country></aff><aff id="aff6"><institution>American Academy of Family Physicians</institution><addr-line>Washington</addr-line><addr-line>DC</addr-line><country>United States</country></aff><aff id="aff7"><institution>University of Colorado School of Medicine</institution><addr-line>Aurora</addr-line><addr-line>CO</addr-line><country>United States</country></aff><aff id="aff8"><institution>Center of Excellence for Psychosocial and Systemic Research, Department of Psychiatry, Massachusetts General 
Hospital</institution><addr-line>Boston</addr-line><addr-line>MA</addr-line><country>United States</country></aff><aff id="aff9"><institution>New York State Psychiatric Institute</institution><addr-line>New York</addr-line><addr-line>NY</addr-line><country>United States</country></aff><aff id="aff10"><institution>Department of Psychiatry, Vagelos College of Physicians and Surgeons, Columbia University</institution><addr-line>New York</addr-line><addr-line>NY</addr-line><country>United States</country></aff><aff id="aff11"><institution>Department of Clinical, Educational and Health Psychology, Division of Psychology and Language Sciences, University College London</institution><addr-line>1-19 Torrington Place</addr-line><addr-line>London</addr-line><country>United Kingdom</country></aff><aff id="aff12"><institution>Department of Social and Behavioral Sciences, Barnett College of Public Health, Temple University</institution><addr-line>Philadelphia</addr-line><addr-line>PA</addr-line><country>United States</country></aff><aff id="aff13"><institution>Department of Psychiatry and Behavioral Sciences, Lewis Katz School of Medicine, Temple University</institution><addr-line>Philadelphia</addr-line><addr-line>PA</addr-line><country>United States</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Schwartz</surname><given-names>Amy</given-names></name></contrib><contrib contrib-type="editor"><name name-style="western"><surname>Balcarras</surname><given-names>Matthew</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Fukunaga</surname><given-names>Mayuko Ito</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Gong</surname><given-names>Ziyang</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Yaara Zisman-Ilani, MA, PhD, Department of Clinical, Educational and Health 
Psychology, Division of Psychology and Language Sciences, University College London, 1-19 Torrington Place, London, WC1E 7HB, United Kingdom; <email>y.zisman-ilani@ucl.ac.uk</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>3</day><month>3</month><year>2026</year></pub-date><volume>10</volume><elocation-id>e85906</elocation-id><history><date date-type="received"><day>15</day><month>10</month><year>2025</year></date><date date-type="accepted"><day>28</day><month>01</month><year>2026</year></date></history><copyright-statement>&#x00A9; Shely Khaikin, Vineet Tiruvadi, Jeffrey Brooks, Alice Baird, Anne-Catherine Grela-Mpoko, Lindsey Hoffman, Jadyn Crossley, Menachem Leasy, Jaime Fineman, Margot Savoy, Laura Igarabuza, Anuradha Paranjape, Cheryl YS Foo, Michael L Birnbaum, Yaara Zisman-Ilani. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 3.3.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2026/1/e85906"/><abstract><p>Machine learning&#x2013;based audiovisual phenotyping can reveal hidden discrepancies between patients&#x2019; self-reported experiences and nonverbal expressions, offering a promising tool for objectively assessing communication quality and advancing health equity.</p></abstract><kwd-group><kwd>audiovisual digital phenotyping</kwd><kwd>shared decision-making</kwd><kwd>AI</kwd><kwd>artificial intelligence</kwd><kwd>depression</kwd><kwd>primary care</kwd><kwd>natural language processing</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>Although depression is highly prevalent, many patients do not engage with prescribed treatments, particularly racial and ethnic minority individuals in primary care settings where clinicians lack time and infrastructure for effective communication [<xref ref-type="bibr" rid="ref1">1</xref>]. Shared decision-making (SDM) can enhance engagement [<xref ref-type="bibr" rid="ref2">2</xref>], but SDM is not yet the norm [<xref ref-type="bibr" rid="ref3">3</xref>]. Social desirability bias, power dynamics, and cultural norms may lead patients to report high SDM and trust [<xref ref-type="bibr" rid="ref4">4</xref>] despite feeling otherwise.</p><p>Objective measurements may better capture true experiences. Current SDM assessments rely on subjective self-reports or observer ratings with no objective alternatives. Audiovisual digital phenotyping (ADP) is useful in monitoring depression [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref6">6</xref>] and could assess communication quality. 
This study evaluated multimodal ADP&#x2019;s usability for assessing health communication, SDM, and trust in depression care. We compared ADP outputs from audio, visual, and language modalities with validated patient-reported measures to identify patterns of discrepancies and alignments.</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>Twenty-four participants were recruited from primary care practices. Eligible adults had a depressive disorder diagnosis (ICD-10-CM F33.xx) and a recent primary care visit. Participants completed recorded video interviews about patient-provider communication, decision-making experiences, and self-report measures: SDM-Q-9-Psy [<xref ref-type="bibr" rid="ref7">7</xref>] (low to high SDM), CollaboRATE [<xref ref-type="bibr" rid="ref8">8</xref>] (low to high provider engagement effort), and Trust scale [<xref ref-type="bibr" rid="ref9">9</xref>] (low to high trust). Mean and sum scores were calculated; participants were categorized as having negative or positive communication experiences based on lower or higher scores, respectively.</p><p>From the interviews, two short video clips per participant were extracted: one reflecting positive communication experiences and one reflecting negative experiences. Verbal and nonverbal responses were analyzed with three on-premise Hume AI expression models capturing ADP [<xref ref-type="bibr" rid="ref10">10</xref>,<xref ref-type="bibr" rid="ref11">11</xref>]: (1) a facial expression model (FaceNet Inception-ResNet V1) capturing movement and nonverbal expression of the face; (2) a speech prosody model (Whisper-Small), assessing tone and vocal dynamics from audio; and (3) a natural language processing (NLP) model (BERT), identifying the emotionality of the spoken transcript. 
For each participant, the top three emotions per modality were extracted.</p><p>Alignment or discrepancy between self-report and ADP was assessed using face validity by comparing self-report scores with emotional outputs. Participants with positive experiences were matched to positive clips; those with negative experiences were matched to negative clips. Alignment required concordance between reported experience and extracted emotions (eg, negative emotions in negative clips for low scorers). Exploratory analyses examined clips with opposite experience types.</p></sec><sec id="s2-2"><title>Ethical Considerations</title><p>This study was approved by the Temple University Institutional Review Board (Protocol #29435). Patients provided informed consent for participation. Participants received a US $20 gift card. Self-reported and ADP data were deidentified.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>Of the 24 participants who completed the study, data from 6 were analyzable after excluding cases with simultaneous on-screen appearances of participant and interviewer or poor video quality. The final sample included 3 women (50%), 5 Black participants (83%), and 4 unemployed participants (67%, <xref ref-type="table" rid="table1">Table 1</xref>). Because the six interviews lasted 48 (SD 13.1) minutes on average, we selected shorter clips for ADP analysis. Selected clip lengths were 14&#x2010;58 seconds (mean 29.4, SD 12.7 seconds), each containing approximately 30 analyzable frames per second (about 840&#x2010;3480 frames per participant for two clips). 
Categorization into low and high communication experience was conducted based on an SDM-Q-9-Psy score higher than 2.5 and a Trust score higher than 27.5, as most CollaboRATE scores were above average.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Demographic characteristics and communication experiences.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Participant characteristics</td><td align="left" valign="bottom">P1</td><td align="left" valign="bottom">P2</td><td align="left" valign="bottom">P3</td><td align="left" valign="bottom">P4</td><td align="left" valign="bottom">P5</td><td align="left" valign="bottom">P6</td></tr></thead><tbody><tr><td align="left" valign="top">Age (y)</td><td align="char" char="." valign="top">24</td><td align="char" char="." valign="top">56</td><td align="char" char="." valign="top">58</td><td align="char" char="." valign="top">58</td><td align="char" char="." valign="top">68</td><td align="char" char="." 
valign="top">39</td></tr><tr><td align="left" valign="top">Sex</td><td align="left" valign="top">Male</td><td align="left" valign="top">Female</td><td align="left" valign="top">Female</td><td align="left" valign="top">Male</td><td align="left" valign="top">Female</td><td align="left" valign="top">Male</td></tr><tr><td align="left" valign="top">Hispanic or Latino</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">No</td><td align="left" valign="top">Yes</td><td align="left" valign="top">No</td></tr><tr><td align="left" valign="top">Race</td><td align="left" valign="top">Black or African American</td><td align="left" valign="top">White or Caucasian</td><td align="left" valign="top">Black or African American and American Indian/Native American or Alaska Native</td><td align="left" valign="top">Black or African American</td><td align="left" valign="top">Black or African American and Other</td><td align="left" valign="top">Black or African American</td></tr><tr><td align="left" valign="top">Employment status</td><td align="left" valign="top">Employed</td><td align="left" valign="top">Unemployed</td><td align="left" valign="top">Unemployed</td><td align="left" valign="top">Unemployed</td><td align="left" valign="top">Unemployed</td><td align="left" valign="top">Unemployed</td></tr><tr><td align="left" valign="top">Decision made at the consultation</td><td align="left" valign="top">Referral to outpatient center</td><td align="left" valign="top">Refills, no new decisions were made</td><td align="left" valign="top">Referrals</td><td align="left" valign="top">Keep current medication</td><td align="left" valign="top">Stop therapy</td><td align="left" valign="top">Change in medications</td></tr><tr><td align="left" valign="top">SDM-Q-9-Psy, mean score (range 0&#x2010;5)</td><td align="char" char="." valign="top">1.11</td><td align="char" char="." 
valign="top">1.89</td><td align="char" char="." valign="top">3.00</td><td align="char" char="." valign="top">4.56</td><td align="char" char="." valign="top">5.00</td><td align="char" char="." valign="top">5.00</td></tr><tr><td align="left" valign="top">CollaboRATE, mean score (range 0&#x2010;9)</td><td align="char" char="." valign="top">5.67</td><td align="char" char="." valign="top">4.00</td><td align="char" char="." valign="top">6.67</td><td align="char" char="." valign="top">9.00</td><td align="char" char="." valign="top">9.00</td><td align="char" char="." valign="top">9.00</td></tr><tr><td align="left" valign="top">Trust in provider, sum score (range 0&#x2010;55)</td><td align="char" char="." valign="top">22</td><td align="char" char="." valign="top">24</td><td align="char" char="." valign="top">37</td><td align="char" char="." valign="top">43</td><td align="char" char="." valign="top">54</td><td align="char" char="." valign="top">55</td></tr></tbody></table></table-wrap><p>Four participants (P3-P6) reported positive communication experiences. However, for 3 (75%) participants, ADP analysis revealed discrepancies between self-reported positive experiences and the presence of negative (eg, distress or disappointment) or neutral (eg, confusion) emotion outputs in positive clips (<xref ref-type="table" rid="table2">Table 2</xref>). Disappointment, awkwardness, and annoyance were common negative emotions in negative clips by participants who reported positive overall experiences. These relationship-related emotions may reflect disappointment with specific aspects of the patient-provider communication. 
Among the ADP modalities, the greatest discrepancies between verbal content and ADP were observed in facial expression and NLP (in positive clips), whereas speech prosody aligned more closely with survey results in 2 participants (P4 and P6), with emotional outputs such as excitement and amusement (<xref ref-type="table" rid="table2">Table 2</xref>).</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Audiovisual digital phenotyping of emotional outputs.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Participant</td><td align="left" valign="bottom" colspan="6">Clip type and modality</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom" colspan="3">Negative, mean (SD)</td><td align="left" valign="bottom" colspan="3">Positive, mean (SD)</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">FE<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="top">SP<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="top">NL<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">FE</td><td align="left" valign="top">SP</td><td align="left" valign="top">NL</td></tr></thead><tbody><tr><td align="left" valign="top">P1<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Amusement: 0.41 (0.13)</p></list-item><list-item><p>Joy: 0.40 (0.16)</p></list-item><list-item><p>Satisfaction: 0.35 (0.11)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Anxiety: 0.18 (0.19)</p></list-item><list-item><p>Confusion: 0.16 (0.17)</p></list-item><list-item><p>Calmness: 0.15 (0.14)</p></list-item></list></td><td align="left" valign="top"><list 
list-type="bullet"><list-item><p>Confusion: 0.32 (0.22)</p></list-item><list-item><p>Anxiety: 0.22 (0.15)</p></list-item><list-item><p>Contemplation: 0.18 (0.16)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Amusement: 0.50 (0.20)</p></list-item><list-item><p>Joy: 0.50 (0.23)</p></list-item><list-item><p>Satisfaction: 0.40 (0.11)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Realization: 0.16 (0.15)</p></list-item><list-item><p>Amusement: 0.12 (0.10)</p></list-item><list-item><p>Disgust: 0.12 (0.12)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Excitement: 0.27 (0.23)</p></list-item><list-item><p>Enthusiasm: 0.24 (0.07)</p></list-item><list-item><p>Interest: 0.21 (0.13)</p></list-item></list></td></tr><tr><td align="left" valign="top">P2<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Calmness: 0.41 (0.17)</p></list-item><list-item><p>Tiredness: 0.36 (0.16)</p></list-item><list-item><p>Boredom: 0.32 (0.08)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Awkwardness: 0.20 (0.14)</p></list-item><list-item><p>Sadness: 0.17 (0.19)</p></list-item><list-item><p>Realization: 0.14 (0.11)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Annoyance: 0.31 (0.18)</p></list-item><list-item><p>Disappointment: 0.24 (0.14)</p></list-item><list-item><p>Pain: 0.16 (0.18)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Confusion: 0.35 (0.13)</p></list-item><list-item><p>Concentration: 0.33 (0.15)</p></list-item><list-item><p>Calmness: 0.31 (0.15)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Disappointment: 0.29 (0.30)</p></list-item><list-item><p>Confusion: 0.25 
(0.14)</p></list-item><list-item><p>Realization: 0.21 (0.13)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Disapproval: 0.31 (0.25)</p></list-item><list-item><p>Disgust: 0.24 (0.34)</p></list-item><list-item><p>Annoyance: 0.20 (0.13)</p></list-item></list></td></tr><tr><td align="left" valign="top">P3<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Confusion: 0.49 (0.14)</p></list-item><list-item><p>Doubt: 0.33 (0.09)</p></list-item><list-item><p>Distress: 0.28 (0.08)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Realization: 0.27 (0.17)</p></list-item><list-item><p>Distress: 0.19 (0.15)</p></list-item><list-item><p>Awkwardness: 0.17 (0.05)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Annoyance: 0.36 (0.18)</p></list-item><list-item><p>Disappointment: 0.32 (0.17)</p></list-item><list-item><p>Sadness: 0.22 (0.19)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Confusion: 0.43 (0.11)</p></list-item><list-item><p>Concentration: 0.34 (0.12)</p></list-item><list-item><p>Calmness: 0.32 (0.13)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Distress: 0.21 (0.28)</p></list-item><list-item><p>Disappointment: 0.19 (0.16)</p></list-item><list-item><p>Realization: 0.16 (0.09)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Annoyance: 0.16 (0.06)</p></list-item><list-item><p>Anxiety: 0.14 (0.13)</p></list-item><list-item><p>Disappointment: 0.13 (0.12)</p></list-item></list></td></tr><tr><td align="left" valign="top">P4<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Confusion: 0.40 
(0.15)</p></list-item><list-item><p>Disappointment: 0.33 (0.05)</p></list-item><list-item><p>Sadness: 0.33 (0.08)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Awkwardness: 0.18 (0.11)</p></list-item><list-item><p>Realization: 0.17 (0.11)</p></list-item><list-item><p>Calmness: 0.12 (0.16)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Awkwardness: 0.50 (0.10)</p></list-item><list-item><p>Anxiety: 0.23 (0.17)</p></list-item><list-item><p>Annoyance: 0.19 (0.11)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Pain: 0.56 (0.19)</p></list-item><list-item><p>Sadness: 0.50 (0.10)</p></list-item><list-item><p>Distress: 0.46 (0.09)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Determination: 0.19 (0.19)</p></list-item><list-item><p>Excitement: 0.16 (0.25)</p></list-item><list-item><p>Amusement: 0.15 (0.19)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Awkwardness: 0.31 (0.07)</p></list-item><list-item><p>Realization: 0.20 (0.03)</p></list-item><list-item><p>Doubt: 0.18 (0.12)</p></list-item></list></td></tr><tr><td align="left" valign="top">P5<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Confusion: 0.35 (0.11)</p></list-item><list-item><p>Distress: 0.29 (0.08)</p></list-item><list-item><p>Pain: 0.27 (0.18)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Realization: 0.12 (0.13)</p></list-item><list-item><p>Amusement: 0.12 (0.14)</p></list-item><list-item><p>Sadness: 0.11 (0.22)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Sadness: 0.43 (0.31)</p></list-item><list-item><p>Disappointment: 0.23 (0.14)</p></list-item><list-item><p>Annoyance: 0.20 
(0.21)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Confusion: 0.39 (0.11)</p></list-item><list-item><p>Distress: 0.30 (0.06)</p></list-item><list-item><p>Disappointment: 0.30 (0.08)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Realization: 0.14 (0.06)</p></list-item><list-item><p>Contemplation: 0.12 (0.09)</p></list-item><list-item><p>Awkwardness: 0.12 (0.07)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Disappointment: 0.16 (0.16)</p></list-item><list-item><p>Realization: 0.16 (0.06)</p></list-item><list-item><p>Contemplation: 0.14 (0.04)</p></list-item></list></td></tr><tr><td align="left" valign="top">P6<sup><xref ref-type="table-fn" rid="table2fn5">e</xref></sup></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Interest: 0.42 (0.05)</p></list-item><list-item><p>Amusement: 0.39 (0.13)</p></list-item><list-item><p>Concentration: 0.33 (0.09)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Anger: 0.19 (0.23)</p></list-item><list-item><p>Contemplation: 0.17 (0.11)</p></list-item><list-item><p>Disgust: 0.13 (0.12)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Contemplation: 0.18 (0.14)</p></list-item><list-item><p>Empathic pain: 0.15 (0.18)</p></list-item><list-item><p>Sympathy: 0.12 (0.13)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Confusion: 0.48 (0.04)</p></list-item><list-item><p>Concentration: 0.44 (0.05)</p></list-item><list-item><p>Doubt: 0.37 (0.04)</p></list-item></list></td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Realization: 0.21 (0.11)</p></list-item><list-item><p>Amusement: 0.13 (0.07)</p></list-item><list-item><p>Positive surprise: 0.13 (0.13)</p></list-item></list></td><td align="left" 
valign="top"><list list-type="bullet"><list-item><p>Gratitude: 0.43 (0.28)</p></list-item><list-item><p>Relief: 0.23 (0.14)</p></list-item><list-item><p>Satisfaction: 0.20 (0.10)</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>FE: facial expression (FaceNet Inception-ResNet V1).</p></fn><fn id="table2fn2"><p><sup>b</sup>SP: speech prosody (Whisper-Small).</p></fn><fn id="table2fn3"><p><sup>c</sup>NL: natural language (BERT).</p></fn><fn id="table2fn4"><p><sup>d</sup>Low SDM-Q-9-Psy score.</p></fn><fn id="table2fn5"><p><sup>e</sup>High SDM-Q-9-Psy score.</p></fn></table-wrap-foot></table-wrap><p>Two participants (P1 and P2) reported negative communication experiences on surveys. In their negative clips, NLP and prosody reflected these experiences (eg, anxiety), while facial expressions showed mixed patterns: P1 displayed positive emotions (eg, amusement) and P2 displayed neutral emotions (eg, calmness). In positive clips, P1 showed predominantly positive emotions across all modalities, whereas P2 displayed a mix of neutral and negative emotions (eg, confusion) across all modalities, indicating a discrepancy with the positive clip classification but alignment with P2&#x2019;s overall negative self-reported communication experience. Notably, P1 exhibited similar facial expressions across positive and negative clips.</p></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>This pilot study demonstrated the usability of multimodal ADP for evaluating patient-provider communication, SDM, trust, and engagement, with prosody showing the strongest alignment with self-reported experiences and facial expression showing the weakest alignment. 
Discrepancies between self-reports and nonverbal expressions may help explain high rates of service disengagement and treatment nonadherence among patients, whose nonverbal communication cues may be clinically overlooked despite reported trust and engagement [<xref ref-type="bibr" rid="ref12">12</xref>]. Nonverbal expressions aligned with self-reports for negative experiences but contradicted self-reports for positive experiences, highlighting the need for providers to be mindful of social desirability bias and patient-provider power imbalances.</p><p>To protect privacy, analyses used on-premises technology, which offers fewer advantages than cloud-based artificial intelligence (AI) models. This created challenges with simultaneous on-screen appearances, poor lighting, and nonstandard camera angles, resulting in a reduced sample size for comparing ADP with SDM and trust measures. Despite constraints, ADP provided hundreds of thousands of analyzable frames per clip, offering extensive repeated measurements. Postappointment data collection was another limitation.</p><p>Technologically, facial expression sensitivity in depression requires optimization, as limited facial expression may affect provider responses and ADP emotion extraction. Future research should address how to implement commercial AI tools while respecting ethical requirements when handling protected health information [<xref ref-type="bibr" rid="ref13">13</xref>]. Additional considerations for on-premises AI studies should ensure sufficient computing capacity to support analyses.</p><p>Given our predominantly Black patient sample, findings highlight providers&#x2019; need to recognize how social desirability bias, power dynamics, and cultural norms may lead patients to report positive experiences despite feeling disengaged. 
This demonstrates multimodal ADP&#x2019;s promise for objectively assessing communication quality and advancing health equity.</p></sec></body><back><ack><p>The authors thank Macie Sullivan, BA, a research assistant at the Shared Decision Making Laboratory, for her help with data collection.</p><p>Artificial intelligence (AI) tools were used solely for copy editing, grammar checking, and spelling corrections during manuscript preparation. No generative content was created by AI.</p></ack><notes><sec><title>Funding</title><p>This study was partly supported by the Temple University Grant-in-Aid Program.</p></sec><sec><title>Data Availability</title><p>Deidentified data supporting the findings of this study are available from the corresponding author upon reasonable request, subject to approval by the Temple University Institutional Review Board.</p></sec></notes><fn-group><fn fn-type="con"><p>Conceptualization: YZ-I</p><p>Analysis: SK</p><p>Funding acquisition: YZ-I</p><p>Methodology &#x0026; Resources: SK, VT, JB, AB, YZ-I</p><p>Project administration: SK, ACG-M, LH, JC, AP</p><p>Supervision: YZ-I</p><p>Writing &#x2013; original draft: SK, YZ-I</p><p>Writing &#x2013; review &#x0026; editing: all authors</p></fn><fn fn-type="conflict"><p>VT, JB, and AB have worked for Hume AI. 
The remaining authors declare no competing interests.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">ADP</term><def><p>audiovisual digital phenotyping</p></def></def-item><def-item><term id="abb2">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb3">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term id="abb4">SDM</term><def><p>shared decision-making</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schillok</surname><given-names>H</given-names> </name><name name-style="western"><surname>Gensichen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Panagioti</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Effective components of collaborative care for depression in primary care: an individual participant data meta-analysis</article-title><source>JAMA Psychiatry</source><year>2025</year><month>09</month><day>1</day><volume>82</volume><issue>9</issue><fpage>868</fpage><lpage>876</lpage><pub-id pub-id-type="doi">10.1001/jamapsychiatry.2025.0183</pub-id><pub-id pub-id-type="medline">40136273</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zisman-Ilani</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Roth</surname><given-names>RM</given-names> </name><name name-style="western"><surname>Mistler</surname><given-names>LA</given-names> </name></person-group><article-title>Time to support extensive implementation of shared decision making in psychiatry</article-title><source>JAMA 
Psychiatry</source><year>2021</year><month>11</month><day>1</day><volume>78</volume><issue>11</issue><fpage>1183</fpage><lpage>1184</lpage><pub-id pub-id-type="doi">10.1001/jamapsychiatry.2021.2247</pub-id><pub-id pub-id-type="medline">34406346</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Matthews</surname><given-names>EB</given-names> </name><name name-style="western"><surname>Savoy</surname><given-names>M</given-names> </name><name name-style="western"><surname>Paranjape</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Shared decision making in primary care based depression treatment: communication and decision-making preferences among an underserved patient population</article-title><source>Front Psychiatry</source><year>2021</year><volume>12</volume><fpage>681165</fpage><pub-id pub-id-type="doi">10.3389/fpsyt.2021.681165</pub-id><pub-id pub-id-type="medline">34322040</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zisman-Ilani</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Peek</surname><given-names>ME</given-names> </name></person-group><article-title>Improving equity in shared decision-making</article-title><source>JAMA Intern Med</source><year>2024</year><month>09</month><day>1</day><volume>184</volume><issue>9</issue><fpage>1130</fpage><lpage>1131</lpage><pub-id pub-id-type="doi">10.1001/jamainternmed.2024.2993</pub-id><pub-id pub-id-type="medline">39008309</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Birnbaum</surname><given-names>ML</given-names> </name><name 
name-style="western"><surname>Abrami</surname><given-names>A</given-names> </name><name name-style="western"><surname>Heisig</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Acoustic and facial features from clinical interviews for machine learning-based psychiatric diagnosis: algorithm development</article-title><source>JMIR Ment Health</source><year>2022</year><month>01</month><day>24</day><volume>9</volume><issue>1</issue><fpage>e24699</fpage><pub-id pub-id-type="doi">10.2196/24699</pub-id><pub-id pub-id-type="medline">35072648</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abbas</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sauder</surname><given-names>C</given-names> </name><name name-style="western"><surname>Yadav</surname><given-names>V</given-names> </name><etal/></person-group><article-title>Remote digital measurement of facial and vocal markers of major depressive disorder severity and treatment response: a pilot study</article-title><source>Front Digit Health</source><year>2021</year><volume>3</volume><fpage>610006</fpage><pub-id pub-id-type="doi">10.3389/fdgth.2021.610006</pub-id><pub-id pub-id-type="medline">34713091</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zisman-Ilani</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Roe</surname><given-names>D</given-names> </name><name name-style="western"><surname>Scholl</surname><given-names>I</given-names> </name><name name-style="western"><surname>H&#x00E4;rter</surname><given-names>M</given-names> </name><name name-style="western"><surname>Karnieli-Miller</surname><given-names>O</given-names> </name></person-group><article-title>Shared decision making 
during active psychiatric hospitalization: assessment and psychometric properties</article-title><source>Health Commun</source><year>2017</year><month>01</month><volume>32</volume><issue>1</issue><fpage>126</fpage><lpage>130</lpage><pub-id pub-id-type="doi">10.1080/10410236.2015.1099504</pub-id><pub-id pub-id-type="medline">27168160</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Elwyn</surname><given-names>G</given-names> </name><name name-style="western"><surname>Barr</surname><given-names>PJ</given-names> </name><name name-style="western"><surname>Grande</surname><given-names>SW</given-names> </name><name name-style="western"><surname>Thompson</surname><given-names>R</given-names> </name><name name-style="western"><surname>Walsh</surname><given-names>T</given-names> </name><name name-style="western"><surname>Ozanne</surname><given-names>EM</given-names> </name></person-group><article-title>Developing CollaboRATE: a fast and frugal patient-reported measure of shared decision making in clinical encounters</article-title><source>Patient Educ Couns</source><year>2013</year><month>10</month><volume>93</volume><issue>1</issue><fpage>102</fpage><lpage>107</lpage><pub-id pub-id-type="doi">10.1016/j.pec.2013.05.009</pub-id><pub-id pub-id-type="medline">23768763</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hall</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Camacho</surname><given-names>F</given-names> </name><name name-style="western"><surname>Dugan</surname><given-names>E</given-names> </name><name name-style="western"><surname>Balkrishnan</surname><given-names>R</given-names> </name></person-group><article-title>Trust in the medical profession: conceptual and measurement 
issues</article-title><source>Health Serv Res</source><year>2002</year><month>10</month><volume>37</volume><issue>5</issue><fpage>1419</fpage><lpage>1439</lpage><pub-id pub-id-type="doi">10.1111/1475-6773.01070</pub-id><pub-id pub-id-type="medline">12479504</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Baird</surname><given-names>A</given-names> </name><name name-style="western"><surname>Tzirakis</surname><given-names>P</given-names> </name><name name-style="western"><surname>Brooks</surname><given-names>JA</given-names> </name><etal/></person-group><article-title>The ACII 2022 affective vocal bursts workshop &#x0026; competition</article-title><conf-name>2022 10th International Conference on Affective Computing and Intelligent Interaction Workshops and Demos (ACIIW)</conf-name><conf-date>Oct 17-21, 2022</conf-date><conf-loc>Nara, Japan</conf-loc><fpage>1</fpage><lpage>5</lpage><pub-id pub-id-type="doi">10.1109/ACIIW57231.2022.10086002</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Demszky</surname><given-names>D</given-names> </name><name name-style="western"><surname>Movshovitz-Attias</surname><given-names>D</given-names> </name><name name-style="western"><surname>Ko</surname><given-names>J</given-names> </name><name name-style="western"><surname>Cowen</surname><given-names>A</given-names> </name><name name-style="western"><surname>Nemade</surname><given-names>G</given-names> </name><name name-style="western"><surname>Ravi</surname><given-names>S</given-names> </name></person-group><article-title>GoEmotions: a dataset of fine-grained emotions</article-title><conf-name>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</conf-name><conf-date>Jul 5-10, 
2020</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.aclweb.org/anthology/2020.acl-main">https://www.aclweb.org/anthology/2020.acl-main</ext-link></comment><pub-id pub-id-type="doi">10.18653/v1/2020.acl-main.372</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Richardson</surname><given-names>BT</given-names> </name><name name-style="western"><surname>Jackson</surname><given-names>J</given-names> </name><name name-style="western"><surname>Marable</surname><given-names>G</given-names> </name><etal/></person-group><article-title>The role of Black churches in promoting mental health for communities of socioeconomically disadvantaged Black Americans</article-title><source>Psychiatr Serv</source><year>2024</year><month>08</month><day>1</day><volume>75</volume><issue>8</issue><fpage>740</fpage><lpage>747</lpage><pub-id pub-id-type="doi">10.1176/appi.ps.20230263</pub-id><pub-id pub-id-type="medline">38595118</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Galatzer-Levy</surname><given-names>IR</given-names> </name><name name-style="western"><surname>Tomasev</surname><given-names>N</given-names> </name><name name-style="western"><surname>Chung</surname><given-names>S</given-names> </name><name name-style="western"><surname>Williams</surname><given-names>G</given-names> </name></person-group><article-title>Generative psychometrics-an emerging frontier in mental health measurement</article-title><source>JAMA Psychiatry</source><year>2026</year><month>01</month><day>1</day><volume>83</volume><issue>1</issue><fpage>5</fpage><lpage>6</lpage><pub-id pub-id-type="doi">10.1001/jamapsychiatry.2025.3258</pub-id><pub-id pub-id-type="medline">41259050</pub-id></nlm-citation></ref></ref-list></back></article>