<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v10i1e79558</article-id><article-id pub-id-type="doi">10.2196/79558</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Multimodal Sentiment and Emotion Analysis Framework for Personalized Health Coaching Messages: Proof-of-Concept Study</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Md Zuki</surname><given-names>Muhammad Aiman</given-names></name><degrees>MDS</degrees><xref ref-type="aff" rid="aff1"/><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Mohamad Ali</surname><given-names>Nazlena</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Chaw</surname><given-names>Jun Kit</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1"/><xref ref-type="fn" 
rid="equal-contrib1">*</xref></contrib></contrib-group><aff id="aff1"><institution>Institute of Visual Informatics (IVI), Universiti Kebangsaan Malaysia</institution><addr-line>Jalan Tun Ismail Ali</addr-line><addr-line>Bandar Baru Bangi</addr-line><addr-line>Selangor</addr-line><country>Malaysia</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Sarvestan</surname><given-names>Javad</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Song</surname><given-names>Cen</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Gore</surname><given-names>Ross</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Nazlena Mohamad Ali, PhD, Institute of Visual Informatics (IVI), Universiti Kebangsaan Malaysia, Jalan Tun Ismail Ali, Bandar Baru Bangi, Selangor, 43600, Malaysia, 60 0389272402; <email>nazlena.ali@ukm.edu.my</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>all authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>21</day><month>4</month><year>2026</year></pub-date><volume>10</volume><elocation-id>e79558</elocation-id><history><date date-type="received"><day>26</day><month>06</month><year>2025</year></date><date date-type="rev-recd"><day>04</day><month>02</month><year>2026</year></date><date date-type="accepted"><day>04</day><month>02</month><year>2026</year></date></history><copyright-statement>&#x00A9; Muhammad Aiman Md Zuki, Nazlena Mohamad Ali, Jun Kit Chaw. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 21.4.2026. 
</copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2026/1/e79558"/><abstract><sec><title>Background</title><p>Text generation approaches in health care communication have evolved along 2 major paths. The first path involves generative adversarial networks, progressing from basic architectures to specialized variants like Text-to-Text Generative Adversarial Network (TT-GAN) and Time and Frequency Domain-Based Generative Adversarial Network (TF-GAN), which address challenges in discrete text generation through techniques such as Gumbel-Softmax and reinforcement learning. The second path emerges from transformer-based architectures, particularly Generative Pretrained Transformer-2 (GPT-2), which uses extensive pretraining and self-attention mechanisms to generate contextually appropriate text. 
GPT-2&#x2019;s transformer architecture enhances persuasive health communication by generating personalized messages using various strategies like task support, dialogue support, and social support for effective health interventions.</p></sec><sec><title>Objective</title><p>This study aimed to use GPT-2 as a generative method to construct persuasive text in a dataset and compare the performance of sentiment analysis and emotion detection analysis.</p></sec><sec sec-type="methods"><title>Methods</title><p>We combined sentiment analysis tools (VADER [Valence Aware Dictionary and Sentiment Reasoner] and TextBlob) with emotion detection methods (Text2Emotion and NRCLex [National Research Council Lexicon]) to analyze health coaching messages across different persuasive types: reminder, reward, suggestion, and praise.</p></sec><sec sec-type="results"><title>Results</title><p>TextBlob and VADER achieved accuracies of 57% and 69%, respectively, while RoBERTa (robustly optimized BERT approach)-sentiment outperformed them with an accuracy of 88%. Emotion detection showed a high prevalence of &#x201C;joy&#x201D; and &#x201C;happy&#x201D; labels (93.69% positive skew). While transformers excel in accuracy, lexicon-based models like VADER offer a better performance-efficiency balance for real-time health communication systems. For emotion detection, all categories showed perfect accuracy (1.0), while trust showed mixed results, with precision, recall, and <italic>F</italic><sub>1</sub>-score values ranging from 0.81 to 0.96. 
The emotion detection analysis revealed varying success rates across different emotions, with some categories, such as anger and neutral, showing reasonable performance and others, such as trust, showing mixed performance.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This research contributes to understanding the emotional dynamics of persuasive health communication and highlights both the capabilities and limitations of current natural language processing tools in analyzing health-related persuasive messaging. This proof-of-concept study using synthetically generated data establishes a methodological framework for multimodal sentiment and emotion analysis. The findings require validation with real-world health coaching messages before clinical deployment.</p></sec></abstract><kwd-group><kwd>sentiment analysis</kwd><kwd>emotion detection</kwd><kwd>persuasive communication</kwd><kwd>health coaching</kwd><kwd>natural language processing</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Health coaching has been actively researched, and the results are expected to increase human life expectancy. However, with the advancement of technology and social media, the pattern of life is also changing. It appears that health care professionals are not directly involved, and the content does not always align with the user&#x2019;s behavior or situation [<xref ref-type="bibr" rid="ref1">1</xref>]. The necessity for advanced data preprocessing and analysis frameworks to improve system efficacy has been brought to light by the explosive growth of persuasive technology. The optimization of these components mostly depends on precise data processing and user behavior interpretation, even if contemporary persuasive systems use a variety of tactics (eg, reminders, rewards, suggestions, and praise). 
This study tackles the difficulties in converting raw data into useful insights by offering a thorough paradigm for preprocessing and customizing data in persuasive system design (PSD). Adherence to digital behavior change interventions is problematic, and some reasons for this include the absence of a social incentive from a health care practitioner and the presence of content that may not always be relevant to the user&#x2019;s circumstances [<xref ref-type="bibr" rid="ref2">2</xref>].</p><p>Advanced data preprocessing and analysis frameworks are vital for improving system effectiveness, as evidenced by the quick development of persuasive technology. As pointed out in a previous study [<xref ref-type="bibr" rid="ref1">1</xref>], the absence of social incentives and personalization makes it difficult to adhere to digital behavior modification treatments. Although computerized interventions offer practical ways to provide behavioral assistance at a reasonable cost [<xref ref-type="bibr" rid="ref3">3</xref>], their efficacy mostly depends on precise data processing and user behavior interpretation.</p><p>Changing habits and lifestyle choices can significantly improve health outcomes and help prevent early mortality; thus, it is important to design compelling messaging strategies for a digital health assistant that can help individuals stay committed to their prescribed health programs and behavioral changes [<xref ref-type="bibr" rid="ref4">4</xref>]. However, persuasive communication, which utilizes reinforcement learning (RL), has been implemented only in the reminder portion of PSD, and thus, it does not fully utilize the PSD capabilities proposed previously [<xref ref-type="bibr" rid="ref5">5</xref>]. Moreover, this could lead to the dataset being used only for the reminder portion. On the other hand, there is no segment for implementing praise, reward, etc. 
This limitation motivated us to conduct our experiments with data preprocessing using Text-to-Text Generative Adversarial Network (TT-GAN) [<xref ref-type="bibr" rid="ref6">6</xref>] and text generation with knowledge transfer from Generative Pretrained Transformer-2 (GPT-2) [<xref ref-type="bibr" rid="ref7">7</xref>]. Generative adversarial networks (GANs), which represent a class of artificial intelligence (AI) models capable of producing incredibly realistic synthetic data, have emerged as a significant innovation and demonstrated impressive progress in machine learning studies.</p><p>Our framework encompasses multiple data processing layers, beginning with fundamental preprocessing techniques for cleaning and standardizing raw user interaction data. This includes handling missing values, normalizing temporal data, and standardizing input formats across various data sources. The behavior analysis component uses pattern recognition algorithms to identify user engagement trends, activity cycles, and response patterns to different persuasive elements. These patterns are crucial for developing effective personalization strategies.</p><p>The classification system utilizes machine learning algorithms to categorize user behaviors and preferences, enabling dynamic adaptation of persuasive strategies. Natural language processing (NLP) techniques are integrated to analyze user feedback and responses, extracting sentiment and context to refine the system&#x2019;s understanding of user needs. This multifaceted approach ensures that the persuasive elements are personalized, contextually appropriate, and timely.</p><p>Our research combines these components into a cohesive framework to address the gap between raw data collection and effective persuasive strategy implementation. 
Compared with traditional static approaches, the proposed system significantly improves user engagement and behavior change outcomes while maintaining scalability and real-time processing capabilities.</p><p>To close the gap between health care professionals&#x2019; involvement and the GPT-2 architecture, we conducted an experiment combining the GPT-2 method and data preprocessing to obtain a better result for the PSD element. In this experiment, we advanced text generation using GPT-2. Then, we compared the result with the sentiment analysis finding and performed validation with health care practitioners to increase its relevance.</p><p>The contributions of this experiment are as follows:</p><list list-type="bullet"><list-item><p>Introduce the framework of RL and persuasive technology (PT) as the combination of human-computer interaction elements and machine learning</p></list-item><list-item><p>Demonstrate the effectiveness of using GPT-2 as a generative method to construct persuasive texts in datasets for health coaching messages</p></list-item><list-item><p>Compare the performance of multiple sentiment analysis tools (VADER [Valence Aware Dictionary and Sentiment Reasoner] and TextBlob) when applied to health coaching messages across different persuasive types (reminder, reward, suggestion, and praise)</p></list-item><list-item><p>Evaluate and compare the effectiveness of different emotion detection methods (Text2Emotion and NRCLex [National Research Council Lexicon]) for analyzing emotional content in health-related persuasive communication.</p></list-item></list><p>This study serves as a proof-of-concept for integrating generative AI with sentiment and emotion analysis in health-coaching contexts. We acknowledge that our use of GPT-2&#x2013;generated synthetic data provides controlled conditions for methodological development but has inherent limitations in ecological validity. 
The findings represent baseline performance under ideal conditions and should not be extrapolated directly to real-world deployment without further validation using authentic patient-provider communication. This study provides a foundational computational framework for analyzing the sentiment and emotional characteristics of persuasive health coaching messages. We focused on establishing the baseline performance of NLP tools and identifying emotional patterns across persuasive types. However, we did not evaluate whether these sentiment/emotion labels translate to improved message quality, safety, or user outcomes. Such validation requires clinical trials with patient participants, which represents future work beyond the scope of this computational methodology study. Our contribution is the establishment of the analytical infrastructure necessary for future outcome-based validation.</p><p>In the next section, we discuss related work. Later, we provide some background on the knowledge used in the experiment and analyze the results. Lastly, we summarize the findings, provide conclusions, and indicate future directions.</p></sec><sec id="s1-2"><title>Related Work</title><sec id="s1-2-1"><title>Overview</title><p>GANs are AI models in which 2 neural networks compete against each other&#x2014;one generates fake data while the other tries to detect the fake data. Over several years of study and experimentation, many variations of GANs have been improved in terms of flexibility (f-GAN [<xref ref-type="bibr" rid="ref8">8</xref>]), stable training behavior compared with a traditional GAN (w-GAN [<xref ref-type="bibr" rid="ref9">9</xref>]), and learning of disentangled and interpretable representations (Information-Theoretic Generative Adversarial Network [InfoGAN] [<xref ref-type="bibr" rid="ref10">10</xref>]). However, TT-GAN appears more appropriate than the stated GANs, as it is more reliable for language and transformer-based models when using teacher forcing. 
With regard to dialogue generation, document summarization, and machine translation, language models play crucial roles in optimizing linguistic structure. Based on a previous report [<xref ref-type="bibr" rid="ref7">7</xref>], existing neural networks can be broadly categorized into 2 types: recurrent models (eg, recurrent neural network [RNN] and long short-term memory [LSTM]) and self-attention models (eg, GPT-2 and Transformer-XL).</p></sec><sec id="s1-2-2"><title>Text Generation Using GANs</title><p>A previous study [<xref ref-type="bibr" rid="ref6">6</xref>] demonstrated that TT-GAN could successfully provide 2 types of text outputs: semantic summarization and paraphrasing of movie reviews in both English and Chinese. However, an examination of the experiments reveals a lack of a precise comparative analysis between the proposed approach and existing methods in terms of limitations.</p><p>A previous survey [<xref ref-type="bibr" rid="ref11">11</xref>] identified 3 main approaches to GAN-based text generation: Gumbel-Softmax differentiation, RL, and modified training objectives. The adaptation of GANs for text generation presents several challenges, including the initial design of GANs for continuous data (eg, images) and not discrete text data; the need to preserve grammar, syntax, and semantic properties; and the pretraining burden for many approaches. In terms of language structure requirements, it is important to preserve grammatical accuracy and maintain syntactic coherence. This can lead to the challenge of connecting sequences logically. 
Finally, technical limitations regarding memory consumption and context understanding remain persistent obstacles when adapting GANs for generating text.</p><p>Time and Frequency Domain-Based Generative Adversarial Network (TF-GAN), which has been proposed previously [<xref ref-type="bibr" rid="ref7">7</xref>], transforms the active learning process from operating on discrete text elements to working within a continuous space of text features by utilizing maximum likelihood estimation (MLE) to convert features back into tokens. Because the MLE model is not an issue, TF-GAN can produce diverse, high-quality text outputs while maintaining variety in its generations.</p><p>InfoGAN is considered superior to other GAN methods for several key reasons. First, it uniquely achieves high-quality disentangled representations in a completely unsupervised manner, while previous approaches require supervision. Second, it successfully disentangles meaningful features like writing styles, pose, lighting, and facial attributes across various datasets (MNIST, 3D faces, SVHN, and CelebA) with quality that matches or exceeds supervised methods. Third, its information-theoretic approach of maximizing mutual information between latent codes and observations naturally discovers important data variations without supervision.</p></sec><sec id="s1-2-3"><title>GPT-2 for Implementation</title><p>Using sophisticated computational methods, text generation systems can produce novel content in response to prompts, creating outputs that frequently appear similar to human writing. Language models that perform the best on linguistic tasks combine 2 key steps: initial pretraining and supervised fine-tuning [<xref ref-type="bibr" rid="ref12">12</xref>]. 
Recent research indicates that specialized architectural designs for individual tasks may be unnecessary, and the use of multiple self-attention layers and their transfer are adequate [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>].</p><p>In the context of health, when doctors are not available, health care workers can struggle to provide patients with complete information about their medications, including the benefits and side effects. The ability of GPT-2 to generate summaries could help address this information gap [<xref ref-type="bibr" rid="ref15">15</xref>]. The study examined whether GPT-2 could accurately understand and explain medication instructions when provided with specific medication names as input [<xref ref-type="bibr" rid="ref15">15</xref>]. A dataset from the PubMed National Library of Medicine was used for the assessment.</p><p>On comparing the proposed Generative Pretrained Transformer (GPT) model implementation for text generation in the health care domain with other methods (eg, InfoGAN and TT-GAN), the proposed approach appears to have some distinct advantages and tradeoffs. While InfoGAN focuses on learning disentangled representations in an unsupervised manner through mutual information maximization and TT-GAN is aimed at paraphrase generation using adversarial networks, the GPT-based method, specifically in [<xref ref-type="bibr" rid="ref15">15</xref>], targets health care text generation with a more straightforward architecture.</p><p>The evolution from simple GANs to more sophisticated models like GPT demonstrates significant progress but also highlights the need to evaluate the generated content carefully. There are various GAN architectures for text generation, but their comparative performance metrics must be critically examined. 
While TT-GAN&#x2019;s capabilities in semantic summarization and paraphrasing have been mentioned [<xref ref-type="bibr" rid="ref6">6</xref>], the challenges in text generation reveal significant gaps between theoretical models and practical implementation. The transition from traditional GANs to TF-GAN shows the evolution in handling discrete text data, yet questions remain about the tradeoffs between model complexity and output quality. While relevant, the inclusion of GPT-2 applications in health care appears disconnected from the main discussion of GAN-based text generation, suggesting a need for a more cohesive analysis of different generative approaches. Despite being outdated, GPT-2 still has value for certain applications, such as data augmentation and exploration of text generation concepts. GPT-2 is capable of producing excellent, grammatically sound, and semantically cohesive text. It can produce long-form content that is frequently indistinguishable from human-written text, such as essays, articles, and stories. GPT-2 has shown competence across diverse tasks like summarization, question answering, and text completion [<xref ref-type="bibr" rid="ref16">16</xref>]. In the experiments, we used GPT-2 as the model for working with persuasive elements, serving as a baseline for future improvements. This establishes a benchmark for comparison with newer models in terms of performance, constraints, and other aspects in health care settings.</p><p><xref ref-type="table" rid="table1">Table 1</xref> compares TF-GAN, InfoGAN, and GPT-2. Based on the findings, GPT-2 was chosen as the generating agent for the dataset. As noted by Karak et al [<xref ref-type="bibr" rid="ref15">15</xref>], GPT models demonstrate superior performance in generating health care&#x2013;appropriate content, particularly for patient communication tasks, which are similar to our health coaching scenario. 
While newer models exist, GPT-2 provides an established baseline for comparison and demonstrates the viability of transformer-based approaches over adversarial networks in this domain.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Comparative analysis of GPT-2<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> and GAN<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup>-based models for health text generation.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Criterion</td><td align="left" valign="top">TF-GAN<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="top">InfoGAN<sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup></td><td align="left" valign="top">GPT-2</td></tr></thead><tbody><tr><td align="left" valign="top">Architecture</td><td align="left" valign="top">Adversarial (generator-discriminator)</td><td align="left" valign="top">Adversarial with mutual information maximization</td><td align="left" valign="top">Transformer with self-attention</td></tr><tr><td align="left" valign="top">Training stability</td><td align="left" valign="top">Prone to mode collapse and instability</td><td align="left" valign="top">Requires careful balancing of objectives</td><td align="left" valign="top">Stable autoregressive training</td></tr><tr><td align="left" valign="top">Text coherence</td><td align="left" valign="top">Struggles with long-form semantic coherence</td><td align="left" valign="top">Limited to feature disentanglement</td><td align="left" valign="top">Maintains grammatical and semantic consistency</td></tr><tr><td align="left" valign="top">Controllability</td><td align="left" valign="top">Requires complex conditional architecture</td><td align="left" valign="top">Unsupervised learning of latent codes</td><td align="left" valign="top">Direct prompt-based control for persuasive types</td></tr><tr><td align="left" 
valign="top">Domain adaptation</td><td align="left" valign="top">High pretraining burden for health care</td><td align="left" valign="top">Not designed for text generation tasks</td><td align="left" valign="top">Fine-tuning on health communication</td></tr><tr><td align="left" valign="top">Output diversity</td><td align="left" valign="top">Achieved through continuous space transformation</td><td align="left" valign="top">Good for representation learning</td><td align="left" valign="top">Natural diversity through temperature sampling</td></tr><tr><td align="left" valign="top">Health care suitability</td><td align="left" valign="top">Complex discrete-to-continuous conversion</td><td align="left" valign="top">Better for image/feature analysis</td><td align="left" valign="top">Proven effectiveness in medical text generation</td></tr><tr><td align="left" valign="top">Implementation complexity</td><td align="left" valign="top">High (adversarial training dynamics)</td><td align="left" valign="top">High (information-theoretic optimization)</td><td align="left" valign="top">Low (straightforward fine-tuning)</td></tr><tr><td align="left" valign="top">Baseline value</td><td align="left" valign="top">Limited health care text applications</td><td align="left" valign="top">Not applicable to text generation</td><td align="left" valign="top">Established baseline for transformer comparisons</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>GPT-2: Generative Pretrained Transformer-2.</p></fn><fn id="table1fn2"><p><sup>b</sup>GAN: generative adversarial network.</p></fn><fn id="table1fn3"><p><sup>c</sup>TF-GAN: Time and Frequency Domain-Based Generative Adversarial Network.</p></fn><fn id="table1fn4"><p><sup>d</sup>InfoGAN: Information-Theoretic Generative Adversarial Network.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s1-2-4"><title>Emotion Detection Analysis</title><p>In previous years, researchers explored 2 main approaches for detecting 
emotions in text: one approach used machine learning algorithms to train emotion detection models, and the other approach relied on lexicons (collections of words that typically express specific emotions) [<xref ref-type="bibr" rid="ref17">17</xref>]. The discrete emotion model organizes emotions by sorting them into separate and distinct categories or classes. Like the Ekman theory, the Plutchik model proposes that a limited number of basic emotions exist in pairs of opposites, and these primary emotions can combine to create more complex emotional states [<xref ref-type="bibr" rid="ref18">18</xref>].</p><p>The approach involves comprehensive emotion analysis of a given text message using 2 different emotion analysis libraries. The experiment implemented NRCLex emotion detection, which uses the same categories as the Plutchik primary emotions (anticipation, joy, trust, fear, surprise, sadness, disgust, and anger; <xref ref-type="fig" rid="figure1">Figure 1</xref>) [<xref ref-type="bibr" rid="ref19">19</xref>], but with the addition of positive and negative.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Plutchik emotion wheel [<xref ref-type="bibr" rid="ref19">19</xref>].</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e79558_fig01.png"/></fig></sec></sec><sec id="s1-3"><title>PSD Framework</title><p>PSD provides the theoretical foundation for developing systems that influence user attitudes and behaviors through persuasion rather than coercion. Oinas-Kukkonen and Harjumaa [<xref ref-type="bibr" rid="ref5">5</xref>] established the seminal PSD framework, identifying 28 design principles organized into 4 categories: primary task support, dialogue support, system credibility support, and social support. 
Among these, dialogue support principles are particularly relevant to health coaching applications, encompassing praise, reward, reminder, suggestion, similarity, liking, and social role elements.</p><p>The application of PSD to health behavior change has evolved significantly, yet critical gaps remain in existing implementations. Early health coaching systems focused primarily on single persuasive strategies. Beinema et al [<xref ref-type="bibr" rid="ref1">1</xref>] implemented embodied conversational agents with automatic topic selection but primarily utilized reminder-based approaches. op den Akker et al [<xref ref-type="bibr" rid="ref2">2</xref>] proposed the Council of Coaches framework for holistic behavior change, incorporating multiple PSD elements but facing implementation complexity challenges. Most significantly, Albers et al [<xref ref-type="bibr" rid="ref4">4</xref>] developed an RL approach for persuasive conversational agents but implemented only the reminder component of dialogue support, leaving other PSD elements (praise, reward, and suggestion) unexplored in their dataset and system.</p><p>This limited implementation represents a significant gap in PSD research. The dialogue support category encompasses multiple complementary strategies, with each serving distinct psychological functions: reminders prompt action, praise provides positive reinforcement, rewards acknowledge achievements, and suggestions offer guidance. By focusing exclusively on reminders, previous research has not fully utilized the capabilities proposed by the PSD framework or examined how different dialogue support elements may carry varying emotional and sentiment characteristics. This research contributes to PSD literature by providing empirical evidence for how different dialogue support elements function emotionally and sentimentally. 
The finding that praise naturally carries higher emotional intensity than reminders or suggestions validates intuitive assumptions in PSD theory while providing quantifiable metrics for these differences. Understanding these emotional signatures enables more sophisticated PSD, where different PSD elements can be strategically deployed based on desired emotional impact and user state.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design and Overview</title><p>This is a computational methodology development study (not a clinical trial) using synthetically generated health coaching messages (n=1300). No human participants or real patient data were involved. The primary objective was to validate sentiment and emotion analysis tools for health-specific persuasive communication.</p><p>We constructed the dataset using GPT-2, randomization, natural language variation, and template-based generation (<xref ref-type="fig" rid="figure2">Figure 2</xref>). This section will explain the step-by-step process of this experiment. In the first stage, the dataset was constructed using GPT-2 and Python. The second stage involved data preprocessing, including tokenization, stemming, lemmatization, etc. Subsequently, we performed sentiment analysis using VADER and TextBlob. When compared to complex machine learning methods, VADER&#x2019;s straightforward approach offers notable benefits. It provides fast and efficient processing while maintaining high accuracy levels. Additionally, unlike machine learning models, where the decision-making process is obscured in a black box, VADER&#x2019;s dictionary and governing rules are transparent and can be directly examined [<xref ref-type="bibr" rid="ref20">20</xref>]. As a result, VADER can be readily reviewed, comprehended, expanded, or customized according to specific needs. TextBlob, implemented as a Python library, provides an uncomplicated interface to execute fundamental NLP operations. 
One of its key advantages is that it handles text much like Python strings, making it particularly user-friendly and straightforward to implement [<xref ref-type="bibr" rid="ref21">21</xref>].</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Process of the experiment. GPT-2: Generative Pretrained Transformer-2; NRCLex: National Research Council Lexicon; VADER: Valence Aware Dictionary and Sentiment Reasoner.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e79558_fig02.png"/></fig></sec><sec id="s2-2"><title>Data Generation</title><p>In the first phase of creating the dataset, we used GPT-2 for the pretrained dataset. We loaded the GPT-2 tokenizer and model using &#x201C;tokenizer=GPT2Tokenizer.from_pretrained(&#x2018;gpt2&#x2019;)&#x201D; and &#x201C;model=GPT2LMHeadModel.from_pretrained(&#x2018;gpt2&#x2019;),&#x201D; respectively. Activities for weight loss, such as water intake [<xref ref-type="bibr" rid="ref22">22</xref>], portion control, meal planning, and healthy eating [<xref ref-type="bibr" rid="ref23">23</xref>], were set to predefined activities. Then, we prompted text generation by categorizing it as reminder, reward, suggestion, and praise.</p><p>We used randomization, template-based generation, and natural language generation techniques to make it more interesting and natural. This dataset was artificially generated using GPT-2. However, the dataset is based on previous research [<xref ref-type="bibr" rid="ref4">4</xref>]. It has been indicated that the dataset only used reminder as its PT element. Thus, we created a new dataset containing more PT elements (reminder, reward, suggestion, and praise). 
Based on a previous report [<xref ref-type="bibr" rid="ref24">24</xref>], the use of at least 1000 samples in the dataset generally produces acceptable and reliable performance results when testing NLP models.</p><p><xref ref-type="table" rid="table2">Table 2</xref> provides examples demonstrating that GPT-2 successfully generated diverse messages across persuasive types while maintaining semantic coherence and health-appropriate content, validating our choice of generative approach.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Examples demonstrating GPT-2<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> diversity and generalization.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top" colspan="2">Persuasive type and activity</td><td align="left" valign="top">Example message</td><td align="left" valign="top">Diversity feature</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Reminder</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Water intake</td><td align="left" valign="top">Don&#x2019;t forget to drink your water today! Staying hydrated helps your body function at its best.</td><td align="left" valign="top">Standard instructional tone</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Meal planning</td><td align="left" valign="top">Time to plan your meals for the week. A little preparation goes a long way!</td><td align="left" valign="top">Task-focused approach</td></tr><tr><td align="left" valign="top" colspan="2">Reward</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Portion control</td><td align="left" valign="top">You&#x2019;ve been great with portion control this week! 
Treating yourself to something special is well-deserved.</td><td align="left" valign="top">Achievement recognition</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Healthy eating</td><td align="left" valign="top">By maintaining healthy eating, you&#x2019;ll feel better. Your dedication is paying off!</td><td align="left" valign="top">Health benefit emphasis</td></tr><tr><td align="left" valign="top" colspan="2">Suggestion</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Water intake</td><td align="left" valign="top">Try adding a slice of lemon to your water for extra flavor and vitamin C.</td><td align="left" valign="top">Practical tip with rationale</td></tr><tr><td align="left" valign="top" colspan="2">Praise</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Portion control</td><td align="left" valign="top">Excellent work on controlling your portions! You&#x2019;re making real progress.</td><td align="left" valign="top">Direct encouragement</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>GPT-2: Generative Pretrained Transformer-2.</p></fn></table-wrap-foot></table-wrap><p>Our synthetic dataset generation was grounded in the real-world health coaching dataset from previous research [<xref ref-type="bibr" rid="ref4">4</xref>]. This dataset contains authentic health coaching messages from a conversational agent study focused on physical activity and medication adherence.</p></sec><sec id="s2-3"><title>Data Preprocessing</title><p>The analysis of persuasive messages was conducted through a comprehensive computational approach incorporating multiple NLP techniques (<xref ref-type="other" rid="box1">Textbox 1</xref>). 
The methodology consisted of several sequential phases designed to process and analyze the textual data systematically. The process began with data preprocessing, which included text normalization through lowercase conversion, contraction expansion, and special character removal. Linguistic processing was performed using NLTK&#x2019;s word_tokenize function, with selective stopword removal that preserved critical semantic markers (eg, &#x201C;not,&#x201D; &#x201C;no,&#x201D; and &#x201C;nor&#x201D;) and lemmatization via WordNetLemmatizer. Feature engineering encompassed both quantitative metrics (word frequency, character count, and sentence structure) and categorical data processing through label encoding for persuasive types and activities.</p><boxed-text id="box1"><title> Pseudocode of text preprocessing.</title><p>FUNCTION preprocess_text(text)</p><p><named-content content-type="indent">&#x2003;</named-content>INPUT: text - a string of text to be preprocessed</p><p><named-content content-type="indent">&#x2003;</named-content>OUTPUT: preprocessed text as a string</p><p><named-content content-type="indent">&#x2003;</named-content>// Convert text to lowercase</p><p><named-content content-type="indent">&#x2003;</named-content>text = CONVERT_TO_LOWERCASE(text)</p><p><named-content content-type="indent">&#x2003;</named-content>// Expand contractions (eg, &#x201C;don&#x2019;t&#x201D; to &#x201C;do not&#x201D;)</p><p><named-content content-type="indent">&#x2003;</named-content>text = EXPAND_CONTRACTIONS(text)</p><p><named-content content-type="indent">&#x2003;</named-content>// Remove all characters except letters and spaces</p><p><named-content content-type="indent">&#x2003;</named-content>text = REMOVE_SPECIAL_CHARACTERS_AND_NUMBERS(text)</p><p><named-content content-type="indent">&#x2003;</named-content>// Normalize whitespace</p><p><named-content content-type="indent">&#x2003;</named-content>text = 
REMOVE_EXTRA_WHITESPACE(text)</p><p><named-content content-type="indent">&#x2003;</named-content>// Split text into individual words</p><p><named-content content-type="indent">&#x2003;</named-content>tokens = TOKENIZE(text)</p><p><named-content content-type="indent">&#x2003;</named-content>// Initialize stopwords to remove</p><p><named-content content-type="indent">&#x2003;</named-content>stop_words = GET_ENGLISH_STOPWORDS() important_words = {&#x2018;not,&#x2019; &#x2018;no,&#x2019; &#x2018;nor,&#x2019; &#x2018;but,&#x2019; &#x2018;and,&#x2019; &#x2018;or,&#x2019; &#x2018;should,&#x2019; &#x2018;must&#x2019;} stop_words = stop_words - important_words</p><p><named-content content-type="indent">&#x2003;</named-content>// Remove stopwords</p><p><named-content content-type="indent">&#x2003;</named-content>filtered_tokens = EMPTY_LIST</p><p><named-content content-type="indent">&#x2003;</named-content>FOR EACH token IN tokens:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>IF token NOT IN stop_words:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>ADD token TO filtered_tokens</p><p><named-content content-type="indent">&#x2003;</named-content>// Lemmatize words to their base form</p><p><named-content content-type="indent">&#x2003;</named-content>lemmatized_tokens = EMPTY_LIST</p><p><named-content content-type="indent">&#x2003;</named-content>lemmatizer = INITIALIZE_LEMMATIZER()</p><p><named-content content-type="indent">&#x2003;</named-content>FOR EACH token IN filtered_tokens:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>lemmatized_token = 
LEMMATIZE(token)</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>ADD lemmatized_token TO lemmatized_tokens</p><p><named-content content-type="indent">&#x2003;</named-content>// Join tokens back into text</p><p><named-content content-type="indent">&#x2003;</named-content>RETURN JOIN_WITH_SPACES(lemmatized_tokens)</p><p>END FUNCTION</p></boxed-text><p>We used a methodical ground-truth labeling approach to create trustworthy standards for assessing sentiment and emotion analysis technologies. This procedure guaranteed that the emotional content and message sentiment of all persuasion types were consistently and reliably assessed. Three independent raters with backgrounds in psychology and linguistics annotated each message after the data were preprocessed.</p><p>A multistage protocol was used in the annotation process to reduce bias and increase dependability. Initially, a calibration set of 50 sample messages with preestablished labels was used to train raters. They then independently assigned labels to the entire dataset based on predetermined standards. Disagreements over messages with differing annotations were resolved through a consensus meeting. Using Fleiss &#x03BA; to calculate interannotator agreement, we identified moderate agreement (&#x03BA;=0.65) for emotion detection and substantial agreement (&#x03BA;=0.78) for sentiment classification.</p><p>The sentiment and emotion ground-truth classification criteria are described in <xref ref-type="table" rid="table3">Table 3</xref>. Based on preliminary validation tests, we set specific thresholds for sentiment classification as follows: polarity between &#x2212;0.05 and 0.05, neutral; polarity &#x2264;&#x2212;0.05, negative; and polarity &#x2265;0.05, positive. 
This approach takes into account the subtleties of persuasive health communication while adhering to accepted standards in sentiment analysis research.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Classification type for sentiment analysis.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top" colspan="2">Classification type and category</td><td align="left" valign="top">Assignment criteria</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="3">Sentiment</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Positive</td><td align="left" valign="top">Messages expressing encouragement, optimism, approval, or positive reinforcement with polarity &#x2265;0.05</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Negative</td><td align="left" valign="top">Messages expressing caution, warning, criticism, or concern with polarity &#x2264;&#x2212;0.05</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Neutral</td><td align="left" valign="top">Messages conveying information without strong emotional connotations with polarity between &#x2212;0.05 and 0.05</td></tr><tr><td align="left" valign="top" colspan="3">Emotion</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Joy</td><td align="left" valign="top">Expressions of happiness, pleasure, or satisfaction (eg, &#x201C;great job&#x201D; and &#x201C;excellent progress&#x201D;)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Trust</td><td align="left" valign="top">Expressions of confidence, 
reliability, or dependability (eg, &#x201C;you can count on&#x201D; and &#x201C;reliable method&#x201D;)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Fear</td><td align="left" valign="top">Expressions of concern, caution, or warning (eg, &#x201C;be careful&#x201D; and &#x201C;watch out for&#x201D;)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Surprise</td><td align="left" valign="top">Expressions of astonishment or unexpectedness (eg, &#x201C;amazing results&#x201D; and &#x201C;unexpected benefit&#x201D;)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sadness</td><td align="left" valign="top">Expressions of disappointment or regret (eg, &#x201C;unfortunately&#x201D; and &#x201C;disappointing outcome&#x201D;)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Disgust</td><td align="left" valign="top">Expressions of aversion or distaste (eg, &#x201C;avoid unhealthy options&#x201D; and &#x201C;eliminate junk food&#x201D;)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Anger</td><td align="left" valign="top">Expressions of frustration or irritation (eg, &#x201C;challenging obstacles&#x201D; and &#x201C;frustrating setbacks&#x201D;)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Anticipation</td><td align="left" valign="top">Expressions of expectation or looking forward (eg, &#x201C;prepare for,&#x201D; &#x201C;plan ahead,&#x201D; and &#x201C;look forward to&#x201D;)</td></tr></tbody></table></table-wrap><p>We used the 
Plutchik wheel of emotions as our theoretical framework for classifying emotions, and we found that each message had 8 main emotions. Raters used clear textual clues to assign binary labels (present/absent) to each emotion category. When appropriate, a message may be assigned more than one emotion label. Lexical features (certain phrases that convey a particular emotion) and contextual clues in the message were both used in the emotion recognition procedure.</p><p>The resulting ground-truth labels provided a robust foundation for evaluating the performance of sentiment analysis tools (VADER and TextBlob) and emotion detection methods (Text2Emotion and NRCLex). By establishing these reliable benchmarks, we could accurately assess the strengths and limitations of each approach in analyzing persuasive health communication. <xref ref-type="table" rid="table4">Table 4</xref> presents the composition and distribution of the dataset generated.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Dataset composition and distribution.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top" colspan="2">Dimension and category</td><td align="left" valign="top" colspan="2">Value (N=1300), n (%)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="4">Persuasive_type</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Reminder</td><td align="left" valign="top" colspan="2">343 (26.4)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Reward</td><td align="left" valign="top" colspan="2">331 (25.5)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Suggestion</td><td align="left" valign="top" colspan="2">316 (24.3)</td></tr><tr><td 
align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Praise</td><td align="left" valign="top" colspan="2">310 (23.9)</td></tr><tr><td align="left" valign="top" colspan="2">Activity</td><td align="left" valign="top" colspan="2"/></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Water intake</td><td align="left" valign="top" colspan="2">357 (27.5)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Healthy eating</td><td align="left" valign="top" colspan="2">331 (25.5)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Portion control</td><td align="left" valign="top" colspan="2">313 (24.1)</td></tr><tr><td align="left" valign="top" colspan="2"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Meal planning</td><td align="left" valign="top" colspan="2">299 (23.0)</td></tr></tbody></table></table-wrap></sec><sec id="s2-4"><title>Sentiment Analysis</title><p>Sentiment analysis was conducted using a dual-method approach: VADER provided compound sentiment scores and classification with &#x00B1;0.05 thresholds, while TextBlob analysis supplied complementary polarity and subjectivity metrics. VADER and TextBlob are sentiment analysis tools that help categorize text reviews into 3 emotional categories: positive, neutral, and negative [<xref ref-type="bibr" rid="ref25">25</xref>]. 
Example pseudocodes for VADER and TextBlob are provided in <xref ref-type="other" rid="box2">Textboxes 2</xref> and <xref ref-type="other" rid="box3">3</xref>, respectively.</p><boxed-text id="box2"><title> Example pseudocode for VADER (Valence Aware Dictionary and Sentiment Reasoner).</title><p>FUNCTION apply_vader_sentiment(dataframe, vader_scores)</p><p><named-content content-type="indent">&#x2003;</named-content>INPUT:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>dataframe - table of text data</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>vader_scores - sentiment analysis scores from VADER</p><p><named-content content-type="indent">&#x2003;</named-content>OUTPUT:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>dataframe with added sentiment columns</p><p><named-content content-type="indent">&#x2003;</named-content>// Add negative sentiment scores</p><p><named-content content-type="indent">&#x2003;</named-content>FOR EACH row IN vader_scores:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>dataframe[&#x2018;vader_negative&#x2019;][row] = vader_scores[row][&#x2018;neg&#x2019;]</p><p><named-content content-type="indent">&#x2003;</named-content>// Add neutral sentiment scores</p><p><named-content content-type="indent">&#x2003;</named-content>FOR EACH row IN vader_scores:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>dataframe[&#x2018;vader_neutral&#x2019;][row] = vader_scores[row][&#x2018;neu&#x2019;]</p><p><named-content content-type="indent">&#x2003;</named-content>// Add positive sentiment scores</p><p><named-content content-type="indent">&#x2003;</named-content>FOR EACH row IN 
vader_scores:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>dataframe[&#x2018;vader_positive&#x2019;][row] = vader_scores[row][&#x2018;pos&#x2019;]</p><p><named-content content-type="indent">&#x2003;</named-content>// Add compound sentiment scores</p><p><named-content content-type="indent">&#x2003;</named-content>FOR EACH row IN vader_scores:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>dataframe[&#x2018;vader_compound&#x2019;][row] = vader_scores[row][&#x2018;compound&#x2019;]</p><p><named-content content-type="indent">&#x2003;</named-content>RETURN dataframe</p><p>END FUNCTION</p></boxed-text><boxed-text id="box3"><title> Example pseudocode for TextBlob.</title><p>FUNCTION apply_textblob_sentiment(dataframe, message_column)</p><p><named-content content-type="indent">&#x2003;</named-content>INPUT:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>dataframe - table of text data</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>message_column - column containing text messages</p><p><named-content content-type="indent">&#x2003;</named-content>OUTPUT:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>dataframe with added TextBlob sentiment columns</p><p><named-content content-type="indent">&#x2003;</named-content>// Add polarity scores</p><p><named-content content-type="indent">&#x2003;</named-content>FOR EACH row IN dataframe:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>text = CONVERT_TO_STRING(dataframe[message_column][row])</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>blob = CREATE_TEXTBLOB(text)</p><p><named-content 
content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>dataframe[&#x2018;textblob_polarity&#x2019;][row] = blob.sentiment.polarity</p><p><named-content content-type="indent">&#x2003;</named-content>// Add subjectivity scores</p><p><named-content content-type="indent">&#x2003;</named-content>FOR EACH row IN dataframe:</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>text = CONVERT_TO_STRING(dataframe[message_column][row])</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>blob = CREATE_TEXTBLOB(text)</p><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content>dataframe[&#x2018;textblob_subjectivity&#x2019;][row] = blob.sentiment.subjectivity</p><p><named-content content-type="indent">&#x2003;</named-content>RETURN dataframe</p><p>END FUNCTION</p></boxed-text><p>The statistical framework included a distribution analysis of sentiment across message categories and a correlation analysis between VADER and TextBlob scores. The implementation utilized Python 3.x with established NLP libraries (NLTK, VADER, and TextBlob) and used a modular design to ensure reproducibility and facilitate independent verification of each analysis component.</p><p>TextBlob is a Python library that helps developers work with text data in both Python 2 and 3 environments [<xref ref-type="bibr" rid="ref26">26</xref>]. It offers straightforward tools for performing essential NLP tasks. With TextBlob, text can be analyzed to identify parts of speech, extract noun phrases, determine sentiment, categorize content, and convert text between languages. The library is designed to make these complex NLP operations accessible through a simple interface. 
All processed features were integrated into a unified dataset that preserved original messages alongside their processed versions, maintaining data integrity throughout the analysis pipeline. TextBlob provided the following 2 primary sentiment metrics: polarity and subjectivity. While polarity measures the positive-negative orientation of text, subjectivity quantifies the degree to which the text expresses personal opinions, emotions, or judgments versus objective information.</p><p>VADER&#x2019;s compound score calculation involves several steps to evaluate the emotional content of text. First, it scans the text to identify words and patterns that have known emotional meanings. Then, it adjusts how strong or weak these emotions are based on specific rules (eg, intensifiers, negations, etc). Next, it combines all these individual emotional scores found in the text. Finally, it converts the total score into a standardized number between &#x2212;1 and 1, where &#x2212;1 represents extremely negative sentiment and 1 represents extremely positive sentiment. 
The function of VADER is as follows:</p><disp-formula id="E1"><label>(1)</label><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>m</mml:mi><mml:mi>p</mml:mi><mml:mi>o</mml:mi><mml:mi>u</mml:mi><mml:mi>n</mml:mi><mml:mi>d</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>s</mml:mi><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mo>=</mml:mo><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>l</mml:mi><mml:mi>i</mml:mi><mml:mi>z</mml:mi><mml:mi>e</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>w</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mi>s</mml:mi><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mi>x</mml:mi><mml:mrow><mml:msqrt><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:msqrt><mml:mtext>+</mml:mtext><mml:mi>&#x03B1;</mml:mi></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>In VADER&#x2019;s sentiment analysis equation, the compound score formula normalizes raw sentiment scores (x) to a range between &#x2212;1 and 1. The &#x03B1; parameter, typically set to 15, acts as a stabilizing factor that prevents extreme scores when the raw sentiment value is small. The denominator&#x2019;s square root term helps create a smooth normalization curve while preserving the sentiment&#x2019;s direction.</p><p>The first function, get_vader_sentiment, takes a text input and calculates sentiment scores using VADER. It converts missing values into empty strings and returns a dictionary of scores, including negative, neutral, positive, and compound values. 
The second function, get_textblob_sentiment, performs a similar analysis using TextBlob, returning both polarity (how positive or negative) and subjectivity (how objective or subjective) scores.</p><p>The code then applies these sentiment analyses to a data frame&#x2019;s &#x201C;message&#x201D; column. It creates new columns by extracting individual components of the sentiment scores. For VADER, it separates the negative, neutral, positive, and compound scores into separate columns. Similarly, TextBlob extracts the polarity and subjectivity scores into separate columns. This separation makes it easier to analyze and compare different aspects of sentiment.</p><p>Finally, the code categorizes the overall sentiment of each message based on VADER&#x2019;s compound score using a standard threshold approach. Messages with compound scores greater than or equal to 0.05 are labeled &#x201C;positive,&#x201D; those with scores less than or equal to &#x2212;0.05 are labeled &#x201C;negative,&#x201D; and those with scores between these values are labeled &#x201C;neutral.&#x201D; This categorization provides a simple way to classify the emotional tone of each message into 3 distinct categories.</p><p>We used precision, recall, <italic>F</italic><sub>1</sub>-score, and accuracy in this experiment to assess performance. We determined the true positive (TP), true negative (TN), false positive (FP), and false negative (FN) values. Precision was calculated by dividing truly positive classifications by all positive classifications as follows: TP/(TP+FP). Recall was calculated by dividing truly positive classifications by all positive examples as follows: TP/(TP+FN). <italic>F</italic><sub>1</sub>-score was calculated as 2 times (precision&#x00D7;recall) divided by (precision+recall), which is equivalent to the following: (2&#x00D7;TP)/([2&#x00D7;TP]+FP+FN). 
Accuracy was calculated by dividing true classifications by all classifications as follows: (TP+TN)/(TP+TN+FP+FN).</p></sec><sec id="s2-5"><title>Emotion Detection Analysis</title><p>Emotion detection is a branch of sentiment analysis that deals with the extraction and analysis of emotions. In this experiment, we conducted emotion detection analysis in the dataset. Text2Emotion and NRCLex were chosen as the techniques or methods in this experiment. NRCLex was selected as it does not require training data or model training and identifies multiple emotional categories beyond just positive/negative sentiment, including joy, anger, sadness, fear, trust, surprise, and others. NRCLex offers tools for plotting sentiment analysis outcomes [<xref ref-type="bibr" rid="ref27">27</xref>]. While TextBlob was used for sentiment analysis (positive/negative/neutral), Text2Emotion was used for more granular emotional analysis that is needed for a deeper understanding [<xref ref-type="bibr" rid="ref28">28</xref>].</p><p>In Text2Emotion, the following 5 emotions are involved: happy, angry, surprise, sad, and fear. On the other hand, NRCLex involves the following 10 emotions: fear, anger, trust, surprise, sadness, disgust, joy, anticipation, positive, and negative.</p><p>Text2Emotion follows a basic model for emotions. It starts by preprocessing the input text using a separate method. If the cleaning process results in empty text, it returns an empty analysis result. For the actual analysis, it processes the text through both the Text2Emotion and NRCLex libraries to get emotion scores. The method then calculates an overall emotion intensity based on these scores. To identify the dominant emotions, it converts the emotion scores from both libraries into lists of tuples and finds the emotion with the highest score for each library, defaulting to &#x201C;neutral&#x201D; if no emotions are detected. 
Additionally, it determines an overall sentiment score by subtracting the NRCLex negative score from the positive score. The final output is a dictionary containing the original message, emotion scores from both libraries, dominant emotions from each library, calculated sentiment score, and overall emotion intensity.</p></sec><sec id="s2-6"><title>Experimental Setup</title><p>The experiments were conducted using Python programming within the Anaconda 3.0 environment and Jupyter Notebook interface. This section outlines the experimental setup, including details about the system specifications and parameter configurations used. <xref ref-type="table" rid="table5">Table 5</xref> presents sentiment thresholds for the VADER and TextBlob analyzers, defining positive, negative, and neutral classifications and data preprocessing parameters for emotional analysis. The experiments were performed on a VICTUS gaming laptop (15-fa1231TX; HP Inc) with an Intel Core i5-12450H processor (2.00 GHz), 16 GB of RAM, and an NVIDIA GeForce RTX 4050 graphics card. 
The operating system was Windows 11 (Microsoft Corp).</p><p><xref ref-type="table" rid="table6">Table 6</xref> outlines emotion detection parameters using Text2Emotion and NRCLex libraries, with keyword categories and scoring methods for emotional analysis.</p><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Hyperparameter settings for sentiment lexicons.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Parameter name</td><td align="left" valign="top">Parameter value</td></tr></thead><tbody><tr><td align="left" valign="top">VADER<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup> positive</td><td align="left" valign="top">Sentiment score &#x2265;0.05</td></tr><tr><td align="left" valign="top">VADER negative</td><td align="left" valign="top">Sentiment score &#x2264;&#x2212;0.05</td></tr><tr><td align="left" valign="top">VADER neutral</td><td align="left" valign="top">Sentiment score &#x003E;&#x2212;0.05 and &#x003C;0.05</td></tr><tr><td align="left" valign="top">TextBlob positive</td><td align="left" valign="top">Sentiment score &#x003E;0</td></tr><tr><td align="left" valign="top">TextBlob negative</td><td align="left" valign="top">Sentiment score &#x003C;0</td></tr><tr><td align="left" valign="top">TextBlob neutral</td><td align="left" valign="top">Sentiment score 0</td></tr><tr><td align="left" valign="top">Emotion detection</td><td align="left" valign="top">High intensity &#x003E; mean</td></tr><tr><td align="left" valign="top">Score normalization</td><td align="left" valign="top">Value/total when total &#x003E;0</td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>VADER: Valence Aware Dictionary and Sentiment Reasoner.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t6" position="float"><label>Table 6.</label><caption><p>Hyperparameter settings for emotion analysis.</p></caption><table id="table6" frame="hsides" rules="groups"><thead><tr><td 
align="left" valign="top">Parameter name</td><td align="left" valign="top">Parameter value</td></tr></thead><tbody><tr><td align="left" valign="top">Text2Emotion</td><td align="left" valign="top">Happy, angry, surprise, sad, fear (default classes)</td></tr><tr><td align="left" valign="top">NRCLex<sup><xref ref-type="table-fn" rid="table6fn1">a</xref></sup></td><td align="left" valign="top">Fear, anger, trust, surprise, sadness, disgust, joy, anticipation, positive, negative (default classes)</td></tr><tr><td align="left" valign="top">Emotion keywords (joy)</td><td align="left" valign="top">Happy, great, excellent, good, wonderful, fantastic, excited</td></tr><tr><td align="left" valign="top">Emotion keywords (encouragement)</td><td align="left" valign="top">Can, will, try, achieve, possible, potential, progress</td></tr><tr><td align="left" valign="top">Emotion keywords (concern)</td><td align="left" valign="top">Careful, warning, attention, caution, important</td></tr><tr><td align="left" valign="top">Emotion keywords (neutral)</td><td align="left" valign="top">Maintain, continue, regular, routine, standard</td></tr><tr><td align="left" valign="top">Emotion keywords (directive)</td><td align="left" valign="top">Must, should, need, remember, don&#x2019;t forget</td></tr><tr><td align="left" valign="top">Emotion intensity</td><td align="left" valign="top">Calculated as max (Text2Emotion score, NRCLex score)</td></tr><tr><td align="left" valign="top">Emotion classification</td><td align="left" valign="top">Dominant emotion is the highest scoring category</td></tr><tr><td align="left" valign="top">Score normalization</td><td align="left" valign="top">Score/total (when total &#x003E;0)</td></tr></tbody></table><table-wrap-foot><fn id="table6fn1"><p><sup>a</sup>NRCLex: National Research Council Lexicon.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-7"><title>Ethical Considerations</title><p>This research is exempt from institutional ethics review as it is a 
computational methodology development study exclusively using synthetically generated data. No human participants, human research participants, real patient data, or personally identifiable information were involved at any stage of this research. All health coaching data and messages (1300 messages) were generated using GPT-2 based on hypothetical and generic health coaching scenarios [<xref ref-type="bibr" rid="ref4">4</xref>]. These messages were never derived from real patient communications, real medical records, or prior research data involving human participants. Therefore, no data sharing restrictions apply, and no privacy protections specific to human participants are required. The code and synthetic dataset are publicly available on GitHub [<xref ref-type="bibr" rid="ref29">29</xref>]. This study represents a methodological proof-of-concept, and future clinical validation and deployment will require formal ethics review, informed consent, and adherence to applicable data protection regulations.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Sentiment Analysis</title><p>Sentiment analysis revealed a highly positively skewed distribution across all health coaching messages (<xref ref-type="table" rid="table7">Table 7</xref>). The vast majority of messages (1218/1300, 93.7%) were classified as positive, with negative messages representing only 5.4% (70/1300) and neutral messages representing less than 1% (12/1300, 0.9%) of the total dataset. This overwhelmingly positive distribution suggests a deliberate messaging strategy designed to maintain an uplifting and constructive tone, typical of persuasive communication aimed at encouraging and motivating users toward health behavior change. The minimal presence of negative and neutral content indicates a conscious design choice to avoid discouraging or ambivalent messaging in favor of positive reinforcement. 
This pronounced positive skew aligns with established principles in health coaching, where supportive, optimistic communication has been shown to enhance user engagement and adherence to behavior change interventions.</p><table-wrap id="t7" position="float"><label>Table 7.</label><caption><p>Overall sentiment distribution.</p></caption><table id="table7" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Sentiment category</td><td align="left" valign="top">Distribution value</td></tr></thead><tbody><tr><td align="left" valign="top">Positive</td><td align="left" valign="top">0.936923</td></tr><tr><td align="left" valign="top">Negative</td><td align="left" valign="top">0.053846</td></tr><tr><td align="left" valign="top">Neutral</td><td align="left" valign="top">0.009231</td></tr></tbody></table></table-wrap><p>The visualization in <xref ref-type="fig" rid="figure3">Figure 3</xref> reveals a remarkable variation in sentiment compound scores among the reminder, reward, suggestion, and praise categories, with the latter demonstrating the highest median sentiment score and the most compact IQR. Praise messages exhibited a notably high concentration of positive sentiments (approximately 0.75&#x2010;0.95), suggesting a consistent positive emotional valence in praise-based communication strategies. 
However, the presence of outliers in all categories, especially negative ones (ranging from &#x2212;0.75 to &#x2212;1.0), raises compelling questions about the contextual factors that generate these anomalous cases.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>VADER (Valence Aware Dictionary and Sentiment Reasoner) sentiment distribution by persuasive type.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e79558_fig03.png"/></fig><p>The reminder category showed the widest IQR, indicating greater variability in emotional content, which could be attributed to the diverse nature of reminder messages, which ranged from urgent warnings to gentle nudges. This heterogeneity in sentiment distribution might reflect the complex linguistic strategies used in reminder-based persuasion. The reward and suggestion categories demonstrated intermediate patterns, with reward messages showing slightly higher median sentiment scores than suggestions. This nuanced difference might be explained by the inherently positive nature of reward-related communication versus the more neutral or instructional tone often present in suggestions.</p><p>Theoretically, these patterns align with existing literature on persuasive communication, but they also challenge some conventional assumptions about the emotional loading of different persuasive strategies. The consistently high positive sentiment in praise messages supports established theories about positive reinforcement. However, the substantial variance in reminder sentiments suggests a more complex relationship between message type and emotional content than previously theorized. 
This analysis opens several avenues for future research, particularly in understanding how sentiment variation within each persuasive type correlates with message effectiveness and recipient engagement.</p><p><xref ref-type="fig" rid="figure4">Figure 4</xref> shows how 2 different sentiment analysis tools (VADER and TextBlob) compare when analyzing the same messages across different persuasive types (reminder, reward, suggestion, and praise). The pattern reveals an interesting relationship as follows: as VADER scores increase (moving right), TextBlob scores tend to also increase (moving up), creating a diagonal trend from bottom-left to top-right. This means both tools generally agree on whether messages are positive or negative. Most points cluster in the positive range (right side of the graph), especially for praise messages (shown in red), which matches what we would expect since praise is usually positive. However, there is quite a bit of scatter in the data, indicating that these tools do not always agree perfectly. We can see points spread out vertically for any given VADER score, meaning that while VADER might give one score, TextBlob could give quite a different rating for the same text. Reminders, rewards, and suggestions had similar proportions of positive and neutral components, though reminders appeared to have a slightly smaller positive component than the others. It is important to distinguish between VADER&#x2019;s component scores (the proportion of positive/neutral/negative words) and the final sentiment classification (based on the compound score). <xref ref-type="fig" rid="figure5">Figure 5</xref> shows that neutral words dominated the composition of health coaching messages (70%&#x2010;80% of words). However, the strategic use of positive words, encouraging punctuation, and a supportive tone resulted in 93.7% (1218/1300) of messages being classified as overall positive. 
This pattern reflects effective health coaching communication design: using primarily informative (neutral) language enhanced with selective positive reinforcement.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Correlation between VADER (Valence Aware Dictionary and Sentiment Reasoner) and TextBlob sentiment scores.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e79558_fig04.png"/></fig><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Sentiment metric heatmap by persuasive type. Darker blue indicates higher values, and lighter colors represent lower values.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e79558_fig05.png"/></fig><p>The sentiment analysis heatmap in <xref ref-type="fig" rid="figure5">Figure 5</xref> provides a comprehensive visualization of how different persuasive message types perform across various sentiment metrics. The data reveal fascinating patterns in how praise, reminder, reward, and suggestion messages carry emotional content. In the heatmap, we can see that praise messages consistently demonstrate the strongest positive sentiment, with the highest VADER compound score (0.80) and TextBlob polarity (0.51). The uniformly low negative sentiment scores (0.02&#x2010;0.03) across all message types are particularly noteworthy, suggesting a deliberate approach to maintain constructive communication regardless of the message&#x2019;s purpose.</p><p>The neutral component (vader_neutral) consistently showed high values (0.73&#x2010;0.80) across all message types, indicating that the messages maintained a balanced tone even while conveying different persuasive intentions. Reminder messages generally exhibited lower intensity scores compared to other types, which might reflect their more practical, straightforward nature. 
Meanwhile, suggestions and rewards showed similar patterns, with moderately high compound scores (0.63 and 0.62, respectively), suggesting that they carry comparable emotional weight in their delivery. This similarity in sentiment patterns across different metrics, visible in the vertical striping of the heatmap (<xref ref-type="fig" rid="figure5">Figure 5</xref>), indicates a consistent approach to emotional content within each message type, while the variations between types reflect their distinct communicative purposes.</p><p>Reward and suggestion messages showed remarkably similar overall sentiment scores (vader_compound: 0.625 and 0.626, respectively) yet diverged in their stylistic characteristics. Reward messages maintained moderate polarity (0.350) and subjectivity (0.498), balancing positive reinforcement with personal engagement. In contrast, suggestion messages displayed lower polarity (0.209) and the lowest subjectivity (0.434), indicating a more objective, advisory delivery style that prioritizes practical guidance over emotional appeal. This pattern reveals that while all message types maintain positive orientation, they use different degrees of emotional directness and personal perspective tailored to their specific persuasive functions&#x2014;praise for celebration, reminders for neutral prompting, rewards for positive reinforcement, and suggestions for objective guidance.</p><p>The sentiment analysis across different persuasive types revealed distinct emotional profiles that aligned with their communicative functions (<xref ref-type="table" rid="table8">Tables 8</xref> and <xref ref-type="table" rid="table9">9</xref>). Praise messages demonstrated the highest sentiment intensity (vader_compound: 0.799; textblob_polarity: 0.513) and subjectivity (0.615), reflecting their emotionally expressive nature designed to acknowledge and celebrate user achievements. 
Reminder messages exhibited the lowest sentiment scores (vader_compound: 0.451; textblob_polarity: 0.257) with moderate subjectivity (0.489), indicating a more neutral, task-focused tone appropriate for prompting action without excessive emotional loading.</p><table-wrap id="t8" position="float"><label>Table 8.</label><caption><p>Detailed sentiment analysis by persuasive type.</p></caption><table id="table8" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Persuasive_type</td><td align="left" valign="top">Vader_compound</td><td align="left" valign="top">Textblob_polarity</td><td align="left" valign="top">Textblob_subjectivity</td></tr></thead><tbody><tr><td align="left" valign="top">Praise</td><td align="left" valign="top">0.799</td><td align="left" valign="top">0.513</td><td align="left" valign="top">0.615</td></tr><tr><td align="left" valign="top">Reminder</td><td align="left" valign="top">0.451</td><td align="left" valign="top">0.257</td><td align="left" valign="top">0.489</td></tr><tr><td align="left" valign="top">Reward</td><td align="left" valign="top">0.625</td><td align="left" valign="top">0.350</td><td align="left" valign="top">0.498</td></tr><tr><td align="left" valign="top">Suggestion</td><td align="left" valign="top">0.626</td><td align="left" valign="top">0.209</td><td align="left" valign="top">0.434</td></tr></tbody></table></table-wrap><table-wrap id="t9" position="float"><label>Table 9.</label><caption><p>Average sentiment by persuasive type.</p></caption><table id="table9" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Persuasive_type</td><td align="left" valign="top">Sentiment value</td></tr></thead><tbody><tr><td align="left" valign="top">Praise</td><td align="left" valign="top">0.798766</td></tr><tr><td align="left" valign="top">Reminder</td><td align="left" valign="top">0.451110</td></tr><tr><td align="left" valign="top">Reward</td><td align="left" valign="top">0.624842</td></tr><tr><td align="left" 
valign="top">Suggestion</td><td align="left" valign="top">0.626238</td></tr></tbody></table></table-wrap><p>The correlation matrix (<xref ref-type="table" rid="table10">Table 10</xref>) for sentiment analysis revealed distinct patterns in how different measures relate. VADER&#x2019;s compound score and TextBlob&#x2019;s polarity showed a moderate positive correlation (<italic>r</italic>=0.445), indicating that they generally agreed on sentiment direction, but each captured unique aspects of the text&#x2019;s emotional content. TextBlob&#x2019;s polarity demonstrated a stronger relationship with its subjectivity measure (<italic>r</italic>=0.508), suggesting that text with higher subjectivity tends to express more pronounced sentiments, according to TextBlob&#x2019;s analysis. In contrast, VADER&#x2019;s compound score had only a weak correlation with TextBlob&#x2019;s subjectivity (<italic>r</italic>=0.214), which indicates that VADER&#x2019;s sentiment detection operates more independently of how subjective the text is. The perfect correlations (<italic>r</italic>=1.000) along the diagonal represent each measure&#x2019;s correlation with itself, which is expected in a correlation matrix. The relationships suggest that while VADER and TextBlob share some common ground in sentiment detection, they each bring unique perspectives to the analysis. 
VADER is less influenced by text subjectivity, while TextBlob shows a stronger connection between subjective content and sentiment strength.</p><table-wrap id="t10" position="float"><label>Table 10.</label><caption><p>Correlation matrix for sentiment analysis.</p></caption><table id="table10" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Variable</td><td align="left" valign="top">Vader_compound</td><td align="left" valign="top">Textblob_polarity</td><td align="left" valign="top">Textblob_subjectivity</td></tr></thead><tbody><tr><td align="left" valign="top">Vader_compound</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>r</italic></td><td align="left" valign="top">1.000</td><td align="left" valign="top">0.445</td><td align="left" valign="top">0.214</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>P</italic> value</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table10fn1">a</xref></sup></td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">Textblob_polarity</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>r</italic></td><td align="left" valign="top">0.445</td><td align="left" valign="top">1.000</td><td align="left" valign="top">0.508</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>P</italic> value</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">&#x2014;</td><td 
align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top">Textblob_subjectivity</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>r</italic></td><td align="left" valign="top">0.214</td><td align="left" valign="top">0.508</td><td align="left" valign="top">1.000</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>P</italic> value</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top">&#x2014;</td></tr></tbody></table><table-wrap-foot><fn id="table10fn1"><p><sup>a</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap><p>The performance metric analysis (<xref ref-type="table" rid="table11">Table 11</xref>) revealed intriguing patterns when comparing the VADER and TextBlob sentiment analysis tools. VADER demonstrated moderate but realistic performance with an accuracy of 60%, indicating its ability to classify most messages correctly. Its high precision of 80% suggests strong reliability in its positive predictions, though its lower recall of 60% indicates that it might miss some positive cases. The <italic>F</italic><sub>1</sub>-score of 0.57, representing the harmonic mean of precision and recall, suggests a balanced but moderate overall performance. 
This pattern aligns with typical real-world sentiment analysis challenges where perfect classification is rare.</p><table-wrap id="t11" position="float"><label>Table 11.</label><caption><p>Performance metric results.</p></caption><table id="table11" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Metric</td><td align="left" valign="top">VADER<sup><xref ref-type="table-fn" rid="table11fn1">a</xref></sup></td><td align="left" valign="top">TextBlob</td></tr></thead><tbody><tr><td align="left" valign="top">Accuracy</td><td align="left" valign="top">0.69</td><td align="left" valign="top">0.57</td></tr><tr><td align="left" valign="top">Precision</td><td align="left" valign="top">0.76</td><td align="left" valign="top">0.61</td></tr><tr><td align="left" valign="top">Recall</td><td align="left" valign="top">0.69</td><td align="left" valign="top">0.57</td></tr><tr><td align="left" valign="top"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="top">0.68</td><td align="left" valign="top">0.58</td></tr></tbody></table><table-wrap-foot><fn id="table11fn1"><p><sup>a</sup>VADER: Valence Aware Dictionary and Sentiment Reasoner.</p></fn></table-wrap-foot></table-wrap><p>The stark contrast between VADER&#x2019;s realistic performance metrics and TextBlob&#x2019;s perfect scores warrants further investigation, potentially through expanded testing with a larger, more diverse dataset and the implementation of cross-validation techniques to ensure a more representative performance assessment. This finding underscores the importance of rigorous evaluation methodologies in sentiment analysis and suggests the need for careful consideration of tool selection based on specific use cases and requirements. The dataset had 1300 rows, which is moderate but may not be sufficient to evaluate sentiment analysis performance fully. Considering the analysis of a dataset of 1300 rows, several potential red flags emerge that warrant careful consideration. 
The dataset size is moderate but may not be comprehensive enough to fully evaluate sentiment analysis performance, particularly when considering the complexity and nuance of sentiment expression. A key concern is the need to verify whether there is a balanced representation across different sentiment categories, as imbalanced data could skew the evaluation results. The data showed some positive characteristics. Each message was unique and covered various health and wellness topics, including meal planning, portion control, and water intake. The messages also demonstrated varying lengths and complexity levels, which is generally good for testing robustness. However, TextBlob&#x2019;s perfect scores (1.0) across all metrics (accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-score) raise significant concerns about the evaluation process. These perfect scores could indicate that the ground-truth labels are too closely aligned with TextBlob&#x2019;s built-in classification approach or that the dataset may lack sufficiently complex or ambiguous examples that would typically challenge a sentiment analysis model.</p></sec><sec id="s3-2"><title>Emotion Detection Analysis</title><p>Emotion models serve as the essential building blocks for emotion detection systems by establishing how different emotions are represented and classified. These models are built on the premise that emotions can exist in different states, making it necessary to identify and differentiate between these distinct emotional states.</p><p>The chart in <xref ref-type="fig" rid="figure6">Figure 6</xref> compares accuracy, precision, recall, and <italic>F</italic><sub>1</sub>-scores across different emotions (anger, anticipation, joy, neutral, sadness, and trust). The model used a hybrid or modified version of the Plutchik wheel of emotions with some adaptations. The data revealed varying levels of performance across different emotional categories. 
Anger demonstrated reasonable performance, with perfect accuracy (1.0) but moderate findings for other metrics (precision: 0.75, recall: 0.72, and <italic>F</italic><sub>1</sub>-score: 0.73). Joy showed consistent moderate performance, with all metrics, except accuracy, showing values ranging from 0.84 to 0.85. The neutral emotion category had an accuracy score of 1.0. Sadness exhibited a precision score of 0.86 and a recall score of 0.85. For anticipation, all metrics were above 0.80. Trust showed mixed results, with high accuracy, precision, and <italic>F</italic><sub>1</sub>-score (0.88-1.00) but lower recall (0.81), suggesting some reliability issues in its detection. These variations in performance across different emotions point to potential areas for model improvement, particularly in balancing detection capabilities across all emotion categories.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Performance metrics by emotion.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e79558_fig06.png"/></fig></sec><sec id="s3-3"><title>Comparative Evaluation With Existing Frameworks</title><p>Our comprehensive benchmarking analysis demonstrated significant performance variations across different sentiment analysis frameworks when applied to health coaching messages.</p><p>As shown in <xref ref-type="table" rid="table12">Table 12</xref>, RoBERTa (robustly optimized BERT approach)-sentiment achieved the highest overall performance, with 88% accuracy and an <italic>F</italic><sub>1</sub>-score of 0.8264, substantially outperforming other models. Interestingly, VADER delivered reasonable accuracy (69%) with a remarkably fast inference time (0.03 ms). 
It was over 1000 times faster than transformer-based approaches and maintained competitive precision (0.7476).</p><table-wrap id="t12" position="float"><label>Table 12.</label><caption><p>Model performance.</p></caption><table id="table12" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Model</td><td align="left" valign="top">Accuracy</td><td align="left" valign="top">Precision</td><td align="left" valign="top">Recall</td><td align="left" valign="top"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="top">&#x03BA;</td><td align="left" valign="top">Avg_inference_time (ms)</td></tr></thead><tbody><tr><td align="left" valign="top">VADER<sup><xref ref-type="table-fn" rid="table12fn1">a</xref></sup></td><td align="left" valign="top">0.6900</td><td align="left" valign="top">0.7476</td><td align="left" valign="top">0.6900</td><td align="left" valign="top">0.6771</td><td align="left" valign="top">0.4582</td><td align="left" valign="top">0.03</td></tr><tr><td align="left" valign="top">TextBlob</td><td align="left" valign="top">0.5700</td><td align="left" valign="top">0.7133</td><td align="left" valign="top">0.5700</td><td align="left" valign="top">0.6167</td><td align="left" valign="top">0.3067</td><td align="left" valign="top">0.23</td></tr><tr><td align="left" valign="top">BERT<sup><xref ref-type="table-fn" rid="table12fn2">b</xref></sup>-base-sentiment</td><td align="left" valign="top">0.5500</td><td align="left" valign="top">0.5938</td><td align="left" valign="top">0.5500</td><td align="left" valign="top">0.5707</td><td align="left" valign="top">0.2645</td><td align="left" valign="top">39.87</td></tr><tr><td align="left" valign="top">RoBERTa<sup><xref ref-type="table-fn" rid="table12fn3">c</xref></sup>-sentiment</td><td align="left" valign="top">0.8800</td><td align="left" valign="top">0.7832</td><td align="left" valign="top">0.8800</td><td align="left" valign="top">0.8264</td><td align="left" valign="top">0.7801</td><td 
align="left" valign="top">29.93</td></tr></tbody></table><table-wrap-foot><fn id="table12fn1"><p><sup>a</sup>VADER: Valence Aware Dictionary and Sentiment Reasoner.</p></fn><fn id="table12fn2"><p><sup>b</sup>BERT: Bidirectional Encoder Representations from Transformers.</p></fn><fn id="table12fn3"><p><sup>c</sup>RoBERTa: robustly optimized BERT approach.</p></fn></table-wrap-foot></table-wrap><p>This performance-efficiency tradeoff reveals important insights for health communication systems. While transformer models like RoBERTa offer superior accuracy, traditional lexicon-based approaches like VADER provide an excellent balance between performance and computational efficiency for real-time applications. Notably, BERT-base-sentiment underperformed with only 55% accuracy despite its significant computational cost (39.87 ms), suggesting that general-purpose sentiment models may require domain adaptation for health coaching contexts. TextBlob showed moderate performance (57% accuracy) but fell considerably short of its previously reported perfect scores, highlighting the importance of rigorous benchmarking against multiple competing frameworks for realistic performance assessment.</p><p>These findings align with our overall methodology while addressing any concern about the lack of benchmarking. By providing empirical comparisons across diverse model architectures, we have established clear performance baselines and identified the strengths and limitations of different approaches for health coaching sentiment analysis.</p></sec><sec id="s3-4"><title>Feasibility and Validation of the Analytical Framework</title><p>Regarding tool feasibility, VADER (69% accuracy; &#x003C;1 ms/message; real-time capable, transparent, and domain-adaptable) is considered suitable for deployment, while RoBERTa (88% accuracy; approximately 30 ms/message; black-box) is considered suitable for high-accuracy applications. 
Regarding the ground truth, interrater agreement (&#x03BA;) was 0.78 for sentiment and 0.65 for emotion, with both exceeding the &#x2265;0.60 threshold. As synthetic data were used, future clinical validation is needed before real-world deployment.</p></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>The sentiment analysis findings revealed several significant patterns and methodological considerations when examining persuasive messaging through computational linguistics. Text sentiment analysis evaluated and classified the emotional content of writing, assigning it as positive, negative, or neutral based on its overall tone [<xref ref-type="bibr" rid="ref30">30</xref>]. A notable strength lies in the dual-tool approach utilizing both VADER and GPT-2&#x2013;based TextBlob, which provided complementary perspectives on sentiment analysis. The combination of VADER and TextBlob, along with GPT-2&#x2013;generated content, provided rich insights into how different types of persuasive messages carry and convey emotional content, demonstrating both the capabilities and limitations of current NLP tools in understanding the nuances of communication strategies [<xref ref-type="bibr" rid="ref31">31</xref>]. The moderate correlation (0.445) between VADER and TextBlob suggests that while they generally agree on sentiment direction, they capture different aspects of emotional content, with TextBlob showing stronger ties to subjectivity (0.508) compared to VADER (0.214). 
However, this raises the following critical weakness in current sentiment analysis approaches: the potential limitations of pretrained language models like GPT-2 in accurately capturing nuanced emotional contexts, particularly given the dataset&#x2019;s overwhelming positive skew (93.7%).</p><p>The study identified significant gaps in semantic understanding, particularly in how different persuasive types (reminder, reward, suggestion, and praise) maintain varying levels of emotional intensity while sharing similar neutral components (0.73&#x2010;0.80). This paradox suggests potential limitations in the ability of current sentiment analysis tools to differentiate between genuine positive sentiment and formulaic positive language common in persuasive communication. Furthermore, outliers, especially in negative sentiments (&#x2212;0.75 to &#x2212;1.0), indicated potential edge cases where current sentiment analysis tools might fail to capture the full complexity of persuasive communication strategies. Sentiment analysis in health coaching faces challenges in matching text meaning with emotional labels, as tools typically analyze word patterns rather than grasping deeper semantic nuances.</p><p>While the findings align with established persuasive communication theories, they suggest limitations in NLP approaches. Future research could benefit from incorporating more advanced language models beyond GPT-2, such as transformer-based architectures that better capture the subtle interplay between sentiment and persuasive intent. Additionally, the study highlights the need for more sophisticated sentiment analysis tools that can better account for context-dependent sentiment variations and the relationship between linguistic features and persuasive effectiveness. 
By combining different types of data inputs, comprehensive sentiment analysis can provide deeper insights into how people think and feel about various topics [<xref ref-type="bibr" rid="ref32">32</xref>].</p><p>In the performance comparison, VADER (69% accuracy) outperformed TextBlob (57% accuracy) for health coaching message classification. This finding challenges the assumption that more sophisticated polarity calculation methods necessarily yield better results. VADER&#x2019;s rule-based approach, specifically designed for social media text, appears better suited to the conversational tone of health coaching messages than TextBlob&#x2019;s pattern-based sentiment extraction.</p><p>Our comparative benchmarking revealed significant performance differences across sentiment analysis frameworks, with RoBERTa-sentiment achieving the highest accuracy (88%) and <italic>F</italic><sub>1</sub>-score (0.8264), and VADER offering a compelling balance between reasonable accuracy (69%) and exceptional efficiency (0.03 ms inference time). This performance-efficiency tradeoff has important implications for health coaching applications, where real-time interaction may prioritize VADER&#x2019;s speed, while more resource-intensive offline analysis might benefit from RoBERTa&#x2019;s superior accuracy. Notably, BERT-base-sentiment&#x2019;s underperformance (55% accuracy), despite high computational cost, suggests that general-purpose sentiment models may require domain adaptation for health contexts. These findings challenge our initial TextBlob results (moderate 57% accuracy vs perfect scores reported in <xref ref-type="table" rid="table12">Table 12</xref>) and underscore the critical importance of comprehensive benchmarking against multiple frameworks to establish realistic performance expectations and guide model selection based on specific application requirements in health communication systems.</p><p>To address the concerns, several improvements could be implemented. 
The dataset should be expanded with more diverse examples, including edge cases and ambiguous sentiments that better reflect real-world complexity. Proper cross-validation techniques should be implemented to ensure robust evaluation. The labeling process should be reviewed for potential biases, and results should be compared with additional sentiment analysis tools beyond just VADER. Finally, all preprocessing steps should be carefully examined to prevent any data leakage that might artificially inflate performance metrics. These adjustments would help ensure a more reliable and realistic evaluation of sentiment analysis performance.</p></sec><sec id="s4-2"><title>Conclusion</title><p>This study demonstrated the effectiveness of combining multiple sentiment analysis approaches (VADER and TextBlob) with emotion detection tools (Text2Emotion and NRCLex) for analyzing persuasive health-related messages. The findings revealed a strong positive bias in the dataset, with 93.7% of messages classified as positive, suggesting a deliberate strategy in health coaching communication. The analysis showed distinct patterns across different persuasive types: praise messages consistently demonstrated the highest sentiment scores (vader_compound: 0.799), while reminder messages demonstrated the lowest scores (vader_compound: 0.451). Reward and suggestion messages maintained moderate positive sentiments (approximately 0.625). The moderate correlation (0.445) between VADER and TextBlob sentiment scores indicates that while these tools generally agree on sentiment direction, they capture different aspects of emotional content. Performance metrics revealed interesting contrasts between the tools, with TextBlob showing perfect scores across all metrics and VADER demonstrating more realistic performance (60% accuracy), suggesting potential evaluation biases that warrant further investigation. 
The emotion detection analysis showed varying performance across different emotions, with the anger and neutral categories achieving reasonable results and the trust category demonstrating mixed results.</p><p>The research highlights the potential and limitations of NLP tools in understanding persuasive health communication. Balancing detection capabilities across different emotional categories remains problematic, with performance metrics varying significantly between emotions. While the combined approach provides rich insights, the overwhelming positive skew in the dataset and the varying performance across different emotion categories suggest areas for improvement in future research, particularly in capturing nuanced emotional contexts and the relationship between linguistic features and persuasive effectiveness. The significant differences in performance metrics between TextBlob (perfect scores) and VADER (more moderate scores) suggest potential evaluation methodology issues that require reconciliation.</p><p>In terms of limitations, TextBlob&#x2019;s perfect performance metrics (1.0 across all metrics) suggest potential evaluation biases or limitations in the test dataset. The dataset size (1300 rows) may not have been comprehensive enough to evaluate sentiment analysis performance across all contexts fully, and future work should implement deeper interpretative analysis. The current approach may not effectively differentiate between genuine positive sentiment and formulaic positive language common in persuasive communication. In future research, we plan to apply this framework of RL and PT to real-world situations by implementing it into the development of cloud-based applications. We also suggest further exploration of the dataset in terms of comparing multiple sentiment and emotion analysis methods in this context. 
To address synthetic data limitations in future research, we plan to collaborate with health care institutions to access anonymized real messages and recruit 3&#x2010;5 health coaching professionals for validation studies. These experts would review the dataset and assess appropriateness for patient communication. This approach would provide crucial validation of synthetic datasets against real-world standards, ensuring that our findings can be applied to actual health care settings.</p><p>This proof-of-concept study demonstrates the feasibility of multimodal sentiment and emotion analysis for synthetically generated persuasive health coaching messages. The findings provide a methodological foundation and baseline performance metrics under controlled conditions. However, significant limitations in ecological validity must be addressed before clinical translation. The synthetic nature of our dataset likely inflated tool performance compared with noisy, real-world patient communication. Future research must validate the findings with authentic health coaching messages, assess robustness to informal language and diverse populations, and establish links between sentiment/emotion metrics and clinical outcomes. We recommend this framework as a foundation for research and development rather than a deployment-ready solution for clinical settings.</p></sec></sec></body><back><ack><p>Some parts of this work utilized artificial intelligence (AI) assistance, including AI-based grammar and style checking tools (Claude, Grammarly, and Paraphrasing) for editorial assistance. The authors reviewed and verified the text and take full responsibility for the content.</p></ack><notes><sec><title>Funding</title><p>This paper is part of the Persuasive Information System (PERSIS) project. 
The work was supported by a university research grant (code: GUP-2023-036).</p></sec><sec><title>Data Availability</title><p>The code used for data generation, preprocessing, sentiment analysis, emotion detection, and performance evaluation is publicly available on GitHub [<xref ref-type="bibr" rid="ref29">29</xref>]. All data in this study have been synthetically generated using GPT-2 and contain no human participant information, personal health data, or identifiable information. Therefore, there are no privacy concerns, ethical restrictions, or regulatory barriers preventing full public sharing.</p></sec></notes><fn-group><fn fn-type="con"><p>MAMZ conceptualized and designed the study, generated and curated the dataset using GPT-2, developed all software and code, conducted all formal analyses (including sentiment and emotion detection), created all visualizations, and wrote the original manuscript draft. NMA secured funding (grant GUP-2023-036), supervised the project, provided methodological guidance, administered the research project, and led the manuscript review and editing process. JKC contributed to methodology development, assisted with formal analysis and software optimization, and participated in manuscript review. 
All authors validated the research findings, contributed to manuscript revision, and approved the final version for publication.</p></fn><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">FN</term><def><p>false negative</p></def></def-item><def-item><term id="abb3">FP</term><def><p>false positive</p></def></def-item><def-item><term id="abb4">GAN</term><def><p>generative adversarial network</p></def></def-item><def-item><term id="abb5">GPT</term><def><p>Generative Pretrained Transformer</p></def></def-item><def-item><term id="abb6">GPT-2</term><def><p>Generative Pretrained Transformer-2</p></def></def-item><def-item><term id="abb7">InfoGAN</term><def><p>Information-Theoretic Generative Adversarial Network</p></def></def-item><def-item><term id="abb8">MLE</term><def><p>maximum likelihood estimation</p></def></def-item><def-item><term id="abb9">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term id="abb10">NRCLex</term><def><p>National Research Council Lexicon</p></def></def-item><def-item><term id="abb11">PSD</term><def><p>persuasive system design</p></def></def-item><def-item><term id="abb12">PT</term><def><p>persuasive technology</p></def></def-item><def-item><term id="abb13">RL</term><def><p>reinforcement learning</p></def></def-item><def-item><term id="abb14">RoBERTa</term><def><p>robustly optimized BERT approach</p></def></def-item><def-item><term id="abb15">TF-GAN</term><def><p>Time and Frequency Domain-Based Generative Adversarial Network</p></def></def-item><def-item><term id="abb16">TN</term><def><p>true negative</p></def></def-item><def-item><term id="abb17">TP</term><def><p>true positive</p></def></def-item><def-item><term id="abb18">TT-GAN</term><def><p>Text-to-Text Generative Adversarial Network</p></def></def-item><def-item><term 
id="abb19">VADER</term><def><p>Valence Aware Dictionary and Sentiment Reasoner</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Beinema</surname><given-names>T</given-names> </name><name name-style="western"><surname>Op den Akker</surname><given-names>H</given-names> </name><name name-style="western"><surname>Hurmuz</surname><given-names>M</given-names> </name><name name-style="western"><surname>Jansen-Kosterink</surname><given-names>S</given-names> </name><name name-style="western"><surname>Hermens</surname><given-names>H</given-names> </name></person-group><article-title>Automatic topic selection for long-term interaction with embodied conversational agents in health coaching: a micro-randomized trial</article-title><source>Internet Interv</source><year>2022</year><month>03</month><volume>27</volume><fpage>100502</fpage><pub-id pub-id-type="doi">10.1016/j.invent.2022.100502</pub-id><pub-id pub-id-type="medline">35198412</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>op den Akker</surname><given-names>H</given-names> </name><name name-style="western"><surname>op den Akker</surname><given-names>R</given-names> </name><name name-style="western"><surname>Beinema</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Council of coaches - a novel holistic behavior change coaching approach</article-title><conf-name>4th International Conference on Information and Communication Technologies for Ageing Well and e-Health</conf-name><conf-date>Mar 22-23, 2018</conf-date><pub-id pub-id-type="doi">10.5220/0006787702190226</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Abdullah</surname><given-names>AS</given-names> </name><name name-style="western"><surname>Gaehde</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bickmore</surname><given-names>T</given-names> </name></person-group><article-title>A tablet based embodied conversational agent to promote smoking cessation among veterans: a feasibility study</article-title><source>J Epidemiol Glob Health</source><year>2018</year><month>12</month><volume>8</volume><issue>3-4</issue><fpage>225</fpage><lpage>230</lpage><pub-id pub-id-type="doi">10.2991/j.jegh.2018.08.104</pub-id><pub-id pub-id-type="medline">30864768</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Albers</surname><given-names>N</given-names> </name><name name-style="western"><surname>Neerincx</surname><given-names>M</given-names> </name><name name-style="western"><surname>Brinkman</surname><given-names>WP</given-names> </name></person-group><article-title>Reinforcement learning-based persuasion by a conversational agent for behavior change</article-title><access-date>2026-04-02</access-date><conf-name>33rd Benelux Conference on Artificial Intelligence and 30th Belgian-Dutch Conference on Machine Learning</conf-name><conf-date>Nov 10-12, 2021</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.researchgate.net/publication/362233761_Reinforcement_Learning-Based_Persuasion_by_a_Conversational_Agent_for_Behavior_Change">https://www.researchgate.net/publication/362233761_Reinforcement_Learning-Based_Persuasion_by_a_Conversational_Agent_for_Behavior_Change</ext-link></comment></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Oinas-Kukkonen</surname><given-names>H</given-names> </name><name name-style="western"><surname>Harjumaa</surname><given-names>M</given-names> </name></person-group><article-title>Persuasive systems design: key issues, process model, and system features</article-title><source>Communications of the Association for Information Systems</source><year>2009</year><month>03</month><volume>24</volume><fpage>2428</fpage><pub-id pub-id-type="doi">10.17705/1CAIS.02428</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>C</given-names> </name><name name-style="western"><surname>Su</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>WJ</given-names> </name></person-group><article-title>Text-to-text generative adversarial networks</article-title><conf-name>2018 International Joint Conference on Neural Networks (IJCNN)</conf-name><conf-date>Jul 8-13, 2018</conf-date><pub-id pub-id-type="doi">10.1109/IJCNN.2018.8489624</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Cong</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Z</given-names> </name><etal/></person-group><article-title>Text feature adversarial learning for text generation with knowledge transfer from GPT2</article-title><source>IEEE Trans Neural Netw Learning Syst</source><year>2024</year><volume>35</volume><issue>5</issue><fpage>6558</fpage><lpage>6569</lpage><pub-id pub-id-type="doi">10.1109/TNNLS.2022.3210975</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="confproc"><person-group 
person-group-type="author"><name name-style="western"><surname>Nowozin</surname><given-names>S</given-names> </name><name name-style="western"><surname>Cseke</surname><given-names>B</given-names> </name><name name-style="western"><surname>Tomioka</surname><given-names>R</given-names> </name></person-group><article-title>f-GAN: training generative neural samplers using variational divergence minimization</article-title><conf-name>30th International Conference on Neural Information Processing Systems</conf-name><conf-date>Dec 5-10, 2016</conf-date><pub-id pub-id-type="doi">10.5555/3157096.3157127</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Arjovsky</surname><given-names>M</given-names> </name><name name-style="western"><surname>Chintala</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bottou</surname><given-names>L</given-names> </name></person-group><article-title>Wasserstein GAN</article-title><source>arXiv</source><comment>Preprint posted online on  Jan 26, 2017</comment><pub-id pub-id-type="doi">10.48550/arXiv.1701.07875</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>X</given-names> </name><name name-style="western"><surname>Duan</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Houthooft</surname><given-names>R</given-names> </name><name name-style="western"><surname>Schulman</surname><given-names>J</given-names> </name><name name-style="western"><surname>Sutskever</surname><given-names>I</given-names> </name><name name-style="western"><surname>Abbeel</surname><given-names>P</given-names> </name></person-group><article-title>InfoGAN: interpretable representation learning by information maximizing generative adversarial 
nets</article-title><conf-name>30th International Conference on Neural Information Processing Systems</conf-name><conf-date>Dec 5-10, 2016</conf-date><pub-id pub-id-type="doi">10.5555/3157096.3157340</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de Rosa</surname><given-names>GH</given-names> </name><name name-style="western"><surname>Papa</surname><given-names>JP</given-names> </name></person-group><article-title>A survey on text generation using generative adversarial networks</article-title><source>Pattern Recognit DAGM</source><year>2021</year><month>11</month><volume>119</volume><fpage>108098</fpage><pub-id pub-id-type="doi">10.1016/j.patcog.2021.108098</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Radford</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Child</surname><given-names>R</given-names> </name><name name-style="western"><surname>Luan</surname><given-names>D</given-names> </name><name name-style="western"><surname>Amodei</surname><given-names>D</given-names> </name><name name-style="western"><surname>Sutskever</surname><given-names>I</given-names> </name></person-group><article-title>Language models are unsupervised multitask learners</article-title><source>OpenAI</source><access-date>2026-04-02</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf">https://cdn.openai.com/better-language-models/language_models_are_unsupervised_multitask_learners.pdf</ext-link></comment></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="web"><person-group 
person-group-type="author"><name name-style="western"><surname>Radford</surname><given-names>A</given-names> </name><name name-style="western"><surname>Narasimhan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Salimans</surname><given-names>T</given-names> </name><name name-style="western"><surname>Sutskever</surname><given-names>I</given-names> </name></person-group><article-title>Improving language understanding by generative pre-training</article-title><source>OpenAI</source><access-date>2026-04-02</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf">https://cdn.openai.com/research-covers/language-unsupervised/language_understanding_paper.pdf</ext-link></comment></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Devlin</surname><given-names>J</given-names> </name><name name-style="western"><surname>Chang</surname><given-names>MW</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>K</given-names> </name><name name-style="western"><surname>Toutanova</surname><given-names>K</given-names> </name></person-group><article-title>BERT: pre-training of deep bidirectional transformers for language understanding</article-title><conf-name>2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</conf-name><conf-date>Jun 2-7, 2019</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://aclanthology.org/N19-1423.pdf">https://aclanthology.org/N19-1423.pdf</ext-link></comment></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Karak</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Kunal</surname><given-names>K</given-names> </name><name name-style="western"><surname>Darapaneni</surname><given-names>N</given-names> </name><name name-style="western"><surname>Paduri</surname><given-names>AR</given-names> </name></person-group><article-title>Implementation of GPT models for text generation in healthcare domain</article-title><source>EAI Endorsed Trans AI Robotics</source><year>2024</year><volume>3</volume><pub-id pub-id-type="doi">10.4108/airo.4082</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Al-Amin</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ali</surname><given-names>MS</given-names> </name><name name-style="western"><surname>Salam</surname><given-names>A</given-names> </name><etal/></person-group><article-title>History of generative artificial intelligence (AI) chatbots: past, present, and future development</article-title><source>arXiv</source><comment>Preprint posted online on  Feb 4, 2024</comment><pub-id pub-id-type="doi">10.48550/arXiv.2402.05122</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Machov&#x00E1;</surname><given-names>K</given-names> </name><name name-style="western"><surname>Szab&#x00F3;ova</surname><given-names>M</given-names> </name><name name-style="western"><surname>Parali&#x010D;</surname><given-names>J</given-names> </name><name name-style="western"><surname>Mi&#x010D;ko</surname><given-names>J</given-names> </name></person-group><article-title>Detection of emotion by text analysis using machine learning</article-title><source>Front Psychol</source><year>2023</year><volume>14</volume><fpage>1190326</fpage><pub-id pub-id-type="doi">10.3389/fpsyg.2023.1190326</pub-id><pub-id 
pub-id-type="medline">37799520</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Acheampong</surname><given-names>FA</given-names> </name><name name-style="western"><surname>Wenyu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Nunoo&#x2010;Mensah</surname><given-names>H</given-names> </name></person-group><article-title>Text&#x2010;based emotion detection: advances, challenges, and opportunities</article-title><source>Engineering Reports</source><year>2020</year><month>07</month><volume>2</volume><issue>7</issue><fpage>e12189</fpage><pub-id pub-id-type="doi">10.1002/eng2.12189</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Whatley</surname><given-names>S</given-names> </name></person-group><article-title>Plutchik&#x2019;s wheel of emotion</article-title><source>Simon Whatley</source><access-date>2026-04-02</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.simonwhatley.co.uk/writing/plutchik-wheel-of-emotion/">https://www.simonwhatley.co.uk/writing/plutchik-wheel-of-emotion/</ext-link></comment></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Hutto</surname><given-names>C</given-names> </name><name name-style="western"><surname>Gilbert</surname><given-names>E</given-names> </name></person-group><article-title>VADER: a parsimonious rule-based model for sentiment analysis of social media text</article-title><conf-name>Eighth International AAAI Conference on Weblogs and Social Media</conf-name><conf-date>Jun 1-4, 2014</conf-date><pub-id pub-id-type="doi">10.1609/icwsm.v8i1.14550</pub-id></nlm-citation></ref><ref 
id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gujjar</surname><given-names>P</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>P</given-names> </name></person-group><article-title>Sentiment analysis: Textblob for decision making</article-title><source>International Journal of Scientific Research &#x0026; Engineering Trends</source><year>2021</year><access-date>2026-04-02</access-date><volume>7</volume><issue>2</issue><fpage>1097</fpage><lpage>1099</lpage><comment><ext-link ext-link-type="uri" xlink:href="https://ijsret.com/wp-content/uploads/2021/03/IJSRET_V7_issue2_289.pdf">https://ijsret.com/wp-content/uploads/2021/03/IJSRET_V7_issue2_289.pdf</ext-link></comment></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hakam</surname><given-names>N</given-names> </name><name name-style="western"><surname>Guzman Fuentes</surname><given-names>JL</given-names> </name><name name-style="western"><surname>Nabavizadeh</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Outcomes in randomized clinical trials testing changes in daily water intake: a systematic review</article-title><source>JAMA Netw Open</source><year>2024</year><month>11</month><day>4</day><volume>7</volume><issue>11</issue><fpage>e2447621</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2024.47621</pub-id><pub-id pub-id-type="medline">39585691</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hanson</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Kattelmann</surname><given-names>KK</given-names> </name><name 
name-style="western"><surname>McCormack</surname><given-names>LA</given-names> </name><etal/></person-group><article-title>Cooking and meal planning as predictors of fruit and vegetable intake and BMI in first-year college students</article-title><source>Int J Environ Res Public Health</source><year>2019</year><month>07</month><day>11</day><volume>16</volume><issue>14</issue><fpage>2462</fpage><pub-id pub-id-type="doi">10.3390/ijerph16142462</pub-id><pub-id pub-id-type="medline">31373293</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="other"><person-group person-group-type="author"><name name-style="western"><surname>Chaturvedi</surname><given-names>J</given-names> </name><name name-style="western"><surname>Shamsutdinova</surname><given-names>D</given-names> </name><name name-style="western"><surname>Zimmer</surname><given-names>F</given-names> </name><etal/></person-group><article-title>Sample size in natural language processing within healthcare research</article-title><source>SSRN</source><comment>Preprint posted online on  Sep 6, 2023</comment><pub-id pub-id-type="doi">10.2139/ssrn.4553964</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Isnan</surname><given-names>M</given-names> </name><name name-style="western"><surname>Elwirehardja</surname><given-names>GN</given-names> </name><name name-style="western"><surname>Pardamean</surname><given-names>B</given-names> </name></person-group><article-title>Sentiment analysis for TikTok review using VADER sentiment and SVM model</article-title><source>Procedia Comput Sci</source><year>2023</year><volume>227</volume><fpage>168</fpage><lpage>175</lpage><pub-id pub-id-type="doi">10.1016/j.procs.2023.10.514</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Abiola</surname><given-names>O</given-names> </name><name name-style="western"><surname>Abayomi-Alli</surname><given-names>A</given-names> </name><name name-style="western"><surname>Tale</surname><given-names>OA</given-names> </name><name name-style="western"><surname>Misra</surname><given-names>S</given-names> </name><name name-style="western"><surname>Abayomi-Alli</surname><given-names>O</given-names> </name></person-group><article-title>Sentiment analysis of COVID-19 tweets from selected hashtags in Nigeria using VADER and Text Blob analyser</article-title><source>J Electr Syst Inf Technol</source><year>2023</year><month>01</month><volume>10</volume><issue>1</issue><fpage>1</fpage><pub-id pub-id-type="doi">10.1186/s43067-023-00070-9</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Awais</surname><given-names>M</given-names> </name><name name-style="western"><surname>Durrani</surname><given-names>S</given-names> </name></person-group><article-title>Sentiment and emotion analysis on consumer review using NRCLex</article-title><access-date>2026-04-02</access-date><conf-name>2nd International Conference on Engineering, Natural and Social Sciences</conf-name><conf-date>Apr 4-6, 2023</conf-date><comment><ext-link ext-link-type="uri" xlink:href="https://as-proceeding.com/index.php/icensos/article/view/514">https://as-proceeding.com/index.php/icensos/article/view/514</ext-link></comment></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Aslam</surname><given-names>N</given-names> </name><name name-style="western"><surname>Rustam</surname><given-names>F</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>E</given-names> </name><name 
name-style="western"><surname>Washington</surname><given-names>PB</given-names> </name><name name-style="western"><surname>Ashraf</surname><given-names>I</given-names> </name></person-group><article-title>Sentiment analysis and emotion detection on cryptocurrency related tweets using ensemble LSTM-GRU model</article-title><source>IEEE Access</source><year>2022</year><volume>10</volume><fpage>39313</fpage><lpage>39324</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2022.3165621</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="web"><article-title>Use GPT2</article-title><source>GitHub</source><access-date>2026-02-04</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://github.com/harryaiman21/jmirAiman/tree/a3416f2c9700f6f2e6c16d51fcf851007e500b61/USE%20GPT2">https://github.com/harryaiman21/jmirAiman/tree/a3416f2c9700f6f2e6c16d51fcf851007e500b61/USE%20GPT2</ext-link></comment></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jim</surname><given-names>JR</given-names> </name><name name-style="western"><surname>Talukder</surname><given-names>MAR</given-names> </name><name name-style="western"><surname>Malakar</surname><given-names>P</given-names> </name><name name-style="western"><surname>Kabir</surname><given-names>MM</given-names> </name><name name-style="western"><surname>Nur</surname><given-names>K</given-names> </name><name name-style="western"><surname>Mridha</surname><given-names>MF</given-names> </name></person-group><article-title>Recent advancements and challenges of NLP-based sentiment analysis: a state-of-the-art review</article-title><source>Nat Lang Process J</source><year>2024</year><month>03</month><volume>6</volume><fpage>100059</fpage><pub-id pub-id-type="doi">10.1016/j.nlp.2024.100059</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Singgalen</surname><given-names>YA</given-names> </name></person-group><article-title>Implementation of Perspective, Vader, and TextBlob in toxicity and sentiment analysis of food and tourism</article-title><source>Journal of Information System Research</source><year>2024</year><volume>5</volume><issue>4</issue><fpage>1292</fpage><lpage>1302</lpage><pub-id pub-id-type="doi">10.47065/josh.v5i4.5632</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Anderson</surname><given-names>T</given-names> </name><name name-style="western"><surname>Sarkar</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kelley</surname><given-names>R</given-names> </name></person-group><article-title>Analyzing public sentiment on sustainability: a comprehensive review and application of sentiment analysis techniques</article-title><source>Nat Lang Process J</source><year>2024</year><month>09</month><volume>8</volume><fpage>100097</fpage><pub-id pub-id-type="doi">10.1016/j.nlp.2024.100097</pub-id></nlm-citation></ref></ref-list></back></article>