<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v10i1e78054</article-id>
      <article-id pub-id-type="pmid">41482273</article-id>
      <article-id pub-id-type="doi">10.2196/78054</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Research Letter</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Research Letter</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Identifying Patient Sentiment in Atopic Dermatitis Treatment: Large Language Model Approach</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Sarvestan</surname>
            <given-names>Javad</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>O'Connor</surname>
            <given-names>Karen</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sauvayre</surname>
            <given-names>Romy</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Li</surname>
            <given-names>Zhi</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Cummins</surname>
            <given-names>Jack Alexander</given-names>
          </name>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-0978-0421</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Yu</surname>
            <given-names>JiaDe</given-names>
          </name>
          <degrees>MS, MD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <address>
            <institution>Department of Dermatology</institution>
            <institution>Massachusetts General Hospital</institution>
            <addr-line>50 Staniford St</addr-line>
            <addr-line>Boston, MA, 02114</addr-line>
            <country>United States</country>
            <phone>1 617 726 2914</phone>
            <email>jdyu@mgb.org</email>
          </address>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0874-3170</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Princeton University</institution>
        <addr-line>Princeton, NJ</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Dermatology</institution>
        <institution>Massachusetts General Hospital</institution>
        <addr-line>Boston, MA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: JiaDe Yu <email>jdyu@mgb.org</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2026</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>2</day>
        <month>1</month>
        <year>2026</year>
      </pub-date>
      <volume>10</volume>
      <elocation-id>e78054</elocation-id>
      <history>
        <date date-type="received">
          <day>26</day>
          <month>5</month>
          <year>2025</year>
        </date>
        <date date-type="rev-request">
          <day>20</day>
          <month>8</month>
          <year>2025</year>
        </date>
        <date date-type="rev-recd">
          <day>17</day>
          <month>11</month>
          <year>2025</year>
        </date>
        <date date-type="accepted">
          <day>5</day>
          <month>12</month>
          <year>2025</year>
        </date>
      </history>
      <copyright-statement>©Jack Alexander Cummins, JiaDe Yu. Originally published in JMIR Formative Research (https://formative.jmir.org), 02.01.2026.</copyright-statement>
      <copyright-year>2026</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2026/1/e78054" xlink:type="simple"/>
      <abstract>
        <p>This study demonstrates that GPT-4o outperforms traditional natural language processing methods in accurately analyzing patient sentiment toward atopic dermatitis treatments on Reddit, enabling more nuanced and reliable extraction of real-world patient perspectives from large-scale social media data.</p>
      </abstract>
      <kwd-group>
        <kwd>atopic dermatitis</kwd>
        <kwd>eczema</kwd>
        <kwd>large language model</kwd>
        <kwd>social media</kwd>
        <kwd>dupilumab</kwd>
        <kwd>upadacitinib</kwd>
        <kwd>abrocitinib</kwd>
        <kwd>tralokinumab</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <p>Atopic dermatitis (AD) treatment has broadened since 2017, with several new, targeted, highly efficacious systemic therapies. Patients’ personal experiences with these novel therapies are largely unknown and unreported. Reddit is a rich source of patient perspectives on dermatology [<xref ref-type="bibr" rid="ref1">1</xref>]. Previous studies have used traditional natural language processing (NLP) methods to extract meaningful information from unstructured social media data [<xref ref-type="bibr" rid="ref2">2</xref>], but more relevant findings could potentially be extracted by applying large language models (LLMs) to such language data, including large-scale Reddit datasets, because tailored prompts can extract more specific and nuanced insights.</p>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Overview</title>
        <p>We used the Pushshift Reddit dataset to access all Reddit comments (n=8,543,388) posted prior to January 1, 2024, on various dermatology-related subreddits (<xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, Table S1). We analyzed all comments containing the generic or brand name of four AD therapies as of January 1, 2024: dupilumab, upadacitinib, abrocitinib, and tralokinumab. This resulted in 27,272 comments. Our novel approach applied OpenAI’s developer application programming interface (API) to access GPT-4o [<xref ref-type="bibr" rid="ref3">3</xref>], OpenAI’s cutting-edge LLM, for sentiment analysis to determine whether comments indicated positive, neutral, or negative impressions of the medications. The GPT-4o API was configured with a temperature setting of 0.0 to ensure consistent and deterministic responses across all analyses, eliminating variability in sentiment classification. The complete prompt used for the GPT-4o sentiment analysis is provided in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p>
      </sec>
      <sec>
        <title>Model Comparison</title>
        <p>We compared 3 automated tools against manual sentiment analysis to identify the most accurate tool. Two dermatologists (JY and Hadley Johnson, MD) independently reviewed 100 randomly selected comments (25 comments for each medication) and reached concordance for 84 of 100 sentiments (κ=0.75, concordance=0.84). The dermatologists classified the comments as positive, neutral, or negative based on their overall expressed sentiment toward the medication without predefined annotation guidelines. Disagreements typically occurred for comments containing ambiguous treatment responses, mixed sentiments about medication efficacy, or informational discussions rather than clear personal sentiment expressions. The 84 concordant comments were used as our reference standard for testing GPT-4o and two traditional NLP sentiment analysis methods: Valence Aware Dictionary and Sentiment Reasoner (VADER) [<xref ref-type="bibr" rid="ref4">4</xref>], a lexicon-based tool specifically attuned to social media text, and distilbert-base-uncased-finetuned-sst-2-english (DistilBERT), a model fine-tuned on the Stanford Sentiment Treebank for sentiment analysis [<xref ref-type="bibr" rid="ref5">5</xref>]. The DistilBERT model was accessed through the Hugging Face Transformers library using its high-level pipeline interface. VADER and DistilBERT were used with default parameters. These models were selected as commonly used general-purpose sentiment analysis tools.</p>
      </sec>
      <sec>
        <title>LLM Application</title>
        <p>We applied GPT-4o to evaluate the sentiment of the comments as positive, neutral, or negative. This resulted in 28,889 total analyses, as some comments listed more than one medication. CIs were calculated for the proportion of positive comments and the proportion of negative comments using a binomial proportion CI.</p>
        <p>Based on the high κ statistic (κ=0.73) of the LLM for predicting sentiment, we used GPT-4o to analyze sentiment in the full set of 27,272 comments mentioning the medications (<xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>, Figure S1).</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <p>This study analyzed publicly available, deidentified Reddit comments and does not constitute human subjects research as it involved secondary analysis of existing public data that cannot be linked to identifiable individuals.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <p>GPT-4o demonstrated superior performance across standard classification metrics (<xref ref-type="table" rid="table1">Table 1</xref>), with precision of 0.87, recall of 0.82, <italic>F</italic><sub>1</sub>-score of 0.82 (support-weighted across classes), and accuracy of 0.82, compared to VADER (precision: 0.42; recall: 0.38; <italic>F</italic><sub>1</sub>-score: 0.37; accuracy: 0.38) and DistilBERT (precision: 0.58; recall: 0.56; <italic>F</italic><sub>1</sub>-score: 0.56; accuracy: 0.56). The medications with the highest proportion of comments in the full dataset tagged as positive by GPT-4o (<xref rid="figure1" ref-type="fig">Figure 1</xref>) were upadacitinib (673/2107, 31.9%) and dupilumab (7724/25,926, 29.8%). Abrocitinib had a smaller percentage of negative sentiments than other medications (<xref rid="figure1" ref-type="fig">Figure 1</xref>). Dupilumab and upadacitinib had a high percentage of positive sentiment, which may suggest that many individuals are satisfied with the efficacy and safety of these medications.</p>
      <p>Examples from the dataset illustrate the range of patient experiences. Positive comments expressed enthusiasm about treatment options, such as one patient’s response to abrocitinib approval: “Just saw it was just approved! Calling my dermo on Monday!” Negative sentiment reflected disappointment with treatment outcomes, as seen in one dupilumab user’s comment: “I thought Dupixent would provide relief too, but I’m still itchy and inflamed.” Neutral comments predominantly involved informational exchanges, such as questions about accessing treatment and questions about clinical trial participation.</p>
      <table-wrap position="float" id="table1">
        <label>Table 1</label>
        <caption>
          <p>In the initial subset of 84 manually evaluated comments, the large language model–based approach showed markedly stronger agreement with human judgment (κ=0.73) than distilbert-base-uncased-finetuned-sst-2-english (DistilBERT; κ=0.33) or Valence Aware Dictionary and Sentiment Reasoner (VADER; κ=0.06).</p>
        </caption>
        <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
          <col width="470"/>
          <col width="190"/>
          <col width="170"/>
          <col width="170"/>
          <thead>
            <tr valign="top">
              <td>Model agreement with dermatologist judgment</td>
              <td>GPT-4o</td>
              <td>VADER</td>
              <td>DistilBERT</td>
            </tr>
          </thead>
          <tbody>
            <tr valign="top">
              <td>Positive comments (n=26), n (%)</td>
              <td>24 (92)</td>
              <td>16 (62)</td>
              <td>15 (58)</td>
            </tr>
            <tr valign="top">
              <td>Neutral comments (n=41), n (%)</td>
              <td>28 (68)</td>
              <td>13 (32)</td>
              <td>21 (51)</td>
            </tr>
            <tr valign="top">
              <td>Negative comments (n=17), n (%)</td>
              <td>17 (100)</td>
              <td>3 (18)</td>
              <td>11 (65)</td>
            </tr>
          </tbody>
        </table>
      </table-wrap>
      <fig id="figure1" position="float">
        <label>Figure 1</label>
        <caption>
          <p>Bar chart showing the proportion of positive, neutral, and negative comments for each therapy, as identified by GPT-4o. CIs were calculated for the positive and negative bars using binomial proportion CIs.</p>
        </caption>
        <graphic xlink:href="formative_v10i1e78054_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
      </fig>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <p>GPT-4o showed substantially better agreement with clinician judgment than traditional NLP methods when classifying patient sentiment in Reddit comments about AD treatments. The superior performance across all metrics establishes the feasibility of applying LLMs to extract patient perspectives from unstructured social media data. However, it is important to note that we excluded comments when the annotators disagreed to ensure a clear reference standard. This approach potentially biased the validation set toward straightforward sentiment examples and likely inflated the reported performance metrics, as the models were not tested on ambiguous, mixed-emotion, or context-dependent language, which is common in real patient discussions. Simple task-specific prompts applied to LLMs may yield more detailed insights than traditional NLP methods. LLMs offer advantages through basic zero-shot prompts that can be adapted for specific tasks like drug-specific sentiment analysis in medical discussions, whereas general-purpose sentiment analysis tools may struggle with medical terminology and context without additional training or customization. However, LLM-based approaches also have limitations, including computational costs and the need for careful prompt design to ensure reliable results. The predominance of neutral sentiment across all medications shows that many comments serve informational or question-asking purposes rather than expressing clear sentiment. Clinically, the observed higher positive sentiment for upadacitinib and dupilumab in our dataset may reflect patient satisfaction with these treatments, while the lower negative sentiment observed with abrocitinib could indicate fewer patient-reported concerns. Similar approaches could illuminate real-world perspectives on treatments for psoriasis and other chronic skin conditions where multiple therapeutic options exist. While our findings provide initial insights for AD treatment discussions, they also suggest potential broader applications for analyzing patient perspectives across medical fields. Future studies should expand beyond simple sentiment categorization to capture more nuanced patient experiences, including mixed sentiments, specific concerns about side effects, cost considerations, and conditional satisfaction with treatment. Additionally, future research should explore temporal trends in sentiment as medications gain market share and correlate social media sentiment with real-world evidence databases and patient registries.</p>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>List of dermatology subreddits used in the study and the total number of comments in each subreddit prior to January 1, 2024.</p>
        <media xlink:href="formative_v10i1e78054_app1.docx" xlink:title="DOCX File, 14 KB"/>
      </supplementary-material>
      <supplementary-material id="app2">
        <label>Multimedia Appendix 2</label>
        <p>Prompt used by GPT-4o to generate results.</p>
        <media xlink:href="formative_v10i1e78054_app2.docx" xlink:title="DOCX File, 13 KB"/>
      </supplementary-material>
      <supplementary-material id="app3">
        <label>Multimedia Appendix 3</label>
        <p>Flow chart displaying the system workflow for the study.</p>
        <media xlink:href="formative_v10i1e78054_app3.png" xlink:title="PNG File, 132 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">AD</term>
          <def>
            <p>atopic dermatitis</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">API</term>
          <def>
            <p>application programming interface</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">DistilBERT</term>
          <def>
            <p>distilbert-base-uncased-finetuned-sst-2-english</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">LLM</term>
          <def>
            <p>large language model</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">NLP</term>
          <def>
            <p>natural language processing</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">VADER</term>
          <def>
            <p>Valence Aware Dictionary and Sentiment Reasoner</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors thank Hadley Johnson, MD, for assistance with manual annotation of the validation dataset.</p>
    </ack>
    <notes>
      <sec>
        <title>Funding</title>
        <p>No external financial support or grants were received from any public, commercial, or not-for-profit entities for the research, authorship, or publication of this article.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>JY has served as a consultant, on the advisory board, or as an investigator for the following: AbbVie, Arcutis, Astria, Dermavant, Hypothesis, Incyte, iRhythm, Johnson &amp; Johnson, Kiehl’s/L’Oreal, Leo, Lilly, National Eczema Association, O’Glacee, Pfizer, Sanofi, SmartPractice, Sol-Gel, and Soteri Skin. He also receives an honorarium from UpToDate.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Buntinx-Krieg</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Caravaglio</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Domozych</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Dellavalle</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Dermatology on Reddit: elucidating trends in dermatologic communications on the world wide web</article-title>
          <source>Dermatol Online J</source>
          <year>2017</year>
          <month>07</month>
          <day>15</day>
          <volume>23</volume>
          <issue>7</issue>
          <fpage>13030/qt9dr1f7x6</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://escholarship.org/uc/item/9dr1f7x6"/>
          </comment>
          <pub-id pub-id-type="doi">10.5070/D3237035730</pub-id>
          <pub-id pub-id-type="medline">29469693</pub-id>
          <pub-id pub-id-type="pii">13030/qt9dr1f7x6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cummins</surname>
              <given-names>JA</given-names>
            </name>
            <name name-style="western">
              <surname>Nambudiri</surname>
              <given-names>VE</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing: a window to understanding skincare trends</article-title>
          <source>Int J Med Inform</source>
          <year>2022</year>
          <month>04</month>
          <volume>160</volume>
          <fpage>104705</fpage>
          <pub-id pub-id-type="doi">10.1016/j.ijmedinf.2022.104705</pub-id>
          <pub-id pub-id-type="medline">35121355</pub-id>
          <pub-id pub-id-type="pii">S1386-5056(22)00019-3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <collab>OpenAI</collab>
            <name name-style="western">
              <surname>Achiam</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Adler</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>GPT-4 technical report</article-title>
          <source>arXiv</source>
          <comment>Preprint published online on March 15, 2023</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.2303.08774</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hutto</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Gilbert</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>VADER: a parsimonious rule-based model for sentiment analysis of social media text</article-title>
          <source>Proceedings of the International AAAI Conference on Web and Social Media</source>
          <year>2014</year>
          <volume>8</volume>
          <issue>1</issue>
          <fpage>216</fpage>
          <lpage>225</lpage>
          <pub-id pub-id-type="doi">10.1609/icwsm.v8i1.14550</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sanh</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Debut</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chaumond</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wolf</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>DistilBERT, a distilled version of BERT: smaller, faster, cheaper and lighter</article-title>
          <source>arXiv</source>
          <comment>Preprint posted online Oct 2, 2019</comment>
          <pub-id pub-id-type="doi">10.48550/arXiv.1910.01108</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
