<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v9i1e79997</article-id><article-id pub-id-type="doi">10.2196/79997</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Development of an Evaluation Index System for Health Recommender Systems Based on the Health Technology Assessment Framework: Cross-Sectional Delphi Study</article-title></title-group><contrib-group><contrib contrib-type="author"><name name-style="western"><surname>Sun</surname><given-names>Yue</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Hou</surname><given-names>Shijie</given-names></name><degrees>MBBS</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Chen</surname><given-names>Siye</given-names></name><degrees>MSc</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Leng</surname><given-names>Minmin</given-names></name><degrees>PhD</degrees><xref 
ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Wang</surname><given-names>Zhiwen</given-names></name><degrees>Prof Dr</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>School of Nursing, Health Science Center, Xi'an Jiaotong University</institution><addr-line>Xi'an</addr-line><addr-line>Shaanxi Province</addr-line><country>China</country></aff><aff id="aff2"><institution>School of Nursing, Peking University</institution><addr-line>38 Xueyuan Road, Haidian District</addr-line><addr-line>Beijing</addr-line><country>China</country></aff><aff id="aff3"><institution>Department of Nursing, Shandong Provincial Hospital Affiliated to Shandong First Medical University</institution><addr-line>Jinan</addr-line><addr-line>Shandong Province</addr-line><country>China</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Brini</surname><given-names>Stefano</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Bhuiyan</surname><given-names>Iftekhar Uddin</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Liu</surname><given-names>Ming</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Zhiwen Wang, Prof Dr, School of Nursing, Peking University, No. 
38 Xueyuan Road, Haidian District, Beijing, China, 86 15901566817, 86 010 82802248; <email>wzwjing@sina.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>22</day><month>12</month><year>2025</year></pub-date><volume>9</volume><elocation-id>e79997</elocation-id><history><date date-type="received"><day>03</day><month>07</month><year>2025</year></date><date date-type="rev-recd"><day>17</day><month>11</month><year>2025</year></date><date date-type="accepted"><day>17</day><month>11</month><year>2025</year></date></history><copyright-statement>&#x00A9; Yue Sun, Shijie Hou, Siye Chen, Minmin Leng, Zhiwen Wang. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 22.12.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2025/1/e79997"/><abstract><sec><title>Background</title><p>Health recommender systems (HRSs) are digital platforms designed to deliver personalized health information, resources, and interventions tailored to users&#x2019; specific needs. However, existing evaluations of HRSs largely focus on algorithmic performance, with limited scientific evidence supporting user-centered assessment approaches and insufficiently defined evaluation metrics. Moreover, no unified or scientifically validated framework currently exists for evaluating these systems, resulting in limited cross-study comparability and constraining regulatory and implementation decision-making.</p></sec><sec><title>Objective</title><p>This study aimed to develop a comprehensive, consensus-based evaluation index system for HRSs grounded in the health technology assessment (HTA) framework.</p></sec><sec sec-type="methods"><title>Methods</title><p>This cross-sectional study used a 2-round Delphi process conducted with 18 experts comprising clinicians, digital health researchers, and policymakers who possessed relevant professional experience and domain knowledge in HRSs. The age range of the experts was between 30 and 58 years, with 67% (n=12) of them possessing over 10 years of professional experience. On the basis of literature analysis and HTA principles, a preliminary indicator set comprising 5 primary and 16 secondary indicators was constructed. Experts rated the importance of each indicator using a 5-point Likert scale and provided qualitative suggestions for refinement. 
After the Delphi process, the analytic hierarchy process was applied to determine indicator weights and assess consistency.</p></sec><sec sec-type="results"><title>Results</title><p>The Delphi survey reached full participation in the first round (18/18, 100%) and maintained an 88.9% (16/18) response rate in the second round. The final evaluation index system of HRSs contained 5 first-level indicators (performance, effectiveness, safety, economy, and social appropriateness) and 18 second-level indicators. The mean importance scores of the second-level indicators ranged from 4.25 (SD 0.45) to 5.00 (SD 0.00), with coefficients of variation between 0.000 and 0.220. Among the first-level indicators, safety received the highest weight (0.289), followed by social appropriateness (0.251), effectiveness (0.193), performance (0.136), and economy (0.132).</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>This study presents an evaluation index system for HRSs grounded in the HTA framework and validated through expert consensus. The resulting framework not only provides actionable guidance for the design, optimization, and implementation of HRSs but also fills a methodological gap in the field by offering quantifiable, hierarchical evaluation indicators with validated weighting. 
Future research will involve iterative refinement and empirical validation of the system in real-world deployment settings, thereby enabling continuous improvement and facilitating the establishment of unified evaluation standards for HRS research and practice.</p></sec></abstract><kwd-group><kwd>health recommender systems</kwd><kwd>health technology assessment evaluation framework</kwd><kwd>HTA evaluation framework</kwd><kwd>evaluation index system</kwd><kwd>Delphi survey</kwd><kwd>artificial intelligence</kwd><kwd>AI</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><p>As technology evolves, new ways to implement tailored interventions are being adopted, and researchers and policymakers require access to appropriate tools to assess the design and suitability for use of these interventions [<xref ref-type="bibr" rid="ref1">1</xref>]. One such innovative approach to computer-based tailored health interventions is recommender systems (RSs) [<xref ref-type="bibr" rid="ref2">2</xref>]. Health RSs (HRSs), specialized digital platforms or software applications designed to recommend personalized information, resources, or interventions relevant to the user&#x2019;s specific health needs, are now available to address this need [<xref ref-type="bibr" rid="ref3">3</xref>]. A scoping review of the current literature identified 51 studies on HRSs covering a range of health domains, including general health promotion, lifestyle, generic health service, and others [<xref ref-type="bibr" rid="ref4">4</xref>]. These HRSs use sophisticated data analytic techniques, integrating machine learning algorithms and artificial intelligence frameworks to process and interpret diverse user-specific datasets, including medical histories, behavioral patterns, and individual preferences. 
By synthesizing these multifaceted data sources, HRSs generate personalized recommendations, targeted resources, and customized interventions tailored to the unique needs of each user [<xref ref-type="bibr" rid="ref5">5</xref>]. Given that the information provided by HRSs can have a significant impact on individuals&#x2019; health decisions, it is essential to conduct comprehensive evaluations before their widespread adoption. Such assessments not only enable developers to refine the design and functionality of these applications but also establish a scientific foundation for evaluating their effectiveness and ensuring their safe and reliable use [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>In 2018, Moshi et al [<xref ref-type="bibr" rid="ref8">8</xref>] highlighted that the health technology assessment (HTA) evaluation framework then in use was inadequate for delivering holistic assessments of mobile health solutions in a clinical environment. Since then, the field has advanced significantly, driven by the growing development and application of HTA evaluation frameworks for assessing digital health technologies (DHTs), especially mobile health [<xref ref-type="bibr" rid="ref9">9</xref>,<xref ref-type="bibr" rid="ref10">10</xref>]. As a health technology framework, HTA considerations encompass effectiveness, safety, and cost-effectiveness, as well as patient- and organizational-level factors, while also addressing ethical, social, and legal issues [<xref ref-type="bibr" rid="ref11">11</xref>]. Furthermore, they include the technical characteristics of DHTs [<xref ref-type="bibr" rid="ref12">12</xref>]. A scoping review of HRS research [<xref ref-type="bibr" rid="ref4">4</xref>] found that most studies focus on performance metrics, with only a limited number of HRS studies incorporating evaluations of user participation, health outcomes, user acceptance, or feasibility. 
Key dimensions such as safety, economic impact, and social appropriateness have been largely overlooked. For example, these studies are either narrowly focused on specific system dimensions (eg, recommendation techniques) or health-related outcome measures [<xref ref-type="bibr" rid="ref13">13</xref>-<xref ref-type="bibr" rid="ref18">18</xref>] or they provide only a general overview of HRSs without giving adequate emphasis to evaluation [<xref ref-type="bibr" rid="ref5">5</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. De Croon et al [<xref ref-type="bibr" rid="ref19">19</xref>] reviewed existing research related to HRSs and found that there is a lack of scientific evidence on user-centered evaluation approaches and that other metric parameters are ambiguous in terms of HRS evaluation and the feasibility of the organizational management of this technology. For example, the definition of performance varied across studies. Torrent-Fontbona and Lopez [<xref ref-type="bibr" rid="ref20">20</xref>] used the amount of time in the glycemic target range by reducing the time below the target as a performance metric, whereas Cho et al [<xref ref-type="bibr" rid="ref21">21</xref>] reported performance based on precision and recall. Given the heterogeneity in evaluation methods, it is necessary to develop a comprehensive and scientifically sound evaluation tool to provide reference and guidance for HRSs.</p><p>Accordingly, this study consisted of 2 main phases: first, summarizing and developing the evaluation indicators by reviewing the literature within the HTA framework and, second, identifying specific indicators and determining their weights through a Delphi expert consultation process and the analytic hierarchy process (AHP). 
Adopting an HTA-based evaluation indicator system could provide robust support for more systematic and in-depth assessment and offer guidance for the design, optimization, and implementation of HRSs in the health care domain [<xref ref-type="bibr" rid="ref11">11</xref>].</p></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p>This study used a Delphi method [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>], a widely recognized approach for selecting quality indicators in health care. The Delphi method does not prescribe a fixed number of rounds. Previous studies have consistently shown that 2 to 3 iterations are typically sufficient to reach consensus [<xref ref-type="bibr" rid="ref24">24</xref>]. The process concludes when consensus is achieved on the topics under discussion. This study was designed and reported following the Delphi Studies in Social and Health Sciences&#x2013;Recommendations for an Interdisciplinary Standardized Reporting framework [<xref ref-type="bibr" rid="ref25">25</xref>], which provides a consensus-based methodological and reporting standard for Delphi research.</p><p>This study comprised 2 rounds of questionnaires administered to an expert panel via email, conducted in accordance with established methodological guidelines for Delphi surveys. The process consisted of two key stages: (1) developing a preliminary set of potential evaluation indicators along with a conceptual framework and secondary-level metrics for HRSs through a systematic literature review and (2) conducting a Delphi survey to prioritize and reach consensus on the primary and secondary evaluation indicators for HRSs. 
The specific process is illustrated in <xref ref-type="other" rid="box1">Textbox 1</xref>.</p><boxed-text id="box1"><title>Summary of the 4-phase process used to develop the evaluation framework for the health recommender system, including expert panel selection, literature review, questionnaire preparation, and the Delphi&#x2013;analytic hierarchy process procedure.</title><p><bold>Phase 1: expert panel</bold></p><list list-type="bullet"><list-item><p>Members held a postgraduate degree or higher, possessed a midlevel professional title or above, or had a minimum of 10 years of relevant work experience.</p></list-item><list-item><p>Members were engaged in clinical medicine, clinical nursing, public health management, health-related government departments, or information technology related to recommender systems.</p></list-item></list><p><bold>Phase 2: literature review</bold></p><list list-type="bullet"><list-item><p>Databases included PubMed, Embase, Web of Science, ACM, IEEE Xplore, ScienceDirect, China National Knowledge Infrastructure (CNKI), and Wanfang.</p></list-item><list-item><p>Professional organization websites such as the World Health Organization and UNESCO were consulted.</p></list-item></list><p><bold>Phase 3: questionnaire preparation</bold></p><list list-type="bullet"><list-item><p>The focus was on 5 aspects: technical characteristics, effectiveness, safety, economic considerations, and social appropriateness.</p></list-item><list-item><p>The importance of each indicator was measured by its relevance to health recommender system performance, rated on a Likert scale from 1 to 5 (1=&#x201C;not important at all&#x201D;; 5=&#x201C;very important&#x201D;).</p></list-item></list><p><bold>Phase 4: Delphi method and the analytic hierarchy process</bold></p><list list-type="bullet"><list-item><p>The reliability of the expert consultation was assessed through the expert positive coefficient, the authority coefficient, and the degree of opinion concentration and 
coordination.</p></list-item><list-item><p>Using the Saaty scale, a judgment matrix was constructed, followed by hierarchical ranking and consistency testing.</p></list-item></list></boxed-text></sec><sec id="s2-2"><title>Participants</title><p>The selection criteria for experts were established as follows: (1) holding a postgraduate degree or higher, possessing a mid-level (or above) professional title, or having at least 10 years of relevant work experience; (2) being actively engaged in fields such as health IT, HTA, public health management, clinical medicine, nursing, or health-related government agencies, with substantial expertise and a thorough understanding of the research topic; and (3) voluntarily agreeing to participate in the study. Individuals lacking familiarity with HRSs were excluded from the selection process. A total of 20 eligible experts were invited via email, and 18 (90%) agreed to participate in the study.</p></sec><sec id="s2-3"><title>Questionnaire Preparation</title><p>This study, based on the HTA evaluation framework, aimed to develop a preliminary hierarchical framework for the evaluation indicator system of HRSs. 
Drawing on previous assessments of emerging health technologies, the evaluation focused on 5 key domains: technical characteristics, effectiveness, safety, economy, and social appropriateness, as shown in <xref ref-type="table" rid="table1">Table 1</xref>.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>The five-dimensional measurement framework for health recommender systems based on the health technology assessment model.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Aspect</td><td align="left" valign="bottom">Definition</td></tr></thead><tbody><tr><td align="left" valign="top">Technical characteristics</td><td align="left" valign="top">Refers to the system&#x2019;s effectiveness and reliability in delivering stable, high-quality recommendations.</td></tr><tr><td align="left" valign="top">Effectiveness</td><td align="left" valign="top">Refers to the ability of the health recommender system to achieve its intended objectives in real-world applications, such as promoting behavior change or improving health outcomes.</td></tr><tr><td align="left" valign="top">Safety</td><td align="left" valign="top">Refers to the system&#x2019;s capacity to ensure user privacy, data security, operational stability, and overall user safety throughout its use.</td></tr><tr><td align="left" valign="top">Economy</td><td align="left" valign="top">Refers to the economic feasibility and cost-effectiveness of the health recommender system while fulfilling its functional requirements.</td></tr><tr><td align="left" valign="top">Social appropriateness</td><td align="left" valign="top">Refers to the degree to which the health recommender system aligns with sociocultural norms, ethical principles, and legal regulations, and is acceptable to users and society.</td></tr></tbody></table></table-wrap><p>The following 8 electronic databases were searched on October 3, 2022: PubMed, Embase, Web of Science, ACM, IEEE 
Xplore, ScienceDirect, China National Knowledge Infrastructure, and Wanfang. Electronic searches were conducted using the following keywords: ((&#x201C;recommender systems&#x201D;) OR (&#x201C;recommender system&#x201D;) OR (&#x201C;recommendation systems&#x201D;) OR (&#x201C;recommendation system&#x201D;)) AND (health OR patient OR patients). To identify additional studies, we screened professional organization websites, including the World Health Organization, UNESCO, the Ministry of Health of the People&#x2019;s Republic of China, the National Health Commission of China, and other organizational or governmental websites.</p><p>Potentially relevant studies containing evaluation indicators were identified and screened by 2 reviewers according to the following criteria: (1) studies that described or implemented HRSs with a primary focus on improving health and included an evaluation of the HRSs and (2) studies published in English or Chinese. In cases of uncertainty regarding the inclusion of a study, the research team held discussions to reach a consensus. On the basis of the analysis of the literature, a preliminary evaluation index system for HRSs was developed comprising 5 primary indicators (dimensions) and 16 secondary indicators.</p><p>The preliminary evaluation index system for HRSs was operationalized into a Delphi-based survey instrument. Within this questionnaire, the importance of each indicator was defined as its relevance in assessing the overall performance of HRSs, quantified using a Likert scale from 1 to 5 (1=&#x201C;not important at all&#x201D;; 5=&#x201C;very important&#x201D;). 
Additionally, the survey included open-ended questions to allow domain experts to provide feedback on the existing indicators and suggest additional indicators they deemed essential for inclusion.</p></sec><sec id="s2-4"><title>Delphi Method and the AHP</title><p>In the initial phase of expert consultations, a 5-point Likert scale was used by the panel of experts to evaluate and score the significance of each indicator, with an additional open-ended section for suggesting modifications. Upon the conclusion of this phase, the criteria for item retention were delineated as follows: an item&#x2019;s mean importance score must surpass 4.0, the coefficient of variation (CV) must not exceed 0.25, and the selection rate for the highest score of 5 must be above 20% [<xref ref-type="bibr" rid="ref25">25</xref>]. During the subsequent round of consultations, the anonymized results from the first round were disseminated among the experts, and the consultation form was augmented with a section for the allocation of index weights. In the final round, a select group of experts assigned weights to the index system, and the AHP was used to ascertain the weight values for the finalized evaluation index system [<xref ref-type="bibr" rid="ref26">26</xref>]. This system is hierarchically organized into 3 strata: the goal layer, which encapsulates the overarching objective of the HRS evaluation; the criterion layer, encompassing the principal evaluation dimensions; and the indicator layer, comprising specific metrics. 
Experts conducted pairwise comparisons of indicators within the same stratum using a scale from 1 to 9 to quantify their relative importance (with 1 denoting equal importance and 9 signifying extreme importance), thereby constructing the judgment matrix [<xref ref-type="bibr" rid="ref27">27</xref>].</p></sec><sec id="s2-5"><title>Index Screening and Result Statistics</title><p>The collected data were processed using Microsoft Excel before being input into a database for further analysis. Data analysis was conducted using Microsoft Excel 2021 and SPSS (version 26.0; IBM Corp). Descriptive statistics were used to calculate the mean, SD, and CV of the importance and feasibility of each indicator.</p><p>The reliability of the expert consultation was assessed through the expert positive coefficient, the expert authority coefficient (Cr), the degree of expert opinion concentration, and the degree of expert opinion coordination [<xref ref-type="bibr" rid="ref28">28</xref>]. The expert positive coefficient was represented by the questionnaire response rate. The Cr was used to reflect the authority of expert opinions, calculated based on expert judgment (Ca) and their familiarity with the dimensions and items (Cs) using the formula Cr = (Ca + Cs)/2. The Kendall coefficient of concordance (<italic>W</italic>) and the CV were used to measure the degree of consensus among experts. A higher value of <italic>W</italic> indicates a stronger level of consensus among experts.</p><p>The weights of the indicators were determined using the AHP [<xref ref-type="bibr" rid="ref29">29</xref>]. Through assigning importance values to each indicator, we established the Saaty scale, constructed the judgment matrix, and conducted a hierarchical ranking and consistency test. The consistency ratio is calculated to assess the judgment matrix. 
When the consistency ratio value is less than 0.10, it indicates that the judgment matrix exhibits satisfactory consistency and does not require further adjustments.</p></sec><sec id="s2-6"><title>Ethical Considerations</title><p>Ethics approval was obtained from the Biomedical Ethics Committee of Peking University (IRB00001052-22042). All participants provided informed consent through an electronic registration form where they were clearly informed that their responses would remain confidential and be used solely for research purposes. Participants were notified of their right to withdraw from the study at any time without penalty.</p><p>To ensure confidentiality and adhere to ethical standards, the questionnaire was administered anonymously. Only participants&#x2019; email addresses were collected for follow-up purposes, and these were stored separately from all demographic and survey data. All responses were deidentified during data processing, sharing of interim results between Delphi rounds, and final reporting. No personally identifiable information is present in this manuscript.</p><p>Access to identifiable data was restricted exclusively to the research team, and all data were securely stored within institutional systems. No compensation was provided to participants for their involvement in the study.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Overview</title><p>The initial literature search yielded a total of 1538 articles. After removing duplicates, of the 1538 articles, 1239 (80.6%) were screened by title and abstract for eligibility. Of these 1239 articles, 77 (6.2%) studies related to the evaluation of HRSs were included in the analysis. <xref ref-type="fig" rid="figure1">Figure 1</xref> provides an overview of the identification and selection of studies at different stages of the screening process. 
The included studies were systematically organized based on the identified HTA evaluation framework, as detailed in Table S1 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>The process of study identification and selection. Flow diagram illustrating the literature search, screening, eligibility assessment, and final inclusion of studies used to inform the development of the health recommender system evaluation index system.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e79997_fig01.png"/></fig></sec><sec id="s3-2"><title>Basic Information on the Experts</title><p>A total of 18 experts participated in the first round of the Delphi consultation. These experts were primarily engaged in fields such as clinical medicine, clinical nursing, public health management, health-related government departments, and IT related to RSs. The age range of the experts was between 30 and 58 years, with 67% (12/18) of them possessing over 10 years of professional experience. 
All experts (18/18, 100%) held a master&#x2019;s degree or higher, and 72% (13/18) held senior professional titles (<xref ref-type="table" rid="table2">Table 2</xref>).</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Demographic characteristics of the 18 experts participating in the 2-round Delphi study (N=18).</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Variable</td><td align="left" valign="bottom">Participants, n (%)</td></tr></thead><tbody><tr><td align="left" valign="top" colspan="2">Age (y)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>30-39</td><td align="left" valign="top">10 (56)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>40-49</td><td align="left" valign="top">6 (33)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content> &#x2265;50</td><td align="left" valign="top">2 (11)</td></tr><tr><td align="left" valign="top" colspan="2">Work experience (y)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content> &#x003C;10</td><td align="left" valign="top">6 (33)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>10-19</td><td align="left" valign="top">8 (44)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>20-29</td><td align="left" valign="top">4 (22)</td></tr><tr><td align="left" valign="top" colspan="2">Highest degree</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Master&#x2019;s</td><td align="left" valign="top">9 (50)</td></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>PhD</td><td align="left" valign="top">9 (50)</td></tr><tr><td align="left" valign="top" colspan="2">Professional title</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Intermediate grade title</td><td align="left" valign="top">1 (6)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Deputy senior grade title</td><td align="left" valign="top">4 (22)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Senior grade title</td><td align="left" valign="top">13 (72)</td></tr><tr><td align="left" valign="top" colspan="2">Field of expertise</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Clinical medicine</td><td align="left" valign="top">5 (28)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Clinical nursing</td><td align="left" valign="top">4 (22)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Public health management</td><td align="left" valign="top">3 (17)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Health-related government departments</td><td align="left" valign="top">2 (11)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>IT related to recommender systems</td><td align="left" valign="top">4 (22)</td></tr></tbody></table></table-wrap></sec><sec id="s3-3"><title>Analysis of Expert Participation</title><sec id="s3-3-1"><title>Expert Positive Coefficient</title><p>Expert participation was assessed using the response rate and the proportion of experts who provided written feedback. 
In the first Delphi round, all distributed questionnaires were returned (18/18, 100% response rate), yielding an expert enthusiasm coefficient of 1.00. In the second round, 89% (16/18) of the questionnaires were returned. The retention rate across rounds was therefore 89% (16/18), corresponding to an expert enthusiasm coefficient of 0.89.</p></sec><sec id="s3-3-2"><title>The Cr Measure</title><p>Expert authority was assessed using the authority coefficient (Cr), defined as Cr = (Ca + Cs) / 2. In round 1, the mean Cr was 0.883, derived from a judgment coefficient (Ca) of 0.933 and a familiarity coefficient (Cs) of 0.833. In round 2, the mean Cr was 0.884, based on a Ca of 0.940 and a Cs of 0.827. All Cr values exceeded the commonly accepted threshold of 0.7, indicating a consistently high level of expert authority across both rounds.</p></sec><sec id="s3-3-3"><title>The Degree of Expert Opinion Concentration</title><p>The degree of expert opinion concentration was assessed using the Kendall coefficient of concordance (<italic>W</italic>). In round 1, <italic>W</italic> was 0.281 for dimensions and 0.282 for items (&#x03C7;&#x00B2;=25.254 and 121.806; <italic>P</italic>&#x003C;.001). In round 2, the values were 0.360 and 0.236 (&#x03C7;&#x00B2;=23.067 and 83.096; <italic>P</italic>&#x003C;.001).</p></sec></sec><sec id="s3-4"><title>Results of the Delphi Survey</title><sec id="s3-4-1"><title>First Round</title><p>The evaluation index system comprised 16 items across 5 dimensions. For the primary indicators, the mean importance scores ranged from 4.39 (SD 0.59) to 5.00 (SD 0.00), with CVs ranging from 0.000 to 0.172. All primary indicators met the inclusion criteria and were retained without modification. For the secondary indicators, the CVs ranged from 0.000 to 0.173, and the mean importance scores ranged from 4.17 (SD 0.60) to 5.00 (SD 0.00). 
Detailed results for dimensions and items are presented in Tables S2 and S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> and <xref ref-type="table" rid="table3">Table 3</xref>.</p><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Adjustments to evaluation indicators for health recommender systems after round 1 of the Delphi survey.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Primary indicators</td><td align="left" valign="top">Secondary indicators</td><td align="left" valign="top">Adjustment</td></tr></thead><tbody><tr><td align="left" valign="top">1. Performance</td><td align="left" valign="top">1.1 Accuracy (retained)<break/>1.2 Content coverage (retained)<break/>1.3 Result diversity (retained)<break/>1.4 User trust (retained)<break/>1.5 System robustness (retained)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Added &#x201C;1.6 System response efficiency&#x201D;</p></list-item></list></td></tr><tr><td align="left" valign="top">2. Effectiveness</td><td align="left" valign="top">2.1 Health behavior outcomes (modified)<break/>2.2 Quality of life impact (modified)<break/>2.3 Health service efficiency (modified)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Added &#x201C;2.1 Health behavior&#x201D;</p></list-item><list-item><p>Merged 2.1 and 2.2 as &#x201C;2.2 Health outcome&#x201D;</p></list-item><list-item><p>Modified 2.3 to &#x201C;2.3 Quality and efficiency of health services&#x201D;</p></list-item></list></td></tr><tr><td align="left" valign="top">3. Safety</td><td align="left" valign="top">3.1 Clinical safety (retained)<break/>3.2 Technical application safety (retained)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>No adjustments</p></list-item></list></td></tr><tr><td align="left" valign="top">4. 
Economy</td><td align="left" valign="top">4.1 Patient cost-effectiveness (modified)<break/>4.2 Institutional cost-effectiveness (modified)<break/>4.3 Social benefits (retained)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Modified 4.1 to &#x201C;4.1 Patient-level economy&#x201D;</p></list-item><list-item><p>Modified 4.2 to &#x201C;4.2 Institutional-level economy&#x201D;</p></list-item></list></td></tr><tr><td align="left" valign="top">5. Social appropriateness</td><td align="left" valign="top">5.1 Data privacy protection (deleted)<break/>5.2 Legal and policy compliance (modified)<break/>5.3 Acceptability (retained)</td><td align="left" valign="top"><list list-type="bullet"><list-item><p>Replaced 5.1 with &#x201C;5.1 Ethicality&#x201D;</p></list-item><list-item><p>Modified 5.2 to &#x201C;5.2 Policy suitability&#x201D;</p></list-item><list-item><p>Added &#x201C;5.4 Feasibility&#x201D;</p></list-item></list></td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-4-2"><title>Second Round</title><p>For the primary indicators, the mean importance scores ranged from 4.31 (SD 0.59) to 4.94 (SD 0.37), with CVs ranging from 0.000 to 0.141. All primary indicators met the inclusion criteria and were retained without modification. The mean importance scores of the second-level indicators ranged from 4.25 (SD 0.45) to 5.00 (SD 0.00), with CVs ranging between 0.000 and 0.220. Detailed results for all indicators are provided in Tables S4 and S5 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. No additional feedback or suggestions were provided by the experts. After 2 rounds of the Delphi survey, the evaluation index system for HRSs was established, comprising 5 first-level indicators and 18 second-level indicators (<xref ref-type="table" rid="table4">Table 4</xref>). 
Explanations of the indicators are provided in <xref ref-type="table" rid="table1">Table 1</xref> and Table S6 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Final evaluation index system for health recommender systems developed through a 2-round Delphi consensus study.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Primary indicator</td><td align="left" valign="bottom">Secondary indicators</td></tr></thead><tbody><tr><td align="left" valign="top">Performance</td><td align="left" valign="top"><list list-type="simple"><list-item><p>1.1 Accuracy</p></list-item><list-item><p>1.2 Coverage</p></list-item><list-item><p>1.3 Result diversity</p></list-item><list-item><p>1.4 User trust</p></list-item><list-item><p>1.5 Robustness</p></list-item><list-item><p>1.6 Response efficiency</p></list-item></list></td></tr><tr><td align="left" valign="top">Effectiveness</td><td align="left" valign="top"><list list-type="simple"><list-item><p>2.1 Health behavior</p></list-item><list-item><p>2.2 Health outcome</p></list-item><list-item><p>2.3 Quality and efficiency of health services</p></list-item></list></td></tr><tr><td align="left" valign="top">Safety</td><td align="left" valign="top"><list list-type="simple"><list-item><p>3.1 Clinical safety</p></list-item><list-item><p>3.2 Technical safety</p></list-item></list></td></tr><tr><td align="left" valign="top">Economy</td><td align="left" valign="top"><list list-type="simple"><list-item><p>4.1 Patient-level economy</p></list-item><list-item><p>4.2 Institutional-level economy</p></list-item><list-item><p>4.3 Social benefit</p></list-item></list></td></tr><tr><td align="left" valign="top">Social appropriateness</td><td align="left" valign="top"><list list-type="simple"><list-item><p>5.1 Ethicality</p></list-item><list-item><p>5.2 Policy 
appropriateness</p></list-item><list-item><p>5.3 Acceptability</p></list-item><list-item><p>5.4 Feasibility</p></list-item></list></td></tr></tbody></table></table-wrap></sec></sec><sec id="s3-5"><title>Results of the AHP</title><p>The weights assigned to the primary indicators were as follows: 0.136 for performance, 0.193 for effectiveness, 0.289 for safety, 0.132 for economy, and 0.251 for social appropriateness. The consistency value of 0.032 demonstrates the reliability of the results. Among the secondary indicators, the top 4 weights were attributed to technical safety (0.154), clinical safety (0.135), acceptability (0.065), and policy appropriateness (0.059). All secondary indicators exhibited consistency values below 0.1, further validating the rationality of the weight assignments. The combined weights of the evaluation indicators for HRSs are shown in <xref ref-type="table" rid="table5">Table 5</xref>.</p><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Combined weights of primary and secondary evaluation indicators for health recommender systems obtained through the analytic hierarchy process.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Primary indicator (weight) and secondary indicator</td><td align="left" valign="bottom">Weight</td><td align="left" valign="bottom">Combined weight</td></tr></thead><tbody><tr><td align="left" valign="top">1. Performance</td><td align="left" valign="top">0.136</td><td align="left" valign="top">&#x2014;<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup></td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1.1 Accuracy</td><td align="char" char="." valign="top">0.144</td><td align="char" char="." 
valign="top">0.020</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1.2 Coverage</td><td align="char" char="." valign="top">0.151</td><td align="char" char="." valign="top">0.021</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1.3 Result diversity</td><td align="char" char="." valign="top">0.157</td><td align="char" char="." valign="top">0.021</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1.4 User trust</td><td align="char" char="." valign="top">0.173</td><td align="char" char="." valign="top">0.024</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1.5 Robustness</td><td align="char" char="." valign="top">0.183</td><td align="char" char="." valign="top">0.025</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>1.6 Response efficiency</td><td align="char" char="." valign="top">0.192</td><td align="char" char="." valign="top">0.026</td></tr><tr><td align="left" valign="top">2. Effectiveness</td><td align="left" valign="top">0.193</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2.1 Health behavior</td><td align="char" char="." valign="top">0.296</td><td align="char" char="." valign="top">0.057</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2.2 Health outcome</td><td align="char" char="." valign="top">0.336</td><td align="char" char="." 
valign="top">0.065</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>2.3 Quality and efficiency of health services</td><td align="char" char="." valign="top">0.368</td><td align="char" char="." valign="top">0.071</td></tr><tr><td align="left" valign="top">3. Safety</td><td align="left" valign="top">0.289</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>3.1 Clinical safety</td><td align="char" char="." valign="top">0.466</td><td align="char" char="." valign="top">0.135</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>3.2 Technical safety</td><td align="char" char="." valign="top">0.534</td><td align="char" char="." valign="top">0.154</td></tr><tr><td align="left" valign="top">4. Economy</td><td align="left" valign="top">0.132</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>4.1 Patient-level economy</td><td align="char" char="." valign="top">0.300</td><td align="char" char="." valign="top">0.040</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>4.2 Institutional-level economy</td><td align="char" char="." valign="top">0.328</td><td align="char" char="." valign="top">0.043</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>4.3 Social benefit</td><td align="char" char="." valign="top">0.372</td><td align="char" char="." valign="top">0.049</td></tr><tr><td align="left" valign="top">5. 
Social appropriateness</td><td align="left" valign="top">0.251</td><td align="left" valign="top">&#x2014;</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>5.1 Ethicality</td><td align="char" char="." valign="top">0.217</td><td align="char" char="." valign="top">0.054</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>5.2 Policy appropriateness</td><td align="char" char="." valign="top">0.236</td><td align="char" char="." valign="top">0.059</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>5.3 Acceptability</td><td align="char" char="." valign="top">0.260</td><td align="char" char="." valign="top">0.065</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>5.4 Feasibility</td><td align="char" char="." valign="top">0.287</td><td align="char" char="." valign="top">0.072</td></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>Not applicable.</p></fn></table-wrap-foot></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><p>In this study, by integrating literature and policy research with the Delphi expert consultation and AHP methods, an HRS evaluation index system was constructed based on the HTA framework. 
This system encompasses 5 dimensions&#x2014;performance, effectiveness, safety, economy, and social appropriateness&#x2014;comprising a total of 18 secondary assessment indicators.</p><sec id="s4-1"><title>Interpretation of Results and Comparison With Previous Research</title><p>Existing evaluations of HRSs in most studies predominantly focus on accuracy metrics within the performance dimension, with some studies exhibiting nonstandardized indicator selection and a general lack of user participation in the evaluation process [<xref ref-type="bibr" rid="ref19">19</xref>]. Even in the few HRS studies that do incorporate user involvement, such participation is typically restricted to assessments of clinical effectiveness, feasibility, or user experience [<xref ref-type="bibr" rid="ref4">4</xref>]. While these dimensions are important, they alone do not capture the broader clinical, social, and economic implications of deploying RSs in health care settings. General frameworks exist for health IT; however, current evaluation tools tend to be narrowly focused on specific dimensions, such as the user-centric framework proposed by Knijnenburg et al [<xref ref-type="bibr" rid="ref30">30</xref>]. Some evaluation tools have been developed for DHTs [<xref ref-type="bibr" rid="ref31">31</xref>-<xref ref-type="bibr" rid="ref33">33</xref>], such as the Digital Health Technology Assessment developed through the cooperation between the Finnish Coordinating Center for Health Technology Assessment and the University of Oulu&#x2019;s Faculty of Medicine [<xref ref-type="bibr" rid="ref33">33</xref>]. This framework was designed to conduct evidence-based reviews of DHTs focusing on dimensions such as product information, technical stability, cost, effectiveness, clinical safety, data security, usability, accessibility, interoperability, and patient and organizational considerations. 
However, it does not fully address legal, social, and ethical aspects.</p><p>Compared with previous research, this work expands the evaluative perspective of HRSs from performance-focused assessment to a holistic, multicriteria framework informed by health technology policy and real-world implementation factors. In terms of core dimensions, our framework aligns with most of the aforementioned evaluation frameworks by addressing safety; effectiveness; economic aspects; organizational implications; and sociocultural, ethical, and legal considerations [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref33">33</xref>]. To address the lack of technical granularity and insufficient practical guidance in existing evaluations of HRSs, we propose an evaluation index system with 2 levels (comprising primary and secondary indicators), providing more specialized and refined assessment support. For example, at the performance level, it specifically incorporates key technical indicators such as recommendation accuracy, content coverage, result diversity, user trust, system robustness, and system response efficiency, tailored to meet the specialized assessment needs of HRSs with regard to algorithmic quality and service efficiency [<xref ref-type="bibr" rid="ref34">34</xref>]. More importantly, this secondary evaluation index system serves not only as a tool for periodic assessment but also as a driver for the continuous iteration and optimization of HRSs. By integrating quantitative performance data, user feedback, and multidimensional evidence from real-world applications, the evaluation process establishes a closed-loop feedback mechanism.</p><p>Quantitative assessment of system performance can identify shortcomings in recommendation accuracy, response speed, coverage, and diversity, thereby providing clear directions and evidence for subsequent technological iterations and optimizations [<xref ref-type="bibr" rid="ref35">35</xref>]. 
HRSs are capable of conducting precise analyses based on users&#x2019; physiological, psychological, and behavioral data, offering tailored health advice and action plans to facilitate refined health management [<xref ref-type="bibr" rid="ref30">30</xref>]. However, erroneous recommendations may lead to adverse outcomes, posing risks to patient safety. Therefore, ensuring the effectiveness and safety of recommendation results is crucial for broader promotion and application. Specifically, it is recommended to assess health behaviors and behavioral outcomes through user deployment in real-world application. For example, Bidargaddi et al [<xref ref-type="bibr" rid="ref17">17</xref>] reported findings from a randomized controlled trial evaluating the efficacy of a guided recommendation service for readily available mobile mental health apps targeting young people. Furthermore, the economic evaluation of HRSs is of paramount importance [<xref ref-type="bibr" rid="ref36">36</xref>]. Given that the core aim of this technology is to enhance health care efficiency, users can participate in real-world deployment, enabling assessment of whether the system reduces unnecessary medical expenditures, improves diagnostic and treatment quality, and promotes preventive care [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>]. Such assessments can quantify the cost-effectiveness ratio of the system, providing policymakers with decision-making evidence to determine the feasibility of large-scale promotion and investment. In terms of social appropriateness, HRSs must exhibit good user acceptance, feasibility, and ease of dissemination. Therefore, it is recommended to incorporate user-centered design methods early in the development process. 
For example, Leng et al [<xref ref-type="bibr" rid="ref39">39</xref>] used a convergent mixed methods approach to evaluate usability, combining quantitative and qualitative data from diverse users to identify barriers and collaboratively refine system design. Given the unique nature of HRSs as an artificial intelligence technology, the evaluation process must also address ethical issues and regulatory compliance to ensure effective promotion and widespread application [<xref ref-type="bibr" rid="ref40">40</xref>].</p><p>The evaluation index system for HRSs holds significant practical implications for research and development institutions, individual and institutional users, and policymakers in health care management [<xref ref-type="bibr" rid="ref6">6</xref>]. For organizations involved in the development of HRSs, the system helps development teams prioritize core metrics, including accuracy, diversity, real-time performance, and user satisfaction, throughout the technological iteration process [<xref ref-type="bibr" rid="ref15">15</xref>]. By leveraging the evaluation framework to quantify product performance, the evaluation index system enables the swift identification and resolution of existing issues, thereby significantly enhancing the overall efficacy of HRSs. This provides practical and effective guidance for the design, optimization, and development of HRSs. For health care institutions, HRSs can be leveraged to better serve patients and improve service quality and patient satisfaction while simultaneously reducing medical costs and promoting the efficient allocation of health care resources [<xref ref-type="bibr" rid="ref41">41</xref>]. This evaluation index system offers clear direction for how institutions can rationally apply and manage such systems in the future. 
For government agencies, authoritative evaluation results can serve as critical decision-making references when promoting HRSs [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]. This aids in formulating policies, regulations, and technical standards that are better aligned with practical needs and specifically targeted to real-world challenges, ensuring the healthy and orderly development of this innovative technology.</p></sec><sec id="s4-2"><title>Implications for Clinical Practice and Future Research</title><p>First, given the dynamic and evolving trends in user needs and health issues, the evaluation metrics for HRSs will need to be periodically updated to accurately reflect these changes and address diverse health requirements. Second, it is essential to design differentiated evaluation content based on secondary indicators according to the characteristics of different disease profiles or user groups (eg, individuals with chronic conditions) to ensure that the system can deliver personalized recommendations that support individuals in achieving their specific health goals [<xref ref-type="bibr" rid="ref44">44</xref>]. Finally, further exploration of the application of emerging technologies such as artificial intelligence and blockchain in the evaluation framework is necessary to investigate how to enhance assessment efficiency and accuracy, improve data transparency and security, and provide robust technical support for the continuous optimization of HRSs [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>].</p></sec><sec id="s4-3"><title>Strengths and Limitations</title><p>The high expert consensus achieved across Delphi rounds highlights the relevance and acceptability of the proposed indicators. The use of the AHP made the index system clearer and more structured, facilitating its application by decision-makers. 
As HRSs continue to be integrated into clinical decision support, patient engagement platforms, and digital therapeutics, the evaluation system developed in this study provides a structured foundation for guiding system design, regulatory decision-making, and cross-study comparability.</p><p>Because HRSs are a relatively new research field, empirical evidence on their practical implementation, particularly systematic deployment, remains limited [<xref ref-type="bibr" rid="ref47">47</xref>]. In the future, based on updates from empirical research, existing evidence will be integrated to develop specific quantitative methods and evaluation criteria for the HRS evaluation index system. Additionally, the indicators will be further optimized during practical application, continuously improving the evaluation index system for HRSs. It should be noted that the current framework relies heavily on expert input from a specific regional context, primarily China, which may affect its generalizability. Differences in health care systems, cultural norms, and organizational practices across regions may influence the applicability of certain indicators in other settings.</p></sec><sec id="s4-4"><title>Conclusions</title><p>This study addresses the lack of clear evaluation standards for HRSs by using rigorous scientific methodologies and a systematic development process to construct a comprehensive and structured evaluation index system. In contrast to existing studies that have largely focused on algorithmic performance, this work explicitly incorporates critical but often overlooked dimensions such as safety, economy, and social appropriateness. By establishing a structured and consensus-based set of criteria, this study addresses a key gap in current HRS evaluation practices and offers a practical foundation for developers, researchers, and policymakers. 
Future research should apply this framework when designing or deploying HRSs while continuously refining its components to meet emerging technological and ethical challenges in digital health.</p></sec></sec></body><back><notes><sec><title>Funding</title><p>This research was supported by the National Natural Science Foundation of China (72274007) and the China Postdoctoral Science Foundation (2025M772234).</p></sec><sec><title>Data Availability</title><p>The data supporting the findings of this study are available within the article and its supplementary materials.</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AHP</term><def><p>analytic hierarchy process</p></def></def-item><def-item><term id="abb2">Ca</term><def><p>expert judgment coefficient</p></def></def-item><def-item><term id="abb3">Cr</term><def><p>expert authority coefficient</p></def></def-item><def-item><term id="abb4">Cs</term><def><p>expert familiarity coefficient</p></def></def-item><def-item><term id="abb5">CV</term><def><p>coefficient of variation</p></def></def-item><def-item><term id="abb6">DHT</term><def><p>digital health technology</p></def></def-item><def-item><term id="abb7">HRS</term><def><p>health recommender system</p></def></def-item><def-item><term id="abb8">HTA</term><def><p>health technology assessment</p></def></def-item><def-item><term id="abb9">RS</term><def><p>recommender system</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Noar</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Benac</surname><given-names>CN</given-names> </name><name name-style="western"><surname>Harris</surname><given-names>MS</given-names> </name></person-group><article-title>Does tailoring 
matter? Meta-analytic review of tailored print health behavior change interventions</article-title><source>Psychol Bull</source><year>2007</year><month>07</month><volume>133</volume><issue>4</issue><fpage>673</fpage><lpage>693</lpage><pub-id pub-id-type="doi">10.1037/0033-2909.133.4.673</pub-id><pub-id pub-id-type="medline">17592961</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Park</surname><given-names>DH</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>HK</given-names> </name><name name-style="western"><surname>Choi</surname><given-names>IY</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>JK</given-names> </name></person-group><article-title>A literature review and classification of recommender systems research</article-title><source>Expert Syst Appl</source><year>2012</year><month>09</month><volume>39</volume><issue>11</issue><fpage>10059</fpage><lpage>10072</lpage><pub-id pub-id-type="doi">10.1016/j.eswa.2012.02.038</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cheung</surname><given-names>KL</given-names> </name><name name-style="western"><surname>Durusu</surname><given-names>D</given-names> </name><name name-style="western"><surname>Sui</surname><given-names>X</given-names> </name><name name-style="western"><surname>de Vries</surname><given-names>H</given-names> </name></person-group><article-title>How recommender systems could support and enhance computer-tailored digital health programs: a scoping review</article-title><source>Digit Health</source><year>2019</year><volume>5</volume><fpage>2055207618824727</fpage><pub-id pub-id-type="doi">10.1177/2055207618824727</pub-id><pub-id 
pub-id-type="medline">30800414</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sun</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ji</surname><given-names>M</given-names> </name><name name-style="western"><surname>Pei</surname><given-names>L</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Z</given-names> </name></person-group><article-title>Development and evaluation of health recommender systems: systematic scoping review and evidence mapping</article-title><source>J Med Internet Res</source><year>2023</year><month>01</month><day>19</day><volume>25</volume><fpage>e38184</fpage><pub-id pub-id-type="doi">10.2196/38184</pub-id><pub-id pub-id-type="medline">36656630</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cai</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Yu</surname><given-names>F</given-names> </name><name name-style="western"><surname>Kumar</surname><given-names>M</given-names> </name><name name-style="western"><surname>Gladney</surname><given-names>R</given-names> </name><name name-style="western"><surname>Mostafa</surname><given-names>J</given-names> </name></person-group><article-title>Health recommender systems development, usage, and evaluation from 2010 to 2022: a scoping review</article-title><source>Int J Environ Res Public Health</source><year>2022</year><month>11</month><day>16</day><volume>19</volume><issue>22</issue><fpage>15115</fpage><pub-id pub-id-type="doi">10.3390/ijerph192215115</pub-id><pub-id pub-id-type="medline">36429832</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ananthakrishnan</surname><given-names>A</given-names> </name><name name-style="western"><surname>Milne-Ives</surname><given-names>M</given-names> </name><name name-style="western"><surname>Cong</surname><given-names>C</given-names> </name><etal/></person-group><article-title>The evaluation of health recommender systems: a scoping review</article-title><source>Int J Med Inform</source><year>2025</year><month>03</month><volume>195</volume><fpage>105697</fpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2024.105697</pub-id><pub-id pub-id-type="medline">39608231</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>He</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>H</given-names> </name><name name-style="western"><surname>Xiang</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Establishing an evaluation indicator system for user satisfaction with hypertension management apps: combining user-generated content and analytic hierarchy process</article-title><source>J Med Internet Res</source><year>2024</year><month>09</month><day>3</day><volume>26</volume><fpage>e60773</fpage><pub-id pub-id-type="doi">10.2196/60773</pub-id><pub-id pub-id-type="medline">39226103</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Moshi</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Tooher</surname><given-names>R</given-names> </name><name name-style="western"><surname>Merlin</surname><given-names>T</given-names> </name></person-group><article-title>Suitability of current evaluation frameworks for use in the health technology 
assessment of mobile medical applications: a systematic review</article-title><source>Int J Technol Assess Health Care</source><year>2018</year><month>01</month><volume>34</volume><issue>5</issue><fpage>464</fpage><lpage>475</lpage><pub-id pub-id-type="doi">10.1017/S026646231800051X</pub-id><pub-id pub-id-type="medline">30201060</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Unsworth</surname><given-names>H</given-names> </name><name name-style="western"><surname>Dillon</surname><given-names>B</given-names> </name><name name-style="western"><surname>Collinson</surname><given-names>L</given-names> </name><etal/></person-group><article-title>The NICE evidence standards framework for digital health and care technologies - developing and maintaining an innovative evidence framework with global impact</article-title><source>Digit Health</source><year>2021</year><volume>7</volume><fpage>20552076211018617</fpage><pub-id pub-id-type="doi">10.1177/20552076211018617</pub-id><pub-id pub-id-type="medline">34249371</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="web"><article-title>Digital technology assessment criteria (DTAC)</article-title><source>National Health Service England</source><access-date>2025-11-28</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.nhsx.nhs.uk/key-tools-and-info/digital-technology-assessment-criteria-dtac">https://www.nhsx.nhs.uk/key-tools-and-info/digital-technology-assessment-criteria-dtac</ext-link></comment></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vis</surname><given-names>C</given-names> </name><name name-style="western"><surname>B&#x00FC;hrmann</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Riper</surname><given-names>H</given-names> </name><name name-style="western"><surname>Ossebaard</surname><given-names>HC</given-names> </name></person-group><article-title>Health technology assessment frameworks for eHealth: a systematic review</article-title><source>Int J Technol Assess Health Care</source><year>2020</year><month>06</month><volume>36</volume><issue>3</issue><fpage>204</fpage><lpage>216</lpage><pub-id pub-id-type="doi">10.1017/S026646232000015X</pub-id><pub-id pub-id-type="medline">32297588</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Haverinen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ker&#x00E4;nen</surname><given-names>N</given-names> </name><name name-style="western"><surname>Falkenbach</surname><given-names>P</given-names> </name><name name-style="western"><surname>Maijala</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kolehmainen</surname><given-names>T</given-names> </name><name name-style="western"><surname>Reponen</surname><given-names>J</given-names> </name></person-group><article-title>Digi-HTA: health technology assessment framework for digital healthcare services</article-title><source>Finn J eHealth eWelfare</source><year>2019</year><volume>11</volume><issue>4</issue><pub-id pub-id-type="doi">10.23996/fjhw.82538</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Etemadi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Bazzaz Abkenar</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ahmadzadeh</surname><given-names>A</given-names> </name><etal/></person-group><article-title>A systematic review of healthcare recommender 
systems: open issues, challenges, and techniques</article-title><source>Expert Syst Appl</source><year>2023</year><month>03</month><volume>213</volume><fpage>118823</fpage><pub-id pub-id-type="doi">10.1016/j.eswa.2022.118823</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Abhari</surname><given-names>S</given-names> </name><name name-style="western"><surname>Safdari</surname><given-names>R</given-names> </name><name name-style="western"><surname>Azadbakht</surname><given-names>L</given-names> </name><etal/></person-group><article-title>A systematic review of nutrition recommendation systems: with focus on technical aspects</article-title><source>J Biomed Phys Eng</source><year>2019</year><month>12</month><volume>9</volume><issue>6</issue><fpage>591</fpage><lpage>602</lpage><pub-id pub-id-type="doi">10.31661/jbpe.v0i0.1248</pub-id><pub-id pub-id-type="medline">32039089</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Isinkaye</surname><given-names>FO</given-names> </name><name name-style="western"><surname>Folajimi</surname><given-names>YO</given-names> </name><name name-style="western"><surname>Ojokoh</surname><given-names>BA</given-names> </name></person-group><article-title>Recommendation systems: principles, methods and evaluation</article-title><source>Egypt Inform J</source><year>2015</year><month>11</month><volume>16</volume><issue>3</issue><fpage>261</fpage><lpage>273</lpage><pub-id pub-id-type="doi">10.1016/j.eij.2015.06.005</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mustaqeem</surname><given-names>A</given-names> </name><name 
name-style="western"><surname>Anwar</surname><given-names>SM</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Majid</surname><given-names>M</given-names> </name></person-group><article-title>A statistical analysis based recommender model for heart disease patients</article-title><source>Int J Med Inform</source><year>2017</year><month>12</month><volume>108</volume><fpage>134</fpage><lpage>145</lpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2017.10.008</pub-id><pub-id pub-id-type="medline">29132619</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bidargaddi</surname><given-names>N</given-names> </name><name name-style="western"><surname>Musiat</surname><given-names>P</given-names> </name><name name-style="western"><surname>Winsall</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Efficacy of a web-based guided recommendation service for a curated list of readily available mental health and well-being mobile apps for young people: randomized controlled trial</article-title><source>J Med Internet Res</source><year>2017</year><month>05</month><day>12</day><volume>19</volume><issue>5</issue><fpage>e141</fpage><pub-id pub-id-type="doi">10.2196/jmir.6775</pub-id><pub-id pub-id-type="medline">28500020</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wang</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>H</given-names> </name><name name-style="western"><surname>Cui</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Using natural language processing techniques to provide personalized educational materials 
for chronic disease patients in China: development and assessment of a knowledge-based health recommender system</article-title><source>JMIR Med Inform</source><year>2020</year><month>04</month><day>23</day><volume>8</volume><issue>4</issue><fpage>e17642</fpage><pub-id pub-id-type="doi">10.2196/17642</pub-id><pub-id pub-id-type="medline">32324148</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>De Croon</surname><given-names>R</given-names> </name><name name-style="western"><surname>Van Houdt</surname><given-names>L</given-names> </name><name name-style="western"><surname>Htun</surname><given-names>NN</given-names> </name><name name-style="western"><surname>&#x0160;tiglic</surname><given-names>G</given-names> </name><name name-style="western"><surname>Vanden Abeele</surname><given-names>V</given-names> </name><name name-style="western"><surname>Verbert</surname><given-names>K</given-names> </name></person-group><article-title>Health recommender systems: systematic review</article-title><source>J Med Internet Res</source><year>2021</year><month>06</month><day>29</day><volume>23</volume><issue>6</issue><fpage>e18035</fpage><pub-id pub-id-type="doi">10.2196/18035</pub-id><pub-id pub-id-type="medline">34185014</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Torrent-Fontbona</surname><given-names>F</given-names> </name><name name-style="western"><surname>Lopez</surname><given-names>B</given-names> </name></person-group><article-title>Personalized adaptive CBR bolus recommender system for type 1 diabetes</article-title><source>IEEE J Biomed Health Inform</source><year>2019</year><month>01</month><volume>23</volume><issue>1</issue><fpage>387</fpage><lpage>394</lpage><pub-id 
pub-id-type="doi">10.1109/JBHI.2018.2813424</pub-id><pub-id pub-id-type="medline">29994082</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Cho</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Sondhi</surname><given-names>P</given-names> </name><name name-style="western"><surname>Zhai</surname><given-names>C</given-names> </name><name name-style="western"><surname>Schatz</surname><given-names>BR</given-names> </name></person-group><article-title>Resolving healthcare forum posts via similar thread retrieval</article-title><conf-name>Proceedings of the 5th ACM Conference on Bioinformatics, Computational Biology, and Health Informatics</conf-name><conf-date>Sep 20-23, 2014</conf-date><pub-id pub-id-type="doi">10.1145/2649387.2649399</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sumsion</surname><given-names>T</given-names> </name></person-group><article-title>The Delphi technique: an adaptive research tool</article-title><source>Br J Occup Therapy</source><year>1998</year><month>04</month><volume>61</volume><issue>4</issue><fpage>153</fpage><lpage>156</lpage><pub-id pub-id-type="doi">10.1177/030802269806100403</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Brady</surname><given-names>SR</given-names> </name></person-group><article-title>Utilizing and adapting the Delphi method for use in qualitative research</article-title><source>Int J Qual Methods</source><year>2015</year><month>12</month><day>9</day><volume>14</volume><issue>5</issue><pub-id pub-id-type="doi">10.1177/1609406915621381</pub-id></nlm-citation></ref><ref 
id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wei</surname><given-names>Y</given-names> </name><name name-style="western"><surname>An</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Cao</surname><given-names>Q</given-names> </name><name name-style="western"><surname>Feng</surname><given-names>W</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>D</given-names> </name><name name-style="western"><surname>Zhu</surname><given-names>D</given-names> </name></person-group><article-title>Developing a professional competency framework for general practitioners in tertiary hospitals in China: a modified Delphi study</article-title><source>BMJ Open</source><year>2025</year><month>03</month><day>4</day><volume>15</volume><issue>3</issue><fpage>e082736</fpage><pub-id pub-id-type="doi">10.1136/bmjopen-2023-082736</pub-id><pub-id pub-id-type="medline">40037672</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>J&#x00FC;nger</surname><given-names>S</given-names> </name><name name-style="western"><surname>Payne</surname><given-names>SA</given-names> </name><name name-style="western"><surname>Brine</surname><given-names>J</given-names> </name><name name-style="western"><surname>Radbruch</surname><given-names>L</given-names> </name><name name-style="western"><surname>Brearley</surname><given-names>SG</given-names> </name></person-group><article-title>Guidance on Conducting and REporting DElphi Studies (CREDES) in palliative care: recommendations based on a methodological systematic review</article-title><source>Palliat Med</source><year>2017</year><month>09</month><volume>31</volume><issue>8</issue><fpage>684</fpage><lpage>706</lpage><pub-id pub-id-type="doi">10.1177/0269216317690685</pub-id><pub-id 
pub-id-type="medline">28190381</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Saaty</surname><given-names>TL</given-names> </name></person-group><article-title>A scaling method for priorities in hierarchical structures</article-title><source>J Math Psychol</source><year>1977</year><month>06</month><volume>15</volume><issue>3</issue><fpage>234</fpage><lpage>281</lpage><pub-id pub-id-type="doi">10.1016/0022-2496(77)90033-5</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Diamond</surname><given-names>IR</given-names> </name><name name-style="western"><surname>Grant</surname><given-names>RC</given-names> </name><name name-style="western"><surname>Feldman</surname><given-names>BM</given-names> </name><etal/></person-group><article-title>Defining consensus: a systematic review recommends methodologic criteria for reporting of Delphi studies</article-title><source>J Clin Epidemiol</source><year>2014</year><month>04</month><volume>67</volume><issue>4</issue><fpage>401</fpage><lpage>409</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2013.12.002</pub-id><pub-id pub-id-type="medline">24581294</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Keeney</surname><given-names>S</given-names> </name><name name-style="western"><surname>Hasson</surname><given-names>F</given-names> </name><name name-style="western"><surname>McKenna</surname><given-names>HP</given-names> </name></person-group><article-title>A critical review of the Delphi technique as a research methodology for nursing</article-title><source>Int J Nurs 
Stud</source><year>2001</year><month>04</month><volume>38</volume><issue>2</issue><fpage>195</fpage><lpage>200</lpage><pub-id pub-id-type="doi">10.1016/s0020-7489(00)00044-4</pub-id><pub-id pub-id-type="medline">11223060</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Saaty</surname><given-names>TL</given-names> </name></person-group><article-title>Decision making with the analytic hierarchy process</article-title><source>Int J Serv Sci</source><year>2008</year><volume>1</volume><issue>1</issue><fpage>83</fpage><pub-id pub-id-type="doi">10.1504/IJSSCI.2008.017590</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Knijnenburg</surname><given-names>BP</given-names> </name><name name-style="western"><surname>Willemsen</surname><given-names>MC</given-names> </name><name name-style="western"><surname>Gantner</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Soncu</surname><given-names>H</given-names> </name><name name-style="western"><surname>Newell</surname><given-names>C</given-names> </name></person-group><article-title>Explaining the user experience of recommender systems</article-title><source>User Model User Adap Inter</source><year>2012</year><month>10</month><volume>22</volume><issue>4-5</issue><fpage>441</fpage><lpage>504</lpage><pub-id pub-id-type="doi">10.1007/s11257-011-9118-4</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kidholm</surname><given-names>K</given-names> </name><name name-style="western"><surname>Ekeland</surname><given-names>AG</given-names> </name><name name-style="western"><surname>Jensen</surname><given-names>LK</given-names> 
</name><etal/></person-group><article-title>A model for assessment of telemedicine applications: MAST</article-title><source>Int J Technol Assess Health Care</source><year>2012</year><month>01</month><volume>28</volume><issue>1</issue><fpage>44</fpage><lpage>51</lpage><pub-id pub-id-type="doi">10.1017/S0266462311000638</pub-id><pub-id pub-id-type="medline">22617736</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kristensen</surname><given-names>FB</given-names> </name><name name-style="western"><surname>Lampe</surname><given-names>K</given-names> </name><name name-style="western"><surname>Wild</surname><given-names>C</given-names> </name><name name-style="western"><surname>Cerbo</surname><given-names>M</given-names> </name><name name-style="western"><surname>Goettsch</surname><given-names>W</given-names> </name><name name-style="western"><surname>Becla</surname><given-names>L</given-names> </name></person-group><article-title>The HTA Core Model&#x00AE;&#x2014;10 years of developing an international framework to share multidimensional value assessment</article-title><source>Value Health</source><year>2017</year><month>02</month><volume>20</volume><issue>2</issue><fpage>244</fpage><lpage>250</lpage><pub-id pub-id-type="doi">10.1016/j.jval.2016.12.010</pub-id><pub-id pub-id-type="medline">28237203</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Haverinen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Turpeinen</surname><given-names>M</given-names> </name><name name-style="western"><surname>Falkenbach</surname><given-names>P</given-names> </name><name name-style="western"><surname>Reponen</surname><given-names>J</given-names> </name></person-group><article-title>Implementation of a new
Digi-HTA process for digital health technologies in Finland</article-title><source>Int J Technol Assess Health Care</source><year>2022</year><month>08</month><day>19</day><volume>38</volume><issue>1</issue><fpage>e68</fpage><pub-id pub-id-type="doi">10.1017/S0266462322000502</pub-id><pub-id pub-id-type="medline">35983625</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhu</surname><given-names>YX</given-names> </name><name name-style="western"><surname>L&#x00FC;</surname><given-names>LY</given-names> </name></person-group><article-title>Evaluation metrics for recommender systems</article-title><source>J Univ Electron Sci Technol China</source><year>2012</year><volume>41</volume><issue>2</issue><fpage>163</fpage><lpage>175</lpage><pub-id pub-id-type="doi">10.3969/j.issn.1001-0548.2012.02.001</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wiesner</surname><given-names>M</given-names> </name><name name-style="western"><surname>Pfeifer</surname><given-names>D</given-names> </name></person-group><article-title>Health recommender systems: concepts, requirements, technical basics and challenges</article-title><source>Int J Environ Res Public Health</source><year>2014</year><month>03</month><day>3</day><volume>11</volume><issue>3</issue><fpage>2580</fpage><lpage>2607</lpage><pub-id pub-id-type="doi">10.3390/ijerph110302580</pub-id><pub-id pub-id-type="medline">24595212</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gomes</surname><given-names>M</given-names> </name><name name-style="western"><surname>Murray</surname><given-names>E</given-names> </name><name 
name-style="western"><surname>Raftery</surname><given-names>J</given-names> </name></person-group><article-title>Economic evaluation of digital health interventions: methodological issues and recommendations for practice</article-title><source>Pharmacoeconomics</source><year>2022</year><month>04</month><volume>40</volume><issue>4</issue><fpage>367</fpage><lpage>378</lpage><pub-id pub-id-type="doi">10.1007/s40273-022-01130-0</pub-id><pub-id pub-id-type="medline">35132606</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Halvorsen</surname><given-names>N</given-names> </name><name name-style="western"><surname>Mori</surname><given-names>Y</given-names> </name></person-group><article-title>Cost-effectiveness for artificial intelligence in colonoscopy</article-title><source>Gastrointest Endosc Clin N Am</source><year>2025</year><month>04</month><volume>35</volume><issue>2</issue><fpage>401</fpage><lpage>405</lpage><pub-id pub-id-type="doi">10.1016/j.giec.2024.10.008</pub-id><pub-id pub-id-type="medline">40021236</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Morrell</surname><given-names>CJ</given-names> </name><name name-style="western"><surname>Sutcliffe</surname><given-names>P</given-names> </name><name name-style="western"><surname>Booth</surname><given-names>A</given-names> </name><etal/></person-group><article-title>A systematic review, evidence synthesis and meta-analysis of quantitative and qualitative studies evaluating the clinical effectiveness, the cost-effectiveness, safety and acceptability of interventions to prevent postnatal depression</article-title><source>Health Technol Assess</source><year>2016</year><month>05</month><volume>20</volume><issue>37</issue><fpage>1</fpage><lpage>414</lpage><pub-id 
pub-id-type="doi">10.3310/hta20370</pub-id><pub-id pub-id-type="medline">27184772</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Leng</surname><given-names>M</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Li</surname><given-names>C</given-names> </name><name name-style="western"><surname>Han</surname><given-names>S</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Z</given-names> </name></person-group><article-title>Usability evaluation of a knowledge graph-based dementia care intelligent recommender system: mixed methods study</article-title><source>J Med Internet Res</source><year>2023</year><month>09</month><day>26</day><volume>25</volume><fpage>e45788</fpage><pub-id pub-id-type="doi">10.2196/45788</pub-id><pub-id pub-id-type="medline">37751241</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Vervoort</surname><given-names>D</given-names> </name><name name-style="western"><surname>Tam</surname><given-names>DY</given-names> </name><name name-style="western"><surname>Wijeysundera</surname><given-names>HC</given-names> </name></person-group><article-title>Health technology assessment for cardiovascular digital health technologies and artificial intelligence: why is it different?</article-title><source>Can J Cardiol</source><year>2022</year><month>02</month><volume>38</volume><issue>2</issue><fpage>259</fpage><lpage>266</lpage><pub-id pub-id-type="doi">10.1016/j.cjca.2021.08.015</pub-id><pub-id pub-id-type="medline">34461229</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Hors-Fraile</surname><given-names>S</given-names> </name><name name-style="western"><surname>Rivera-Romero</surname><given-names>O</given-names> </name><name name-style="western"><surname>Schneider</surname><given-names>F</given-names> </name><etal/></person-group><article-title>Analyzing recommender systems for health promotion using a multidisciplinary taxonomy: a scoping review</article-title><source>Int J Med Inform</source><year>2018</year><month>06</month><volume>114</volume><fpage>143</fpage><lpage>155</lpage><pub-id pub-id-type="doi">10.1016/j.ijmedinf.2017.12.018</pub-id><pub-id pub-id-type="medline">29331276</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Alotaibi</surname><given-names>YK</given-names> </name><name name-style="western"><surname>Federico</surname><given-names>F</given-names> </name></person-group><article-title>The impact of health information technology on patient safety</article-title><source>Saudi Med J</source><year>2017</year><month>12</month><volume>38</volume><issue>12</issue><fpage>1173</fpage><lpage>1180</lpage><pub-id pub-id-type="doi">10.15537/smj.2017.12.20631</pub-id><pub-id pub-id-type="medline">29209664</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>K&#x00FC;hne</surname><given-names>F</given-names> </name><name name-style="western"><surname>Schomaker</surname><given-names>M</given-names> </name><name name-style="western"><surname>Stojkov</surname><given-names>I</given-names> </name><etal/></person-group><article-title>Causal evidence in health decision making: methodological approaches of causal inference and health decision science</article-title><source>Ger Med Sci</source><year>2022</year><volume>20</volume><fpage>Doc12</fpage><pub-id 
pub-id-type="doi">10.3205/000314</pub-id><pub-id pub-id-type="medline">36742460</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Felfernig</surname><given-names>A</given-names> </name><name name-style="western"><surname>Wundara</surname><given-names>M</given-names> </name><name name-style="western"><surname>Tran</surname><given-names>TNT</given-names> </name><etal/></person-group><article-title>Recommender systems for sustainability: overview and research issues</article-title><source>Front Big Data</source><year>2023</year><volume>6</volume><fpage>1284511</fpage><pub-id pub-id-type="doi">10.3389/fdata.2023.1284511</pub-id><pub-id pub-id-type="medline">37965497</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Agbo</surname><given-names>CC</given-names> </name><name name-style="western"><surname>Mahmoud</surname><given-names>QH</given-names> </name><name name-style="western"><surname>Eklund</surname><given-names>JM</given-names> </name></person-group><article-title>Blockchain technology in healthcare: a systematic review</article-title><source>Healthcare (Basel)</source><year>2019</year><month>04</month><day>4</day><volume>7</volume><issue>2</issue><fpage>56</fpage><pub-id pub-id-type="doi">10.3390/healthcare7020056</pub-id><pub-id pub-id-type="medline">30987333</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Dubovitskaya</surname><given-names>A</given-names> </name><name name-style="western"><surname>Novotny</surname><given-names>P</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>Z</given-names> </name><name 
name-style="western"><surname>Wang</surname><given-names>F</given-names> </name></person-group><article-title>Applications of blockchain technology for data-sharing in oncology: results from a systematic literature review</article-title><source>Oncology</source><year>2020</year><volume>98</volume><issue>6</issue><fpage>403</fpage><lpage>411</lpage><pub-id pub-id-type="doi">10.1159/000504325</pub-id><pub-id pub-id-type="medline">31794967</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ngiam</surname><given-names>KY</given-names> </name><name name-style="western"><surname>Khor</surname><given-names>IW</given-names> </name></person-group><article-title>Big data and machine learning algorithms for health-care delivery</article-title><source>Lancet Oncol</source><year>2019</year><month>05</month><volume>20</volume><issue>5</issue><fpage>e262</fpage><lpage>e273</lpage><pub-id pub-id-type="doi">10.1016/S1470-2045(19)30149-4</pub-id><pub-id pub-id-type="medline">31044724</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Summary of the studies included in the Delphi process and the importance scores with coefficients of variation for primary indicators across both Delphi rounds.</p><media xlink:href="formative_v9i1e79997_app1.docx" xlink:title="DOCX File, 60 KB"/></supplementary-material></app-group></back></article>