<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v9i1e75215</article-id><article-id pub-id-type="doi">10.2196/75215</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Validation of The Umbrella Collaboration for Tertiary Evidence Synthesis in Geriatrics: Mixed Methods Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes" equal-contrib="yes"><name name-style="western"><surname>Carrillo</surname><given-names>Beltran</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name name-style="western"><surname>Rubinos-Cuadrado</surname><given-names>Marta</given-names></name><degrees>RNC</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="fn" rid="equal-contrib1">*</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Parellada</surname><given-names>Jazmin</given-names></name><degrees>BBI</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib 
contrib-type="author"><name name-style="western"><surname>Palacios</surname><given-names>Alejandra</given-names></name><degrees>BME</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Carrillo-Rubinos</surname><given-names>Beltran</given-names></name><degrees>BME</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Canillas</surname><given-names>Fernando</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Bazt&#x00E1;n Cort&#x00E9;s</surname><given-names>Juan Jos&#x00E9;</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>G&#x00F3;mez-Pav&#x00F3;n</surname><given-names>Javier</given-names></name><degrees>MD, PhD</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib></contrib-group><aff id="aff1"><institution>The Umbrella Collaboration</institution><addr-line>C/ Ferraz, 49</addr-line><addr-line>Madrid</addr-line><country>Spain</country></aff><aff id="aff2"><institution>Department of Traumatology and Orthopedic Surgery, Hospital Central de la Cruz Roja San Jos&#x00E9; y Santa Adela</institution><addr-line>Madrid</addr-line><country>Spain</country></aff><aff id="aff3"><institution>Department of Geriatrics, Hospital Central de la Cruz Roja San Jos&#x00E9; y Santa Adela</institution><addr-line>Madrid</addr-line><country>Spain</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Mavragani</surname><given-names>Amaryllis</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name 
name-style="western"><surname>Onah</surname><given-names>Chibuzo</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Beltran Carrillo, MD, PhD, The Umbrella Collaboration, C/ Ferraz, 49, Madrid, 28008, Spain, 34 637016776; <email>bcm@theumbrellacollaboration.org</email></corresp><fn fn-type="equal" id="equal-contrib1"><label>*</label><p>these authors contributed equally</p></fn></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>8</day><month>7</month><year>2025</year></pub-date><volume>9</volume><elocation-id>e75215</elocation-id><history><date date-type="received"><day>30</day><month>03</month><year>2025</year></date><date date-type="rev-recd"><day>15</day><month>05</month><year>2025</year></date><date date-type="accepted"><day>21</day><month>05</month><year>2025</year></date></history><copyright-statement>&#x00A9; Beltran Carrillo, Marta Rubinos-Cuadrado, Jazmin Parellada, Alejandra Palacios, Beltran Carrillo-Rubinos, Fernando Canillas, Juan Jos&#x00E9; Bazt&#x00E1;n Cort&#x00E9;s, Javier G&#x00F3;mez-Pav&#x00F3;n. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 8.7.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. 
The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2025/1/e75215"/><abstract><sec><title>Background</title><p>The synthesis of evidence in health care is essential for informed decision-making and policy development. This study aims to validate The Umbrella Collaboration (TU), an innovative, semiautomated tertiary evidence synthesis methodology, by comparing it with traditional umbrella reviews (TURs), which are currently the gold standard.</p></sec><sec><title>Objective</title><p>The primary objective of this study is to evaluate whether TU, an artificial intelligence&#x2013;assisted, software-driven system for tertiary evidence synthesis, can achieve effectiveness comparable to that of TURs, while offering a more timely, efficient, and comprehensive approach.</p></sec><sec sec-type="methods"><title>Methods</title><p>This comparative study evaluated TU against TURs across 8 matched projects in geriatrics. For each selected TUR, a parallel TU project was conducted using the same research question. Outcomes of interest (OoIs), effect sizes, certainty ratings, and execution times were systematically compared. Effect sizes were assessed both quantitatively, by transforming TUR metrics to Cohen <italic>d</italic> and correlating them with TU&#x2019;s R<sub>TU</sub> metric, and qualitatively, through categorical classifications (trivial, small, moderate, and large). Certainty levels were compared by mapping Grading of Recommendations Assessment, Development, and Evaluation (GRADE) ratings and TU&#x2019;s sentiment analysis scores onto a common 0&#x2010;1 scale. Execution time was measured precisely in TU, while TUR durations were estimated from literature benchmarks. 
Statistical analyses included chi-square tests and Spearman correlations.</p></sec><sec sec-type="results"><title>Results</title><p>Eight TURs in geriatrics were matched with parallel projects using TU. TU replicated 73 of the 86 (85%) OoIs identified by TURs and reported an additional 337 OoIs, representing a 4.77-fold increase in outcome identification. In the comparison of effect size classifications, full concordance was observed in 24 of the 48 (50%) cases, and consistent concordance (full plus 1-level deviation) in 45 of the 48 (94%) cases, with a moderate strength of association (Cram&#x00E9;r <italic>V</italic>=0.339). The correlation of transformed certainty values between TU and GRADE yielded a statistically significant Spearman coefficient (&#x03C1;=0.446; <italic>P</italic>=.02). The average execution time per TU project was 4 hours and 46 minutes, compared with estimated durations of 6&#x2010;12 months for TURs.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The TU demonstrated high concordance with TURs, replicating 73 of the 86 (85%) outcomes identified by TURs and identifying nearly 5 times as many additional outcomes. The experimental effect size metric (R<sub>TU</sub>) showed moderate agreement with conventional measures, and the certainty ratings derived from sentiment analysis correlated acceptably with GRADE-based assessments. 
While further validation is needed, TU appears to be a valid and efficient approach for tertiary evidence synthesis, offering a scalable and time-efficient alternative when rapid results are required.</p></sec><sec sec-type="registered-report"><title>International Registered Report Identifier (IRRID)</title><p>RR2-10.2196/67248</p></sec></abstract><kwd-group><kwd>tertiary evidence synthesis</kwd><kwd>The Umbrella Collaboration</kwd><kwd>umbrella reviews</kwd><kwd>health research methodology</kwd><kwd>AI-assisted synthesis</kwd><kwd>evidence-based decision-making</kwd><kwd>algorithms</kwd><kwd>analytics</kwd><kwd>artificial intelligence</kwd><kwd>digital health</kwd><kwd>digital interventions</kwd><kwd>digital technology</kwd><kwd>machine learning</kwd><kwd>models</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Synthesizing evidence in health care transforms large volumes of data into actionable knowledge, enabling decisions based on the best available information. This process integrates findings from multiple sources to produce clear, accurate, and accessible summaries for clinicians, policy makers, and patients alike [<xref ref-type="bibr" rid="ref1">1</xref>]. As a core component of knowledge translation, evidence synthesis bridges research and practice, and is essential for developing effective health policies [<xref ref-type="bibr" rid="ref2">2</xref>,<xref ref-type="bibr" rid="ref3">3</xref>].</p><p>Yet limited statistical and health literacy among professionals and the public often hampers the effective use of synthesized evidence [<xref ref-type="bibr" rid="ref4">4</xref>-<xref ref-type="bibr" rid="ref6">6</xref>]. 
This highlights the need for tools that democratize access to high-level information and support meaningful stakeholder participation in health care decisions [<xref ref-type="bibr" rid="ref7">7</xref>].</p><p>In recent years, tertiary evidence synthesis, commonly known as umbrella reviews, has emerged as a third tier in the evidence hierarchy, building on primary studies and systematic reviews with or without meta-analyses (SRs/MAs). Also referred to as overviews, meta-reviews, or meta-epidemiological studies [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref9">9</xref>], umbrella reviews (hereafter referred to as traditional umbrella reviews [TURs]) are particularly valuable for addressing broad questions, generating rapid insights, or navigating resource constraints. Despite structured guidance from organizations such as Cochrane and the Joanna Briggs Institute [<xref ref-type="bibr" rid="ref10">10</xref>-<xref ref-type="bibr" rid="ref14">14</xref>], methodological inconsistency across TURs persists due to variations in implementation.</p><p>The COVID-19 pandemic underscored the need for faster synthesis methods, even at the expense of some precision [<xref ref-type="bibr" rid="ref15">15</xref>]. In this context, The Umbrella Collaboration (TU) introduces a novel artificial intelligence (AI)&#x2013;assisted approach to tertiary synthesis, combining automation and human oversight. Although tools such as Covidence, Rayyan, and DistillerSR have improved aspects of secondary synthesis [<xref ref-type="bibr" rid="ref16">16</xref>], the application of AI to tertiary synthesis remains largely unexplored. Large language models (LLMs) such as ChatGPT show potential for automating SRs [<xref ref-type="bibr" rid="ref17">17</xref>], but dedicated software for tertiary synthesis has yet to be established.</p><p>TU addresses this gap by offering a structured, reproducible, and fully digital alternative to TURs. 
It is a patent-pending software system that automates tertiary evidence synthesis through a combination of algorithmic processes, natural language processing (NLP), and selective use of LLMs. TU retrieves abstracts of SRs/MAs from MEDLINE via PubMed, using LLMs (eg, ChatGPT-4 [<xref ref-type="bibr" rid="ref18">18</xref>]) to suggest related terms, validated by a human reviewer, to enhance search sensitivity without sacrificing specificity. Crucially, AI is limited to this initial phase; subsequent steps are fully managed by rule-based software to ensure transparency and auditability.</p><p>TU extracts and synthesizes key information, including outcomes of interest (OoIs), effect sizes (ESs), direction, statistical significance, and a certainty estimate derived from sentiment analysis, and presents results in plain language through an interactive web platform. The system updates daily, supporting the principles of living SRs [<xref ref-type="bibr" rid="ref19">19</xref>]. Unlike tools that assist in isolated stages of secondary synthesis, TU delivers an end-to-end solution tailored specifically for tertiary synthesis. A schematic diagram (<xref ref-type="fig" rid="figure1">Figure 1</xref>) illustrates the overall architecture, from automated literature retrieval to final result visualization. A more detailed technical description of the algorithms and processes is available in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>As AI evolves, fully automated synthesis workflows may become feasible, but rigorous validation is essential to ensure trust, transparency, and scientific integrity.</p><p>The implementation of new methodologies in the scientific field requires a comparative validation process with established methods to confirm their reliability and effectiveness. TU, being an innovative methodology still in its theoretical-conceptual stage, must be evaluated against established methodologies. 
Therefore, this study aims to validate TU by comparing its performance and outcomes with the gold standard, TURs, to establish its credibility and potential superiority.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>The Umbrella Collaboration workflow. LLM: large language model; MA: meta-analysis; NLP: natural language processing; SA: sentiment analysis; SR: systematic review.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e75215_fig01.png"/></fig></sec><sec id="s1-2"><title>Objectives</title><p>This study aims to evaluate whether TU, a software-driven and AI-assisted system, can produce results comparable to TURs, while offering a faster, more efficient, and potentially more comprehensive approach.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Study Design</title><p><xref ref-type="fig" rid="figure2">Figure 2</xref> outlines the study design. Using a structured comparative approach, 8 TURs in geriatrics were selected as reference models. Each was replicated in TU using the same research questions. Key variables, including OoI, ES, certainty, and execution time, were systematically collected and compared across both methodologies.</p><p>To compare both methodologies, we conducted a targeted PubMed search to identify representative TURs in geriatrics. Rather than aiming for an exhaustive review, this focused approach was designed to select suitable benchmarks for parallel evaluation. Given the study&#x2019;s aim, methodological comparison rather than comprehensive coverage, this simplified strategy was both appropriate and intentional.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Study workflow. 
TU: Umbrella Collaboration; TUR: traditional umbrella review.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e75215_fig02.png"/></fig></sec><sec id="s2-2"><title>Study Variables</title><sec id="s2-2-1"><title>Outcomes of Interest</title><p>A key variable was the identification of OoIs, defined as specific effects evaluated by SRs/MAs addressing the same research question. We assessed concordance between TU and TURs using a concordance matrix and compared the total number of OoIs identified by each methodology through descriptive and statistical analyses.</p></sec><sec id="s2-2-2"><title>Effect Size of Outcome of Interest</title><p>ES for each OoI was assessed using 2 complementary strategies: a quantitative comparison of numerical values and a qualitative classification into standard categories (trivial, small, moderate, and large). TU applies automated standardization using a proprietary metric (R<sub>TU</sub>), which converts various ES formats (eg, standardized mean difference [SMD], mean difference, risk ratio, odds ratio, hazard ratio) into a unified, weighted score for consistent synthesis. TUR-derived ES values were transformed into Cohen <italic>d</italic> for comparison. Spearman correlation was used to assess quantitative concordance, while a contingency matrix evaluated categorical agreement, distinguishing full concordance, partial concordance, and discordance (<xref ref-type="table" rid="table1">Table 1</xref>). 
Based on the degree of agreement, each matched OoI was assigned to one of the following concordance levels.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Concordance matrix for ES<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup>.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">TUR<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup> ES</td><td align="left" valign="bottom" colspan="4">TU<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup> ES</td></tr><tr><td align="left" valign="top"/><td align="left" valign="top">Trivial</td><td align="left" valign="top">Small</td><td align="left" valign="top">Moderate</td><td align="left" valign="top">Large</td></tr></thead><tbody><tr><td align="left" valign="top">Trivial</td><td align="char" char="." valign="top">Full concordance</td><td align="char" char="." valign="top">Partial concordance (discrepancy of 1 category)</td><td align="char" char="." valign="top">Discordance (discrepancy&#x003E;1 category)</td><td align="char" char="." valign="top">Discordance (discrepancy&#x003E;1 category)</td></tr><tr><td align="left" valign="top">Small</td><td align="char" char="." valign="top">Partial concordance (discrepancy of 1 category)</td><td align="char" char="." valign="top">Full concordance</td><td align="char" char="." valign="top">Partial concordance (discrepancy of 1 category)</td><td align="char" char="." valign="top">Discordance (discrepancy&#x003E;1 category)</td></tr><tr><td align="left" valign="top">Moderate</td><td align="char" char="." valign="top">Discordance (discrepancy&#x003E;1 category)</td><td align="char" char="." valign="top">Partial concordance (discrepancy of 1 category)</td><td align="char" char="." valign="top">Full concordance</td><td align="char" char="." valign="top">Partial concordance (discrepancy of 1 category)</td></tr><tr><td align="left" valign="top">Large</td><td align="char" char="." 
valign="top">Discordance (discrepancy&#x003E;1 category)</td><td align="char" char="." valign="top">Discordance (discrepancy&#x003E;1 category)</td><td align="char" char="." valign="top">Partial concordance (discrepancy of 1 category)</td><td align="char" char="." valign="top">Full concordance</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>ES: effect size.</p></fn><fn id="table1fn2"><p><sup>b</sup>TUR: traditional umbrella review.</p></fn><fn id="table1fn3"><p><sup>c</sup>TU: Umbrella Collaboration.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s2-2-3"><title>Full Concordance</title><p>Both TU and TUR classified the outcome in the same category (eg, trivial-trivial).</p></sec><sec id="s2-2-4"><title>Partial Concordance</title><p>The classifications differed by only 1 level (eg, trivial-small or moderate-large). These differences were considered acceptable, as they are unlikely to lead to substantial changes in interpretation or clinical decision-making.</p></sec><sec id="s2-2-5"><title>Discordance</title><p>The classifications differed by 2 or more levels (eg, trivial-moderate or small-large), representing a more substantial methodological discrepancy.</p></sec><sec id="s2-2-6"><title>Consistent Concordance</title><p>A combined category including both full and partial concordance, reflecting a general alignment between methodologies even when minor categorical differences were present.</p></sec></sec><sec id="s2-3"><title>Certainty of Evidence</title><p>TU estimates the certainty of evidence for each OoI using automated sentiment analysis (SA) applied to SR/MA abstracts. While this approach, based on NLP and a model trained on Twitter/X data, does not replicate the multidimensional Grading of Recommendations Assessment, Development, and Evaluation (GRADE) framework [<xref ref-type="bibr" rid="ref20">20</xref>], it offers a rapid and scalable approximation. 
GRADE ratings from TURs were categorized as very low to high, while TU&#x2019;s SA scores (ranging from &#x2212;1 to +1) were normalized to a 0&#x2010;1 scale using the following formula: (SA score + 1)/2. GRADE levels were similarly mapped to enable direct comparison and statistical concordance analysis.</p></sec><sec id="s2-4"><title>Execution Time</title><p>The execution time of each methodology was assessed, with TU providing exact time measurements and TURs relying on an estimated time frame of 6-12 months based on existing literature.</p></sec><sec id="s2-5"><title>Data Collection and Research Question Replication</title><p>A targeted PubMed search using the terms &#x201C;umbrella&#x201D; AND &#x201C;geriatric&#x201D; was conducted to identify suitable TURs, which served as benchmarks. Their research questions were replicated in TU without modification. TU then applied automated searches and synthesis using NLP, web scraping, sentiment analysis, and machine learning, with human oversight for verification and extraction. Data from both methodologies were systematically collected to compare outcomes, ESs, certainty, and execution time.</p><p>TU relied solely on abstracts due to practical constraints, such as limited access to full texts and the aim to minimize language bias, as all MEDLINE abstracts are in English. Although abstracts may omit methodological details, TU uses structured extraction criteria to capture key information. This study explicitly evaluates whether abstract-based synthesis in TU can yield conclusions comparable to those from full-text TURs.</p></sec><sec id="s2-6"><title>Statistical Analysis: Data Analysis and Statistical Methods</title><p>Results from TU and TURs were compared using contingency tables for outcomes and ESs. Certainty scores from both methods were transformed to a 0&#x2010;1 scale for direct comparison. 
Chi-square tests assessed differences between methodologies, and Spearman correlations evaluated the association between TU certainty estimates and GRADE ratings.</p></sec><sec id="s2-7"><title>Ethical Considerations</title><p>This study did not involve human participants, personal data, or animals. All data were obtained from previously published studies and analyzed in accordance with established ethical standards for secondary data analysis. Therefore, ethical review or approval was not required. This is consistent with the institutional policy of Universidad Alfonso X El Sabio (Villanueva de la Ca&#x00F1;ada, Madrid, Spain), which exempts research based solely on publicly available, nonidentifiable data from review by the institutional review board.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Identification of Traditional Umbrella Reviews in Geriatrics as Reference Models</title><p>To establish reference models, we conducted a PubMed search on March 5, 2023, using the terms &#x201C;umbrella&#x201D; AND &#x201C;geriatric,&#x201D; which yielded 111 results. After excluding 75 records for irrelevance or ineligibility, 36 TURs remained. From these, 8 were randomly selected, with a preference for recent publications, to serve as comparators for TU [<xref ref-type="bibr" rid="ref21">21</xref>-<xref ref-type="bibr" rid="ref28">28</xref>]. 
The selection process is summarized in <xref ref-type="fig" rid="figure3">Figure 3</xref> and <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Flow diagram of the search and selection process for traditional umbrella reviews (TURs) in geriatrics.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e75215_fig03.png"/></fig></sec><sec id="s3-2"><title>Identification of Outcomes of Interest</title><p>Across the 8 comparative projects, TURs identified 86 OoIs, while TU identified 410 for the same research questions. TU replicated 73 of the 86 (85%) TUR OoIs and missed 13 (15%). Conversely, only 73 of TU&#x2019;s 410 OoIs (17.8%) were reported by TURs, leaving 337 (82.2%) additional OoIs uniquely identified by TU (<xref ref-type="table" rid="table2">Table 2</xref>).</p><p>The identification gain factor, calculated as the ratio of OoIs identified by TU to those identified by TURs, quantifies TU&#x2019;s broader retrieval capacity. TU consistently identified more OoIs across all projects, with an overall gain factor of 4.77 (ie, nearly 5 times as many outcomes). 
Project-specific gain factors ranged from 1.87 to 18.63 (<xref ref-type="table" rid="table3">Table 3</xref>).</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Global summary table of OoIs<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup> identified by TURs<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup> and TU<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup>: totals, matches, and discrepancies.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom"/><td align="left" valign="bottom" colspan="8">Project</td><td align="left" valign="bottom">Total (TUR: 86 OoIs; TU: 410 OoIs)</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">1 [<xref ref-type="bibr" rid="ref22">22</xref>] (TUR: 5 OoIs; TU: 12 OoIs)</td><td align="left" valign="bottom">2 [<xref ref-type="bibr" rid="ref27">27</xref>] (TUR: 38 OoIs; TU: 71 OoIs)</td><td align="left" valign="bottom">3 [<xref ref-type="bibr" rid="ref23">23</xref>] (TUR: 7 OoIs; TU: 34 OoIs)</td><td align="left" valign="bottom">4 [<xref ref-type="bibr" rid="ref21">21</xref>] (TUR: 10 OoIs; TU: 22 OoIs)</td><td align="left" valign="bottom">5 [<xref ref-type="bibr" rid="ref28">28</xref>] (TUR: 4 OoIs; TU: 25 OoIs)</td><td align="left" valign="bottom">6 [<xref ref-type="bibr" rid="ref24">24</xref>] (TUR: 8 OoIs; TU: 149 OoIs)</td><td align="left" valign="bottom">7 [<xref ref-type="bibr" rid="ref25">25</xref>] (TUR: 6 OoIs; TU: 49 OoIs)</td><td align="left" valign="bottom">8 [<xref ref-type="bibr" rid="ref26">26</xref>] (TUR: 8 OoIs; TU: 48 OoIs)</td><td align="left" valign="bottom"/></tr></thead><tbody><tr><td align="left" valign="top">TUR OoI identified by TU, n (% TUR)</td><td align="left" valign="top">5 (100.0)</td><td align="left" valign="top">30 (78.9)</td><td align="left" valign="top">5 (71.4)</td><td align="left" valign="top">7 (70.0)</td><td align="left" valign="top">4 (100.0)</td><td 
align="left" valign="top">8 (100.0)</td><td align="left" valign="top">6 (100.0)</td><td align="left" valign="top">8 (100.0)</td><td align="left" valign="top">73 (84.9)</td></tr><tr><td align="left" valign="top">TUR OoI not identified by TU, n (% TUR)</td><td align="left" valign="top">N/A<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup></td><td align="left" valign="top">8 (21.1)</td><td align="left" valign="top">2 (28.6)</td><td align="left" valign="top">3 (30.0)</td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" valign="top">13 (15.1)</td></tr><tr><td align="left" valign="top">TU OoI identified by TUR, n (% TU)</td><td align="left" valign="top">5 (41.7)</td><td align="left" valign="top">30 (42.3)</td><td align="left" valign="top">5 (14.7)</td><td align="left" valign="top">7 (31.8)</td><td align="left" valign="top">4 (16.0)</td><td align="left" valign="top">8 (5.4)</td><td align="left" valign="top">6 (12.2)</td><td align="left" valign="top">8 (16.7)</td><td align="left" valign="top">73 (17.8)</td></tr><tr><td align="left" valign="top">TU OoI not identified by TUR, n (% TU)</td><td align="left" valign="top">7 (58.3)</td><td align="left" valign="top">41 (57.7)</td><td align="left" valign="top">29 (85.3)</td><td align="left" valign="top">15 (68.2)</td><td align="left" valign="top">21 (84.0)</td><td align="left" valign="top">141 (94.6)</td><td align="left" valign="top">43 (87.8)</td><td align="left" valign="top">40 (83.3)</td><td align="left" valign="top">337 (82.2)</td></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>OoI: outcome of interest.</p></fn><fn id="table2fn2"><p><sup>b</sup>TUR: traditional umbrella review.</p></fn><fn id="table2fn3"><p><sup>c</sup>TU: Umbrella Collaboration.</p></fn><fn id="table2fn4"><p><sup>d</sup>N/A: not applicable.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t3" 
position="float"><label>Table 3.</label><caption><p>OoIs<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup> identified by TU<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup> and TURs<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup>, additional OoIs identified by TU, and identification gain factor.<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Project</td><td align="left" valign="bottom">Number of OoIs identified by TU, n (n=410)</td><td align="left" valign="bottom">Number of OoIs identified by TUR, n (n=86)</td><td align="left" valign="bottom">Additional OoIs identified by TU, n (n=324)</td><td align="left" valign="bottom">Identification gain factor of OoIs by TU compared with TUR</td></tr></thead><tbody><tr><td align="char" char="." valign="top">Project 1: Veronese et al (2023) [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">12</td><td align="left" valign="top">5</td><td align="left" valign="top">7</td><td align="left" valign="top">2.40</td></tr><tr><td align="char" char="." valign="top">Project 2: Marx et al (2021) [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">71</td><td align="left" valign="top">38</td><td align="left" valign="top">33</td><td align="left" valign="top">1.87</td></tr><tr><td align="char" char="." valign="top">Project 3: Shen et al (2022) [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">34</td><td align="left" valign="top">7</td><td align="left" valign="top">27</td><td align="left" valign="top">4.86</td></tr><tr><td align="char" char="." 
valign="top">Project 4: Conneely et al (2022) [<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">22</td><td align="left" valign="top">10</td><td align="left" valign="top">12</td><td align="left" valign="top">2.20</td></tr><tr><td align="char" char="." valign="top">Project 5: Gazzaniga et al (2023) [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">25</td><td align="left" valign="top">4</td><td align="left" valign="top">21</td><td align="left" valign="top">6.25</td></tr><tr><td align="char" char="." valign="top">Project 6: Musazadeh et al (2023) [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">149</td><td align="left" valign="top">8</td><td align="left" valign="top">141</td><td align="left" valign="top">18.63</td></tr><tr><td align="char" char="." valign="top">Project 7: Veronese et al (2021) (1)</td><td align="left" valign="top">49</td><td align="left" valign="top">6</td><td align="left" valign="top">43</td><td align="left" valign="top">8.17</td></tr><tr><td align="char" char="." valign="top">Project 8: Veronese et al (2021) (2)</td><td align="left" valign="top">48</td><td align="left" valign="top">8</td><td align="left" valign="top">40</td><td align="left" valign="top">6.00</td></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>OoI: outcome of interest.</p></fn><fn id="table3fn2"><p><sup>b</sup>TU: Umbrella Collaboration.</p></fn><fn id="table3fn3"><p><sup>c</sup>TUR: traditional umbrella review.</p></fn><fn id="table3fn4"><p><sup>d</sup>The total identification gain factor of OoIs by TU compared with TUR is 4.77.</p></fn></table-wrap-foot></table-wrap></sec><sec id="s3-3"><title>Concordance in Effect Size Classification of Outcomes of Interest</title><p>To assess agreement, ESs for 48 matched OoIs were compared between TURs and TU. TUR-reported ESs were standardized (Cohen <italic>d</italic>) for comparison with TU&#x2019;s R<sub>TU</sub> metric. 
TURs classified most OoIs as small (n=23, 48%) or trivial (n=16, 33%). TU showed a similar pattern, with 26 (54%) classified as small and 17 (35%) classified as trivial. Moderate effects were less frequent&#x2014;TUR: n=7 (15%); TU: n=5 (10%)&#x2014;and large effects were rare, with TUR reporting 2 (4%) and TU reporting none.</p><p><xref ref-type="table" rid="table4">Table 4</xref> summarizes the categorical agreement in ES classification. TU labeled 17 out of 48 (35%) OoIs as trivial; of these, 10 out of 17 (59%) matched TUR classifications, while 7 out of 17 (41%) were classified as small. Among the OoIs that TU classified as small (26/48, 54%), half (13/26, 50%) matched TUR classifications, with the rest spread across trivial (5/26, 19%), moderate (6/26, 23%), and large (2/26, 8%) classifications. For the 5 of 48 (10%) OoIs labeled as moderate by TU, TURs agreed in only 1 case, classifying most as small or trivial.</p><p>Of the 48 OoIs analyzed, full concordance in ES classification was found in 24 (50%) cases and partial concordance (1-level difference) in 21 (44%) cases. Only 3 (6%) cases showed major discordance. 
Overall, consistent classification (full + partial) was observed in 45 out of 48 (94%) samples (<xref ref-type="table" rid="table5">Table 5</xref>).</p><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Contingency table of effect size classification: TUR<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup> versus TU<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup> (qualitative categories).<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup><sup>,</sup><sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">TUR effect size</td><td align="left" valign="bottom" colspan="4">TU effect size, n (%)</td><td align="left" valign="bottom">Total, n (%)</td></tr><tr><td align="left" valign="bottom"/><td align="left" valign="bottom">Trivial</td><td align="left" valign="bottom">Small</td><td align="left" valign="bottom">Moderate</td><td align="left" valign="bottom">Large</td><td align="left" valign="bottom"/></tr></thead><tbody><tr><td align="left" valign="top">Trivial</td><td align="left" valign="top">10 (59)<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></td><td align="left" valign="top">5 (19)<sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup></td><td align="left" valign="top">1 (20)<sup><xref ref-type="table-fn" rid="table4fn7">g</xref></sup></td><td align="left" valign="top">N/A<sup><xref ref-type="table-fn" rid="table4fn8">h</xref></sup></td><td align="left" valign="top">16 (33)</td></tr><tr><td align="left" valign="top">Small</td><td align="left" valign="top">7 (41)<sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup></td><td align="left" valign="top">13 (50)<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></td><td align="left" valign="top">3 (60)<sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup></td><td align="left" valign="top">N/A</td><td align="left" 
valign="top">23 (48)</td></tr><tr><td align="left" valign="top">Moderate</td><td align="left" valign="top">N/A</td><td align="left" valign="top">6 (23)<sup><xref ref-type="table-fn" rid="table4fn6">f</xref></sup></td><td align="left" valign="top">1 (20)<sup><xref ref-type="table-fn" rid="table4fn5">e</xref></sup></td><td align="left" valign="top">N/A</td><td align="left" valign="top">7 (15)</td></tr><tr><td align="left" valign="top">Large</td><td align="left" valign="top">N/A</td><td align="left" valign="top">2 (8)<sup><xref ref-type="table-fn" rid="table4fn7">g</xref></sup></td><td align="left" valign="top">N/A</td><td align="left" valign="top">N/A</td><td align="left" valign="top">2 (4)</td></tr><tr><td align="left" valign="top">Total</td><td align="left" valign="top">17 (35)</td><td align="left" valign="top">26 (54)</td><td align="left" valign="top">5 (10)</td><td align="left" valign="top">N/A</td><td align="left" valign="top">48 (100)</td></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>TUR: traditional umbrella review.</p></fn><fn id="table4fn2"><p><sup>b</sup>TU: Umbrella Collaboration.</p></fn><fn id="table4fn3"><p><sup>c</sup>Cram&#x00E9;r <italic>V</italic> was the effect size measure.</p></fn><fn id="table4fn4"><p><sup>d</sup>Pearson <italic>&#x03C7;</italic><sup>2<sub>6</sub></sup>=11.03<sup><xref ref-type="table-fn" rid="table4fn9">i</xref></sup> (<italic>P</italic>=.09), <italic>V</italic>=0.339 (moderate).</p></fn><fn id="table4fn5"><p><sup>e</sup>Full concordance.</p></fn><fn id="table4fn6"><p><sup>f</sup>Partial concordance (discrepancy of 1 category).</p></fn><fn id="table4fn7"><p><sup>g</sup>Discordance (discrepancy of more than 1 category).</p></fn><fn id="table4fn8"><p><sup>h</sup>N/A: not applicable.</p></fn><fn id="table4fn9"><p><sup>i</sup>Marginally significant (<italic>P</italic>&#x003C;.10).</p></fn></table-wrap-foot></table-wrap><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Concordance 
levels in effect size classification: frequency and cumulative distribution.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Classification</td><td align="left" valign="bottom">Absolute frequency, n</td><td align="left" valign="bottom">Cumulative absolute frequency, n</td><td align="left" valign="bottom">Relative frequency, n/N (%)</td><td align="left" valign="bottom">Cumulative relative frequency, n/N (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Different effect size</td><td align="left" valign="top">24</td><td align="left" valign="top">24</td><td align="left" valign="top">24/48 (50)</td><td align="left" valign="top">24/48 (50)</td></tr><tr><td align="left" valign="top">Same effect size</td><td align="left" valign="top">24</td><td align="left" valign="top">48</td><td align="left" valign="top">24/48 (50)</td><td align="left" valign="top">48/48 (100)</td></tr><tr><td align="left" valign="top">Same effect size</td><td align="left" valign="top">24</td><td align="left" valign="top">24</td><td align="left" valign="top">24/48 (50)</td><td align="left" valign="top">24/48 (50)</td></tr><tr><td align="left" valign="top">One-level discrepancy</td><td align="left" valign="top">21</td><td align="left" valign="top">45</td><td align="left" valign="top">21/48 (44)</td><td align="left" valign="top">45/48 (94)</td></tr><tr><td align="left" valign="top">Discrepancy &#x003E;1 level</td><td align="left" valign="top">3</td><td align="left" valign="top">48</td><td align="left" valign="top">3/48 (6)</td><td align="left" valign="top">48/48 (100)</td></tr></tbody></table></table-wrap></sec><sec id="s3-4"><title>Correlation of Quantitative Effect Sizes Between TU and TURs</title><p>A Spearman correlation assessed the relationship between TU and TUR ESs (R<sub>TU</sub> vs Cohen <italic>d</italic>), yielding &#x03C1;=0.399 (<italic>P</italic>=.005), indicating a statistically significant, low-to-moderate positive 
correlation. The nonparametric approach was chosen to ensure robustness against outliers and deviations from normality, as both the TU and TUR ES distributions significantly deviated from normality according to the Shapiro-Wilk test (<italic>W</italic>=0.874 and 0.847, respectively; <italic>P</italic>&#x003C;.001 for both cases). The nonparametric approach ensures robustness against outliers and nonnormality. A scatter plot with a regression line (<xref ref-type="fig" rid="figure4">Figure 4</xref>) confirmed this trend, although the explained variance was modest (<italic>R</italic><sup>2</sup>=0.111).</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Correlation of quantitative effect sizes between Umbrella Collaboration and traditional umbrella reviews.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e75215_fig04.png"/></fig></sec><sec id="s3-5"><title>Concordance in Transformed Certainty Scores Between TU and TURs</title><p>We compared transformed certainty scores between TU and TURs using GRADE in 5 of the 8 included reviews, yielding 25 matched OoIs. <xref ref-type="fig" rid="figure5">Figure 5</xref> illustrates the relationship between GRADE-based scores (x-axis) and TU&#x2019;s sentiment-based estimates (y-axis), both normalized to a 0&#x2010;1 scale for direct comparison.</p><p>A Spearman correlation assessed the relationship between transformed certainty scores from TURs (GRADE) and TU (sentiment analysis). Due to nonnormal distributions of the transformed certainty scores, Shapiro-Wilk <italic>W</italic>=0.874 (<italic>P</italic>=.005) for TUR and <italic>W</italic>=0.928 (<italic>P</italic>=.049) for TU, a nonparametric test was used. 
A Spearman correlation between the transformed certainty scores assigned by TURs and TU yielded &#x03C1;=0.446 (<italic>P</italic>=.02), indicating a statistically significant, moderate positive association that supports concordance between both approaches.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Scatter plot of transformed certainty scores for outcomes of interest (OoIs; n=25) assessed by traditional umbrella reviews (TURs; Grading of Recommendations Assessment, Development, and Evaluation [GRADE]) and Umbrella Collaboration (TU; sentiment analysis [SA]).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e75215_fig05.png"/></fig></sec><sec id="s3-6"><title>Execution Time of TU Projects</title><p>Execution time was assessed only for TU, as none of the selected TURs reported this metric. While TURs typically take 6&#x2010;12 months, TU projects were completed in a mean time of 4 hours and 46 minutes (SD 2 hours and 30 minutes), with a median of 4 hours and 36 minutes. 
Completion times ranged from 1 hour and 34 minutes [<xref ref-type="bibr" rid="ref28">28</xref>] to 10 hours and 8 minutes [<xref ref-type="bibr" rid="ref24">24</xref>] (<xref ref-type="table" rid="table6">Table 6</xref>).</p><table-wrap id="t6" position="float"><label>Table 6.</label><caption><p>Project execution time, search parameters, and reference overlap between TU<sup><xref ref-type="table-fn" rid="table6fn1">a</xref></sup> and TUR<sup><xref ref-type="table-fn" rid="table6fn2">b</xref></sup> methodologies.</p></caption><table id="table6" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Project</td><td align="left" valign="bottom">Search date</td><td align="left" valign="bottom">TU<break/>execution time<break/>(h:min:s)</td><td align="left" valign="bottom">Number of TU search terms, n</td><td align="left" valign="bottom">Number of references in TUR, n</td><td align="left" valign="bottom">Number of references in TU, n</td><td align="left" valign="bottom">Reference overlap, n/N (%)</td></tr></thead><tbody><tr><td align="left" valign="top">Project 1: Veronese et al [<xref ref-type="bibr" rid="ref22">22</xref>]</td><td align="left" valign="top">August 1, 2023</td><td align="left" valign="top">2:37:27</td><td align="left" valign="top">16</td><td align="left" valign="top">5</td><td align="left" valign="top">8</td><td align="left" valign="top">5/5 (100)</td></tr><tr><td align="left" valign="top">Project 2: Marx et al [<xref ref-type="bibr" rid="ref27">27</xref>]</td><td align="left" valign="top">May 1, 2023</td><td align="left" valign="top">4:54:57</td><td align="left" valign="top">60</td><td align="left" valign="top">15</td><td align="left" valign="top">54</td><td align="left" valign="top">15/15 (100)</td></tr><tr><td align="left" valign="top">Project 3: Shen et al [<xref ref-type="bibr" rid="ref23">23</xref>]</td><td align="left" valign="top">May 1, 2023</td><td align="left" valign="top">5:29:22</td><td align="left" valign="top">19</td><td 
align="left" valign="top">6</td><td align="left" valign="top">18</td><td align="left" valign="top">6/6 (100)</td></tr><tr><td align="left" valign="top">Project 4: Conneely et al [<xref ref-type="bibr" rid="ref21">21</xref>]</td><td align="left" valign="top">May 1, 2023</td><td align="left" valign="top">5:37:14</td><td align="left" valign="top">6</td><td align="left" valign="top">16</td><td align="left" valign="top">23</td><td align="left" valign="top">16/16 (100)</td></tr><tr><td align="left" valign="top">Project 5: Gazzaniga et al [<xref ref-type="bibr" rid="ref28">28</xref>]</td><td align="left" valign="top">May 1, 2023</td><td align="left" valign="top">1:34:37</td><td align="left" valign="top">15</td><td align="left" valign="top">11</td><td align="left" valign="top">19</td><td align="left" valign="top">10/11 (91)</td></tr><tr><td align="left" valign="top">Project 6: Musazadeh et al [<xref ref-type="bibr" rid="ref24">24</xref>]</td><td align="left" valign="top">November 12, 2023</td><td align="left" valign="top">10:08:24</td><td align="left" valign="top">41</td><td align="left" valign="top">37</td><td align="left" valign="top">90</td><td align="left" valign="top">36/37 (97)</td></tr><tr><td align="left" valign="top">Project 7: Veronese et al [<xref ref-type="bibr" rid="ref25">25</xref>]</td><td align="left" valign="top">December 12, 2023</td><td align="left" valign="top">4:17:43</td><td align="left" valign="top">53</td><td align="left" valign="top">7</td><td align="left" valign="top">35</td><td align="left" valign="top">7/7 (100)</td></tr><tr><td align="left" valign="top">Project 8: Veronese et al [<xref ref-type="bibr" rid="ref26">26</xref>]</td><td align="left" valign="top">December 12, 2023</td><td align="left" valign="top">3:33:54</td><td align="left" valign="top">16</td><td align="left" valign="top">8</td><td align="left" valign="top">22</td><td align="left" valign="top">8/9 (89)</td></tr></tbody></table><table-wrap-foot><fn id="table6fn1"><p><sup>a</sup>TU: 
Umbrella Collaboration.</p></fn><fn id="table6fn2"><p><sup>b</sup>TUR: traditional umbrella review.</p></fn></table-wrap-foot></table-wrap></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>The findings of this study support the validity of the TU as a robust and efficient methodology for tertiary evidence synthesis. TU demonstrated a high level of concordance with TURs in identifying OoIs, successfully replicating the majority of those reported in TURs while also identifying a substantial number of additional outcomes. The experimental ES metric employed by TU (R<sub>TU</sub>) showed a statistically significant correlation with standardized measures such as Cohen <italic>d</italic> and SMD, reinforcing its methodological soundness for estimating the magnitude of interventions or exposures. In addition, TU&#x2019;s automated, sentiment-based certainty assessment exhibited an acceptable level of agreement with GRADE, the most widely adopted system for evaluating certainty in evidence synthesis. One of the most notable advantages of TU lies in its execution time: while TURs typically require several months to complete, TU was able to generate comprehensive results within hours. This efficiency, combined with TU&#x2019;s demonstrated consistency and methodological coherence, highlights its potential value in settings where rapid evidence synthesis is essential or where resources are limited.</p><p>The development of TU aligns with the growing interest in AI-assisted tools for evidence synthesis, an emerging field likely to shape the future of scientific research. Rather than replacing TURs, TU is intended to complement them by offering a methodologically sound alternative in contexts where conventional approaches may be impractical or untimely. 
As a semiautomated platform, TU leverages NLP, sentiment analysis, and machine learning to streamline tertiary synthesis while maintaining methodological rigor. Despite its potential, TU represents a paradigm shift that may require time and educational efforts for widespread understanding and adoption. During the initial dissemination of this project, a general unfamiliarity with automated tertiary synthesis was observed, underscoring the need for clearer conceptual frameworks and greater engagement with the research community.</p><p>The identification of OoIs represents a central axis in the comparison between TU and TURs. In the context of tertiary evidence synthesis, an OoI reflects a specific finding previously evaluated in 1 or more SRs/MAs and integrated to answer a defined research question. The validity of a synthesis model depends not only on aggregating relevant data but also on faithfully capturing the main conclusions emerging from the primary literature.</p><p>Given the limited literature on tertiary synthesis, this analysis draws from well-established principles in secondary synthesis, where many of the same strengths and limitations apply. These include challenges in outcome selection, variability in inclusion criteria, and the balance between breadth and precision.</p><p>The comparative analysis revealed substantial differences in the number and breadth of OoIs identified. Across the 8 matched projects, TURs identified a total of 86 OoIs, whereas TU detected 410, yielding an identification gain factor of 4.77. TU replicated 73 out of 86 (85%) OoIs reported by TURs and contributed 337 additional OoIs. This difference does not appear to stem solely from broader search strategies or inclusion criteria. According to the Cochrane Handbook [<xref ref-type="bibr" rid="ref29">29</xref>], umbrella reviews can pursue either broad or narrow questions [<xref ref-type="bibr" rid="ref30">30</xref>]. 
While both methodologies support both approaches, several TURs in this study applied restrictive filters. For example, project 6 [<xref ref-type="bibr" rid="ref24">24</xref>] limited both its search strategy and inclusion criteria, while projects 3 [<xref ref-type="bibr" rid="ref23">23</xref>] and 4 [<xref ref-type="bibr" rid="ref21">21</xref>] also applied inclusion restrictions. Nevertheless, even when only unrestricted TURs are considered, TU maintained a gain factor of 4.93, suggesting an intrinsic property of TU in capturing a greater volume of outcomes.</p><p>One explanation lies in TU&#x2019;s automated and systematic extraction process, which minimizes human bias and ensures that any OoI mentioned in an abstract is captured, regardless of its perceived importance. By contrast, traditional reviews often apply subjective judgment in prioritizing outcomes, potentially omitting secondary findings. Moreover, TU is not constrained by human workload, allowing it to process larger volumes of data and extract multiple OoIs per review without sacrificing comprehensiveness.</p><p>A second consideration is the differing approach to quality assessment. TURs often prioritize high-quality SRs/MAs, limiting scope but potentially increasing rigor. TU, by relying on abstracts, does not perform formal quality appraisals. While this design choice increases the number of included studies, it should not be interpreted as a methodological flaw. Notably, up to 30% of SRs/MAs fail to report study quality assessments, and only 12.9% set quality thresholds for meta-analysis inclusion [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref32">32</xref>]. 
Furthermore, tools such as PRISMA (Preferred Reporting Items for Systematic Reviews and Meta-Analyses)-Abstract could be integrated into future versions of TU to address this limitation, though prior research has shown that abstracts often score low on PRISMA quality indicators [<xref ref-type="bibr" rid="ref33">33</xref>-<xref ref-type="bibr" rid="ref36">36</xref>]. Nonetheless, TU consistently identifies more OoIs based solely on abstracts than TURs do using full-text reviews.</p><p>Another factor is the role of cognitive bias in outcome selection. Human reviewers may unconsciously favor familiar or hypothesis-confirming outcomes, limiting variability. While TU still involves human oversight in the final output selection, its core processes are software-driven, applying uniform parameters across all included studies and thereby reducing cognitive distortions. For instance, whereas TURs tended to report outcomes only in 1 direction, TU captured both positive and negative effects, aligning with best-practice guidelines from the Joanna Briggs Institute and the Cochrane Collaboration, which emphasize the importance of presenting balanced results for informed decision-making [<xref ref-type="bibr" rid="ref29">29</xref>,<xref ref-type="bibr" rid="ref37">37</xref>].</p><p>Finally, while it is possible that TU identifies outcomes of limited clinical relevance, this risk appears minimal. All OoIs extracted by TU are derived from published SRs/MAs indexed in MEDLINE, which are assumed to meet minimum standards of relevance and methodological quality. According to the Cochrane Handbook [<xref ref-type="bibr" rid="ref30">30</xref>], reviews should focus on critical and important outcomes, excluding trivial ones.</p><p>While a higher number of outcomes may raise concerns about information overload, it is important to consider the primary purpose of tertiary synthesis: to provide a comprehensive and structured overview of the available evidence landscape. 
TU does not aim to guide individual clinical decisions but rather to inform stakeholders of the full scope of existing research. In this context, the inclusion of a larger set of outcomes enhances transparency and facilitates a more complete understanding of the literature. A formal qualitative appraisal of these additional outcomes, although beyond the scope of this study, represents a valuable direction for future research.</p><p>The study examined the concordance and correlation of ES metrics for OoIs between TU and TURs, using a unified set of outcomes assessed by both methodologies. TU incorporates a novel metric, R<sub>TU</sub>, specifically developed to standardize and synthesize ESs from SRs/MAs within a tertiary synthesis framework. Unlike conventional metrics such as Cohen <italic>d</italic> or the SMD, R<sub>TU</sub> was designed to allow direct aggregation of heterogeneous ES measures and requires empirical validation. This analysis represents the first formal evaluation of R<sub>TU</sub> against traditional metrics.</p><p>In the qualitative comparison of ES categories (trivial, small, moderate, and large), TU demonstrated a high level of agreement with TURs. Full concordance, defined as an exact categorical match between the 2 methods, was observed in 24 out of 48 (50%) cases, while consistent concordance (full plus 1-level deviation) was reached in 45 out of 48 (94%) cases. Only 3 out of 48 (6%) cases showed discordance, meaning the categorical classifications differed by more than 1 level. These findings suggest that TU approximates the classification logic of TURs with a high degree of reliability, reinforcing the potential of R<sub>TU</sub> to align with established interpretive frameworks.</p><p>The strength of the association between ES categorizations assigned by each methodology was evaluated using Cram&#x00E9;r <italic>V</italic>, which yielded a value of 0.339. 
This indicates a moderate association and supports the notion that TU and TURs tend to classify the strength of associations in a comparable manner. Although the chi-square test did not reach statistical significance, likely due to the limited sample size and sparse contingency cells, Cram&#x00E9;r <italic>V</italic> remains a valid ES measure and is less sensitive to these limitations [<xref ref-type="bibr" rid="ref38">38</xref>]. In applied research, the strength of association can often be more informative than statistical significance, particularly when evaluating the practical utility of an experimental method [<xref ref-type="bibr" rid="ref39">39</xref>]. It is worth noting that the statistical power of the chi-square test may have been reduced by the small number of observations and the presence of empty cells in the contingency table, both of which are known to limit test sensitivity. In line with recent critiques of overreliance on <italic>P</italic> values in biomedical research [<xref ref-type="bibr" rid="ref40">40</xref>-<xref ref-type="bibr" rid="ref43">43</xref>], the observed concordance rates and ES association offer more meaningful insights into the comparability of methods than statistical significance alone.</p><p>The quantitative comparison of ES values further supports the validity of TU. The Spearman correlation coefficient between TU and TUR values was positive and statistically significant, indicating a consistent directional relationship. Additionally, the slope of the regression line approached 1.0 (<italic>&#x03B2;</italic>=.8), suggesting that the R<sub>TU</sub> metric yields ES of similar magnitude to those reported in TURs using standard metrics. 
This represents a marked improvement in proportionality compared with earlier regression slopes reported in other validation contexts.</p><p>However, the coefficient of determination (<italic>R</italic><sup>2</sup>) remained modest (<italic>R</italic><sup>2</sup>=0.11), indicating that a substantial portion of the variance in TUR ES is not explained by TU estimates. This implies that, while TU captures general trends in effect magnitude, additional factors, such as differences in underlying metric types, weighting procedures, or reviewer judgments, may influence the exact values. It also highlights the limitations of linear models in fully explaining the relationship between these 2 methods.</p><p>These findings provide preliminary but encouraging evidence for the validity of R<sub>TU</sub> as a useful metric for tertiary synthesis. Despite its experimental nature, R<sub>TU</sub> appears to closely approximate established metrics such as Cohen <italic>d</italic> and the SMD in both categorical classification and magnitude. Future studies will be needed to further evaluate the sensitivity, specificity, and contextual performance of R<sub>TU</sub> across diverse synthesis domains. Nonetheless, this initial validation suggests that TU can offer a consistent and efficient alternative for evaluating and categorizing ES in large-scale evidence synthesis.</p><p>While the observed correlation between TU and TUR ESs was moderate (&#x03C1;=0.399), this result is consistent with the structural differences between the R<sub>TU</sub> metric and conventional measures such as Cohen <italic>d</italic> and SMD. R<sub>TU</sub> is a proprietary, automated metric specifically developed for abstract-level synthesis, prioritizing scalability and consistency over statistical precision at the study level. It does not incorporate weighting by sample size or variance and relies on normalized effect descriptors extracted from secondary sources. 
These foundational differences partly account for the limited shared variance (<italic>R</italic><sup>2</sup>=0.11). However, in the context of tertiary synthesis, where the goal is to identify trends and prioritize outcomes across a broad evidence base, R<sub>TU</sub> provides an acceptable and interpretable proxy. Moreover, this divergence has limited implications for clinical decision-making, as TU is designed to support synthesis and orientation at the field level, rather than to inform individual patient decisions. Future work may explore hybrid models that integrate additional contextual parameters to enhance concordance while preserving automation.</p><p>In this study, certainty assessments were compared exclusively among those TURs that applied the GRADE framework, a well-established and widely endorsed method for evaluating the certainty of evidence [<xref ref-type="bibr" rid="ref44">44</xref>]. Although other appraisal tools exist, such as the statistical grading criteria proposed by Papatheodorou [<xref ref-type="bibr" rid="ref45">45</xref>] or the methodological assessment framework AMSTAR 2 [<xref ref-type="bibr" rid="ref46">46</xref>], these differ conceptually from the certainty model implemented in TU. Papatheodorou&#x2019;s system, despite its structure, has been criticized for relying on arbitrary statistical thresholds and for lacking sensitivity to clinical relevance and risk of bias [<xref ref-type="bibr" rid="ref47">47</xref>]. AMSTAR 2, in turn, focuses on the methodological quality of the SRs/MAs themselves, rather than the certainty of individual outcomes. By contrast, TU&#x2019;s approach aligns more closely with GRADE principles by estimating certainty at the outcome level, albeit through an innovative method based on an automated sentiment analysis of SR/MA abstracts. 
Originally, the GRADE system referred to &#x201C;quality of evidence,&#x201D; which denoted the degree of confidence in the validity of study findings to inform clinical decisions. Over time, this evolved to &#x201C;confidence in estimates,&#x201D; and more recently, to &#x201C;certainty of evidence,&#x201D; a term now widely accepted as it better reflects both the trust in effect estimates and their applicability in practice [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref48">48</xref>].</p><p>The comparison between transformed certainty scores, standardized to a 0&#x2010;1 scale, for outcomes evaluated by TU and those rated using the GRADE framework provides important insights into the potential validity of TU&#x2019;s automated approach. The observed positive and statistically significant correlation suggests that TU is capable of approximating GRADE-based judgments, thereby reinforcing the conceptual alignment between the 2 methodologies. However, this result must be interpreted with caution. The number of matched outcomes was limited, which restricts generalizability and underscores the need for further validation in broader domains and larger datasets. Crucially, we recognize that TU&#x2019;s sentiment-based certainty estimation cannot replicate the multidimensional rigor of GRADE, which accounts for factors such as study limitations, inconsistency, indirectness, imprecision, and publication bias. The use of a general sentiment analysis model, originally trained on nonmedical texts, represents a methodological limitation. In this context, TU&#x2019;s certainty scoring should be viewed as a preliminary proxy, not a substitute for comprehensive manual assessments. Nevertheless, the approach offers a scalable and fully automated alternative capable of supporting rapid synthesis at low resource costs. 
Despite its current limitations, TU provides a complementary framework that aligns with the growing need for efficient, transparent, and reproducible tools in evidence synthesis. The correlation observed with GRADE invites further research into the integration of automated text-based analyses with established appraisal frameworks. As these technologies evolve, ongoing refinement and external validation will be critical to define their appropriate role in supporting scientific practice.</p><p>Execution time is a critical but often overlooked dimension in the evaluation of evidence synthesis methodologies. TURs rarely document their execution timelines, a pattern that was consistent among the TURs included in this study. By contrast, the TU methodology records execution time precisely, from the initiation of the first search term to the completion of the first version of the synthesis project, excluding any subsequent updates in this study. This level of detail in time tracking is unprecedented in both secondary and tertiary syntheses and represents a methodological innovation that may become increasingly relevant with the integration of automation tools. As a result, traditional timelines measured in months or years may soon be replaced by more accurate, time-stamped process metrics.</p><p>Given the lack of concrete time data from TURs, direct comparisons were not feasible. However, the execution times recorded for TU are sufficiently notable to highlight its efficiency. TU projects were completed within a range of 1.5-10 hours, substantially shorter than the typical duration for TURs, which is estimated to range between 6 and 12 months. 
This is also well below the average time frames reported for SRs/MAs, which commonly take 1-2 years [<xref ref-type="bibr" rid="ref49">49</xref>], or for accelerated alternatives such as rapid reviews, which range from 5 to 12 weeks [<xref ref-type="bibr" rid="ref50">50</xref>], and even the Two-Week SR approach, which targets completion in 11 working days [<xref ref-type="bibr" rid="ref51">51</xref>]. The remarkable speed of TU is a key strength, particularly in light of the growing international demand for agile evidence synthesis methods. Organizations such as the WHO [<xref ref-type="bibr" rid="ref52">52</xref>], Cochrane Collaboration [<xref ref-type="bibr" rid="ref53">53</xref>], and Joanna Briggs Institute [<xref ref-type="bibr" rid="ref15">15</xref>] have emphasized the need for rapid yet valid synthesis approaches, an objective that TU appears to fulfill effectively in the context of tertiary reviews.</p><p>It is important to note that the time measured in this study refers solely to the initial execution phase of TU projects and does not account for later updates. However, TU includes a novel updating system that keeps projects active beyond their initial completion. The platform automatically checks daily for new SRs/MAs that match the predefined search criteria. When new studies are identified, a human reviewer is notified and can assess their eligibility. If approved, the system integrates the new results into the existing synthesis following revision and confirmation. While no equivalent system currently exists in traditional tertiary synthesis, the need for regular updates is well documented in evidence-based practice. In SRs/MAs, updates are typically recommended every 2 years or in response to emerging evidence, yet compliance with this standard is often poor [<xref ref-type="bibr" rid="ref54">54</xref>-<xref ref-type="bibr" rid="ref57">57</xref>]. 
TU&#x2019;s automated updating mechanism represents a major advantage in maintaining the relevance and accuracy of results, particularly in fast-evolving fields.</p><p>Notably, projects with a higher number of included references required longer execution times, which is expected given the additional workload involved in processing more SRs/MAs. Although TU significantly streamlines the synthesis process, the complexity and volume of information remain factors that influence total execution time, particularly when human oversight is still involved. Looking ahead, TU is being developed to support full automation of SR/MA data analysis, eliminating the need for human intervention. This transition will depend on training algorithms through semiautomated projects, progressively improving accuracy via machine learning.</p><p>Interestingly, the number of search terms used did not appear to correlate with execution time (unpublished data), suggesting that TU&#x2019;s algorithms can manage variable search complexity efficiently. In traditional methodologies, the search phase is often complex and conducted by specialized information professionals using advanced Boolean strategies across multiple databases [<xref ref-type="bibr" rid="ref58">58</xref>-<xref ref-type="bibr" rid="ref60">60</xref>]. By contrast, TU simplifies this process by using basic search terms exclusively in PubMed, automatically generated and refined through generative AI. This shift toward simplified, automated search strategies opens new avenues for exploring the adequacy and efficiency of noncomplex methods in evidence synthesis. 
It also raises important questions about the continued necessity of highly complex search protocols, suggesting that AI-assisted models may offer comparable effectiveness in certain contexts [<xref ref-type="bibr" rid="ref61">61</xref>-<xref ref-type="bibr" rid="ref63">63</xref>].</p><p>Ultimately, the short execution times observed with TU not only improve the efficiency of tertiary synthesis but also enhance the feasibility of rapidly delivering high-level evidence. In settings where timeliness is essential, this temporal advantage may outweigh minor trade-offs in other methodological dimensions. This capability holds particular promise for public health and health care decision-making, where fast access to up-to-date, high-quality evidence is crucial. TU, therefore, emerges as a promising tool in scenarios where speed is a priority, offering a pragmatic balance between rapid synthesis and analytical depth, while still requiring further validation against established gold standards.</p></sec><sec id="s4-2"><title>Limitations</title><p>One key limitation is the small number of TURs included&#x2014;only 8, randomly selected from 36 eligible reviews. Although intended to be representative, the lack of formal randomization introduces potential selection bias. The limited sample may not reflect the full methodological or topical diversity of umbrella reviews in geriatrics. However, all selected TURs were recent (2021&#x2010;2023), which enhances comparability by aligning with current evidence synthesis standards. Another limitation is the lack of blinding: the lead investigator selected and analyzed the TURs, potentially introducing bias. Prior knowledge of TUR findings could have influenced TU outputs. However, the heterogeneity of outcomes across TURs reduces this risk, and TU&#x2019;s algorithm-driven, semiautomated workflow limits subjective influence. 
Results were largely determined by predefined rules, adding neutrality to the process.</p><p>The decision to utilize only 1 database, MEDLINE via PubMed, in TU is both a recognized limitation and a deliberate choice shaped by resource constraints and technical considerations. Although SRs typically require searching multiple databases to capture all relevant literature [<xref ref-type="bibr" rid="ref64">64</xref>], our approach focuses on testing whether TU can achieve outcomes comparable to TURs despite this limitation. A pilot study provided preliminary evidence of PubMed&#x2019;s strong coverage, but further validation through TU is necessary to confirm its applicability across different domains. Although searching multiple databases is often recommended to reduce language and indexing biases, particularly regarding non-English literature [<xref ref-type="bibr" rid="ref65">65</xref>,<xref ref-type="bibr" rid="ref66">66</xref>], TU mitigates some of these biases by focusing on abstracts in English, as all PubMed abstracts are provided in English regardless of the original publication&#x2019;s language. Nonetheless, the absence of Chinese databases in our approach remains a notable limitation, given that only a small proportion of Chinese journals are indexed in MEDLINE.</p><p>Relying solely on abstracts in TU was a deliberate choice to test whether tertiary synthesis can be performed efficiently, with minimal resources, and without requiring advanced methodological expertise. While abstracts may omit key details (eg, certainty ratings, ES calculations, risk of bias), TU&#x2019;s structured, automated approach aims to generate clinically useful insights under standardized conditions. 
This approach offers 2 main advantages: (1) reduced language bias because all MEDLINE abstracts are in English; and (2) greater feasibility, as full-text access often requires costly subscriptions.</p><p>This pilot study focused on geriatrics to ensure methodological clarity and feasibility during initial validation. Limiting the scope allowed for controlled comparisons between TU and TURs. Future research should expand TU&#x2019;s application to other medical fields (eg, cardiology, psychiatry) and to nonmedical domains such as education or sociology, enabling stepwise validation before broader implementation.</p></sec><sec id="s4-3"><title>Conclusions</title><p>This study provides preliminary evidence supporting the validity of TU as a complementary, semiautomated tool for tertiary evidence synthesis. In a set of comparative projects, TU demonstrated a high level of concordance with TURs in identifying OoIs, estimating ESs, and assessing certainty, while offering substantially shorter execution times.</p><p>Although the methodology remains experimental and further validation is required across broader contexts, the results suggest that semiautomated approaches such as TU may represent a promising step toward more efficient, scalable, and continuously updatable models of evidence synthesis. TU does not aim to replace traditional methods but to provide a practical alternative in settings where time, resources, or responsiveness are critical.</p><p>Future studies with larger datasets, enhanced blinding procedures, and expanded topic areas will be essential to confirm these findings and further explore the potential role of TU in the evolving landscape of evidence-based research.</p></sec></sec></body><back><ack><p>This project has received support from the Fundaci&#x00F3;n Alfonso X el Sabio (grant 1.014.030).</p></ack><fn-group><fn fn-type="conflict"><p>BC, MRC, JP, AP, and BCR were involved in the development of The Umbrella Collaboration (TU) software. 
Additionally, BC and MRC are owners of the software and may have financial or intellectual property interests in its future applications. While this study was conducted with the aim of validating TU as an evidence synthesis tool, the authors have taken all necessary precautions to ensure that the study design, data analysis, and interpretation of results remain as objective and methodologically rigorous as possible. To mitigate potential biases, comparisons were conducted against gold-standard traditional umbrella reviews, and statistical methodologies were used to ensure transparency and replicability of findings. Furthermore, the affiliations of the authors did not influence the study design, execution, data analysis, or interpretation of results. All research activities were conducted following rigorous methodological standards to ensure impartiality and scientific integrity. The authors used generative artificial intelligence (ChatGPT; OpenAI Inc) as a language support tool during manuscript preparation. Specifically, it was used to assist in translating portions of the original manuscript, written by the authors in Spanish, into academic English, and to improve the clarity and flow of the text. No content, interpretations, or scientific claims were generated by artificial intelligence. All outputs were thoroughly fact-checked, revised, and approved by the authors. 
No references were generated by ChatGPT, and all cited sources were independently selected, verified, and reviewed by the authors.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">AI</term><def><p>artificial intelligence</p></def></def-item><def-item><term id="abb2">ES</term><def><p>effect size</p></def></def-item><def-item><term id="abb3">GRADE</term><def><p>Grading of Recommendations Assessment, Development, and Evaluation</p></def></def-item><def-item><term id="abb4">LLM</term><def><p>large language model</p></def></def-item><def-item><term id="abb5">MA</term><def><p>meta-analysis</p></def></def-item><def-item><term id="abb6">NLP</term><def><p>natural language processing</p></def></def-item><def-item><term id="abb7">OoI</term><def><p>outcome of interest</p></def></def-item><def-item><term id="abb8">PRISMA</term><def><p>Preferred Reporting Items for Systematic Reviews and Meta-Analyses</p></def></def-item><def-item><term id="abb9">SMD</term><def><p>standardized mean difference</p></def></def-item><def-item><term id="abb10">SR</term><def><p>systematic review</p></def></def-item><def-item><term id="abb11">TU</term><def><p>The Umbrella Collaboration</p></def></def-item><def-item><term id="abb12">TUR</term><def><p>traditional umbrella review</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="web"><article-title>Evidence synthesis for policy: a statement of principles - The Royal Society 2018</article-title><source>International Network for Governmental Science Advice</source><year>2018</year><access-date>2025-06-20</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://tinyurl.com/3347c8k8">https://tinyurl.com/3347c8k8</ext-link></comment></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="web"><person-group person-group-type="author"><collab>World Health Organization 
(WHO)</collab></person-group><article-title>World health organization knowledge management strategy</article-title><source>WHO</source><year>2005</year><access-date>2025-06-30</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://iris.who.int/bitstream/handle/10665/69119/WHO_EIP_KMS_2005.1.pdf">https://iris.who.int/bitstream/handle/10665/69119/WHO_EIP_KMS_2005.1.pdf</ext-link></comment></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Cottrell</surname><given-names>E</given-names> </name><name name-style="western"><surname>Whitlock</surname><given-names>E</given-names> </name><name name-style="western"><surname>Kato</surname><given-names>E</given-names> </name><etal/></person-group><source>Defining the Benefits of Stakeholder Engagement in Systematic Reviews</source><year>2014</year><publisher-name>Agency for Healthcare Research and Quality (US)</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/books/NBK196180/">https://www.ncbi.nlm.nih.gov/books/NBK196180/</ext-link></comment></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>S&#x00F8;rensen</surname><given-names>K</given-names> </name><name name-style="western"><surname>Van den Broucke</surname><given-names>S</given-names> </name><name name-style="western"><surname>Fullam</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Health literacy and public health: a systematic review and integration of definitions and models</article-title><source>BMC Public Health</source><year>2012</year><month>01</month><day>25</day><volume>12</volume><fpage>80</fpage><pub-id pub-id-type="doi">10.1186/1471-2458-12-80</pub-id><pub-id pub-id-type="medline">22276600</pub-id></nlm-citation></ref><ref 
id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mart&#x00ED;nez-Garc&#x00ED;a</surname><given-names>JJ</given-names> </name><name name-style="western"><surname>Canizalez-Rom&#x00E1;n</surname><given-names>A</given-names> </name><name name-style="western"><surname>Vel&#x00E1;zquez-Rom&#x00E1;n</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Flores-Villase&#x00F1;or</surname><given-names>HM</given-names> </name><name name-style="western"><surname>Le&#x00F3;n-Sicairos</surname><given-names>NM</given-names> </name></person-group><article-title>Evaluaci&#x00F3;n del conocimiento de m&#x00E9;todos b&#x00E1;sicos de epidemiolog&#x00ED;a e investigaci&#x00F3;n en m&#x00E9;dicos residentes</article-title><source>Rev M&#x00E9;dica Univ Aut&#x00F3;noma Sinaloa REVMEDUAS</source><year>2021</year><volume>11</volume><fpage>115</fpage><lpage>123</lpage></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Baccolini</surname><given-names>V</given-names> </name><name name-style="western"><surname>Rosso</surname><given-names>A</given-names> </name><name name-style="western"><surname>Di Paolo</surname><given-names>C</given-names> </name><etal/></person-group><article-title>What is the prevalence of low health literacy in European Union member states? 
A systematic review and meta-analysis</article-title><source>J Gen Intern Med</source><year>2021</year><month>03</month><volume>36</volume><issue>3</issue><fpage>753</fpage><lpage>761</lpage><pub-id pub-id-type="doi">10.1007/s11606-020-06407-8</pub-id><pub-id pub-id-type="medline">33403622</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Baumann</surname><given-names>LA</given-names> </name><name name-style="western"><surname>Reinhold</surname><given-names>AK</given-names> </name><name name-style="western"><surname>Br&#x00FC;tt</surname><given-names>AL</given-names> </name></person-group><article-title>Public and patient involvement in health policy decision-making on the health system level - a scoping review</article-title><source>Health Policy</source><year>2022</year><month>10</month><volume>126</volume><issue>10</issue><fpage>1023</fpage><lpage>1038</lpage><pub-id pub-id-type="doi">10.1016/j.healthpol.2022.07.007</pub-id><pub-id pub-id-type="medline">35918211</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="book"><person-group person-group-type="editor"><name name-style="western"><surname>Biondi-Zoccai</surname><given-names>G</given-names> </name></person-group><source>Umbrella Reviews: Evidence Synthesis with Overviews of Reviews and Meta-Epidemiologic Studies</source><year>2016</year><publisher-name>Springer International Publishing</publisher-name><pub-id pub-id-type="doi">10.1007/978-3-319-25655-9</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Choi</surname><given-names>GJ</given-names> </name><name name-style="western"><surname>Kang</surname><given-names>H</given-names> </name></person-group><article-title>Introduction to umbrella reviews as a useful evidence-based 
practice</article-title><source>J Lipid Atheroscler</source><year>2023</year><month>01</month><volume>12</volume><issue>1</issue><fpage>3</fpage><lpage>11</lpage><pub-id pub-id-type="doi">10.12997/jla.2023.12.1.3</pub-id><pub-id pub-id-type="medline">36761061</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fusar-Poli</surname><given-names>P</given-names> </name><name name-style="western"><surname>Radua</surname><given-names>J</given-names> </name></person-group><article-title>Ten simple rules for conducting umbrella reviews</article-title><source>Evid Based Mental Health</source><year>2018</year><month>08</month><volume>21</volume><issue>3</issue><fpage>95</fpage><lpage>100</lpage><pub-id pub-id-type="doi">10.1136/ebmental-2018-300014</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Aromataris</surname><given-names>E</given-names> </name><name name-style="western"><surname>Fernandez</surname><given-names>R</given-names> </name><name name-style="western"><surname>Godfrey</surname><given-names>CM</given-names> </name><name name-style="western"><surname>Holly</surname><given-names>C</given-names> </name><name name-style="western"><surname>Khalil</surname><given-names>H</given-names> </name><name name-style="western"><surname>Tungpunkom</surname><given-names>P</given-names> </name></person-group><article-title>Summarizing systematic reviews: methodological development, conduct and reporting of an umbrella review approach</article-title><source>JBI Evid Implement</source><year>2015</year><volume>13</volume><fpage>132</fpage><pub-id pub-id-type="doi">10.1097/XEB.0000000000000055</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Cant</surname><given-names>R</given-names> </name><name name-style="western"><surname>Ryan</surname><given-names>C</given-names> </name><name name-style="western"><surname>Kelly</surname><given-names>MA</given-names> </name></person-group><article-title>A nine&#x2010;step pathway to conduct an umbrella review of literature</article-title><source>Nurse Author Ed</source><year>2022</year><month>06</month><volume>32</volume><issue>2</issue><fpage>31</fpage><lpage>34</lpage><pub-id pub-id-type="doi">10.1111/nae2.12039</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Belbasis</surname><given-names>L</given-names> </name><name name-style="western"><surname>Brooker</surname><given-names>RD</given-names> </name><name name-style="western"><surname>Zavalis</surname><given-names>E</given-names> </name><name name-style="western"><surname>Pezzullo</surname><given-names>AM</given-names> </name><name name-style="western"><surname>Axfors</surname><given-names>C</given-names> </name><name name-style="western"><surname>Ioannidis</surname><given-names>JP</given-names> </name></person-group><article-title>Mapping and systematic appraisal of umbrella reviews in epidemiological research: a protocol for a meta-epidemiological study</article-title><source>Syst Rev</source><year>2023</year><month>07</month><day>14</day><volume>12</volume><issue>1</issue><fpage>123</fpage><pub-id pub-id-type="doi">10.1186/s13643-023-02265-7</pub-id><pub-id pub-id-type="medline">37452309</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Pollock</surname><given-names>M</given-names> </name></person-group><article-title>Chapter V: overviews of reviews</article-title><source>Cochrane Handbook for 
Systematic Reviews of Interventions</source><year>2023</year><access-date>2025-07-03</access-date><publisher-name>The Cochrane Collaboration</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://training.cochrane.org/handbook/current/chapter-v">https://training.cochrane.org/handbook/current/chapter-v</ext-link></comment></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tricco</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Straus</surname><given-names>SE</given-names> </name><name name-style="western"><surname>Ghaffar</surname><given-names>A</given-names> </name><name name-style="western"><surname>Langlois</surname><given-names>EV</given-names> </name></person-group><article-title>Rapid reviews for health policy and systems decision-making: more important than ever before</article-title><source>Syst Rev</source><year>2022</year><month>07</month><day>30</day><volume>11</volume><issue>1</issue><fpage>153</fpage><pub-id pub-id-type="doi">10.1186/s13643-022-01887-7</pub-id><pub-id pub-id-type="medline">35906637</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Qureshi</surname><given-names>R</given-names> </name><name name-style="western"><surname>Shaughnessy</surname><given-names>D</given-names> </name><name name-style="western"><surname>Gill</surname><given-names>KAR</given-names> </name><name name-style="western"><surname>Robinson</surname><given-names>KA</given-names> </name><name name-style="western"><surname>Li</surname><given-names>T</given-names> </name><name name-style="western"><surname>Agai</surname><given-names>E</given-names> </name></person-group><article-title>Are ChatGPT and large language models &#x201C;the answer&#x201D; to bringing us closer to systematic 
review automation?</article-title><source>Syst Rev</source><year>2023</year><month>04</month><day>29</day><volume>12</volume><issue>1</issue><fpage>72</fpage><pub-id pub-id-type="doi">10.1186/s13643-023-02243-z</pub-id><pub-id pub-id-type="medline">37120563</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Teperikidis</surname><given-names>L</given-names> </name><name name-style="western"><surname>Boulmpou</surname><given-names>A</given-names> </name><name name-style="western"><surname>Papadopoulos</surname><given-names>C</given-names> </name><name name-style="western"><surname>Biondi-Zoccai</surname><given-names>G</given-names> </name></person-group><article-title>Using ChatGPT to perform a systematic review: a tutorial</article-title><source>Minerva Cardiol Angiol</source><year>2024</year><month>12</month><volume>72</volume><issue>6</issue><fpage>547</fpage><lpage>567</lpage><pub-id pub-id-type="doi">10.23736/S2724-5683.24.06568-2</pub-id><pub-id pub-id-type="medline">39056432</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="web"><source>OpenAI</source><year>2025</year><access-date>2025-04-02</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://openai.com/">https://openai.com/</ext-link></comment></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Elliott</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Synnot</surname><given-names>A</given-names> </name><name name-style="western"><surname>Turner</surname><given-names>T</given-names> </name><etal/></person-group><article-title>Living systematic review: 1. 
Introduction-the why, what, when, and how</article-title><source>J Clin Epidemiol</source><year>2017</year><month>11</month><volume>91</volume><fpage>23</fpage><lpage>30</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2017.08.010</pub-id><pub-id pub-id-type="medline">28912002</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guyatt</surname><given-names>GH</given-names> </name><name name-style="western"><surname>Oxman</surname><given-names>AD</given-names> </name><name name-style="western"><surname>Vist</surname><given-names>GE</given-names> </name><etal/></person-group><article-title>GRADE: an emerging consensus on rating quality of evidence and strength of recommendations</article-title><source>BMJ</source><year>2008</year><month>04</month><day>26</day><volume>336</volume><issue>7650</issue><fpage>924</fpage><lpage>926</lpage><pub-id pub-id-type="doi">10.1136/bmj.39489.470347.AD</pub-id><pub-id pub-id-type="medline">18436948</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Conneely</surname><given-names>M</given-names> </name><name name-style="western"><surname>Leahy</surname><given-names>S</given-names> </name><name name-style="western"><surname>Dore</surname><given-names>L</given-names> </name><etal/></person-group><article-title>The effectiveness of interventions to reduce adverse outcomes among older adults following emergency department discharge: umbrella review</article-title><source>BMC Geriatr</source><year>2022</year><month>05</month><day>28</day><volume>22</volume><issue>1</issue><fpage>462</fpage><pub-id pub-id-type="doi">10.1186/s12877-022-03007-5</pub-id><pub-id pub-id-type="medline">35643453</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Veronese</surname><given-names>N</given-names> </name><name name-style="western"><surname>Honvo</surname><given-names>G</given-names> </name><name name-style="western"><surname>Bruy&#x00E8;re</surname><given-names>O</given-names> </name><etal/></person-group><article-title>Knee osteoarthritis and adverse health outcomes: an umbrella review of meta-analyses of observational studies</article-title><source>Aging Clin Exp Res</source><year>2023</year><month>02</month><volume>35</volume><issue>2</issue><fpage>245</fpage><lpage>252</lpage><pub-id pub-id-type="doi">10.1007/s40520-022-02289-4</pub-id><pub-id pub-id-type="medline">36331799</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shen</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>D</given-names> </name><name name-style="western"><surname>Li</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Effects of exercise on patients important outcomes in older people with sarcopenia: an umbrella review of meta-analyses of randomized controlled trials</article-title><source>Front Med (Lausanne)</source><year>2022</year><volume>9</volume><fpage>811746</fpage><pub-id pub-id-type="doi">10.3389/fmed.2022.811746</pub-id><pub-id pub-id-type="medline">35186999</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Musazadeh</surname><given-names>V</given-names> </name><name name-style="western"><surname>Kavyani</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Mirhosseini</surname><given-names>N</given-names> </name><name 
name-style="western"><surname>Dehghan</surname><given-names>P</given-names> </name><name name-style="western"><surname>Vajdi</surname><given-names>M</given-names> </name></person-group><article-title>Effect of vitamin D supplementation on type 2 diabetes biomarkers: an umbrella of interventional meta-analyses</article-title><source>Diabetol Metab Syndr</source><year>2023</year><month>04</month><day>19</day><volume>15</volume><issue>1</issue><fpage>76</fpage><pub-id pub-id-type="doi">10.1186/s13098-023-01010-3</pub-id><pub-id pub-id-type="medline">37072813</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Veronese</surname><given-names>N</given-names> </name><name name-style="western"><surname>Galvano</surname><given-names>D</given-names> </name><name name-style="western"><surname>D&#x2019;Antiga</surname><given-names>F</given-names> </name><etal/></person-group><article-title>Interventions for reducing loneliness: an umbrella review of intervention studies</article-title><source>Health Soc Care Community</source><year>2021</year><month>09</month><volume>29</volume><issue>5</issue><fpage>e89</fpage><lpage>e96</lpage><pub-id pub-id-type="doi">10.1111/hsc.13248</pub-id><pub-id pub-id-type="medline">33278311</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Veronese</surname><given-names>N</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>L</given-names> </name><name name-style="western"><surname>Bolzetta</surname><given-names>F</given-names> </name><name name-style="western"><surname>Cester</surname><given-names>A</given-names> </name><name name-style="western"><surname>Demurtas</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Punzi</surname><given-names>L</given-names> </name></person-group><article-title>Efficacy of conservative treatments for hand osteoarthritis</article-title><source>Wien Klin Wochenschr</source><year>2021</year><month>03</month><volume>133</volume><issue>5-6</issue><fpage>234</fpage><lpage>240</lpage><pub-id pub-id-type="doi">10.1007/s00508-020-01702-0</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Marx</surname><given-names>W</given-names> </name><name name-style="western"><surname>Veronese</surname><given-names>N</given-names> </name><name name-style="western"><surname>Kelly</surname><given-names>JT</given-names> </name><etal/></person-group><article-title>The dietary inflammatory index and human health: an umbrella review of meta-analyses of observational studies</article-title><source>Adv Nutr</source><year>2021</year><month>10</month><day>1</day><volume>12</volume><issue>5</issue><fpage>1681</fpage><lpage>1690</lpage><pub-id pub-id-type="doi">10.1093/advances/nmab037</pub-id><pub-id pub-id-type="medline">33873204</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gazzaniga</surname><given-names>G</given-names> </name><name name-style="western"><surname>Menichelli</surname><given-names>D</given-names> </name><name name-style="western"><surname>Scaglione</surname><given-names>F</given-names> </name><name name-style="western"><surname>Farcomeni</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pani</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pastori</surname><given-names>D</given-names> </name></person-group><article-title>Effect of digoxin on all-cause and cardiovascular mortality in patients with atrial 
fibrillation with and without heart failure: an umbrella review of systematic reviews and 12 meta-analyses</article-title><source>Eur J Clin Pharmacol</source><year>2023</year><month>04</month><volume>79</volume><issue>4</issue><fpage>473</fpage><lpage>483</lpage><pub-id pub-id-type="doi">10.1007/s00228-023-03470-y</pub-id><pub-id pub-id-type="medline">36872367</pub-id></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Higgins</surname><given-names>J</given-names> </name></person-group><source>Cochrane Handbook for Systematic Reviews of Interventions 2023</source><year>2023</year><access-date>2025-07-03</access-date><publisher-name>The Cochrane Collaboration</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://training.cochrane.org/handbook">https://training.cochrane.org/handbook</ext-link></comment></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McKenzie</surname><given-names>JE</given-names> </name><name name-style="western"><surname>Brennan</surname><given-names>SE</given-names> </name></person-group><article-title>Overviews of systematic reviews: great promise, greater challenge</article-title><source>Syst Rev</source><year>2017</year><month>09</month><day>8</day><volume>6</volume><issue>1</issue><fpage>185</fpage><pub-id pub-id-type="doi">10.1186/s13643-017-0582-8</pub-id><pub-id pub-id-type="medline">28886726</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Luchini</surname><given-names>C</given-names> </name><name name-style="western"><surname>Veronese</surname><given-names>N</given-names> </name><name name-style="western"><surname>Nottegar</surname><given-names>A</given-names> 
</name><etal/></person-group><article-title>Assessing the quality of studies in meta-research: review/guidelines on the most important quality assessment tools</article-title><source>Pharm Stat</source><year>2021</year><month>01</month><volume>20</volume><issue>1</issue><fpage>185</fpage><lpage>195</lpage><pub-id pub-id-type="doi">10.1002/pst.2068</pub-id><pub-id pub-id-type="medline">32935459</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Seehra</surname><given-names>J</given-names> </name><name name-style="western"><surname>Pandis</surname><given-names>N</given-names> </name><name name-style="western"><surname>Koletsi</surname><given-names>D</given-names> </name><name name-style="western"><surname>Fleming</surname><given-names>PS</given-names> </name></person-group><article-title>Use of quality assessment tools in systematic reviews was varied and inconsistent</article-title><source>J Clin Epidemiol</source><year>2016</year><month>01</month><volume>69</volume><fpage>179</fpage><lpage>184</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2015.06.023</pub-id><pub-id pub-id-type="medline">26151664</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bigna</surname><given-names>JJR</given-names> </name><name name-style="western"><surname>Um</surname><given-names>LN</given-names> </name><name name-style="western"><surname>Nansseu</surname><given-names>JRN</given-names> </name></person-group><article-title>A comparison of quality of abstracts of systematic reviews including meta-analysis of randomized controlled trials in high-impact general medicine journals before and after the publication of PRISMA extension for abstracts: a systematic review and meta-analysis</article-title><source>Syst 
Rev</source><year>2016</year><month>10</month><day>13</day><volume>5</volume><issue>1</issue><fpage>174</fpage><pub-id pub-id-type="doi">10.1186/s13643-016-0356-8</pub-id><pub-id pub-id-type="medline">27737710</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>T</given-names> </name><name name-style="western"><surname>Hua</surname><given-names>F</given-names> </name><name name-style="western"><surname>Dan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Zhong</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Levey</surname><given-names>C</given-names> </name><name name-style="western"><surname>Song</surname><given-names>Y</given-names> </name></person-group><article-title>Reporting quality of systematic review abstracts in operative dentistry: an assessment using the PRISMA for Abstracts guidelines</article-title><source>J Dent</source><year>2020</year><month>11</month><volume>102</volume><fpage>103471</fpage><pub-id pub-id-type="doi">10.1016/j.jdent.2020.103471</pub-id><pub-id pub-id-type="medline">32931892</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>El Ansari</surname><given-names>W</given-names> </name><name name-style="western"><surname>AlRumaihi</surname><given-names>K</given-names> </name><name name-style="western"><surname>El-Ansari</surname><given-names>K</given-names> </name><etal/></person-group><article-title>Reporting quality of abstracts of systematic reviews/meta-analyses: an appraisal of Arab Journal of Urology across 12 years: the PRISMA-Abstracts checklist</article-title><source>Arab J 
Urol</source><year>2022</year><month>08</month><day>22</day><volume>21</volume><issue>1</issue><fpage>52</fpage><lpage>65</lpage><pub-id pub-id-type="doi">10.1080/2090598X.2022.2113127</pub-id><pub-id pub-id-type="medline">36818377</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhong</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Dan</surname><given-names>S</given-names> </name><etal/></person-group><article-title>The reporting quality of systematic review abstracts in leading general dental journals: a methodological study</article-title><source>J Evid Based Dent Pract</source><year>2023</year><month>03</month><volume>23</volume><issue>1</issue><fpage>101831</fpage><pub-id pub-id-type="doi">10.1016/j.jebdp.2022.101831</pub-id><pub-id pub-id-type="medline">36914298</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="web"><person-group person-group-type="author"><name name-style="western"><surname>Aromataris</surname><given-names>E</given-names> </name><name name-style="western"><surname>Lockwood</surname><given-names>C</given-names> </name><name name-style="western"><surname>Porritt</surname><given-names>K</given-names> </name><name name-style="western"><surname>Pilla</surname><given-names>B</given-names> </name></person-group><article-title>JBI Manual for Evidence Synthesis</article-title><source>JBI Global Wiki</source><access-date>2024-06-05</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://jbi-global-wiki.refined.site/space/MANUAL">https://jbi-global-wiki.refined.site/space/MANUAL</ext-link></comment></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Brzezi&#x0144;ska</surname><given-names>J</given-names> </name></person-group><article-title>The problem of zero cells in the analysis of contingency tables</article-title><source>KREM</source><year>2015</year><volume>5</volume><issue>941</issue><fpage>49</fpage><lpage>61</lpage><pub-id pub-id-type="doi">10.15678/ZNUEK.2015.0941.0504</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sapra</surname><given-names>RL</given-names> </name><name name-style="western"><surname>Saluja</surname><given-names>S</given-names> </name></person-group><article-title>Understanding statistical association and correlation</article-title><source>Current Medicine Research and Practice</source><year>2021</year><volume>11</volume><issue>1</issue><fpage>31</fpage><lpage>38</lpage><pub-id pub-id-type="doi">10.4103/cmrp.cmrp_62_20</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sullivan</surname><given-names>GM</given-names> </name><name name-style="western"><surname>Feinn</surname><given-names>R</given-names> </name></person-group><article-title>Using effect size&#x2014;or why the P value is not enough</article-title><source>J Grad Med Educ</source><year>2012</year><month>09</month><volume>4</volume><issue>3</issue><fpage>279</fpage><lpage>282</lpage><pub-id pub-id-type="doi">10.4300/JGME-D-12-00156.1</pub-id><pub-id pub-id-type="medline">23997866</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Demidenko</surname><given-names>E</given-names> </name></person-group><article-title>The P-value you can&#x2019;t buy</article-title><source>Am 
Stat</source><year>2016</year><volume>70</volume><fpage>33</fpage><lpage>38</lpage><pub-id pub-id-type="doi">10.1080/00031305.2015.1069760</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Greenland</surname><given-names>S</given-names> </name></person-group><article-title>Valid P-values behave exactly as they should: some misleading criticisms of P-values and their resolution with S-values</article-title><source>Am Stat</source><year>2019</year><volume>73</volume><fpage>106</fpage><lpage>114</lpage><pub-id pub-id-type="doi">10.1080/00031305.2018.1529625</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Lu</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Belitskaya-Levy</surname><given-names>I</given-names> </name></person-group><article-title>The debate about P-values</article-title><source>Shanghai Arch Psychiatry</source><year>2015</year><month>12</month><day>25</day><volume>27</volume><issue>6</issue><fpage>381</fpage><lpage>385</lpage><pub-id pub-id-type="doi">10.11919/j.issn.1002-0829.216027</pub-id><pub-id pub-id-type="medline">27199532</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Franco</surname><given-names>JVA</given-names> </name><name name-style="western"><surname>Arancibia</surname><given-names>M</given-names> </name><name name-style="western"><surname>Meza</surname><given-names>N</given-names> </name><name name-style="western"><surname>Madrid</surname><given-names>E</given-names> </name><name name-style="western"><surname>Kopitowski</surname><given-names>K</given-names> </name></person-group><article-title>Clinical practice guidelines: 
concepts, limitations and challenges</article-title><source>Medwave</source><year>2020</year><month>04</month><day>30</day><volume>20</volume><issue>3</issue><fpage>e7887</fpage><pub-id pub-id-type="doi">10.5867/medwave.2020.03.7887</pub-id><pub-id pub-id-type="medline">32428925</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Papatheodorou</surname><given-names>S</given-names> </name></person-group><article-title>Umbrella reviews: what they are and why we need them</article-title><source>Eur J Epidemiol</source><year>2019</year><month>06</month><volume>34</volume><issue>6</issue><fpage>543</fpage><lpage>546</lpage><pub-id pub-id-type="doi">10.1007/s10654-019-00505-6</pub-id><pub-id pub-id-type="medline">30852716</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shea</surname><given-names>BJ</given-names> </name><name name-style="western"><surname>Reeves</surname><given-names>BC</given-names> </name><name name-style="western"><surname>Wells</surname><given-names>G</given-names> </name><etal/></person-group><article-title>AMSTAR 2: a critical appraisal tool for systematic reviews that include randomised or non-randomised studies of healthcare interventions, or both</article-title><source>BMJ</source><year>2017</year><month>09</month><day>21</day><volume>358</volume><fpage>j4008</fpage><pub-id pub-id-type="doi">10.1136/bmj.j4008</pub-id><pub-id pub-id-type="medline">28935701</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Schlesinger</surname><given-names>S</given-names> </name><name name-style="western"><surname>Schwingshackl</surname><given-names>L</given-names> </name><name 
name-style="western"><surname>Neuenschwander</surname><given-names>M</given-names> </name><name name-style="western"><surname>Barbaresko</surname><given-names>J</given-names> </name></person-group><article-title>A critical reflection on the grading of the certainty of evidence in umbrella reviews</article-title><source>Eur J Epidemiol</source><year>2019</year><month>09</month><volume>34</volume><issue>9</issue><fpage>889</fpage><lpage>890</lpage><pub-id pub-id-type="doi">10.1007/s10654-019-00531-4</pub-id><pub-id pub-id-type="medline">31222608</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hultcrantz</surname><given-names>M</given-names> </name><name name-style="western"><surname>Rind</surname><given-names>D</given-names> </name><name name-style="western"><surname>Akl</surname><given-names>EA</given-names> </name><etal/></person-group><article-title>The GRADE Working Group clarifies the construct of certainty of evidence</article-title><source>J Clin Epidemiol</source><year>2017</year><month>07</month><volume>87</volume><fpage>4</fpage><lpage>13</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2017.05.006</pub-id><pub-id pub-id-type="medline">28529184</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Borah</surname><given-names>R</given-names> </name><name name-style="western"><surname>Brown</surname><given-names>AW</given-names> </name><name name-style="western"><surname>Capers</surname><given-names>PL</given-names> </name><name name-style="western"><surname>Kaiser</surname><given-names>KA</given-names> </name></person-group><article-title>Analysis of the time and workers needed to conduct systematic reviews of medical interventions using data from the PROSPERO registry</article-title><source>BMJ 
Open</source><year>2017</year><month>02</month><day>27</day><volume>7</volume><issue>2</issue><fpage>e012545</fpage><pub-id pub-id-type="doi">10.1136/bmjopen-2016-012545</pub-id><pub-id pub-id-type="medline">28242767</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hamel</surname><given-names>C</given-names> </name><name name-style="western"><surname>Michaud</surname><given-names>A</given-names> </name><name name-style="western"><surname>Thuku</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Defining rapid reviews: a systematic scoping review and thematic analysis of definitions and defining characteristics of rapid reviews</article-title><source>J Clin Epidemiol</source><year>2021</year><month>01</month><volume>129</volume><fpage>74</fpage><lpage>85</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2020.09.041</pub-id><pub-id pub-id-type="medline">33038541</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Clark</surname><given-names>J</given-names> </name><name name-style="western"><surname>Glasziou</surname><given-names>P</given-names> </name><name name-style="western"><surname>Del Mar</surname><given-names>C</given-names> </name><name name-style="western"><surname>Bannach-Brown</surname><given-names>A</given-names> </name><name name-style="western"><surname>Stehlik</surname><given-names>P</given-names> </name><name name-style="western"><surname>Scott</surname><given-names>AM</given-names> </name></person-group><article-title>A full systematic review was completed in 2 weeks using automation tools: a case study</article-title><source>J Clin Epidemiol</source><year>2020</year><month>05</month><volume>121</volume><fpage>81</fpage><lpage>90</lpage><pub-id 
pub-id-type="doi">10.1016/j.jclinepi.2020.01.008</pub-id><pub-id pub-id-type="medline">32004673</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Tricco</surname><given-names>AC</given-names> </name><name name-style="western"><surname>Langlois</surname><given-names>EV</given-names> </name><name name-style="western"><surname>Straus</surname><given-names>SE</given-names> </name></person-group><source>Rapid Reviews to Strengthen Health Policy and Systems: A Practical Guide</source><year>2017</year><publisher-name>World Health Organization</publisher-name><pub-id pub-id-type="other">9789241512763</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Garritty</surname><given-names>C</given-names> </name><name name-style="western"><surname>Gartlehner</surname><given-names>G</given-names> </name><name name-style="western"><surname>Nussbaumer-Streit</surname><given-names>B</given-names> </name><etal/></person-group><article-title>Cochrane Rapid Reviews Methods Group offers evidence-informed guidance to conduct rapid reviews</article-title><source>J Clin Epidemiol</source><year>2021</year><month>02</month><volume>130</volume><fpage>13</fpage><lpage>22</lpage><pub-id pub-id-type="doi">10.1016/j.jclinepi.2020.10.007</pub-id><pub-id pub-id-type="medline">33068715</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Garner</surname><given-names>P</given-names> </name><name name-style="western"><surname>Hopewell</surname><given-names>S</given-names> </name><name name-style="western"><surname>Chandler</surname><given-names>J</given-names> </name><etal/></person-group><article-title>When and how to update systematic 
reviews: consensus and checklist</article-title><source>BMJ</source><year>2016</year><month>07</month><day>20</day><volume>354</volume><fpage>i3507</fpage><pub-id pub-id-type="doi">10.1136/bmj.i3507</pub-id><pub-id pub-id-type="medline">27443385</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hoffmeyer</surname><given-names>B</given-names> </name><name name-style="western"><surname>Fonnes</surname><given-names>S</given-names> </name><name name-style="western"><surname>Andresen</surname><given-names>K</given-names> </name><name name-style="western"><surname>Rosenberg</surname><given-names>J</given-names> </name></person-group><article-title>Use of inactive Cochrane reviews in academia: a citation analysis</article-title><source>Scientometrics</source><year>2023</year><month>05</month><volume>128</volume><issue>5</issue><fpage>2923</fpage><lpage>2934</lpage><pub-id pub-id-type="doi">10.1007/s11192-023-04691-9</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Beller</surname><given-names>EM</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>JH</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>UH</given-names> </name><name name-style="western"><surname>Glasziou</surname><given-names>PP</given-names> </name></person-group><article-title>Are systematic reviews up-to-date at the time of publication?</article-title><source>Syst Rev</source><year>2013</year><month>05</month><day>28</day><volume>2</volume><fpage>36</fpage><pub-id pub-id-type="doi">10.1186/2046-4053-2-36</pub-id><pub-id pub-id-type="medline">23714302</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Bashir</surname><given-names>R</given-names> </name><name name-style="western"><surname>Surian</surname><given-names>D</given-names> </name><name name-style="western"><surname>Dunn</surname><given-names>AG</given-names> </name></person-group><article-title>Time-to-update of systematic reviews relative to the availability of new evidence</article-title><source>Syst Rev</source><year>2018</year><month>11</month><day>17</day><volume>7</volume><issue>1</issue><fpage>195</fpage><pub-id pub-id-type="doi">10.1186/s13643-018-0856-9</pub-id><pub-id pub-id-type="medline">30447694</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Heinen</surname><given-names>L</given-names> </name><name name-style="western"><surname>Goossen</surname><given-names>K</given-names> </name><name name-style="western"><surname>Lunny</surname><given-names>C</given-names> </name><name name-style="western"><surname>Hirt</surname><given-names>J</given-names> </name><name name-style="western"><surname>Puljak</surname><given-names>L</given-names> </name><name name-style="western"><surname>Pieper</surname><given-names>D</given-names> </name></person-group><article-title>The optimal approach for retrieving systematic reviews was achieved when searching MEDLINE and Epistemonikos in addition to reference checking: a methodological validation study</article-title><source>BMC Med Res Methodol</source><year>2024</year><month>11</month><day>9</day><volume>24</volume><issue>1</issue><fpage>271</fpage><pub-id pub-id-type="doi">10.1186/s12874-024-02384-2</pub-id><pub-id pub-id-type="medline">39522026</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bramer</surname><given-names>WM</given-names> 
</name><name name-style="western"><surname>Rethlefsen</surname><given-names>ML</given-names> </name><name name-style="western"><surname>Kleijnen</surname><given-names>J</given-names> </name><name name-style="western"><surname>Franco</surname><given-names>OH</given-names> </name></person-group><article-title>Optimal database combinations for literature searches in systematic reviews: a prospective exploratory study</article-title><source>Syst Rev</source><year>2017</year><month>12</month><day>6</day><volume>6</volume><issue>1</issue><fpage>245</fpage><pub-id pub-id-type="doi">10.1186/s13643-017-0644-y</pub-id><pub-id pub-id-type="medline">29208034</pub-id></nlm-citation></ref><ref id="ref60"><label>60</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Aromataris</surname><given-names>E</given-names> </name><name name-style="western"><surname>Riitano</surname><given-names>D</given-names> </name></person-group><article-title>Systematic reviews: constructing a search strategy and searching for evidence</article-title><source>Am J Nurs</source><year>2014</year><volume>114</volume><fpage>49</fpage><pub-id pub-id-type="doi">10.1097/01.NAJ.0000446779.99522.f6</pub-id><pub-id pub-id-type="medline">24759479</pub-id></nlm-citation></ref><ref id="ref61"><label>61</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yu</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>S</given-names> </name><name name-style="western"><surname>Sun</surname><given-names>Y</given-names> </name><etal/></person-group><article-title>Exploring the diverse definitions of &#x201C;evidence&#x201D;: a scoping review</article-title><source>BMJ Evid Based Med</source><year>2024</year><month>01</month><day>19</day><volume>29</volume><issue>1</issue><fpage>37</fpage><lpage>43</lpage><pub-id 
pub-id-type="doi">10.1136/bmjebm-2023-112355</pub-id><pub-id pub-id-type="medline">37940419</pub-id></nlm-citation></ref><ref id="ref62"><label>62</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Guimar&#x00E3;es</surname><given-names>NS</given-names> </name><name name-style="western"><surname>Joviano-Santos</surname><given-names>JV</given-names> </name><name name-style="western"><surname>Reis</surname><given-names>MG</given-names> </name><name name-style="western"><surname>Chaves</surname><given-names>RRM</given-names> </name><collab>Observatory of Epidemiology, Nutrition, Health Research (OPENS)</collab></person-group><article-title>Development of search strategies for systematic reviews in health using ChatGPT: a critical analysis</article-title><source>J Transl Med</source><year>2024</year><month>01</month><day>2</day><volume>22</volume><issue>1</issue><fpage>1</fpage><pub-id pub-id-type="doi">10.1186/s12967-023-04371-5</pub-id><pub-id pub-id-type="medline">38167166</pub-id></nlm-citation></ref><ref id="ref63"><label>63</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ge</surname><given-names>L</given-names> </name><name name-style="western"><surname>Agrawal</surname><given-names>R</given-names> </name><name name-style="western"><surname>Singer</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Leveraging artificial intelligence to enhance systematic reviews in health research: advanced tools and challenges</article-title><source>Syst Rev</source><year>2024</year><month>10</month><day>25</day><volume>13</volume><issue>1</issue><fpage>269</fpage><pub-id pub-id-type="doi">10.1186/s13643-024-02682-2</pub-id><pub-id pub-id-type="medline">39456077</pub-id></nlm-citation></ref><ref id="ref64"><label>64</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Gusenbauer</surname><given-names>M</given-names> </name><name name-style="western"><surname>Haddaway</surname><given-names>NR</given-names> </name></person-group><article-title>Which academic search systems are suitable for systematic reviews or meta-analyses? Evaluating retrieval qualities of Google Scholar, PubMed, and 26 other resources</article-title><source>Res Synth Methods</source><year>2020</year><month>03</month><volume>11</volume><issue>2</issue><fpage>181</fpage><lpage>217</lpage><pub-id pub-id-type="doi">10.1002/jrsm.1378</pub-id><pub-id pub-id-type="medline">31614060</pub-id></nlm-citation></ref><ref id="ref65"><label>65</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jia</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Huang</surname><given-names>D</given-names> </name><name name-style="western"><surname>Wen</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Assessment of language and indexing biases among Chinese-sponsored randomized clinical trials</article-title><source>JAMA Netw Open</source><year>2020</year><month>05</month><day>1</day><volume>3</volume><issue>5</issue><fpage>e205894</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2020.5894</pub-id><pub-id pub-id-type="medline">32463469</pub-id></nlm-citation></ref><ref id="ref66"><label>66</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Mao</surname><given-names>C</given-names> </name><name name-style="western"><surname>Li</surname><given-names>M</given-names> </name></person-group><article-title>Language bias among Chinese-sponsored randomized clinical trials in systematic reviews and meta-analyses&#x2014;can anything be done?</article-title><source>JAMA Netw 
Open</source><year>2020</year><month>05</month><day>1</day><volume>3</volume><issue>5</issue><fpage>e206370</fpage><pub-id pub-id-type="doi">10.1001/jamanetworkopen.2020.6370</pub-id><pub-id pub-id-type="medline">32463464</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Practical overview of The Umbrella Collaboration platform.</p><media xlink:href="formative_v9i1e75215_app1.docx" xlink:title="DOCX File, 1426 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Traditional umbrella reviews (TURs) identified in the initial search and TURs selected for comparative analysis.</p><media xlink:href="formative_v9i1e75215_app2.docx" xlink:title="DOCX File, 53 KB"/></supplementary-material></app-group></back></article>