% Journal article: validation study of web-based public ratings of general
% practitioners in Norway (JMIR Formative Research, vol. 7, 2023).
% Notes for maintainers:
%   - Only one url field is kept (the publisher landing page); the DOI lives
%     in the doi field and the PubMed identifier in pmid, because BibTeX
%     honours a single url field per entry and warns about repeats.
%   - month uses the predefined macro (unquoted) so styles can format it.
%   - pages holds the electronic article number, not a page range.
%   - day, keywords, abstract and pmid are not standard BibTeX fields;
%     styles that do not know them ignore them.
@article{info:doi/10.2196/38932,
  author   = {Bjertn{\ae}s, {\O}yvind
              and Iversen, Hilde Hestad
              and Norman, Rebecka
              and Valderas, Jose M},
  title    = {Web-Based Public Ratings of General Practitioners in {Norway}: Validation Study},
  journal  = {JMIR Formative Research},
  year     = {2023},
  month    = mar,
  day      = {17},
  volume   = {7},
  pages    = {e38932},
  keywords = {web-based rating; questionnaire; psychometric; patient-reported experiences and satisfaction; survey; health care; practitioner; doctor rating; physician rating; patient provider; patient experience; patient satisfaction},
  abstract = {Background: Understanding the complex relationships among multiple strategies for gathering users' perspectives in the evaluation of the performance of services is crucial for the interpretation of user-reported measures. Objective: The main objectives were to (1) evaluate the psychometric performance of an 11-item web-based questionnaire of ratings of general practitioners (GPs) currently used in Norway (Legelisten.no) and (2) assess the association between web-based and survey-based patient experience indicators. Methods: We included all published ratings on GPs and practices on Legelisten.no in the period of May 5, 2012, to December 15, 2021 (N=76,521). The questionnaire consists of 1 mandatory item and 10 voluntary items with 5 response categories (1 to 5 stars), alongside an open-ended review question and background variables. Questionnaire dimensionality and internal consistency were assessed with Cronbach $\alpha$, exploratory factor, and item response theory analyses, and a priori hypotheses were developed for assessing construct validity (chi-square analysis). We calculated Spearman correlations between web-based ratings and reference patient experience indicators based on survey data using the patient experiences with the GP questionnaire (n=5623 respondents for a random sample of 50 GPs). Results: Web-based raters were predominantly women (n=32,074, 64.0{\%}), in the age range of 20--50 years (n=35,113, 74.6{\%}), and reporting 5 or fewer consultations with the GP each year (n=28,798, 64.5{\%}). Ratings were missing for 18.9{\%} (n=14,500) to 27.4{\%} (n=20,960) of nonmandatory items. A total of 4 of 11 rating items showed a U-shaped distribution, with $>$60{\%} reporting 5 stars. Factor analysis and internal consistency testing identified 2 rating scales: ``GP'' (5 items; $\alpha$=.98) and ``practice'' (6 items; $\alpha$=.85). Some associations were not consistent with a priori hypotheses and allowed only partial confirmation of the construct validity of ratings. Item response theory analysis results were adequate for the ``practice'' scale but not for the ``GP'' scale, with items with inflated discrimination ($>$5) distributed over a narrow interval of the scale. The correlations between the web-based ratings GP scale and GP reference indicators ranged from 0.34 (P=.021) to 0.44 (P=.002), while the correlation between the web-based ratings practice scale and reference indicators ranged from 0.17 (not significant) to 0.49 (P$<$.001). The strongest correlations between web-based and survey scores were found for items measuring practice-related experiences: phone availability ($\rho$=0.51), waiting time in the office ($\rho$=0.62), other staff ($\rho$=0.54--0.58; P$<$.001). Conclusions: The practice scale of the web-based ratings has adequate psychometric performance, while the GP suffers from important limitations. The associations with survey-based patient experience indicators were accordingly mostly weak to modest. Our study underlines the importance of interpreting web-based ratings with caution and the need to further develop rating sites.},
  issn     = {2561-326X},
  doi      = {10.2196/38932},
  url      = {https://formative.jmir.org/2023/1/e38932},
  pmid     = {36930207},
}