<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v9i1e70778</article-id><article-id pub-id-type="doi">10.2196/70778</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Comparison and Validation of Actigraphy Algorithms Using a Large Community Dataset: Algorithm Validation Study</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Panesar</surname><given-names>Darshan</given-names></name><degrees>HBSc, MEd</degrees><xref ref-type="aff" rid="aff1">1</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Vichare</surname><given-names>Aashish</given-names></name><degrees>BSc</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Goncalves</surname><given-names>Jason</given-names></name><degrees>BSc, MBA</degrees><xref ref-type="aff" rid="aff3">3</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Stremler</surname><given-names>Robyn</given-names></name><degrees>PhD, 
RN</degrees><xref ref-type="aff" rid="aff4">4</xref></contrib></contrib-group><aff id="aff1"><institution>Applied Psychology and Human Development, Ontario Institute for Studies in Education, University of Toronto</institution><addr-line>252 Bloor Street West</addr-line><addr-line>Toronto</addr-line><addr-line>ON</addr-line><country>Canada</country></aff><aff id="aff2"><institution>Independent Researcher</institution><addr-line>Vancouver</addr-line><addr-line>BC</addr-line><country>Canada</country></aff><aff id="aff3"><institution>Independent Researcher</institution><addr-line>Toronto</addr-line><addr-line>ON</addr-line><country>Canada</country></aff><aff id="aff4"><institution>Lawrence Bloomberg Faculty of Nursing, University of Toronto</institution><addr-line>Toronto</addr-line><addr-line>ON</addr-line><country>Canada</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Sarvestan</surname><given-names>Javad</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Berahmand</surname><given-names>Kamal</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Zheng</surname><given-names>Xi</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Darshan Panesar, HBSc, MEd, Applied Psychology and Human Development, Ontario Institute for Studies in Education, University of Toronto, 252 Bloor Street West, Toronto, ON, M5S 1V6, Canada, 1 416 934 4503; <email>darshan.panesar@mail.utoronto.ca</email></corresp></author-notes><pub-date pub-type="collection"><year>2025</year></pub-date><pub-date pub-type="epub"><day>11</day><month>12</month><year>2025</year></pub-date><volume>9</volume><elocation-id>e70778</elocation-id><history><date date-type="received"><day>02</day><month>01</month><year>2025</year></date><date 
date-type="rev-recd"><day>29</day><month>10</month><year>2025</year></date><date date-type="accepted"><day>30</day><month>10</month><year>2025</year></date></history><copyright-statement>&#x00A9; Darshan Panesar, Aashish Vichare, Jason Goncalves, Robyn Stremler. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 11.12.2025. </copyright-statement><copyright-year>2025</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2025/1/e70778"/><abstract><sec><title>Background</title><p>For decades, the measurement of sleep and wake has relied upon watch-based actigraphy as an alternative to expensive, obtrusive clinical monitoring. At the time of this publication, we have relied upon a handful of algorithms to score actigraphy data as sleep or wake. 
However, these algorithms have largely been tested and validated with only small samples of young, healthy individuals.</p></sec><sec><title>Objective</title><p>This study aimed to establish the accuracy and agreement of conventional and traditional actigraphy algorithms against polysomnography, the clinical standard, using the diverse Multi-Ethnic Study of Atherosclerosis (MESA) sleep dataset. As a secondary objective, we examined algorithm and polysomnography agreement for key sleep metrics including total sleep time (TST), sleep efficiency (SE), and wake after sleep onset (WASO).</p></sec><sec sec-type="methods"><title>Methods</title><p>We assessed 5 well-established algorithms, including Cole-Kripke, University of California San Diego (UCSD) scoring, Kripke 2010, Philips-Respironics, and Sadeh, with and without rescoring across 1440 individuals (M<sub>age</sub>=69.36, SD 8.97 years) from the MESA sleep dataset. We conducted epoch-by-epoch comparisons assessing accuracy, confusion matrix analyses, receiver operating characteristic (ROC) curves, area under the curve (AUC), and Bland-Altman analyses for agreement.</p></sec><sec sec-type="results"><title>Results</title><p>Primary results indicated all algorithms demonstrated accuracy between 78% and 80%, with the highest accuracy by the Kripke 2010 (80%) algorithm followed closely by the Cole-Kripke (80%) and Philips-Respironics (79%&#x2010;80%) algorithms. In addition, moderate Cohen &#x03BA; agreement and moderate positive Matthews correlations were demonstrated by all algorithms. Further, all algorithms demonstrated significant mean differences across sleep metrics.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The findings of this study establish that these traditional actigraphy algorithms can, with high accuracy, detect sleep and wake in large, diverse population samples, including older adults or populations at risk of health conditions. 
However, these algorithms may carry difficulty for precise assessment of sleep metrics, especially in cases of sleep disorders or irregular sleep.</p></sec></abstract><kwd-group><kwd>accelerometer</kwd><kwd>actigraphy</kwd><kwd>algorithm</kwd><kwd>Multi-Ethnic Study of Atherosclerosis</kwd><kwd>M.E.S.A.</kwd><kwd>polysomnography</kwd><kwd>sleep</kwd><kwd>sleep monitoring</kwd><kwd>sleep disorder</kwd><kwd>wake</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>Over several decades, actigraphy has been used to objectively examine rest and wake periods in a variety of participants. Actigraphs are accelerometers that measure activity (acceleration of motion) levels of the person wearing the device [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. Given its small footprint and lower cost, actigraphy is often used as a simple alternative to traditional, more invasive sleep monitoring methods. Actigraphy is used to measure basic movement activity patterns of individuals noninvasively to assess when they are asleep (rest) or awake (wake) [<xref ref-type="bibr" rid="ref1">1</xref>-<xref ref-type="bibr" rid="ref3">3</xref>]. Movement data collected by actigraphs are analyzed using validated algorithms to determine, for each minute of recording, whether the wearer was asleep or awake. At the time of this publication, the vast majority of sleep-wake analysis of movement data collected by actigraphy continues to be done primarily using one of a handful of popular linear regression models. These include Cole-Kripke, University of California San Diego (UCSD), Sadeh, and Philips-Respironics. Though these algorithms are extensively used, they have largely been tested with only small samples of young and healthy individuals [<xref ref-type="bibr" rid="ref3">3</xref>-<xref ref-type="bibr" rid="ref11">11</xref>]. 
There is little research examining the comparison between and validity of these algorithms with a large, demographically diverse sample or with a sample at risk of health conditions. Given the increasing use of actigraphy for a variety of populations, it is critical to evaluate these algorithms across a large, diverse population to give a better representation of their accuracy. Further, it is important to evaluate the accuracy of these algorithms across older adults, as these populations have higher susceptibility to, and high prevalence of health disorders. Given the wide range of studies using actigraphy, it is important to understand actigraphy&#x2019;s performance when used with these higher-risk populations for sleep-wake assessment [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref17">17</xref>]. In addition, given the comorbidity of sleep issues with other health disorders, the use of actigraphy in sleep studies has become more commonplace [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref17">17</xref>]. Therefore, it is highly important to assess the accuracy and reliability of traditional actigraphy algorithms with these populations.</p></sec><sec id="s1-2"><title>Multi-Ethnic Study of Atherosclerosis Dataset</title><p>There are several significant challenges in collecting a large, diverse dataset. This, in part, is due to the ideal method of validation (for actigraphy), which is polysomnography, the gold standard for sleep assessment. Polysomnography and its analysis are expensive, clinic-based procedures that require extensive equipment and expertise, are highly time-consuming to conduct, and are therefore impractical for measuring sleep across several nights with large samples [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. 
However, an ongoing initiative called the Multi-Ethnic Study of Atherosclerosis (MESA) has conducted a large sleep study (over 2000 participants), including time-synchronized actigraphy and polysomnography data as part of their larger initiative [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. This sleep sample represents a portion of over 6800 participants between the ages of 45 and 84 years, free of cardiovascular disease at baseline, who were monitored longitudinally for subclinical cardiovascular disease [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. This dataset presents a novel opportunity to establish a concrete comparison between conventional algorithms and establish their accuracy across a large population of older adults who are also at risk of health issues. Thus, this paper evaluates traditional actigraphy algorithms using the MESA dataset.</p></sec><sec id="s1-3"><title>Current Study</title><p>The primary objective of this study was to establish the accuracy of traditional actigraphy algorithms against the gold standard polysomnography using a large established dataset. In doing so, we provide comprehensive foundational benchmarks of the most commonly used traditional actigraphy algorithms. Further, we aimed to also establish performance benchmarks of commonly used actigraphy algorithms across a diverse population with older adults and sleep pathologies. These benchmarks greatly inform research and clinical use of these algorithms. Further, this study provides detailed standards to developers of novel actigraphy technologies and actigraphy analysis methodologies. Based on previous, albeit smaller, sample validation studies, we hypothesized that traditional actigraphy algorithms would detect sleep and wake with high accuracy. 
In addition, we predicted that additional rescoring of actigraphy would help improve their accuracy over the algorithms alone.</p><p>In addition to sleep-wake, researchers and clinicians often examine sleep metrics to evaluate patterns or disruption of sleep patterns. As our secondary goal, we examined each algorithm&#x2019;s agreement and bias with polysomnography on several commonly studied sleep metrics, including total sleep time (TST), sleep efficiency (SE), and wake after sleep onset (WASO). TST is the total number of minutes an individual sleeps during the night from the first onset of sleep to sleep offset. On the other hand, WASO is the total duration of wake between the first onset of sleep and the sleep offset. Finally, SE is the percentage of time asleep (TST) during the total time spent in bed. As these are key sleep metrics, this study&#x2019;s results provide detailed information for current research and clinical practice as well as benchmarks for future improvements to current and novel actigraphy methods. We hypothesized that overall, sleep metrics derived from actigraphy algorithm-processed activity would have high agreement with polysomnography [<xref ref-type="bibr" rid="ref22">22</xref>].</p><p>As an exploratory analysis, we looked at both accuracy and sleep metrics of each algorithm and evaluated possible points of failure and variables that impact the performance of actigraphy analyses. Specifically, we examined if accuracy results and sleep metrics would be poorer for subgroups of participants who had sleep disorders. To our knowledge, this is the first study to provide detailed actigraphy performance metrics across large key samples of individuals with sleep problems. These metrics provide nuanced information that will critically facilitate current and future practice. 
We discuss the implications of these variables, making recommendations for future directions of research.</p></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Sample</title><p>The dataset sample for this study was derived from the MESA and acquired through the National Sleep Research Resource (NSRR) [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. MESA is a large prospective, community-based study designed to examine the risk factors, prevalence, and progression of cardiovascular disease [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. The MESA study was conducted from 2000 to 2002, with 6814 ethnically diverse men and women aged 45&#x2010;84 years who were free of overt cardiovascular disease and recruited from 6 US sites [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. MESA participants underwent periodic core physical assessments at enrollment and 10 years following initial recruitment (2010&#x2010;2012) [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. A subset of participants was invited to participate in the MESA ancillary sleep assessment study. This study entailed a home visit (1 night) in which home polysomnography was conducted along with concurrent actigraphy. Polysomnography data were analyzed by one of 3 polysomnologists using standard guidelines, while 2 scorers completed actigraphy analysis by first marking the sleep period and then using automatic scoring software. Scorers completed a MESA-rule&#x2013;based training and certification before scoring, and after scoring, interscorer reliability was assessed at 2 time points for sleep stages (which pertained to the sleep data used for the study). 
At time point 1 for sleep stage scoring, the interscorer intraclass correlation coefficients (ICC) across n=27 from 9 participants were as follows: Stage 1 was 0.86, Stage 2 was 0.63, Stage 3&#x2010;4 was 0.81, and REM was 0.96 [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. At time point 2, interscorer ICC across n=38 from 19 participants were as follows: Stage 1 was 0.74, Stage 2 was 0.81, Stage 3&#x2010;4 was 0.79, and REM was 0.93 [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. For this study, we used raw actigraphy activity counts for analysis. For ground truth, we used the matching polysomnography sleep-wake data that was collapsed to wake (coded wake stage) versus all sleep stages collapsed to one &#x201C;sleep&#x201D; variable. Only one night of polysomnography data were collected by the original study which was matched and synchronized with the respective actigraphy data (cropped to match).</p><p>The data were filtered based on several criteria. The initial dataset contained 2159 samples for actigraphy and 2056 samples for polysomnography. We first excluded samples that did not have concurrent polysomnography data (103 removed). The actigraphy and polysomnography samples were then matched based on a synchronization document provided by NSRR and MESA. Those samples that did not have a corresponding synchronization or were reported as having data issues by NSRR (2 individuals) were excluded, resulting in a sample of 1831 individuals (225 removed). Next, we filtered the dataset based on polysomnography data quality as indicated by the dataset description. We included individuals for whom the quality of polysomnography data was rated as very good or better, as this rating indicated an appropriate number of channels present for accurate sleep-wake evaluation. An additional 14 participants with complete, matched data of fair and good quality were also included. 
This resulted in a total of 1484 samples (347 removed).</p><p>Outlier filtering, or filtering based on actigraphy, was not conducted, as we wished to retain all actigraphy data for a true representation of how actigraphy data are traditionally processed using these algorithms. Given the absence of outlier filtering, we conducted a sensitivity analysis to evaluate the effects of these outliers on our overall sample results. We identified outliers in the cropped 30-second epoch synchronized (1-night) actigraphy raw activity data using a 2-step process. The raw 30-second data were used as this was the data used for each algorithm (the binary algorithm results could not be used to identify outliers). Each participant&#x2019;s individual IQR, mean, and SD of activity were calculated. For the first step, participants&#x2019; IQR was compared to the full sample IQR. Any participants whose range fell more than 1.5 times the IQR below the first quartile or above the third quartile were identified as outliers. For the second step, the <italic>z</italic> score for each participant was calculated relative to the sample mean and SD. Any participants with a <italic>z</italic> score of 2 or greater were identified as outliers. A total of 56 participants were identified as outliers using this 2-tier approach. Once these outlier individuals were identified, they were omitted, and Cohen &#x03BA;, Matthews correlation coefficient (MCC), and confusion matrix metrics were recalculated. The relative change between the full dataset and outlier-omitted results was calculated to be less than 5% across all metrics and algorithms except for the Philips 40 rescored algorithm sensitivity (5.55%) and specificity (7%). However, this larger relative change occurred only for the rescored algorithm and was close to the 5% threshold, and all other metrics were within the 5% range; therefore, we determined that the outliers did not pose a significant issue. 
For full comparison metrics, see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>Further, polysomnography data were in binary format, and the actigraphy data, once processed through an algorithm, would also be in binary data point format. This would limit the breadth of fluctuation in the main analyses and would allow for direct 1:1 comparison of epochs. In addition to the main analyses, rescoring was also applied, which is the standard method of correcting issues with actigraphy data. In addition, we retained all participants for subsequent analysis of sleep metrics, as we aimed to evaluate the entire spread of sleep metric results for normal sleep, sleep problems, and extreme cases of sleep. As a secondary measure, we added analyses for participants with sleep problems to evaluate these extreme cases separately.</p><p>Finally, during data processing, an additional 44 individuals were excluded due to issues synchronizing actigraphy and polysomnography time points, missing data, and issues with processing. The final sample size analyzed was N=1440 (M<sub>age</sub>=69.3, SD 9.0 years; n<sub>male</sub>=663) individuals (<xref ref-type="fig" rid="figure1">Figure 1</xref>). The distribution of included participants is denoted in <xref ref-type="table" rid="table1">Table 1</xref>.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Data filtering and selection flowchart reflecting the data sample selection and filtering from the source National Sleep Research Resource&#x2013;Multi-Ethnic Study of Atherosclerosis dataset. 
NSRR-MESA: National Sleep Research Resource&#x2013;Multi-Ethnic Study of Atherosclerosis.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e70778_fig01.png"/></fig><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>Baseline characteristics of the selected sample subset from National Sleep Research Resource&#x2013;Multi-Ethnic Study of Atherosclerosis sleep dataset.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Characteristic</td><td align="left" valign="top">Values</td></tr></thead><tbody><tr><td align="left" valign="top">Age at study (years), mean (SD)</td><td align="char" char="." valign="top">69.3 (9)</td></tr><tr><td align="left" valign="top">Age (years), range</td><td align="char" char="." valign="top">54&#x2010;94</td></tr><tr><td align="left" valign="top">Sex, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Female</td><td align="char" char="." valign="top">817 (55.7)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Male</td><td align="char" char="." valign="top">663 (46)</td></tr><tr><td align="left" valign="top">Race, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>White</td><td align="char" char="." valign="top">565 (39.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Black/African American</td><td align="char" char="." valign="top">396 (27.5)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Hispanic</td><td align="char" char="." 
valign="top">351 (24.4)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Asian</td><td align="char" char="." valign="top">168 (11.7)</td></tr><tr><td align="left" valign="top">Sleep problems, n (%)</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sleep apnea</td><td align="char" char="." valign="top">104 (7.2)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Insomnia</td><td align="char" char="." valign="top">84 (5.8)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Restless legs syndrome</td><td align="char" char="." valign="top">66 (4.6)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Use of CPAP<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup> machine for sleep</td><td align="char" char="." valign="top">62 (4.3)</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Snoring &#x2265;3&#x2010;5 times per week</td><td align="char" char="." valign="top">584 (40.6)</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>CPAP: continuous positive airway pressure.</p></fn></table-wrap-foot></table-wrap><p>In addition to the final sample, for exploratory analyses, subsamples for the populations with sleep problems were also selected. 
Four sleep problem groups, comprising individuals with sleep apnea, individuals who used continuous positive airway pressure (CPAP), individuals with insomnia, and individuals with restless legs syndrome (RLS), were examined using the same detailed analyses and methods as the full sample (<xref ref-type="table" rid="table1">Table 1</xref>).</p></sec><sec id="s2-2"><title>Data Processing</title><p>Data processing was conducted using Python 3 with a wide range of libraries and platforms. Before the final analyses, both actigraphy data and polysomnography data were transformed from 30-second epochs to 1-minute epochs for standardization, since many algorithms and previous studies typically present minute-by-minute epochs for actigraphy for easier data analysis and interpretation [<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref23">23</xref>]. For the actigraphy data, transformation was done after algorithm results were obtained. For the actigraphy results, once each respective algorithm had processed the data, the actigraphy values from the algorithms were summed to collapse the data to 1-minute epochs. The binarized polysomnography data were also transformed to 1-minute epochs (from 30-second epochs) using the following set of rules. First, if 2 adjacent 30-second epochs were both coded as sleep, the resultant minute would be sleep. Second, if 2 adjacent 30-second epochs were both coded as wake, the resultant minute would be wake. Third, if one of 2 adjacent 30-second epochs was wake, the resultant minute would be wake. Polysomnography transformation to collapse epochs to 1-minute intervals is a common practice for actigraphy comparison [<xref ref-type="bibr" rid="ref24">24</xref>-<xref ref-type="bibr" rid="ref27">27</xref>]. With respect to this wake-wins rule, we evaluated the distribution of epoch pairs in the polysomnography data across all participants. A total of 55.13% of pairs were both asleep, 38.33% were both awake, and only 6.54% were mixed (wake-sleep or sleep-wake). 
Given the low percentage of mixed pairs, we expected the effect of data transformation to be limited with respect to the wake-wins condition.</p><p>All data transformations were again done through Python 3, using a wide range of libraries with coding assistance provided by ChatGPT (OpenAI) to generate Python code for existing Python libraries [<xref ref-type="bibr" rid="ref28">28</xref>]. ChatGPT was used to generate Python code for data file preparation and processing (eg, data transformation). Manual data checks in addition to code-based error reporting were conducted to verify the code was working as desired and data were correctly processed. In addition, ChatGPT was used to generate Python code for running statistical analyses. All statistical formulae and analyses were verified within the code. Once code was generated, it was checked by at least one of the study authors before use. No machine learning code was generated via ChatGPT, and any actigraphy algorithm code was checked and specified in the Python code exactly as defined by the original algorithm sources. Data were synchronized based on the NSRR and the MESA-provided synchronization guide, which highlighted polysomnography start times relative to actigraphy.</p><sec id="s2-2-1"><title>Actigraphy Algorithms</title><p>For the application of actigraphy algorithms on the actigraphy data, we used Python (Python 3)-based platform pipelines. For this study, we uploaded raw actigraphy data. This platform allows data processing by simply selecting some basic parameters, that is, the specific algorithm for processing and the files. All files were processed through each algorithm, respectively. Once the algorithm value was calculated, raw results were collapsed to 1-minute epochs by summing the raw result values. We then applied each algorithm&#x2019;s respective classification threshold to determine whether each epoch was sleep or wake. 
These thresholds were prespecified by the respective algorithm authors in the original articles.</p><p>Algorithms were selected on the basis that they were well established and commonly used throughout the literature for actigraphy analyses across all populations, including with older adults and in populations with sleep disorders [<xref ref-type="bibr" rid="ref29">29</xref>-<xref ref-type="bibr" rid="ref36">36</xref>]. However, concrete performance benchmarks have yet to be established. The most used algorithms at the time of this publication are the Cole-Kripke (cited 2413 times in Google Scholar) and Sadeh (cited 1795 times in Google Scholar) algorithms. The Cole-Kripke algorithm has been used across older adult and sleep disorder populations in several studies [<xref ref-type="bibr" rid="ref31">31</xref>,<xref ref-type="bibr" rid="ref34">34</xref>]. The Philips algorithm was selected as the actigraphs used for the MESA actigraphy data collection were Philips Actiwatch devices, which are also among the commonly used clinical actigraphs [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. Additional algorithms were selected on the basis that they were adapted from the Cole-Kripke algorithm to be used in various sleep populations and cases with varying parameters. Further, all algorithms had variants that could handle 30-second epoch data or 1-minute epoch data, allowing the use of our dataset and for comparison between algorithms. Generally, the selected algorithms all follow a similar regression-based approach to determine sleep and wake, allowing for comparability and comprising a comprehensive spread of traditionally used actigraphy algorithms. 
The selected algorithms noted below were evaluated in this study [<xref ref-type="bibr" rid="ref22">22</xref>].</p></sec><sec id="s2-2-2"><title>Cole-Kripke (CK)</title><p>Cole et al [<xref ref-type="bibr" rid="ref6">6</xref>] developed an actigraphy scoring algorithm that, to date, has been heavily used throughout actigraphy research analysis. The CK algorithm was developed for a variety of epochs, including 30-second and 1-minute epochs [<xref ref-type="bibr" rid="ref6">6</xref>]. The algorithm is regression based, using a 7-epoch window to compute whether a participant is awake or asleep. The original Cole et al [<xref ref-type="bibr" rid="ref6">6</xref>] 30-second version of the algorithm, used for this study, uses the optimal parameters for a maximum 30-second nonoverlapping epoch of activity per minute.</p><p>D=0.0001(50A<sub>-4</sub>+30A<sub>-3</sub>+14A<sub>-2</sub>+28A<sub>-1</sub>+121A<sub>0</sub>+8A<sub>+1</sub>+50A<sub>+2</sub>)</p><p>Here, if D&#x003C;1, the epoch is scored as sleep, while if D&#x2265;1, the epoch is scored as wake. The 0.0001 represents the scale factor for the entire equation, while the numerical values assigned to each epoch represent the weighting factors for the present, previous, and following epochs. A represents an epoch, where A<sub>-4</sub> to A<sub>-1</sub> represent the four preceding epochs to the current epoch (A<sub>0</sub>) and A<sub>+1</sub> to A<sub>+2</sub> represent the following 2 epochs.</p></sec><sec id="s2-2-3"><title>University of California, San Diego Scoring Algorithm (UCSD)</title><p>Developed by Jean-Louis et al [<xref ref-type="bibr" rid="ref9">9</xref>], this actigraphy algorithm is similar to the Cole-Kripke algorithm, with the only difference being the weights on the epoch. The UCSD algorithm was also designed for minute-by-minute epochs. However, the authors noted that two 30-second epochs were counted as 1 minute to match their polysomnography data recording rates. 
In our case, we gave this algorithm the original 30-second epoch data, treating each epoch in the formula as is. The UCSD algorithm used in this study was as follows:</p><p>D=0.05(0.010A<sub>-4</sub>+0.015A<sub>-3</sub>+0.028A<sub>-2</sub>+0.031A<sub>-1</sub>+0.085A<sub>0</sub>+0.015A<sub>+1</sub>+0.010A<sub>+2</sub>)</p><p>where, if D&#x003C;1, the epoch is scored as sleep, while if D&#x2265;1, the epoch is scored as wake. According to previous analyses of Actillume data for healthy young adults, the optimal scaling factor, <italic>P</italic>, was 0.05 [<xref ref-type="bibr" rid="ref9">9</xref>]. The numerical values assigned to each epoch represent the weighting factors for the present, previous, and following epochs. A represents an epoch, where A<sub>-4</sub> to A<sub>-1</sub> represent the 4 epochs preceding the current epoch (A<sub>0</sub>) and A<sub>+1</sub> to A<sub>+2</sub> represent the following 2 epochs.</p></sec><sec id="s2-2-4"><title>Kripke 2010 (K2010)</title><p>Developed by Kripke et al and applied through a Microsoft Excel Visual Basic macro, this algorithm aimed to set optimal parameters for a sleep-wake scoring algorithm to score each epoch. This algorithm is similar to the aforementioned algorithms [<xref ref-type="bibr" rid="ref10">10</xref>]. However, the optimal algorithm accounts for the activity counts of 13 30-second epochs (the 10 epochs preceding and the 2 epochs following the epoch being scored [X<sub>0</sub>], plus the scored epoch itself) and differs in the weights assigned to each epoch. The optimal algorithm by Kripke et al [<xref ref-type="bibr" rid="ref10">10</xref>] used in this study was as follows:</p><p>D=0.30(0.0064X<sub>-10</sub>+0.0074X<sub>-9</sub>+0.0112X<sub>-8</sub>+0.0112X<sub>-7</sub>+0.0118X<sub>-6</sub>+0.0118X<sub>-5</sub>+0.0128X<sub>-4</sub>+0.0188X<sub>-3</sub>+0.0280X<sub>-2</sub>+0.0664X<sub>-1</sub>+0.0300X<sub>0</sub>+0.0112X<sub>+1</sub>+0.100X<sub>+2</sub>)</p><p>where D was the scaled polynomial sum of activity scores for 13 30-second epochs. 
The 0.30 represents the optimal overall scaling parameter. The numerical value attached to each epoch (X) represents a scaling parameter for each respective corresponding epoch. When D&#x003C;1, the epoch being scored (X<sub>0</sub>) is scored as sleep, while if D&#x2265;1, it is scored as wake according to this algorithm.</p></sec><sec id="s2-2-5"><title>Philips-Respironics (Philips)</title><p>This algorithm is designed for Philips-Respironics (previously Mini-Mitter Co. Inc.) devices known as Actiwatch [<xref ref-type="bibr" rid="ref37">37</xref>,<xref ref-type="bibr" rid="ref38">38</xref>]. This algorithm is designed to handle data from the Actiwatch monitors, which measure activity levels in several epoch lengths of 15 seconds, 30 seconds, 1 minute, or 2 minutes. In our study, we used the 30-second version of this algorithm. Similar to the aforementioned algorithms, this also applies weights to 9 epochs: the 4 preceding epochs, the 4 following epochs, and the epoch being scored. The algorithm used in this study is as follows:</p><p>A=0.04E<sub>-4</sub>+0.04E<sub>-3</sub>+0.2E<sub>-2</sub>+0.2E<sub>-1</sub>+2E<sub>0</sub>+0.2E<sub>+1</sub>+0.2E<sub>+2</sub>+0.04E<sub>+3</sub>+0.04E<sub>+4</sub></p><p>Based on activity counts measured through the device, a total activity value (A) is generated for each epoch. The total activity value is then evaluated against the wake threshold value (20 [low], 40 [medium], 80 [high]), which, in the Actiwatch software, is automatically generated based on the activity data of individual cases or a custom value selected by the user [<xref ref-type="bibr" rid="ref39">39</xref>]. If the total activity value is less than or equal to the wake threshold value, the epoch is scored as sleep. That is, if A&#x003E;T, the epoch is scored as wake; otherwise, if A &#x2264; T, the epoch is scored as sleep. E<sub>n</sub> represents the activity counts of the previous, successive, or scored epoch. 
For our purposes, we evaluated each threshold after processing.</p></sec><sec id="s2-2-6"><title>Sadeh</title><p>Sadeh et al [<xref ref-type="bibr" rid="ref11">11</xref>] developed an algorithm based on concurrent polysomnography and a wrist actigraph (Ambulatory Monitoring, Ardsley). The algorithm features a discriminant function using 5 calculated variables on an 11-minute window (the 5 preceding, 5 proceeding, and the scored epochs), centered on the epoch being scored [<xref ref-type="bibr" rid="ref11">11</xref>]. Any missing epochs are considered 0 to avoid infinity problems. This happens if the current epoch is at the beginning or end of a dataset. The Sadeh algorithm uses the y-axis epoch data. If any of the epoch counts are over 300, it reduces them to 300. The original formula for this algorithm uses a 1-minute epoch window; in this study, we gave this algorithm the 30-second epoch raw actigraphy data. Each 30-second epoch was evaluated as a 1-minute epoch. This would preserve the comparison between algorithms, as all of them processed the 30-second raw actigraphy data. The original Sadeh algorithm used in this study is as follows:</p><p>PS = (7.601 - [0.065 * AVG] - [1.08 * NATS] - [0.056 * SD] - [0.703 * LG])</p><p>where AVG is the arithmetic mean (average) of the activity counts for the window, NATS is the number of epochs that have counts &#x2265;50 and &#x003C;100, SD is the standard deviation for the first 6 epochs of the window, and LG is the natural (base e) logarithm of the current epoch. Post scoring of the epoch, if the resultant value, referred to as probability of sleep (PS), is &#x2265; 0, the epoch is scored as sleep. Based on the original paper, typically, if the result is &#x003E; &#x2212;4, then the current epoch is considered asleep.</p></sec><sec id="s2-2-7"><title>Webster&#x2019;s Rescoring Rules (RS)</title><p>Notably, traditional actigraphy algorithms often incorrectly score periods of wake as sleep or rest. 
To counter this issue, Webster et al [<xref ref-type="bibr" rid="ref40">40</xref>] developed a set of rescoring rules to apply after initial classification using a scoring algorithm [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref22">22</xref>,<xref ref-type="bibr" rid="ref40">40</xref>]. These rules have been well cited in the literature, and given their relevance in increasing algorithm accuracy, they were an important consideration to evaluate within this study [<xref ref-type="bibr" rid="ref22">22</xref>]. These were applied in unison with the algorithms and compared with nonrescored results. Once the data were processed with each respective algorithm, the raw activity scores were binarized, reflecting sleep (1) or wake (0) using optimal thresholds (see data analysis), and rescoring was sequentially applied to the binarized data. Nonrescored results were denoted as NRS, and rescored results were denoted as RS. The rescoring rules were as follows:</p><list list-type="order"><list-item><p>After at least 4 minutes scored as wake, the next 1 minute scored as sleep is rescored as wake</p></list-item><list-item><p>After at least 10 minutes scored as wake, the next 3 minutes scored as sleep are rescored as wake</p></list-item><list-item><p>After at least 15 minutes scored as wake, the next 4 minutes scored as sleep are rescored as wake</p></list-item><list-item><p>If 6 minutes or less are scored as sleep surrounded by at least 10 minutes (before and after) scored as wake, they are rescored as wake</p></list-item><list-item><p>If 10 minutes or less are scored as sleep surrounded by at least 20 minutes (before and after) scored as wake, they are rescored as wake</p></list-item></list></sec></sec><sec id="s2-3"><title>Statistical Analysis</title><sec id="s2-3-1"><title>Epoch-by-Epoch Comparison</title><p>Once the data were processed with each respective algorithm, the raw activity scores were examined. 
We conducted an epoch-by-epoch comparison of sleep-wake for each algorithm against the corresponding ground truth polysomnography measurements. In our analysis, the polysomnography classification was considered the actual class, while the actigraphy classification was considered the predicted class (with wake=0 and sleep=1). We used confusion matrix analyses to assess key metrics accuracy, sensitivity, specificity, precision, and <italic>F</italic><sub>1</sub>-score (a measure of an algorithm&#x2019;s predictive performance of sleep and wake). A complete definition of metrics is provided in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p><p>Epoch-by-epoch comparison was conducted for each algorithm based on the original sleep-wake thresholds defined in the respective papers (see Actigraphy Algorithm Section). The Philips-Respironics algorithm has 3 suggested threshold values (20 [low], 40 [medium], 80 [high]), all of which were examined [<xref ref-type="bibr" rid="ref39">39</xref>]. In addition, weighted mean and SD results were generated for each algorithm both with and without rescoring.</p></sec><sec id="s2-3-2"><title>Cohen &#x03BA; and Matthews Correlation Coefficient</title><p>As measures of comparison between sleep algorithms and polysomnography, Cohen &#x03BA; and MCC were calculated for each algorithm. As with the confusion matrix statistics, the sleep algorithm results (predicted) were evaluated in comparison to the ground truth polysomnography (actual). Cohen &#x03BA; is a measure of agreement between raters for nominal or categorical data with adjustments for chance agreement [<xref ref-type="bibr" rid="ref41">41</xref>,<xref ref-type="bibr" rid="ref42">42</xref>]. For this study, the raters were individual sleep algorithms and polysomnography. 
MCC was also used to evaluate the correlation (performance) between binary classifications, such as in our case between sleep algorithms and polysomnography [<xref ref-type="bibr" rid="ref43">43</xref>,<xref ref-type="bibr" rid="ref44">44</xref>]. A complete definition of metrics is provided in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p></sec><sec id="s2-3-3"><title>Repeated Measures ANOVA With Post Hoc Examination</title><p>As the same sample of participants was assessed by each algorithm, repeated measures ANOVA was conducted to evaluate whether there were respective differences between sleep algorithm results for Cohen &#x03BA;, MCC, and confusion matrix metrics. The assumption of sphericity was violated across all results based on significant Mauchly tests and low epsilon values. Therefore, Greenhouse-Geisser corrections were applied, and adjusted degrees of freedom and <italic>P</italic> values were evaluated. For significant ANOVA results, subsequent post hoc analyses were conducted with Bonferroni correction (controlling for type 1 error) to evaluate which algorithm pairs demonstrated significant differences and their effect sizes (Hedges <italic>g</italic>) [<xref ref-type="bibr" rid="ref45">45</xref>,<xref ref-type="bibr" rid="ref46">46</xref>]. For Hedges <italic>g</italic> interpretation, thresholds of 0.2, 0.5, and 0.8 were considered to indicate small, medium, and large effect sizes, respectively.</p></sec><sec id="s2-3-4"><title>Receiver Operating Characteristic Curve and Area Under the Curve (ROC; AUC)</title><p>To comparably evaluate the ability of each algorithm to classify sleep-wake, we evaluated the receiver operating characteristic (ROC) curves and the area under the curve (AUC). AUC provides information about algorithm performance (ie, how capable each algorithm is of distinguishing sleep vs wake) as a measure of global accuracy and robustness. 
An ROC curve is generated by plotting the sensitivity (also known as the true positive rate; TPR) against 1 - specificity (also known as the false positive rate; FPR) at various cut-off thresholds. We used raw values for actigraphy and binary ground truth (polysomnography) to generate the ROC curve. Using the <italic>scikit-learn</italic> Python library, an extensive range of thresholds was automatically covered for the ROC curve. Both ROC curves and AUC were generated for each algorithm. Given that the original algorithms reported a fixed, predetermined threshold for sleep-wake, the optimal cut-off point was not examined in the ROC curves. However, the sensitivity and specificity for each algorithm relative to their sleep-wake threshold were reported in the epoch-by-epoch results. A complete definition of metrics is provided in <xref ref-type="supplementary-material" rid="app2">Multimedia Appendix 2</xref>.</p></sec><sec id="s2-3-5"><title>Rescored Algorithm Analysis</title><p>To evaluate whether applying Webster et al [<xref ref-type="bibr" rid="ref40">40</xref>] rescoring rules improved algorithm performance in contrast to nonrescored actigraphy algorithms, we applied the rescoring rules to each algorithm [<xref ref-type="bibr" rid="ref40">40</xref>]. To do so, we first binarized (sleep [1], wake [0]) the raw activity values generated by each algorithm using the optimal thresholds determined in the ROC analyses. We then applied the rescoring rules to the binary data. Finally, we calculated the Cohen &#x03BA;, MCC, and all confusion matrix results for the rescored algorithms, respectively. These results were compared to the nonrescored results.</p></sec><sec id="s2-3-6"><title>Sleep Metrics Comparison</title><p>We examined each algorithm&#x2019;s agreement with polysomnography on several commonly studied sleep metrics, including TST, SE, and WASO. 
For the polysomnography, we evaluated the MESA and NSRR-provided data for each sleep metric, while for the actigraphy data, once each dataset was processed by each algorithm and binarized, we calculated each sleep metric for both the nonrescored and rescored data. We used the MESA and NSRR definitions to calculate the respective sleep metrics. To do so, we applied the following calculations:</p><list list-type="order"><list-item><p>TST=the total number of minutes of sleep from the first sleep onset time to sleep offset time.</p></list-item><list-item><p>SE=TST/in-bed (results presented as %).</p><list list-type="alpha-upper"><list-item><p>In-bed=synchronized to polysomnography in-bed start time, the interval between lights out and in-bed time versus lights off, out-bed, or wake time.</p></list-item></list></list-item><list-item><p>WASO=the total duration of wake between first sleep onset time and sleep offset time.</p></list-item></list><p>These sleep metrics were calculated for each participant and each algorithm, respectively. For comparison of agreement, we used Bland-Altman distributions [<xref ref-type="bibr" rid="ref47">47</xref>]. Bland-Altman distributions are used to visualize the difference between actigraphy and polysomnography. We evaluated the average difference between corresponding measurements (actigraphy and polysomnography) for each respective sleep metric. Through these plots, we examined the distribution of differences between actigraphy and polysomnography, that is, the consistency between the 2. In addition, we examined the mean difference (actigraphy-polysomnography), bias, limits of agreement (LoA), and proportional bias (across values; regression) between actigraphy and polysomnography. 
The Bland-Altman distribution analysis was also conducted in Python using the pyCompare library with code assistance provided by ChatGPT [<xref ref-type="bibr" rid="ref28">28</xref>].</p></sec></sec><sec id="s2-4"><title>Ethical Considerations</title><p>The current study was conducted on a pre-established MESA dataset on a secondary use basis [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. The original MESA study obtained institutional review board ethics approval for each study site of data collection and obtained written consent from all participants [<xref ref-type="bibr" rid="ref20">20</xref>]. The NSRR resource maintains a detailed approval procedure for dataset acquisition and use [<xref ref-type="bibr" rid="ref21">21</xref>]. The study dataset was acquired through a data access and use agreement with the NSRR [<xref ref-type="bibr" rid="ref21">21</xref>]. All data were predeidentified for privacy by NSRR. Ethics approval was acquired from the University of Toronto (REB protocol # 35344) for this study.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><p>The sample demographics results are presented in <xref ref-type="table" rid="table1">Table 1</xref>. Notably, 12.3% (n=177, includes Apnea, Insomnia RLS, and CPAP subgroup) of sample participants reported some form of sleep problem. Additionally, measures of hypertension, diabetes, and other disorders were not reported in the NSRR version of the MESA dataset. 
However, previous studies note the prevalence of health issues or health risks within this study sample [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>].</p><p>The epoch-by-epoch comparison for nonrescored algorithms demonstrated that the Kripke 2010 algorithm (mean 0.80, SD 0.09) had the highest accuracy, followed by Cole-Kripke (mean 0.80, SD 0.09), Philips-Respironics (mean 0.80, SD 0.08; mean 0.80, SD 0.09; and mean 0.79, SD 0.09; for 20, 40, and 80 thresholds), UCSD (mean 0.78, SD 0.10), and Sadeh (mean 0.78, SD 0.10) respectively (<xref ref-type="fig" rid="figure2">Figure 2</xref>). An important note is that there were not any large differences between traditional algorithms for accuracy, ranging only 0.01&#x2010;0.02. <italic>F</italic><sub>1</sub>-scores for the nonrescored algorithm results were all &#x003E;0.80. Results for Cohen &#x03BA; demonstrated moderate agreement (0.4&#x2010;0.6) between actigraphy algorithms and polysomnography. Similarly, MCC results showed moderate positive correlations (range 0.49&#x2010;0.58) between actigraphy algorithms and polysomnography for all algorithms. Complete results for Cohen &#x03BA;, MCC, accuracy, sensitivity, specificity, precision, and <italic>F</italic><sub>1</sub>-scores are presented in <xref ref-type="fig" rid="figure2">Figure 2</xref>.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Cohen &#x03BA;, Matthews correlation coefficient, and confusion matrix heatmap results for nonrescored algorithms. Accuracy, sensitivity, specificity, precision, <italic>F</italic><sub>1</sub>-score: mean (SD). Color tone represents index value. 
MCC: Matthews correlation coefficient; UCSD: University of California San Diego.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e70778_fig02.png"/></fig><p>Examination of rescored algorithm results revealed marginal changes in accuracy. Rescored results demonstrated that the Cole-Kripke (mean 0.81, SD 0.09) had the highest accuracy, followed by Philips-Respironics (mean 0.80, SD 0.09; mean 0.81, SD 0.09; and mean 0.81, SD 0.09; for 20, 40, and 80 thresholds), Sadeh (mean 0.80, SD 0.10), UCSD (mean 0.80, SD 0.09), and Kripke 2010 (mean 0.79, SD 0.09), respectively (<xref ref-type="fig" rid="figure3">Figure 3</xref>). Again, there were no large differences between traditional algorithms for accuracy, ranging only 0.01&#x2010;0.03. Rescored results for <italic>F</italic><sub>1</sub>-scores either demonstrated no change or marginally changed by 0.01&#x2010;0.02 for some algorithms, with all still &#x003E;0.80. Results for Cohen &#x03BA; again demonstrated minor changes, remaining within the moderate agreement range between actigraphy algorithms and polysomnography. Similarly, MCC results only showed minor changes again, showcasing moderate positive correlations (range 0.54&#x2010;0.60) between actigraphy algorithms and polysomnography for all algorithms. Complete rescored results for Cohen &#x03BA;, MCC, accuracy, sensitivity, specificity, precision, and <italic>F</italic><sub>1</sub>-scores are presented in <xref ref-type="fig" rid="figure3">Figure 3</xref>. Complete confusion matrix statistical results for both nonrescored and rescored algorithms are provided in <xref ref-type="supplementary-material" rid="app3">Multimedia Appendix 3</xref>.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Cohen &#x03BA;, Matthews correlation coefficient, and confusion matrix heatmap results for rescored algorithms. 
Accuracy, sensitivity, specificity, precision, <italic>F</italic><sub>1</sub>-score: mean (SD). Color tone represents index value. MCC: Matthews correlation coefficient; UCSD: University of California San Diego.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e70778_fig03.png"/></fig><p>Repeated measures ANOVA testing revealed significant differences between algorithms across all metrics (&#x03BA;, MCC, accuracy, sensitivity, specificity, precision, and <italic>F</italic><sub>1</sub>-score). Complete ANOVA results are presented in <xref ref-type="table" rid="table2">Tables 2</xref> and <xref ref-type="table" rid="table3">3</xref> for nonrescored and rescored algorithms, respectively. Subsequent post hoc analyses also revealed significant differences between algorithm pairs across all metrics (<xref ref-type="table" rid="table4">Tables 4</xref> and <xref ref-type="table" rid="table5">5</xref>).</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Repeated measures ANOVA for confusion matrix across nonrescored algorithms.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="top">Metric</td><td align="left" valign="top">SS<sup><xref ref-type="table-fn" rid="table2fn1">a</xref></sup></td><td align="left" valign="top">MS<sup><xref ref-type="table-fn" rid="table2fn2">b</xref></sup></td><td align="left" valign="top"><italic>F</italic> test</td><td align="left" valign="top"><italic>df</italic></td><td align="left" valign="top">&#x03B7;&#x00B2;g<sup><xref ref-type="table-fn" rid="table2fn3">c</xref></sup></td><td align="left" valign="top">Eps (&#x03B5;)</td><td align="left" valign="top"><italic>P</italic> value (uncorr.)</td><td align="left" valign="top">df<sup><xref ref-type="table-fn" rid="table2fn4">d</xref></sup> (adjusted)</td><td align="left" valign="top"><italic>P</italic> value (adjusted)</td></tr></thead><tbody><tr><td 
align="left" valign="top">Accuracy</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">78.14</td><td align="left" valign="top"/><td align="left" valign="top">0.01</td><td align="left" valign="top">0.19</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top"/><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy<sub>algorithm</sub></td><td align="left" valign="top">0.50</td><td align="left" valign="top">0.08</td><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1.15</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy<sub>error</sub></td><td align="left" valign="top">9.22</td><td align="left" valign="top">0.00</td><td align="left" valign="top"/><td align="left" valign="top">8628</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1656.37</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">Sensitivity</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">3754.96</td><td align="left" valign="top"/><td align="left" valign="top">0.39</td><td align="left" valign="top">0.20</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top"/><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sensitivity<sub>algorithm</sub></td><td align="left" valign="top">27.60</td><td align="left" valign="top">4.60</td><td align="left" valign="top"/><td align="left" 
valign="top">6</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1.19</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sensitivity<sub>error</sub></td><td align="left" valign="top">10.55</td><td align="left" valign="top">0.00</td><td align="left" valign="top"/><td align="left" valign="top">8616</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1711.96</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">Specificity</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">8112.40</td><td align="left" valign="top"/><td align="left" valign="top">0.23</td><td align="left" valign="top">0.22</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top"/><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Specificity<sub>algorithm</sub></td><td align="left" valign="top">99.06</td><td align="left" valign="top">16.51</td><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1.34</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Specificity<sub>error</sub></td><td align="left" valign="top">17.56</td><td align="left" valign="top">0.00</td><td align="left" valign="top"/><td align="left" valign="top">8628</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1920.73</td><td align="left" valign="top"/></tr><tr><td 
align="left" valign="top">Precision</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">3435.68</td><td align="left" valign="top"/><td align="left" valign="top">0.07</td><td align="left" valign="top">0.20</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top"/><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Precision<sub>algorithm</sub></td><td align="left" valign="top">10.55</td><td align="left" valign="top">1.76</td><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1.22</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Precision<sub>error</sub></td><td align="left" valign="top">4.42</td><td align="left" valign="top">0.00</td><td align="left" valign="top"/><td align="left" valign="top">8628</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1751.47</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">93.31</td><td align="left" valign="top"/><td align="left" valign="top">0.00</td><td align="left" valign="top">0.20</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top"/><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>F</italic><sub>1</sub>-score<sub>algorithm</sub></td><td align="left" valign="top">0.36</td><td align="left" valign="top">0.06</td><td 
align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1.17</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>F</italic><sub>1</sub>-score<sub>error</sub></td><td align="left" valign="top">5.58</td><td align="left" valign="top">0.00</td><td align="left" valign="top"/><td align="left" valign="top">8628</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1688.29</td><td align="left" valign="top"/></tr></tbody></table><table-wrap-foot><fn id="table2fn1"><p><sup>a</sup>SS: sum of squares.</p></fn><fn id="table2fn2"><p><sup>b</sup>MS: mean squares.</p></fn><fn id="table2fn3"><p><sup>c</sup>&#x03B7;&#x00B2;g: generalized eta squared.</p></fn><fn id="table2fn4"><p><sup>d</sup>Adjusted values for <italic>df</italic> and <italic>P</italic> values refer to respective Greenhouse-Geisser corrections.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t3" position="float"><label>Table 3.</label><caption><p>Repeated measures ANOVA for confusion matrix across rescored algorithms.</p></caption><table id="table3" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Metric</td><td align="left" valign="bottom">SS<sup><xref ref-type="table-fn" rid="table3fn1">a</xref></sup></td><td align="left" valign="bottom">MS<sup><xref ref-type="table-fn" rid="table3fn2">b</xref></sup></td><td align="left" valign="bottom"><italic>F</italic> test</td><td align="left" valign="bottom"><italic>df</italic></td><td align="left" valign="bottom">&#x03B7;&#x00B2;g<sup><xref ref-type="table-fn" rid="table3fn3">c</xref></sup></td><td align="left" valign="bottom">Eps (&#x03B5;)</td><td align="left" valign="bottom"><italic>P</italic><sub>uncorr.</sub> value</td><td 
align="left" valign="bottom">df<sub>adjusted<sup><xref ref-type="table-fn" rid="table3fn4">d</xref></sup></sub></td><td align="left" valign="bottom"><italic>P</italic><sub>adjusted</sub> value</td></tr></thead><tbody><tr><td align="left" valign="top">Accuracy</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">64.62</td><td align="left" valign="top"/><td align="left" valign="top">0.01</td><td align="left" valign="top">0.20</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top"/><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy<sub>algorithm</sub></td><td align="left" valign="top">0.56</td><td align="left" valign="top">0.09</td><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1.22</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Accuracy<sub>error</sub></td><td align="left" valign="top">12.56</td><td align="left" valign="top">0.00</td><td align="left" valign="top"/><td align="left" valign="top">8628</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1754</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">Sensitivity</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">3906.50</td><td align="left" valign="top"/><td align="left" valign="top">0.40</td><td align="left" valign="top">0.21</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top"/><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content 
content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sensitivity<sub>algorithm</sub></td><td align="left" valign="top">44.24</td><td align="left" valign="top">7.37</td><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1.24</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Sensitivity<sub>error</sub></td><td align="left" valign="top">16.26</td><td align="left" valign="top">0.00</td><td align="left" valign="top"/><td align="left" valign="top">8616</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1782.96</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">Specificity</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">5961.01</td><td align="left" valign="top"/><td align="left" valign="top">0.22</td><td align="left" valign="top">0.26</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top"/><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Specificity<sub>algorithm</sub></td><td align="left" valign="top">97.56</td><td align="left" valign="top">16.26</td><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1.55</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Specificity<sub>error</sub></td><td align="left" valign="top">23.54</td><td align="left" valign="top">0.00</td><td 
align="left" valign="top"/><td align="left" valign="top">8628</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">2227.53</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">Precision</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">2706.59</td><td align="left" valign="top"/><td align="left" valign="top">0.08</td><td align="left" valign="top">0.23</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top"/><td align="left" valign="top">&#x003C;.001</td></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Precision<sub>algorithm</sub></td><td align="left" valign="top">11.65</td><td align="left" valign="top">1.94</td><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1.37</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content>Precision<sub>error</sub></td><td align="left" valign="top">6.19</td><td align="left" valign="top">0.00</td><td align="left" valign="top"/><td align="left" valign="top">8628</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1974.34</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><italic>F</italic><sub>1</sub>-score</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">341.85</td><td align="left" valign="top"/><td align="left" valign="top">0.03</td><td align="left" valign="top">0.20</td><td align="left" valign="top">&#x003C;.001</td><td align="left" valign="top"/><td align="left" valign="top">&#x003C;.001</td></tr><tr><td 
align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>F</italic><sub>1</sub>-score<sub>algorithm</sub></td><td align="left" valign="top">2.20</td><td align="left" valign="top">0.37</td><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1.22</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top"><named-content content-type="indent">&#x00A0;&#x00A0;&#x00A0;&#x00A0;</named-content><italic>F</italic><sub>1</sub>-score<sub>error</sub></td><td align="left" valign="top">9.26</td><td align="left" valign="top">0.00</td><td align="left" valign="top"/><td align="left" valign="top">8628</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1759.31</td><td align="left" valign="top"/></tr></tbody></table><table-wrap-foot><fn id="table3fn1"><p><sup>a</sup>SS: sum of squares.</p></fn><fn id="table3fn2"><p><sup>b</sup>MS: mean squares.</p></fn><fn id="table3fn3"><p><sup>c</sup>&#x03B7;&#x00B2;g: general eta squared.</p></fn><fn id="table3fn4"><p><sup>d</sup>Adjusted values for df and <italic>P</italic> values refer to respective Greenhouse-Geisser corrections.</p></fn></table-wrap-foot></table-wrap><table-wrap id="t4" position="float"><label>Table 4.</label><caption><p>Repeated measures ANOVA for Matthews correlation coefficient and Cohen &#x03BA; across nonrescored algorithms.</p></caption><table id="table4" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Metric</td><td align="left" valign="bottom">SS<sup><xref ref-type="table-fn" rid="table4fn1">a</xref></sup></td><td align="left" valign="bottom">MS<sup><xref ref-type="table-fn" rid="table4fn2">b</xref></sup></td><td align="left" valign="bottom"><italic>F</italic> test</td><td align="left" valign="bottom"><italic>df</italic></td><td align="left" valign="bottom">&#x03B7;&#x00B2;g<sup><xref ref-type="table-fn" rid="table4fn3">c</xref></sup></td><td align="left" valign="bottom">Eps 
(&#x03B5;)</td><td align="left" valign="bottom"><italic>P</italic><sub>uncorr.</sub> value</td><td align="left" valign="bottom"><italic>df</italic><sub>adjusted<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></sub></td><td align="left" valign="bottom"><italic>P</italic><sub>adjusted<sup><xref ref-type="table-fn" rid="table4fn4">d</xref></sup></sub> value</td></tr></thead><tbody><tr><td align="left" valign="top">MCC</td><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom">138.47</td><td align="left" valign="bottom"/><td align="left" valign="bottom">0.01</td><td align="left" valign="bottom">0.23</td><td align="left" valign="bottom">&#x003C;.001</td><td align="left" valign="bottom"/><td align="left" valign="bottom">&#x003C;.001</td></tr><tr><td align="left" valign="top">MCC<sub>algorithm</sub></td><td align="left" valign="top">2.01</td><td align="left" valign="top">0.34</td><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1.38</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">MCC<sub>error</sub></td><td align="left" valign="bottom">20.91</td><td align="left" valign="bottom">0.00</td><td align="left" valign="bottom"/><td align="left" valign="bottom">8628</td><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom">1988.02</td><td align="left" valign="bottom"/></tr><tr><td align="left" valign="top">K</td><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom">450.81</td><td align="left" valign="bottom"/><td align="left" valign="bottom">0.02</td><td align="left" valign="bottom">0.21</td><td align="left" valign="bottom">&#x003C;.001</td><td align="left" valign="bottom"/><td align="left" 
valign="bottom">&#x003C;.001</td></tr><tr><td align="left" valign="top">K<sub>algorithm</sub></td><td align="left" valign="top">9.26</td><td align="left" valign="top">1.54</td><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1.27</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">K<sub>error</sub></td><td align="left" valign="bottom">29.53</td><td align="left" valign="bottom">0.00</td><td align="left" valign="bottom"/><td align="left" valign="bottom">8628</td><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom">1828.56</td><td align="left" valign="bottom"/></tr></tbody></table><table-wrap-foot><fn id="table4fn1"><p><sup>a</sup>SS : sum of squares.</p></fn><fn id="table4fn2"><p><sup>b</sup>MS: mean squares.</p></fn><fn id="table4fn3"><p><sup>c</sup>&#x03B7;&#x00B2;g: general eta squared.</p></fn><fn id="table4fn4"><p><sup>d</sup>Adjusted values for df and <italic>P</italic> values refer to respective Greenhouse-Geisser corrections. 
</p></fn></table-wrap-foot></table-wrap><table-wrap id="t5" position="float"><label>Table 5.</label><caption><p>Repeated measures ANOVA for Matthews correlation coefficient and Cohen &#x03BA; across rescored algorithms.</p></caption><table id="table5" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Metric</td><td align="left" valign="bottom">SS<sup><xref ref-type="table-fn" rid="table5fn1">a</xref></sup></td><td align="left" valign="bottom">MS<sup><xref ref-type="table-fn" rid="table5fn2">b</xref></sup></td><td align="left" valign="bottom"><italic>F</italic> test</td><td align="left" valign="bottom"><italic>df</italic></td><td align="left" valign="bottom">&#x03B7;&#x00B2;g<sup><xref ref-type="table-fn" rid="table5fn3">c</xref></sup></td><td align="left" valign="bottom">Eps (&#x03B5;)</td><td align="left" valign="bottom"><italic>P</italic><sub>uncorr.</sub> value</td><td align="left" valign="bottom"><italic>df</italic><sub>adjusted<sup><xref ref-type="table-fn" rid="table5fn4">d</xref></sup></sub></td><td align="left" valign="bottom"><italic>P</italic><sub>adjusted<sup><xref ref-type="table-fn" rid="table5fn4">d</xref></sup></sub> value</td></tr></thead><tbody><tr><td align="left" valign="top">MCC</td><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom">37.17</td><td align="left" valign="bottom"/><td align="left" valign="bottom">0.00</td><td align="left" valign="bottom">0.25</td><td align="left" valign="bottom">&#x003C;.001</td><td align="left" valign="bottom"/><td align="left" valign="bottom">&#x003C;.001</td></tr><tr><td align="left" valign="top">MCC<sub>algorithm</sub></td><td align="left" valign="top">0.72</td><td align="left" valign="top">0.12</td><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1.53</td><td align="left" valign="top"/></tr><tr><td 
align="left" valign="top">MCC<sub>error</sub></td><td align="left" valign="bottom">27.85</td><td align="left" valign="bottom">0.00</td><td align="left" valign="bottom"/><td align="left" valign="bottom">8628</td><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom">2196.75</td><td align="left" valign="bottom"/></tr><tr><td align="left" valign="top">K</td><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom">112.84</td><td align="left" valign="bottom"/><td align="left" valign="bottom">0.01</td><td align="left" valign="bottom">0.23</td><td align="left" valign="bottom">&#x003C;.001</td><td align="left" valign="bottom"/><td align="left" valign="bottom">&#x003C;.001</td></tr><tr><td align="left" valign="top">K<sub>algorithm</sub></td><td align="left" valign="top">3.21</td><td align="left" valign="top">0.53</td><td align="left" valign="top"/><td align="left" valign="top">6</td><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top">1.38</td><td align="left" valign="top"/></tr><tr><td align="left" valign="top">K<sub>error</sub></td><td align="left" valign="bottom">40.86</td><td align="left" valign="bottom">0.00</td><td align="left" valign="bottom"/><td align="left" valign="bottom">8628</td><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom"/><td align="left" valign="bottom">1977.95</td><td align="left" valign="bottom"/></tr></tbody></table><table-wrap-foot><fn id="table5fn1"><p><sup>a</sup>SS: sum of squares.</p></fn><fn id="table5fn2"><p><sup>b</sup>MS: mean squares.</p></fn><fn id="table5fn3"><p><sup>c</sup>&#x03B7;&#x00B2;g: general eta squared.</p></fn><fn id="table5fn4"><p><sup>d</sup>Adjusted values for df and <italic>P</italic> values refer to respective Greenhouse-Geisser 
corrections.</p></fn></table-wrap-foot></table-wrap><p>Post hoc analyses for MCC were significant; however, all effect sizes were very small to small (all Hedges <italic>g</italic>&#x003C;0.3). Post hoc results for Cohen &#x03BA; demonstrated small to medium effects with effect sizes ranging from 0 to &#x003C;0.5. Complete post hoc analysis results are presented in Tables 4 and 5. Notably, modest differences were observed between K2010 and UCSD (Hedges <italic>g</italic>=0.42) and K2010 and Sadeh (Hedges <italic>g</italic>=0.44), indicating better predictive performance of the K2010 algorithm, respectively. Similarly, significant post hoc results for rescored algorithms for both MCC and Cohen &#x03BA; showed very small to small effect sizes across all algorithm contrasts (all Hedges <italic>g</italic>&#x003C;0.25). Detailed results are provided in <xref ref-type="supplementary-material" rid="app4">Multimedia Appendix 4</xref>.</p><p>Post hoc analyses for nonrescored algorithms' confusion matrix metrics, particularly accuracy and <italic>F</italic><sub>1</sub>-scores, demonstrated both nonsignificant and many significant contrasts (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>). Significant pairs, however, had very small to small effect sizes (Hedges <italic>g</italic>) ranging between 0&#x2010;0.3 for both accuracy and <italic>F</italic><sub>1</sub>-scores (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>). Post hoc results for sensitivity, specificity, and precision varied greatly, ranging from very small to large effect between contrasts, revealing nuanced performance differences (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>). 
For detailed post hoc results, see <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>.</p><p>After rescoring, post hoc results for confusion matrix metrics demonstrated similar very small to small effect sizes for accuracy. However, there were some medium effect sizes for <italic>F</italic><sub>1</sub>-scores. Notably, modest differences were observed across K2010 predictive performance, which was worse than the CK (Hedges <italic>g</italic>=&#x2212;0.46), Sadeh (Hedges <italic>g</italic>=&#x2212;0.42), UCSD (Hedges <italic>g</italic>=&#x2212;0.40), and Philips algorithm with a threshold of 80 (Hedges <italic>g</italic>=&#x2212;0.42). Similarly, post hoc results for sensitivity, specificity, and precision varied greatly, ranging from very small to large effects between contrasts, revealing nuanced performance differences (<xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>). For full post hoc results, see <xref ref-type="supplementary-material" rid="app5">Multimedia Appendix 5</xref>.</p><p>The ROC analysis revealed that the Kripke 2010 had the highest AUC (0.86) followed by Sadeh (0.85), Philips-Respironics (0.84), Cole-Kripke (0.84), and UCSD (0.84). Across the board, all algorithms demonstrated excellent ability to discriminate sleep-wake (excellent performance). The ROC curves are presented with their respective AUC in <xref ref-type="fig" rid="figure4">Figure 4</xref>.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Algorithm receiver operating characteristic curves and area under the curve. Receiver operating characteristics curves and respective area under the curve listed for each algorithm. 
AUC: area under the curve; ROC: receiver operating characteristics curve.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e70778_fig04.png"/></fig><p>Examination of agreement for TST through Bland-Altman distributions demonstrated significant levels of mean difference and systematic bias across all algorithms (<xref ref-type="fig" rid="figure5">Figure 5</xref>; <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>; <xref ref-type="table" rid="table1">Table 1</xref>). This may be due to the sample or due to clear outliers. These outliers may also have caused discrepancies within the distribution and mean difference. In addition, supplementary regression analyses for proportional bias were significant across nonrescored Cole-Kripke, UCSD, Sadeh, and Philips threshold 80 algorithms. Similarly, rescored Kripke 2010, UCSD, Sadeh, Philips threshold 20, and Philips threshold 80 algorithms also demonstrated significant proportional bias. Although regression tests were significant, both the regression slopes and <italic>R</italic><sup>2</sup> values were extremely small, with slopes ranging from &#x2212;0.06 to 0.10 and all <italic>R</italic><sup>2</sup> values &#x2264;0.01. Although proportional bias regression analyses were significant for a number of algorithms for TST, both slopes and <italic>R</italic><sup>2</sup> were extremely small, and plot distributions were relatively evenly spread. Therefore, the effects of proportional bias may be minimal, and their statistical significance may be attributable to sample size. Detailed proportional bias regression statistics are reported in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>; <xref ref-type="table" rid="table1">Table 1</xref>.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Bland-Altman distributions total sleep time for nonrescored algorithms. 
Plots represent the Bland-Altman distributions for nonrescored algorithms of actigraphy in comparison to polysomnography (ground truth) for total sleep time. TST: total sleep time.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e70778_fig05.png"/></fig><p>The best results for TST agreement with polysomnography across all algorithms were obtained with the Philips algorithm with the threshold 40 (nonrescored: mean difference [MD] 1.28, SD 46.00; 95% LoA &#x2212;88.88 to 91.44; <xref ref-type="fig" rid="figure5">Figure 5</xref>; <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>; <xref ref-type="table" rid="table1">Table 1</xref>). These results were considerably decreased with rescoring; however, the Philips algorithm with the threshold 40 retained the best agreement (rescored: MD &#x2212;9.67, SD 47.64; 95% LoA &#x2212;103.05 to 83.71; <xref ref-type="fig" rid="figure5">Figure 5</xref>; <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>; <xref ref-type="table" rid="table1">Table 1</xref>). For TST, the Cole-Kripke, UCSD, and Sadeh algorithms displayed higher mean differences in TST measurements by actigraphy (<xref ref-type="fig" rid="figure5">Figure 5</xref>; <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>; <xref ref-type="table" rid="table1">Table 1</xref>). This overestimation of TST was reduced with rescoring (<xref ref-type="fig" rid="figure6">Figure 6</xref>; <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>; <xref ref-type="table" rid="table1">Table 1</xref>). Conversely, the Kripke 2010 algorithm underestimated the TST, and the magnitude of underestimation increased with rescoring. For the Philips algorithm, the results varied based on which threshold was used. 
The Philips algorithm with a threshold of 80 overestimated TST when compared to polysomnography; however, this improved with rescoring. Finally, the Philips algorithm with a threshold of 20 demonstrated considerable underestimation for TST, which further worsened with rescoring.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Bland-Altman distributions total sleep time for rescored algorithms. Plots represent the Bland-Altman distributions for rescored algorithms of actigraphy in comparison to polysomnography (ground truth) for total sleep time. TST: total sleep time.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e70778_fig06.png"/></fig><p>With respect to SE, all algorithms demonstrated similar distributions where estimates of SE were worse at lower levels of SE (<xref ref-type="fig" rid="figure7">Figure 7</xref>; <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>; <xref ref-type="table" rid="table2">Table 2</xref>). Considerable improvements in agreement were observed as SE levels increased. Based on the Bland-Altman plots, the distributions demonstrate some systematic bias and heteroscedasticity (<xref ref-type="fig" rid="figure7">Figure 7</xref>; <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>; <xref ref-type="table" rid="table2">Table 2</xref>). Supplementary regression analyses for proportional bias were significant across all nonrescored and rescored algorithms with the exception of nonrescored Kripke 2010 (<italic>P</italic>=.86). Regression slopes covered a wide range and were all negative with the exception of rescored Kripke 2010 (slope=0.15). This indicates a directional bias as SE levels change (in particular, as they increase). However, <italic>R</italic><sup>2</sup> values were relatively small, with all values &#x2264;0.21. 
Detailed proportional bias regression statistics are reported in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>; <xref ref-type="table" rid="table2">Table 2</xref>.</p><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>Bland-Altman distributions sleep efficiency for nonrescored algorithms. Plots represent the Bland-Altman distributions for nonrescored algorithms of actigraphy in comparison to polysomnography (ground truth) for sleep efficiency. SE: sleep efficiency.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e70778_fig07.png"/></fig><p>The Philips algorithm with a threshold at 40 demonstrated the best agreement with polysomnography (nonrescored: MD 0.03, SD 9.20; 95% LoA &#x2212;18 to 18.07). Rescoring of this algorithm resulted in underestimation of SE (rescored: MD &#x2212;2.20, SD 9.61; 95% LoA &#x2212;21.03 to 16.63). The Cole-Kripke, UCSD, Sadeh, and Philips threshold 80 algorithms all demonstrated overestimation of SE, with some improvement with rescoring (<xref ref-type="fig" rid="figure8">Figure 8</xref>; <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>; <xref ref-type="table" rid="table2">Table 2</xref>). In contrast, the Kripke 2010 and Philips threshold 20 algorithms both demonstrated underestimation of SE, with rescoring further increasing the MD, that is, the magnitude of underestimation (<xref ref-type="fig" rid="figure8">Figure 8</xref>; <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>; <xref ref-type="table" rid="table2">Table 2</xref>).</p><fig position="float" id="figure8"><label>Figure 8.</label><caption><p>Bland-Altman distributions sleep efficiency for rescored algorithms. Plots represent the Bland-Altman distributions for rescored algorithms of actigraphy in comparison to polysomnography (ground truth) for sleep efficiency. 
SE: sleep efficiency.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e70778_fig08.png"/></fig><p>Finally, the Bland-Altman distributions for WASO demonstrated considerable underestimation. The distributions demonstrate considerable bias and heteroscedasticity, where WASO estimates are better at lower WASO averages and become considerably worse as WASO increases (<xref ref-type="fig" rid="figure9">Figure 9</xref>; <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>; <xref ref-type="table" rid="table3">Table 3</xref>). Again, supplementary regression analyses for proportional bias were significant across all nonrescored and rescored algorithms. Regression slopes covered a wide range and were all negative with the exception of rescored Kripke 2010 (slope=0.08). This indicates a directional bias as WASO levels change (in particular, as they increase). <italic>R</italic><sup>2</sup> values covered a wide range. Detailed proportional bias regression statistics are reported in <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>, <xref ref-type="table" rid="table3">Table 3</xref>.</p><fig position="float" id="figure9"><label>Figure 9.</label><caption><p>Bland-Altman distributions wake after sleep onset for nonrescored algorithms. Plots represent the Bland-Altman distributions for nonrescored algorithms of actigraphy in comparison to polysomnography (ground truth) for wake after sleep onset. WASO: wake after sleep onset.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e70778_fig09.png"/></fig><p>Among all the algorithms, the Philips threshold 20 algorithm had the best agreement with polysomnography for WASO, only underestimating by a small magnitude (nonrescored: MD &#x2212;3.98, SD 52.94; 95% LoA &#x2212;107.74 to 99.77). 
Rescoring diminished the agreement of this algorithm, causing it to overestimate WASO (rescored: MD 12.10, SD 56.07; 95% LoA &#x2212;97.80 to 122.01). With respect to the Philips algorithms at thresholds 40 and 80, both underestimated WASO. Rescoring lowered the magnitude of underestimations, improving agreement. Similarly, Cole-Kripke, UCSD, and Sadeh had large underestimations of WASO with some improvement after rescoring (<xref ref-type="fig" rid="figure10">Figure 10</xref>; <xref ref-type="supplementary-material" rid="app6">Multimedia Appendix 6</xref>; <xref ref-type="table" rid="table3">Table 3</xref>). Kripke 2010, however, only slightly overestimated WASO, and rescoring considerably increased the magnitude of overestimation.</p><fig position="float" id="figure10"><label>Figure 10.</label><caption><p>Bland-Altman distributions wake after sleep onset for rescored algorithms. Plots represent the Bland-Altman distributions for rescored algorithms of actigraphy in comparison to polysomnography (ground truth) for wake after sleep onset. RS: rescored algorithms; WASO: wake after sleep onset.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v9i1e70778_fig10.png"/></fig><p>Exploratory subgroup analysis was conducted for 4 sleep problem groups, including apnea, individuals who used CPAP, insomnia, and RLS. For the subgroup analyses, we again examined AUC, accuracy metrics, and Bland-Altman distributions for agreement. The AUC was above 0.80 across all subgroups and algorithms, similar to the results across the entire population (<xref ref-type="supplementary-material" rid="app7">Multimedia Appendix 7</xref>). 
This indicated all algorithms demonstrated excellent ability to discriminate sleep-wake regardless of the presence of individuals&#x2019; sleep problems.</p><p>Accuracy ranged between 76% and 81% across all algorithms and subgroups, with only minor differences of 1%&#x2010;4% between algorithms. Rescoring only resulted in minor improvements in algorithm accuracy of 1%&#x2010;3%. <italic>F</italic><sub>1</sub>-scores demonstrated good balance, ranging from 0.80&#x2010;0.84. After rescoring, <italic>F</italic><sub>1</sub>-scores remained relatively the same with marginal changes (0.01&#x2010;0.03) again remaining within the ok-to-good balance range of 0.79 to 0.85. Overall, accuracy and <italic>F</italic><sub>1</sub>-score results were again highly similar to the results observed across the entire population sample. Complete subgroup results for accuracy, sensitivity, specificity, precision, and <italic>F</italic><sub>1</sub>-score are presented in <xref ref-type="supplementary-material" rid="app8">Multimedia Appendix 8</xref>.</p><p>Along these lines, MCC and Cohen &#x03BA; results were extremely similar to the overall results, ranging between 0.40&#x2010;0.60 for both metrics and for both rescored and nonrescored algorithms. Therefore, all results demonstrated moderate positive correlations for MCC and moderate Cohen &#x03BA; agreement across algorithms.</p><p>Repeated measures ANOVA analyses for accuracy were only significant for the apnea rescored algorithms (<italic>F</italic><sub>1.20,124.08</sub>=5.27; &#x03B5;=0.20; <italic>P</italic>=.02), RLS rescored algorithms (<italic>F</italic><sub>1.24,80.71</sub>=5.70; &#x03B5;=0.20; <italic>P</italic>=.01), and insomnia nonrescored algorithms (<italic>F</italic><sub>1.17,96.97</sub>=3.85; &#x03B5;=0.19; <italic>P</italic>=.05). 
All other nonaccuracy metrics demonstrated significant ANOVA for both nonrescored and rescored algorithms (<xref ref-type="supplementary-material" rid="app9">Multimedia Appendix 9</xref>).</p><p>Similar to the full sample results, post hoc contrasts for accuracy across subgroups were very small to small (all Hedges <italic>g</italic> results &#x003C;0.3). Post hoc contrasts for <italic>F</italic><sub>1</sub>-score were very small to small, approaching medium effect sizes for nonrescored algorithms (all Hedges <italic>g</italic> results &#x003C;0.35). <italic>F</italic><sub>1</sub>-score contrast effect sizes for the rescored algorithm demonstrated diminished performance with medium effect sizes for the K2010 algorithm across all subgroups. Similarly, the Philips threshold 20 algorithm showed diminished performance with medium effect sizes within the CPAP and RLS groups and to a lesser extent (Hedges <italic>g</italic>&#x003C;0.40) for the apnea and insomnia groups. Complete subgroup post hoc results are presented in <xref ref-type="supplementary-material" rid="app9">Multimedia Appendix 9</xref>.</p><p>The apnea subgroup of the K2010 rescored algorithm demonstrated diminished performance compared to the CK (Hedges <italic>g</italic>=&#x2212;0.52), UCSD (Hedges <italic>g</italic>=&#x2212;0.49), Sadeh (Hedges <italic>g</italic>=&#x2212;0.50), Philips threshold 80 (Hedges <italic>g</italic>=&#x2212;0.47), and Philips threshold 40 (Hedges <italic>g</italic>=&#x2212;0.37). The Philips threshold 20 also demonstrated worse performance with small, approaching medium effect sizes compared to the CK, UCSD, Sadeh, and Philips threshold 80 algorithms (ranging from Hedges <italic>g</italic>=&#x2212;0.33 to &#x2212;0.37). 
Within the CPAP subgroup, rescored K2010 showed worse performance than CK (Hedges <italic>g</italic>=&#x2212;0.53), UCSD (Hedges <italic>g</italic>=&#x2212;0.51), Sadeh (Hedges <italic>g</italic>=&#x2212;0.53), Philips threshold 80 (Hedges <italic>g</italic>=&#x2212;0.47), and Philips threshold 40 (Hedges <italic>g</italic>=&#x2212;0.35). The Philips threshold 20 algorithm again demonstrated worse performance with small, approaching medium effect sizes compared to the CK, UCSD, Sadeh, and Philips threshold 80 algorithms (ranging from Hedges <italic>g</italic>=&#x2212;0.37 to &#x2212;0.43). To a lesser extent, within the insomnia subgroup, comparisons of algorithms were similar to the apnea group. In addition, both K2010 and Philips threshold demonstrated small, approaching medium effect sizes for Philips threshold 20 algorithms. Similarly, within the RLS subgroup, the K2010 rescored algorithm demonstrated diminished performance compared to the CK (Hedges <italic>g</italic>=&#x2212;0.48), UCSD (Hedges <italic>g</italic>=&#x2212;0.49), Sadeh (Hedges <italic>g</italic>=&#x2212;0.53), Philips threshold 80 (Hedges <italic>g</italic>=&#x2212;0.46), and Philips threshold 40 (Hedges <italic>g</italic>=&#x2212;0.35). The Philips threshold 20 also demonstrated worse performance with small, approaching medium effect sizes compared to the CK, UCSD, Sadeh, and Philips threshold 80 algorithms (ranging from Hedges <italic>g</italic>=&#x2212;0.33 to &#x2212;0.40). Overall, the post hoc test revealed a wide range of effect sizes ranging from very small to large for sensitivity, specificity, and precision across all subgroups and algorithms. Complete subgroup post hoc results are presented in <xref ref-type="supplementary-material" rid="app9">Multimedia Appendix 9</xref>.</p><p>Repeated measures ANOVA analyses for MCC and Cohen &#x03BA; also demonstrated a significant difference between algorithms for a variety of sleep problem subgroups. 
Both the apnea and insomnia subgroups demonstrated significant differences between algorithms for Cohen &#x03BA; in both nonrescored (Apnea: <italic>F</italic><sub>1.24,127.66</sub>=17.28; &#x03B5;=0.21; <italic>P</italic>&#x003C;.001; Insomnia: <italic>F</italic><sub>1.32,109.90</sub>=28.14; &#x03B5;=0.22; <italic>P</italic>&#x003C;.001) and rescored algorithms (Apnea: <italic>F</italic><sub>1.31,134.72</sub>=4.63; &#x03B5;=0.22; <italic>P</italic>=.02; Insomnia: <italic>F</italic><sub>1.42,117.76</sub>=5.94; &#x03B5;=0.24; <italic>P</italic>=.008) and for MCC between nonrescored algorithms (Apnea: <italic>F</italic><sub>1.31,135.07</sub>=4.51; &#x03B5;=0.22; <italic>P</italic>=.03; Insomnia: <italic>F</italic><sub>1.50,124.90</sub>=9.19; &#x03B5;=0.25; <italic>P</italic>&#x003C;.001). Both CPAP and RLS subgroups showed significant differences between algorithms for only Cohen &#x03BA; and only for nonrescored algorithms (CPAP: <italic>F</italic><sub>1.23,74.80</sub>=8.31; &#x03B5;=0.20; <italic>P</italic>=.003; RLS: <italic>F</italic><sub>1.24,80.75</sub>=12.28; &#x03B5;=0.21; <italic>P</italic>&#x003C;.001). Complete subgroup ANOVA results are presented in <xref ref-type="supplementary-material" rid="app10">Multimedia Appendix 10</xref>.</p><p>A subsequent post hoc test revealed small effect sizes for MCC (Hedges <italic>g</italic>&#x003C;0.25) for both the apnea and insomnia subgroups across nonrescored algorithm contrasts. With respect to Cohen &#x03BA;, effect sizes ranged from very small to small for the apnea subgroup across most nonrescored and algorithm contrasts (<xref ref-type="supplementary-material" rid="app10">Multimedia Appendix 10</xref>). The exception was the nonrescored K2010 algorithm, which demonstrated small-approaching-medium effects, having better Cohen &#x03BA; agreement when compared with nonrescored UCSD and Sadeh algorithms (<xref ref-type="supplementary-material" rid="app10">Multimedia Appendix 10</xref>). 
Similarly, to a lesser extent, within both CPAP and RLS subgroups, nonrescored K2010 had better Cohen &#x03BA; agreement when compared with nonrescored UCSD and Sadeh algorithms (Hedges <italic>g</italic> range 0.27&#x2010;0.31). Complete subgroup post hoc results are presented in <xref ref-type="supplementary-material" rid="app10">Multimedia Appendix 10</xref>.</p><p>Finally, the Bland-Altman distributions were examined for each algorithm, without and with rescoring applied, for 3 sleep metrics (TST, SE, and WASO) within each of the 4 subgroups (<xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>). Overall, the subgroup results were again highly similar to those observed across the entire population sample. All algorithms demonstrated considerable mean difference and systematic bias for each subgroup analysis (<xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>). Overall, rescoring did not improve agreement between the algorithm and ground truth, with some cases demonstrating poorer agreement after rescoring. Similar trends of bias were observed as the results from the entire dataset sample. For SE, all algorithms demonstrated heteroscedasticity for each subgroup (ie, estimates of SE were worse from polysomnography as SE decreased). The exception was the RLS group, which demonstrated random distributions for all algorithms. For WASO, all algorithms showed heteroscedasticity across all subgroups, where estimates of WASO were worse compared to polysomnography as WASO increased. For TST, there was no heteroscedasticity; that is, the distributions were random and evenly spread for all algorithms across all subgroups. 
For detailed Bland-Altman distribution statistics, see <xref ref-type="supplementary-material" rid="app11">Multimedia Appendix 11</xref>.</p></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>Using the large MESA dataset, we established that traditional actigraphy algorithms can, with considerable accuracy, classify sleep and wake. As a first, this study comprehensively provides detailed benchmarks of traditional actigraphy algorithms. This study also provides the first rescoring performance and comparison across a wide range of actigraphy algorithms. This study is among the first to evaluate traditional actigraphy algorithms across a large population of older adults who are also at risk for, or have, health and sleep issues. To our knowledge, this is the first study to provide comprehensive benchmarks and statistical comparisons across an extensive list of the commonly used actigraphy algorithms with the diverse MESA population, while previous studies have focused on novel algorithm development and comparison. In doing so, we provide comprehensive metrics for not only users of these algorithms but also inform future research and practice.</p><p>Overall, the ROC-AUC and accuracy results demonstrate that all of the algorithms evaluated in this study are good options for evaluating sleep-wake activity. However, contrary to accuracy, our results for both MCC and Cohen &#x03BA; demonstrate that traditional algorithms only provide moderate agreement and moderate positive correlations with ground truth polysomnography. These results suggest that though these actigraphy algorithms are a valuable analytic tool, there is significant room for improvement. 
With novel technologies as well as improved analytic methods, we can improve our approach to sleep-wake assessment.</p><p>Along these lines, our results demonstrate that rescoring of actigraphy algorithms does not substantially improve sleep-wake classification accuracy. Rather, rescoring may significantly diminish the performance (eg, <italic>F</italic><sub>1</sub>-score) of certain algorithms such as Kripke 2010 and Philips-Respironics. Further, our analyses of sleep problem participants revealed similar findings that rescoring may diminish sleep-wake classification performance. This suggests that researchers and clinicians may safely opt to not rescore and still retain high accuracy.</p><p>In addition to the primary results, in-depth ANOVA and post hoc analyses showed significant differences between algorithms despite small differences in accuracy and other metrics. However, examination of effect sizes for MCC, accuracy, and <italic>F</italic><sub>1</sub>-score demonstrated very small to small effect sizes. Therefore, the significant differences between algorithms could be due to the large sample size of the dataset. The exception was Cohen &#x03BA;, which demonstrated marginally higher performance for the Kripke 2010 algorithm (small to medium effect sizes). This, along with the Kripke 2010 algorithm demonstrating the highest accuracy, may suggest that this algorithm could be optimal in specific analysis scenarios. However, after rescoring, these effects were reduced with only very small to small effect sizes overall.</p><p>Our evaluation of the agreement between actigraphy and polysomnography for sleep metrics TST, SE, and WASO was conducted through Bland-Altman distribution analysis. This analysis demonstrated both over and underestimation of all 3 sleep metrics, underscored by large mean differences between actigraphy and polysomnography. The Bland-Altman distributions also demonstrated considerable systematic bias for SE and WASO. 
In addition, there were clear patterns that demonstrated agreement decreased as SE decreased or when WASO increased. This was supported by the regression analyses for proportional bias. This suggests that actigraphy may be less precise in accurately measuring sleep metrics in individuals who have irregular sleep or sleep problems. From the traditional algorithms, the Philips algorithm with a threshold of 40 demonstrated the highest agreement with polysomnography for both TST and SE. This may be in part due to the supplied cutoffs for sleep-wake detection, which may allow for the user to adjust the algorithm based on their target population and other factors to improve both sleep-wake scoring and sleep metric estimates [<xref ref-type="bibr" rid="ref37">37</xref>]. However, an important note is that the MESA data were collected using Philips Actiwatch devices; therefore, it is expected that the Philips algorithms may perform better in these cases than the other algorithms [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. For WASO, the Kripke 2010 algorithm had the highest agreement with polysomnography. This may be due to the larger number of epochs (30) that are assessed by the algorithm to compute a sleep-wake decision. In evaluating more epochs, this algorithm may be more sensitive to shorter waking periods [<xref ref-type="bibr" rid="ref19">19</xref>]. This may also explain the lower accuracy of this algorithm, as it may be scoring more epochs as wake even when there is true sleep. 
This suggests a key point for current algorithm use and for future algorithm development, that some variability or ranges of thresholds and longer windows of epoch evaluation may provide better accuracy and improve estimates of sleep metrics.</p><p>Another explanation for the bias and heteroscedasticity observed is that the sample evaluated included many individuals who were either at risk of or had health conditions such as the sleep problems highlighted in the sample distributions (<xref ref-type="table" rid="table1">Table 1</xref>). Though not reported in the NSRR version of the MESA dataset, MESA and publications on this dataset have underscored that this sample includes individuals with hypertension, diabetes, etc [<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref48">48</xref>,<xref ref-type="bibr" rid="ref49">49</xref>]. Therefore, it is expected that these conditions may lead to fragmented and irregular sleep, which may have influenced the actigraph or the algorithm&#x2019;s ability to accurately measure. Previous studies have noted that individuals with sleep problems or health conditions that may affect sleep demonstrate more variability in actigraphy results of both sleep-wake and sleep metrics [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref12">12</xref>-<xref ref-type="bibr" rid="ref17">17</xref>]. Therefore, one important consideration was further tests of these algorithms with data from populations with health and sleep issues. Notably, these are the key populations who are recommended or given actigraphs for sleep assessment by researchers and clinicians. Therefore, the precision of their sleep or wake classification and sleep metrics by these algorithms is of great importance.</p><p>Along these lines, we first conducted an exploratory analysis of algorithm performance within sleep problem subgroups, specifically examining individuals with insomnia, RLS, apnea, and those who used a CPAP device. 
We found that traditional actigraphy algorithms do not perform particularly worse; rather, they are just as robust in determining sleep and wake or sleep metrics in individuals with sleep problems compared with the entire sample. These algorithms demonstrated very similar results and patterns to the entire sample with respect to AUC, accuracy, confusion matrix results, MCC, Cohen &#x03BA;, and agreement (Bland-Altman distributions). However, generally, these algorithms perform well with respect to sleep-wake scoring but not as well for the measurement of specific sleep metrics in contrast to the overall population. Our results show that sleep disorders may skew Bland-Altman distributions as sleep patterns within these disorders deviate from norms. We theorize based on our results that this may result in larger mean differences when compared to ground truth. Based on our results, we theorize that this may create skewness in accuracy in particularly extreme cases. We suggest that clinicians pay attention to severe cases of sleep disorders and take into account our results and the decreased reliability of actigraphy-based sleep metrics such as TST due to outlier sleep patterns.</p><p>Overall, these results provide clinicians with nuanced benchmarks for individual algorithm performance on specific sleep disorders. This will allow clinicians to make an informed choice not only based on accuracy, but on sensitivity and specificity metrics to precisely analyze patient sleep data. With mean difference and Bland-Altman plots, clinicians now have benchmarks to provide them with information on how extreme cases may deviate from general trends. 
Further, both clinicians and manufacturers now have mean difference benchmarks, which reflect the difference between ground truth and actigraphy, allowing for corrections in patient data analyses if needed.</p><p>In the future, it would be interesting to compare the precision of traditional actigraphy algorithms across a variety of actigraph and accelerometer devices. Overall, both the main and subgroup results suggest that generally traditional actigraphy algorithms may have poorer agreement with polysomnography in fringe cases, specifically, cases in which the individual&#x2019;s sleep may be significantly different from the vast majority of the population. One hypothesis here is that the heteroscedasticity may be correlated with the severity of a sleep problem, which explains the poorer agreement as SE or WASO changes. Further analysis would allow us to gain a better understanding of this phenomenon.</p><p>With regard to rescoring and the sleep problem sub-samples, there were mixed results. Traditional use of the Cole-Kripke (1992) [<xref ref-type="bibr" rid="ref6">6</xref>] algorithm proposed the use of rescoring; first, we investigated rescoring across all included algorithms [<xref ref-type="bibr" rid="ref40">40</xref>]. Contrary to the belief that rescoring would significantly improve accuracy metrics, rescoring only slightly improved agreement for sleep metrics and may diminish specific performance variables. This could be due to the rescoring of smaller periods of sleep and wake, especially in the aforementioned population with sleep problems. Rescoring may be better suited across multiple nights, which may present more within-participant variability, whereas our sample was only over one night. Rescoring may also smooth out or erroneously change edge cases or cases in which individuals have sleep problems. This would, in part, explain the diminished performance of results. 
One future direction could be further optimization of rescoring criteria for improvements to sleep-wake detection across algorithms or to better suit individual algorithms specifically. This may also remove the diminished performance observed after rescoring with sleep problem populations. As a current recommendation, we suggest that rescoring be used only when absolutely necessary and that when rescoring, researchers compare results that are both rescored and nonrescored. Our findings suggest that rescoring does not provide any substantial benefits to nonrescored algorithm results. Further, we do not recommend rescoring for sleep disorder population data to preserve accurate sleep-wake scoring.</p><p>Overall, these results echo the findings of previous studies which have examined these algorithms. Haghayegh et al [<xref ref-type="bibr" rid="ref50">50</xref>] examined the performance of the Cole-Kripke, Sadeh, and UCSD algorithms on a sample of 40 healthy adults. Similar to our results, they found accuracies of 85%&#x2010;86% across algorithms. Additionally, they also found varying levels of overall bias across algorithms for the algorithms&#x2019; estimation of total sleep time, sleep onset latency, and wake after sleep onset. Along these lines, Palotti et al [<xref ref-type="bibr" rid="ref51">51</xref>] and Jokar et al [<xref ref-type="bibr" rid="ref52">52</xref>] used a smaller subset of the MESA dataset to compare the performance of the Cole-Kripke and Sadeh algorithms to their novel methods, both for night only and night-day. Cole-Kripke and Sadeh demonstrated mean accuracies ranging from 70% to 85% [<xref ref-type="bibr" rid="ref53">53</xref>,<xref ref-type="bibr" rid="ref54">54</xref>].</p><p>In addition, this study presented some limitations. Foremost, there has been recent work and novel algorithms for sleep and wake classification. Some notable examples include novel machine learning and other methods to evaluate activity data. 
These algorithms may be more precise and robust for sleep and wake classification. Though our current study did not directly evaluate these algorithms, previous research has provided some benchmarks for comparison. Palotti et al [<xref ref-type="bibr" rid="ref51">51</xref>] developed novel Convolutional Neural Network and Long Short-Term Memory machine learning approaches using the MESA dataset for training and testing. Their novel approaches demonstrated better performance (88.2% and 87.7% respectively) than traditional algorithms (Cole-Kripke and Sadeh). In comparison to this study, their novel methods also demonstrate higher performance than traditional algorithms, albeit on a smaller subsample of the MESA dataset. Similarly, Nunes et al [<xref ref-type="bibr" rid="ref55">55</xref>] developed a domain adversarial convolutional neural network method, trained on the MESA dataset. Their optimal method demonstrated the highest accuracy (80.1%) when compared to other models and traditional algorithms such as Cole-Kripke and Sadeh (71.9% and 69.6%). In contrast to our findings, their novel method could provide stable, more generalizable sleep-wake classification. Again, these results suggest that novel machine learning methods are the future for actigraphy analyses. Given the rapid development of novel actigraphy analytic methods, our future study aim is to examine the performance of newer algorithms and strategies with large testing samples in contrast to our current study findings.</p><p>Another limitation of this study is that we only examined sleep problem subgroups as an exploratory aim. Given the wide demographic, there were several other populations within the dataset, such as individuals with diabetes, cardiovascular illness, etc, that could explain the lack of agreement or the heteroscedasticity observed. In addition, we did not conduct an analysis examining demographic subgroups such as sex, ethnicity, and age. 
These groups may have also contributed to the Bland-Altman distributions observed.</p><p>Given the similarities of the subgroup results to those of the entire dataset, there may be additional correlation-based analysis, which could help unpack why traditional actigraphy algorithms perform poorly as sleep metrics reach extreme values. As aforementioned, previous research denotes individuals with sleep problems may have lower SE, higher WASO, and a variety of other extreme sleep activity metrics. Therefore, this may be one important limitation of not only the actigraphy algorithms but the activity measurement devices themselves. To address this issue, future research should delve deeper and examine extreme cases and traditional algorithms&#x2019; ability to classify sleep and wake and accurately measure sleep metrics. This, in turn, may also allow us to gain a better understanding of specific patterns of sleep and wake in these populations, as well as examine whether specific populations alter the distribution variability observed. In addition, future research should comprehensively assess the performance of traditional and novel algorithms on subsets of the MESA dataset. In doing so, we could identify key markers that may be used to improve analysis performance.</p><p>Finally, our study also filtered out fragmented or poor-quality data from the dataset. Even though these data may not be suitable for the current accuracy analysis and comparison, they may highlight important limitations of actigraphy algorithms with respect to the estimation of sleep metrics and the classification of sleep and wake in extreme cases. Therefore, future research should assess a spectrum of poor- and good-quality data to gain a complete understanding of the performance of traditional actigraphy algorithms. 
In addition, future research should also compare the results of different actigraphy devices to determine, in unison with analytics, the most accurate actigraphy and accelerometer devices.</p><p>Overall, the results of this study provide significant support that traditional actigraphy algorithms can, with acceptable accuracy, detect sleep and wake in large, diverse population samples, including older adults or populations at risk of health conditions. This study provides researchers and clinicians with evidence that traditional algorithms can continue to be used to assess sleep-wake activity. However, traditional algorithms may present significant limitations in measurement precision of extreme sleep cases. Further, rescoring may not be a necessary step in the analysis of actigraphy data. The implications of this finding are highly important as researchers develop new algorithms and methods of actigraphy data analysis. Specifically, new algorithms should consider the variability of sleep and wake data in sleep problems or extreme cases to precisely measure sleep and wake activity. Further, the results denoted by the current study serve as an important reference point for new, developing strategies for actigraphy analyses.</p></sec><sec id="s4-2"><title>Conclusion</title><p>In conclusion, we emphasize the future direction for generalizable, accurate, and comprehensive actigraphy analyses through the application of machine learning and artificial intelligence models. As aforementioned, several researchers have now adapted artificial intelligence&#x2013;based models and automated toolboxes to analyze actigraphy and reportedly extract more accurate and more detailed sleep-wake results [<xref ref-type="bibr" rid="ref53">53</xref>-<xref ref-type="bibr" rid="ref57">57</xref>]. 
Given the promise of these methods, we theorize that their assessment and usage to analyze actigraphy data in novel ways poses a promising future direction for more accurate and diverse actigraphy analyses for sleep assessment. Further, in unison with demographic analyses, first of their performance, then the usage of demographic and sleep problem data, we propose this would be a novel opportunity to develop more robust and accurate methods of sleep-wake prediction from activity and actigraphy data. In addition, novel methods using unsupervised machine learning may also provide new ways in which sleep-wake actigraphy data could be parsed to reveal underlying patterns of sleep and sleep problems [<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref58">58</xref>]. As these methods are fitted to specific datasets, they could provide better generalizability for sleep-wake analyses [<xref ref-type="bibr" rid="ref56">56</xref>,<xref ref-type="bibr" rid="ref58">58</xref>].</p></sec></sec></body><back><ack><p>We would like to acknowledge the contributions of Mit Patel, who helped with this project.</p><p>The Multi-Ethnic Study of Atherosclerosis (MESA) Sleep Ancillary study was funded by NIH-NHLBI Association of Sleep Disorders with Cardiovascular Health Across Ethnic Groups (RO1 HL098433). MESA is supported by NHLBI-funded contracts HHSN268201500003I, N01-HC-95159, N01-HC-95160, N01-HC-95161, N01-HC-95162, N01-HC-95163, N01-HC-95164, N01-HC-95165, N01-HC-95166, N01-HC-95167, N01-HC-95168, and N01-HC-95169 from the National Heart, Lung, and Blood Institute, and by cooperative agreements UL1-TR-000040, UL1-TR-001079, and UL1-TR-001420 funded by NCATS. 
The National Sleep Research Resource was supported by the National Heart, Lung, and Blood Institute (R24 HL114473, 75N92019R002) [<xref ref-type="bibr" rid="ref59">59</xref>].</p><p>The author(s) attest there was no use of generative artificial intelligence (AI) in the generation of text, figures, or other informational content of this manuscript. The authors note and attest within the methods section of all uses of generative artificial intelligence (AI). Specifically, generative AI was exclusively used to draft Python code for predetermined purposes. We provide detailed context for its use within our methods section and below. &#x201C;We used ChatGPT for coding assistance, to generate Python code for existing Python libraries [<xref ref-type="bibr" rid="ref28">28</xref>]. ChatGPT was used to generate Python code for file data file preparation and processing (eg, data transformation). Manual data checks in addition to code-based error reporting were conducted to verify the code was working as desired and data was correctly processed. In addition, ChatGPT was used to generate Python code for running statistical analyses. All statistical formulae and analyses were verified within the code. Once code was generated, it was checked by at least one or more of study authors before use. No machine learning code was generated via ChatGPT, and any actigraphy algorithm code was checked and specified in the python code exactly as defined by original algorithm sources.&#x201D;</p></ack><notes><sec><title>Funding</title><p>Robyn Stremler received funding for this project from the Canadian Institutes of Health Research; Healthy Data Behavior Challenge operating grants HBC 15488 and HBC 15694. Robyn Stremler holds the Lawrence S. 
Bloomberg Limited-Term Professorship in Child and Family Health.</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations:</title><def-list><def-item><term id="abb1">AUC</term><def><p>area under the curve</p></def></def-item><def-item><term id="abb2">CK</term><def><p>Cole-Kripke</p></def></def-item><def-item><term id="abb3">CPAP</term><def><p>continuous positive airway pressure</p></def></def-item><def-item><term id="abb4">ICC</term><def><p>intraclass correlation coefficient</p></def></def-item><def-item><term id="abb5">K2010</term><def><p>Kripke 2010</p></def></def-item><def-item><term id="abb6">LoA</term><def><p>limits of agreement</p></def></def-item><def-item><term id="abb7">MCC</term><def><p>Matthews correlation coefficient</p></def></def-item><def-item><term id="abb8">MESA</term><def><p>Multi-Ethnic Study of Atherosclerosis</p></def></def-item><def-item><term id="abb9">NRS</term><def><p>nonrescored result</p></def></def-item><def-item><term id="abb10">NSRR</term><def><p>National Sleep Research Resource</p></def></def-item><def-item><term id="abb11">Philips</term><def><p>Philips-Respironics</p></def></def-item><def-item><term id="abb12">RLS</term><def><p>restless leg syndrome</p></def></def-item><def-item><term id="abb13">ROC</term><def><p>receiver operator characteristic curve</p></def></def-item><def-item><term id="abb14">RR</term><def><p>rescored result</p></def></def-item><def-item><term id="abb15">RS</term><def><p>Webster&#x2019;s rescoring rules</p></def></def-item><def-item><term id="abb16">SE</term><def><p>sleep efficiency</p></def></def-item><def-item><term id="abb17">TST</term><def><p>total sleep time</p></def></def-item><def-item><term id="abb18">UCSD</term><def><p>University of California San Diego</p></def></def-item><def-item><term id="abb19">WASO</term><def><p>wake after sleep onset</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref 
id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Acebo</surname><given-names>C</given-names> </name><name name-style="western"><surname>LeBourgeois</surname><given-names>MK</given-names> </name></person-group><article-title>Actigraphy</article-title><source>Respir Care Clin N Am</source><year>2006</year><month>03</month><volume>12</volume><issue>1</issue><fpage>23</fpage><lpage>30</lpage><pub-id pub-id-type="doi">10.1016/j.rcc.2005.11.010</pub-id><pub-id pub-id-type="medline">16530645</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Sadeh</surname><given-names>A</given-names> </name></person-group><article-title>Actigraphy</article-title><source>Handbook of Clinical Neurophysiology</source><year>2005</year><volume>6</volume><publisher-name>Elsevier</publisher-name><fpage>67</fpage><lpage>72</lpage><pub-id pub-id-type="doi">10.1016/S1567-4231(09)70031-0</pub-id><pub-id pub-id-type="other">9780444515179</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sadeh</surname><given-names>A</given-names> </name></person-group><article-title>The role and validity of actigraphy in sleep medicine: an update</article-title><source>Sleep Med Rev</source><year>2011</year><month>08</month><volume>15</volume><issue>4</issue><fpage>259</fpage><lpage>267</lpage><pub-id pub-id-type="doi">10.1016/j.smrv.2010.10.001</pub-id><pub-id pub-id-type="medline">21237680</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ancoli-Israel</surname><given-names>S</given-names> </name><name 
name-style="western"><surname>Cole</surname><given-names>R</given-names> </name><name name-style="western"><surname>Alessi</surname><given-names>C</given-names> </name><name name-style="western"><surname>Chambers</surname><given-names>M</given-names> </name><name name-style="western"><surname>Moorcroft</surname><given-names>W</given-names> </name><name name-style="western"><surname>Pollak</surname><given-names>CP</given-names> </name></person-group><article-title>The role of actigraphy in the study of sleep and circadian rhythms</article-title><source>Sleep</source><year>2003</year><month>05</month><day>1</day><volume>26</volume><issue>3</issue><fpage>342</fpage><lpage>392</lpage><pub-id pub-id-type="doi">10.1093/sleep/26.3.342</pub-id><pub-id pub-id-type="medline">12749557</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Banfi</surname><given-names>T</given-names> </name><name name-style="western"><surname>Valigi</surname><given-names>N</given-names> </name><name name-style="western"><surname>di Galante</surname><given-names>M</given-names> </name><name name-style="western"><surname>d&#x2019;Ascanio</surname><given-names>P</given-names> </name><name name-style="western"><surname>Ciuti</surname><given-names>G</given-names> </name><name name-style="western"><surname>Faraguna</surname><given-names>U</given-names> </name></person-group><article-title>Efficient embedded sleep wake classification for open-source actigraphy</article-title><source>Sci Rep</source><year>2021</year><month>01</month><day>11</day><volume>11</volume><issue>1</issue><fpage>345</fpage><pub-id pub-id-type="doi">10.1038/s41598-020-79294-y</pub-id><pub-id pub-id-type="medline">33431918</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name 
name-style="western"><surname>Cole</surname><given-names>RJ</given-names> </name><name name-style="western"><surname>Kripke</surname><given-names>DF</given-names> </name><name name-style="western"><surname>Gruen</surname><given-names>W</given-names> </name><name name-style="western"><surname>Mullaney</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Gillin</surname><given-names>JC</given-names> </name></person-group><article-title>Automatic sleep/wake identification from wrist activity</article-title><source>Sleep</source><year>1992</year><month>10</month><volume>15</volume><issue>5</issue><fpage>461</fpage><lpage>469</lpage><pub-id pub-id-type="doi">10.1093/sleep/15.5.461</pub-id><pub-id pub-id-type="medline">1455130</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Haghayegh</surname><given-names>S</given-names> </name><name name-style="western"><surname>Khoshnevis</surname><given-names>S</given-names> </name><name name-style="western"><surname>Smolensky</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Diller</surname><given-names>KR</given-names> </name><name name-style="western"><surname>Castriotta</surname><given-names>RJ</given-names> </name></person-group><article-title>Performance comparison of different interpretative algorithms utilized to derive sleep parameters from wrist actigraphy data</article-title><source>Chronobiol Int</source><year>2019</year><month>12</month><day>2</day><volume>36</volume><issue>12</issue><fpage>1752</fpage><lpage>1760</lpage><pub-id pub-id-type="doi">10.1080/07420528.2019.1679826</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jean-Louis</surname><given-names>G</given-names> </name><name 
name-style="western"><surname>Kripke</surname><given-names>DF</given-names> </name><name name-style="western"><surname>Cole</surname><given-names>RJ</given-names> </name><name name-style="western"><surname>Assmus</surname><given-names>JD</given-names> </name><name name-style="western"><surname>Langer</surname><given-names>RD</given-names> </name></person-group><article-title>Sleep detection with an accelerometer actigraph: comparisons with polysomnography</article-title><source>Physiol Behav</source><year>2001</year><month>01</month><volume>72</volume><issue>1-2</issue><fpage>21</fpage><lpage>28</lpage><pub-id pub-id-type="doi">10.1016/s0031-9384(00)00355-3</pub-id><pub-id pub-id-type="medline">11239977</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Jean-Louis</surname><given-names>G</given-names> </name><name name-style="western"><surname>Kripke</surname><given-names>DF</given-names> </name><name name-style="western"><surname>Mason</surname><given-names>WJ</given-names> </name><name name-style="western"><surname>Elliott</surname><given-names>JA</given-names> </name><name name-style="western"><surname>Youngstedt</surname><given-names>SD</given-names> </name></person-group><article-title>Sleep estimation from wrist movement quantified by different actigraphic modalities</article-title><source>J Neurosci Methods</source><year>2001</year><month>02</month><day>15</day><volume>105</volume><issue>2</issue><fpage>185</fpage><lpage>191</lpage><pub-id pub-id-type="doi">10.1016/s0165-0270(00)00364-2</pub-id><pub-id pub-id-type="medline">11275275</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kripke</surname><given-names>DF</given-names> </name><name name-style="western"><surname>Hahn</surname><given-names>EK</given-names> 
</name><name name-style="western"><surname>Grizas</surname><given-names>AP</given-names> </name><etal/></person-group><article-title>Wrist actigraphic scoring for sleep laboratory patients: algorithm development</article-title><source>J Sleep Res</source><year>2010</year><month>12</month><volume>19</volume><issue>4</issue><fpage>612</fpage><lpage>619</lpage><pub-id pub-id-type="doi">10.1111/j.1365-2869.2010.00835.x</pub-id><pub-id pub-id-type="medline">20408923</pub-id></nlm-citation></ref><ref id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sadeh</surname><given-names>A</given-names> </name><name name-style="western"><surname>Sharkey</surname><given-names>KM</given-names> </name><name name-style="western"><surname>Carskadon</surname><given-names>MA</given-names> </name></person-group><article-title>Activity-based sleep-wake identification: an empirical test of methodological issues</article-title><source>Sleep</source><year>1994</year><month>04</month><volume>17</volume><issue>3</issue><fpage>201</fpage><lpage>207</lpage><pub-id pub-id-type="doi">10.1093/sleep/17.3.201</pub-id><pub-id pub-id-type="medline">7939118</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Blackwell</surname><given-names>T</given-names> </name><name name-style="western"><surname>Redline</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ancoli-Israel</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Comparison of sleep parameters from actigraphy and polysomnography in older women: the SOF study</article-title><source>Sleep</source><year>2008</year><month>02</month><volume>31</volume><issue>2</issue><fpage>283</fpage><lpage>291</lpage><pub-id pub-id-type="doi">10.1093/sleep/31.2.283</pub-id><pub-id 
pub-id-type="medline">18274276</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>de Souza</surname><given-names>L</given-names> </name><name name-style="western"><surname>Benedito-Silva</surname><given-names>AA</given-names> </name><name name-style="western"><surname>Pires</surname><given-names>MLN</given-names> </name><name name-style="western"><surname>Poyares</surname><given-names>D</given-names> </name><name name-style="western"><surname>Tufik</surname><given-names>S</given-names> </name><name name-style="western"><surname>Calil</surname><given-names>HM</given-names> </name></person-group><article-title>Further validation of actigraphy for sleep studies</article-title><source>Sleep</source><year>2003</year><month>02</month><day>1</day><volume>26</volume><issue>1</issue><fpage>81</fpage><lpage>85</lpage><pub-id pub-id-type="doi">10.1093/sleep/26.1.81</pub-id><pub-id pub-id-type="medline">12627737</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gooneratne</surname><given-names>NS</given-names> </name><name name-style="western"><surname>Vitiello</surname><given-names>MV</given-names> </name></person-group><article-title>Sleep in older adults: normative changes, sleep disorders, and treatment options</article-title><source>Clin Geriatr Med</source><year>2014</year><month>08</month><volume>30</volume><issue>3</issue><fpage>591</fpage><lpage>627</lpage><pub-id pub-id-type="doi">10.1016/j.cger.2014.04.007</pub-id><pub-id pub-id-type="medline">25037297</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Morgenthaler</surname><given-names>T</given-names> </name><name 
name-style="western"><surname>Alessi</surname><given-names>C</given-names> </name><name name-style="western"><surname>Friedman</surname><given-names>L</given-names> </name><etal/></person-group><article-title>Practice parameters for the use of actigraphy in the assessment of sleep and sleep disorders: an update for 2007</article-title><source>Sleep</source><year>2007</year><month>04</month><volume>30</volume><issue>4</issue><fpage>519</fpage><lpage>529</lpage><pub-id pub-id-type="doi">10.1093/sleep/30.4.519</pub-id><pub-id pub-id-type="medline">17520797</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Desforges</surname><given-names>JF</given-names> </name><name name-style="western"><surname>Prinz</surname><given-names>PN</given-names> </name><name name-style="western"><surname>Vitiello</surname><given-names>MV</given-names> </name><name name-style="western"><surname>Raskind</surname><given-names>MA</given-names> </name><name name-style="western"><surname>Thorpy</surname><given-names>MJ</given-names> </name></person-group><article-title>Sleep disorders and aging</article-title><source>N Engl J Med</source><year>1990</year><month>08</month><day>23</day><volume>323</volume><issue>8</issue><fpage>520</fpage><lpage>526</lpage><pub-id pub-id-type="doi">10.1056/NEJM199008233230805</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sivertsen</surname><given-names>B</given-names> </name><name name-style="western"><surname>Omvik</surname><given-names>S</given-names> </name><name name-style="western"><surname>Havik</surname><given-names>OE</given-names> </name><etal/></person-group><article-title>A comparison of actigraphy and polysomnography in older adults treated for chronic primary 
insomnia</article-title><source>Sleep</source><year>2006</year><month>10</month><volume>29</volume><issue>10</issue><fpage>1353</fpage><lpage>1358</lpage><pub-id pub-id-type="doi">10.1093/sleep/29.10.1353</pub-id><pub-id pub-id-type="medline">17068990</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Marino</surname><given-names>M</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Rueschman</surname><given-names>MN</given-names> </name><etal/></person-group><article-title>Measuring sleep: accuracy, sensitivity, and specificity of wrist actigraphy compared to polysomnography</article-title><source>Sleep</source><year>2013</year><month>11</month><day>1</day><volume>36</volume><issue>11</issue><fpage>1747</fpage><lpage>1755</lpage><pub-id pub-id-type="doi">10.5665/sleep.3142</pub-id><pub-id pub-id-type="medline">24179309</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sadeh</surname><given-names>A</given-names> </name><name name-style="western"><surname>Acebo</surname><given-names>C</given-names> </name></person-group><article-title>The role of actigraphy in sleep medicine</article-title><source>Sleep Med Rev</source><year>2002</year><month>04</month><volume>6</volume><issue>2</issue><fpage>113</fpage><lpage>124</lpage><pub-id pub-id-type="doi">10.1053/smrv.2001.0182</pub-id><pub-id pub-id-type="medline">12531147</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>R</given-names> </name><name 
name-style="western"><surname>Zee</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Racial/ethnic differences in sleep disturbances: the multi-ethnic study of atherosclerosis (MESA)</article-title><source>Sleep</source><year>2015</year><month>06</month><day>1</day><volume>38</volume><issue>6</issue><fpage>877</fpage><lpage>888</lpage><pub-id pub-id-type="doi">10.5665/sleep.4732</pub-id><pub-id pub-id-type="medline">25409106</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>GQ</given-names> </name><name name-style="western"><surname>Cui</surname><given-names>L</given-names> </name><name name-style="western"><surname>Mueller</surname><given-names>R</given-names> </name><etal/></person-group><article-title>The national sleep research resource: towards a sleep data commons</article-title><source>J Am Med Inform Assoc</source><year>2018</year><month>10</month><day>1</day><volume>25</volume><issue>10</issue><fpage>1351</fpage><lpage>1358</lpage><pub-id pub-id-type="doi">10.1093/jamia/ocy064</pub-id><pub-id pub-id-type="medline">29860441</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fekedulegn</surname><given-names>D</given-names> </name><name name-style="western"><surname>Andrew</surname><given-names>ME</given-names> </name><name name-style="western"><surname>Shi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Violanti</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Knox</surname><given-names>S</given-names> </name><name name-style="western"><surname>Innes</surname><given-names>KE</given-names> </name></person-group><article-title>Actigraphy-based assessment of sleep 
parameters</article-title><source>Ann Work Expo Health</source><year>2020</year><month>04</month><day>30</day><volume>64</volume><issue>4</issue><fpage>350</fpage><lpage>367</lpage><pub-id pub-id-type="doi">10.1093/annweh/wxaa007</pub-id><pub-id pub-id-type="medline">32053169</pub-id></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Soldan</surname><given-names>A</given-names> </name><name name-style="western"><surname>Alfini</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pettigrew</surname><given-names>C</given-names> </name><etal/></person-group><article-title>Actigraphy-estimated physical activity is associated with functional and structural brain connectivity among older adults</article-title><source>Neurobiol Aging</source><year>2022</year><month>08</month><volume>116</volume><fpage>32</fpage><lpage>40</lpage><pub-id pub-id-type="doi">10.1016/j.neurobiolaging.2022.04.006</pub-id><pub-id pub-id-type="medline">35551019</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Meredith-Jones</surname><given-names>KA</given-names> </name><name name-style="western"><surname>Haszard</surname><given-names>JJ</given-names> </name><name name-style="western"><surname>Graham-DeMello</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Validation of actigraphy sleep metrics in children aged 8 to 16 years: considerations for device type, placement and algorithms</article-title><source>Int J Behav Nutr Phys Act</source><year>2024</year><season>Winter</season><volume>21</volume><issue>1</issue><fpage>40</fpage><pub-id pub-id-type="doi">10.1186/s12966-024-01590-x</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Razjouyan</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>H</given-names> </name><name name-style="western"><surname>Parthasarathy</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mohler</surname><given-names>J</given-names> </name><name name-style="western"><surname>Sharafkhaneh</surname><given-names>A</given-names> </name><name name-style="western"><surname>Najafi</surname><given-names>B</given-names> </name></person-group><article-title>Improving sleep quality assessment using wearable sensors by including information from postural/sleep position changes and body acceleration: a comparison of chest-worn sensors, wrist actigraphy, and polysomnography</article-title><source>J Clin Sleep Med</source><year>2017</year><month>11</month><day>15</day><volume>13</volume><issue>11</issue><fpage>1301</fpage><lpage>1310</lpage><pub-id pub-id-type="doi">10.5664/jcsm.6802</pub-id><pub-id pub-id-type="medline">28992827</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Derbin</surname><given-names>M</given-names> </name><name name-style="western"><surname>McKenna</surname><given-names>L</given-names> </name><name name-style="western"><surname>Chin</surname><given-names>D</given-names> </name><name name-style="western"><surname>Coffman</surname><given-names>B</given-names> </name><name name-style="western"><surname>Bloch-Salisbury</surname><given-names>E</given-names> </name></person-group><article-title>Actigraphy: metrics reveal it is not a valid tool for determining sleep in neonates</article-title><source>J Sleep Res</source><year>2022</year><month>02</month><volume>31</volume><issue>1</issue><fpage>e13444</fpage><pub-id pub-id-type="doi">10.1111/jsr.13444</pub-id><pub-id 
pub-id-type="medline">34291522</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Meltzer</surname><given-names>LJ</given-names> </name><name name-style="western"><surname>Wong</surname><given-names>P</given-names> </name><name name-style="western"><surname>Biggs</surname><given-names>SN</given-names> </name><etal/></person-group><article-title>Validation of actigraphy in middle childhood</article-title><source>Sleep</source><year>2016</year><month>06</month><day>1</day><volume>39</volume><issue>6</issue><fpage>1219</fpage><lpage>1224</lpage><pub-id pub-id-type="doi">10.5665/sleep.5836</pub-id><pub-id pub-id-type="medline">27091520</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="web"><article-title>ChatGPT</article-title><source>OpenAI</source><access-date>2025-11-20</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://www.openai.com/chatgpt">https://www.openai.com/chatgpt</ext-link></comment></nlm-citation></ref><ref id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Smith</surname><given-names>MT</given-names> </name><name name-style="western"><surname>McCrae</surname><given-names>CS</given-names> </name><name name-style="western"><surname>Cheung</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Use of actigraphy for the evaluation of sleep disorders and circadian rhythm sleep-wake disorders: an American academy of sleep medicine systematic review, meta-analysis, and GRADE assessment</article-title><source>J Clin Sleep Med</source><year>2018</year><month>07</month><day>15</day><volume>14</volume><issue>7</issue><fpage>1209</fpage><lpage>1230</lpage><pub-id pub-id-type="doi">10.5664/jcsm.7228</pub-id><pub-id 
pub-id-type="medline">29991438</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liu</surname><given-names>F</given-names> </name><name name-style="western"><surname>Schrack</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wanigatunga</surname><given-names>SK</given-names> </name><etal/></person-group><article-title>Comparison of sleep parameters from wrist-worn ActiGraph and Actiwatch devices</article-title><source>Sleep</source><year>2024</year><month>02</month><day>8</day><volume>47</volume><issue>2</issue><fpage>zsad155</fpage><pub-id pub-id-type="doi">10.1093/sleep/zsad155</pub-id><pub-id pub-id-type="medline">37257489</pub-id></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cespedes Feliciano</surname><given-names>EM</given-names> </name><name name-style="western"><surname>Quante</surname><given-names>M</given-names> </name><name name-style="western"><surname>Weng</surname><given-names>J</given-names> </name><etal/></person-group><article-title>Actigraphy-derived daily rest-activity patterns and body mass index in community-dwelling adults</article-title><source>Sleep</source><year>2017</year><month>12</month><day>1</day><volume>40</volume><issue>12</issue><fpage>zsx168</fpage><pub-id pub-id-type="doi">10.1093/sleep/zsx168</pub-id><pub-id pub-id-type="medline">29029250</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Van Den Berg</surname><given-names>JF</given-names> </name><name name-style="western"><surname>Van Rooij</surname><given-names>FJA</given-names> </name><name name-style="western"><surname>Vos</surname><given-names>H</given-names> 
</name><etal/></person-group><article-title>Disagreement between subjective and actigraphic measures of sleep duration in a population-based study of elderly persons</article-title><source>J Sleep Res</source><year>2008</year><month>09</month><volume>17</volume><issue>3</issue><fpage>295</fpage><lpage>302</lpage><pub-id pub-id-type="doi">10.1111/j.1365-2869.2008.00638.x</pub-id><pub-id pub-id-type="medline">18321246</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Quante</surname><given-names>M</given-names> </name><name name-style="western"><surname>Kaplan</surname><given-names>ER</given-names> </name><name name-style="western"><surname>Cailler</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Actigraphy-based sleep estimation in adolescents and adults: a comparison with polysomnography using two scoring algorithms</article-title><source>Nat Sci Sleep</source><year>2018</year><volume>10</volume><fpage>13</fpage><lpage>20</lpage><pub-id pub-id-type="doi">10.2147/NSS.S151085</pub-id><pub-id pub-id-type="medline">29403321</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kim</surname><given-names>MJ</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>GH</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>CS</given-names> </name><etal/></person-group><article-title>Comparison of three actigraphic algorithms used to evaluate sleep in patients with obstructive sleep apnea</article-title><source>Sleep Breath</source><year>2013</year><month>03</month><volume>17</volume><issue>1</issue><fpage>297</fpage><lpage>304</lpage><pub-id pub-id-type="doi">10.1007/s11325-012-0689-z</pub-id><pub-id 
pub-id-type="medline">22447172</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sadeh</surname><given-names>A</given-names> </name></person-group><article-title>Actigraphically based automatic bedtime sleep-wake scoring. Validity and clinical applications</article-title><source>J Ambul Monit</source><year>1989</year><volume>2</volume><issue>3</issue><fpage>209</fpage><lpage>216</lpage></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sadeh</surname><given-names>A</given-names> </name><name name-style="western"><surname>Hauri</surname><given-names>PJ</given-names> </name><name name-style="western"><surname>Kripke</surname><given-names>DF</given-names> </name><name name-style="western"><surname>Lavie</surname><given-names>P</given-names> </name></person-group><article-title>The role of actigraphy in the evaluation of sleep disorders</article-title><source>Sleep</source><year>1995</year><month>05</month><volume>18</volume><issue>4</issue><fpage>288</fpage><lpage>302</lpage><pub-id pub-id-type="doi">10.1093/sleep/18.4.288</pub-id><pub-id pub-id-type="medline">7618029</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Paquet</surname><given-names>J</given-names> </name><name name-style="western"><surname>Kawinska</surname><given-names>A</given-names> </name><name name-style="western"><surname>Carrier</surname><given-names>J</given-names> </name></person-group><article-title>Wake detection capacity of actigraphy during sleep</article-title><source>Sleep</source><year>2007</year><month>10</month><volume>30</volume><issue>10</issue><fpage>1362</fpage><lpage>1369</lpage><pub-id 
pub-id-type="doi">10.1093/sleep/30.10.1362</pub-id><pub-id pub-id-type="medline">17969470</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Oakley</surname><given-names>NR</given-names> </name></person-group><article-title>Validation with polysomnography of the sleepwatch sleep/wake scoring algorithm used by the Actiwatch activity monitoring system</article-title><source>Mini Mitter Co</source><year>1997</year><access-date>2025-12-03</access-date></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gao</surname><given-names>C</given-names> </name><name name-style="western"><surname>Li</surname><given-names>P</given-names> </name><name name-style="western"><surname>Morris</surname><given-names>CJ</given-names> </name><etal/></person-group><article-title>Actigraphy-based sleep detection: validation with polysomnography and comparison of performance for nighttime and daytime sleep during simulated shift work</article-title><source>Nat Sci Sleep</source><year>2022</year><volume>14</volume><fpage>1801</fpage><lpage>1816</lpage><pub-id pub-id-type="doi">10.2147/NSS.S373107</pub-id><pub-id pub-id-type="medline">36275180</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Webster</surname><given-names>JB</given-names> </name><name name-style="western"><surname>Kripke</surname><given-names>DF</given-names> </name><name name-style="western"><surname>Messin</surname><given-names>S</given-names> </name><name name-style="western"><surname>Mullaney</surname><given-names>DJ</given-names> </name><name name-style="western"><surname>Wyborney</surname><given-names>G</given-names> </name></person-group><article-title>An 
activity-based sleep monitor system for ambulatory use</article-title><source>Sleep</source><year>1982</year><volume>5</volume><issue>4</issue><fpage>389</fpage><lpage>399</lpage><pub-id pub-id-type="doi">10.1093/sleep/5.4.389</pub-id><pub-id pub-id-type="medline">7163726</pub-id></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Cohen</surname><given-names>J</given-names> </name></person-group><article-title>A coefficient of agreement for nominal scales</article-title><source>Educ Psychol Meas</source><year>1960</year><month>04</month><volume>20</volume><issue>1</issue><fpage>37</fpage><lpage>46</lpage><pub-id pub-id-type="doi">10.1177/001316446002000104</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>McHugh</surname><given-names>ML</given-names> </name></person-group><article-title>Interrater reliability: the kappa statistic</article-title><source>Biochem Med (Zagreb)</source><year>2012</year><volume>22</volume><issue>3</issue><fpage>276</fpage><lpage>282</lpage><pub-id pub-id-type="doi">10.11613/bm.2012.031</pub-id><pub-id pub-id-type="medline">23092060</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Matthews</surname><given-names>BW</given-names> </name></person-group><article-title>Comparison of the predicted and observed secondary structure of T4 phage lysozyme</article-title><source>Biochim Biophys Acta</source><year>1975</year><month>10</month><day>20</day><volume>405</volume><issue>2</issue><fpage>442</fpage><lpage>451</lpage><pub-id pub-id-type="doi">10.1016/0005-2795(75)90109-9</pub-id><pub-id pub-id-type="medline">1180967</pub-id></nlm-citation></ref><ref 
id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chicco</surname><given-names>D</given-names> </name><name name-style="western"><surname>T&#x00F6;tsch</surname><given-names>N</given-names> </name><name name-style="western"><surname>Jurman</surname><given-names>G</given-names> </name></person-group><article-title>The Matthews correlation coefficient (MCC) is more reliable than balanced accuracy, bookmaker informedness, and markedness in two-class confusion matrix evaluation</article-title><source>BioData Min</source><year>2021</year><month>02</month><day>4</day><volume>14</volume><issue>1</issue><fpage>13</fpage><pub-id pub-id-type="doi">10.1186/s13040-021-00244-z</pub-id><pub-id pub-id-type="medline">33541410</pub-id></nlm-citation></ref><ref id="ref45"><label>45</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Fritz</surname><given-names>CO</given-names> </name><name name-style="western"><surname>Morris</surname><given-names>PE</given-names> </name><name name-style="western"><surname>Richler</surname><given-names>JJ</given-names> </name></person-group><article-title>Effect size estimates: current use, calculations, and interpretation</article-title><source>J Exp Psychol Gen</source><year>2012</year><month>02</month><volume>141</volume><issue>1</issue><fpage>2</fpage><lpage>18</lpage><pub-id pub-id-type="doi">10.1037/a0024338</pub-id><pub-id pub-id-type="medline">21823805</pub-id></nlm-citation></ref><ref id="ref46"><label>46</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Brydges</surname><given-names>CR</given-names> </name></person-group><article-title>Effect size guidelines, sample size calculations, and statistical power in gerontology</article-title><source>Innov 
Aging</source><year>2019</year><month>08</month><volume>3</volume><issue>4</issue><fpage>igz036</fpage><pub-id pub-id-type="doi">10.1093/geroni/igz036</pub-id><pub-id pub-id-type="medline">31528719</pub-id></nlm-citation></ref><ref id="ref47"><label>47</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bland</surname><given-names>JM</given-names> </name><name name-style="western"><surname>Altman</surname><given-names>DG</given-names> </name></person-group><article-title>Statistical methods for assessing agreement between two methods of clinical measurement</article-title><source>Lancet</source><year>1986</year><month>02</month><day>8</day><volume>1</volume><issue>8476</issue><fpage>307</fpage><lpage>310</lpage><pub-id pub-id-type="doi">10.1016/s0140-6736(86)90837-8</pub-id><pub-id pub-id-type="medline">2868172</pub-id></nlm-citation></ref><ref id="ref48"><label>48</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Koo</surname><given-names>BB</given-names> </name><name name-style="western"><surname>Sillau</surname><given-names>S</given-names> </name><name name-style="western"><surname>Dean</surname><given-names>DA</given-names>  <suffix>II</suffix></name><name name-style="western"><surname>Lutsey</surname><given-names>PL</given-names> </name><name name-style="western"><surname>Redline</surname><given-names>S</given-names> </name></person-group><article-title>Periodic limb movements during sleep and prevalent hypertension in the multi-ethnic study of atherosclerosis</article-title><source>Hypertension</source><year>2015</year><month>01</month><volume>65</volume><issue>1</issue><fpage>70</fpage><lpage>77</lpage><pub-id pub-id-type="doi">10.1161/HYPERTENSIONAHA.114.04193</pub-id><pub-id pub-id-type="medline">25287399</pub-id></nlm-citation></ref><ref id="ref49"><label>49</label><nlm-citation citation-type="journal"><person-group 
person-group-type="author"><name name-style="western"><surname>Sharrett</surname><given-names>AR</given-names> </name><name name-style="western"><surname>Ding</surname><given-names>J</given-names> </name><name name-style="western"><surname>Criqui</surname><given-names>MH</given-names> </name><etal/></person-group><article-title>Smoking, diabetes, and blood cholesterol differ in their associations with subclinical atherosclerosis: the Multiethnic Study of Atherosclerosis (MESA)</article-title><source>Atherosclerosis</source><year>2006</year><month>06</month><volume>186</volume><issue>2</issue><fpage>441</fpage><lpage>447</lpage><pub-id pub-id-type="doi">10.1016/j.atherosclerosis.2005.08.010</pub-id><pub-id pub-id-type="medline">16154575</pub-id></nlm-citation></ref><ref id="ref50"><label>50</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Haghayegh</surname><given-names>S</given-names> </name><name name-style="western"><surname>Khoshnevis</surname><given-names>S</given-names> </name><name name-style="western"><surname>Smolensky</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Diller</surname><given-names>KR</given-names> </name><name name-style="western"><surname>Castriotta</surname><given-names>RJ</given-names> </name></person-group><article-title>Performance comparison of different interpretative algorithms utilized to derive sleep parameters from wrist actigraphy data</article-title><source>Chronobiol Int</source><year>2019</year><month>12</month><volume>36</volume><issue>12</issue><fpage>1752</fpage><lpage>1760</lpage><pub-id pub-id-type="doi">10.1080/07420528.2019.1679826</pub-id><pub-id pub-id-type="medline">31658822</pub-id></nlm-citation></ref><ref id="ref51"><label>51</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Palotti</surname><given-names>J</given-names> </name><name 
name-style="western"><surname>Mall</surname><given-names>R</given-names> </name><name name-style="western"><surname>Aupetit</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Benchmark on a large cohort for sleep-wake classification with machine learning techniques</article-title><source>NPJ Digit Med</source><year>2019</year><volume>2</volume><fpage>50</fpage><pub-id pub-id-type="doi">10.1038/s41746-019-0126-9</pub-id><pub-id pub-id-type="medline">31304396</pub-id></nlm-citation></ref><ref id="ref52"><label>52</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Jokar</surname><given-names>F</given-names> </name><name name-style="western"><surname>Azzopardi</surname><given-names>G</given-names> </name><name name-style="western"><surname>Palotti</surname><given-names>J</given-names> </name></person-group><article-title>Towards accurate and efficient sleep period detection using wearable devices</article-title><conf-name>International Conference on Computer Analysis of Images and Patterns</conf-name><conf-date>Sep 25-28, 2023</conf-date><conf-loc>Limassol, Cyprus</conf-loc><fpage>43</fpage><lpage>54</lpage><pub-id pub-id-type="doi">10.1007/978-3-031-44240-7_5</pub-id></nlm-citation></ref><ref id="ref53"><label>53</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>M</given-names> </name><name name-style="western"><surname>Cui</surname><given-names>W</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Li</surname><given-names>X</given-names> </name></person-group><article-title>An attention based CNN-LSTM approach for sleep-wake detection with heterogeneous sensors</article-title><source>IEEE J Biomed 
Health Inform</source><year>2021</year><month>09</month><volume>25</volume><issue>9</issue><fpage>3270</fpage><lpage>3277</lpage><pub-id pub-id-type="doi">10.1109/JBHI.2020.3006145</pub-id><pub-id pub-id-type="medline">32749983</pub-id></nlm-citation></ref><ref id="ref54"><label>54</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hammad</surname><given-names>G</given-names> </name><name name-style="western"><surname>Reyt</surname><given-names>M</given-names> </name><name name-style="western"><surname>Beliy</surname><given-names>N</given-names> </name><etal/></person-group><article-title>pyActigraphy: open-source python package for actigraphy data visualization and analysis</article-title><source>PLoS Comput Biol</source><year>2021</year><month>10</month><volume>17</volume><issue>10</issue><fpage>e1009514</fpage><pub-id pub-id-type="doi">10.1371/journal.pcbi.1009514</pub-id><pub-id pub-id-type="medline">34665807</pub-id></nlm-citation></ref><ref id="ref55"><label>55</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nunes</surname><given-names>AS</given-names> </name><name name-style="western"><surname>Patterson</surname><given-names>MR</given-names> </name><name name-style="western"><surname>Gerstel</surname><given-names>D</given-names> </name><name name-style="western"><surname>Khan</surname><given-names>S</given-names> </name><name name-style="western"><surname>Guo</surname><given-names>CC</given-names> </name><name name-style="western"><surname>Neishabouri</surname><given-names>A</given-names> </name></person-group><article-title>Domain adversarial convolutional neural network improves the accuracy and generalizability of wearable sleep assessment technology</article-title><source>Sensors (Basel)</source><year>2024</year><month>12</month><day>14</day><volume>24</volume><issue>24</issue><fpage>7982</fpage><pub-id 
pub-id-type="doi">10.3390/s24247982</pub-id><pub-id pub-id-type="medline">39771718</pub-id></nlm-citation></ref><ref id="ref56"><label>56</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Li</surname><given-names>X</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Jiang</surname><given-names>F</given-names> </name><name name-style="western"><surname>Zhao</surname><given-names>H</given-names> </name></person-group><article-title>A novel machine learning unsupervised algorithm for sleep/wake identification using actigraphy</article-title><source>Chronobiol Int</source><year>2020</year><month>07</month><volume>37</volume><issue>7</issue><fpage>1002</fpage><lpage>1015</lpage><pub-id pub-id-type="doi">10.1080/07420528.2020.1754848</pub-id><pub-id pub-id-type="medline">32342702</pub-id></nlm-citation></ref><ref id="ref57"><label>57</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Sano</surname><given-names>A</given-names> </name><name name-style="western"><surname>Chen</surname><given-names>W</given-names> </name><name name-style="western"><surname>Lopez-Martinez</surname><given-names>D</given-names> </name><name name-style="western"><surname>Taylor</surname><given-names>S</given-names> </name><name name-style="western"><surname>Picard</surname><given-names>RW</given-names> </name></person-group><article-title>Multimodal ambulatory sleep detection using LSTM recurrent neural networks</article-title><source>IEEE J Biomed Health Inform</source><year>2019</year><month>07</month><volume>23</volume><issue>4</issue><fpage>1607</fpage><lpage>1617</lpage><pub-id pub-id-type="doi">10.1109/JBHI.2018.2867619</pub-id><pub-id pub-id-type="medline">30176613</pub-id></nlm-citation></ref><ref id="ref58"><label>58</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Berahmand</surname><given-names>K</given-names> </name><name name-style="western"><surname>Daneshfar</surname><given-names>F</given-names> </name><name name-style="western"><surname>Salehi</surname><given-names>ES</given-names> </name><name name-style="western"><surname>Li</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Xu</surname><given-names>Y</given-names> </name></person-group><article-title>Autoencoders and their applications in machine learning: a survey</article-title><source>Artif Intell Rev</source><year>2024</year><month>02</month><volume>57</volume><issue>2</issue><fpage>28</fpage><pub-id pub-id-type="doi">10.1007/s10462-023-10662-6</pub-id></nlm-citation></ref><ref id="ref59"><label>59</label><nlm-citation citation-type="web"><article-title>Multi-ethnic study of atherosclerosis (MESA)</article-title><source>National Heart, Lung, and Blood Institute (NHLBI)</source><access-date>2025-11-21</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://clinicaltrials.gov/ct2/show/NCT00005487">https://clinicaltrials.gov/ct2/show/NCT00005487</ext-link></comment></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Sensitivity analysis of outliers.</p><media xlink:href="formative_v9i1e70778_app1.docx" xlink:title="DOCX File, 58 KB"/></supplementary-material><supplementary-material id="app2"><label>Multimedia Appendix 2</label><p>Formulae for statistical analyses.</p><media xlink:href="formative_v9i1e70778_app2.docx" xlink:title="DOCX File, 16 KB"/></supplementary-material><supplementary-material id="app3"><label>Multimedia Appendix 3</label><p>Confusion metrics, Matthews correlation coefficient, and Cohen &#x03BA; statistics.</p><media xlink:href="formative_v9i1e70778_app3.docx" xlink:title="DOCX File, 20 
KB"/></supplementary-material><supplementary-material id="app4"><label>Multimedia Appendix 4</label><p>ANOVA and post hoc results for Matthews correlation coefficient and Cohen &#x03BA;.</p><media xlink:href="formative_v9i1e70778_app4.docx" xlink:title="DOCX File, 37 KB"/></supplementary-material><supplementary-material id="app5"><label>Multimedia Appendix 5</label><p>Post hoc results for confusion matrix statistics.</p><media xlink:href="formative_v9i1e70778_app5.docx" xlink:title="DOCX File, 76 KB"/></supplementary-material><supplementary-material id="app6"><label>Multimedia Appendix 6</label><p>Mean difference statistics.</p><media xlink:href="formative_v9i1e70778_app6.docx" xlink:title="DOCX File, 144 KB"/></supplementary-material><supplementary-material id="app7"><label>Multimedia Appendix 7</label><p>Receiver operating characteristic curve and area under the curve statistics for sleep problem subgroups.</p><media xlink:href="formative_v9i1e70778_app7.docx" xlink:title="DOCX File, 1954 KB"/></supplementary-material><supplementary-material id="app8"><label>Multimedia Appendix 8</label><p>Subgroup confusion matrix statistics.</p><media xlink:href="formative_v9i1e70778_app8.docx" xlink:title="DOCX File, 40 KB"/></supplementary-material><supplementary-material id="app9"><label>Multimedia Appendix 9</label><p>Subgroup ANOVA and post hoc results for confusion matrix statistics.</p><media xlink:href="formative_v9i1e70778_app9.docx" xlink:title="DOCX File, 268 KB"/></supplementary-material><supplementary-material id="app10"><label>Multimedia Appendix 10</label><p>Subgroup ANOVA and post hoc results for Matthews correlation coefficient and Cohen &#x03BA;.</p><media xlink:href="formative_v9i1e70778_app10.docx" xlink:title="DOCX File, 84 KB"/></supplementary-material><supplementary-material id="app11"><label>Multimedia Appendix 11</label><p>Subgroup Bland-Altman statistics.</p><media xlink:href="formative_v9i1e70778_app11.docx" xlink:title="DOCX File, 10757 
KB"/></supplementary-material></app-group></back></article>