<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v6i6e34366</article-id>
      <article-id pub-id-type="pmid">35699997</article-id>
      <article-id pub-id-type="doi">10.2196/34366</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Fairness in Mobile Phone–Based Mental Health Assessment Algorithms: Exploratory Study</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Ayyoubzadeh</surname>
            <given-names>Seyed Mohammad</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Sükei</surname>
            <given-names>Emese</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author">
          <name name-style="western">
            <surname>Park</surname>
            <given-names>Jinkyung</given-names>
          </name>
          <degrees>MA</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-0804-832X</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Arunachalam</surname>
            <given-names>Ramanathan</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-8934-0981</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Silenzio</surname>
            <given-names>Vincent</given-names>
          </name>
          <degrees>MD</degrees>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-1408-7955</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Singh</surname>
            <given-names>Vivek K</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>School of Communication &#38; Information</institution>
            <institution>Rutgers University</institution>
            <addr-line>4 Huntington Street</addr-line>
            <addr-line>New Brunswick, NJ, 08901</addr-line>
            <country>United States</country>
            <phone>1 848 932 7588</phone>
            <email>v.singh@rutgers.edu</email>
          </address>
          <xref rid="aff4" ref-type="aff">4</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8194-2336</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>School of Communication &#38; Information</institution>
        <institution>Rutgers University</institution>
        <addr-line>New Brunswick, NJ</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>Department of Computer Science</institution>
        <institution>Rutgers University</institution>
        <addr-line>New Brunswick, NJ</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>School of Public Health</institution>
        <institution>Rutgers University</institution>
        <addr-line>Newark, NJ</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Institute for Data, Systems, and Society</institution>
        <institution>Massachusetts Institute of Technology</institution>
        <addr-line>Cambridge, MA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Vivek K Singh <email>v.singh@rutgers.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <month>6</month>
        <year>2022</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>14</day>
        <month>6</month>
        <year>2022</year>
      </pub-date>
      <volume>6</volume>
      <issue>6</issue>
      <elocation-id>e34366</elocation-id>
      <history>
        <date date-type="received">
          <day>19</day>
          <month>10</month>
          <year>2021</year>
        </date>
        <date date-type="rev-request">
          <day>17</day>
          <month>1</month>
          <year>2022</year>
        </date>
        <date date-type="rev-recd">
          <day>27</day>
          <month>3</month>
          <year>2022</year>
        </date>
        <date date-type="accepted">
          <day>10</day>
          <month>4</month>
          <year>2022</year>
        </date>
      </history>
      <copyright-statement>©Jinkyung Park, Ramanathan Arunachalam, Vincent Silenzio, Vivek K Singh. Originally published in JMIR Formative Research (https://formative.jmir.org), 14.06.2022.</copyright-statement>
      <copyright-year>2022</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2022/6/e34366" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>Approximately 1 in 5 American adults experience mental illness every year. Thus, mobile phone–based mental health prediction apps that use phone data and artificial intelligence techniques for mental health assessment have become increasingly important and are being rapidly developed. At the same time, multiple artificial intelligence–related technologies (eg, face recognition and search results) have recently been reported to be biased regarding age, gender, and race. This study moves this discussion to a new domain: phone-based mental health assessment algorithms. It is important to ensure that such algorithms do not contribute to gender disparities through biased predictions across gender groups.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>This research aimed to analyze the susceptibility of multiple commonly used machine learning approaches for gender bias in mobile mental health assessment and explore the use of an algorithmic disparate impact remover (DIR) approach to reduce bias levels while maintaining high accuracy.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>First, we performed preprocessing and model training using the data set (N=55) obtained from a previous study. Accuracy levels and differences in accuracy across genders were computed using 5 different machine learning models. We selected the random forest model, which yielded the highest accuracy, for a more detailed audit and computed multiple metrics that are commonly used for fairness in the machine learning literature. Finally, we applied the DIR approach to reduce bias in the mental health assessment algorithm.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>The highest observed accuracy for the mental health assessment was 78.57%. Although this accuracy level raises optimism, the audit based on gender revealed that the performance of the algorithm was statistically significantly different between the male and female groups (eg, difference in accuracy across genders was 15.85%; <italic>P</italic>&#60;.001). Similar trends were obtained for other fairness metrics. This disparity in performance was found to reduce significantly after the application of the DIR approach by adapting the data used for modeling (eg, the difference in accuracy across genders was 1.66%, and the reduction is statistically significant with <italic>P</italic>&#60;.001).</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>This study grounds the need for algorithmic auditing in phone-based mental health assessment algorithms and the use of gender as a protected attribute to study fairness in such settings. Such audits and remedial steps are the building blocks for the widespread adoption of fair and accurate mental health assessment algorithms in the future.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>algorithmic bias</kwd>
        <kwd>mental health</kwd>
        <kwd>health equity</kwd>
        <kwd>medical informatics</kwd>
        <kwd>health information systems</kwd>
        <kwd>gender bias</kwd>
        <kwd>mobile phone</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background</title>
        <p>Various machine learning (ML) algorithms are increasingly being used to make crucial decisions previously made by humans. Whether they are involved in approving loans, granting college admissions, or identifying the need for additional health support, automated algorithms find patterns, predict outcomes, and make decisions that may have consequential impacts on individuals’ lives [<xref ref-type="bibr" rid="ref1">1</xref>]. Indeed, the dependency on algorithms has eased our lives by replacing subjective human decisions with ML algorithms. The movement toward the application of automated algorithms in the health domain was not an exception. For instance, the proactive assessment of an individual’s mental health is essential for maintaining a healthy and well-functioning society [<xref ref-type="bibr" rid="ref2">2</xref>]. Although this holds the promise of dramatically wider access to mental health care, it is also fraught with inequities that can often inadvertently be baked into the algorithmic prediction of mental health levels.</p>
        <p>ML algorithms attempt to find the generalized pattern from the training data, and sometimes these algorithms can manifest inherent biases across demographic characteristics such as age, race, ethnicity, and gender. A reason for the existing biases can be explained by <italic>negative legacy</italic> [<xref ref-type="bibr" rid="ref3">3</xref>] (ie, the absence of sufficient data for a particular demographic group). For example, giving loans mostly to higher-income groups in the past may result in disapproval of loans to lower-income groups by algorithms that were informed by historical data, resulting in potential damage to individuals belonging to lower-income groups.</p>
        <p>Such biases can be especially deleterious if they are part of health care algorithms. For instance, a recent study by Allen et al [<xref ref-type="bibr" rid="ref4">4</xref>] found that algorithms used to assess mortality scores exhibit differential accuracy across races, thereby increasing racial disparities in health care. Similarly, Gianfrancesco et al [<xref ref-type="bibr" rid="ref5">5</xref>] demonstrated that algorithmic predictions based on electronic health records can discriminate against multiple demographic groups. In particular, Obermeyer et al [<xref ref-type="bibr" rid="ref1">1</xref>] showed that existing algorithms do not adequately identify the need for health support for people of color.</p>
        <p>Building on these trends, we move the discussion of algorithmic fairness to mobile mental health assessment algorithms, which have been increasingly used in recent times [<xref ref-type="bibr" rid="ref6">6</xref>]. With &#62;6 billion users, mobile phones are one of the most ubiquitous consumer devices in the world. Many of them (especially smartphones) have capabilities conducive to monitoring an individual’s physical activity, location, and communication patterns, each of which has been connected to mental health in the past [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref8">8</xref>]. Thus, mobile phones hold significant promise as a platform for monitoring multiple indicators of mental health risks and improving long-term management and treatment delivery to people with mental health issues [<xref ref-type="bibr" rid="ref7">7</xref>,<xref ref-type="bibr" rid="ref9">9</xref>]. At the same time, the creation of phone data–based ML models without considering the aspects of justice and fairness could reify, amplify, and multiply existing health disparities for certain segments of society (eg, women). Considering the abovementioned factors, the main research questions (RQs) of this study were as follows:</p>
        <list list-type="bullet">
          <list-item>
            <p>RQ1: Are mobile phone–based mental health algorithms susceptible to bias in terms of gender?</p>
          </list-item>
          <list-item>
            <p>RQ2: Is it possible to reduce the level of bias while maintaining high accuracy?</p>
          </list-item>
        </list>
      </sec>
      <sec>
        <title>Related Work</title>
        <sec>
          <title>Predicting Mental Health</title>
          <p>Over the past few decades, mental health has typically been assessed based on self-reported surveys that involved sporadic sampling, most of which were initiated after some significant events had taken place in an individual’s life. Recently, as the availability of mobile phone data has increased, several studies have suggested using mobile phone data to detect and predict mental health conditions. Wang et al [<xref ref-type="bibr" rid="ref10">10</xref>] introduced a mobile phone sensing system to automatically infer mental well-being, including depression, stress, flourishing, and loneliness. The study reported that automatically sensed conversation, activity, mobility, and sleep were significantly associated with mental health outcomes. By collecting data from sensors in mobile phone users (eg, location, messages, and calls), a longitudinal study showed a relationship between users’ routines and moods [<xref ref-type="bibr" rid="ref11">11</xref>]. Another study also found that mobile phone–based features such as call count, call response rate, and the number of new contacts are positively associated with mental health [<xref ref-type="bibr" rid="ref8">8</xref>]. Using location information collected by a mobile phone app, Canzian and Musolesi [<xref ref-type="bibr" rid="ref12">12</xref>] assessed the correlation between mobility patterns and the presence of depressive mood. A similar study also presented the relationship between depressive symptoms and the use of mobile phones and the movement through geographic spaces [<xref ref-type="bibr" rid="ref7">7</xref>].</p>
          <p>The results of the abovementioned studies provide clear evidence of interconnections between mobile phone data features and mental health conditions. More importantly, they suggested the potential of developing phone-based ML algorithms as a basis for the unobtrusive prediction of mental health conditions. However, to the best of our knowledge, no study has examined the possibility of algorithmic bias in predicting mental health status by using mobile phone data. Motivated by previous work on algorithmic fairness (see the <italic>Algorithmic Fairness</italic> section), this study attempted to mitigate the discriminatory impact of gender on mental health prediction algorithms.</p>
        </sec>
        <sec>
          <title>Algorithmic Fairness</title>
          <p>An increasing amount of research has suggested that ML algorithms in many domains are not free from discriminatory decision-making. Even with the best intentions, data-driven algorithmic decision-making processes can reproduce, inherit, or reflect the existing social biases. Algorithmic bias may stem from different sources, including (1) input data that may have unequal representation from different groups, (2) an algorithm that has been inadvertently or knowingly coded to make unfair decisions, (3) misuse of certain models in a different context, and (4) biased training data, which reaffirms that social biases may be used as evidence that an algorithm performs well [<xref ref-type="bibr" rid="ref13">13</xref>]. Broadly, the sociotechnical system framework underscores that the value system of the algorithm developers is coded during the algorithm design process; hence, each assumption (often implicit) made by the developers influences the real-world performance of the algorithm [<xref ref-type="bibr" rid="ref14">14</xref>].</p>
          <p>At the same time, multiple bias mitigation techniques have been developed for fairness in the ML literature [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. Roughly, they attempt to counter such algorithmic bias by modifying the training data (preprocessing), learning algorithms (in-processing), or prediction (postprocessing). Preprocessing approaches focus on adapting the data going into the algorithms [<xref ref-type="bibr" rid="ref16">16</xref>], in-processing approaches change the core algorithm (eg, change optimization function) [<xref ref-type="bibr" rid="ref15">15</xref>], and postprocessing algorithms tend to modify the predicted labels to increase fairness [<xref ref-type="bibr" rid="ref17">17</xref>].</p>
          <p>Despite the plethora of related work, attempts to ensure algorithmic fairness toward a protected attribute (gender in our case) in the algorithmic assessment of mental health (high or low) have not been made.</p>
        </sec>
        <sec>
          <title>Gender Bias</title>
          <p>Various attempts have been made to tackle the issue of gender bias in computer algorithms by auditing algorithms for gender bias and modifying algorithms to eliminate stereotypes. For example, a study found that image search results for occupations could amplify gender stereotypes by portraying the minority gender as less professional [<xref ref-type="bibr" rid="ref18">18</xref>]. Another study found gender stereotypes in word embeddings (eg, a framework to represent text data as vectors) and created debiasing algorithms to reduce gender bias while preserving the utility of the embeddings [<xref ref-type="bibr" rid="ref19">19</xref>]. Furthermore, Zhao et al [<xref ref-type="bibr" rid="ref20">20</xref>] tackled the problem of the effect of data imbalance, arguing that such data imbalance can worsen discrimination in terms of gender. They quantified the biases in visual recognition models and calibrated the models to reduce bias. However, no research has been conducted on gender equality using classification algorithms that predict mental health.</p>
          <p>This study addressed the problem of identifying and reducing gender bias, as the overrepresentation of men in training data could accelerate gender inequality in mental health prediction algorithms. Particularly, we focused on the issue of <italic>negative legacy</italic>, as suggested by Kamishima et al [<xref ref-type="bibr" rid="ref3">3</xref>], which involves the idea that unfair sampling or labeling in the training data may lead to a disparate impact [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref21">21</xref>] on a certain group of people (eg, granting loans mostly to those with higher income in the past may result in disapproval of loans to those with low income by the algorithms).</p>
        </sec>
        <sec>
          <title>Perspective on Fairness and Justice</title>
          <p>There exist multiple interpretations of fairness in the algorithmic fairness literature [<xref ref-type="bibr" rid="ref22">22</xref>]. For instance, scholars define fairness in terms of maximizing utility for groups or respecting various rules such as individual rights and freedoms [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>]. However, other interpretations abound, some of which are mutually incompatible [<xref ref-type="bibr" rid="ref25">25</xref>].</p>
          <p>The most commonly used approaches are those based on <italic>distributive</italic> and <italic>procedural</italic> justice [<xref ref-type="bibr" rid="ref22">22</xref>]. While distributive justice focuses on how outcomes are distributed across the population, procedural justice focuses on the processes used to undertake the decisions [<xref ref-type="bibr" rid="ref26">26</xref>,<xref ref-type="bibr" rid="ref27">27</xref>].</p>
          <p>An influential philosophical theory of fairness is attributed to the 20th-century philosopher Rawls, who equated fairness and justice, arguing broadly that fairness is <italic>a demand for impartiality</italic> [<xref ref-type="bibr" rid="ref21">21</xref>,<xref ref-type="bibr" rid="ref22">22</xref>]. In this study, we followed the approach for distributive justice based on the interpretation of Rawls. Specifically, we considered an algorithm to be fair if its performance did not vary for individuals with different demographic descriptors (eg, gender).</p>
          <p>This is related to the concept of <italic>disparate impact</italic> [<xref ref-type="bibr" rid="ref28">28</xref>]. Disparate impact, in US labor law, refers to practices in areas such as employment and housing, which affect one group of people of a protected characteristic more adversely than another, even when the rules applied by employers or landlords appear to be neutral [<xref ref-type="bibr" rid="ref29">29</xref>]. Most federal civil rights laws protect against disparate impacts based on race, color, religion, national origin, and sex as protected traits, and some laws include disability status and other traits.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Data Set</title>
        <p>We used a labeled data set from a previous study by Singh and Long [<xref ref-type="bibr" rid="ref8">8</xref>], which explored the associations between call log data and mental health based on a 10-week field and laboratory study. The data set included phone-based behavioral data and self-reported mental health survey data. Phone-based data (eg, call volume, interaction dynamics, diversity in contacts, tie strength, and temporal rhythms) were collected through the app installed on each participant’s mobile phone. Meanwhile, mental health was measured via in-person survey sessions using the Mental Health Inventory subscale of the 36-Item Short Form Health Survey [<xref ref-type="bibr" rid="ref30">30</xref>]. After passing a preprocessing and classification process, the study showed that automated ML algorithms using phone-based features achieved up to 80% accuracy in automatically classifying the mental health level (above or below the mean) of an individual [<xref ref-type="bibr" rid="ref8">8</xref>].</p>
        <p>A total of 59 participants completed the survey administered by Singh and Long [<xref ref-type="bibr" rid="ref8">8</xref>]. However, some participants did not complete all the surveys, and some did not enter the correct identifier (International Mobile Equipment Identity [IMEI] number) consistently across surveys. This resulted in a subset of 45 participants in the study [<xref ref-type="bibr" rid="ref8">8</xref>]. For this study, we returned to the survey data and decided to manually handle the <italic>off-by-one</italic> errors (ie, the mismatch in IMEI for different surveys only by 1 digit). Given that IMEI numbers have 14 to 15 digits, in the approximately 60-participant sample size, we considered the odds of 2 participants to be off by just 1 digit without human error being extremely low. This process helped us obtain a complete data set (ie, phone data, a mental health survey, and a demographic survey) for 55 participants.</p>
        <p>The data set we obtained from Singh and Long [<xref ref-type="bibr" rid="ref8">8</xref>] had gender as a demographic attribute that we considered a protected attribute. Note that a protected attribute in the algorithmic fairness literature is one on which performance should not depend [<xref ref-type="bibr" rid="ref15">15</xref>]. Among these 55 participants, 21 (38%) self-reported their gender as women or female (minority class), and 34 (62%) self-described as men or male. Note that this study does not differentiate between (biological) sex and (socially construed) gender. In addition, note that we consider the use of binary gender as a limitation of this study. Future studies should be conducted, which include participants with nonbinary gender identities.</p>
      </sec>
      <sec>
        <title>Preprocessing and Model Training</title>
        <p>The initial obtained data set was imbalanced (ie, there was not enough data for one class), which is a common problem in the fairness literature [<xref ref-type="bibr" rid="ref31">31</xref>]. To mitigate the effect of imbalance, we applied the synthetic minority oversampling technique [<xref ref-type="bibr" rid="ref32">32</xref>] to the training data (the test data remained in the original ratio). This technique works in balancing the data set by generating synthetic observations based on the existing minority observations.</p>
        <p>Before moving on to the application of any ML algorithm, the missing values were filled with the median values of the corresponding features. To reduce the impact of features with high variance, the features were standardized by removing the mean and scaling to unit variance. To build a classification model for high or low mental health scores, instances were labeled into 2 categories (1=high and 0=low) via a median split.</p>
        <p>With small sample data and high-dimensional space, there is always a chance of overfitting and reduced generalization. To avoid these issues, we used principal component analysis [<xref ref-type="bibr" rid="ref33">33</xref>]. Principal component analysis confirmed that the top 5 components explained &#62;99% of the variance (the larger the variation across a dimension, the more the information it contains); hence, we used the top 5 components as features for model creation.</p>
        <p>The abovementioned latent features were passed to several classification algorithms to classify the level of mental health (ie, whether the score was above or below the mean score of the population). As the sample data size was relatively modest, we refrained from splitting the data set into training and test sets. Instead, as suggested by prior literature [<xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref34">34</xref>], we applied 5-fold cross-validations and experimented with 5 popular classification algorithms, including logistic regression, support vector machine, random forest, k-nearest neighbors, and multilayer perceptron neural networks using the <italic>scikit-learn</italic> library [<xref ref-type="bibr" rid="ref35">35</xref>]. We ran all algorithms for 100 iterations, and the results are reported in the form of average overall accuracy, male accuracy (ie, accuracy for male individuals), and female accuracy (see the <italic>Results</italic> section).</p>
        <p>Using the abovementioned data, we could, in principle, replicate the approach described by Singh and Long [<xref ref-type="bibr" rid="ref8">8</xref>]. Although the features used were the same, we must note that the implementation was undertaken de novo with different preprocessing steps.</p>
      </sec>
      <sec>
        <title>Auditing Mental Health Algorithms for Bias</title>
        <p>Gender was selected as a protected attribute. Following the previous literature [<xref ref-type="bibr" rid="ref36">36</xref>,<xref ref-type="bibr" rid="ref37">37</xref>], men were considered the privileged group, and women were considered the unprivileged group. As there are multiple metrics to characterize accuracy in traditional ML (eg, observed accuracy, precision, recall, and F<sub>1</sub> score), past literature has discussed the need for multiple metrics to characterize bias in ML [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. In this study, we adopted the five most commonly used metrics [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref38">38</xref>]:</p>
        <list list-type="order">
          <list-item>
            <p>Delta accuracy captures the difference in the accuracy of samples belonging to privileged and unprivileged groups based on sensitive features (eg, gender and race or ethnicity).</p>
          </list-item>
          <list-item>
            <p>Delta true positive rate (∆TPR) focuses on equal opportunity for truly deserving entries in both privileged and unprivileged groups to obtain a positive label (eg, higher mental health label) from the algorithm [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref15">15</xref>].</p>
          </list-item>
          <list-item>
            <p>Delta false positive rate (∆FPR) ensures that both the true positive rate and the false positive rate (instances where undeserving candidates are granted positive outcomes) are equal across different groups [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref39">39</xref>].</p>
          </list-item>
          <list-item>
            <p>Statistical parity difference (SPD) calculates the difference in the probability of favorable outcomes from the algorithm being obtained by the unprivileged group to that of the privileged group [<xref ref-type="bibr" rid="ref38">38</xref>].</p>
          </list-item>
          <list-item>
            <p>Disparate impact captures the ratio of the probability of favorable outcomes for the unprivileged group to that of the privileged group [<xref ref-type="bibr" rid="ref16">16</xref>] (see <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref39">39</xref>-<xref ref-type="bibr" rid="ref41">41</xref>] for more details on the 5 metrics).</p>
          </list-item>
        </list>
        <p>Following the principle of disparate impact, a fair information system is one in which the performance does not vary for individuals with different demographic descriptors (eg, gender); hence, the disparate impact metric should be close to 1.0. However, for practical settings, a model is considered biased if its value is &#60;0.8 [<xref ref-type="bibr" rid="ref40">40</xref>]. Meanwhile, the values of delta accuracy, ∆TPR, ∆FPR, and SPD should be close to zero in fair systems. Following the previous literature [<xref ref-type="bibr" rid="ref39">39</xref>,<xref ref-type="bibr" rid="ref41">41</xref>], we used a 2-tailed <italic>t</italic> test to assess whether there was a significant difference in accuracy, true positive rate, and false positive rate levels observed for the privileged and unprivileged groups.</p>
      </sec>
      <sec>
        <title>Reducing Algorithmic Bias in Mental Health Assessment</title>
        <p>Disparate impact remover (DIR) [<xref ref-type="bibr" rid="ref16">16</xref>] is a preprocessing algorithm that modifies the feature values of the data set and makes the algorithm discrimination aware at the time of training. It does not require any changes in the classification algorithm, nor does it amend or postprocess the results of the classification algorithm, thus making it robust and applicable to different algorithms. The scenario in which DIR is needed to preprocess the data set depends on the metric called <italic>balanced error rate (BER),</italic> defined as follows:</p>
        <disp-formula>BER = (error rate [S = privileged] + error rate [S = unprivileged]) / 2</disp-formula>
        <p>In algorithmic fairness, the notion of BER is more important than the notion of traditional accuracy as, in most data sets, the contribution of the underprivileged attribute to the entire data set is lesser than that of the privileged attribute. For example, let us consider a data set with 100 rows, where 90 rows belong to the privileged group and 10 rows belong to the unprivileged group. With this data set, if the algorithm predicts all privileged rows right and unprivileged wrong, the error rate would be 10/100, which is 0.1, whereas the BER would be (0+1)/2, which is 0.5.</p>
        <p>An approach discussed in the literature [<xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>] is to replace the raw values of the data features with normalized variants that capture how extreme the value for an individual (eg, female) stands out within their own demographic group (eg, other women). In particular, the approach suggested by Feldman et al [<xref ref-type="bibr" rid="ref16">16</xref>] tackles this issue by allowing the considered classes to have equal probabilities of scoring high values for any of the chosen features. With a toy example, where output is college admissions, input is Scholastic Assessment Test (SAT) scores, and with a binary notion of gender (men and women) for the protected class, this approach gives men and women separate scores based on their ranking within their own genders. For example, a man with an 80th percentile SAT score within the men’s group is considered just as worthy as a woman with an 80th percentile SAT score within the women’s group, irrespective of the actual SAT scores. In this way, the approach supports an equitable admission process across 2 genders. Note that in many practical settings, it is useful to undertake <italic>partial repairs</italic> (eg, move the scores at the same percentile across the privileged and unprivileged groups to be closer to each other rather than being congruent). Finally, the above approach can be extended to multidimensional input features for the algorithm. In the considered domain (phone-based mental health assessments), phone use patterns for men and women are known to differ [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]. Hence, using the same thresholds for the features (eg, number of phone calls) of men and women as symptoms of mental health issues could yield erroneous and biased results.</p>
        <p>In this study, the DIR algorithm for bias reduction was implemented in Python using the IBM AIF360 library [<xref ref-type="bibr" rid="ref15">15</xref>]. The algorithm was run 100 times, with each iteration having a shuffled version of the data set. The average results for the accuracy and fairness metrics are presented in the <italic>Results</italic> section.</p>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Mental Health Assessment Results</title>
        <p><xref ref-type="table" rid="table1">Table 1</xref> shows the accuracy of multiple well-known ML algorithms for men and women (averaged over 100 iterations). The best-performing algorithm was random forest, which yielded 78.57% accuracy. These results are similar but not the same as those described by Singh and Long [<xref ref-type="bibr" rid="ref8">8</xref>]. In both studies, the random forest algorithm yielded the best performance, and the highest observed accuracy was close to 80%. The random forest model with the highest accuracy had 100 estimators (ie, the number of trees in the forest) and a maximum depth of 6.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Results showing the average overall accuracy, accuracy for men, and accuracy for women for various machine learning models in mental health assessment (averaged over 100 iterations).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="190"/>
            <col width="160"/>
            <col width="160"/>
            <col width="160"/>
            <col width="160"/>
            <col width="170"/>
            <thead>
              <tr valign="top">
                <td>Machine learning models</td>
                <td>Overall accuracy (%), mean (SD)</td>
                <td>Male accuracy (%), mean (SD)</td>
                <td>Female accuracy (%), mean (SD)</td>
                <td>Delta across gender (%), mean (SD)</td>
                <td><italic>P</italic> value of the 2-tailed <italic>t</italic> test on delta</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Multilayer perceptron neural networks</td>
                <td>59.99 (3.67)</td>
                <td>58.68 (8.14)</td>
                <td>61.92 (9.24)</td>
                <td>12.10 (10.41)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Support vector machine</td>
                <td>63.17 (2.91)</td>
                <td>65.98 (6.49)</td>
                <td>59.60 (8.37)</td>
                <td>12.20 (8.67)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Logistic regression</td>
                <td>58.48 (2.69)</td>
                <td>66.59 (5.47)</td>
                <td>47.38 (6.75)</td>
                <td>19.73 (9.80)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>K-nearest neighbors</td>
                <td>61.77 (1.78)</td>
                <td>70.43 (3.72)</td>
                <td>49.63 (5.89)</td>
                <td>20.96 (8.46)</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Random forest</td>
                <td>78.57 (1.61)</td>
                <td>87.16 (2.73)</td>
                <td>71.31 (2.51)</td>
                <td>15.85 (0.22)</td>
                <td>&#60;.001</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
      </sec>
      <sec>
        <title>Audit Results</title>
        <p>We compared the accuracies of different algorithms for the male and female groups (<xref ref-type="table" rid="table1">Table 1</xref>). The performance was found to be significantly different for the 2 groups in each of the considered algorithms based on a nonpairwise (2-tailed) <italic>t</italic> test (α=.05; <italic>P</italic>&#60;.001) [<xref ref-type="bibr" rid="ref41">41</xref>]. This indicates that the commonly used ML algorithms, when used for phone-based mental health assessment, are susceptible to bias.</p>
        <p>As a trade-off is expected between accuracy and fairness (ie, with increased fairness, there is typically a dip in accuracy) [<xref ref-type="bibr" rid="ref31">31</xref>], the random forest model with the highest observed accuracy was selected as the baseline model for further inspection of fairness.</p>
        <p>For random forest, the average absolute delta accuracy was 15.85% (<xref ref-type="table" rid="table2">Table 2</xref>). The absolute values of ∆TPR and ∆FPR were 0.88% and 33.43%, respectively. The average SPD was 26.1%, and the average disparate impact was 0.682, which were distant from the ideal values of 0 and 1.0, respectively.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>The average score for bias metrics in the random forest–based mental health assessment algorithm (average of 100 iterations).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="340"/>
            <col width="330"/>
            <col width="330"/>
            <thead>
              <tr valign="top">
                <td>Bias metrics</td>
                <td>Observed score, mean (SD)</td>
                <td>Ideal score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Delta accuracy (%)</td>
                <td>15.85 (0.22)</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Delta true positive rate (%)</td>
                <td>−0.88 (8.39)</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Delta false positive rate (%)</td>
                <td>33.43 (13.50)</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Statistical parity difference (%)</td>
                <td>26.1 (4.16)</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Disparate impact</td>
                <td>0.682 (0.049)</td>
                <td>1.0</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>For 4 of the 5 considered metrics (ie, except ∆TPR), the fairness scores were far from the ideal scores. In other words, the developed model yielded significantly different outcomes for individuals across genders despite reasonable aggregate performance. More precisely, the model was mostly biased against the unprivileged group (in this case, women), and the disparate impact appeared to be a major issue.</p>
      </sec>
      <sec>
        <title>Bias Reduction Results</title>
        <p>We recomputed the abovementioned bias metrics after applying the bias reduction algorithm (DIR), and the results averaged over 100 iterations are reported in <xref ref-type="table" rid="table3">Table 3</xref>. Furthermore, a comparison of the results before and after applying the bias reduction algorithm is presented in <xref ref-type="table" rid="table4">Table 4</xref>.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>The average score for bias metrics after applying the disparate impact remover approach (average of 100 iterations).</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="340"/>
            <col width="330"/>
            <col width="330"/>
            <thead>
              <tr valign="top">
                <td>Bias metrics</td>
                <td>Observed score, mean (SD)</td>
                <td>Ideal score</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Delta accuracy (%)</td>
                <td>1.66 (1.56)</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Delta true positive rate (%)</td>
                <td>3.74 (6.74)</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Delta false positive rate (%)</td>
                <td>5.58 (9.88)</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Statistical parity difference (%)</td>
                <td>−2.70 (1.71)</td>
                <td>0</td>
              </tr>
              <tr valign="top">
                <td>Disparate impact</td>
                <td>1.09 (0.041)</td>
                <td>1.0</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>Comparison of delta accuracy, statistical parity difference, and disparate impact before and after applying the preprocessing algorithm.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="220"/>
            <col width="200"/>
            <col width="230"/>
            <col width="120"/>
            <col width="230"/>
            <thead>
              <tr valign="top">
                <td>Bias metrics</td>
                <td>Baseline model, mean (SD)</td>
                <td>After bias reduction, mean (SD)</td>
                <td>Difference</td>
                <td><italic>P</italic> values of 2-tailed <italic>t</italic> test on delta</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>Delta accuracy (%)</td>
                <td>15.85 (0.22)</td>
                <td>1.66 (1.56)</td>
                <td>14.19</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Delta true positive rate (%)</td>
                <td>−0.88 (8.39)</td>
                <td>3.74 (6.74)</td>
                <td>4.63</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Delta false positive rate (%)</td>
                <td>33.43 (13.50)</td>
                <td>5.58 (9.88)</td>
                <td>27.85</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Statistical parity difference (%)</td>
                <td>26.10 (4.16)</td>
                <td>−2.70 (1.71)</td>
                <td>28.80</td>
                <td>&#60;.001</td>
              </tr>
              <tr valign="top">
                <td>Disparate impact</td>
                <td>0.682 (0.049)</td>
                <td>1.09 (0.041)</td>
                <td>0.408</td>
                <td>&#60;.001</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>To test the significance of these improvements, we conducted a 2-tailed <italic>t</italic> test with α=.05 for each of the bias metrics for the before and after scores. The changes in all metrics were noteworthy (<italic>P</italic>&#60;.001). The bias levels were reduced for 4 of the 5 metrics considered in this study. The only exception was ∆TPR, which was the only metric with a low (&#60;5%) score in the baseline condition. This value remained &#60;5% before and after the bias reduction process.</p>
        <p>Note that as we move toward making the algorithm less biased, there is often a trade-off that arises in the form of the reduced overall accuracy of the model [<xref ref-type="bibr" rid="ref13">13</xref>]. The accuracy levels for men and women were 87.16% and 71.31%, respectively (∆accuracy 15.85%; mean 78.50%), before bias reduction. The accuracy levels changed to 78.49% and 76.83% for men and women, respectively (∆accuracy 1.66%; mean 77.12%), after the bias reduction process. The 1.38% reduction (78.50%-77.12%) in the model accuracy was considered an acceptable loss in accuracy for the abovementioned improvements in fairness.</p>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <sec>
          <title>RQs of the Study</title>
          <p>The first RQ in this work was as follows: are mobile phone–based mental health algorithms susceptible to bias in terms of gender?</p>
          <p>As summarized in <xref ref-type="table" rid="table1">Table 1</xref>, we found statistically significant differences across genders in the performance of phone-based mental health assessment algorithms with an array of common ML algorithms. All of these point to the potential for disparate impact across gender with mental health assessment algorithms.</p>
          <p>With respect to the performance of the highest accuracy algorithm (using random forest), we found noticeable differences in the performance of the algorithm across genders via the 5 commonly used bias metrics. As shown in <xref ref-type="table" rid="table2">Table 2</xref>, there was a difference in terms of all 5 metrics between the male and female groups. In particular, we found that the disparate impact ratio was 0.682 in the initial model. However, this value was much lower than the often recommended (and legally accepted) threshold of 0.8, irrespective of the intent of the designers [<xref ref-type="bibr" rid="ref29">29</xref>]. Although the in-principle replications of algorithms described in the past literature may yield reasonable accuracy, their deployment will require them to meet the legal and ethical guidelines of disparate impact. In addition, similar fairness issues have been well studied in some other spaces (eg, policing and bank loans [<xref ref-type="bibr" rid="ref44">44</xref>,<xref ref-type="bibr" rid="ref45">45</xref>]); they are much less explored in algorithmic mental health assessment. However, they will become important with the increased deployment or adoption of mobile mental health tools.</p>
          <p>The results also point to another domain in which women are disadvantaged. As per the US Department of Labor Statistics, women earn 82 cents for every dollar earned by men [<xref ref-type="bibr" rid="ref46">46</xref>]. Similarly, recent research has reported worse performance for women in face recognition [<xref ref-type="bibr" rid="ref47">47</xref>], Google Translate [<xref ref-type="bibr" rid="ref48">48</xref>], and image search results [<xref ref-type="bibr" rid="ref18">18</xref>]. The awareness of such disparities is an important first step in the creation of countermeasures. Broadly, such results in intersection with growing movements such as <italic>Data Feminism</italic> [<xref ref-type="bibr" rid="ref49">49</xref>] can support the creation of more equitable algorithms. Specifically, we hope that our findings will shed light on the need to ensure fairness in emerging mental health–related domains.</p>
          <p>Finally, there are multiple potential reasons for the reduced performance of women in the considered algorithms. Given that the performance is consistently poorer for all the considered ML algorithms (<xref ref-type="table" rid="table1">Table 1</xref>), possible explanations may lie in the <italic>negative legacy</italic> and <italic>data set imbalance</italic>. Data imbalance is the lack of data samples from a particular demographic group for algorithms to learn from, and negative legacy refers to the lack of positive examples for algorithms to learn from for the unprivileged group [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref31">31</xref>]. For instance, Buolamwini and Gebru [<xref ref-type="bibr" rid="ref47">47</xref>] argued that a lack of training samples is a reason for poorer performance for women and people of color. Similar to other areas, and perhaps even more urgently, there is a need for more diverse data samples to create accurate and fair ML models in mental health assessment algorithms.</p>
          <p>The second RQ in this study was as follows: is it possible to reduce the level of bias while maintaining high accuracy?</p>
          <p>On the basis of the results summarized in <xref ref-type="table" rid="table4">Table 4</xref>, we found that the DIR approach was effective in reducing the disparity in the performance of phone-based mental health assessment algorithms across genders. As reported in <xref ref-type="table" rid="table4">Table 4</xref>, there were statistically significant differences in terms of all 5 fairness metrics considered upon the application of the DIR approach.</p>
          <p>Past literature has discussed the need for multiple metrics to characterize bias in ML [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref31">31</xref>] and that metrics can be orthogonal to each other [<xref ref-type="bibr" rid="ref25">25</xref>,<xref ref-type="bibr" rid="ref44">44</xref>]. A suggested process is for system designers to identify a set of parameters that they consider appropriate for a given task [<xref ref-type="bibr" rid="ref50">50</xref>]. In this study, we considered disparate impact to be an important criterion, considered in consultation with the scores for other fairness metrics. In the considered scenario, noticeably large reductions in bias levels were observed regarding the 4 metrics, except for ∆TPR, where the scores were &#60;5% before and after bias reduction. Finally, we noted that there was a 1.38% decrease in accuracy upon the application of the bias reduction approach.</p>
          <p>Overall, we interpreted the results to imply that it is often possible to create fairer versions of algorithms. However, given the variety of fairness metrics that can be considered and the complexities of practical scenarios, the process of bias reduction is likely to involve a human-in-the-loop process and consideration of the trade-offs in terms of multiple metrics [<xref ref-type="bibr" rid="ref50">50</xref>]. Hence, rather than identifying a silver bullet solution, there might be opportunities for multiple small modifications that allow fairer versions of the algorithms. Having said that, value-sensitive design needs to be an important part of the future design of similar applications [<xref ref-type="bibr" rid="ref51">51</xref>], and algorithmic audits need to become an essential step in the process of medical approval of newer (algorithmic) diagnostic tools.</p>
          <p>The obtained results have multiple implications for different stakeholders engaged in health information systems.</p>
        </sec>
        <sec>
          <title>Health Informatics Researchers and Policy Designers</title>
          <p>This study moves the conversation with health policy designers beyond the equity of the built environment (eg, access to hospitals and parks) to the equity of data infrastructure, which can profoundly influence the health outcomes for millions of individuals going forward [<xref ref-type="bibr" rid="ref52">52</xref>]. Although there exist multiple legal and policy guidelines that counter the physical aspects of bias (eg, redlining [<xref ref-type="bibr" rid="ref53">53</xref>]), there is relatively little work on legal and policy frameworks with digital algorithms that undertake similar roles.</p>
        </sec>
        <sec>
          <title>Health Care Technology Companies</title>
          <p>This study identified a feasible pathway for creating algorithms that balance accuracy and equity in the creation of novel health care applications. Hence, the findings support the creation of equitable versions of just-in-time mobile mental health intervention apps.</p>
        </sec>
        <sec>
          <title>Health Care Providers</title>
          <p>This study allows for more robust detection and flagging of mental health issues in patients. Fairer algorithms will reduce the odds of patients being flagged for interventions incorrectly simply because of demographic characteristics, thus allowing for the better alignment of resources between individual providers and the health care industry at large.</p>
        </sec>
        <sec>
          <title>The Public</title>
          <p>The ultimate goal of this study was to create and promote equity in mental health information technology. The fairness of algorithms is intimately connected with trust and adoption. In fact, recent research suggests that disparate impact diminishes consumer trust, even for advantaged users [<xref ref-type="bibr" rid="ref40">40</xref>]. A robust fair detection process will allow for the scalable delivery of just-in-time and tailored mental health support services to a wider population. This is important, given the huge disparity between the need for mental health support and the percentage of the population that uses mental health services [<xref ref-type="bibr" rid="ref54">54</xref>].</p>
        </sec>
      </sec>
      <sec>
        <title>Limitations</title>
        <p>This study has some limitations. It focused on a single data set with 55 individuals and considered a specific type of feature (phone data based, as described by Singh and Long [<xref ref-type="bibr" rid="ref8">8</xref>] in the past literature). The use of binary gender in the assessment is another limitation of this study. Although this study examined many of the commonly used ML methods, other approaches are well represented in the literature. Hence, we will be cautious in generalizing the results until they are supported at a scale with samples of more representative populations and many other ML algorithms. Future work may also suggest other bias reduction techniques to reduce the discriminatory outcomes of mental health assessment algorithms based on protected attributes. At the same time, this work is the first empirical effort to analyze the difference in the performance of mental health assessment algorithms based on gender. A key contribution of this study is the motivation for future work in this domain using varied data sets and methods.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study grounds the use of gender as a protected attribute to study fairness in phone-based mental health assessment algorithms. Mobile phones are now actively used by billions of individuals; hence, the automatic assessment of mental health using ML algorithms could potentially be beneficial in estimating and intervening in billions of individuals’ mental health conditions. An audit of commonly used ML algorithms for mental health assessment revealed that the performance of these algorithms can vary significantly depending on gender. This disparity in performance was found to be noticeably reduced after the application of a DIR approach by adapting the data used for modeling. The results move the literature forward on fairness in mental health assessment algorithms, particularly with gender as a protected attribute. Future work could consider larger data sets, protected attributes other than gender, and a newer approach to creating fair and accurate mental health assessment algorithms. Such results will pave the way for accurate and fair mental health support for all sections of society.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>The 5 metrics to measure bias in machine learning algorithms.</p>
        <media xlink:href="formative_v6i6e34366_app1.docx" xlink:title="DOCX File , 21 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">∆FPR</term>
          <def>
            <p>delta false positive rate</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">∆TPR</term>
          <def>
            <p>delta true positive rate</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">BER</term>
          <def>
            <p>balanced error rate</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">DIR</term>
          <def>
            <p>disparate impact remover</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb5">IMEI</term>
          <def>
            <p>International Mobile Equipment Identity</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb6">ML</term>
          <def>
            <p>machine learning</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb7">RQ</term>
          <def>
            <p>research question</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb8">SAT</term>
          <def>
            <p>Scholastic Assessment Test</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb9">SPD</term>
          <def>
            <p>statistical parity difference</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>The authors would like to thank Gautam Sikka and Rahul Ellezhuthil for their help with the data analysis. This work was partly based on funding from the Rutgers Community Design for Health and Wellness Interdisciplinary Research Group, Rutgers Center for COVID-19 Response and Pandemic Preparedness, and Rutgers School of Communication &#38; Information Scholarly Futures Grant.</p>
    </ack>
    <fn-group>
      <fn fn-type="conflict">
        <p>None declared.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Obermeyer</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Powers</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Vogeli</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mullainathan</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Dissecting racial bias in an algorithm used to manage the health of populations</article-title>
          <source>Science</source>
          <year>2019</year>
          <month>10</month>
          <day>25</day>
          <volume>366</volume>
          <issue>6464</issue>
          <fpage>447</fpage>
          <lpage>53</lpage>
          <pub-id pub-id-type="doi">10.1126/science.aax2342</pub-id>
          <pub-id pub-id-type="medline">31649194</pub-id>
          <pub-id pub-id-type="pii">366/6464/447</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Melcher</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Hays</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Torous</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Digital phenotyping for mental health of college students: a clinical review</article-title>
          <source>Evid Based Ment Health</source>
          <year>2020</year>
          <month>11</month>
          <volume>23</volume>
          <issue>4</issue>
          <fpage>161</fpage>
          <lpage>6</lpage>
          <pub-id pub-id-type="doi">10.1136/ebmental-2020-300180</pub-id>
          <pub-id pub-id-type="medline">32998937</pub-id>
          <pub-id pub-id-type="pii">ebmental-2020-300180</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kamishima</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Akaho</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Asoh</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Sakuma</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Fairness-aware classifier with prejudice remover regularizer</article-title>
          <source>Proceedings of the 2012 European Conference on Machine Learning and Knowledge Discovery in Databases</source>
          <year>2012</year>
          <conf-name>ECML PKDD '12</conf-name>
          <conf-date>September 24-28, 2012</conf-date>
          <conf-loc>Bristol, UK</conf-loc>
          <fpage>35</fpage>
          <lpage>50</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1007/978-3-642-33486-3_3"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/978-3-642-33486-3_3</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Allen</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Mataraso</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Siefkas</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Burdick</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Braden</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Dellinger</surname>
              <given-names>RP</given-names>
            </name>
            <name name-style="western">
              <surname>McCoy</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pellegrini</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Green-Saxena</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Barnes</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Calvert</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Das</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>A racially unbiased, machine learning approach to prediction of mortality: algorithm development study</article-title>
          <source>JMIR Public Health Surveill</source>
          <year>2020</year>
          <month>10</month>
          <day>22</day>
          <volume>6</volume>
          <issue>4</issue>
          <fpage>e22400</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://publichealth.jmir.org/2020/4/e22400/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/22400</pub-id>
          <pub-id pub-id-type="medline">33090117</pub-id>
          <pub-id pub-id-type="pii">v6i4e22400</pub-id>
          <pub-id pub-id-type="pmcid">PMC7644374</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gianfrancesco</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Tamang</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yazdany</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Schmajuk</surname>
              <given-names>G</given-names>
            </name>
          </person-group>
          <article-title>Potential biases in machine learning algorithms using electronic health record data</article-title>
          <source>JAMA Intern Med</source>
          <year>2018</year>
          <month>11</month>
          <day>01</day>
          <volume>178</volume>
          <issue>11</issue>
          <fpage>1544</fpage>
          <lpage>7</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://europepmc.org/abstract/MED/30128552"/>
          </comment>
          <pub-id pub-id-type="doi">10.1001/jamainternmed.2018.3763</pub-id>
          <pub-id pub-id-type="medline">30128552</pub-id>
          <pub-id pub-id-type="pii">2697394</pub-id>
          <pub-id pub-id-type="pmcid">PMC6347576</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gindidis</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Stewart</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Roodenburg</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>A systematic scoping review of adolescent mental health treatment using mobile apps</article-title>
          <source>Adv Ment Health</source>
          <year>2019</year>
          <volume>17</volume>
          <issue>2</issue>
          <fpage>161</fpage>
          <lpage>77</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1080/18387357.2018.1523680"/>
          </comment>
          <pub-id pub-id-type="doi">10.1080/18387357.2018.1523680</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Saeb</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Karr</surname>
              <given-names>CJ</given-names>
            </name>
            <name name-style="western">
              <surname>Schueller</surname>
              <given-names>SM</given-names>
            </name>
            <name name-style="western">
              <surname>Corden</surname>
              <given-names>ME</given-names>
            </name>
            <name name-style="western">
              <surname>Kording</surname>
              <given-names>KP</given-names>
            </name>
            <name name-style="western">
              <surname>Mohr</surname>
              <given-names>DC</given-names>
            </name>
          </person-group>
          <article-title>Mobile phone sensor correlates of depressive symptom severity in daily-life behavior: an exploratory study</article-title>
          <source>J Med Internet Res</source>
          <year>2015</year>
          <month>07</month>
          <day>15</day>
          <volume>17</volume>
          <issue>7</issue>
          <fpage>e175</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmir.org/2015/7/e175/"/>
          </comment>
          <pub-id pub-id-type="doi">10.2196/jmir.4273</pub-id>
          <pub-id pub-id-type="medline">26180009</pub-id>
          <pub-id pub-id-type="pii">v17i7e175</pub-id>
          <pub-id pub-id-type="pmcid">PMC4526997</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>VK</given-names>
            </name>
            <name name-style="western">
              <surname>Long</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Automatic assessment of mental health using phone metadata</article-title>
          <source>Proc Assoc Info Sci Technol</source>
          <year>2018</year>
          <volume>55</volume>
          <issue>1</issue>
          <fpage>450</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1002/pra2.2018.14505501049"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/pra2.2018.14505501049</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Abdullah</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Choudhury</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Sensing technologies for monitoring serious mental illnesses</article-title>
          <source>IEEE MultiMedia</source>
          <year>2018</year>
          <month>01</month>
          <volume>25</volume>
          <issue>1</issue>
          <fpage>61</fpage>
          <lpage>75</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1109/mmul.2018.011921236"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/mmul.2018.011921236</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Chen</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Li</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Harari</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Tignor</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Zhou</surname>
              <given-names>X</given-names>
            </name>
            <name name-style="western">
              <surname>Ben-Zeev</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Campbell</surname>
              <given-names>AT</given-names>
            </name>
          </person-group>
          <article-title>StudentLife: assessing mental health, academic performance and behavioral trends of college students using smartphones</article-title>
          <source>Proceedings of the 2014 ACM International Joint Conference on Pervasive and Ubiquitous Computing</source>
          <year>2014</year>
          <conf-name>UbiComp '14</conf-name>
          <conf-date>September 13-17, 2014</conf-date>
          <conf-loc>Seattle, WA, USA</conf-loc>
          <fpage>3</fpage>
          <lpage>14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1145/2632048.2632054"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/2632048.2632054</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Servia-Rodríguez</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Rachuri</surname>
              <given-names>KK</given-names>
            </name>
            <name name-style="western">
              <surname>Mascolo</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Rentfrow</surname>
              <given-names>PJ</given-names>
            </name>
            <name name-style="western">
              <surname>Lathia</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Sandstrom</surname>
              <given-names>GM</given-names>
            </name>
          </person-group>
          <article-title>Mobile sensing at the service of mental well-being: a large-scale longitudinal study</article-title>
          <source>Proceedings of the 26th International Conference on World Wide Web</source>
          <year>2017</year>
          <conf-name>WWW '17</conf-name>
          <conf-date>April 3-7, 2017</conf-date>
          <conf-loc>Perth, Australia</conf-loc>
          <fpage>103</fpage>
          <lpage>12</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1145/3038912.3052618"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3038912.3052618</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Canzian</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Musolesi</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Trajectories of depression: unobtrusive monitoring of depressive states by means of smartphone mobility traces analysis</article-title>
          <source>Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing</source>
          <year>2015</year>
          <conf-name>UbiComp '15</conf-name>
          <conf-date>September 7-11, 2015</conf-date>
          <conf-loc>Osaka, Japan</conf-loc>
          <fpage>1293</fpage>
          <lpage>304</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1145/2750858.2805845"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/2750858.2805845</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lepri</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Oliver</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Letouzé</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Pentland</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Vinck</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Fair, transparent, and accountable algorithmic decision-making processes</article-title>
          <source>Philos Technol</source>
          <year>2018</year>
          <volume>31</volume>
          <issue>4</issue>
          <fpage>611</fpage>
          <lpage>27</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1007/s13347-017-0279-x"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s13347-017-0279-x</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kuhlman</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Jackson</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Chunara</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>No computation without representation: avoiding data and algorithm biases through diversity</article-title>
          <source>Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery &#38; Data Mining</source>
          <year>2020</year>
          <conf-name>KDD '20</conf-name>
          <conf-date>July 6-10, 2020</conf-date>
          <conf-loc>Virtual</conf-loc>
          <fpage>3593</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1145/3394486.3411074"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3394486.3411074</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bellamy</surname>
              <given-names>RK</given-names>
            </name>
            <name name-style="western">
              <surname>Dey</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hind</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hoffman</surname>
              <given-names>SC</given-names>
            </name>
            <name name-style="western">
              <surname>Houde</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kannan</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Lohia</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Martino</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mehta</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Mojsilovic</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Nagar</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ramamurthy</surname>
              <given-names>KN</given-names>
            </name>
            <name name-style="western">
              <surname>Richards</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Saha</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Sattigeri</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Varshney</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Y</given-names>
            </name>
          </person-group>
          <article-title>AI Fairness 360: an extensible toolkit for detecting and mitigating algorithmic bias</article-title>
          <source>IBM J Res Dev</source>
          <year>2019</year>
          <month>07</month>
          <day>01</day>
          <volume>63</volume>
          <issue>4/5</issue>
          <fpage>4:1</fpage>
          <lpage>15</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1147/JRD.2019.2942287"/>
          </comment>
          <pub-id pub-id-type="doi">10.1147/JRD.2019.2942287</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Feldman</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Friedler</surname>
              <given-names>SA</given-names>
            </name>
            <name name-style="western">
              <surname>Moeller</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Scheidegger</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Venkatasubramanian</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Certifying and removing disparate impact</article-title>
          <source>Proceedings of the 21st ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>
          <year>2015</year>
          <conf-name>KDD '15</conf-name>
          <conf-date>August 10-13, 2015</conf-date>
          <conf-loc>Sydney, Australia</conf-loc>
          <fpage>259</fpage>
          <lpage>68</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1145/2783258.2783311"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/2783258.2783311</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kamiran</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Karim</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>X</given-names>
            </name>
          </person-group>
          <article-title>Decision theory for discrimination-aware classification</article-title>
          <source>Proceedings of the IEEE 12th International Conference on Data Mining</source>
          <year>2012</year>
          <conf-name>ICDM '12</conf-name>
          <conf-date>December 10-13, 2012</conf-date>
          <conf-loc>Brussels, Belgium</conf-loc>
          <fpage>924</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1109/icdm.2012.45"/>
          </comment>
          <pub-id pub-id-type="doi">10.1109/icdm.2012.45</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>VK</given-names>
            </name>
            <name name-style="western">
              <surname>Chayko</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Inamdar</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Floegel</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Female librarians and male computer programmers? Gender bias in occupational images on digital media platforms</article-title>
          <source>J Assoc Inf Sci Technol</source>
          <year>2020</year>
          <month>01</month>
          <day>22</day>
          <volume>71</volume>
          <issue>11</issue>
          <fpage>1281</fpage>
          <lpage>94</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1002/asi.24335"/>
          </comment>
          <pub-id pub-id-type="doi">10.1002/asi.24335</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bolukbasi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Zou</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Saligrama</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Kalai</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Man is to computer programmer as woman is to homemaker? Debiasing word embeddings</article-title>
          <source>Proceedings of the 30th International Conference on Neural Information Processing Systems</source>
          <year>2016</year>
          <conf-name>NIPS'16</conf-name>
          <conf-date>December 5-10, 2016</conf-date>
          <conf-loc>Barcelona, Spain</conf-loc>
          <fpage>4356</fpage>
          <lpage>64</lpage>
          <pub-id pub-id-type="doi">10.5555/3157382.3157584</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhao</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Wang</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Yatskar</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Ordonez</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Chang</surname>
              <given-names>KW</given-names>
            </name>
          </person-group>
          <article-title>Men also like shopping: reducing gender bias amplification using corpus-level constraints</article-title>
          <source>Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing</source>
          <year>2017</year>
          <conf-name>EMNLP '17</conf-name>
          <conf-date>September 7-11, 2017</conf-date>
          <conf-loc>Copenhagen, Denmark</conf-loc>
          <fpage>2979</fpage>
          <lpage>89</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.18653/v1/d17-1323"/>
          </comment>
          <pub-id pub-id-type="doi">10.18653/v1/d17-1323</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rawls</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>A Theory of Justice: Revised edition</source>
          <year>1999</year>
          <publisher-loc>Cambridge, MA, USA</publisher-loc>
          <publisher-name>Harvard University Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Lee</surname>
              <given-names>MK</given-names>
            </name>
            <name name-style="western">
              <surname>Jain</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Cha</surname>
              <given-names>HJ</given-names>
            </name>
            <name name-style="western">
              <surname>Ojha</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kusbit</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>Procedural justice in algorithmic fairness: leveraging transparency and outcome control for fair algorithmic mediation</article-title>
          <source>Proc ACM Hum Comput Interact</source>
          <year>2019</year>
          <month>11</month>
          <day>07</day>
          <volume>3</volume>
          <issue>CSCW</issue>
          <fpage>1</fpage>
          <lpage>26</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1145/3359284"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3359284</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Duster</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Individual fairness, group preferences, and the California strategy</article-title>
          <source>Representations</source>
          <year>1996</year>
          <volume>55</volume>
          <fpage>41</fpage>
          <lpage>58</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.2307/3043735"/>
          </comment>
          <pub-id pub-id-type="doi">10.2307/3043735</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fish</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Bashardoust</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Boyd</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Friedler</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Scheidegger</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Venkatasubramanian</surname>
              <given-names>S</given-names>
            </name>
          </person-group>
          <article-title>Gaps in information access in social networks?</article-title>
          <source>Proceedings of the 2019 World Wide Web Conference</source>
          <year>2019</year>
          <conf-name>WWW '19</conf-name>
          <conf-date>May 13-17, 2019</conf-date>
          <conf-loc>San Francisco, CA, USA</conf-loc>
          <fpage>480</fpage>
          <lpage>90</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1145/3308558.3313680"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3308558.3313680</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kleinberg</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Mullainathan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Raghavan</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Inherent trade-offs in the fair determination of risk scores</article-title>
          <source>Proceedings of the 8th Innovations in Theoretical Computer Science Conference</source>
          <year>2017</year>
          <conf-name>ITCS '17</conf-name>
          <conf-date>January 9-11, 2017</conf-date>
          <conf-loc>Berkeley, CA, USA</conf-loc>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://drops.dagstuhl.de/opus/volltexte/2017/8156/"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref26">
        <label>26</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Gummadi</surname>
              <given-names>KP</given-names>
            </name>
            <name name-style="western">
              <surname>Heidari</surname>
              <given-names>H</given-names>
            </name>
          </person-group>
          <article-title>Economic theories of distributive justice for fair machine learning</article-title>
          <source>Proceedings of the 2019 World Wide Web Conference</source>
          <year>2019</year>
          <conf-name>WWW '19</conf-name>
          <conf-date>May 13-17, 2019</conf-date>
          <conf-loc>San Francisco, CA, USA</conf-loc>
          <fpage>1301</fpage>
          <lpage>2</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1145/3308560.3320101"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3308560.3320101</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref27">
        <label>27</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ötting</surname>
              <given-names>SK</given-names>
            </name>
            <name name-style="western">
              <surname>Maier</surname>
              <given-names>GW</given-names>
            </name>
          </person-group>
          <article-title>The importance of procedural justice in Human–Machine Interactions: intelligent systems as new decision agents in organizations</article-title>
          <source>Comput Human Behav</source>
          <year>2018</year>
          <month>12</month>
          <volume>89</volume>
          <fpage>27</fpage>
          <lpage>39</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.chb.2018.07.022"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.chb.2018.07.022</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref28">
        <label>28</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Barocas</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Selbst</surname>
              <given-names>AD</given-names>
            </name>
          </person-group>
          <article-title>Big data's disparate impact</article-title>
          <source>Calif L Rev</source>
          <year>2016</year>
          <volume>104</volume>
          <fpage>671</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.2139/ssrn.2477899"/>
          </comment>
          <pub-id pub-id-type="doi">10.2139/ssrn.2477899</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref29">
        <label>29</label>
        <nlm-citation citation-type="web">
          <article-title>EEOC v. Sambo's of Georgia, Inc., 530 F. Supp. 86 (N.D. Ga. 1981): US District Court for the Northern District of Georgia</article-title>
          <source>Justia US Law</source>
          <year>1981</year>
          <month>12</month>
          <day>30</day>
          <access-date>2021-07-09</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://law.justia.com/cases/federal/district-courts/FSupp/530/86/1370384/">https://law.justia.com/cases/federal/district-courts/FSupp/530/86/1370384/</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref30">
        <label>30</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Ware</surname>
              <given-names>JE</given-names>
              <suffix>Jr</suffix>
            </name>
            <name name-style="western">
              <surname>Sherbourne</surname>
              <given-names>CD</given-names>
            </name>
          </person-group>
          <article-title>The MOS 36-item short-form health survey (SF-36). I. Conceptual framework and item selection</article-title>
          <source>Med Care</source>
          <year>1992</year>
          <month>06</month>
          <volume>30</volume>
          <issue>6</issue>
          <fpage>473</fpage>
          <lpage>83</lpage>
          <pub-id pub-id-type="medline">1593914</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref31">
        <label>31</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pessach</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Shmueli</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>A review on fairness in machine learning</article-title>
          <source>ACM Comput Surv</source>
          <year>2023</year>
          <month>04</month>
          <day>30</day>
          <volume>55</volume>
          <issue>3</issue>
          <fpage>1</fpage>
          <lpage>44</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://arxiv.org/pdf/2001.09784.pdf"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3494672</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref32">
        <label>32</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chawla</surname>
              <given-names>NV</given-names>
            </name>
            <name name-style="western">
              <surname>Bowyer</surname>
              <given-names>KW</given-names>
            </name>
            <name name-style="western">
              <surname>Hall</surname>
              <given-names>LO</given-names>
            </name>
            <name name-style="western">
              <surname>Kegelmeyer</surname>
              <given-names>WP</given-names>
            </name>
          </person-group>
          <article-title>SMOTE: synthetic minority over-sampling technique</article-title>
          <source>J Artif Intell Res</source>
          <year>2002</year>
          <month>06</month>
          <day>01</day>
          <volume>16</volume>
          <fpage>321</fpage>
          <lpage>57</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1613/jair.953"/>
          </comment>
          <pub-id pub-id-type="doi">10.1613/jair.953</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref33">
        <label>33</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Monfreda</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <person-group person-group-type="editor">
            <name name-style="western">
              <surname>Sanguansat</surname>
              <given-names>P</given-names>
            </name>
          </person-group>
          <article-title>Principal component analysis: a powerful interpretative tool at the service of analytical methodology</article-title>
          <source>Principal Component Analysis</source>
          <year>2012</year>
          <publisher-loc>London, UK</publisher-loc>
          <publisher-name>IntechOpen</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref34">
        <label>34</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dantas</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>The importance of k-fold cross-validation for model prediction in machine learning</article-title>
          <source>Towards Data Science</source>
          <year>2020</year>
          <month>11</month>
          <day>04</day>
          <access-date>2021-10-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://towardsdatascience.com/the-importance-of-k-fold-cross-validation-for-model-prediction-in-machine-learning-4709d3fed2ef">https://towardsdatascience.com/the-importance-of-k-fold-cross-validation-for-model-prediction-in-machine-learning-4709d3fed2ef</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref35">
        <label>35</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pedregosa</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Varoquaux</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Gramfort</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Thirion</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Scikit-learn: machine learning in Python</article-title>
          <source>J Mach Learn Res</source>
          <year>2011</year>
          <month>10</month>
          <volume>12</volume>
          <issue>2011</issue>
          <fpage>2825</fpage>
          <lpage>30</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.jmlr.org/papers/volume12/pedregosa11a/pedregosa11a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref36">
        <label>36</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Webster</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <source>Shaping Women's Work: Gender, Employment and Information Technology</source>
          <year>2014</year>
          <publisher-loc>Milton Park, UK</publisher-loc>
          <publisher-name>Routledge</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref37">
        <label>37</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Sarwono</surname>
              <given-names>BK</given-names>
            </name>
          </person-group>
          <article-title>Gender bias in a patriarchal society: a media analysis on virginity and reproductive health</article-title>
          <source>Wacana</source>
          <year>2012</year>
          <month>04</month>
          <day>01</day>
          <volume>14</volume>
          <issue>1</issue>
          <fpage>37</fpage>
          <lpage>60</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.17510/wjhi.v14i1.48"/>
          </comment>
          <pub-id pub-id-type="doi">10.17510/wjhi.v14i1.48</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref38">
        <label>38</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Dwork</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Hardt</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Pitassi</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Reingold</surname>
              <given-names>O</given-names>
            </name>
            <name name-style="western">
              <surname>Zemel</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Fairness through awareness</article-title>
          <source>Proceedings of the 3rd Innovations in Theoretical Computer Science Conference</source>
          <year>2012</year>
          <conf-name>ITCS '12</conf-name>
          <conf-date>January 8-10, 2012</conf-date>
          <conf-loc>Cambridge, MA, USA</conf-loc>
          <fpage>214</fpage>
          <lpage>26</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1145/2090236.2090255"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/2090236.2090255</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref39">
        <label>39</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Alasadi</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Al Hilli</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Singh</surname>
              <given-names>VK</given-names>
            </name>
          </person-group>
          <article-title>Toward fairness in face matching algorithms</article-title>
          <source>Proceedings of the 1st International Workshop on Fairness, Accountability, and Transparency in MultiMedia</source>
          <year>2019</year>
          <conf-name>FAT/MM '19</conf-name>
          <conf-date>October 25, 2019</conf-date>
          <conf-loc>Nice, France</conf-loc>
          <fpage>19</fpage>
          <lpage>25</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1145/3347447.3356751"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3347447.3356751</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref40">
        <label>40</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Draws</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Szlávik</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Timmermans</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Tintarev</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Varshney</surname>
              <given-names>KR</given-names>
            </name>
            <name name-style="western">
              <surname>Hind</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Disparate impact diminishes consumer trust even for advantaged users</article-title>
          <source>Proceedings of the 16th International Conference on Persuasive Technology</source>
          <year>2021</year>
          <conf-name>PERSUASIVE '21</conf-name>
          <conf-date>April 12-14, 2021</conf-date>
          <conf-loc>Virtual</conf-loc>
          <fpage>135</fpage>
          <lpage>49</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1007/978-3-030-79460-6_11"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/978-3-030-79460-6_11</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref41">
        <label>41</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hogg</surname>
              <given-names>RV</given-names>
            </name>
            <name name-style="western">
              <surname>McKean</surname>
              <given-names>JW</given-names>
            </name>
            <name name-style="western">
              <surname>Craig</surname>
              <given-names>AT</given-names>
            </name>
          </person-group>
          <source>Introduction to Mathematical Statistics. 6th edition</source>
          <year>2005</year>
          <publisher-loc>New York, NY, USA</publisher-loc>
          <publisher-name>Pearson</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref42">
        <label>42</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Kimbrough</surname>
              <given-names>AM</given-names>
            </name>
            <name name-style="western">
              <surname>Guadagno</surname>
              <given-names>RE</given-names>
            </name>
            <name name-style="western">
              <surname>Muscanell</surname>
              <given-names>NL</given-names>
            </name>
            <name name-style="western">
              <surname>Dill</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Gender differences in mediated communication: women connect more than do men</article-title>
          <source>Comput Human Behav</source>
          <year>2013</year>
          <month>05</month>
          <volume>29</volume>
          <issue>3</issue>
          <fpage>896</fpage>
          <lpage>900</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.chb.2012.12.005"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.chb.2012.12.005</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref43">
        <label>43</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Forgays</surname>
              <given-names>DK</given-names>
            </name>
            <name name-style="western">
              <surname>Hyman</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Schreiber</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Texting everywhere for everything: gender and age differences in cell phone etiquette and use</article-title>
          <source>Comput Human Behav</source>
          <year>2014</year>
          <month>02</month>
          <volume>31</volume>
          <fpage>314</fpage>
          <lpage>21</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1016/j.chb.2013.10.053"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.chb.2013.10.053</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref44">
        <label>44</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chouldechova</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Fair prediction with disparate impact: a study of bias in recidivism prediction instruments</article-title>
          <source>Big Data</source>
          <year>2017</year>
          <month>06</month>
          <volume>5</volume>
          <issue>2</issue>
          <fpage>153</fpage>
          <lpage>63</lpage>
          <pub-id pub-id-type="doi">10.1089/big.2016.0047</pub-id>
          <pub-id pub-id-type="medline">28632438</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref45">
        <label>45</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Taylor</surname>
              <given-names>WF</given-names>
            </name>
          </person-group>
          <article-title>The ECOA and disparate impact theory: a historical perspective</article-title>
          <source>J Law Policy</source>
          <year>2018</year>
          <volume>26</volume>
          <issue>2</issue>
          <fpage>575</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://brooklynworks.brooklaw.edu/jlp/vol26/iss2/3"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref46">
        <label>46</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Jones</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>5 Facts About the State of the Gender Pay Gap</article-title>
          <source>U.S. Department of Labor Blog</source>
          <year>2021</year>
          <month>03</month>
          <day>19</day>
          <access-date>2021-10-13</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://blog.dol.gov/2021/03/19/5-facts-about-the-state-of-the-gender-pay-gap">https://blog.dol.gov/2021/03/19/5-facts-about-the-state-of-the-gender-pay-gap</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref47">
        <label>47</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Buolamwini</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Gebru</surname>
              <given-names>T</given-names>
            </name>
          </person-group>
          <article-title>Gender shades: intersectional accuracy disparities in commercial gender classification</article-title>
          <source>Proceedings of 2018 Machine Learning Research Conference on Fairness, Accountability and Transparency</source>
          <year>2018</year>
          <conf-name>PMLR '18</conf-name>
          <conf-date>February 23-24, 2018</conf-date>
          <conf-loc>New York, NY, USA</conf-loc>
          <fpage>1</fpage>
          <lpage>15</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="http://proceedings.mlr.press/v81/buolamwini18a/buolamwini18a.pdf"/>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref48">
        <label>48</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Prates</surname>
              <given-names>MO</given-names>
            </name>
            <name name-style="western">
              <surname>Avelar</surname>
              <given-names>PH</given-names>
            </name>
            <name name-style="western">
              <surname>Lamb</surname>
              <given-names>LC</given-names>
            </name>
          </person-group>
          <article-title>Assessing gender bias in machine translation: a case study with Google Translate</article-title>
          <source>Neural Comput Applic</source>
          <year>2020</year>
          <volume>32</volume>
          <issue>10</issue>
          <fpage>6363</fpage>
          <lpage>81</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1007/s00521-019-04144-6"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s00521-019-04144-6</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref49">
        <label>49</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>D'Ignazio</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Klein</surname>
              <given-names>LF</given-names>
            </name>
          </person-group>
          <source>Data Feminism</source>
          <year>2020</year>
          <publisher-loc>Cambridge, MA, USA</publisher-loc>
          <publisher-name>MIT Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref50">
        <label>50</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Noriega-Campero</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Bakker</surname>
              <given-names>MA</given-names>
            </name>
            <name name-style="western">
              <surname>Garcia-Bulle</surname>
              <given-names>B</given-names>
            </name>
            <name name-style="western">
              <surname>Pentland</surname>
              <given-names>AS</given-names>
            </name>
          </person-group>
          <article-title>Active fairness in algorithmic decision making</article-title>
          <source>Proceedings of the 2019 AAAI/ACM Conference on AI, Ethics, and Society</source>
          <year>2019</year>
          <conf-name>AIES '19</conf-name>
          <conf-date>January 27-28, 2019</conf-date>
          <conf-loc>Honolulu, HI, USA</conf-loc>
          <fpage>77</fpage>
          <lpage>83</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1145/3306618.3314277"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/3306618.3314277</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref51">
        <label>51</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Friedman</surname>
              <given-names>B</given-names>
            </name>
          </person-group>
          <article-title>Value-sensitive design</article-title>
          <source>interactions</source>
          <year>1996</year>
          <month>12</month>
          <volume>3</volume>
          <issue>6</issue>
          <fpage>16</fpage>
          <lpage>23</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1145/242485.242493"/>
          </comment>
          <pub-id pub-id-type="doi">10.1145/242485.242493</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref52">
        <label>52</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Braveman</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Arkin</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Orleans</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Proctor</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Acker</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Plough</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>What is health equity?</article-title>
          <source>Behav Sci Policy</source>
          <year>2018</year>
          <volume>4</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>14</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1353/bsp.2018.0000"/>
          </comment>
          <pub-id pub-id-type="doi">10.1353/bsp.2018.0000</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref53">
        <label>53</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zenou</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Boccard</surname>
              <given-names>N</given-names>
            </name>
          </person-group>
          <article-title>Racial discrimination and redlining in cities</article-title>
          <source>J Urban Econ</source>
          <year>2000</year>
          <month>09</month>
          <volume>48</volume>
          <issue>2</issue>
          <fpage>260</fpage>
          <lpage>85</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://doi.org/10.1006/juec.1999.2166"/>
          </comment>
          <pub-id pub-id-type="doi">10.1006/juec.1999.2166</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref54">
        <label>54</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Augsberger</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Yeung</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Dougher</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Hahm</surname>
              <given-names>HC</given-names>
            </name>
          </person-group>
          <article-title>Factors influencing the underutilization of mental health services among Asian American women with a history of depression and suicide</article-title>
          <source>BMC Health Serv Res</source>
          <year>2015</year>
          <month>12</month>
          <day>08</day>
          <volume>15</volume>
          <fpage>542</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://bmchealthservres.biomedcentral.com/articles/10.1186/s12913-015-1191-7"/>
          </comment>
          <pub-id pub-id-type="doi">10.1186/s12913-015-1191-7</pub-id>
          <pub-id pub-id-type="medline">26645481</pub-id>
          <pub-id pub-id-type="pii">10.1186/s12913-015-1191-7</pub-id>
          <pub-id pub-id-type="pmcid">PMC4673784</pub-id>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
