<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "http://dtd.nlm.nih.gov/publishing/2.0/journalpublishing.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="2.0">
  <front>
    <journal-meta>
      <journal-id journal-id-type="publisher-id">JFR</journal-id>
      <journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id>
      <journal-title>JMIR Formative Research</journal-title>
      <issn pub-type="epub">2561-326X</issn>
      <publisher>
        <publisher-name>JMIR Publications</publisher-name>
        <publisher-loc>Toronto, Canada</publisher-loc>
      </publisher>
    </journal-meta>
    <article-meta>
      <article-id pub-id-type="publisher-id">v7i1e45376</article-id>
      <article-id pub-id-type="pmid">37713239</article-id>
      <article-id pub-id-type="doi">10.2196/45376</article-id>
      <article-categories>
        <subj-group subj-group-type="heading">
          <subject>Original Paper</subject>
        </subj-group>
        <subj-group subj-group-type="article-type">
          <subject>Original Paper</subject>
        </subj-group>
      </article-categories>
      <title-group>
        <article-title>Developing a Semantically Based Query Recommendation for an Electronic Medical Record Search Engine: Query Log Analysis and Design Implications</article-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="editor">
          <name>
            <surname>Mavragani</surname>
            <given-names>Amaryllis</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Fultz Hollis</surname>
            <given-names>Kate</given-names>
          </name>
        </contrib>
        <contrib contrib-type="reviewer">
          <name>
            <surname>Zheng</surname>
            <given-names>Jiaping</given-names>
          </name>
        </contrib>
      </contrib-group>
      <contrib-group>
        <contrib id="contrib1" contrib-type="author" corresp="yes">
          <name name-style="western">
            <surname>Wu</surname>
            <given-names>Danny T Y</given-names>
          </name>
          <degrees>MSI, PhD</degrees>
          <xref rid="aff1" ref-type="aff">1</xref>
          <address>
            <institution>Department of Biomedical Informatics</institution>
            <institution>University of Cincinnati College of Medicine</institution>
            <addr-line>231 Albert Sabin Way, ML0840</addr-line>
            <addr-line>Cincinnati, OH, 45267</addr-line>
            <country>United States</country>
            <phone>1 5135586464</phone>
            <email>wutz@ucmail.uc.edu</email>
          </address>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-7658-3754</ext-link>
        </contrib>
        <contrib id="contrib2" contrib-type="author">
          <name name-style="western">
            <surname>Hanauer</surname>
            <given-names>David</given-names>
          </name>
          <degrees>MD, MSc</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0001-6931-3791</ext-link>
        </contrib>
        <contrib id="contrib3" contrib-type="author">
          <name name-style="western">
            <surname>Murdock</surname>
            <given-names>Paul</given-names>
          </name>
          <degrees>MSc</degrees>
          <xref rid="aff4" ref-type="aff">4</xref>
          <xref rid="aff5" ref-type="aff">5</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-5757-4641</ext-link>
        </contrib>
        <contrib id="contrib4" contrib-type="author">
          <name name-style="western">
            <surname>Vydiswaran</surname>
            <given-names>V G Vinod</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff3" ref-type="aff">3</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-3122-1936</ext-link>
        </contrib>
        <contrib id="contrib5" contrib-type="author">
          <name name-style="western">
            <surname>Mei</surname>
            <given-names>Qiaozhu</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0002-8640-1942</ext-link>
        </contrib>
        <contrib id="contrib6" contrib-type="author">
          <name name-style="western">
            <surname>Zheng</surname>
            <given-names>Kai</given-names>
          </name>
          <degrees>PhD</degrees>
          <xref rid="aff2" ref-type="aff">2</xref>
          <xref rid="aff6" ref-type="aff">6</xref>
          <ext-link ext-link-type="orcid">https://orcid.org/0000-0003-4121-4948</ext-link>
        </contrib>
      </contrib-group>
      <aff id="aff1">
        <label>1</label>
        <institution>Department of Biomedical Informatics</institution>
        <institution>University of Cincinnati College of Medicine</institution>
        <addr-line>Cincinnati, OH</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff2">
        <label>2</label>
        <institution>School of Information</institution>
        <institution>University of Michigan</institution>
        <addr-line>Ann Arbor, MI</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff3">
        <label>3</label>
        <institution>Department of Learning Health Sciences</institution>
        <institution>University of Michigan</institution>
        <addr-line>Ann Arbor, MI</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff4">
        <label>4</label>
        <institution>Burnett School of Medicine</institution>
        <institution>Texas Christian University</institution>
        <addr-line>Fort Worth, TX</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff5">
        <label>5</label>
        <institution>Department of Biomedical Informatics</institution>
        <institution>University of Cincinnati</institution>
        <addr-line>Cincinnati, OH</addr-line>
        <country>United States</country>
      </aff>
      <aff id="aff6">
        <label>6</label>
        <institution>Department of Informatics</institution>
        <institution>University of California</institution>
        <addr-line>Irvine, CA</addr-line>
        <country>United States</country>
      </aff>
      <author-notes>
        <corresp>Corresponding Author: Danny T Y Wu <email>wutz@ucmail.uc.edu</email></corresp>
      </author-notes>
      <pub-date pub-type="collection">
        <year>2023</year>
      </pub-date>
      <pub-date pub-type="epub">
        <day>15</day>
        <month>9</month>
        <year>2023</year>
      </pub-date>
      <volume>7</volume>
      <elocation-id>e45376</elocation-id>
      <history>
        <date date-type="received">
          <day>28</day>
          <month>12</month>
          <year>2022</year>
        </date>
        <date date-type="rev-request">
          <day>7</day>
          <month>6</month>
          <year>2023</year>
        </date>
        <date date-type="rev-recd">
          <day>19</day>
          <month>7</month>
          <year>2023</year>
        </date>
        <date date-type="accepted">
          <day>3</day>
          <month>8</month>
          <year>2023</year>
        </date>
      </history>
      <copyright-statement>©Danny T Y Wu, David Hanauer, Paul Murdock, V G Vinod Vydiswaran, Qiaozhu Mei, Kai Zheng. Originally published in JMIR Formative Research (https://formative.jmir.org), 15.09.2023.</copyright-statement>
      <copyright-year>2023</copyright-year>
      <license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/">
        <p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (https://creativecommons.org/licenses/by/4.0/), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on https://formative.jmir.org, as well as this copyright and license information must be included.</p>
      </license>
      <self-uri xlink:href="https://formative.jmir.org/2023/1/e45376" xlink:type="simple"/>
      <abstract>
        <sec sec-type="background">
          <title>Background</title>
          <p>An effective and scalable information retrieval (IR) system plays a crucial role in enabling clinicians and researchers to harness the valuable information present in electronic health records. In a previous study, we developed a prototype medical IR system, which incorporated a semantically based query recommendation (SBQR) feature. The system was evaluated empirically and demonstrated high perceived performance by end users. To delve deeper into the factors contributing to this perceived performance, we conducted a follow-up study using query log analysis.</p>
        </sec>
        <sec sec-type="objective">
          <title>Objective</title>
          <p>One of the primary challenges faced in IR is that users often have limited knowledge regarding their specific information needs. Consequently, an IR system, particularly its user interface, needs to be thoughtfully designed to assist users through the iterative process of refining their queries as they encounter relevant documents during their search. To address these challenges, we incorporated “query recommendation” into our Electronic Medical Record Search Engine (EMERSE), drawing inspiration from the success of similar features in modern IR systems for general purposes.</p>
        </sec>
        <sec sec-type="methods">
          <title>Methods</title>
          <p>The query log data analyzed in this study were collected during our previous experimental study, where we developed EMERSE with the SBQR feature. We implemented a logging mechanism to capture user query behaviors and the output of the IR system (retrieved documents). In this analysis, we compared the initial query entered by users with the query formulated with the assistance of the SBQR. By examining the results of this comparison, we could examine whether the use of SBQR helped in constructing improved queries that differed from the original ones.</p>
        </sec>
        <sec sec-type="results">
          <title>Results</title>
          <p>Our findings revealed that the first query entered without SBQR and the final query with SBQR assistance were highly similar (Jaccard similarity coefficient=0.77). This suggests that the perceived positive performance of the system was primarily attributed to the automatic query expansion facilitated by the SBQR rather than users manually manipulating their queries. In addition, through entropy analysis, we observed that search results converged in scenarios of moderate difficulty, and the degree of convergence correlated strongly with the perceived system performance.</p>
        </sec>
        <sec sec-type="conclusions">
          <title>Conclusions</title>
          <p>The study demonstrated the potential contribution of the SBQR in shaping participants' positive perceptions of system performance, contingent upon the difficulty of the search scenario. Medical IR systems should therefore consider incorporating an SBQR as a user-controlled option or a semiautomated feature. Future work entails redesigning the experiment in a more controlled manner and conducting multisite studies to demonstrate the effectiveness of EMERSE with SBQR for patient cohort identification. By further exploring and validating these findings, we can enhance the usability and functionality of medical IR systems in real-world settings.</p>
        </sec>
      </abstract>
      <kwd-group>
        <kwd>electronic health records</kwd>
        <kwd>information retrieval</kwd>
        <kwd>user-centered evaluation</kwd>
        <kwd>query recommendation</kwd>
        <kwd>query log analysis</kwd>
        <kwd>clinical research informatics</kwd>
      </kwd-group>
    </article-meta>
  </front>
  <body>
    <sec sec-type="introduction">
      <title>Introduction</title>
      <sec>
        <title>Background and Significance</title>
        <p>Clinical documentation is a central component of patient care, fundamental to clinicians’ ability to review patients’ medical history, make sense of current medical problems, and determine optimal treatment and care plans [<xref ref-type="bibr" rid="ref1">1</xref>]. Clinical documentation is usually accomplished through the use of electronic health record (EHR) systems, which collect a massive amount of detailed computerized patient data [<xref ref-type="bibr" rid="ref2">2</xref>]. In addition to direct use in clinical care, EHR data are valuable to research, administration, billing, and medical education. Among these secondary uses, supporting clinical and translational research has been of great interest due to EHRs’ potential to reduce the cost of research data collection, improve the efficiency of study conduction, and achieve higher study generalizability [<xref ref-type="bibr" rid="ref3">3</xref>,<xref ref-type="bibr" rid="ref4">4</xref>].</p>
        <p>Despite the great potential, the secondary uses of EHR data to support clinical and translational research can be difficult. One major difficulty is that a significant portion of EHRs is recorded in free-text form such as progress notes and discharge summaries, making the information locked in sentences and not easily available. These free-text data are highly expressive, flexible, and compatible with the workflow [<xref ref-type="bibr" rid="ref5">5</xref>] and can provide great insight into patients’ medical conditions. To use this information, it is common to use natural language processing techniques to extract information and identify patient cohorts [<xref ref-type="bibr" rid="ref6">6</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. However, natural language processing techniques are sometimes suboptimal due to a large amount of required training data, low usability of system output, and the performance tuned for local context [<xref ref-type="bibr" rid="ref8">8</xref>]. On the other hand, information retrieval (IR) systems can help users pinpoint information of interest and have been shown to be a viable, scalable, and user-friendly solution to leverage medical information recorded in free-text form.</p>
        <p>An electronic medical record search engine (EMERSE) was developed at the University of Michigan [<xref ref-type="bibr" rid="ref9">9</xref>]. EMERSE has been used to index large-scale routine medical documents for the past 18 years [<xref ref-type="bibr" rid="ref10">10</xref>] and is now being deployed at other academic medical centers. To enhance the performance of EMERSE, a semantically based query recommendation (SBQR) feature was developed in its early version and evaluated to help users expand or substitute original search queries and further improve the accuracy and relevancy of their search results. Our previous evaluation study showed that the SBQR exhibits high performance when evaluated by prospective end users [<xref ref-type="bibr" rid="ref11">11</xref>]. This follow-up study conducted further analysis to demonstrate how the SBQR may contribute to the positive feedback from the end users.</p>
      </sec>
      <sec>
        <title>Literature Review</title>
        <p>IR consists of a set of algorithms and techniques for effectively storing free-text documents and retrieving relevant documents to meet users’ information needs. A common challenge of IR is that users may have limited knowledge of what they are searching for. As Bates [<xref ref-type="bibr" rid="ref12">12</xref>] pointed out, users seek information in an evolving manner to deal with uncertainties that originate from a complex search environment. An IR system needs to be carefully designed, especially its user interface, to help users work through this “berry-picking” process where users’ information needs and queries change when relevant documents are found in the search process [<xref ref-type="bibr" rid="ref12">12</xref>]. These uncertainties are especially prominent in the medical domain. That is, clinicians and clinical researchers often have difficulties when formulating effective queries even with medical training and domain knowledge. It is even more challenging when searching for information in EHRs due to the complex nature of clinical documentation (eg, the use of interchangeable terms and nonstandard acronyms and abbreviations) [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>].</p>
        <p>One solution to addressing these issues is to use “query recommendation,” which has been an integral component in modern IR systems such as Google and Microsoft Bing. Query recommendation improves IR performance by suggesting alternative queries, which are derived from query log analysis and mining [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>] so that the IR system can retrieve more relevant documents and better match user needs. Query recommendations based on the content (plain text) of the documents can already boost the performance of a medical IR system by considering abbreviations, acronyms, and synonyms in EHRs. For example, the term “pat” can be a shortening of the word “patient” in certain situations but may also refer to “paroxysmal atrial tachycardia” in the medical context. On the other hand, query recommendations based on the medical concepts and semantics in the documents can be very powerful. For example, clinicians who search “car accident” in EHRs would appreciate the search results including terms such as “vehicle accident” or “motor accident” because of their similar meanings. While the SBQR has been studied and applied to IR in biomedical literature, images, and medical records [<xref ref-type="bibr" rid="ref17">17</xref>-<xref ref-type="bibr" rid="ref20">20</xref>], few studies have investigated end users’ perceptions on its performance. Our previous study addressed this gap by designing a prototype system with the SBQR [<xref ref-type="bibr" rid="ref21">21</xref>] and evaluated the system with 33 prospective end users (study participants) including clinicians, clinical researchers, and health care administrators [<xref ref-type="bibr" rid="ref11">11</xref>]. Each user was given 5 scenarios representing realistic information needs to search EHR notes. Each scenario was carefully designed with a difficulty level estimated by the research team (low, medium, or high). 
The results of our previous study showed that these participants had positive perceptions toward the SBQR, suggesting beneficial effects when implementing an SBQR in medical IR systems. Most of the participants would like to use the SBQR to assist with their day-to-day EHR search tasks. This study further analyzed the query logs and aimed to provide empirical evidence for the positive perception of the system performance and examined the role of the SBQR.</p>
      </sec>
    </sec>
    <sec sec-type="methods">
      <title>Methods</title>
      <sec>
        <title>Electronic Medical Record Search Engine</title>
        <p>The query log data analyzed in this paper were collected from our previous experimental study on EMERSE with SBQR [<xref ref-type="bibr" rid="ref11">11</xref>]. As illustrated in <xref rid="figure1" ref-type="fig">Figure 1</xref>, the SBQR mapped user-supplied queries to corresponding medical concepts using MetaMap (National Library of Medicine) [<xref ref-type="bibr" rid="ref22">22</xref>]. These medical concepts were then expanded to include synonyms based on two synonym sets: (1) a predefined set by the Unified Medical Language System and (2) an empirical collection from EMERSE based on historical searching records [<xref ref-type="bibr" rid="ref9">9</xref>]. The SBQR-expanded queries were further matched to the indexes of the corpus containing about 100,000 medical documents. These indexes were constructed following the same mapping process. With the assistance of the SBQR, the prototype system was expected to recall more relevant documents than it would without the SBQR. For example, with the SBQR, users can obtain documents containing both “hearing loss” and “difficulty of hearing” when searching with either term.</p>
        <fig id="figure1" position="float">
          <label>Figure 1</label>
          <caption>
            <p>Workflow of semantically based query recommendation feature [<xref ref-type="bibr" rid="ref21">21</xref>]. EHR: electronic health record; EMERSE: Electronic Medical Record Search Engine; IDF: inverse document frequency; TF: term frequency; UMLS: unified medical language system.</p>
          </caption>
          <graphic xlink:href="formative_v7i1e45376_fig1.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>User Information-Seeking Behaviors</title>
        <p>A logging mechanism was developed to keep track of the user query behaviors and the IR system output (retrieved documents). Specifically, the logs included the original user-supplied queries, the parsed medical concepts, the expanded terms when the SBQR was turned on, the top 30 retrieved documents, and the timestamps of each behavior. Five predefined scenarios with various difficulty levels (low, medium, or high), as estimated by the research team, were given to all 33 participants in the user experiment (<xref ref-type="table" rid="table1">Table 1</xref>). The order of the scenarios matches the order given to each participant. In each scenario, the participants were told to turn the SBQR off and formulate as many queries as needed until they retrieved satisfactory results. The participants were then asked to turn the SBQR on and conduct another round of searching until they reached another set of satisfactory results. <xref rid="figure2" ref-type="fig">Figure 2</xref> illustrates a general search process of a user in a scenario, highlighting the selected queries used for the analysis. To determine whether the use of the SBQR helped in constructing improved queries, the initial query that a user entered (Q<sub>A1</sub>) was compared with the query achieved with the assistance of the SBQR (Q<sub>Bn</sub>). The study did not compare the last query that a user entered (Q<sub>Am</sub>) and the last query assisted by the SBQR (Q<sub>Bn</sub>) because users could freely turn the SBQR on and off multiple times during the experiment to mimic real search behaviors. Such freedom, however, made the determination of Q<sub>Am</sub> very difficult.</p>
        <table-wrap position="float" id="table1">
          <label>Table 1</label>
          <caption>
            <p>Summary of query activities.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="80"/>
            <col width="140"/>
            <col width="190"/>
            <col width="190"/>
            <col width="190"/>
            <col width="210"/>
            <thead>
              <tr valign="top">
                <td>Scenario</td>
                <td>Estimated difficulty</td>
                <td>Average number of queries per user</td>
                <td>Average % of query with SBQR<sup>a</sup></td>
                <td>Average number of terms per user query</td>
                <td>Average number of characters per user query</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1<sup>b</sup></td>
                <td>Medium</td>
                <td>9.33</td>
                <td>36.1</td>
                <td>2.69</td>
                <td>32.26</td>
              </tr>
              <tr valign="top">
                <td>2<sup>c</sup></td>
                <td>Low</td>
                <td>8.24</td>
                <td>45.2</td>
                <td>2.56</td>
                <td>27.12</td>
              </tr>
              <tr valign="top">
                <td>3<sup>d</sup></td>
                <td>Medium</td>
                <td>13.45</td>
                <td>43.2</td>
                <td>3.54</td>
                <td>36.21</td>
              </tr>
              <tr valign="top">
                <td>4<sup>e</sup></td>
                <td>High</td>
                <td>8.24</td>
                <td>46.5</td>
                <td>3.28</td>
                <td>44.28</td>
              </tr>
              <tr valign="top">
                <td>5<sup>f</sup></td>
                <td>Medium</td>
                <td>6.97</td>
                <td>45.5</td>
                <td>3.02</td>
                <td>34.03</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table1fn1">
              <p><sup>a</sup>SBQR: semantically based query recommendation.</p>
            </fn>
            <fn id="table1fn2">
              <p><sup>b</sup>Scenario 1: You are doing a research project in which you want to identify people who have had a concussive episode after being in a car accident.</p>
            </fn>
            <fn id="table1fn3">
              <p><sup>c</sup>Scenario 2: You are interested in identifying patients who have the noninvasive form of breast cancer known as intraductal carcinoma (DCIS).</p>
            </fn>
            <fn id="table1fn4">
              <p><sup>d</sup>Scenario 3: Please try to identify patients who are smokers who have also been diagnosed with posttraumatic stress disorder.</p>
            </fn>
            <fn id="table1fn5">
              <p><sup>e</sup>Scenario 4: You are interested in how many patients are taking herbal supplements for the purposes of weight loss.</p>
            </fn>
            <fn id="table1fn6">
              <p><sup>f</sup>Scenario 5: Someone has asked you to determine if we have many patients diagnosed with mono who had an enlarged spleen.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
        <fig id="figure2" position="float">
          <label>Figure 2</label>
          <caption>
            <p>A sample and abstracted search process. Q<sub>A1</sub> is the first query that a user enters when the SBQR is turned off, resulting in the retrieved documents R<sub>A1</sub>. Users can try as many queries as they would like to and then turn the SBQR on manually. Q<sub>Bn</sub> is the last user-supplied query when the SBQR is turned on, which is expanded to Q’<sub>Bn</sub> by the SBQR, resulting in the retrieved documents R<sub>Bn</sub>. SBQR: semantically based query recommendation.</p>
          </caption>
          <graphic xlink:href="formative_v7i1e45376_fig2.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Log Analysis</title>
        <p>The query log analysis contained 3 parts. First, the participants’ query activities were summarized by the number of queries per user, the number of terms per query per user, and the percentage of queries that the participants submitted when the SBQR was turned on. Second, a similarity analysis was conducted to compare the initial query that a user entered (Q<sub>A1</sub>) and the last SBQR-assisted query (Q<sub>Bn</sub>) of each user in each scenario. The difference between Q<sub>A1</sub> and Q<sub>Bn</sub> was measured by the degree of term overlapping. That is, terms contained in Q<sub>A1</sub> were extracted and converted to their lower-case form and merged to construct a term vector (V<sub>A1</sub>) in each user scenario. The term vector of Q<sub>Bn</sub> (V<sub>Bn</sub>) was formed in the same way. The similarity between Q<sub>A1</sub> and Q<sub>Bn</sub> was measured by the Jaccard similarity coefficient of the 2 term vectors. The coefficient was defined as the ratio of intersection between the terms of these 2 vectors over the union of them (equation 1). It is hypothesized that the queries were not significantly different. This hypothesis is based on the tendency that users rely more on an IR system to automatically manipulate their queries than on their own to modify the queries. In other words, if the queries are shown to be very similar (eg, Jaccard similarity coefficient ≥0.7), users would not make much effort to change the queries. Therefore, any perceived positive system performance at the end of each scenario would likely come from the automatic query modifications by the SBQR rather than the manual query changes by the users.</p>
        <graphic xlink:href="formative_v7i1e45376_fig4.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <p>The third part of the query log analysis explored the relationship between the SBQR, the search results, and the perceived system performance through an entropy analysis. As illustrated in <xref rid="figure3" ref-type="fig">Figure 3</xref>, it is hypothesized that different participants would return similar results with the help of the SBQR given the semantic overlap of concepts in their queries when compared to the queries of each person without the assistance of the SBQR. Therefore, participants with the same information needs but different search strategies would retrieve a similar (or a converged) set of documents using the SBQR. In other words, the SBQR helped “standardize” the queries across multiple users. This hypothesis was examined by comparing the entropy of the result sets, that is, comparing R<sub>A1</sub> and R<sub>Bn</sub> in <xref rid="figure2" ref-type="fig">Figure 2</xref>. A result set is defined as the union of the top 10 documents retrieved in each search scenario. The entropy analysis gauged the level of uncertainty in a probability distribution, which is constructed by a set of probability scores from the retrieved documents in the result set. The probability score of a document was calculated using the ratio of the frequency of a document to the sum of the frequency of all the documents in the result set. These probability scores were turned into an entropy score using the formula described in equation 2. As equation 2 shows, the variable “<italic>x</italic>” represents the probability score of 1 resultant document. If the hypothesis is true, a decreased entropy score would be observed, leading to a negative difference in entropy between R<sub>A1</sub> and R<sub>Bn</sub>.</p>
        <graphic xlink:href="formative_v7i1e45376_fig5.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        <fig id="figure3" position="float">
          <label>Figure 3</label>
          <caption>
            <p>The effectiveness of the SBQR. Participants with the same information needs may use different search strategies (eg, terms), resulting in lower overlap between retrieved documents (left, high uncertainty). However, with the assistance of the SBQR, these users may be “brought together” by the SBQR and therefore retrieve a similar set of documents. SBQR: semantically based query recommendation.</p>
          </caption>
          <graphic xlink:href="formative_v7i1e45376_fig3.png" alt-version="no" mimetype="image" position="float" xlink:type="simple"/>
        </fig>
      </sec>
      <sec>
        <title>Statistical Analysis</title>
        <p>The difference in Jaccard similarity scores was examined statistically. First, the normality of the scores in each scenario and the homogeneity of the variance among scenarios were tested using the Kolmogorov-Smirnov test for goodness of fit and the Bartlett/Levene test for variance equality, respectively. If the distribution of the Jaccard similarity scores was not normal, a nonparametric test (Kruskal-Wallis) was used to examine group median differences. Query log data were stored and processed in a standalone SQLite database. Two data views were created to capture the first and last query of each user scenario. The procedures to calculate Jaccard similarity and entropy were implemented in Python (version 2.7; Python Software Foundation) with libraries “pandas,” “numpy,” and “scipy.” The use of the query log data was approved by the University of Michigan Health Sciences and Behavioral Sciences institutional review boards [<xref ref-type="bibr" rid="ref11">11</xref>].</p>
      </sec>
      <sec>
        <title>Ethical Considerations</title>
        <sec>
          <title>Ethics Approval</title>
          <p>This study received ethics review and approval from the Human Research Protection Program at the University of Michigan (HUM00057979) prior to its commencement. The study protocol, including the data collection procedures and informed consent process, was reviewed to ensure compliance with ethical guidelines for conducting research involving human subjects.</p>
        </sec>
        <sec>
          <title>Privacy and Confidentiality Protection</title>
          <p>To protect the privacy and confidentiality of the participants, several measures were implemented. All collected data, including query logs and retrieved documents, were anonymized or deidentified to ensure that no personally identifiable information could be linked to individual participants. Access to the data was restricted to authorized researchers involved in the study, and the data were securely stored on servers with restricted access. Data transmission was encrypted to prevent unauthorized interception or access.</p>
        </sec>
        <sec>
          <title>Secondary Analysis of Research Data</title>
          <p>The original informed consent (or the institutional review board approval) allowed for such secondary analyses without the need for additional consent. The data used in this study were obtained from a previous study, and the original informed consent process allowed for the analysis of the data for research purposes.</p>
        </sec>
      </sec>
    </sec>
    <sec sec-type="results">
      <title>Results</title>
      <sec>
        <title>Query Activities</title>
        <p>The search activities of the 33 participants resulted in 10,451 records with 2098 distinct queries. <xref ref-type="table" rid="table2">Table 2</xref> shows an example of a participant’s queries in scenario 2. The description of the scenarios can be seen in <xref ref-type="table" rid="table1">Table 1</xref>. The sample records in <xref ref-type="table" rid="table2">Table 2</xref> show that User 005 submitted a total of 6 queries, with 2 of them being SBQR-expanded queries (the fourth and fifth queries). The user started with a single-term query “dcis” without the SBQR and manually modified the query to “non-invasive dcis.” The user ended up with a query consisting of 2 concepts: “dcis” and “breast cancer.”</p>
        <p>The search activities of all scenarios are summarized in <xref ref-type="table" rid="table1">Table 1</xref>. Participants formulated between 6 and 13 queries, with nearly half constructed with the SBQR turned on. Scenario 1 was an exception, in which two-thirds of the queries were submitted when the SBQR was turned off. The queries contained 2 to 4 terms, and their length was between 27 and 44 characters. In the scenario with low difficulty, the participants formulated shorter queries, both in terms of number of terms (2.56) and the average number of characters (27.12). On the other hand, the participants tended to formulate longer queries with more terms (3.28) and characters (44.28) in the scenario with high difficulty.</p>
        <table-wrap position="float" id="table2">
          <label>Table 2</label>
          <caption>
            <p>A set of queries entered by user 005 (U005) in scenario 2 “You are interested in identifying patients who have the noninvasive form of breast cancer known as DCIS.” All query logs were uploaded in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="230"/>
            <col width="120"/>
            <col width="300"/>
            <col width="120"/>
            <col width="230"/>
            <thead>
              <tr valign="bottom">
                <td>Log ID</td>
                <td>Seq</td>
                <td>Query</td>
                <td>SBQR<sup>a</sup></td>
                <td>Time stamp</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1000006702<sup>b</sup></td>
                <td>1</td>
                <td>dcis</td>
                <td>Off</td>
                <td>09:54:34</td>
              </tr>
              <tr valign="top">
                <td>1000006712</td>
                <td>2</td>
                <td>non-invasive dcis</td>
                <td>Off</td>
                <td>09:55:11</td>
              </tr>
              <tr valign="top">
                <td>1000006717</td>
                <td>3</td>
                <td>non-invasive dcis breast cancer</td>
                <td>Off</td>
                <td>09:55:30</td>
              </tr>
              <tr valign="top">
                <td>1000006762</td>
                <td>4</td>
                <td>non-invasive dcis breast cancer</td>
                <td>On</td>
                <td>09:57:39</td>
              </tr>
              <tr valign="top">
                <td>1000006776<sup>b</sup></td>
                <td>5</td>
                <td>dcis, breast cancer</td>
                <td>On</td>
                <td>09:59:23</td>
              </tr>
              <tr valign="top">
                <td>1000006795</td>
                <td>6</td>
                <td>dcis, breast cancer</td>
                <td>Off</td>
                <td>10:00:30</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table2fn1">
              <p><sup>a</sup>SBQR: semantically based query recommendation feature.</p>
            </fn>
            <fn id="table2fn2">
              <p><sup>b</sup>Queries in Sequences 1 and 5 were selected for analysis: the first query when the SBQR was turned off (1000006702, Q<sub>A1</sub>) and the last query when the SBQR was turned on (1000006776, Q<sub>Bn</sub>).</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
      <sec>
        <title>Similarity Analysis</title>
        <p>The average Jaccard similarity coefficient of each scenario is reported in <xref ref-type="table" rid="table3">Table 3</xref>. Overall, the Jaccard similarity coefficient shows an average of 0.77 similarity between the initial query without SBQR (Q<sub>A1</sub>) and the last query with SBQR (Q<sub>Bn</sub>) in all scenarios. Both queries were entered by the users. The Kolmogorov-Smirnov test and Bartlett/Levene test showed the distribution of the Jaccard coefficient in each scenario was non-normal (<italic>P</italic>=.01) with equal variance (<italic>P</italic>=.15). Then, the Kruskal-Wallis test shows no significant difference between the medians of the coefficient scores (<italic>P</italic>=.26). As shown in <xref ref-type="table" rid="table3">Table 3</xref>, scenario 1 (first in the order) and scenario 4 (high difficulty) had a lower Jaccard similarity coefficient than the others, while scenario 2 (low difficulty) had the highest Jaccard similarity coefficient. Together with the query analysis in <xref ref-type="table" rid="table1">Table 1</xref>, participants submitted shorter and similar queries in easier scenarios and longer, varying queries in more challenging ones.</p>
        <table-wrap position="float" id="table3">
          <label>Table 3</label>
          <caption>
            <p>Summary of query similarities.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="150"/>
            <col width="250"/>
            <col width="330"/>
            <col width="270"/>
            <thead>
              <tr valign="top">
                <td>Scenario</td>
                <td>Estimated difficulty</td>
                <td>Average Jaccard coefficient, mean (SD)</td>
                <td>Median Jaccard coefficient</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Medium</td>
                <td>0.69 (0.35)</td>
                <td>0.8</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Low</td>
                <td>0.86 (0.22)</td>
                <td>1.0</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Medium</td>
                <td>0.80 (0.32)</td>
                <td>1.0</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>High</td>
                <td>0.71 (0.37)</td>
                <td>1.0</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Medium</td>
                <td>0.79 (0.33)</td>
                <td>1.0</td>
              </tr>
            </tbody>
          </table>
        </table-wrap>
        <p>Overall, the results show that participants did not have much change in their queries after the assistance of the SBQR. The high Jaccard similarity coefficient (0.77) between the initial query without SBQR (Q<sub>A1</sub>) and the last query with SBQR (Q<sub>Bn</sub>) indicates the small change, suggesting that the observed positive perception of end users toward our prototype system likely came from the SBQR, which modified the last query and may help retrieve more relevant documents. Since participants entered similar queries at the end of the search process, variations in queries were not large and likely did not influence the perceived system performance much. Rather, the SBQR changed the search results and affected the perceived system performance. If participants were to formulate different queries, the perceived system performance would have been affected by both the variation in the user-supplied queries and their SBQR-expanded form. In this case, it would be challenging to separate contributing factors and explain the correlation between SBQR use and the perceived system performance.</p>
      </sec>
      <sec>
        <title>Entropy Analysis</title>
        <p>The results of the entropy analysis support our second hypothesis. As listed in <xref ref-type="table" rid="table4">Table 4</xref>, a negative difference in the entropy scores was observed in scenarios 1, 3, and 5 (all with medium difficulty), suggesting that the SBQR helped standardize the queries submitted by different participants. However, a positive gain in entropy scores was observed in scenarios 2 and 4, indicating that the result sets were more variable after the SBQR was turned on. Further analysis shows that differences in entropy scores were highly correlated to participants’ perceived system performance (Pearson correlation coefficient –0.85). This high negative correlation suggests that a converged result set when using the SBQR may lead participants to a higher perceived system performance. It seems that in a high-difficulty scenario, the participants formulated very different queries with distinct medical concepts due to their limited knowledge, preventing the retrieved results from converging even with the help of the SBQR. On the other hand, in an easier scenario where information needs are clear, the SBQR may introduce “noise” into the result set by adding semantically related but not closely relevant terms. Of note, the SBQR mapped queries with similar medical concepts into similar queries, resulting in similar result sets. The SBQR did not examine whether the user-supplied queries were correct or accurate in each of the scenarios.</p>
        <table-wrap position="float" id="table4">
          <label>Table 4</label>
          <caption>
            <p>The analysis of the entropy of the top 10 result sets. The Pearson correlation between the entropy difference (%) and the perceived performance was –0.85.</p>
          </caption>
          <table width="1000" cellpadding="5" cellspacing="0" border="1" rules="groups" frame="hsides">
            <col width="80"/>
            <col width="170"/>
            <col width="190"/>
            <col width="180"/>
            <col width="200"/>
            <col width="180"/>
            <thead>
              <tr valign="bottom">
                <td>Scenario</td>
                <td>Estimated difficulty</td>
                <td>Entropy SBQR<sup>a</sup>-Off</td>
                <td>Entropy SBQR-On</td>
                <td>Entropy difference (%)</td>
                <td>Perceived performance</td>
              </tr>
            </thead>
            <tbody>
              <tr valign="top">
                <td>1</td>
                <td>Medium</td>
                <td>4.4384</td>
                <td>3.6747</td>
                <td>–17.21<sup>b</sup></td>
                <td>4.24</td>
              </tr>
              <tr valign="top">
                <td>2</td>
                <td>Low</td>
                <td>3.0688</td>
                <td>3.3858</td>
                <td>10.33</td>
                <td>3.94</td>
              </tr>
              <tr valign="top">
                <td>3</td>
                <td>Medium</td>
                <td>3.8411</td>
                <td>3.5537</td>
                <td>–7.48<sup>b</sup></td>
                <td>4.42</td>
              </tr>
              <tr valign="top">
                <td>4</td>
                <td>High</td>
                <td>3.9398</td>
                <td>4.0709</td>
                <td>3.33</td>
                <td>4.09</td>
              </tr>
              <tr valign="top">
                <td>5</td>
                <td>Medium</td>
                <td>3.6617</td>
                <td>2.9248</td>
                <td>–20.12<sup>b</sup></td>
                <td>4.55</td>
              </tr>
            </tbody>
          </table>
          <table-wrap-foot>
            <fn id="table4fn1">
              <p><sup>a</sup>SBQR: semantically based query recommendation.</p>
            </fn>
            <fn id="table4fn2">
              <p><sup>b</sup>Scenarios 1, 3, and 5 show a negative entropy.</p>
            </fn>
          </table-wrap-foot>
        </table-wrap>
      </sec>
    </sec>
    <sec sec-type="discussion">
      <title>Discussion</title>
      <sec>
        <title>Principal Findings</title>
        <p>Using unstructured clinical data requires IR techniques to effectively assist clinicians in finding relevant information in free-text documents. In our previous study, a prototype medical IR system with an SBQR feature was developed and evaluated to show its perceived positive performance with 33 prospective end users. In this study, the query logs were analyzed to generate empirical evidence and potential explanations for the participants’ positive perceptions toward the system. The results showed that participants formulated similar queries with the assistance of the SBQR, suggesting that perceived positive system performance was likely contributed by the SBQR rather than the manual modifications of the queries. Moreover, the participants tended to formulate shorter and more similar queries in an easy scenario and longer and less similar queries in more challenging scenarios.</p>
        <p>The results of the entropy analysis suggest that the estimated difficulty level of the scenarios was a contingent factor on participants’ perceived system performance. The SBQR achieved higher performance in scenarios with a medium difficulty as opposed to the extremes (a low or high difficulty). One explanation could be that in the extreme case, the SBQR provided limited help and introduced noise. This finding provides 2 insights into how to incorporate an SBQR in modern medical IR systems, such as EMERSE. First, an SBQR could be designed as a user-controlled option, in which user input is necessary to determine when to turn the feature on or off. Second, an SBQR can be designed as a semiautomated feature, which is activated based on the observed and inferred difficulty of users’ information needs, potentially through real-time analysis of query terms and retrieved documents [<xref ref-type="bibr" rid="ref23">23</xref>-<xref ref-type="bibr" rid="ref25">25</xref>].</p>
      </sec>
      <sec>
        <title>Strengths and Limitations</title>
        <p>The strength of this study lies in using a combination of objective query logs and self-report survey data to uncover participants’ complicated search behaviors when using the SBQR feature to search EHR notes. The study has several limitations. First, the SBQR was not strictly controlled in the user experiment. The participants alternated between turning the SBQR on and off. While this allowed participants to conduct searches more naturally, it added complexity to the analysis. To mitigate this issue, our analysis focused on the initial query that a user entered when the SBQR was turned off and the last query when the SBQR was turned on. Second, the participants were not asked to provide relevance feedback on the retrieved documents nor was there a gold standard of document relevance for each scenario. Since this study did not collect relevance feedback from the users, standard IR metrics such as normalized discounted cumulative gain cannot be calculated to generate more direct evidence. The document-level relevance feedback was not collected because ranking has almost no meaning in this type of search. The search goal of EMERSE is identifying patient cohorts rather than ranking relevant patients on the top of the pages. We, therefore, conducted the entropy analysis to show a high correlation between the perceived system performance and a high degree of convergence of the retrieved results among participants. Of note, our analysis mainly focused on the top 10 retrieved documents across the 33 participants because each participant may review a varied number of documents in the experiment. Only including the top 10 documents may not be very reflective of the cohort identification process, in which many more patients or documents could be needed. In the same vein, it would be challenging to ascertain that the change in entropy would represent the change in document variety. Next, the difficulty level of each scenario was estimated by the research team. 
User-indicated difficulty levels can help improve the validity of the entropy analysis and will be considered in our follow-up studies. Finally, this study only analyzed the SBQR in 1 prototype system. The SBQR can be implemented in multiple EHR search engines and used in multiple institutions to examine the effectiveness and generalizability of the SBQR.</p>
        <p>Our future work is 2-fold. First, we will learn from this study and redesign the experiment in a more controlled manner to demonstrate the effectiveness of the SBQR in improving the quality of input queries and search results. Second, we will implement SBQR in EMERSE and deploy the feature to the participating sites. We will collect query and usage logs and compare search behaviors across multiple institutions. We will also develop a query standardization and exchange platform to facilitate patient cohort identification across institutions to support large-scale clinical research studies.</p>
      </sec>
      <sec>
        <title>Conclusions</title>
        <p>This study analyzed the query logs in a user experiment of a prototype EHR search engine with the SBQR and provided empirical evidence as well as potential explanations for the perceived system performance. The results show that the positive perception was likely attributable to the effectiveness of SBQR and was contingent upon the difficulty level of a particular search scenario. This study confirms that an SBQR has the potential to overcome challenges when retrieving medical documents. Modern medical IR systems, such as EMERSE, should consider the design of an SBQR as a user-controllable option or a semiautomated feature that is triggered when the difficulty of a user’s information needs can be assessed or inferred.</p>
      </sec>
    </sec>
  </body>
  <back>
    <app-group>
      <supplementary-material id="app1">
        <label>Multimedia Appendix 1</label>
        <p>Log data analyzed in the manuscript.</p>
        <media xlink:href="formative_v7i1e45376_app1.xlsx" xlink:title="XLSX File  (Microsoft Excel File), 208 KB"/>
      </supplementary-material>
    </app-group>
    <glossary>
      <title>Abbreviations</title>
      <def-list>
        <def-item>
          <term id="abb1">EHR</term>
          <def>
            <p>electronic health record</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb2">EMERSE</term>
          <def>
            <p>Electronic Medical Record Search Engine</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb3">IR</term>
          <def>
            <p>information retrieval</p>
          </def>
        </def-item>
        <def-item>
          <term id="abb4">SBQR</term>
          <def>
            <p>semantically based query recommendation</p>
          </def>
        </def-item>
      </def-list>
    </glossary>
    <ack>
      <p>We thank Mr Karthikeyan Meganathan at the University of Cincinnati Center for Health Informatics for his statistical consulting, and Ms Shuai Mu and Ms Catherine Xu in the corresponding author’s laboratory for their copyediting support.</p>
    </ack>
    <notes>
      <sec>
        <title>Data Availability</title>
        <p>The query logs analyzed in this study were deidentified and uploaded as <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p>
      </sec>
    </notes>
    <fn-group>
      <fn fn-type="conflict">
        <p>DH maintains a resource of clinical "synonyms" which can be used for query expansion and is licensed by the University of Michigan. He is entitled to a percentage of royalties when the dataset is licensed. The authors have no further interests to declare.</p>
      </fn>
    </fn-group>
    <ref-list>
      <ref id="ref1">
        <label>1</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Weed</surname>
              <given-names>LL</given-names>
            </name>
          </person-group>
          <article-title>The importance of medical records</article-title>
          <source>Can Fam Physician</source>
          <year>1969</year>
          <volume>15</volume>
          <issue>12</issue>
          <fpage>23</fpage>
          <lpage>25</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/20468453"/>
          </comment>
          <pub-id pub-id-type="medline">20468453</pub-id>
          <pub-id pub-id-type="pmcid">PMC2281464</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref2">
        <label>2</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <collab>Institute of Medicine (U.S.). Committee on Improving the Patient Record</collab>
            <name name-style="western">
              <surname>Dick</surname>
              <given-names>RS</given-names>
            </name>
            <name name-style="western">
              <surname>Steen</surname>
              <given-names>EW</given-names>
            </name>
          </person-group>
          <source>The Computer-Based Patient Record: An Essential Technology for Health Care</source>
          <year>1991</year>
          <publisher-loc>Washington</publisher-loc>
          <publisher-name>National Academy Press</publisher-name>
        </nlm-citation>
      </ref>
      <ref id="ref3">
        <label>3</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rasmussen</surname>
              <given-names>LV</given-names>
            </name>
          </person-group>
          <article-title>The electronic health record for translational research</article-title>
          <source>J Cardiovasc Transl Res</source>
          <year>2014</year>
          <volume>7</volume>
          <issue>6</issue>
          <fpage>607</fpage>
          <lpage>614</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/25070682"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s12265-014-9579-z</pub-id>
          <pub-id pub-id-type="medline">25070682</pub-id>
          <pub-id pub-id-type="pmcid">PMC4147395</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref4">
        <label>4</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Cowie</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Blomster</surname>
              <given-names>JI</given-names>
            </name>
            <name name-style="western">
              <surname>Curtis</surname>
              <given-names>LH</given-names>
            </name>
            <name name-style="western">
              <surname>Duclaux</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Ford</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>Fritz</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Goldman</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Janmohamed</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Kreuzer</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Leenay</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Michel</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Ong</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Pell</surname>
              <given-names>JP</given-names>
            </name>
            <name name-style="western">
              <surname>Southworth</surname>
              <given-names>MR</given-names>
            </name>
            <name name-style="western">
              <surname>Stough</surname>
              <given-names>WG</given-names>
            </name>
            <name name-style="western">
              <surname>Thoenes</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Zannad</surname>
              <given-names>F</given-names>
            </name>
            <name name-style="western">
              <surname>Zalewski</surname>
              <given-names>A</given-names>
            </name>
          </person-group>
          <article-title>Electronic health records to facilitate clinical research</article-title>
          <source>Clin Res Cardiol</source>
          <year>2017</year>
          <volume>106</volume>
          <issue>1</issue>
          <fpage>1</fpage>
          <lpage>9</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27557678"/>
          </comment>
          <pub-id pub-id-type="doi">10.1007/s00392-016-1025-6</pub-id>
          <pub-id pub-id-type="medline">27557678</pub-id>
          <pub-id pub-id-type="pii">10.1007/s00392-016-1025-6</pub-id>
          <pub-id pub-id-type="pmcid">PMC5226988</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref5">
        <label>5</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Rosenbloom</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Denny</surname>
              <given-names>JC</given-names>
            </name>
            <name name-style="western">
              <surname>Xu</surname>
              <given-names>H</given-names>
            </name>
            <name name-style="western">
              <surname>Lorenzi</surname>
              <given-names>N</given-names>
            </name>
            <name name-style="western">
              <surname>Stead</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Johnson</surname>
              <given-names>KB</given-names>
            </name>
          </person-group>
          <article-title>Data from clinical notes: a perspective on the tension between structure and flexible documentation</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <volume>18</volume>
          <issue>2</issue>
          <fpage>181</fpage>
          <lpage>186</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://academic.oup.com/jamia/article/18/2/181/802561?login=false"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.2010.007237</pub-id>
          <pub-id pub-id-type="medline">21233086</pub-id>
          <pub-id pub-id-type="pii">jamia.2010.007237</pub-id>
          <pub-id pub-id-type="pmcid">PMC3116264</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref6">
        <label>6</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Pons</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Braun</surname>
              <given-names>LMM</given-names>
            </name>
            <name name-style="western">
              <surname>Hunink</surname>
              <given-names>MGM</given-names>
            </name>
            <name name-style="western">
              <surname>Kors</surname>
              <given-names>JA</given-names>
            </name>
          </person-group>
          <article-title>Natural language processing in radiology: a systematic review</article-title>
          <source>Radiology</source>
          <year>2016</year>
          <volume>279</volume>
          <issue>2</issue>
          <fpage>329</fpage>
          <lpage>343</lpage>
          <pub-id pub-id-type="doi">10.1148/radiol.16142770</pub-id>
          <pub-id pub-id-type="medline">27089187</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref7">
        <label>7</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Castro</surname>
              <given-names>VM</given-names>
            </name>
            <name name-style="western">
              <surname>Dligach</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Finan</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Yu</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Can</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Abd-El-Barr</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Gainer</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Shadick</surname>
              <given-names>NA</given-names>
            </name>
            <name name-style="western">
              <surname>Murphy</surname>
              <given-names>S</given-names>
            </name>
            <name name-style="western">
              <surname>Cai</surname>
              <given-names>T</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>G</given-names>
            </name>
            <name name-style="western">
              <surname>Weiss</surname>
              <given-names>ST</given-names>
            </name>
            <name name-style="western">
              <surname>Du</surname>
              <given-names>R</given-names>
            </name>
          </person-group>
          <article-title>Large-scale identification of patients with cerebral aneurysms using natural language processing</article-title>
          <source>Neurology</source>
          <year>2017</year>
          <volume>88</volume>
          <issue>2</issue>
          <fpage>164</fpage>
          <lpage>168</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/27927935"/>
          </comment>
          <pub-id pub-id-type="doi">10.1212/WNL.0000000000003490</pub-id>
          <pub-id pub-id-type="medline">27927935</pub-id>
          <pub-id pub-id-type="pii">WNL.0000000000003490</pub-id>
          <pub-id pub-id-type="pmcid">PMC5224711</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref8">
        <label>8</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Chapman</surname>
              <given-names>WW</given-names>
            </name>
            <name name-style="western">
              <surname>Nadkarni</surname>
              <given-names>PM</given-names>
            </name>
            <name name-style="western">
              <surname>Hirschman</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>D'Avolio</surname>
              <given-names>LW</given-names>
            </name>
            <name name-style="western">
              <surname>Savova</surname>
              <given-names>GK</given-names>
            </name>
            <name name-style="western">
              <surname>Uzuner</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Overcoming barriers to NLP for clinical text: the role of shared tasks and the need for additional creative solutions</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>2011</year>
          <volume>18</volume>
          <issue>5</issue>
          <fpage>540</fpage>
          <lpage>543</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/21846785"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/amiajnl-2011-000465</pub-id>
          <pub-id pub-id-type="medline">21846785</pub-id>
          <pub-id pub-id-type="pii">amiajnl-2011-000465</pub-id>
          <pub-id pub-id-type="pmcid">PMC3168329</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref9">
        <label>9</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hanauer</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>EMERSE: the electronic medical record search engine</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2006</year>
          <volume>2006</volume>
          <fpage>941</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/17238560"/>
          </comment>
          <pub-id pub-id-type="medline">17238560</pub-id>
          <pub-id pub-id-type="pii">85792</pub-id>
          <pub-id pub-id-type="pmcid">PMC1839699</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref10">
        <label>10</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hanauer</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Mei</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Law</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Khanna</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Supporting information retrieval from electronic health records: a report of University of Michigan's nine-year experience in developing and using the electronic medical record search engine (EMERSE)</article-title>
          <source>J Biomed Inform</source>
          <year>2015</year>
          <volume>55</volume>
          <fpage>290</fpage>
          <lpage>300</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(15)00082-9"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2015.05.003</pub-id>
          <pub-id pub-id-type="medline">25979153</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(15)00082-9</pub-id>
          <pub-id pub-id-type="pmcid">PMC4527540</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref11">
        <label>11</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hanauer</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>DTY</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mei</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Murkowski-Steffy</surname>
              <given-names>KB</given-names>
            </name>
            <name name-style="western">
              <surname>Vydiswaran</surname>
              <given-names>VGV</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Development and empirical user-centered evaluation of semantically-based query recommendation for an electronic health record search engine</article-title>
          <source>J Biomed Inform</source>
          <year>2017</year>
          <volume>67</volume>
          <fpage>1</fpage>
          <lpage>10</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://linkinghub.elsevier.com/retrieve/pii/S1532-0464(17)30017-5"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.jbi.2017.01.013</pub-id>
          <pub-id pub-id-type="medline">28131722</pub-id>
          <pub-id pub-id-type="pii">S1532-0464(17)30017-5</pub-id>
          <pub-id pub-id-type="pmcid">PMC5378386</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref12">
        <label>12</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bates</surname>
              <given-names>MJ</given-names>
            </name>
          </person-group>
          <article-title>The design of browsing and berrypicking techniques for the online search interface</article-title>
          <source>Online Rev</source>
          <year>1989</year>
          <volume>13</volume>
          <issue>5</issue>
          <fpage>407</fpage>
          <lpage>424</lpage>
          <pub-id pub-id-type="doi">10.1108/eb024320</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref13">
        <label>13</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mei</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>K</given-names>
            </name>
            <name name-style="western">
              <surname>Hanauer</surname>
              <given-names>DA</given-names>
            </name>
          </person-group>
          <article-title>Query log analysis of an electronic health record search engine</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2011</year>
          <volume>2011</volume>
          <fpage>915</fpage>
          <lpage>924</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/22195150"/>
          </comment>
          <pub-id pub-id-type="medline">22195150</pub-id>
          <pub-id pub-id-type="pmcid">PMC3243246</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref14">
        <label>14</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Hanauer</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Liu</surname>
              <given-names>Y</given-names>
            </name>
            <name name-style="western">
              <surname>Mei</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Manion</surname>
              <given-names>FJ</given-names>
            </name>
            <name name-style="western">
              <surname>Balis</surname>
              <given-names>UJ</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <article-title>Hedging their mets: the use of uncertainty terms in clinical documents and its potential implications when sharing the documents with patients</article-title>
          <source>AMIA Annu Symp Proc</source>
          <year>2012</year>
          <volume>2012</volume>
          <fpage>321</fpage>
          <lpage>330</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/23304302"/>
          </comment>
          <pub-id pub-id-type="medline">23304302</pub-id>
          <pub-id pub-id-type="pmcid">PMC3540426</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref15">
        <label>15</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Baeza-Yates</surname>
              <given-names>R</given-names>
            </name>
            <name name-style="western">
              <surname>Hurtado</surname>
              <given-names>C</given-names>
            </name>
            <name name-style="western">
              <surname>Mendoza</surname>
              <given-names>M</given-names>
            </name>
          </person-group>
          <article-title>Query recommendation using query logs in search engines</article-title>
          <source>Current Trends in Database Technology—EDBT 2004 Workshops</source>
          <year>2004</year>
          <conf-name>International Conference on Extending Database Technology</conf-name>
          <conf-date>14-18 March</conf-date>
          <conf-loc>Heraklion, Crete, Greece</conf-loc>
          <publisher-loc>Berlin, Heidelberg</publisher-loc>
          <publisher-name>Springer</publisher-name>
          <fpage>588</fpage>
          <lpage>596</lpage>
          <pub-id pub-id-type="doi">10.1007/978-3-540-30192-9_58</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref16">
        <label>16</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Zhang</surname>
              <given-names>Z</given-names>
            </name>
            <name name-style="western">
              <surname>Nasraoui</surname>
              <given-names>O</given-names>
            </name>
          </person-group>
          <article-title>Mining search engine query logs for query recommendation</article-title>
          <year>2006</year>
          <conf-name>WWW06: The 15th International World Wide Web Conference 2006</conf-name>
          <conf-date>May 23-26, 2006</conf-date>
          <conf-loc>Edinburgh, Scotland</conf-loc>
          <publisher-name>ACM Press</publisher-name>
          <fpage>1039</fpage>
          <lpage>1040</lpage>
          <pub-id pub-id-type="doi">10.1145/1135777.1136004</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref17">
        <label>17</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Fernández</surname>
              <given-names>M</given-names>
            </name>
            <name name-style="western">
              <surname>Cantador</surname>
              <given-names>I</given-names>
            </name>
            <name name-style="western">
              <surname>López</surname>
              <given-names>V</given-names>
            </name>
            <name name-style="western">
              <surname>Vallet</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Castells</surname>
              <given-names>P</given-names>
            </name>
            <name name-style="western">
              <surname>Motta</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <article-title>Semantically enhanced information retrieval: an ontology-based approach</article-title>
          <source>J Web Semantics</source>
          <year>2011</year>
          <volume>9</volume>
          <issue>4</issue>
          <fpage>434</fpage>
          <lpage>452</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://www.sciencedirect.com/science/article/abs/pii/S1570826810000910?via%3Dihub"/>
          </comment>
          <pub-id pub-id-type="doi">10.1016/j.websem.2010.11.003</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref18">
        <label>18</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Bell</surname>
              <given-names>DS</given-names>
            </name>
            <name name-style="western">
              <surname>Pattison-Gordon</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Greenes</surname>
              <given-names>RA</given-names>
            </name>
          </person-group>
          <article-title>Experiments in concept modeling for radiographic image reports</article-title>
          <source>J Am Med Inform Assoc</source>
          <year>1994</year>
          <volume>1</volume>
          <issue>3</issue>
          <fpage>249</fpage>
          <lpage>262</lpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://europepmc.org/abstract/MED/7719807"/>
          </comment>
          <pub-id pub-id-type="doi">10.1136/jamia.1994.95236156</pub-id>
          <pub-id pub-id-type="medline">7719807</pub-id>
          <pub-id pub-id-type="pmcid">PMC116203</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref19">
        <label>19</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mojsilovic</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Gomes</surname>
              <given-names>J</given-names>
            </name>
          </person-group>
          <article-title>Semantic based categorization, browsing and retrieval in medical image databases</article-title>
          <year>2002</year>
          <conf-name>IEEE International Conference on Image Processing</conf-name>
          <conf-date>22-25 September 2002</conf-date>
          <conf-loc>Rochester, NY, USA</conf-loc>
          <publisher-loc>New York</publisher-loc>
          <publisher-name>IEEE</publisher-name>
          <fpage>III-145</fpage>
          <lpage>III-148</lpage>
          <pub-id pub-id-type="doi">10.1109/icip.2002.1038925</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref20">
        <label>20</label>
        <nlm-citation citation-type="journal">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Müller</surname>
              <given-names>Hans-Michael</given-names>
            </name>
            <name name-style="western">
              <surname>Kenny</surname>
              <given-names>EE</given-names>
            </name>
            <name name-style="western">
              <surname>Sternberg</surname>
              <given-names>PW</given-names>
            </name>
          </person-group>
          <article-title>Textpresso: an ontology-based information retrieval and extraction system for biological literature</article-title>
          <source>PLoS Biol</source>
          <year>2004</year>
          <month>11</month>
          <volume>2</volume>
          <issue>11</issue>
          <fpage>e309</fpage>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://dx.plos.org/10.1371/journal.pbio.0020309"/>
          </comment>
          <pub-id pub-id-type="doi">10.1371/journal.pbio.0020309</pub-id>
          <pub-id pub-id-type="medline">15383839</pub-id>
          <pub-id pub-id-type="pmcid">PMC517822</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref21">
        <label>21</label>
        <nlm-citation citation-type="web">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Wu</surname>
              <given-names>DTY</given-names>
            </name>
            <name name-style="western">
              <surname>Yang</surname>
              <given-names>L</given-names>
            </name>
            <name name-style="western">
              <surname>Mei</surname>
              <given-names>Q</given-names>
            </name>
            <name name-style="western">
              <surname>Hanauer</surname>
              <given-names>DA</given-names>
            </name>
            <name name-style="western">
              <surname>Zheng</surname>
              <given-names>K</given-names>
            </name>
          </person-group>
          <source>Towards intelligent and socially oriented query recommendation for electronic health records retrieval</source>
          <year>2013</year>
          <access-date>2023-08-08</access-date>
          <comment>
            <ext-link ext-link-type="uri" xlink:type="simple" xlink:href="https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.719.4541">https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.719.4541</ext-link>
          </comment>
        </nlm-citation>
      </ref>
      <ref id="ref22">
        <label>22</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Aronson</surname>
              <given-names>AR</given-names>
            </name>
            <name name-style="western">
              <surname>Rindflesch</surname>
              <given-names>TC</given-names>
            </name>
            <name name-style="western">
              <surname>Browne</surname>
              <given-names>AC</given-names>
            </name>
          </person-group>
          <article-title>Exploiting a large thesaurus for information retrieval</article-title>
          <source>Intelligent Multimedia Information Retrieval Systems and Management: RIAO 94</source>
          <year>1994</year>
          <publisher-loc>Paris</publisher-loc>
          <publisher-name>CID-CASIS</publisher-name>
          <fpage>197</fpage>
          <lpage>216</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref23">
        <label>23</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Mothe</surname>
              <given-names>J</given-names>
            </name>
            <name name-style="western">
              <surname>Tanguy</surname>
              <given-names>L</given-names>
            </name>
          </person-group>
          <article-title>Linguistic features to predict query difficulty: a case study on previous TREC campaigns</article-title>
          <year>2005</year>
          <conf-name>ACM Conference on Research and Development in Information Retrieval, SIGIR, Predicting Query Difficulty - Methods and Applications Workshop</conf-name>
          <conf-date>2005</conf-date>
          <conf-loc>Salvador de Bahia, Brazil</conf-loc>
          <fpage>7</fpage>
          <lpage>10</lpage>
        </nlm-citation>
      </ref>
      <ref id="ref24">
        <label>24</label>
        <nlm-citation citation-type="confproc">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carmel</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yom-Tov</surname>
              <given-names>E</given-names>
            </name>
            <name name-style="western">
              <surname>Darlow</surname>
              <given-names>A</given-names>
            </name>
            <name name-style="western">
              <surname>Pelleg</surname>
              <given-names>D</given-names>
            </name>
          </person-group>
          <article-title>What makes a query difficult?</article-title>
          <year>2006</year>
          <conf-name>SIGIR06: The 29th Annual International SIGIR Conference</conf-name>
          <conf-date>August 6-11, 2006</conf-date>
          <conf-loc>Seattle, Washington, USA</conf-loc>
          <pub-id pub-id-type="doi">10.1145/1148170.1148238</pub-id>
        </nlm-citation>
      </ref>
      <ref id="ref25">
        <label>25</label>
        <nlm-citation citation-type="book">
          <person-group person-group-type="author">
            <name name-style="western">
              <surname>Carmel</surname>
              <given-names>D</given-names>
            </name>
            <name name-style="western">
              <surname>Yom-Tov</surname>
              <given-names>E</given-names>
            </name>
          </person-group>
          <source>Estimating the Query Difficulty for Information Retrieval</source>
          <year>2010</year>
          <publisher-loc>San Rafael</publisher-loc>
          <publisher-name>Morgan &amp; Claypool</publisher-name>
          <fpage>1</fpage>
          <lpage>89</lpage>
        </nlm-citation>
      </ref>
    </ref-list>
  </back>
</article>
