<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing DTD v2.0 20040830//EN" "journalpublishing.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" dtd-version="2.0" xml:lang="en" article-type="research-article"><front><journal-meta><journal-id journal-id-type="nlm-ta">JMIR Form Res</journal-id><journal-id journal-id-type="publisher-id">formative</journal-id><journal-id journal-id-type="index">27</journal-id><journal-title>JMIR Formative Research</journal-title><abbrev-journal-title>JMIR Form Res</abbrev-journal-title><issn pub-type="epub">2561-326X</issn><publisher><publisher-name>JMIR Publications</publisher-name><publisher-loc>Toronto, Canada</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">v10i1e70826</article-id><article-id pub-id-type="doi">10.2196/70826</article-id><article-categories><subj-group subj-group-type="heading"><subject>Original Paper</subject></subj-group></article-categories><title-group><article-title>Personalized Glucose Management With AI: Pilot Study Using a Multiarmed Bandit Approach</article-title></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name name-style="western"><surname>Hotta</surname><given-names>Shinji</given-names></name><degrees>MInf</degrees><xref ref-type="aff" rid="aff1">1</xref><xref ref-type="aff" rid="aff2">2</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Kyt&#x00F6;</surname><given-names>Mikko</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff3">3</xref><xref ref-type="aff" rid="aff4">4</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Koivusalo</surname><given-names>Saila</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name 
name-style="western"><surname>Heinonen</surname><given-names>Seppo</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff5">5</xref></contrib><contrib contrib-type="author"><name name-style="western"><surname>Marttinen</surname><given-names>Pekka</given-names></name><degrees>PhD</degrees><xref ref-type="aff" rid="aff2">2</xref></contrib></contrib-group><aff id="aff1"><institution>Fujitsu Limited</institution><addr-line>4-1-1 Kamikodanaka, Nakahara-ku</addr-line><addr-line>Kawasaki</addr-line><addr-line>Kanagawa</addr-line><country>Japan</country></aff><aff id="aff2"><institution>Department of Computer Science, Aalto University</institution><addr-line>Espoo</addr-line><addr-line>Uusimaa</addr-line><country>Finland</country></aff><aff id="aff3"><institution>Development and Strategy Unit, Helsinki University Hospital, University of Helsinki</institution><addr-line>Helsinki</addr-line><addr-line>Uusimaa</addr-line><country>Finland</country></aff><aff id="aff4"><institution>Department of Computer Science, University of Helsinki</institution><addr-line>Helsinki</addr-line><addr-line>Uusimaa</addr-line><country>Finland</country></aff><aff id="aff5"><institution>Department of Obstetrics and Gynecology, Helsinki University Hospital, University of Helsinki</institution><addr-line>Helsinki</addr-line><addr-line>Uusimaa</addr-line><country>Finland</country></aff><contrib-group><contrib contrib-type="editor"><name name-style="western"><surname>Sarvestan</surname><given-names>Javad</given-names></name></contrib></contrib-group><contrib-group><contrib contrib-type="reviewer"><name name-style="western"><surname>Yom-Tov</surname><given-names>Elad</given-names></name></contrib><contrib contrib-type="reviewer"><name name-style="western"><surname>Kamble</surname><given-names>Vijay A</given-names></name></contrib></contrib-group><author-notes><corresp>Correspondence to Shinji Hotta, MInf, Fujitsu Limited, 4-1-1 Kamikodanaka, Nakahara-ku, Kawasaki, Kanagawa, 211-8588, 
Japan, 81 44 777 1111; <email>hotta_s@fujitsu.com</email></corresp></author-notes><pub-date pub-type="collection"><year>2026</year></pub-date><pub-date pub-type="epub"><day>19</day><month>3</month><year>2026</year></pub-date><volume>10</volume><elocation-id>e70826</elocation-id><history><date date-type="received"><day>14</day><month>01</month><year>2025</year></date><date date-type="rev-recd"><day>24</day><month>01</month><year>2026</year></date><date date-type="accepted"><day>28</day><month>01</month><year>2026</year></date></history><copyright-statement>&#x00A9; Shinji Hotta, Mikko Kyt&#x00F6;, Saila Koivusalo, Seppo Heinonen, Pekka Marttinen. Originally published in JMIR Formative Research (<ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>), 19.3.2026. </copyright-statement><copyright-year>2026</copyright-year><license license-type="open-access" xlink:href="https://creativecommons.org/licenses/by/4.0/"><p>This is an open-access article distributed under the terms of the Creative Commons Attribution License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided the original work, first published in JMIR Formative Research, is properly cited. The complete bibliographic information, a link to the original publication on <ext-link ext-link-type="uri" xlink:href="https://formative.jmir.org">https://formative.jmir.org</ext-link>, as well as this copyright and license information must be included.</p></license><self-uri xlink:type="simple" xlink:href="https://formative.jmir.org/2026/1/e70826"/><abstract><sec><title>Background</title><p>Personalized behavioral recommendations through mobile apps have proven effective in preventing serious chronic diseases such as diabetes. 
Recent studies have primarily focused on optimizing personalized recommendations using reinforcement learning. However, the main problem with these approaches is that they focus on behavioral changes and overlook clinical outcomes.</p></sec><sec><title>Objective</title><p>This study aimed to propose a method for online planning of dietary and exercise recommendations to directly optimize postprandial glucose levels through behavioral changes.</p></sec><sec sec-type="methods"><title>Methods</title><p>The proposed method is a multiarmed bandit based on a two-stage reward prediction model, where an action is a combination of the total carbohydrate intake and postprandial walking duration, and the reward is the reduction in postprandial glucose levels. We implemented the prediction of the reward for each action based on the predicted behavioral responses to an action, and subsequently, the postprandial glycemic response.</p></sec><sec sec-type="results"><title>Results</title><p>In a simulation experiment, we demonstrated that the proposed online algorithm can significantly improve postprandial glucose levels with personalized recommendations, compared to the randomized policy. Furthermore, we conducted a small real-world experiment with a simplified proposed method involving a single update of the recommendation policy into a personalized one. For 6 participants, compared to the randomized policy, we observed a 23% improvement, on average, in actual glucose responses along with the behavioral adherence to the recommendations concerning carbohydrate intake and postprandial walking.</p></sec><sec sec-type="conclusions"><title>Conclusions</title><p>The preliminary effectiveness of the proposed method was demonstrated from both the simulation experiment and the small real-world experiment. 
However, further longitudinal real-world experiments in patients with diabetes are needed to validate and generalize the findings.</p></sec></abstract><kwd-group><kwd>diabetes</kwd><kwd>mobile intervention</kwd><kwd>personalization</kwd><kwd>dietary and exercise recommendation</kwd><kwd>glucose management</kwd><kwd>multiarmed bandit</kwd></kwd-group></article-meta></front><body><sec id="s1" sec-type="intro"><title>Introduction</title><sec id="s1-1"><title>Background</title><p>The global incidence of diabetes has increased considerably in recent years, accompanied by escalating disease severity. This progression has detrimental ramifications, including compromised quality of life, multifarious complications, and expensive surgical treatment. The medical expenses for diabetes are expected to increase to approximately US $2.5 trillion worldwide by 2030 [<xref ref-type="bibr" rid="ref1">1</xref>]. This has created an urgent need to reduce the severity of diabetes. The recent national clinical practice guidelines [<xref ref-type="bibr" rid="ref2">2</xref>] state that the basic principle of this preventive treatment is to maintain glucose levels within the normal range. The central strategy to manage glucose levels is to maintain an appropriate lifestyle of eating and exercising [<xref ref-type="bibr" rid="ref3">3</xref>], in addition to insulin or oral medication. In recent years, technological advances in continuous glucose monitoring devices have helped patients manage their glucose levels with mobile apps at any given time and place, thus promoting self-management [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref5">5</xref>]. However, it is difficult for patients to learn the optimal behavioral action to maintain glucose within a normal range [<xref ref-type="bibr" rid="ref6">6</xref>]. 
Therefore, a personalized system that recommends individualized optimal behavioral actions is required to ensure that glucose levels do not deviate from the normal range in the future.</p><p>Over the past decade, it has become possible for patients with prediabetes to self-monitor their glucose levels, along with their lifestyle habits, such as diet and exercise, by visualizing them through an integrated mobile app [<xref ref-type="bibr" rid="ref4">4</xref>,<xref ref-type="bibr" rid="ref7">7</xref>]. While this has made it easier for patients to manage their own glucose levels daily, and maintaining high user adherence to such monitoring is achievable, as demonstrated by recent studies reporting over 90% daily adherence [<xref ref-type="bibr" rid="ref8">8</xref>], learning the best behaviors to avoid abnormal glucose levels remains a significant hurdle [<xref ref-type="bibr" rid="ref4">4</xref>]. For instance, the total grams of carbohydrates to be consumed or the total minutes of walking exercise to be performed vary from person to person. Therefore, the behavioral recommendations should be automatically generated for each patient by analyzing personal datasets collected daily.</p></sec><sec id="s1-2"><title>Literature Review</title><p>Thus far, there has been significant research on personalized behavioral recommendations through mobile apps for the prevention of chronic diseases, including diabetes [<xref ref-type="bibr" rid="ref9">9</xref>], and a framework called just-in-time adaptive intervention has been established. In just-in-time adaptive intervention, to improve both the short-term and long-term outcomes, decision rules regarding intervention conditions, such as content, frequency, and timing of recommendations, are personally optimized [<xref ref-type="bibr" rid="ref10">10</xref>]. 
For instance, in [<xref ref-type="bibr" rid="ref11">11</xref>], to promote physical activity in patients with hypertension, many interventions with different conditions were allocated and performed on each patient&#x2019;s timeline, and the best-fitting intervention conditions were identified based on the results of the interventional experiments. While this approach can robustly identify personally appropriate intervention conditions, its practical application for practitioners is constrained by the significant burden associated with prolonged experimental periods and the necessity for patients to undergo multiple interventions that may prove to be suboptimal. This underscores the need to develop more efficient and effective personalization methods.</p><p>With regard to the aforementioned factors, recent studies have focused on sequentially optimizing personal intervention conditions using reinforcement learning [<xref ref-type="bibr" rid="ref12">12</xref>]. Yom-Tov et al [<xref ref-type="bibr" rid="ref13">13</xref>] proposed a contextual bandit that optimized the message content of exercise interventions for patients with diabetes. In this study, an action is the message content, and a reward is the amount of activity after the recommendation. The individual optimization of message content can be adaptively performed by selecting an action based on a reward using Boltzmann sampling. Liao et al [<xref ref-type="bibr" rid="ref14">14</xref>] proposed a framework that applies a multistep Markov decision process by introducing a user state, which represents how much intervention to increase daily walking has been received so far. This helps adjust both the intervention content and the intervention frequency to avoid user dropout caused by frequent interventions. However, while the focus of these existing methods is on patient behavior change, it is still inconclusive whether these methods can offer optimal interventions for improving clinical outcomes. 
For instance, in the case of diabetes, the extent to which the intervention lowers glucose levels is highly significant. In other words, further research is needed to identify the intervention conditions that can lower glucose levels most effectively to the target range.</p><p>Some studies have proposed optimizing clinical insulin interventions for people with more severe diabetes using reinforcement learning with an artificial pancreas to directly control their glucose levels [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>]. When insulin is injected, the glucose level drops immediately. Therefore, the primary goal of such research is to determine the appropriate insulin dose and timing for each patient to ensure that the glucose level remains within the normal range. The action is the insulin dosage, the state is the user&#x2019;s glucose level, and the reward is the normality of the glucose levels after insulin injection. The integration of reinforcement learning helped obtain the optimal insulin dosage for each patient [<xref ref-type="bibr" rid="ref17">17</xref>]. However, in this type of study, the variations in action were limited to clinical and direct interventions, such as insulin injection, which are not directly controlled by the patient. Incorporating behavioral interventions, such as diet and exercise recommendations, in addition to clinical interventions, is currently regarded as future work [<xref ref-type="bibr" rid="ref15">15</xref>,<xref ref-type="bibr" rid="ref16">16</xref>].</p></sec><sec id="s1-3"><title>Objectives</title><p>Altogether, there is a lack of approaches that individually optimize behavioral recommendations such as diet and exercise to control glucose levels. 
In this paper, we propose a multiarmed bandit algorithm that can individually plan mobile behavioral interventions, making it easy for users to perform and reducing glucose levels after eating (ie, postprandial glucose, which is most likely to become abnormal in the daily lives of people with diabetes). The most novel aspect of the proposed method is the introduction of a two-stage reward prediction mechanism for each intervention: (1) prediction of the actual user behaviors that occur after the intervention, and (2) prediction of the postprandial glucose trajectory from these predicted behaviors.</p><p>An overview of the proposed method is provided in <xref ref-type="fig" rid="figure1">Figure 1</xref>. The aim is to lower postprandial glucose levels with the intervention (<xref ref-type="fig" rid="figure1">Figure 1A</xref>). Here, the action (or arm) is a mobile behavioral intervention that combines recommended amounts of carbohydrate intake and postprandial walking exercise, which have been proven to have specific effects on glucose levels [<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>]. Multiple alternative candidates for these recommended amounts are prepared in advance, and the optimal action for each user is selected based on the reward predicted in a two-stage manner, as provided in <xref ref-type="fig" rid="figure1">Figure 1B</xref>. First, in this reward prediction, the actual amounts of both carbohydrate intake and postprandial walking are predicted for each action candidate. For example, for carbohydrate intake, when a user receives a recommendation to take 60 g of carbohydrates, it is predicted that the user will take 70 g because the recommended amount is small according to their dietary preference. Second, based on the predicted dietary and exercise behaviors, the temporal trajectory of postprandial glucose levels is predicted. 
Third, according to the predicted glucose trajectory, batched actions are selected using the &#x03B5;-greedy method, based on the assumption that the greater the suppression of the increase in postprandial glucose, the higher the reward obtained by a user. Finally, after performing the selected actions using a real intervention, the behavioral adherence and glycemic response models are retrained using the buffer dataset of actual behaviors and glucose, and the parameters are updated accordingly. By repeating this experiential learning cycle, which is composed of action planning and real experiments, it is possible to fill the gap between each user&#x2019;s real response and the virtually predicted response, which yields a more optimal action selection for lowering postprandial glucose levels. Furthermore, to accelerate this cycle, we suggest incorporating prior knowledge into the prediction models for retraining.</p><p>We demonstrate the effectiveness of the proposed method through simulation experiments and clarify the conditions under which the proposed method works best. Then, with a simplified algorithm based on these conditions, we conducted a small-scale mobile intervention experiment with healthy participants. The results provide preliminary evidence that the proposed method improves both behavior and glucose levels. 
To the best of our knowledge, this is the first attempt to (1) clarify how to optimize behavioral interventions for improving glucose levels and (2) demonstrate the effectiveness of the method through simulation experiments and real-world intervention experiments.</p><fig position="float" id="figure1"><label>Figure 1.</label><caption><p>Purpose of our research (A) and overview of the proposed method (B).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e70826_fig01.png"/></fig></sec></sec><sec id="s2" sec-type="methods"><title>Methods</title><sec id="s2-1"><title>Overview</title><p>In the following subsections, we describe the problem setting, the details of the proposed method, the simulation experiment setup, and the real-world feasibility study setup. The proposed method is formulated as a model-based multiarmed bandit framework with a two-stage reward prediction model that integrates behavioral adherence modeling and glycemic response modeling to enable data-efficient personalization of dietary and exercise recommendations. Its effectiveness is evaluated through both simulation experiments and a real-world feasibility study.</p></sec><sec id="s2-2"><title>Problem Setting</title><p>The purpose of our method is to select and recommend the best action candidate for each user that is easy to perform and reduces postprandial glucose levels the most through interaction with the user. For this purpose, a multiarmed bandit algorithm is often used, as in previous literature [<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref>]. The multiarmed bandit is one of the reinforcement learning methods. It continuously addresses the dilemma of selecting actions to gather more information (exploration) vs selecting actions to maximize immediate reward based on current knowledge (exploitation). 
Thus, a bandit algorithm can adjust this exploration-exploitation trade-off [<xref ref-type="bibr" rid="ref22">22</xref>] by estimating the rewards associated with different actions.</p><p>In our method, the reward <inline-formula><mml:math id="ieqn1"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>r</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> can be defined as the function of the total increase in postprandial glucose levels after the target diet, which occurs after recommending an action <inline-formula><mml:math id="ieqn2"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>A</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> to a certain user. We mainly focus on postprandial glucose, given that glucose levels always rise after a meal and that deviations from the normal range of glucose levels occur most frequently after a meal [<xref ref-type="bibr" rid="ref23">23</xref>,<xref ref-type="bibr" rid="ref24">24</xref>].</p><p>An action in our method <inline-formula><mml:math id="ieqn3"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>A</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mi mathvariant="bold-script">A</mml:mi></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> can be represented as a combination of <inline-formula><mml:math id="ieqn4"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>A</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, where <inline-formula><mml:math id="ieqn5"><mml:mstyle><mml:mrow><mml:mstyle 
displaystyle="false"><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> means a recommendation for &#x201C;how much carbohydrates in grams a user should take at maximum in target diet,&#x201D; and <inline-formula><mml:math id="ieqn6"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> means a recommendation for &#x201C;the minimum duration a user should walk just after target diet.&#x201D; For clarity, users do not directly estimate carbohydrate content; instead, it is automatically calculated by a mobile app such as MyFitnessPal (MyFitnessPal, Inc) based on inputted food items. The accuracy of MyFitnessPal&#x2019;s carbohydrate calculations has been rigorously validated, showing strong correlations (<italic>r</italic>=0.90) with established food composition databases [<xref ref-type="bibr" rid="ref25">25</xref>]. Based on the assumption that each user has their own appropriate amounts for both actions, multiple action candidates are prepared in advance within a clinically plausible range. Regarding the amount of carbohydrate intake (<inline-formula><mml:math id="ieqn7"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>), we prepared 10 candidates in the range of 10&#x2010;100 g, referring to the low-carbohydrate diet recommendations [<xref ref-type="bibr" rid="ref26">26</xref>]. 
In addition, regarding the amount of postprandial walking exercise (<inline-formula><mml:math id="ieqn8"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>), we prepared 6 candidates between 5 and 30 minutes based on the experimental results of the effect of walking exercise on glucose levels [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]. Therefore, there are 60 action candidates in the action set <inline-formula><mml:math id="ieqn9"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mi mathvariant="bold-script">A</mml:mi></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> based on previous research [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref29">29</xref>], as shown in <xref ref-type="fig" rid="figure2">Figure 2</xref>.</p><fig position="float" id="figure2"><label>Figure 2.</label><caption><p>Illustration of action candidates. For default actions (<bold>&#x00D7;</bold>), the amount of carbohydrate intake is set within the range of 20&#x2010;60 g, and postprandial walking duration is set within the range of 10&#x2010;20 minutes. Although it is easy to adhere to taking a large amount of carbohydrate and walking for a few minutes, this leads to higher postprandial glucose levels. 
The main purpose is to find the best personalized action (<bold>&#x00D7;</bold>) that can achieve lower postprandial glucose levels while maintaining adherence for each user.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e70826_fig02.png"/></fig><p>Here, there are 2 approaches for action selection in a multiarmed bandit, that is, the experience- and the model-based approaches. The former (eg, Upper Confidence Bound) evaluates each action based on the actual observed rewards, which enables robust evaluation but requires extensive exploration for all actions. In contrast, the latter (eg, Thompson Sampling) can reduce exploratory trials because it can estimate the reward (or value) for each action by learning the parameters of the reward distribution. Returning to our setup (<xref ref-type="fig" rid="figure1">Figure 1A</xref>), each observation of the reward (glucose level change) requires users to wear a glucose monitoring device, imposing a recurring physical burden. 
Therefore, it is important to reduce the burden of exploration trials on users for real applications, and we adopted this model-based approach to predict the reward for untried actions efficiently from limited observations.</p><p>Now, the reward prediction model is represented as follows:</p><disp-formula id="equWL1"><mml:math id="eqn1"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi>r</mml:mi><mml:mo>=</mml:mo><mml:mi>R</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>A</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi mathvariant="bold">&#x0398;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>The input is an action <inline-formula><mml:math id="ieqn10"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>A</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, and <inline-formula><mml:math id="ieqn11"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi mathvariant="bold">&#x0398;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is a model parameter set of a target user. Because <inline-formula><mml:math id="ieqn12"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi mathvariant="bold">&#x0398;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is unknown at the initial trial, it is sequentially updated when observing user responses to previously recommended actions. 
Here, the user response is composed of 3 variables, (<inline-formula><mml:math id="ieqn13"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="bold-italic">z</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) as shown in <xref ref-type="fig" rid="figure3">Figure 3</xref>; <inline-formula><mml:math id="ieqn14"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi mathvariant="bold-italic">x</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the actual amount vector of each carbohydrate intake at the target diet, <inline-formula><mml:math id="ieqn15"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi mathvariant="bold-italic">z</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the actual walking duration vector of each walking event occurring after starting the target diet, and <inline-formula><mml:math id="ieqn16"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi mathvariant="bold-italic">y</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the time series of postprandial glucose levels after the start time of the target diet, denoted as <inline-formula><mml:math id="ieqn17"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. Because the glucose level within 1 hour after a diet is of clinical importance, we focused on user responses within 90 minutes of starting the diet. 
As a result, for each action, the observed data are represented as <inline-formula><mml:math id="ieqn18"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mi mathvariant="bold-script">D</mml:mi></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mi>A</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="bold-italic">z</mml:mi></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and are added to the buffered dataset <inline-formula><mml:math id="ieqn19"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mi mathvariant="bold-fraktur">B</mml:mi></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> after observation.</p><fig position="float" id="figure3"><label>Figure 3.</label><caption><p>Two examples of observed data with a small reward (A) and a large reward (B). The greater the postprandial increase in glucose levels, the smaller the reward.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e70826_fig03.png"/></fig><p>Here, glucose levels and behaviors in 1 observation &#x201C;<inline-formula><mml:math id="ieqn20"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mi mathvariant="bold-script">D</mml:mi></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>&#x201D; would involve high uncertainty because they may be affected by many factors. 
Thus, a parameter set &#x201C;<inline-formula><mml:math id="ieqn21"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi mathvariant="bold">&#x0398;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>&#x201D; is updated in a Bayesian fashion when obtaining batched observation data containing &#x201C;<inline-formula><mml:math id="ieqn22"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>M</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>&#x201D; actions and &#x201C;<inline-formula><mml:math id="ieqn23"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>M</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>&#x201D; user responses. That is, as shown on the right side of <xref ref-type="fig" rid="figure1">Figure 1B</xref>, parameter learning is executed after recommending &#x201C;<inline-formula><mml:math id="ieqn24"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>M</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>&#x201D; actions to the users. This can be considered as a batched multiarmed bandits setting [<xref ref-type="bibr" rid="ref30">30</xref>]. Hereafter, the method of selecting &#x201C;<inline-formula><mml:math id="ieqn25"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>M</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>&#x201D; actions is referred to as the policy.</p></sec><sec id="s2-3"><title>Algorithm of the Proposed Method</title><p>In this subsection, we describe the proposed multiarmed bandit algorithm based on two-stage reward prediction. The pseudocode for the proposed algorithm is shown in <xref ref-type="other" rid="box1">Textbox 1</xref>. 
In this algorithm, the following processes are executed in the given order: (1) actions are recommended to users according to the predetermined policy, and batch observation data are obtained; (2) the parameters of the behavioral adherence and glycemic response models are retrained using these observation data; (3) the reward for each action candidate is predicted using a two-stage reward model with retrained parameters; and (4) finally, the next batch actions are selected based on the reward prediction result using the &#x03B5;-greedy method.</p><p>These processes are repeated to improve the rewards and model parameters. This predictive model&#x2013;based approach allows the algorithm to estimate potential outcomes of all possible actions based on learned models, rather than requiring direct, burdensome observation for every action candidate. In the first iteration, default actions are recommended for pure parameter exploration, aiming to gather diverse data for initial model parameter estimation. Afterward, as the number of iterations increases, more robust parameter learning can be performed with more observational data, resulting in accurate reward prediction and action selection. However, if the convergence of this parameter learning requires long-term (eg, several months of) observation data owing to the uncertainty contained in the observation, the user would drop out due to the burden of such time-consuming experiments for collecting data. 
To prevent this, we propose introducing prior knowledge for each parameter and performing Bayesian learning.</p><boxed-text id="box1"><title> Algorithm 1: multiarmed bandit with two-stage reward prediction.</title><p><bold>INPUT</bold>: maximum iteration number <italic>I</italic>, batch size <italic>M</italic>, default policy <inline-formula><mml:math id="ieqn26"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, and action set <inline-formula><mml:math id="ieqn27"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mi mathvariant="bold-script">A</mml:mi></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>.</p><list list-type="order"><list-item><p>// <italic><bold>Repeat experimental learning cycle of intervention experiment and action planning for a user</bold></italic></p></list-item><list-item><p><bold>For</bold> <italic>i</italic> <bold>in</bold> 1:<italic>I</italic></p></list-item><list-item><p>// <italic>Perform intervention</italic></p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content> <bold>For</bold> <italic>j</italic> <bold>in</bold> 1:<italic>M</italic></p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content> Recommend the action <inline-formula><mml:math id="ieqn28"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mo stretchy="false">(</mml:mo><mml:mi>j</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> for the 
user</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content> Observe the user responses <inline-formula><mml:math id="ieqn29"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">z</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> &#x2026;&#x2026;&#x2026;&#x2026;&#x2026;&#x2026;&#x2026;&#x2026;&#x2026;&#x2026;&#x2026;&#x2026;&#x2026;&#x2026;&#x2026;&#x2026;&#x2026;&#x2026;&#x2026;&#x2026; (&#x2020;)</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content> <bold>End</bold></p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content> Update buffered dataset <inline-formula><mml:math id="ieqn30"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-fraktur">B</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> by adding new batch data <inline-formula><mml:math id="ieqn31"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi
mathvariant="bold-script">D</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mover><mml:munder><mml:mo>&#x22C3;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munder><mml:mi>M</mml:mi></mml:mover><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">z</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content> // <italic>Train each user model from subset of buffered dataset</italic></p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content> Train the parameters <inline-formula><mml:math id="ieqn32"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mi>d</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> in dietary adherence model from the subset data <inline-formula><mml:math id="ieqn33"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mrow><mml:mi
mathvariant="bold-fraktur">B</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>d</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content> Train the parameters <inline-formula><mml:math id="ieqn34"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mi>e</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> in exercise adherence model from the subset data</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content> Train the parameters <inline-formula><mml:math id="ieqn35"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mi>g</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> in glycemic response model from the subset data</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content> // <italic>Two-stage reward prediction for each action</italic></p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content> <bold>For</bold> <inline-formula><mml:math id="ieqn36"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2208;</mml:mo><mml:mrow><mml:mi mathvariant="bold-script">A</mml:mi></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content> 
Predict dietary behavior <inline-formula><mml:math id="ieqn37"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mi>x</mml:mi><mml:mi>k</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> with <inline-formula><mml:math id="ieqn38"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mi>d</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> under action <inline-formula><mml:math id="ieqn39"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>A</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content> Predict exercise behavior <inline-formula><mml:math id="ieqn40"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mi>z</mml:mi><mml:mi>k</mml:mi><mml:mo>&#x2217;</mml:mo></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> with <inline-formula><mml:math id="ieqn41"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mi>e</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> under action <inline-formula><mml:math id="ieqn42"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>A</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content><named-content 
content-type="indent">&#x2003;</named-content> Predict glucose increase trajectory <inline-formula><mml:math id="ieqn43"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mi>k</mml:mi><mml:mo>&#x2217;</mml:mo></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> with <inline-formula><mml:math id="ieqn44"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mi>g</mml:mi></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> under predicted behaviors <inline-formula><mml:math id="ieqn45"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mi>x</mml:mi><mml:mi>k</mml:mi><mml:mo>&#x2217;</mml:mo></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mi>z</mml:mi><mml:mi>k</mml:mi><mml:mo>&#x2217;</mml:mo></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content> Obtain reward <inline-formula><mml:math id="ieqn46"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>r</mml:mi><mml:mi>k</mml:mi></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> from glucose increase trajectory <inline-formula><mml:math id="ieqn47"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mi>k</mml:mi><mml:mo>&#x2217;</mml:mo></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula></p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content> <bold>End</bold></p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content> // <italic>Action
selection for next intervention</italic></p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content> <bold>If</bold> <italic>Rand</italic>() &#x003C; &#x03B5;</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content> Select actions randomly as <inline-formula><mml:math id="ieqn48"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>d</mml:mi><mml:mi>o</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mover><mml:munder><mml:mo>&#x22C3;</mml:mo><mml:mrow><mml:mi>m</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munder><mml:mi>M</mml:mi></mml:mover><mml:mi>R</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>d</mml:mi><mml:mi>o</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi mathvariant="bold-script">A</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> // for exploration</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content> <bold>Else</bold></p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content><named-content content-type="indent">&#x2003;</named-content> Select actions greedily as <inline-formula><mml:math id="ieqn49"><mml:mstyle><mml:mrow><mml:mstyle
displaystyle="false"><mml:msub><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>+</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msup><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mover><mml:munder><mml:mo>&#x22C3;</mml:mo><mml:mrow><mml:mi>m</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munder><mml:mi>M</mml:mi></mml:mover><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:munder><mml:mrow><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">g</mml:mi><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">x</mml:mi></mml:mrow><mml:mi>k</mml:mi></mml:munder><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> // for exploitation</p></list-item><list-item><p><named-content content-type="indent">&#x2003;</named-content> <bold>End</bold></p></list-item><list-item><p><bold>End</bold></p></list-item></list></boxed-text></sec><sec id="s2-4"><title>Two-Stage Reward Prediction Model</title><p>Here, we first focus on describing a two-stage reward prediction model that integrates behavioral adherence models into a glycemic response model. The starting point for modeling is based on previous clinical findings, which state that the changes in postprandial glucose are highly dependent on the amount of carbohydrates an individual takes [<xref ref-type="bibr" rid="ref31">31</xref>] and the amount of exercise performed after a meal [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]. Furthermore, these dietary and exercise behaviors are assumed to depend on the kind of actions recommended immediately beforehand. 
Based on these premises, the reward prediction model is described as follows:</p><disp-formula id="equWL3"><label>(1)</label><mml:math id="eqn2"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi>R</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>A</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi mathvariant="bold">&#x0398;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>f</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msup><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>A</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi mathvariant="bold">&#x0398;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="equWL4"><label>(2)</label><mml:math id="eqn3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msup><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>A</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi mathvariant="bold">&#x0398;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi mathvariant="bold-italic">g</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>z</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula 
id="equWL5"><label>(3)</label><mml:math id="eqn4"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:msup><mml:mi>z</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn50"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>R</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>A</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi mathvariant="bold">&#x0398;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the reward, <inline-formula><mml:math id="ieqn51"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi 
mathvariant="bold-italic">y</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>A</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi mathvariant="bold">&#x0398;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the glucose time series, <inline-formula><mml:math id="ieqn52"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the amount of carbohydrate intake, and <inline-formula><mml:math id="ieqn53"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>z</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the walking duration. Further, <inline-formula><mml:math id="ieqn54"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi mathvariant="bold">&#x0398;</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn55"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi 
mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> represent parameters in a glycemic response model, a carbohydrate intake model, and a postprandial exercise model, respectively. These parameters are defined for each user. In actual reward prediction, the first stage is to predict the dietary and exercise behaviors using Equation 3 with behavior adherence models <inline-formula><mml:math id="ieqn56"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn57"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, and the second stage predicts the trajectory of the postprandial glucose increase <inline-formula><mml:math id="ieqn58"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> using Equation 2 with the predicted behaviors (<inline-formula><mml:math id="ieqn59"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>z</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>). 
Finally, the glucose increase trajectory is converted into a reward scale using Equation 1.</p></sec><sec id="s2-5"><title>Behavior Adherence Model</title><p>The first stage is behavior prediction. Because each user receives a specific action immediately beforehand, we can assume that they act according to this action. However, certain actions are difficult to execute in real-world scenarios. For example, most users would find it easy to keep their carbohydrate intake less than 100 g, but it is difficult to consume less than 10 g. In addition, the actual reaction to reducing carbohydrate intake to 50 g may vary for each user. This is consistent with the claim in the theory of planned behavior [<xref ref-type="bibr" rid="ref32">32</xref>], which states that behavioral intentions may be influenced by &#x201C;perceived behavioral control.&#x201D; Inspired by this, in the behavior adherence model, we hypothesize that there is a perceptual critical point specific to each user for each action candidate, as indicated in <xref ref-type="fig" rid="figure2">Figure 2</xref>, and that the actual behavioral tendency changes depending on this critical point. Specifically, as shown in Figure S1 (left) in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, we assume that if the recommended amount of carbohydrate <inline-formula><mml:math id="ieqn60"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is higher than the critical point <inline-formula><mml:math id="ieqn61"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, the user is likely to try to behave in accordance with the action.
In contrast, if <inline-formula><mml:math id="ieqn62"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is lower than <inline-formula><mml:math id="ieqn63"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, the user is likely to ignore the action and behave normally.</p><p>Based on the aforementioned assumptions, the behavior adherence model of carbohydrate intake is represented by the following linear model:</p><disp-formula id="equWL6"><label>(4)</label><mml:math id="eqn5"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="left left" rowspacing=".2em" columnspacing="1em" 
displaystyle="false"><mml:mtr><mml:mtd><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>x</mml:mi><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2265;</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><
mml:mtd><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mfrac><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>x</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>&#x003C;</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"/></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn64"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math 
id="ieqn65"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the perceptual critical point for a target user; <inline-formula><mml:math id="ieqn66"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is a parameter that represents the normal carbohydrate intake at the target diet. We also introduce Gaussian noise terms <inline-formula><mml:math id="ieqn67"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>x</mml:mi><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x223C;</mml:mo><mml:mi>N</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>x</mml:mi><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn68"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>x</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x223C;</mml:mo><mml:mi>N</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>x</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> to represent the uncertainty of carbohydrate intake from other factors.
Here, different variance parameters (<inline-formula><mml:math id="ieqn69"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>x</mml:mi><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>x</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) are set because the level of behavioral uncertainty may depend on whether the critical point is exceeded.</p><p>Subsequently, the adherence model for postprandial exercise is based on the same concept, as described for carbohydrate intake above. As shown in Figure S1 (right) in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>, if the recommended walking duration <inline-formula><mml:math id="ieqn70"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> included in the action is shorter than the perceptual critical point of each user, it is expected to be adhered to. Otherwise, the recommended action is not adhered to, that is, the actual walking duration tends to be zero. 
Specifically, the exercise adherence model is represented using the following linear model:</p><disp-formula id="E5"><label>(5)</label><mml:math id="eqn6"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mtable columnalign="left left" rowspacing=".2em" columnspacing="1em" displaystyle="false"><mml:mtr><mml:mtd><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>z</mml:mi><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:m
text><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2264;</mml:mo><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr><mml:mtr><mml:mtd><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>z</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mtext>&#x00A0;</mml:mtext><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>&#x003E;</mml:mo><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:
mrow><mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable><mml:mo fence="true" stretchy="true" symmetric="true"/></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn71"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, <inline-formula><mml:math id="ieqn72"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the perceptual critical point, and <inline-formula><mml:math id="ieqn73"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the parameter that represents the upper bound of walking duration in terms of the user&#x2019;s physical strength. In this model, it is assumed that the walking duration is zero at the upper bound. 
Further, we introduce Gaussian noises <inline-formula><mml:math id="ieqn74"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>z</mml:mi><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>&#x223C;</mml:mo><mml:mi>N</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>z</mml:mi><mml:mn>1</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn75"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>z</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msub><mml:mo>&#x223C;</mml:mo><mml:mi>N</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:mi>z</mml:mi><mml:mn>2</mml:mn></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> to represent the uncertainty of exercise behavior from other factors.</p><p>Based on these behavior adherence models, each user&#x2019;s dietary behavior parameters <inline-formula><mml:math id="ieqn76"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and exercise behavior parameters <inline-formula><mml:math id="ieqn77"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are inferred according to the buffered observation data <inline-formula><mml:math id="ieqn78"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi 
mathvariant="bold-fraktur">B</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> accumulated for each user at an iterative time point <inline-formula><mml:math id="ieqn79"><mml:mi>i</mml:mi></mml:math></inline-formula>. Then, by deploying the inferred parameters <inline-formula><mml:math id="ieqn80"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> into each corresponding model <inline-formula><mml:math id="ieqn81"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mo>&#x22C5;</mml:mo><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, the behaviors <inline-formula><mml:math id="ieqn82"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, <inline-formula><mml:math id="ieqn83"><mml:mstyle><mml:mrow><mml:mstyle 
displaystyle="false"><mml:msup><mml:mi>z</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> in response to each action candidate can be predicted.</p></sec><sec id="s2-6"><title>Glycemic Response Model</title><p>Next, the postprandial glucose trajectory <inline-formula><mml:math id="ieqn84"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is predicted based on each behavior <inline-formula><mml:math id="ieqn85"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>z</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> predicted for action <inline-formula><mml:math id="ieqn86"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>A</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. Our approach models this trajectory based on clinical findings. First, it is well known that glucose levels increase with carbohydrate intake. For example, Ashrafi et al [<xref ref-type="bibr" rid="ref33">33</xref>] showed that the actual postprandial glucose trajectory could be fitted by a response curve to carbohydrate intake. Second, it has been shown that exercise, such as walking, immediately lowers glucose levels, as previously mentioned under the &#x201C;Two-Stage Reward Prediction Model&#x201D; subheading [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref27">27</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]. 
The mechanism is that energy expenditure in muscle tissues due to exercise causes glucose absorption for energy supply [<xref ref-type="bibr" rid="ref34">34</xref>]. For example, Jankovic et al [<xref ref-type="bibr" rid="ref35">35</xref>] and Xie and Wang [<xref ref-type="bibr" rid="ref36">36</xref>] adopted a linear additive model to predict the glucose trajectory and showed that predictive performance was improved by adding the exercise effect on glucose as a factor, in addition to the carbohydrate intake effect. Based on this glucose modeling, we hypothesized that the observed postprandial glucose trajectory <inline-formula><mml:math id="ieqn87"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi mathvariant="bold-italic">y</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> can be represented by adding the response curve <inline-formula><mml:math id="ieqn88"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-script">R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> to carbohydrate intake and the response curve <inline-formula><mml:math id="ieqn89"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-script">R</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> to exercise to the baseline preprandial glucose level <inline-formula><mml:math id="ieqn90"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>b</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, as follows [<xref ref-type="bibr" rid="ref37">37</xref>]:</p><disp-formula id="equWL8"><label>(6)</label><mml:math id="eqn7"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle 
displaystyle="true" scriptlevel="0"><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mo>=</mml:mo><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>b</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-script">R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">&#x03C4;</mml:mi><mml:mrow><mml:mi mathvariant="bold-italic">x</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-script">R</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi mathvariant="bold-italic">z</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">&#x03C4;</mml:mi><mml:mrow><mml:mi mathvariant="bold-italic">z</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">e</mml:mi><mml:mrow><mml:mi mathvariant="bold-italic">y</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Because the time range of the glucose trajectory that we focus on here is short-term, we assume that the baseline value <inline-formula><mml:math id="ieqn91"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>y</mml:mi><mml:mrow><mml:mi>b</mml:mi><mml:mi>a</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is constant and substitute the average of the glucose history from 15 minutes before starting the target diet. 
Here, <inline-formula><mml:math id="ieqn92"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">e</mml:mi><mml:mrow><mml:mi mathvariant="bold-italic">y</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the vector of Gaussian noise following <inline-formula><mml:math id="ieqn93"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>N</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn mathvariant="bold">0</mml:mn><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">&#x03C3;</mml:mi><mml:mrow><mml:mi>y</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. The response curves <inline-formula><mml:math id="ieqn94"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-script">R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-script">R</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are represented by the following equations:</p><disp-formula id="equWL9"><label>(7)</label><mml:math id="eqn8"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="bold-script">R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi 
mathvariant="bold-italic">&#x03C4;</mml:mi><mml:mrow><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>P</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mi>e</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mfrac><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>0.5</mml:mn><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi mathvariant="bold">&#x0394;</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mn>3</mml:mn><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:msubsup><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:mfrac><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:msub><mml:mi>d</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi mathvariant="bold">&#x0394;</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi mathvariant="bold-italic">t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi 
mathvariant="bold-italic">x</mml:mi><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="equWL10"><label>(8)</label><mml:math id="eqn9"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mrow><mml:mi mathvariant="bold-script">R</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi mathvariant="bold-italic">z</mml:mi><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">&#x03C4;</mml:mi><mml:mrow><mml:mi>z</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>Q</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mi>e</mml:mi><mml:mi>x</mml:mi><mml:mi>p</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mfrac><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mn>0.5</mml:mn><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi 
mathvariant="bold">&#x0394;</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>&#x2212;</mml:mo><mml:mn>3</mml:mn><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow><mml:msubsup><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msubsup></mml:mfrac><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi>h</mml:mi><mml:mrow><mml:msub><mml:mi>e</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>z</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:mtext>&#x00A0;</mml:mtext><mml:msub><mml:mi mathvariant="bold">&#x0394;</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mi mathvariant="bold-italic">t</mml:mi><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi mathvariant="bold-italic">z</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn95"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi mathvariant="bold-italic">z</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> represent the times of occurrence of the <italic>i</italic>th carbohydrate intake and the <italic>j</italic>th walking event, respectively. 
Further, <inline-formula><mml:math id="ieqn96"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>P</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> represents the number of carbohydrate intake events within the target diet, usually <inline-formula><mml:math id="ieqn97"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>P</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. Also, <inline-formula><mml:math id="ieqn98"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>Q</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> represents the number of walking events that occurred within 90 minutes of starting the target diet. In these equations, we adopted a bell-shaped function as the response curve, similar to that in previous works [<xref ref-type="bibr" rid="ref33">33</xref>,<xref ref-type="bibr" rid="ref38">38</xref>], owing to its interpretability and the small number of parameters. Some specific examples of each response curve, <inline-formula><mml:math id="ieqn99"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-script">R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-script">R</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, are shown in Figure S2 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>. 
Each response curve starts at the time of event occurrence <inline-formula><mml:math id="ieqn100"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mo>,</mml:mo><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mi mathvariant="bold-italic">z</mml:mi><mml:mo>,</mml:mo><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. The response curve is then amplified based on the amount of carbohydrate intake <inline-formula><mml:math id="ieqn101"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> or the duration of each walk <inline-formula><mml:math id="ieqn102"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">z</mml:mi><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. Here, <inline-formula><mml:math id="ieqn103"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are parameters that represent the degree of amplification with respect to the volume of each behavior. 
Further, <inline-formula><mml:math id="ieqn104"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are parameters that represent the degree of time delay of the response. A hyperparameter of the prior is introduced for each parameter <inline-formula><mml:math id="ieqn105"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, and we describe how this can be obtained in the subsequent &#x201C;Parameter Learning&#x201D; subheading.</p><p>Based on the above setting, specifically defined by Equations 6-8, it can be seen that the parameters of our glycemic response model <inline-formula><mml:math id="ieqn106"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi 
mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> for each user are estimated from the observed behavior sequence (<inline-formula><mml:math id="ieqn107"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="bold-italic">z</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) and glucose trajectory (<inline-formula><mml:math id="ieqn108"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi mathvariant="bold-italic">y</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>), at each iteration point (<inline-formula><mml:math id="ieqn109"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>). 
In this parameter learning, we use the observed behaviors (<inline-formula><mml:math id="ieqn110"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="bold-italic">z</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) rather than the predicted behaviors (<inline-formula><mml:math id="ieqn111"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>z</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) as training data to identify the parameters more accurately, reflecting the real relationship. In contrast, the predicted behaviors (<inline-formula><mml:math id="ieqn112"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>z</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) are used as inputs for reward prediction for the action candidate <inline-formula><mml:math id="ieqn113"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>A</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. 
Accordingly, the predicted postprandial trajectory is represented as follows:</p><disp-formula id="equWL11"><label>(9)</label><mml:math id="eqn10"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi mathvariant="bold-italic">g</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>z</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-script">R</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>+</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-script">R</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msup><mml:mi>z</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:msup><mml:mi>z</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mrow></mml:msub><mml:mrow><mml:mo 
stretchy="false">|</mml:mo></mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where the baseline term is removed compared to Equation 6. This is because the baseline term does not contribute to the glucose increase and, hence, the reward. Then, <inline-formula><mml:math id="ieqn114"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>A</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi mathvariant="bold">&#x0398;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi mathvariant="bold-italic">g</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msup><mml:mi>x</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msup><mml:mi>z</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> represents the series of pure increase of postprandial glucose affected by behaviors. 
It should be noted that for each iteration (<inline-formula><mml:math id="ieqn115"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>), the estimated parameter values <inline-formula><mml:math id="ieqn116"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are used as <inline-formula><mml:math id="ieqn117"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> in these equations.</p><p>Finally, the glucose increase trajectory, <inline-formula><mml:math id="ieqn118"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, is converted into a reward scale. The reward should be designed such that the smaller the degree of glucose increase, the higher the reward. We use the incremental area under the curve (iAUC), a traditional glucose metric [<xref ref-type="bibr" rid="ref39">39</xref>], as the degree of glucose increase. 
Based on these factors, we perform reward conversion using the sigmoid function, as shown below:</p><disp-formula id="equWL12"><label>(10)</label><mml:math id="eqn11"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi>f</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:msup><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mn>1</mml:mn><mml:mo>+</mml:mo><mml:mi>exp</mml:mi><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mo>&#x2212;</mml:mo><mml:mi>a</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>iAUC</mml:mi><mml:mo>&#x2212;</mml:mo><mml:mi>b</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="equWL13"><mml:math id="eqn12"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi>iAUC</mml:mi><mml:mo>=</mml:mo><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:msup><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mrow><mml:mrow><mml:msup><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mo>+</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mi>y</mml:mi><mml:mrow><mml:mi>t</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msubsup></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where iAUC is the total value of the trajectory of glucose increase. 
As described in the &#x201C;Problem Setting&#x201D; subheading, <inline-formula><mml:math id="ieqn119"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>&#x03C4;</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> denotes the start time of the diet, and <italic>T</italic>=90 minutes. The constants <inline-formula><mml:math id="ieqn120"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>a</mml:mi><mml:mo>,</mml:mo><mml:mi>b</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are parameters for the conversion from the iAUC to the reward scale. We set <inline-formula><mml:math id="ieqn121"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>a</mml:mi><mml:mo>=</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mn>3</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn122"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>b</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> empirically, based on the distribution of the iAUC. 
Consequently, the reward value ranges between 0 and 1.</p><p>Thus, for each action candidate <inline-formula><mml:math id="ieqn123"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>A</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, a reward calculated from postprandial glucose increase <inline-formula><mml:math id="ieqn124"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>R</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>A</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi mathvariant="bold">&#x0398;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mi>f</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msup><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>A</mml:mi><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi mathvariant="bold">&#x0398;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is predicted based on our original two-stage prediction.</p></sec><sec id="s2-7"><title>Action Selection</title><p>After the reward is predicted for each action candidate, the policy for selecting batched actions for the real intervention is determined. In principle, the action that maximizes the predicted reward would be selected. However, the predicted reward is not always accurate due to overfitting caused by scarce training data at the initial stage. Moreover, if the action is always selected to maximize that reward, the real action for the user becomes fixed, and the observed user response subsequently exhibits little variation, which also leads to overfitting. 
Therefore, we adopt the <inline-formula><mml:math id="ieqn125"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>-greedy method for balancing exploration and exploitation. For exploitation, assuming that the prediction model is accurate, actions that maximize the reward (<inline-formula><mml:math id="ieqn126"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) should be selected. For exploration, assuming that the prediction model needs to be trained more, actions are selected randomly (<inline-formula><mml:math id="ieqn127"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>d</mml:mi><mml:mi>o</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>). 
Such policies <inline-formula><mml:math id="ieqn128"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn129"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>d</mml:mi><mml:mi>o</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are taken with probabilities of <inline-formula><mml:math id="ieqn130"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mn>1</mml:mn><mml:mo>&#x2212;</mml:mo><mml:mi>&#x03B5;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn131"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, respectively. Because parameter learning is performed in a batched manner, <inline-formula><mml:math id="ieqn132"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>M</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> actions are selected simultaneously for each policy. 
Therefore, <inline-formula><mml:math id="ieqn133"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn134"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>d</mml:mi><mml:mi>o</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are represented as follows:</p><disp-formula id="equWL14"><mml:math id="eqn13"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msup><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mover><mml:munder><mml:mo>&#x22C3;</mml:mo><mml:mrow><mml:mi>m</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munder><mml:mi>M</mml:mi></mml:mover><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:munder><mml:mrow><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">r</mml:mi><mml:mi mathvariant="normal">g</mml:mi><mml:mi mathvariant="normal">m</mml:mi><mml:mi mathvariant="normal">a</mml:mi><mml:mi mathvariant="normal">x</mml:mi></mml:mrow><mml:mi>k</mml:mi></mml:munder><mml:mtext>&#x00A0;</mml:mtext><mml:mi>R</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mi>k</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:mi 
mathvariant="bold">&#x0398;</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>d</mml:mi><mml:mi>o</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mover><mml:munder><mml:mo>&#x22C3;</mml:mo><mml:mrow><mml:mi>m</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munder><mml:mi>M</mml:mi></mml:mover><mml:mi>R</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>d</mml:mi><mml:mi>o</mml:mi><mml:mi>m</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi mathvariant="bold-script">A</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <italic>Random</italic>(<inline-formula><mml:math id="ieqn135"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mi mathvariant="bold-script">A</mml:mi></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) represents an operation for extracting an element randomly from a set <inline-formula><mml:math id="ieqn136"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mi mathvariant="bold-script">A</mml:mi></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. 
Specifically, in <inline-formula><mml:math id="ieqn137"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>d</mml:mi><mml:mi>o</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, this operation is repeated <inline-formula><mml:math id="ieqn138"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>M</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> times independently.</p></sec><sec id="s2-8"><title>Parameter Learning</title><p>The learning of the parameter set <inline-formula><mml:math id="ieqn139"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi mathvariant="bold">&#x0398;</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> in the reward model is performed at each iteration time just after <inline-formula><mml:math id="ieqn140"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>M</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>-intervention experiments are finished, and the batched observation data are accumulated in the buffered dataset. In this learning, all the buffered datasets accumulated up to the <inline-formula><mml:math id="ieqn141"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>th iteration point are used. 
If the <inline-formula><mml:math id="ieqn142"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>th batch of data <inline-formula><mml:math id="ieqn143"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-script">D</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are observed, the training dataset at that time becomes <inline-formula><mml:math id="ieqn144"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-fraktur">B</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mover><mml:munder><mml:mo>&#x22C3;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munder><mml:mi>i</mml:mi></mml:mover><mml:msub><mml:mrow><mml:mi mathvariant="bold-script">D</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. The batched data includes a history of 4 types of data (action, postprandial glucose level, amount of carbohydrate intake, and postprandial walking duration) for <inline-formula><mml:math id="ieqn145"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>M</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> target meals. 
This is represented as <inline-formula><mml:math id="ieqn146"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-script">D</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mover><mml:munder><mml:mo>&#x22C3;</mml:mo><mml:mrow><mml:mi>m</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munder><mml:mi>M</mml:mi></mml:mover><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">z</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. In the observation data, carbohydrate intake and postprandial walking can occur multiple times per target diet. Therefore, they are represented as vectors (recall Figure 3B). 
In practice, <inline-formula><mml:math id="ieqn147"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are separately trained from the following subsets of the buffer data <inline-formula><mml:math id="ieqn148"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mrow><mml:mi mathvariant="bold-fraktur">B</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>g</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi mathvariant="bold-fraktur">B</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>d</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi mathvariant="bold-fraktur">B</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>e</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> for each parameter.</p><disp-formula id="equWL15"><mml:math id="eqn14"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msubsup><mml:mrow><mml:mi 
mathvariant="bold-fraktur">B</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>g</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mover><mml:munder><mml:mo>&#x22C3;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munder><mml:mi>i</mml:mi></mml:mover><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mover><mml:munder><mml:mo>&#x22C3;</mml:mo><mml:mrow><mml:mi>m</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munder><mml:mi>M</mml:mi></mml:mover><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">y</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">x</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">z</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="equWL16"><mml:math id="eqn15"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msubsup><mml:mrow><mml:mi 
mathvariant="bold-fraktur">B</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>d</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mover><mml:munder><mml:mo>&#x22C3;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munder><mml:mi>i</mml:mi></mml:mover><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mover><mml:munder><mml:mo>&#x22C3;</mml:mo><mml:mrow><mml:mi>m</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munder><mml:mi>M</mml:mi></mml:mover><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">x</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>,</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><disp-formula id="equWL17"><mml:math id="eqn16"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msubsup><mml:mrow><mml:mi 
mathvariant="bold-fraktur">B</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>e</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>=</mml:mo><mml:mover><mml:munder><mml:mo>&#x22C3;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munder><mml:mi>i</mml:mi></mml:mover><mml:mrow><mml:mo>[</mml:mo><mml:mrow><mml:mover><mml:munder><mml:mo>&#x22C3;</mml:mo><mml:mrow><mml:mi>m</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munder><mml:mi>M</mml:mi></mml:mover><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>A</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">z</mml:mi></mml:mrow><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mrow><mml:mo>]</mml:mo></mml:mrow><mml:mo>.</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>Here, <inline-formula><mml:math id="ieqn149"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msubsup><mml:mrow><mml:mi mathvariant="bold-fraktur">B</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>g</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi mathvariant="bold-fraktur">B</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>d</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup><mml:mo>,</mml:mo><mml:msubsup><mml:mrow><mml:mi mathvariant="bold-fraktur">B</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>e</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msubsup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are the training datasets for the 
glycemic response, dietary adherence, and exercise adherence models, respectively.</p><p>To improve the efficiency of this learning process, we introduce prior distributions for each parameter. Specifically, for obtaining prior information for the behavioral parameter sets <inline-formula><mml:math id="ieqn150"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, we ask each user to answer questions corresponding to each parameter beforehand and use the responses to define the prior distribution. For example, the question for the critical point <inline-formula><mml:math id="ieqn151"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is &#x201C;If you were to reduce carbohydrates in your dinner, what would you say is the minimum amount of carbohydrates?&#x201D; Then, the user&#x2019;s answer is set to the mean <inline-formula><mml:math id="ieqn152"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> of the Gaussian prior distribution <inline-formula><mml:math id="ieqn153"><mml:mstyle><mml:mrow><mml:mstyle 
displaystyle="false"><mml:mi>N</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi>&#x03BC;</mml:mi><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03C3;</mml:mi><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. For the other parameters <inline-formula><mml:math id="ieqn154"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, each prior distribution is set similarly.</p><p>On the other hand, the glycemic parameter set <inline-formula><mml:math id="ieqn155"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> cannot be clarified through user interviews because each parameter is related to a complex individual biological response. Therefore, we adopt a data-driven approach wherein the prior distribution is pretrained through the original upstream task. First, before starting the first intervention experiment, we recruit additional participants belonging to the same disease group as the target users. 
Second, we collect a glucose dataset, <inline-formula><mml:math id="ieqn156"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mrow><mml:mi mathvariant="bold-script">D</mml:mi></mml:mrow><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mi>g</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup><mml:mo>=</mml:mo><mml:mover><mml:munder><mml:mo>&#x22C3;</mml:mo><mml:mrow><mml:mi>l</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:munder><mml:mi>L</mml:mi></mml:mover><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">x</mml:mi><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">z</mml:mi><mml:mrow><mml:mi>l</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> for L diet events from each participant. Third, this dataset is used to infer glycemic parameters <inline-formula><mml:math id="ieqn157"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> for each participant. 
Finally, the distribution of these parameters <inline-formula><mml:math id="ieqn158"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> among all participants is used as the Gaussian prior distribution of <inline-formula><mml:math id="ieqn159"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. Specifically, the mean <inline-formula><mml:math id="ieqn160"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mover><mml:mrow><mml:mi mathvariant="bold-italic">&#x03BC;</mml:mi></mml:mrow><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and the variance <inline-formula><mml:math id="ieqn161"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mover><mml:mrow><mml:mi mathvariant="bold-italic">&#x03C3;</mml:mi></mml:mrow><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> of these parameters <inline-formula><mml:math id="ieqn162"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> are set as <inline-formula><mml:math id="ieqn163"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>N</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mover><mml:mrow><mml:mi 
mathvariant="bold-italic">&#x03BC;</mml:mi></mml:mrow><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mover><mml:mrow><mml:mi mathvariant="bold-italic">&#x03C3;</mml:mi></mml:mrow><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. Here, each parameter <inline-formula><mml:math id="ieqn164"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is assumed to be generated independently.</p><p>Through this process, each prior distribution of the parameter set <inline-formula><mml:math id="ieqn165"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is obtained before the first intervention experiment, and this prior is used for Bayesian parameter learning at every iteration time point. 
This parameter learning is performed by executing a Markov Chain Monte Carlo simulation with the No-U-Turn Sampler implemented in RStan (Stan Development Team) [<xref ref-type="bibr" rid="ref40">40</xref>].</p></sec><sec id="s2-9"><title>Simulation Experiment Setup</title><p>We used a simulation experiment to study whether our online algorithm can improve postprandial glucose levels with personalized recommendations. In addition, to carry out real experiments with real personalized recommendations, we investigated the best setting for the proposed method to maximize performance. Specifically, we aimed to clarify the following questions through simulation experiments:</p><list list-type="bullet"><list-item><p>Q1: In action selection, how should the trade-off point between exploration and exploitation be set to maximize the cumulative reward?</p></list-item><list-item><p>Q2: How fast does parameter learning converge when introducing a prior distribution into the reward prediction model?</p></list-item></list><p>To answer these questions, we set up several virtual users who exhibited different behavioral and glycemic responses, performed repeated action planning and pseudo-intervention experiments using the proposed method for each user, and evaluated the experimental results. At the same time, we also show that by introducing the proposed method, the reward performance can be improved compared to selecting default actions uniformly.</p></sec><sec id="s2-10"><title>Virtual User Setting</title><p>In the real world, behavioral tendencies and glycemic responses differ from user to user. 
Therefore, in this simulation, a parameter set <inline-formula><mml:math id="ieqn166"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> for each user was set to ensure that virtual users exhibit response patterns as diverse as possible. <xref ref-type="table" rid="table1">Table 1</xref> shows the actual settings of parameter values for each user. The total number of virtual users was 10.</p><table-wrap id="t1" position="float"><label>Table 1.</label><caption><p>User settings for each parameter.</p></caption><table id="table1" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom" rowspan="2">Virtual user</td><td align="left" valign="bottom" colspan="2">Dietary adherence model</td><td align="left" valign="bottom" colspan="2">Exercise adherence model</td><td align="left" valign="bottom" colspan="4">Glycemic response model</td></tr><tr><td align="left" valign="bottom"><inline-formula><mml:math id="ieqn167"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>(g)<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="bottom"><inline-formula><mml:math id="ieqn168"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>(g)<sup><xref ref-type="table-fn" rid="table1fn2">b</xref></sup></td><td align="left" 
valign="bottom"><inline-formula><mml:math id="ieqn169"><mml:msub><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>(min)<sup><xref ref-type="table-fn" rid="table1fn1">a</xref></sup></td><td align="left" valign="bottom"><inline-formula><mml:math id="ieqn170"><mml:msub><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula>(min)<sup><xref ref-type="table-fn" rid="table1fn3">c</xref></sup></td><td align="left" valign="bottom"><inline-formula><mml:math id="ieqn171"><mml:msub><mml:mrow><mml:mi>&#x03B2;</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula><sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup></td><td align="left" valign="bottom"><inline-formula><mml:math id="ieqn172"><mml:msub><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula><sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup></td><td align="left" valign="bottom"><inline-formula><mml:math id="ieqn173"><mml:msub><mml:mrow><mml:mi>&#x03B2;</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula><sup><xref ref-type="table-fn" rid="table1fn4">d</xref></sup></td><td align="left" valign="bottom"><inline-formula><mml:math id="ieqn174"><mml:msub><mml:mrow><mml:mi>&#x03B1;</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula><sup><xref ref-type="table-fn" rid="table1fn5">e</xref></sup></td></tr></thead><tbody><tr><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/><td align="left" valign="top"/></tr><tr><td align="left" 
valign="top"><inline-formula><mml:math id="ieqn175"><mml:mi>I</mml:mi><mml:mi>D</mml:mi><mml:mn>1</mml:mn></mml:math></inline-formula></td><td align="left" valign="top">50</td><td align="left" valign="top">100</td><td align="left" valign="top">20</td><td align="left" valign="top">60</td><td align="left" valign="top">0.05</td><td align="left" valign="top">20</td><td align="left" valign="top">&#x2212;0.15</td><td align="left" valign="top">6.0</td></tr><tr><td align="left" valign="top"><inline-formula><mml:math id="ieqn176"><mml:mi>I</mml:mi><mml:mi>D</mml:mi><mml:mn>2</mml:mn></mml:math></inline-formula></td><td align="left" valign="top">30</td><td align="left" valign="top">60</td><td align="left" valign="top">10</td><td align="left" valign="top">40</td><td align="left" valign="top">0.05</td><td align="left" valign="top">20</td><td align="left" valign="top">&#x2212;0.20</td><td align="left" valign="top">8.0</td></tr><tr><td align="left" valign="top"><inline-formula><mml:math id="ieqn177"><mml:mi>I</mml:mi><mml:mi>D</mml:mi><mml:mn>3</mml:mn></mml:math></inline-formula></td><td align="left" valign="top">70</td><td align="left" valign="top">120</td><td align="left" valign="top">30</td><td align="left" valign="top">80</td><td align="left" valign="top">0.05</td><td align="left" valign="top">20</td><td align="left" valign="top">&#x2212;0.15</td><td align="left" valign="top">6.0</td></tr><tr><td align="left" valign="top"><inline-formula><mml:math id="ieqn178"><mml:mi>I</mml:mi><mml:mi>D</mml:mi><mml:mn>4</mml:mn></mml:math></inline-formula></td><td align="left" valign="top">90</td><td align="left" valign="top">120</td><td align="left" valign="top">5</td><td align="left" valign="top">40</td><td align="left" valign="top">0.05</td><td align="left" valign="top">20</td><td align="left" valign="top">&#x2212;0.20</td><td align="left" valign="top">8.0</td></tr><tr><td align="left" valign="top"><inline-formula><mml:math 
id="ieqn179"><mml:mi>I</mml:mi><mml:mi>D</mml:mi><mml:mn>5</mml:mn></mml:math></inline-formula></td><td align="left" valign="top">50</td><td align="left" valign="top">100</td><td align="left" valign="top">20</td><td align="left" valign="top">60</td><td align="left" valign="top">0.04</td><td align="left" valign="top">15</td><td align="left" valign="top">&#x2212;0.15</td><td align="left" valign="top">6.0</td></tr><tr><td align="left" valign="top"><inline-formula><mml:math id="ieqn180"><mml:mi>I</mml:mi><mml:mi>D</mml:mi><mml:mn>6</mml:mn></mml:math></inline-formula></td><td align="left" valign="top">70</td><td align="left" valign="top">120</td><td align="left" valign="top">10</td><td align="left" valign="top">60</td><td align="left" valign="top">0.04</td><td align="left" valign="top">15</td><td align="left" valign="top">&#x2212;0.20</td><td align="left" valign="top">8.0</td></tr><tr><td align="left" valign="top"><inline-formula><mml:math id="ieqn181"><mml:mi>I</mml:mi><mml:mi>D</mml:mi><mml:mn>7</mml:mn></mml:math></inline-formula></td><td align="left" valign="top">30</td><td align="left" valign="top">60</td><td align="left" valign="top">30</td><td align="left" valign="top">80</td><td align="left" valign="top">0.04</td><td align="left" valign="top">15</td><td align="left" valign="top">&#x2212;0.15</td><td align="left" valign="top">6.0</td></tr><tr><td align="left" valign="top"><inline-formula><mml:math id="ieqn182"><mml:mi>I</mml:mi><mml:mi>D</mml:mi><mml:mn>8</mml:mn></mml:math></inline-formula></td><td align="left" valign="top">90</td><td align="left" valign="top">120</td><td align="left" valign="top">5</td><td align="left" valign="top">30</td><td align="left" valign="top">0.04</td><td align="left" valign="top">15</td><td align="left" valign="top">&#x2212;0.20</td><td align="left" valign="top">8.0</td></tr><tr><td align="left" valign="top"><inline-formula><mml:math 
id="ieqn183"><mml:mi>I</mml:mi><mml:mi>D</mml:mi><mml:mn>9</mml:mn></mml:math></inline-formula></td><td align="left" valign="top">10</td><td align="left" valign="top">40</td><td align="left" valign="top">30</td><td align="left" valign="top">80</td><td align="left" valign="top">0.04</td><td align="left" valign="top">15</td><td align="left" valign="top">&#x2212;0.20</td><td align="left" valign="top">6.0</td></tr><tr><td align="left" valign="top"><inline-formula><mml:math id="ieqn184"><mml:mi>I</mml:mi><mml:mi>D</mml:mi><mml:mn>10</mml:mn></mml:math></inline-formula></td><td align="left" valign="top">10</td><td align="left" valign="top">40</td><td align="left" valign="top">5</td><td align="left" valign="top">30</td><td align="left" valign="top">0.05</td><td align="left" valign="top">20</td><td align="left" valign="top">&#x2212;0.20</td><td align="left" valign="top">8.0</td></tr></tbody></table><table-wrap-foot><fn id="table1fn1"><p><sup>a</sup>X<sub>th</sub> and Z<sub>th</sub>: perceptual critical points for carbohydrate intake and postprandial walking duration (Equations 4 and 5).</p></fn><fn id="table1fn2"><p><sup>b</sup>X<sub>norm</sub>: normal carbohydrate intake (Equation 4).</p></fn><fn id="table1fn3"><p><sup>c</sup>Z<sub>max</sub>: upper bound of walking duration (Equation 5).</p></fn><fn id="table1fn4"><p><sup>d</sup> &#x03B2;<sub>d</sub> and &#x03B2;<sub>e</sub>: the degree of glycemic amplification of carbohydrate intake and postprandial walking duration (Equations 7 and 8).</p></fn><fn id="table1fn5"><p><sup>e</sup> &#x03B1;<sub>d</sub> and &#x03B1;<sub>e</sub>: the degree of time delay of the glycemic response to carbohydrate intake and postprandial walking duration (Equations 7 and 8).</p></fn></table-wrap-foot></table-wrap><p>As an example of user diversity, it is difficult for user ID4 to reduce carbohydrate intake because of <inline-formula><mml:math id="ieqn185"><mml:mstyle><mml:mrow><mml:mstyle 
displaystyle="false"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>90</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, whereas user ID9, with <inline-formula><mml:math id="ieqn186"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>10</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, finds it easy to reduce carbohydrate intake. User ID4 also has <inline-formula><mml:math id="ieqn187"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0.05</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, indicating a greater glycemic response to carbohydrate intake, while user ID5 has a lower glycemic response due to <inline-formula><mml:math id="ieqn188"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mn>0.04</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. Thus, each parameter value was set differently for different users. The value ranges of the behavior parameters <inline-formula><mml:math id="ieqn189"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> were within the range of the action set shown in <xref ref-type="fig" rid="figure2">Figure 2</xref>. 
These ranges were defined following established dietary and exercise guidelines. Specifically, the minimum carbohydrate intake (<inline-formula><mml:math id="ieqn190"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) for virtual users was set with reference to &#x201C;very low-carbohydrate&#x201D; diets (20&#x2010;50 g/d) and &#x201C;low-carbohydrate&#x201D; diets (&#x003C;130 g/d) as defined by Oh et al [<xref ref-type="bibr" rid="ref41">41</xref>], while the normal carbohydrate intake (<inline-formula><mml:math id="ieqn191"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) was guided by typical daily carbohydrate intake levels (200&#x2010;325 g/d) as described in the literature [<xref ref-type="bibr" rid="ref26">26</xref>]. 
For the parameters of postprandial walking durations (<inline-formula><mml:math id="ieqn192"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>), typical ranges in prior experimental studies (5&#x2010;30 minutes) [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref27">27</xref>,<xref ref-type="bibr" rid="ref29">29</xref>] and recommendations for at least 10 minutes of walking [<xref ref-type="bibr" rid="ref27">27</xref>] informed the setting of <inline-formula><mml:math id="ieqn193"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, and cases of walking up to 90 minutes [<xref ref-type="bibr" rid="ref28">28</xref>] guided <inline-formula><mml:math id="ieqn194"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. To introduce a diverse range of realistic individual differences among virtual users, slight deviations from these foundational literature values were sometimes permitted. Nevertheless, the chosen parameter ranges for virtual users in <xref ref-type="table" rid="table1">Table 1</xref> do not drastically diverge from the prior information collected from real participants (in <xref ref-type="table" rid="table2">Table 2</xref>), suggesting that the simulated settings are not unrealistic. 
In addition, the value ranges of the glycemic parameters <inline-formula><mml:math id="ieqn195"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi></mml:mrow><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> were set based on the prior knowledge obtained in the upstream task.</p><table-wrap id="t2" position="float"><label>Table 2.</label><caption><p>Prior information on behavioral parameters.</p></caption><table id="table2" frame="hsides" rules="groups"><thead><tr><td align="left" valign="bottom">Parameter</td><td align="left" valign="bottom">Questionnaire</td><td align="left" valign="bottom">ID1</td><td align="left" valign="bottom">ID2</td><td align="left" valign="bottom">ID3</td><td align="left" valign="bottom">ID4</td><td align="left" valign="bottom">ID5</td><td align="left" valign="bottom">ID6</td></tr></thead><tbody><tr><td align="left" valign="top"><inline-formula><mml:math id="ieqn196"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula></td><td align="left" valign="top">&#x201C;If you were to reduce carbohydrates in your target diet, what would you say is the minimum amount of carbohydrates?&#x201D;</td><td align="char" char="." valign="top">50 g</td><td align="char" char="." valign="top">50 g</td><td align="char" char="." valign="top">30 g</td><td align="char" char="." valign="top">30 g</td><td align="char" char="." valign="top">50 g</td><td align="char" char="." 
valign="top">50 g</td></tr><tr><td align="left" valign="top"><inline-formula><mml:math id="ieqn197"><mml:msub><mml:mrow><mml:mi>X</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula></td><td align="left" valign="top">&#x201C;How much carbohydrates in grams do you typically consume in your target diet?&#x201D;</td><td align="char" char="." valign="top">110 g</td><td align="char" char="." valign="top">90 g</td><td align="char" char="." valign="top">50 g</td><td align="char" char="." valign="top">50 g</td><td align="char" char="." valign="top">70 g</td><td align="char" char="." valign="top">150 g</td></tr><tr><td align="left" valign="top"><inline-formula><mml:math id="ieqn198"><mml:msub><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula></td><td align="left" valign="top">&#x201C;If you were to walk after your target meal, how long do you think you could walk each day?&#x201D;</td><td align="char" char="." valign="top">30 minutes</td><td align="char" char="." valign="top">15 minutes</td><td align="char" char="." valign="top">15 minutes</td><td align="char" char="." valign="top">20 minutes</td><td align="char" char="." valign="top">25 minutes</td><td align="char" char="." valign="top">5 minutes</td></tr><tr><td align="left" valign="top"><inline-formula><mml:math id="ieqn199"><mml:msub><mml:mrow><mml:mi>Z</mml:mi></mml:mrow><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:math></inline-formula></td><td align="left" valign="top">&#x201C;Based on your physical fitness, how long do you think you can continue walking?&#x201D;</td><td align="char" char="." valign="top">60 minutes</td><td align="char" char="." valign="top">60 minutes</td><td align="char" char="." valign="top">180 minutes</td><td align="char" char="." 
valign="top">60 minutes</td><td align="char" char="." valign="top">40 minutes</td><td align="char" char="." valign="top">120 minutes</td></tr></tbody></table></table-wrap><sec id="s2-10-1"><title>User Response Simulator</title><p>Next, to perform the simulation experiment, it is necessary to generate a pseudo-response to any action according to each user setting. Therefore, we developed a user-response simulator that outputs 3 responses (dietary, exercise, and glycemic) to 2 inputs (an action and the user's parameter values), as shown in Figure S3 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref>.</p><p>In this simulator, the simulated dietary behavior and exercise behavior for an action are the first outputs, and a simulated glucose trajectory is generated based on these behaviors. First, the simulated behaviors are pre-estimated according to the corresponding behavior adherence models in Equations 4 and 5 by setting the input parameter values of the virtual user and input action <inline-formula><mml:math id="ieqn200"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>A</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. Subsequently, by perturbing these pre-estimated behaviors with additive Gaussian noise, we obtain the final simulated behaviors <inline-formula><mml:math id="ieqn201"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mover><mml:mi>x</mml:mi><mml:mrow><mml:mo>˘</mml:mo></mml:mrow></mml:mover><mml:mo>,</mml:mo><mml:mover><mml:mi>z</mml:mi><mml:mrow><mml:mo>˘</mml:mo></mml:mrow></mml:mover></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. 
Here, the perturbation noise for carbohydrate intake followed <inline-formula><mml:math id="ieqn202"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>N</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>10</mml:mn></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, and the noise for postprandial walking duration followed <inline-formula><mml:math id="ieqn203"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>N</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>5</mml:mn></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. Subsequently, these simulated behaviors, <inline-formula><mml:math id="ieqn204"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mover><mml:mi>x</mml:mi><mml:mrow><mml:mo>˘</mml:mo></mml:mrow></mml:mover><mml:mo>,</mml:mo><mml:mover><mml:mi>z</mml:mi><mml:mrow><mml:mo>˘</mml:mo></mml:mrow></mml:mover></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, are input into the glycemic response model (Equation 9), and the obtained trajectory is then perturbed by adding Gaussian noise to generate the final simulated glucose trajectory <inline-formula><mml:math id="ieqn205"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mover><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mrow><mml:mo>˘</mml:mo></mml:mrow></mml:mover></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. In this perturbation, time-independent noise following <inline-formula><mml:math id="ieqn206"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>N</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>0</mml:mn><mml:mo>,</mml:mo><mml:mn>0.02</mml:mn></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> was added. 
Figure S4 in <xref ref-type="supplementary-material" rid="app1">Multimedia Appendix 1</xref> shows a representative example of these simulated responses.</p><p>By replacing the part (<inline-formula><mml:math id="ieqn207"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mo>&#x2020;</mml:mo></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) of the pseudocode in Algorithm 1 (<xref ref-type="other" rid="box1">Textbox 1</xref>) with the above simulator, it is possible to simulate repeated experiential learning cycles of action planning and intervention experiments, as shown in <xref ref-type="fig" rid="figure1">Figure 1B</xref>, for each virtual user.</p></sec><sec id="s2-10-2"><title>Simulation Evaluation Settings</title><p>To answer question Q1, we tracked how the actual cumulative reward changed as the settings of the proposed method were varied. One of the most significant settings is the balance of exploration and exploitation in action selection, which is determined according to the set value of <inline-formula><mml:math id="ieqn208"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, a hyperparameter of the proposed method. Therefore, we set multiple patterns of <inline-formula><mml:math id="ieqn209"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> values and investigated which setting yielded the highest cumulative reward. 
For these value patterns, we considered <inline-formula><mml:math id="ieqn210"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi><mml:mo>=</mml:mo><mml:mn>1.0</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> for random selection, <inline-formula><mml:math id="ieqn211"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi><mml:mo>=</mml:mo><mml:mn>0.5</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> for high, <inline-formula><mml:math id="ieqn212"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi><mml:mo>=</mml:mo><mml:mn>0.2</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> for medium, and <inline-formula><mml:math id="ieqn213"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi><mml:mo>=</mml:mo><mml:mn>0.01</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> for low condition of <inline-formula><mml:math id="ieqn214"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>.</p><p>Another significant setting is the introduction of prior distributions for each model parameter. Thus, we also investigated how the cumulative reward varied depending on whether the prior distribution was used in parameter learning. 
In this simulation, prior knowledge of the behavioral parameters <inline-formula><mml:math id="ieqn215"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>n</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mi>h</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> was pseudo-generated by adding Gaussian noise to the true parameter values. This choice reflects the fact that the results of the survey questionnaire are subject to each user's cognitive errors regarding their true behavior, and these cognitive errors are assumed to follow a Gaussian distribution. On the other hand, to set the prior distribution of the glycemic parameters <inline-formula><mml:math id="ieqn216"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, the mean and the variance of the true values among all users were used. 
The initial batch of 6 actions was based on past clinical findings (refer to <xref ref-type="fig" rid="figure2">Figure 2</xref>). From the second iteration (<inline-formula><mml:math id="ieqn217"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi><mml:mo>&#x2265;</mml:mo><mml:mn>2</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>), action selection was performed using the proposed method under each setting.</p></sec></sec><sec id="s2-11"><title>Metrics</title><p>The cumulative reward of the virtual user (<inline-formula><mml:math id="ieqn218"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>n</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) at iteration (<inline-formula><mml:math id="ieqn219"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) of the experiential learning cycle can be represented as follows:</p><disp-formula id="equWL36"><mml:math id="eqn17"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" 
scriptlevel="0"><mml:msub><mml:mi>G</mml:mi><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mi>i</mml:mi><mml:mo>&#x00D7;</mml:mo><mml:mi>D</mml:mi></mml:mrow></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>j</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:munderover><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>d</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>D</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>d</mml:mi><mml:mo>,</mml:mo><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>where <inline-formula><mml:math id="ieqn220"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>j</mml:mi><mml:mo>,</mml:mo><mml:mi>d</mml:mi><mml:mo>,</mml:mo><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> represents the actual reward transformed by Equation 10 from the glucose trajectory <inline-formula><mml:math id="ieqn221"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mover><mml:mi mathvariant="bold-italic">y</mml:mi><mml:mrow><mml:mo>˘</mml:mo></mml:mrow></mml:mover></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> of the virtual user <inline-formula><mml:math id="ieqn222"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>n</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> after the <inline-formula><mml:math id="ieqn223"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>d</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>th target diet in the <inline-formula><mml:math 
id="ieqn224"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>j</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>th intervention experiment. Further, <inline-formula><mml:math id="ieqn225"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>D</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the number of target diets included in each experiment; and <inline-formula><mml:math id="ieqn226"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>D</mml:mi><mml:mo>=</mml:mo><mml:mn>6</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> was used in the simulation. The average cumulative reward of <inline-formula><mml:math id="ieqn227"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>N</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> virtual users, which is used as an evaluation metric, can be represented as follows:</p><disp-formula id="equWL37"><mml:math id="eqn18"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mi>J</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mi>G</mml:mi><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>By increasing <inline-formula><mml:math id="ieqn228"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> to the maximum iteration time <inline-formula><mml:math 
id="ieqn229"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>I</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, we can track how the metric <inline-formula><mml:math id="ieqn230"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>J</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> changes depending on each condition of <inline-formula><mml:math id="ieqn231"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. Considering the burden on users in real experiments, <inline-formula><mml:math id="ieqn232"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>I</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> was set to 10. This means that each user must continue wearing the necessary sensors to collect user responses for a maximum of 60 meals when <inline-formula><mml:math id="ieqn233"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>D</mml:mi><mml:mo>=</mml:mo><mml:mn>6</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>.</p><p>In this simulation, the true value of each parameter for each user was known, as listed in <xref ref-type="table" rid="table1">Table 1</xref>. This enables the investigation of the estimation error of each parameter for any setting of our method. To answer the question (Q2), we investigated how the estimation error changes as the iteration time increases (ie, the amount of training data), with and without prior distribution. 
The estimation error was defined as follows:</p><disp-formula id="E21"><mml:math id="eqn19"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msub><mml:mi>&#x03F5;</mml:mi><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mover><mml:mi>&#x03B8;</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mo stretchy="false">(</mml:mo><mml:mi>i</mml:mi><mml:mo stretchy="false">)</mml:mo><mml:mo>&#x2212;</mml:mo><mml:msub><mml:mi>&#x03B8;</mml:mi><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>|</mml:mo></mml:mrow><mml:mrow><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow><mml:msub><mml:mi>&#x03B8;</mml:mi><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo stretchy="false">|</mml:mo></mml:mrow></mml:mrow></mml:mfrac><mml:mo>&#x00D7;</mml:mo><mml:mn>100</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></disp-formula><p>This corresponds to the relative error (%). 
<inline-formula><mml:math id="ieqn234"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>p</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the parameter number, <inline-formula><mml:math id="ieqn235"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03B8;</mml:mi><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the true value of the parameter in <xref ref-type="table" rid="table1">Table 1</xref>, and <inline-formula><mml:math id="ieqn236"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mrow><mml:mover><mml:mi>&#x03B8;</mml:mi><mml:mo stretchy="false">^</mml:mo></mml:mover></mml:mrow><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is the estimated value of the parameter in the <inline-formula><mml:math id="ieqn237"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>th iteration time.</p></sec><sec id="s2-12"><title>Real-World Feasibility Study Setup</title><p>Additionally, we conducted a preliminary set of short intervention experiments with real users using the proposed method, based on the simulation results. This experiment was positioned as a pilot study before conducting a full-scale study. The purpose of this experiment was to explore the feasibility of whether postprandial glucose levels can be improved in real users after recommending actions updated by the proposed method compared to default actions. 
Accordingly, in this study, we compared the performance when default actions were selected in the first experiment (referred to as the &#x201C;default policy&#x201D;) with the performance when actions were selected by the proposed method in the second experiment (referred to as the &#x201C;optimized policy&#x201D;) using a within-participant study design, to evaluate the feasibility of the proposed method. The optimized policy was updated from the observed participant responses in the first experiment, following the proposed method.</p></sec><sec id="s2-13"><title>Experimental Protocol</title><p>First, we recruited 6 healthy participants (4 men and 2 women) as real users from April 2023 to July 2023. Because this experiment was the first feasibility study, the target group was healthy rather than diabetic. All participants were researchers at Aalto University and the University of Helsinki.</p><p>Next, each participant was assigned the goal of reducing postprandial glucose levels to prevent diabetes. To achieve this goal, the participants received daily recommendations for the target diet, including total carbohydrate intake and postprandial walking duration, via a dedicated smartphone app. The target diet was either dinner or lunch each day, and the participants chose according to their lifestyle preferences. While the recommendations in the first experiment were preset to default actions, those in the subsequent experiment were preset to actions selected by the proposed method. Participants were free to choose whether to follow the recommendations and to act accordingly. To collect a participant&#x2019;s behavioral and glycemic responses to the recommendation actions, they wore a continuous glucose monitoring device and an activity tracker.</p><p>This process of mobile intervention and data collection for each participant was carried out for 6 days in the first experiment and for 3 days in the second experiment. 
Because the first experiment corresponded primarily to the exploration phase and the second experiment corresponded to the exploitation phase, the number of days in the second experiment was set to be short.</p></sec><sec id="s2-14"><title>Recommendation of Action</title><p>An action was delivered as a recommendation in a notification app on a dedicated smartphone (iPhone 12), which was provided to each participant. In the notification app, a dedicated message including the action was delivered along with a notification sound, a few hours before the target diet. The frequency and duration of this recommendation were predetermined according to participants&#x2019; preferences.</p><p>An example of this message appearing on the smartphone screen is &#x201C;Eat less than 60 g of carbohydrates at dinner, and walk for more than 20 minutes just after dinner.&#x201D; Depending on the selected action, the amount of carbohydrate and postprandial walking duration in the message changed. For default actions in the first experiment, carbohydrate intake (<inline-formula><mml:math id="ieqn238"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) was within the range of 20&#x2010;80 g, and the postprandial walking duration (<inline-formula><mml:math id="ieqn239"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) was within the range of 5&#x2010;20 minutes, considering the participants&#x2019; preferences and previous literature [<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref26">26</xref>-<xref ref-type="bibr" rid="ref29">29</xref>]. 
In contrast, in the second experiment, actions were selected based on the two-stage reward prediction results of each action candidate; therefore, they were not always within this range. This reward prediction was attained by setting the parameters learned from the user response data obtained in the first experiment.</p></sec><sec id="s2-15"><title>Response Data Collection</title><p>For user response data, we collected data related to the participant&#x2019;s continuous glucose levels, physical activity, and food records. <xref ref-type="fig" rid="figure4">Figure 4</xref> illustrates how the data were collected. Continuous glucose measurements were collected every 5 minutes using a Guardian Connect medical device (Medtronic Inc), which was inserted on the participant&#x2019;s skin. At the same time, physical activity data were collected using the activity tracker Vivosmart 3 (Garmin Ltd), which was worn on the participant&#x2019;s arm. To mitigate erroneous detection of walking from various daily movements, the device identifies a walking event when characteristic arm movements indicative of continuous walking are detected for a duration exceeding a certain minimum threshold. The physical activity data included the start time and duration of each walking event that was automatically detected by the device. A dedicated fitness tracker was chosen over participants&#x2019; personal smartphones to ensure data consistency and reliability across participants, as smartphone usage and carrying patterns (eg, in a pocket or bag) can vary significantly and lead to inconsistent activity detection. This approach also minimizes issues related to battery drain on personal devices and addresses potential privacy concerns by avoiding continuous access to personal smartphone data. The glucose and activity data were continuously sent to a dedicated iPhone through a Bluetooth connection with each device. 
In addition, a food diary, including the amount of carbohydrates and the start time of each target diet, was recorded manually by the participant. The carbohydrate content was automatically calculated using the mobile app MyFitnessPal by inputting all food contents and amounts.</p><p>Finally, because we focus on the improvement of postprandial glucose levels, we extracted postprandial segment data from the longitudinal time series as user response data for each action. Specifically, the segment data were defined from the start of each target diet until 90 minutes later, as shown by the blue frames in <xref ref-type="fig" rid="figure4">Figure 4</xref>. Each data segment was added to a buffered dataset.</p><fig position="float" id="figure4"><label>Figure 4.</label><caption><p>Illustration of response data collection.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e70826_fig04.png"/></fig></sec><sec id="s2-16"><title>Metrics</title><p>The evaluation was based on whether the reward related to postprandial glucose increased in the second experiment compared to the first. 
Therefore, we set the following metric as the actual reward average <inline-formula><mml:math id="ieqn240"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>J</mml:mi><mml:mrow><mml:mo>&#x2032;</mml:mo></mml:mrow></mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> for all diets of all <italic>N</italic> participants in the <italic>i</italic>th experiment, and this metric was compared between the first and second experiments.</p><disp-formula id="equWL39"><mml:math id="eqn20"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:msup><mml:mi>J</mml:mi><mml:mrow><mml:mo>&#x2032;</mml:mo></mml:mrow></mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>N</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover><mml:msubsup><mml:mi>G</mml:mi><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mrow><mml:mo>&#x2032;</mml:mo></mml:mrow></mml:msubsup><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>,</mml:mo><mml:msubsup><mml:mi>G</mml:mi><mml:mrow><mml:mi>n</mml:mi></mml:mrow><mml:mo>&#x2032;</mml:mo></mml:msubsup><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mfrac><mml:mn>1</mml:mn><mml:mi>D</mml:mi></mml:mfrac><mml:munderover><mml:mo>&#x2211;</mml:mo><mml:mrow><mml:mi>d</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>D</mml:mi></mml:mrow></mml:munderover><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>d</mml:mi><mml:mo>,</mml:mo><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mstyle></
mml:mrow></mml:mstyle></mml:math></disp-formula><p>It should be noted that the actual reward <inline-formula><mml:math id="ieqn241"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>r</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mo>,</mml:mo><mml:mi>d</mml:mi><mml:mo>,</mml:mo><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is transformed by Equation 10 from the glucose increase trajectory, which is obtained by subtracting its baseline derived from preprandial glucose levels from the observed glucose trajectory <inline-formula><mml:math id="ieqn242"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi mathvariant="bold-italic">y</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>.</p></sec><sec id="s2-17"><title>Acquisition of Prior Knowledge</title><p>To introduce the prior distribution of the behavioral parameters, each participant was asked to answer a questionnaire before starting the experiment. As described in the &#x201C;Parameter Learning&#x201D; subheading, the result was set as the mean value of the prior distribution of the corresponding parameter. <xref ref-type="table" rid="table2">Table 2</xref> shows the actual answers for each parameter together with the corresponding question. It can be observed that the values vary according to the participant. 
As for glycemic parameters, we estimated the prior distribution <inline-formula><mml:math id="ieqn243"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>N</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mover><mml:mi mathvariant="bold-italic">&#x03BC;</mml:mi><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mover><mml:mi mathvariant="bold-italic">&#x03C3;</mml:mi><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, which is common to target participants, using the glucose dataset from other participants. To obtain this, we recruited 4 additional healthy participants and collected continuous glucose levels, physical activity, and food records around lunchtime over 6 days for each participant in the same manner, as described previously. Consequently, a total of 6 segment data were obtained for each participant, as shown in <xref ref-type="fig" rid="figure4">Figure 4</xref>. 
Then, after estimating the glycemic parameters <inline-formula><mml:math id="ieqn244"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi mathvariant="bold-italic">&#x03B8;</mml:mi><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> for each participant using their segment data, the parameters <inline-formula><mml:math id="ieqn245"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mover><mml:mi mathvariant="bold-italic">&#x03BC;</mml:mi><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mover><mml:mi mathvariant="bold-italic">&#x03C3;</mml:mi><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> were calculated across the 4 participants, as described in the &#x201C;Parameter Learning&#x201D; subheading in this section. As a result of this calculation, we obtained <inline-formula><mml:math id="ieqn246"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mover><mml:mi 
mathvariant="bold-italic">&#x03BC;</mml:mi><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mover><mml:mi>&#x03BC;</mml:mi><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mover><mml:mi>&#x03BC;</mml:mi><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mover><mml:mi>&#x03BC;</mml:mi><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mover><mml:mi>&#x03BC;</mml:mi><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mn>0.047</mml:mn><mml:mo>,</mml:mo><mml:mo>&#x2212;</mml:mo><mml:mn>0.148</mml:mn><mml:mo>,</mml:mo><mml:mn>19.07</mml:mn><mml:mo>,</mml:mo><mml:mn>6.22</mml:mn></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and <inline-formula><mml:math id="ieqn247"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mover><mml:mi 
mathvariant="bold-italic">&#x03C3;</mml:mi><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:mi>g</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:msub><mml:mover><mml:mi>&#x03C3;</mml:mi><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mover><mml:mi>&#x03C3;</mml:mi><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:msub><mml:mi>&#x03B2;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mover><mml:mi>&#x03C3;</mml:mi><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mover><mml:mi>&#x03C3;</mml:mi><mml:mo>&#x223C;</mml:mo></mml:mover><mml:mrow><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:msub></mml:mrow><mml:mo>}</mml:mo></mml:mrow><mml:mo>=</mml:mo><mml:mrow><mml:mo>{</mml:mo><mml:mrow><mml:mn>0.028</mml:mn><mml:mo>,</mml:mo><mml:mn>0.035</mml:mn><mml:mo>,</mml:mo><mml:mn>1.91</mml:mn><mml:mo>,</mml:mo><mml:mn>0.39</mml:mn></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>.</p></sec><sec id="s2-18"><title>Ethical Considerations</title><p>The study was approved by the Aalto University Research Ethics Committee (Treatment Planning Project/08.06/2023). Written informed consent was obtained from all participants before the start of the experiment. Direct identifiers were removed, and data were pseudonymized and stored securely. No personal data were shared with third parties. 
Additionally, to ensure participant safety, participants were informed orally and via the consent form that they could withdraw at any time if they experienced discomfort or unease.</p></sec></sec><sec id="s3" sec-type="results"><title>Results</title><sec id="s3-1"><title>Simulation Experiment Results</title><p>We present the simulated results of the average cumulative rewards (<inline-formula><mml:math id="ieqn248"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>J</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) with and without prior distribution in <xref ref-type="fig" rid="figure5">Figure 5</xref>. First, in any case, when an action selection by the proposed method is adopted (ie, when <inline-formula><mml:math id="ieqn249"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is other than 1.0), we can confirm that the cumulative reward gradually increases from the second iteration (<inline-formula><mml:math id="ieqn250"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) after executing default actions at the first iteration (<inline-formula><mml:math id="ieqn251"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>). This demonstrates the effectiveness of action selection by the proposed method in the second and subsequent iterations (<inline-formula><mml:math id="ieqn252"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi><mml:mo>&#x2265;</mml:mo><mml:mn>2</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>). 
Regarding question Q1, by focusing on the variation of <inline-formula><mml:math id="ieqn253"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, we can also confirm that the smaller the value of <inline-formula><mml:math id="ieqn254"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, the greater the accumulated reward, and the maximum cumulative reward is obtained when <inline-formula><mml:math id="ieqn255"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi><mml:mo>=</mml:mo><mml:mn>0.01</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>.</p><p>The theoretical boundary of the cumulative reward is shown by the black dotted line in <xref ref-type="fig" rid="figure5">Figure 5</xref>, and it is calculated as an ideal condition, wherein the true parameter values are known and used for action selection from the second iteration. The cumulative reward for <inline-formula><mml:math id="ieqn256"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi><mml:mo>=</mml:mo><mml:mn>0.01</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> with prior knowledge shows almost the same value as that in this theoretical bound. 
In other words, when prior knowledge is introduced, exploration with the policy <inline-formula><mml:math id="ieqn257"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>n</mml:mi><mml:mi>d</mml:mi><mml:mi>o</mml:mi><mml:mi>m</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> is unnecessary, and the best performance can be attained by full exploitation with the policy <inline-formula><mml:math id="ieqn258"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> that maximizes the predicted reward from the beginning. On the other hand, in the case without prior knowledge in <xref ref-type="fig" rid="figure5">Figure 5B</xref>, the cumulative reward decreases significantly when <inline-formula><mml:math id="ieqn259"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi><mml:mo>=</mml:mo><mml:mn>0.01</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and 0.2 are compared with the case with prior knowledge in <xref ref-type="fig" rid="figure5">Figure 5A</xref>. 
In particular, when <inline-formula><mml:math id="ieqn260"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi><mml:mo>=</mml:mo><mml:mn>0.01</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>, the decrease in the second iteration is remarkable.</p><fig position="float" id="figure5"><label>Figure 5.</label><caption><p>Simulation results of average cumulative reward with prior distribution &#x201C;(A)&#x201D; and without prior distribution &#x201C;(B)&#x201D; of the proposed method.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e70826_fig05.png"/></fig><p>Next, regarding question Q2, we show the actual changes in the parameter estimation error (<inline-formula><mml:math id="ieqn261"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03F5;</mml:mi><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) of the behavioral adherence and glycemic response models in <xref ref-type="fig" rid="figure6">Figure 6</xref>. First, it can be seen that the case without prior knowledge (&#x25B2;) has a larger estimation error than the case with prior knowledge (&#x25A0;) for the first iteration. This large error leads to a prediction error of the reward for each action candidate and therefore makes it easier to select worse actions with the policy <inline-formula><mml:math id="ieqn262"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msup><mml:mi>&#x03C0;</mml:mi><mml:mrow><mml:mo>&#x2217;</mml:mo></mml:mrow></mml:msup></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>. 
This may be the reason why the cumulative reward decreases immediately in the cases of <inline-formula><mml:math id="ieqn263"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi><mml:mo>=</mml:mo><mml:mn>0.01</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> and 0.2 due to the erroneous action selection at <inline-formula><mml:math id="ieqn264"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> in <xref ref-type="fig" rid="figure5">Figure 5B</xref>. Furthermore, in the case without prior knowledge, it can be seen that the estimation errors of some parameters (such as <inline-formula><mml:math id="ieqn265"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>Z</mml:mi><mml:mrow><mml:mi>m</mml:mi><mml:mi>a</mml:mi><mml:mi>x</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>&#x03B1;</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) are not improved as the iteration time of the experiential learning cycle increases. This indicates the potential risk in real experiments, showing that it is impossible to collect the necessary training data for stable parameter learning, even when imposing heavy burdens of repeated interventions on users. However, when introducing prior knowledge, we can confirm that the parameter errors are minimized from the first iteration for most parameters. This suggests that introducing prior knowledge can eliminate the burden on users and maximize their cumulative rewards.</p><fig position="float" id="figure6"><label>Figure 6.</label><caption><p>Average estimation error of behavioral parameters and glycemic parameters across different &#x03B5;-greedy conditions: (A) low condition, (B) middle condition, (C) high condition, and (D) random. 
&#x25A0; represents results with prior, and &#x25B2; represents results without prior. The vertical axis of each graph represents the average estimation error (<inline-formula><mml:math id="ieqn266"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>&#x03F5;</mml:mi><mml:mrow><mml:mi>p</mml:mi></mml:mrow></mml:msub><mml:mrow><mml:mo>(</mml:mo><mml:mi>i</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) of the <inline-formula><mml:math id="ieqn267"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>p</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>th parameter among all 10 users, and the horizontal axis represents the iteration time (<inline-formula><mml:math id="ieqn268"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) of the experiential learning.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e70826_fig06.png"/></fig></sec><sec id="s3-2"><title>Real-World Experiment Results</title><p>Based on the simulation results, we conducted a real-world experiment with a simplified algorithm involving a single update of the policy. According to the setting derived in the simulation experiment, we introduced prior knowledge for parameter learning, and we took the greedy policy with <inline-formula><mml:math id="ieqn269"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>&#x03B5;</mml:mi><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> for action selection.</p><p>The obtained metric values for each participant from the real experiments conducted on the within-participant design are shown in <xref ref-type="fig" rid="figure7">Figure 7A</xref>. 
For all participants except ID5, the simple reward average increased in the second experiment using the proposed method. On average, for all participants, this metric increased from 0.65 in the first experiment to 0.80 in the second experiment, representing a 23% increase in reward. To statistically evaluate this improvement, we performed a paired <italic>t</italic> test comparing the average rewards between the first (default policy, R1) and second (optimized policy, R2) experiments across the 6 participants. The analysis revealed a statistically significant increase in reward following the optimized policy (<italic>P</italic>=.04). This result indicates that action selection using the proposed method is effective for real-world applications, consistent with the simulation results.</p><p>Moreover, we show how carbohydrate intake and postprandial walking duration changed in the first and second experiments in <xref ref-type="fig" rid="figure7">Figures 7B and 7C</xref>. All participants showed decreased carbohydrate intake and increased postprandial walking duration. For all participants, the average carbohydrate intake decreased by 35.3% from 60.5 to 39.1 g, and postprandial walking duration increased by 34.3% from 14.0 to 18.8 minutes. To further quantify these behavioral improvements, we conducted paired <italic>t</italic> tests for carbohydrate intake and postprandial walking duration. For carbohydrate intake, the mean difference (R2 &#x2013; R1) was &#x2212;21.425 g, and the paired <italic>t</italic> test showed a statistically significant reduction (<italic>P</italic>=.003). Similarly, for postprandial walking duration, the mean difference (R2 &#x2013; R1) was 4.780 minutes, and the paired <italic>t</italic> test indicated a statistically significant increase (<italic>P</italic>=.03). 
These statistically significant behavioral changes underscore the effectiveness of the proposed method in guiding users toward healthier actions.</p><fig position="float" id="figure7"><label>Figure 7.</label><caption><p>Changes in average reward (A), average carbohydrate intake (B), and average postprandial walking duration (C) for each participant in real experiments. Standard errors are shown as error bars. Statistically significant improvements (<italic>P</italic>&#x003C;.05) were observed in each of reward, carbohydrate intake, and postprandial walking duration from the first experiment (default policy) to the second experiment (optimized policy). On average, reward increased from 0.65 (default policy) to 0.80 (optimized policy), carbohydrate intake decreased from 60.5 g (default policy) to 39.1 g (optimized policy), and postprandial walking duration increased from 14 minutes (default policy) to 18.8 minutes (optimized policy).</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e70826_fig07.png"/></fig><p>Furthermore, to investigate the impact on behavioral adherence, we show the actual behaviors of all participants in response to the recommended actions in <xref ref-type="fig" rid="figure8">Figure 8</xref>. The adherence to carbohydrate intake improved from the first experiment to the second experiment. In fact, as a result of calculating the adherence rate for each experiment, the adherence rate for carbohydrate intake increased from 45.7% to 83.3% and that for postprandial walking duration increased from 40% to 55.6%. These behavioral improvements support the hypothesis that action selection using the proposed method is appropriate for participants from the viewpoint of ease of behavior change.</p><fig position="float" id="figure8"><label>Figure 8.</label><caption><p>Recommended action vs actual behaviors in real experiments (left: carbohydrate intake and right: postprandial walking). 
The horizontal and vertical axes show the values of recommended action (<inline-formula><mml:math id="ieqn270"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub><mml:mo>,</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) and actual behavior (<inline-formula><mml:math id="ieqn271"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>x</mml:mi><mml:mo>,</mml:mo><mml:mi>z</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>), respectively. Each data point denotes a pair of one action and one behavior for any participant. A participant is considered to adhere to the recommended action in cases <inline-formula><mml:math id="ieqn272"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>x</mml:mi><mml:mo>&#x2264;</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>d</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> or <inline-formula><mml:math id="ieqn273"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>z</mml:mi><mml:mo>&#x2265;</mml:mo><mml:msub><mml:mi>a</mml:mi><mml:mrow><mml:mi>e</mml:mi></mml:mrow></mml:msub></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula> shown in the blue region. For each policy, the frequency of action value is shown on top of each graph. 
The distribution differs between policies.</p></caption><graphic alt-version="no" mimetype="image" position="float" xlink:type="simple" xlink:href="formative_v10i1e70826_fig08.png"/></fig></sec></sec><sec id="s4" sec-type="discussion"><title>Discussion</title><sec id="s4-1"><title>Principal Findings</title><p>In this paper, we propose a multiarmed bandit algorithm based on the two-stage reward prediction mechanism, which can individually plan mobile behavioral interventions and make it easier for users to perform actions and reduce glucose levels after eating.</p><p>From the simulation experiment results, first, we confirm that a larger cumulative reward can be obtained by adopting the proposed method than by selecting the default action. This demonstrates the effectiveness of our online algorithm for personalizing recommendations. Second, it is also demonstrated that the parameter learning converged immediately at the first iteration (<inline-formula><mml:math id="ieqn274"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>) by introducing prior knowledge. This indicates it is not always necessary to repeat the intervention experiment many times for obtaining accurate parameters, emphasizing the importance of the initial single update we conducted in the real-world feasibility study.</p><p>From the short-term real-world feasibility study, with a simplified proposed method involving a single update of the policy into a personalized one, it is found that actual postprandial glucose levels can be improved using the proposed method through behavioral improvements in both carbohydrate intake and postprandial walking. 
Specifically, we observed an average 23% improvement in actual glucose responses, which was statistically significant (<italic>P</italic>=.04), along with statistically significant improvements in behavioral adherence to the recommendations concerning carbohydrate intake (<italic>P</italic>=.003) and postprandial walking (<italic>P</italic>=.03). The adherence rates also increased notably (from 45.7% to 83.3% for carbohydrate intake and 40% to 55.6% for postprandial walking). While promising, these results are based on a small-scale pilot study with healthy participants and a simplified intervention protocol. Therefore, they should be viewed as preliminary evidence rather than definitive conclusions, primarily serving as guidance for the design of future, more comprehensive clinical trials. This demonstrates the potential effectiveness of the proposed method in a real-world setting. Crucially, this pilot demonstrated that even a single update, informed by our novel two-stage reward prediction model and based on initial user-specific data, can yield statistically significant improvements in clinical outcomes (glucose levels) and behavioral adherence in a real-world setting. This offers compelling initial evidence for the practical viability and immediate impact of our personalized approach, serving as a vital proof-of-concept for its core mechanism in a complex human context.</p><p>While simulation experiments elucidated the algorithm&#x2019;s behavior across diverse iteration numbers (<inline-formula><mml:math id="ieqn275"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>), the real-world feasibility study, designed as a pilot, adopted a simplified approach: a single policy update (from default to optimized after the initial experiment), prioritizing feasibility and logistical tractability. 
This design inherently meant that the full extent of continuous learning and adaptive capabilities, central to multiarmed bandit frameworks over extended iterations, was not fully explored. Consequently, this study primarily demonstrated the potential of personalized recommendations derived from an initial learning phase, rather than evaluating continuous adaptive optimization over a prolonged period. This outcome underscores the critical need for future investigations to deploy the full online algorithm, addressing the challenges and benefits of continuous policy updates, particularly concerning user engagement and dynamic model accuracy.</p><p>To the best of our knowledge, this is the first attempt to (1) clarify how to optimize behavioral interventions for improving glucose levels and (2) demonstrate the effectiveness of the method through simulation experiments and real interventional experiments.</p></sec><sec id="s4-2"><title>Limitations</title><p>While this preliminary study offers promising insights, it is important to acknowledge several limitations that warrant future investigation to ensure comprehensive validation and generalization. First, the real experiment was short-term and small-scale. The findings from this preliminary study, which involved only 6 healthy participants over a short duration, should therefore be interpreted with caution and are not readily generalizable to a broader population, particularly including patients with diabetes who represent the primary target group for this intervention. The exclusion of patients with diabetes in this initial feasibility study naturally limits the immediate clinical applicability of these specific real-world results. While this real-world feasibility study offers encouraging initial evidence that effective personalized policy updates could be established within a practical, short-term data collection period (eg, 6 days), future research will include a long-term, large-scale intervention study. 
This is needed to fully evaluate the sustained effects and long-term stability of parameter convergence and policy updates beyond the initial phase (ie, after the third iteration [<inline-formula><mml:math id="ieqn276"><mml:mstyle><mml:mrow><mml:mstyle displaystyle="false"><mml:mi>i</mml:mi><mml:mo>&#x2265;</mml:mo><mml:mn>3</mml:mn></mml:mstyle></mml:mrow></mml:mstyle></mml:math></inline-formula>]) in diverse real-world settings and patient populations. Also, as our target in this study was limited to healthy participants, the interventional experiments in patients with diabetes should be performed for a comprehensive evaluation, adhering to robust statistical principles, including multiple comparison adjustments.</p><p>Second, this study primarily compared the proposed online algorithm against a randomized (default) policy. While this comparison effectively demonstrated the benefit of personalization over a nonpersonalized baseline, it did not include a direct comparison with other existing reinforcement learning-based or contextual bandit algorithms from the literature. Such comparisons would provide a more comprehensive understanding of the relative performance advantages and disadvantages of our proposed method against alternative state-of-the-art approaches. Future work could aim to include these comparisons, potentially through simulation studies based on established benchmarks or by incorporating other algorithms into real-world trials. Furthermore, a limitation exists in the parameterization of our simulation experiments. While parameter ranges for virtual users (<xref ref-type="table" rid="table1">Table 1</xref>) were based on literature, specific values were partially informed by our small-scale feasibility study (<xref ref-type="table" rid="table2">Table 2</xref>). This limited sample of 6 healthy participants may not be fully representative, potentially affecting the generalizability of simulation results. 
Consequently, these simulations primarily served to qualitatively explore the algorithm&#x2019;s behavior and identify optimal operational settings (eg, epsilon values and importance of prior knowledge) for future large-scale trials, acknowledging they do not provide definitive quantitative predictions. Future research should prioritize parameter validation and derivation from larger, more diverse datasets, possibly through crowdsourcing or extensive clinical trials, to enhance simulation robustness.</p><p>Third, the predictive models, key to our two-stage reward prediction mechanism, possess inherent limitations, particularly regarding data requirements and accuracy. Furthermore, the two-stage prediction itself introduces a compounding effect of prediction errors: inaccuracies in behavioral adherence predictions can propagate and amplify errors in subsequent glycemic response prediction. This &#x201C;multiplication&#x201D; of noise impacts reward estimation, especially with limited data. While our preliminary study showed improvements in both glucose and behavior, the potential impact of such compounded prediction errors on long-term reward estimation and intervention effectiveness needs to be thoroughly evaluated in future large-scale studies. Finally, our behavior adherence model is simple, and user responses are also affected by external contextual factors such as weather or daily schedules [<xref ref-type="bibr" rid="ref14">14</xref>], and adherence levels may decrease with engagement time [<xref ref-type="bibr" rid="ref42">42</xref>,<xref ref-type="bibr" rid="ref43">43</xref>]. Therefore, future development should focus on robustly handling limited initial data, efficient data collection strategies, precise quantification or mitigation of propagated uncertainty, and incorporating psychological and contextual factors into sophisticated behavioral models. 
Crucially, improving behavioral adherence through this future development is also key to minimizing the compounding effects of prediction errors within our two-stage model.</p><p>Fourth, the behavioral parameters&#x2019; prior knowledge, derived from self-reported questionnaires (<xref ref-type="table" rid="table2">Table 2</xref>), may not perfectly reflect true user tendencies due to cognitive biases. Inaccuracies in these initial priors could affect the model&#x2019;s early parameter estimation accuracy and the speed of convergence to optimal recommendations. While our multiarmed bandit naturally adapts over time through experiential learning, more robust methods for acquiring these priors, such as integrating objective behavioral data, could further enhance initial performance and reduce the burden of exploration.</p><p>Finally, while our current action candidates are tailored around carbohydrate intake and postprandial walking, expanding the scope to include other types of exercise or dietary components would enhance personalization. Also, redesigning the reward function to align more closely with clinical guidelines for diabetes would improve its clinical relevance. Incorporating advanced behavioral change techniques [<xref ref-type="bibr" rid="ref44">44</xref>], such as gamification, could further boost user engagement and usability.</p></sec><sec id="s4-3"><title>Future Directions</title><p>Beyond the immediate limitations, our study highlights several important directions for future research and practical implementation. Foremost among these, addressing the ethical considerations and practical challenges associated with long-term behavior modification interventions is crucial for successful clinical deployment. Factors such as user fatigue from continuous data collection and potential disengagement with recommendations over extended periods need careful consideration. 
Moreover, the current framework does not explicitly incorporate mechanisms to prevent the recommendation of potentially clinically damaging actions. While action candidates are prepared within a clinically plausible range, individual-specific contraindications or extreme physiological states could render even a seemingly &#x201C;plausible&#x201D; recommendation detrimental (eg, recommending a very low carbohydrate intake to someone with specific metabolic conditions). Future work will also include empirically establishing practical timelines for robust parameter convergence and policy updates, considering user engagement and burden.</p><p>Another critical area concerns the data requirements and accuracy for personalized recommendations. A significant challenge, especially for initial deployment, is the need for sufficient user-specific data to effectively learn and personalize recommendations. While our method uses prior knowledge to accelerate initial learning, acquiring this initial information (eg, via questionnaires or pretraining data) can be difficult and prone to cognitive biases, as self-reported behavioral priors (<xref ref-type="table" rid="table2">Table 2</xref>) may not perfectly reflect true user tendencies. However, recent advancements in mobile health apps, such as eMOM (Helsinki University Hospital) [<xref ref-type="bibr" rid="ref8">8</xref>], are enabling the comprehensive collection and visualization of vast amounts of user behavioral and physiological data, including glucose levels. This digital self-tracking, by visualizing relevant data, has shown a positive impact on user understanding of their own health characteristics and offers a promising avenue for acquiring more robust and objective priors. For future clinical real-world applications, directly providing recommendations without proper context carries medical risks. Therefore, a clinician-in-the-loop approach is crucial. 
This involves an initial phase focused on data collection and visualization (eg, through mobile apps such as eMOM [<xref ref-type="bibr" rid="ref8">8</xref>]), followed by a collaborative process where clinicians work with users to understand individual characteristics based on the visualized data. This systematic approach ensures the quantity and quality of prior information for each user before recommendations are made. Future work will integrate such an eMOM-like mobile app within a clinician-in-the-loop framework, enabling large-scale validation of the entire process from data visualization to personalized recommendation.</p><p>To achieve full clinical deployment, it is paramount to integrate safety constraints into the action selection process, developing methods to filter out actions likely to cause adverse clinical outcomes (eg, through real-time physiological data integration or clinician-in-the-loop systems). Future work should proactively investigate strategies to mitigate these issues, such as dynamic adjustment of intervention intensity, gamification elements to maintain sustained engagement, and robust ethical frameworks ensuring user well-being, privacy, and safety in longitudinal deployments.</p></sec><sec id="s4-4"><title>Conclusions</title><p>In this study, we proposed a method for optimizing the planning of dietary and exercise recommendations to improve postprandial glucose levels through behavioral changes. The proposed method is a multiarmed bandit based on a two-stage reward prediction model, where an action is a combination of the amount of carbohydrate intake and postprandial walking duration, and the reward is the postprandial glucose level. We specifically realized the reward prediction for each action by predicting the behavioral responses to an action and then predicting the postprandial glycemic response using the predicted responses. 
From the simulation experiment, it was demonstrated that action selection using the proposed method significantly increased the cumulative reward compared to the default action selection. Simultaneously, we found that the most beneficial setting of the proposed method is adopting a policy to maximize the predicted rewards from the beginning, together with robust parameter learning using prior distributions for the prediction models. Finally, based on this finding, an intervention experiment was conducted on 6 healthy participants, and it was shown that the application of the proposed method provided preliminary evidence that it could improve postprandial glucose levels along with the behaviors of carbohydrate intake and postprandial walking. This initial real-world investigation, while valuable for understanding the practical considerations and informing the design of larger trials, underscores the need for extensive long-term validation in diverse patient populations, ensuring user safety and promoting sustained engagement.</p></sec></sec></body><back><notes><sec><title>Funding</title><p>No external financial support or grants were received for this work.</p></sec><sec><title>Data Availability</title><p>The datasets analyzed during this study are not publicly available due to ethical considerations and the privacy of personal data but are available from the corresponding author on reasonable request.</p></sec></notes><fn-group><fn fn-type="conflict"><p>None declared.</p></fn></fn-group><glossary><title>Abbreviations</title><def-list><def-item><term id="abb1">iAUC</term><def><p>incremental area under the curve</p></def></def-item></def-list></glossary><ref-list><title>References</title><ref id="ref1"><label>1</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Bommer</surname><given-names>C</given-names> </name><name name-style="western"><surname>Sagalova</surname><given-names>V</given-names> 
</name><name name-style="western"><surname>Heesemann</surname><given-names>E</given-names> </name><etal/></person-group><article-title>Global economic burden of diabetes in adults: projections from 2015 to 2030</article-title><source>Diabetes Care</source><year>2018</year><month>05</month><volume>41</volume><issue>5</issue><fpage>963</fpage><lpage>970</lpage><pub-id pub-id-type="doi">10.2337/dc17-1962</pub-id><pub-id pub-id-type="medline">29475843</pub-id></nlm-citation></ref><ref id="ref2"><label>2</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Lee</surname><given-names>SH</given-names> </name><name name-style="western"><surname>Kim</surname><given-names>MK</given-names> </name></person-group><article-title>Recent updates to clinical practice guidelines for diabetes mellitus</article-title><source>Endocrinol Metab (Seoul)</source><year>2022</year><month>02</month><volume>37</volume><issue>1</issue><fpage>26</fpage><lpage>37</lpage><pub-id pub-id-type="doi">10.3803/EnM.2022.105</pub-id><pub-id pub-id-type="medline">35255599</pub-id></nlm-citation></ref><ref id="ref3"><label>3</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Asif</surname><given-names>M</given-names> </name></person-group><article-title>The prevention and control the type-2 diabetes by changing lifestyle and dietary pattern</article-title><source>J Educ Health Promot</source><year>2014</year><volume>3</volume><fpage>1</fpage><pub-id pub-id-type="doi">10.4103/2277-9531.127541</pub-id><pub-id pub-id-type="medline">24741641</pub-id></nlm-citation></ref><ref id="ref4"><label>4</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kyt&#x00F6;</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Koivusalo</surname><given-names>S</given-names> </name><name name-style="western"><surname>Ruonala</surname><given-names>A</given-names> </name><etal/></person-group><article-title>Behavior change app for self-management of gestational diabetes: design and evaluation of desirable features</article-title><source>JMIR Hum Factors</source><year>2022</year><month>10</month><day>12</day><volume>9</volume><issue>4</issue><fpage>e36987</fpage><pub-id pub-id-type="doi">10.2196/36987</pub-id><pub-id pub-id-type="medline">36222806</pub-id></nlm-citation></ref><ref id="ref5"><label>5</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ramakrishnan</surname><given-names>P</given-names> </name><name name-style="western"><surname>Yan</surname><given-names>K</given-names> </name><name name-style="western"><surname>Balijepalli</surname><given-names>C</given-names> </name><name name-style="western"><surname>Druyts</surname><given-names>E</given-names> </name></person-group><article-title>Changing face of healthcare: digital therapeutics in the management of diabetes</article-title><source>Curr Med Res Opin</source><year>2021</year><month>12</month><volume>37</volume><issue>12</issue><fpage>2089</fpage><lpage>2091</lpage><pub-id pub-id-type="doi">10.1080/03007995.2021.1976737</pub-id><pub-id pub-id-type="medline">34511002</pub-id></nlm-citation></ref><ref id="ref6"><label>6</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kyt&#x00F6;</surname><given-names>M</given-names> </name><name name-style="western"><surname>Markussen</surname><given-names>LT</given-names> </name><name name-style="western"><surname>Marttinen</surname><given-names>P</given-names> </name><etal/></person-group><article-title>Comprehensive self-tracking of blood glucose and lifestyle with a mobile application in the management of gestational 
diabetes: a study protocol for a randomised controlled trial (eMOM GDM study)</article-title><source>BMJ Open</source><year>2022</year><month>11</month><day>7</day><volume>12</volume><issue>11</issue><fpage>e066292</fpage><pub-id pub-id-type="doi">10.1136/bmjopen-2022-066292</pub-id><pub-id pub-id-type="medline">36344008</pub-id></nlm-citation></ref><ref id="ref7"><label>7</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Everett</surname><given-names>E</given-names> </name><name name-style="western"><surname>Kane</surname><given-names>B</given-names> </name><name name-style="western"><surname>Yoo</surname><given-names>A</given-names> </name><name name-style="western"><surname>Dobs</surname><given-names>A</given-names> </name><name name-style="western"><surname>Mathioudakis</surname><given-names>N</given-names> </name></person-group><article-title>A novel approach for fully automated, personalized health coaching for adults with prediabetes: pilot clinical trial</article-title><source>J Med Internet Res</source><year>2018</year><month>02</month><day>27</day><volume>20</volume><issue>2</issue><fpage>e72</fpage><pub-id pub-id-type="doi">10.2196/jmir.9723</pub-id><pub-id pub-id-type="medline">29487046</pub-id></nlm-citation></ref><ref id="ref8"><label>8</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Kyt&#x00F6;</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hotta</surname><given-names>S</given-names> </name><name name-style="western"><surname>Niinist&#x00F6;</surname><given-names>S</given-names> </name><etal/></person-group><article-title>Periodic mobile application (eMOM) with self-tracking of glucose and lifestyle improves treatment of diet-controlled gestational diabetes without human guidance: a randomized controlled trial</article-title><source>Am J Obstet 
Gynecol</source><year>2024</year><month>11</month><volume>231</volume><issue>5</issue><fpage>541</fpage><pub-id pub-id-type="doi">10.1016/j.ajog.2024.02.303</pub-id><pub-id pub-id-type="medline">38432415</pub-id></nlm-citation></ref><ref id="ref9"><label>9</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Debon</surname><given-names>R</given-names> </name><name name-style="western"><surname>Coleone</surname><given-names>JD</given-names> </name><name name-style="western"><surname>Bellei</surname><given-names>EA</given-names> </name><name name-style="western"><surname>De Marchi</surname><given-names>ACB</given-names> </name></person-group><article-title>Mobile health applications for chronic diseases: a systematic review of features for lifestyle improvement</article-title><source>Diabetes Metab Syndr</source><year>2019</year><volume>13</volume><issue>4</issue><fpage>2507</fpage><lpage>2512</lpage><pub-id pub-id-type="doi">10.1016/j.dsx.2019.07.016</pub-id><pub-id pub-id-type="medline">31405669</pub-id></nlm-citation></ref><ref id="ref10"><label>10</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Nahum-Shani</surname><given-names>I</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>SN</given-names> </name><name name-style="western"><surname>Spring</surname><given-names>BJ</given-names> </name><etal/></person-group><article-title>Just-in-time adaptive interventions (JITAIs) in mobile health: key components and design principles for ongoing health behavior support</article-title><source>Ann Behav Med</source><year>2018</year><month>05</month><day>18</day><volume>52</volume><issue>6</issue><fpage>446</fpage><lpage>462</lpage><pub-id pub-id-type="doi">10.1007/s12160-016-9830-8</pub-id><pub-id pub-id-type="medline">27663578</pub-id></nlm-citation></ref><ref 
id="ref11"><label>11</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Klasnja</surname><given-names>P</given-names> </name><name name-style="western"><surname>Smith</surname><given-names>S</given-names> </name><name name-style="western"><surname>Seewald</surname><given-names>NJ</given-names> </name><etal/></person-group><article-title>Efficacy of contextually tailored suggestions for physical activity: a micro-randomized optimization trial of HeartSteps</article-title><source>Ann Behav Med</source><year>2019</year><month>05</month><day>3</day><volume>53</volume><issue>6</issue><fpage>573</fpage><lpage>582</lpage><pub-id pub-id-type="doi">10.1093/abm/kay067</pub-id><pub-id pub-id-type="medline">30192907</pub-id></nlm-citation></ref><ref id="ref12"><label>12</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Liu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Nemati</surname><given-names>S</given-names> </name><name name-style="western"><surname>Yin</surname><given-names>G</given-names> </name></person-group><article-title>Reinforcement learning in healthcare: a survey</article-title><source>ACM Comput Surv</source><year>2023</year><month>01</month><day>31</day><volume>55</volume><issue>1</issue><fpage>1</fpage><lpage>36</lpage><pub-id pub-id-type="doi">10.1145/3477600</pub-id></nlm-citation></ref><ref id="ref13"><label>13</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yom-Tov</surname><given-names>E</given-names> </name><name name-style="western"><surname>Feraru</surname><given-names>G</given-names> </name><name name-style="western"><surname>Kozdoba</surname><given-names>M</given-names> </name><name 
name-style="western"><surname>Mannor</surname><given-names>S</given-names> </name><name name-style="western"><surname>Tennenholtz</surname><given-names>M</given-names> </name><name name-style="western"><surname>Hochberg</surname><given-names>I</given-names> </name></person-group><article-title>Encouraging physical activity in patients with diabetes: intervention using a reinforcement learning system</article-title><source>J Med Internet Res</source><year>2017</year><month>10</month><day>10</day><volume>19</volume><issue>10</issue><fpage>e338</fpage><pub-id pub-id-type="doi">10.2196/jmir.7994</pub-id><pub-id pub-id-type="medline">29017988</pub-id></nlm-citation></ref><ref id="ref14"><label>14</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Liao</surname><given-names>P</given-names> </name><name name-style="western"><surname>Greenewald</surname><given-names>K</given-names> </name><name name-style="western"><surname>Klasnja</surname><given-names>P</given-names> </name><name name-style="western"><surname>Murphy</surname><given-names>S</given-names> </name></person-group><article-title>Personalized HeartSteps: a reinforcement learning algorithm for optimizing physical activity</article-title><source>Proc ACM Interact Mob Wearable Ubiquitous Technol</source><year>2020</year><month>03</month><volume>4</volume><issue>1</issue><fpage>18</fpage><pub-id pub-id-type="doi">10.1145/3381007</pub-id><pub-id pub-id-type="medline">34527853</pub-id></nlm-citation></ref><ref id="ref15"><label>15</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Tejedor</surname><given-names>M</given-names> </name><name name-style="western"><surname>Woldaregay</surname><given-names>AZ</given-names> </name><name name-style="western"><surname>Godtliebsen</surname><given-names>F</given-names> </name></person-group><article-title>Reinforcement learning 
application in diabetes blood glucose control: a systematic review</article-title><source>Artif Intell Med</source><year>2020</year><month>04</month><volume>104</volume><fpage>101836</fpage><pub-id pub-id-type="doi">10.1016/j.artmed.2020.101836</pub-id><pub-id pub-id-type="medline">32499004</pub-id></nlm-citation></ref><ref id="ref16"><label>16</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Yau</surname><given-names>KLA</given-names> </name><name name-style="western"><surname>Chong</surname><given-names>YW</given-names> </name><name name-style="western"><surname>Fan</surname><given-names>X</given-names> </name><name name-style="western"><surname>Wu</surname><given-names>C</given-names> </name><name name-style="western"><surname>Saleem</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Lim</surname><given-names>PC</given-names> </name></person-group><article-title>Reinforcement learning models and algorithms for diabetes management</article-title><source>IEEE Access</source><year>2023</year><volume>11</volume><fpage>28391</fpage><lpage>28415</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2023.3259425</pub-id></nlm-citation></ref><ref id="ref17"><label>17</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Oroojeni Mohammad Javad</surname><given-names>M</given-names> </name><name name-style="western"><surname>Agboola</surname><given-names>SO</given-names> </name><name name-style="western"><surname>Jethwani</surname><given-names>K</given-names> </name><name name-style="western"><surname>Zeid</surname><given-names>A</given-names> </name><name name-style="western"><surname>Kamarthi</surname><given-names>S</given-names> </name></person-group><article-title>A reinforcement learning-based method for management of type 1 diabetes: exploratory study</article-title><source>JMIR 
Diabetes</source><year>2019</year><month>08</month><day>28</day><volume>4</volume><issue>3</issue><fpage>e12905</fpage><pub-id pub-id-type="doi">10.2196/12905</pub-id><pub-id pub-id-type="medline">31464196</pub-id></nlm-citation></ref><ref id="ref18"><label>18</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Shepherd</surname><given-names>E</given-names> </name><name name-style="western"><surname>Gomersall</surname><given-names>JC</given-names> </name><name name-style="western"><surname>Tieu</surname><given-names>J</given-names> </name><name name-style="western"><surname>Han</surname><given-names>S</given-names> </name><name name-style="western"><surname>Crowther</surname><given-names>CA</given-names> </name><name name-style="western"><surname>Middleton</surname><given-names>P</given-names> </name></person-group><article-title>Combined diet and exercise interventions for preventing gestational diabetes mellitus</article-title><source>Cochrane Database Syst Rev</source><year>2017</year><month>11</month><day>13</day><volume>2017</volume><issue>11</issue><fpage>CD010443</fpage><pub-id pub-id-type="doi">10.1002/14651858.CD010443.pub3</pub-id></nlm-citation></ref><ref id="ref19"><label>19</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Borror</surname><given-names>A</given-names> </name><name name-style="western"><surname>Zieff</surname><given-names>G</given-names> </name><name name-style="western"><surname>Battaglini</surname><given-names>C</given-names> </name><name name-style="western"><surname>Stoner</surname><given-names>L</given-names> </name></person-group><article-title>The effects of postprandial exercise on glucose control in individuals with type 2 diabetes: a systematic review</article-title><source>Sports 
Med</source><year>2018</year><month>06</month><volume>48</volume><issue>6</issue><fpage>1479</fpage><lpage>1491</lpage><pub-id pub-id-type="doi">10.1007/s40279-018-0864-x</pub-id><pub-id pub-id-type="medline">29396781</pub-id></nlm-citation></ref><ref id="ref20"><label>20</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Rabbi</surname><given-names>M</given-names> </name><name name-style="western"><surname>Aung</surname><given-names>MH</given-names> </name><name name-style="western"><surname>Zhang</surname><given-names>M</given-names> </name><name name-style="western"><surname>Choudhury</surname><given-names>T</given-names> </name></person-group><article-title>MyBehavior: automatic personalized health feedback from user behaviors and preferences using smartphones</article-title><conf-name>UbiComp &#x2019;15: Proceedings of the 2015 ACM International Joint Conference on Pervasive and Ubiquitous Computing</conf-name><conf-date>Sep 7-11, 2015</conf-date><pub-id pub-id-type="doi">10.1145/2750858.2805840</pub-id></nlm-citation></ref><ref id="ref21"><label>21</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Ameko</surname><given-names>MK</given-names> </name><name name-style="western"><surname>Beltzer</surname><given-names>ML</given-names> </name><name name-style="western"><surname>Cai</surname><given-names>L</given-names> </name><name name-style="western"><surname>Boukhechba</surname><given-names>M</given-names> </name><name name-style="western"><surname>Teachman</surname><given-names>BA</given-names> </name><name name-style="western"><surname>Barnes</surname><given-names>LE</given-names> </name></person-group><article-title>Offline contextual multi-armed bandits for mobile health interventions: a case study on emotion regulation</article-title><conf-name>RecSys &#x2019;20: Proceedings of the 14th ACM Conference on 
Recommender Systems</conf-name><conf-date>Sep 22-26, 2020</conf-date><pub-id pub-id-type="doi">10.1145/3383313.3412244</pub-id></nlm-citation></ref><ref id="ref22"><label>22</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Sutton</surname><given-names>RS</given-names> </name><name name-style="western"><surname>Barto</surname><given-names>AG</given-names> </name></person-group><source>Reinforcement Learning: An Introduction</source><year>2018</year><access-date>2026-03-07</access-date><publisher-name>MIT Press</publisher-name><comment><ext-link ext-link-type="uri" xlink:href="https://mitpress.mit.edu/9780262039246/reinforcement-learning/">https://mitpress.mit.edu/9780262039246/reinforcement-learning/</ext-link></comment></nlm-citation></ref><ref id="ref23"><label>23</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Pustozerov</surname><given-names>EA</given-names> </name><name name-style="western"><surname>Tkachuk</surname><given-names>AS</given-names> </name><name name-style="western"><surname>Vasukova</surname><given-names>EA</given-names> </name><etal/></person-group><article-title>Machine learning approach for postprandial blood glucose prediction in gestational diabetes mellitus</article-title><source>IEEE Access</source><year>2020</year><volume>8</volume><fpage>219308</fpage><lpage>219321</lpage><pub-id pub-id-type="doi">10.1109/ACCESS.2020.3042483</pub-id></nlm-citation></ref><ref id="ref24"><label>24</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gallwitz</surname><given-names>B</given-names> </name></person-group><article-title>Implications of postprandial glucose and weight control in people with type 2 diabetes: understanding and implementing the International Diabetes Federation guidelines</article-title><source>Diabetes 
Care</source><year>2009</year><month>11</month><volume>32</volume><issue>Suppl 2</issue><fpage>S322</fpage><lpage>S325</lpage><pub-id pub-id-type="doi">10.2337/dc09-S331</pub-id><pub-id pub-id-type="medline">19875573</pub-id></nlm-citation></ref><ref id="ref25"><label>25</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Evenepoel</surname><given-names>C</given-names> </name><name name-style="western"><surname>Clevers</surname><given-names>E</given-names> </name><name name-style="western"><surname>Deroover</surname><given-names>L</given-names> </name><name name-style="western"><surname>Van Loo</surname><given-names>W</given-names> </name><name name-style="western"><surname>Matthys</surname><given-names>C</given-names> </name><name name-style="western"><surname>Verbeke</surname><given-names>K</given-names> </name></person-group><article-title>Accuracy of nutrient calculations using the consumer-focused online app MyFitnessPal: validation study</article-title><source>J Med Internet Res</source><year>2020</year><month>10</month><day>21</day><volume>22</volume><issue>10</issue><fpage>e18237</fpage><pub-id pub-id-type="doi">10.2196/18237</pub-id><pub-id pub-id-type="medline">33084583</pub-id></nlm-citation></ref><ref id="ref26"><label>26</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wylie-Rosett</surname><given-names>J</given-names> </name><name name-style="western"><surname>Aebersold</surname><given-names>K</given-names> </name><name name-style="western"><surname>Conlon</surname><given-names>B</given-names> </name><name name-style="western"><surname>Isasi</surname><given-names>CR</given-names> </name><name name-style="western"><surname>Ostrovsky</surname><given-names>NW</given-names> </name></person-group><article-title>Health effects of low-carbohydrate diets: where should new research go?</article-title><source>Curr 
Diab Rep</source><year>2013</year><month>04</month><volume>13</volume><issue>2</issue><fpage>271</fpage><lpage>278</lpage><pub-id pub-id-type="doi">10.1007/s11892-012-0357-5</pub-id><pub-id pub-id-type="medline">23266565</pub-id></nlm-citation></ref><ref id="ref27"><label>27</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Reynolds</surname><given-names>AN</given-names> </name><name name-style="western"><surname>Mann</surname><given-names>JI</given-names> </name><name name-style="western"><surname>Williams</surname><given-names>S</given-names> </name><name name-style="western"><surname>Venn</surname><given-names>BJ</given-names> </name></person-group><article-title>Advice to walk after meals is more effective for lowering postprandial glycaemia in type 2 diabetes mellitus than advice that does not specify timing: a randomised crossover study</article-title><source>Diabetologia</source><year>2016</year><month>12</month><volume>59</volume><issue>12</issue><fpage>2572</fpage><lpage>2578</lpage><pub-id pub-id-type="doi">10.1007/s00125-016-4085-2</pub-id><pub-id pub-id-type="medline">27747394</pub-id></nlm-citation></ref><ref id="ref28"><label>28</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Aqeel</surname><given-names>M</given-names> </name><name name-style="western"><surname>Forster</surname><given-names>A</given-names> </name><name name-style="western"><surname>Richards</surname><given-names>EA</given-names> </name><etal/></person-group><article-title>The effect of timing of exercise and eating on postprandial response in adults: a systematic review</article-title><source>Nutrients</source><year>2020</year><month>01</month><day>15</day><volume>12</volume><issue>1</issue><fpage>221</fpage><pub-id pub-id-type="doi">10.3390/nu12010221</pub-id><pub-id pub-id-type="medline">31952250</pub-id></nlm-citation></ref><ref 
id="ref29"><label>29</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Andersen</surname><given-names>MB</given-names> </name><name name-style="western"><surname>Fuglsang</surname><given-names>J</given-names> </name><name name-style="western"><surname>Ostenfeld</surname><given-names>EB</given-names> </name><name name-style="western"><surname>Poulsen</surname><given-names>CW</given-names> </name><name name-style="western"><surname>Daugaard</surname><given-names>M</given-names> </name><name name-style="western"><surname>Ovesen</surname><given-names>PG</given-names> </name></person-group><article-title>Postprandial interval walking-effect on blood glucose in pregnant women with gestational diabetes</article-title><source>Am J Obstet Gynecol MFM</source><year>2021</year><month>11</month><volume>3</volume><issue>6</issue><fpage>100440</fpage><pub-id pub-id-type="doi">10.1016/j.ajogmf.2021.100440</pub-id><pub-id pub-id-type="medline">34216833</pub-id></nlm-citation></ref><ref id="ref30"><label>30</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Gao</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Han</surname><given-names>Y</given-names> </name><name name-style="western"><surname>Ren</surname><given-names>Z</given-names> </name><name name-style="western"><surname>Zhou</surname><given-names>Z</given-names> </name></person-group><article-title>Batched multi-armed bandits problem</article-title><access-date>2026-03-07</access-date><conf-name>Proceedings of the 33rd International Conference on Neural Information Processing Systems</conf-name><conf-date>Dec 8-14, 2019</conf-date><conf-loc>Vancouver, BC, Canada</conf-loc><fpage>503</fpage><comment><ext-link ext-link-type="uri" 
xlink:href="https://dl.acm.org/doi/10.5555/3454287.3454333">https://dl.acm.org/doi/10.5555/3454287.3454333</ext-link></comment></nlm-citation></ref><ref id="ref31"><label>31</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Wolever</surname><given-names>TM</given-names> </name><name name-style="western"><surname>Bolognesi</surname><given-names>C</given-names> </name></person-group><article-title>Source and amount of carbohydrate affect postprandial glucose and insulin in normal subjects</article-title><source>J Nutr</source><year>1996</year><month>11</month><volume>126</volume><issue>11</issue><fpage>2798</fpage><lpage>2806</lpage><pub-id pub-id-type="doi">10.1093/jn/126.11.2798</pub-id><pub-id pub-id-type="medline">8914951</pub-id></nlm-citation></ref><ref id="ref32"><label>32</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ajzen</surname><given-names>I</given-names> </name></person-group><article-title>The theory of planned behavior</article-title><source>Organ Behav Hum Decis Process</source><year>1991</year><month>12</month><volume>50</volume><issue>2</issue><fpage>179</fpage><lpage>211</lpage><pub-id pub-id-type="doi">10.1016/0749-5978(91)90020-T</pub-id></nlm-citation></ref><ref id="ref33"><label>33</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Ashrafi</surname><given-names>RA</given-names> </name><name name-style="western"><surname>Ahola</surname><given-names>AJ</given-names> </name><name name-style="western"><surname>Roseng&#x00E5;rd-B&#x00E4;rlund</surname><given-names>M</given-names> </name><etal/></person-group><article-title>Computational modelling of self-reported dietary carbohydrate intake on glucose concentrations in patients undergoing Roux-en-Y gastric bypass versus one-anastomosis gastric bypass</article-title><source>Ann 
Med</source><year>2021</year><month>12</month><volume>53</volume><issue>1</issue><fpage>1885</fpage><lpage>1895</lpage><pub-id pub-id-type="doi">10.1080/07853890.2021.1964035</pub-id><pub-id pub-id-type="medline">34714211</pub-id></nlm-citation></ref><ref id="ref34"><label>34</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Richter</surname><given-names>EA</given-names> </name><name name-style="western"><surname>Derave</surname><given-names>W</given-names> </name><name name-style="western"><surname>Wojtaszewski</surname><given-names>JF</given-names> </name></person-group><article-title>Glucose, exercise and insulin: emerging concepts</article-title><source>J Physiol</source><year>2001</year><month>09</month><day>1</day><volume>535</volume><issue>Pt 2</issue><fpage>313</fpage><lpage>322</lpage><pub-id pub-id-type="doi">10.1111/j.1469-7793.2001.t01-2-00313.x</pub-id><pub-id pub-id-type="medline">11533125</pub-id></nlm-citation></ref><ref id="ref35"><label>35</label><nlm-citation citation-type="confproc"><person-group person-group-type="author"><name name-style="western"><surname>Jankovic</surname><given-names>MV</given-names> </name><name name-style="western"><surname>Mosimann</surname><given-names>S</given-names> </name><name name-style="western"><surname>Bally</surname><given-names>L</given-names> </name><name name-style="western"><surname>Stettler</surname><given-names>C</given-names> </name><name name-style="western"><surname>Mougiakakou</surname><given-names>S</given-names> </name></person-group><article-title>Deep prediction model: the case of online adaptive prediction of subcutaneous glucose</article-title><conf-name>2016 13th Symposium on Neural Networks and Applications (NEUREL)</conf-name><conf-date>Nov 22-24, 2016</conf-date><pub-id pub-id-type="doi">10.1109/NEUREL.2016.7800095</pub-id></nlm-citation></ref><ref id="ref36"><label>36</label><nlm-citation 
citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Xie</surname><given-names>J</given-names> </name><name name-style="western"><surname>Wang</surname><given-names>Q</given-names> </name></person-group><article-title>A data-driven personalized model of glucose dynamics taking account of the effects of physical activity for type 1 diabetes: an in silico study</article-title><source>J Biomech Eng</source><year>2019</year><month>01</month><day>1</day><volume>141</volume><issue>1</issue><pub-id pub-id-type="doi">10.1115/1.4041522</pub-id><pub-id pub-id-type="medline">30458503</pub-id></nlm-citation></ref><ref id="ref37"><label>37</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Hotta</surname><given-names>S</given-names> </name><name name-style="western"><surname>Kyt&#x00F6;</surname><given-names>M</given-names> </name><name name-style="western"><surname>Koivusalo</surname><given-names>S</given-names> </name><name name-style="western"><surname>Heinonen</surname><given-names>S</given-names> </name><name name-style="western"><surname>Marttinen</surname><given-names>P</given-names> </name></person-group><article-title>Optimizing postprandial glucose prediction through integration of diet and exercise: leveraging transfer learning with imbalanced patient data</article-title><source>PLoS ONE</source><year>2024</year><volume>19</volume><issue>8</issue><fpage>e0298506</fpage><pub-id pub-id-type="doi">10.1371/journal.pone.0298506</pub-id><pub-id pub-id-type="medline">39088422</pub-id></nlm-citation></ref><ref id="ref38"><label>38</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Zhang</surname><given-names>G</given-names> </name><name name-style="western"><surname>Ashrafi</surname><given-names>RA</given-names> </name><name 
name-style="western"><surname>Juuti</surname><given-names>A</given-names> </name><name name-style="western"><surname>Pietil&#x00E4;inen</surname><given-names>K</given-names> </name><name name-style="western"><surname>Marttinen</surname><given-names>P</given-names> </name></person-group><article-title>Errors-in-variables modeling of personalized treatment-response trajectories</article-title><source>IEEE J Biomed Health Inform</source><year>2021</year><month>01</month><volume>25</volume><issue>1</issue><fpage>201</fpage><lpage>208</lpage><pub-id pub-id-type="doi">10.1109/JBHI.2020.2987323</pub-id><pub-id pub-id-type="medline">32324579</pub-id></nlm-citation></ref><ref id="ref39"><label>39</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Le Floch</surname><given-names>JP</given-names> </name><name name-style="western"><surname>Escuyer</surname><given-names>P</given-names> </name><name name-style="western"><surname>Baudin</surname><given-names>E</given-names> </name><name name-style="western"><surname>Baudon</surname><given-names>D</given-names> </name><name name-style="western"><surname>Perlemuter</surname><given-names>L</given-names> </name></person-group><article-title>Blood glucose area under the curve. 
Methodological aspects</article-title><source>Diabetes Care</source><year>1990</year><month>02</month><volume>13</volume><issue>2</issue><fpage>172</fpage><lpage>175</lpage><pub-id pub-id-type="doi">10.2337/diacare.13.2.172</pub-id><pub-id pub-id-type="medline">2351014</pub-id></nlm-citation></ref><ref id="ref40"><label>40</label><nlm-citation citation-type="web"><article-title>RStan: the R interface to Stan</article-title><source>R package version 2.21.8</source><year>2023</year><access-date>2026-02-20</access-date><comment><ext-link ext-link-type="uri" xlink:href="https://mc-stan.org">https://mc-stan.org</ext-link></comment></nlm-citation></ref><ref id="ref41"><label>41</label><nlm-citation citation-type="book"><person-group person-group-type="author"><name name-style="western"><surname>Oh</surname><given-names>R</given-names> </name><name name-style="western"><surname>Gilani</surname><given-names>B</given-names> </name><name name-style="western"><surname>Uppaluri</surname><given-names>KR</given-names> </name></person-group><article-title>Low-carbohydrate diet</article-title><source>StatPearls</source><year>2023</year><publisher-name>Treasure Island (FL): StatPearls Publishing</publisher-name><pub-id pub-id-type="medline">30725769</pub-id></nlm-citation></ref><ref id="ref42"><label>42</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Gershkowitz</surname><given-names>BD</given-names> </name><name name-style="western"><surname>Hillert</surname><given-names>CJ</given-names> </name><name name-style="western"><surname>Crotty</surname><given-names>BH</given-names> </name></person-group><article-title>Digital coaching strategies to facilitate behavioral change in type 2 diabetes: a systematic review</article-title><source>J Clin Endocrinol Metab</source><year>2021</year><month>03</month><day>25</day><volume>106</volume><issue>4</issue><fpage>e1513</fpage><lpage>e1520</lpage><pub-id 
pub-id-type="doi">10.1210/clinem/dgaa850</pub-id><pub-id pub-id-type="medline">33206975</pub-id></nlm-citation></ref><ref id="ref43"><label>43</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Van Rhoon</surname><given-names>L</given-names> </name><name name-style="western"><surname>Byrne</surname><given-names>M</given-names> </name><name name-style="western"><surname>Morrissey</surname><given-names>E</given-names> </name><name name-style="western"><surname>Murphy</surname><given-names>J</given-names> </name><name name-style="western"><surname>McSharry</surname><given-names>J</given-names> </name></person-group><article-title>A systematic review of the behaviour change techniques and digital features in technology-driven type 2 diabetes prevention interventions</article-title><source>Digit Health</source><year>2020</year><volume>6</volume><fpage>2055207620914427</fpage><pub-id pub-id-type="doi">10.1177/2055207620914427</pub-id><pub-id pub-id-type="medline">32269830</pub-id></nlm-citation></ref><ref id="ref44"><label>44</label><nlm-citation citation-type="journal"><person-group person-group-type="author"><name name-style="western"><surname>Aguiar</surname><given-names>M</given-names> </name><name name-style="western"><surname>Trujillo</surname><given-names>M</given-names> </name><name name-style="western"><surname>Chaves</surname><given-names>D</given-names> </name><name name-style="western"><surname>&#x00C1;lvarez</surname><given-names>R</given-names> </name><name name-style="western"><surname>Epelde</surname><given-names>G</given-names> </name></person-group><article-title>mHealth apps using behavior change techniques to self-report data: systematic review</article-title><source>JMIR Mhealth Uhealth</source><year>2022</year><month>09</month><day>9</day><volume>10</volume><issue>9</issue><fpage>e33247</fpage><pub-id pub-id-type="doi">10.2196/33247</pub-id><pub-id 
pub-id-type="medline">36083606</pub-id></nlm-citation></ref></ref-list><app-group><supplementary-material id="app1"><label>Multimedia Appendix 1</label><p>Additional material on the two-stage reward prediction model and the simulation experiment setup.</p><media xlink:href="formative_v10i1e70826_app1.docx" xlink:title="DOCX File, 657 KB"/></supplementary-material></app-group></back></article>