Implement sleep_efficiency and sleep_onset_latency in sleep diaries

djmarques · djmarques · commit 1321d045bf0a · 2025-07-23T16:13:19.000+01:00
diff --git a/circStudio/analysis/sleep/diary.py b/circStudio/analysis/sleep/diary.py
@@ -1,6 +1,8 @@
 import os
 import pandas as pd
 import pyexcel as pxl
+import warnings
+from circStudio.analysis.sleep import *
 
 
 class SleepDiary:
@@ -226,3 +228,85 @@ def total_nowear_time(self, state='NOWEAR'):
         """
 
         return self.state_infos(state)
+
+
+    def sleep_efficiency(self, data):
+        """
+        Compute sleep efficiency as the average main sleep bout duration, as detected by
+        ``main_sleep_bouts``, divided by the average total bed time from the sleep diary.
+
+        Parameters
+        ----------
+        data : pd.Series
+            Recording forwarded unchanged to ``main_sleep_bouts``.
+
+        Returns
+        -------
+        float or None
+            Sleep efficiency; None (after a warning) if bed time is zero or the ratio exceeds 1.
+
+        """
+        # Average duration of the main sleep bout (second item returned by main_sleep_bouts)
+        avg_total_sleep_time = main_sleep_bouts(data=data)[1]
+        # NOTE(review): assumes the diary average uses the same unit as the one above - confirm
+        avg_total_bed_time = self.total_bed_time()[0]
+
+        # A zero denominator makes the ratio undefined
+        if avg_total_bed_time == 0:
+            warnings.warn('Average total bed time is 0.')
+            return None
+
+        # Sleeping longer than the time spent in bed signals inconsistent inputs
+        if avg_total_sleep_time > avg_total_bed_time:
+            warnings.warn('Average total sleep time is greater than average total bed time.')
+            return None
+
+        return avg_total_sleep_time / avg_total_bed_time
+
+
+    def sleep_onset_latency(self, data):
+        """
+        Compute sleep onset latency as the delay between each bedtime reported in the sleep
+        diary and the main sleep onset detected on the same calendar date.
+
+        Parameters
+        ----------
+        data : pandas.Series
+            Input data series with a DatetimeIndex, where the index specifies the time points and
+            the values represent the input variable (e.g., activity, light). It is forwarded
+            unchanged to ``main_sleep_bouts``.
+
+        Returns
+        -------
+        pd.Series
+            Sleep onset latency (timedelta) indexed by the date of each matched diary night.
+        pd.Timedelta
+            Mean sleep onset latency (NaT when no diary night could be matched).
+
+        """
+        main_sleep_df = main_sleep_bouts(data=data)[0]
+        diary_nights_df = self._diary[self._diary['TYPE'] == 'NIGHT']
+
+        # Map each diary date to its sleep onset latency (sol)
+        sol = {}
+
+        # Iterate over the rows of the sleep diary corresponding to nighttime
+        for _, row in diary_nights_df.iterrows():
+            # Calendar date on which the diary reports the start of the night
+            date = row['START'].date()
+
+            # Detected main sleep bouts starting on that same date
+            # NOTE(review): an onset after midnight for a pre-midnight bedtime is not matched
+            matches = main_sleep_df[main_sleep_df['start_time'].dt.date == date]
+            # Nights without a detected bout on that date are skipped
+            if matches.empty:
+                continue
+
+            # Latency between the reported bedtime and the first matching sleep onset
+            sol[date] = matches.iloc[0]['start_time'] - row['START']
+
+        # Force a timedelta dtype so that an empty result still yields a NaT mean, and use
+        # Series.mean() rather than np.mean: numpy is not explicitly imported by this module
+        sol = pd.Series(sol, dtype='timedelta64[ns]')
+        return sol, sol.mean()
+
diff --git a/circStudio/analysis/sleep/sleep.py b/circStudio/analysis/sleep/sleep.py
@@ -1212,7 +1212,7 @@ def SleepProfile(data, freq='15min', algo='Roenneberg', *args, **kwargs):
     return sleep_prof.resample(freq).mean()
 
 
-def SleepRegularityIndex(data, freq='15min', bin_threshold=None, algo='Roenneberg', *args, **kwargs):
+def SleepRegularityIndex(data, bin_threshold=None, algo='Roenneberg', *args, **kwargs):
     r""" Sleep regularity index
 
     Likelihood that any two time-points (epoch-by-epoch) 24 hours apart are
@@ -1518,4 +1518,61 @@ def active_durations(data, duration_min=None, duration_max=None, algo='Roenneber
         **kwargs
     )
 
-    return [s.index[-1]-s.index[0] for s in filtered_bouts]
+    return [s.index[-1]-s.index[0] for s in filtered_bouts]
+
+
+def main_sleep_bouts(data, report='major'):
+    """
+    Calculate main (or minor) sleep episodes using the Roenneberg algorithm.
+
+    Parameters
+    ----------
+    data : pandas.Series
+        Input data series with a DatetimeIndex, where the index specifies the time points and
+        the values represent the input variable (e.g., activity, light). It is passed
+        unchanged to ``Roenneberg_AoT``.
+    report : str, optional
+        Either 'major' or 'minor'. Default is 'major'. If set to 'major', the longest sleep
+        bout of each date is reported. If set to 'minor', all the remaining bouts are
+        reported instead.
+
+    Returns
+    -------
+    pd.DataFrame
+        Selected sleep episodes (date, start_time, stop_time and duration).
+    float
+        Mean duration of the selected episodes, in minutes.
+
+    Raises
+    ------
+    ValueError
+        If ``report`` is neither 'major' nor 'minor'.
+
+    """
+    # Reject unknown modes up front instead of silently returning None
+    if report not in ('major', 'minor'):
+        raise ValueError("report must be either 'major' or 'minor'.")
+
+    # Compute the activity onset and offset using the Roenneberg algorithm
+    activity_onset, activity_offset = Roenneberg_AoT(data)
+
+    # A sleep episode starts when activity stops and ends when activity resumes
+    sleep_events = pd.DataFrame()
+    sleep_events['date'] = activity_onset.date
+    sleep_events['start_time'] = activity_offset
+    sleep_events['stop_time'] = activity_onset
+
+    # Sleep/rest episode duration
+    sleep_events['duration'] = sleep_events['stop_time'] - sleep_events['start_time']
+
+    # The longest episode per date (date of activity onset) is the main one; the rest are minor
+    main_sleep = sleep_events.loc[sleep_events.groupby('date')['duration'].idxmax()]
+    minor_sleep = sleep_events.drop(main_sleep.index)
+
+    # Keep only the requested kind of episode
+    selected = main_sleep if report == 'major' else minor_sleep
+
+    # Mean duration of the selected episodes, converted from a timedelta to minutes
+    mean = selected['duration'].mean().total_seconds() / 60
+
+    return selected, mean
diff --git a/docs/source/tutorial_3.ipynb b/docs/source/tutorial_3.ipynb
@@ -36,7 +36,12 @@
    "cell_type": "code",
    "execution_count": 1,
    "id": "ba14333118837a04",
-   "metadata": {},
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-07-23T14:16:41.434784Z",
+     "start_time": "2025-07-23T14:16:39.106959Z"
+    }
+   },
    "outputs": [],
    "source": [
     "import circStudio\n",
@@ -6690,10 +6695,85 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
+   "id": "a02a7dde-08fe-4d1b-8401-c264de032916",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 96,
    "id": "32e995ba-6cb9-411e-b4c5-ec531e865c1e",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "def sleep_onset_latency():\n",
+    "    main_sleep_df = pd.DataFrame({\n",
+    "        'date': ['1918-01-24', '1918-01-25', '1918-01-27'],\n",
+    "        'start_time': ['1918-01-24 23:43:00', '1918-01-25 22:17:00', '1918-01-27 01:43:00'],\n",
+    "        'stop_time': ['1918-01-25 06:25:00', '1918-01-26 07:20:00', '1918-01-27 07:11:00']\n",
+    "    })\n",
+    "    for col in main_sleep_df.columns:\n",
+    "            main_sleep_df[col] = pd.to_datetime(main_sleep_df[col])\n",
+    "\n",
+    "    diary_nights_df = pd.DataFrame({\n",
+    "        'START': ['1918-01-24 23:00:00', '1918-01-25 22:00:00', '1918-01-27 00:00:00'],\n",
+    "        'END': ['1918-01-25 07:00:00', '1918-01-26 07:30:00', '1918-01-27 07:30:00']\n",
+    "    })\n",
+    "    for col in diary_nights_df.columns:\n",
+    "        diary_nights_df[col] = pd.to_datetime(diary_nights_df[col])\n",
+    "\n",
+    "    # Create an empty dictionary to store sleep_onset_latency (sol) values\n",
+    "    sol = {}\n",
+    "    \n",
+    "    # Iterate over the rows of the sleep diary corresponding to nighttime\n",
+    "    for _, row in diary_nights_df.iterrows():\n",
+    "        # Extract the date from the current row\n",
+    "        date = row['START'].date()\n",
+    "        matches = main_sleep_df[main_sleep_df['start_time'].dt.date == date]\n",
+    "        if not matches.empty:\n",
+    "            sleep_onset = matches.iloc[0]['start_time']\n",
+    "            latency = sleep_onset - row['START']\n",
+    "            sol[date] = latency\n",
+    "    sol = pd.Series(sol)\n",
+    "    return pd.Series(sol), np.mean(sol)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 97,
+   "id": "5711a686-722d-4e11-9726-498beaed6e6a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(1918-01-24   0 days 00:43:00\n",
+       " 1918-01-25   0 days 00:17:00\n",
+       " 1918-01-27   0 days 01:43:00\n",
+       " dtype: timedelta64[ns],\n",
+       " Timedelta('0 days 00:54:20'))"
+      ]
+     },
+     "execution_count": 97,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sleep_onset_latency()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ea5f75de-52d0-43c5-9252-43dac1698d96",
+   "metadata": {},
+   "outputs": [],
    "source": []
   }
  ],
diff --git a/tests/sleep_tests.py b/tests/sleep_tests.py
@@ -0,0 +1,51 @@
+import pandas as pd
+import numpy as np
+
+def main():
+    """Run the checks defined in this module."""
+    sleep_onset_latency_test()
+
+
+def sleep_onset_latency_test():
+    """
+    Check the sleep onset latency computation on three hand-written nights.
+
+    Detected sleep onsets are matched to diary bedtimes by calendar date and the latency
+    is the difference between the two. Raises AssertionError on a mismatch.
+
+    Returns
+    -------
+    pd.Series
+        Sleep onset latency indexed by date.
+    pd.Timedelta
+        Mean sleep onset latency.
+
+    """
+    main_sleep_df = pd.DataFrame({
+        'date': ['1918-01-24', '1918-01-25', '1918-01-27'],
+        'start_time': ['1918-01-24 23:43:00', '1918-01-25 22:17:00', '1918-01-27 01:43:00'],
+        'stop_time': ['1918-01-25 06:25:00', '1918-01-26 07:20:00', '1918-01-27 07:11:00']
+    }).apply(pd.to_datetime)
+
+    diary_nights_df = pd.DataFrame({
+        'START': ['1918-01-24 23:00:00', '1918-01-25 22:00:00', '1918-01-27 00:00:00'],
+        'END': ['1918-01-25 07:00:00', '1918-01-26 07:30:00', '1918-01-27 07:30:00']
+    }).apply(pd.to_datetime)
+
+    sol = {}
+    for _, row in diary_nights_df.iterrows():
+        date = row['START'].date()
+        matches = main_sleep_df[main_sleep_df['start_time'].dt.date == date]
+        if not matches.empty:
+            sol[date] = matches.iloc[0]['start_time'] - row['START']
+    sol = pd.Series(sol, dtype='timedelta64[ns]')
+
+    # Expected latencies worked out by hand from the fixtures above
+    expected = [pd.Timedelta(minutes=m) for m in (43, 17, 103)]
+    assert list(sol) == expected
+    assert sol.mean() == pd.Timedelta(minutes=54, seconds=20)
+    return sol, sol.mean()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tests/test_light_mask.py b/tests/test_light_mask.py
diff --git a/tests/test_rar.py b/tests/test_rar.py