diff --git a/experimenter/experimenter/jetstream/client.py b/experimenter/experimenter/jetstream/client.py index 5fc4ba5f1..c0d77b200 100644 --- a/experimenter/experimenter/jetstream/client.py +++ b/experimenter/experimenter/jetstream/client.py @@ -287,11 +287,29 @@ def get_experiment_data(experiment: NimbusExperiment): if data and window == AnalysisWindow.OVERALL: # Append some values onto the incoming Jetstream data data.append_population_percentages() - data.append_retention_data( + week_2_retention = data.get_retention_by_window( + 2, raw_data.get(AnalysisWindow.WEEKLY, {}) .get(AnalysisBasis.ENROLLMENTS, {}) - .get(segment) + .get(segment), + Metric.RETENTION, ) + if not week_2_retention: + runtime_errors.append( + AnalysisError( + experiment=experiment.slug, + filename="experimenter/jetstream/client.py", + func_name="get_experiment_data", + log_level="WARNING", + message=( + "Week 2 retention is unavailable because this " + "experiment did not run long enough." + ), + metric=Metric.RETENTION, + timestamp=timezone.now(), + ) + ) + data.extend(week_2_retention) # Append 3-day retention from daily data data.append_retention_3_days( raw_data.get(AnalysisWindow.DAILY, {}) @@ -307,6 +325,12 @@ def get_experiment_data(experiment: NimbusExperiment): if segment == Segment.ALL: experiment_data["other_metrics"] = other_metrics elif data and window == AnalysisWindow.WEEKLY: + data.replace_retention_weeks( + raw_data.get(AnalysisWindow.WEEKLY, {}) + .get(AnalysisBasis.ENROLLMENTS, {}) + .get(segment) + ) + # Append 3-day retention from daily data data.append_retention_3_days( raw_data.get(AnalysisWindow.DAILY, {}) @@ -370,6 +394,12 @@ def get_experiment_data(experiment: NimbusExperiment): if segment == Segment.ALL: experiment_data["other_metrics"].update(other_metrics) elif data and window == AnalysisWindow.WEEKLY: + data.replace_retention_weeks( + raw_data.get(AnalysisWindow.WEEKLY, {}) + .get(AnalysisBasis.EXPOSURES, {}) + .get(segment) + ) + # Append 3-day retention from daily data data.append_retention_3_days( raw_data.get(AnalysisWindow.DAILY, {}) @@ -421,14 +451,17 @@ def get_experiment_data(experiment: NimbusExperiment): errors_experiment_overall.append(err) for e in runtime_errors: - analysis_error = AnalysisError( - experiment=experiment.slug, - filename="experimenter/jetstream/client.py", - func_name="load_data_from_gcs", - log_level="WARNING", - message=e, - timestamp=timezone.now(), - ) + if isinstance(e, AnalysisError): + analysis_error = e + else: + analysis_error = AnalysisError( + experiment=experiment.slug, + filename="experimenter/jetstream/client.py", + func_name="load_data_from_gcs", + log_level="WARNING", + message=e, + timestamp=timezone.now(), + ) errors_experiment_overall.append(analysis_error.model_dump()) errors_by_metric["experiment"] = errors_experiment_overall diff --git a/experimenter/experimenter/jetstream/models.py b/experimenter/experimenter/jetstream/models.py index bb7c620fc..fa8b9a7d0 100644 --- a/experimenter/experimenter/jetstream/models.py +++ b/experimenter/experimenter/jetstream/models.py @@ -77,6 +77,7 @@ class Group(StrEnum): Group.SEARCH: SEARCH_METRICS, Group.USAGE: USAGE_METRICS, } +RETENTION_2_WEEKS_WINDOW_INDEX = 2 RETENTION_3_DAYS_WINDOW_INDEX = 4 @@ -155,14 +156,24 @@ def get_retention_by_window(self, window_index, data, metric): ] def append_retention_data(self, weekly_data): - # Try to get the two-week retention data. If it doesn't - # exist (experiment was too short), settle for 1 week. - retention_data = self.get_retention_by_window(2, weekly_data, Metric.RETENTION) - if len(retention_data) == 0: - retention_data = self.get_retention_by_window( - 1, weekly_data, Metric.RETENTION - ) + # Only use two-week retention data. + retention_data = self.get_retention_by_window( + RETENTION_2_WEEKS_WINDOW_INDEX, weekly_data, Metric.RETENTION + ) + + self.extend(retention_data) + + def replace_retention_weeks(self, weekly_data): + # Remove all weekly retention data except for week 2. + retention_data = self.get_retention_by_window( + RETENTION_2_WEEKS_WINDOW_INDEX, weekly_data, Metric.RETENTION + ) + self.root = [ + jetstream_data_point + for jetstream_data_point in self.root + if jetstream_data_point.metric != Metric.RETENTION + ] self.extend(retention_data) def append_retention_3_days(self, daily_data): diff --git a/experimenter/experimenter/jetstream/tests/test_jetstream_data.py b/experimenter/experimenter/jetstream/tests/test_jetstream_data.py index 56481a790..44a4477f5 100644 --- a/experimenter/experimenter/jetstream/tests/test_jetstream_data.py +++ b/experimenter/experimenter/jetstream/tests/test_jetstream_data.py @@ -61,6 +61,75 @@ def test_append_retention_3_days_extracts_data(self): self.assertIn(retention, data) + def test_append_retention_data_ignores_week_1_only_data(self): + week_1_retention = JetstreamDataPoint( + metric=Metric.RETENTION, + statistic=Statistic.BINOMIAL, + branch="control", + point=0.65, + segment=Segment.ALL, + window_index="1", + ) + + data = JetstreamData([]) + data.append_retention_data([week_1_retention]) + + self.assertNotIn(week_1_retention, data) + + def test_append_retention_data_uses_week_2_data(self): + week_2_retention = JetstreamDataPoint( + metric=Metric.RETENTION, + statistic=Statistic.BINOMIAL, + branch="control", + point=0.65, + segment=Segment.ALL, + window_index="2", + ) + + data = JetstreamData([]) + data.append_retention_data([week_2_retention]) + + self.assertIn(week_2_retention, data) + + def test_replace_retention_replaces_existing_entries(self): + existing_retention_1 = JetstreamDataPoint( + metric=Metric.RETENTION, + statistic=Statistic.BINOMIAL, + branch="control", + point=0.5, + segment=Segment.ALL, + window_index="1", + ) + kept_retention = JetstreamDataPoint( + metric=Metric.RETENTION_3_DAYS, + statistic=Statistic.BINOMIAL, + branch="control", + point=0.25, + segment=Segment.ALL, + window_index="2", + ) + existing_retention_2 = JetstreamDataPoint( + metric=Metric.RETENTION_3_DAYS, + statistic=Statistic.BINOMIAL, + branch="control", + point=0.25, + segment=Segment.ALL, + window_index="3", + ) + + data = JetstreamData( + [ + existing_retention_1, + kept_retention, + existing_retention_2, + ] + ) + data.replace_retention_3_days(data) + + self.assertNotIn(existing_retention_1, data) + self.assertNotIn(existing_retention_2, data) + self.assertIn(kept_retention, data) + def test_replace_retention_3_days_replaces_existing_entries(self): existing_retention_1 = JetstreamDataPoint( metric=Metric.RETENTION_3_DAYS,