From 674f963f388689fcd67bbbcbad26468e09cc86b5 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 25 Mar 2026 10:29:17 -0400 Subject: [PATCH 01/31] wip: es8 djelme records (migration targets) --- osf/metrics/es8_metrics.py | 221 +++++++++++++++++++++++++++++++++++++ 1 file changed, 221 insertions(+) create mode 100644 osf/metrics/es8_metrics.py diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py new file mode 100644 index 00000000000..ec20215449e --- /dev/null +++ b/osf/metrics/es8_metrics.py @@ -0,0 +1,221 @@ +from __future__ import annotations +import datetime + +import elasticsearch8.dsl as esdsl +import elasticsearch_metrics.imps.elastic8 as djelme + +from osf.metrics.utils import YearMonth + + +### +# custom dsl fields + +class YearmonthField(esdsl.Date): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs, format='strict_year_month') + + def deserialize(self, data): + if isinstance(data, int): + # elasticsearch stores dates in milliseconds since the unix epoch + _as_datetime = datetime.datetime.fromtimestamp(data // 1000) + return YearMonth.from_date(_as_datetime) + elif data is None: + return None + try: + return YearMonth.from_any(data) + except ValueError: + raise ValueError(f'unsure how to deserialize "{data}" (of type {type(data)}) to YearMonth') + + def serialize(self, data): + if isinstance(data, str): + return data + elif isinstance(data, YearMonth): + return str(data) + elif isinstance(data, (datetime.datetime, datetime.date)): + return str(YearMonth.from_date(data)) + elif data is None: + return None + else: + raise ValueError(f'unsure how to serialize "{data}" (of type {type(data)}) as YYYY-MM') + + +### +# inner objects for events + +route_prefix_analyzer = esdsl.analyzer( + 'route_prefix_analyzer', + tokenizer=esdsl.tokenizer('route_prefix_tokenizer', 'path_hierarchy', delimiter='.'), +) + + +class PageviewInfo(esdsl.InnerDoc): + """PageviewInfo + + for CountedAuthUsage generated by viewing a web page + """ + # fields that should be provided + referer_url: str + page_url: str + page_title: str + route_name: str = esdsl.mapped_field(esdsl.Keyword( + fields={ + 'by_prefix': esdsl.Text(analyzer=route_prefix_analyzer), + }, + )) + + # fields autofilled from the above (see `_autofill_fields`) + page_path: str + referer_domain: str + hour_of_day: str + + +### +# Event records + +class OsfCountedUsageRecord(djelme.CountedUsageRecord): + ''' + + inherited fields: + platform_iri: str + database_iri: str + item_iri: str + sessionhour_id: str + within_iris: list[str] + ''' + # osf-specific fields + item_osfid: str + item_type: str + item_public: bool + user_is_authenticated: bool + action_labels: list[str] + pageview_info: PageviewInfo + + +### +# Reusable inner objects for reports + +class RunningTotal(esdsl.InnerDoc): + total: int + total_daily: int + + +class FileRunningTotals(esdsl.InnerDoc): + total: int + public: int + private: int + total_daily: int + public_daily: int + private_daily: int + + +class NodeRunningTotals(esdsl.InnerDoc): + total: int + total_excluding_spam: int + public: int + private: int + total_daily: int + total_daily_excluding_spam: int + public_daily: int + private_daily: int + + +class RegistrationRunningTotals(esdsl.InnerDoc): + total: int + public: int + embargoed: int + embargoed_v2: int + withdrawn: int + total_daily: int + public_daily: int + embargoed_daily: int + embargoed_v2_daily: int + withdrawn_daily: int + + +### +# Cyclic reports + + +class SpamSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY): + node_confirmed_spam: int + node_confirmed_ham: int + node_flagged: int + registration_confirmed_spam: int + registration_confirmed_ham: int + registration_flagged: int + preprint_confirmed_spam: int + preprint_confirmed_ham: int + preprint_flagged: int + user_marked_as_spam: int + user_marked_as_ham: int + + +class InstitutionalUserReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY): + # TODO: UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'institution_id', 'user_id',) + institution_id: str + # user info: + user_id: str + user_name: str + department_name: str + month_last_login = YearmonthField() + month_last_active = YearmonthField() + account_creation_date = YearmonthField() + orcid_id: str + # counts: + public_project_count: int + private_project_count: int + public_registration_count: int + embargoed_registration_count: int + published_preprint_count: int + public_file_count: int = esdsl.mapped_field(esdsl.Long()) + storage_byte_count: int = esdsl.mapped_field(esdsl.Long()) + + +class InstitutionMonthlySummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY): + UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'institution_id', ) + institution_id: str + user_count: int + public_project_count: int + private_project_count: int + public_registration_count: int + embargoed_registration_count: int + published_preprint_count: int + storage_byte_count: int = esdsl.mapped_field(esdsl.Long()) + public_file_count: int = esdsl.mapped_field(esdsl.Long()) + monthly_logged_in_user_count: int = esdsl.mapped_field(esdsl.Long()) + monthly_active_user_count: int = esdsl.mapped_field(esdsl.Long()) + + +class PublicItemUsageReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY): + # TODO: UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'item_osfid') + + # where noted, fields are meant to correspond to defined terms from COUNTER + # https://cop5.projectcounter.org/en/5.1/appendices/a-glossary-of-terms.html + # https://coprd.countermetrics.org/en/1.0.1/appendices/a-glossary.html + item_osfid: str + item_type: list[str] # counter:Data-Type + provider_id: list[str] # counter:Database(?) + platform_iri: list[str] # counter:Platform + + # view counts include views on components or files contained by this item + view_count: int = esdsl.mapped_field(esdsl.Long()) + view_session_count: int = esdsl.mapped_field(esdsl.Long()) + cumulative_view_count: int = esdsl.mapped_field(esdsl.Long()) + cumulative_view_session_count: int = esdsl.mapped_field(esdsl.Long()) + + # download counts of this item only (not including contained components or files) + download_count: int = esdsl.mapped_field(esdsl.Long()) + download_session_count: int = esdsl.mapped_field(esdsl.Long()) + cumulative_download_count: int = esdsl.mapped_field(esdsl.Long()) + cumulative_download_session_count: int = esdsl.mapped_field(esdsl.Long()) + + +class PrivateSpamMetricsReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY): + node_oopspam_flagged: int + node_oopspam_hammed: int + node_akismet_flagged: int + node_akismet_hammed: int + preprint_oopspam_flagged: int + preprint_oopspam_hammed: int + preprint_akismet_flagged: int + preprint_akismet_hammed: int From 2e73161b508a73e192ae3675f60ec05569502848 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Wed, 1 Apr 2026 00:52:14 +0300 Subject: [PATCH 02/31] add new metrics --- osf/metrics/es8_metrics.py | 166 ++++++++++++++++++++++++++++++++++++- 1 file changed, 162 insertions(+), 4 deletions(-) diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py index ec20215449e..4c1b2de4a2d 100644 --- a/osf/metrics/es8_metrics.py +++ b/osf/metrics/es8_metrics.py @@ -1,6 +1,6 @@ from __future__ import annotations import datetime - +import enum import elasticsearch8.dsl as esdsl import elasticsearch_metrics.imps.elastic8 as djelme @@ -91,6 +91,100 @@ class OsfCountedUsageRecord(djelme.CountedUsageRecord): pageview_info: PageviewInfo +class CountedAuthUsage(djelme.CountedUsageRecord): + """CountedAuthUsage + + Something was used! Let's quickly take note of that and + move on, then come back later to query/analyze/investigate. + + Aim to support a COUNTER-style reporting api + (see https://cop5.projectcounter.org/en/5.0.2/) + """ + + # where noted, fields correspond to defined terms from COUNTER + # https://cop5.projectcounter.org/en/5.0.2/appendices/a-glossary-of-terms.html + platform_iri: str + provider_id: str + session_id: str + item_guid: str + item_type: str + surrounding_guids: list[str] + item_public: bool + user_is_authenticated: bool + action_labels: list[str] + class ActionLabel(enum.Enum): + SEARCH = 'search' # counter:Search + VIEW = 'view' # counter:Investigation + DOWNLOAD = 'download' # counter:Request + WEB = 'web' # counter:Regular (aka "pageview") + API = 'api' # counter:TDM (aka "non-web api usage") + # TODO: count api usage, distinguish between web and non-web api requests + + # pageviews get additional info to support the "node analytics" view + # (see `api.metrics.views.NodeAnalyticsQuery`) + pageview_info: PageviewInfo + + class Meta: + dynamic = djelme.MetaField('strict') + source = djelme.MetaField(enabled=True) + + +class BasePreprintMetrics(djelme.CountedUsageRecord): + ''' + inherited fields: + platform_iri: str + database_iri: str + item_iri: str + sessionhour_id: str + within_iris: list[str] + ''' + count: int + provider_id: str + user_id: str + preprint_id: str + version: str + path: str + + class Index: + settings = { + 'number_of_shards': 1, + 'number_of_replicas': 1, + 'refresh_interval': '1s', + } + + class Meta: + abstract = True + source = djelme.MetaField(enabled=True) + + +class PreprintView(BasePreprintMetrics): + pass + + +class PreprintDownload(BasePreprintMetrics): + pass + + +class RegistriesModerationMetrics(djelme.CountedUsageRecord): + registration_id: str + provider_id: str + trigger: str + from_state: str + to_state: str + user_id: str + comment: str + + class Index: + settings = { + 'number_of_shards': 1, + 'number_of_replicas': 1, + 'refresh_interval': '1s', + } + + class Meta: + source = djelme.MetaField(enabled=True) + + ### # Reusable inner objects for reports @@ -132,10 +226,74 @@ class RegistrationRunningTotals(esdsl.InnerDoc): withdrawn_daily: int +class UsageByStorageAddon(esdsl.InnerDoc): + addon_shortname: str + enabled_usersettings: RunningTotal + linked_usersettings: RunningTotal + deleted_usersettings: RunningTotal + usersetting_links: RunningTotal + connected_nodesettings: RunningTotal + disconnected_nodesettings: RunningTotal + deleted_nodesettings: RunningTotal + + ### # Cyclic reports +class StorageAddonUsage(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): + usage_by_addon: UsageByStorageAddon + + +class DownloadCountReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): + daily_file_downloads: int + + +class InstitutionSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id',) + + institution_id: str + institution_name: str + users: RunningTotal + nodes: NodeRunningTotals + projects: NodeRunningTotals + registered_nodes: RegistrationRunningTotals + registered_projects: RegistrationRunningTotals + + +class NewUserDomainReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'domain_name',) + + domain_name: str + domain_name: int + + +class NodeSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): + nodes: NodeRunningTotals + projects: NodeRunningTotals + registered_nodes: RegistrationRunningTotals + registered_projects: RegistrationRunningTotals + + +class OsfstorageFileCountReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): + files: FileRunningTotals + + +class PreprintSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'provider_key',) + provider_key: str + preprint_count: int + + +class UserSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): + active: int + deactivated: int + merged: int + new_users_daily: int + new_users_with_institution_daily: int + unconfirmed: int + + class SpamSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY): node_confirmed_spam: int node_confirmed_ham: int @@ -151,7 +309,7 @@ class SpamSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY): class InstitutionalUserReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY): - # TODO: UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'institution_id', 'user_id',) + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', 'user_id',) institution_id: str # user info: user_id: str @@ -172,7 +330,7 @@ class InstitutionalUserReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHL class InstitutionMonthlySummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY): - UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'institution_id', ) + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', ) institution_id: str user_count: int public_project_count: int @@ -187,7 +345,7 @@ class InstitutionMonthlySummaryReport(djelme.CyclicRecord, cycle_timedepth=djelm class PublicItemUsageReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY): - # TODO: UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'item_osfid') + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'item_osfid') # where noted, fields are meant to correspond to defined terms from COUNTER # https://cop5.projectcounter.org/en/5.1/appendices/a-glossary-of-terms.html From 4b4a4780cadb0361f69757a1db290b08b6d6178e Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Mon, 6 Apr 2026 14:29:56 +0300 Subject: [PATCH 03/31] fix flake8 --- osf/metrics/es8_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py index 4c1b2de4a2d..e9ed147e858 100644 --- a/osf/metrics/es8_metrics.py +++ b/osf/metrics/es8_metrics.py @@ -269,7 +269,7 @@ class NewUserDomainReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): class NodeSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): - nodes: NodeRunningTotals + nodes: NodeRunningTotals projects: NodeRunningTotals registered_nodes: RegistrationRunningTotals registered_projects: RegistrationRunningTotals From d3b48e401e9713fda09ebc3633ae2c635a31daba Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Wed, 8 Apr 2026 17:46:42 +0300 Subject: [PATCH 04/31] add tests, use new version of djelme, consolidate into OsfCountedUsageRecord --- osf/metrics/es8_metrics.py | 151 ++++++++++---------------- osf_tests/metrics/test_es8_metrics.py | 42 +++++++ poetry.lock | 6 +- pyproject.toml | 2 +- 4 files changed, 102 insertions(+), 99 deletions(-) create mode 100644 osf_tests/metrics/test_es8_metrics.py diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py index e9ed147e858..666f9bae359 100644 --- a/osf/metrics/es8_metrics.py +++ b/osf/metrics/es8_metrics.py @@ -1,8 +1,8 @@ -from __future__ import annotations import datetime import enum import elasticsearch8.dsl as esdsl import elasticsearch_metrics.imps.elastic8 as djelme +from urllib.parse import urlsplit from osf.metrics.utils import YearMonth @@ -53,6 +53,16 @@ class PageviewInfo(esdsl.InnerDoc): for CountedAuthUsage generated by viewing a web page """ + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.page_path: str = '' + if self.page_url: self.page_path = urlsplit(self.page_url).path.rstrip('/') + self.referer_domain: str = '' + if self.referer_url: self.referer_domain = urlsplit(self.referer_url).netloc + self.hour_of_day: int = 0 + if self.timestamp: self.hour_of_day = self.timestamp.hour + # fields that should be provided referer_url: str page_url: str @@ -63,11 +73,9 @@ class PageviewInfo(esdsl.InnerDoc): }, )) - # fields autofilled from the above (see `_autofill_fields`) page_path: str referer_domain: str - hour_of_day: str - + hour_of_day: int ### # Event records @@ -90,82 +98,15 @@ class OsfCountedUsageRecord(djelme.CountedUsageRecord): action_labels: list[str] pageview_info: PageviewInfo +class ActionLabel(enum.Enum): + SEARCH = 'search' # counter:Search + VIEW = 'view' # counter:Investigation + DOWNLOAD = 'download' # counter:Request + WEB = 'web' # counter:Regular (aka "pageview") + API = 'api' # counter:TDM (aka "non-web api usage") -class CountedAuthUsage(djelme.CountedUsageRecord): - """CountedAuthUsage - - Something was used! Let's quickly take note of that and - move on, then come back later to query/analyze/investigate. - - Aim to support a COUNTER-style reporting api - (see https://cop5.projectcounter.org/en/5.0.2/) - """ - # where noted, fields correspond to defined terms from COUNTER - # https://cop5.projectcounter.org/en/5.0.2/appendices/a-glossary-of-terms.html - platform_iri: str - provider_id: str - session_id: str - item_guid: str - item_type: str - surrounding_guids: list[str] - item_public: bool - user_is_authenticated: bool - action_labels: list[str] - class ActionLabel(enum.Enum): - SEARCH = 'search' # counter:Search - VIEW = 'view' # counter:Investigation - DOWNLOAD = 'download' # counter:Request - WEB = 'web' # counter:Regular (aka "pageview") - API = 'api' # counter:TDM (aka "non-web api usage") - # TODO: count api usage, distinguish between web and non-web api requests - - # pageviews get additional info to support the "node analytics" view - # (see `api.metrics.views.NodeAnalyticsQuery`) - pageview_info: PageviewInfo - - class Meta: - dynamic = djelme.MetaField('strict') - source = djelme.MetaField(enabled=True) - - -class BasePreprintMetrics(djelme.CountedUsageRecord): - ''' - inherited fields: - platform_iri: str - database_iri: str - item_iri: str - sessionhour_id: str - within_iris: list[str] - ''' - count: int - provider_id: str - user_id: str - preprint_id: str - version: str - path: str - - class Index: - settings = { - 'number_of_shards': 1, - 'number_of_replicas': 1, - 'refresh_interval': '1s', - } - - class Meta: - abstract = True - source = djelme.MetaField(enabled=True) - - -class PreprintView(BasePreprintMetrics): - pass - - -class PreprintDownload(BasePreprintMetrics): - pass - - -class RegistriesModerationMetrics(djelme.CountedUsageRecord): +class Es8RegistriesModerationMetrics(djelme.EventRecord): registration_id: str provider_id: str trigger: str @@ -181,9 +122,6 @@ class Index: 'refresh_interval': '1s', } - class Meta: - source = djelme.MetaField(enabled=True) - ### # Reusable inner objects for reports @@ -241,15 +179,20 @@ class UsageByStorageAddon(esdsl.InnerDoc): # Cyclic reports -class StorageAddonUsage(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): - usage_by_addon: UsageByStorageAddon +class Es8StorageAddonUsage(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = 3 + + usage_by_addon: list[UsageByStorageAddon] + +class Es8DownloadCountReport(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = 3 -class DownloadCountReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): daily_file_downloads: int -class InstitutionSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): +class Es8InstitutionSummaryReport(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = 3 UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id',) institution_id: str @@ -261,31 +204,40 @@ class InstitutionSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY registered_projects: RegistrationRunningTotals -class NewUserDomainReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): +class Es8NewUserDomainReport(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = 3 UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'domain_name',) domain_name: str domain_name: int -class NodeSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): +class Es8NodeSummaryReport(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = 3 + nodes: NodeRunningTotals projects: NodeRunningTotals registered_nodes: RegistrationRunningTotals registered_projects: RegistrationRunningTotals -class OsfstorageFileCountReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): +class Es8OsfstorageFileCountReport(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = 3 + files: FileRunningTotals -class PreprintSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): +class Es8PreprintSummaryReport(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = 3 + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'provider_key',) provider_key: str preprint_count: int -class UserSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): +class Es8UserSummaryReport(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = 3 + active: int deactivated: int merged: int @@ -294,7 +246,9 @@ class UserSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.DAILY): unconfirmed: int -class SpamSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY): +class Es8SpamSummaryReport(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = 2 + node_confirmed_spam: int node_confirmed_ham: int node_flagged: int @@ -308,8 +262,10 @@ class SpamSummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY): user_marked_as_ham: int -class InstitutionalUserReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY): +class Es8InstitutionalUserReport(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = 2 UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', 'user_id',) + institution_id: str # user info: user_id: str @@ -329,8 +285,10 @@ class InstitutionalUserReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHL storage_byte_count: int = esdsl.mapped_field(esdsl.Long()) -class InstitutionMonthlySummaryReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY): +class Es8InstitutionMonthlySummaryReport(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = 2 UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', ) + institution_id: str user_count: int public_project_count: int @@ -344,7 +302,8 @@ class InstitutionMonthlySummaryReport(djelme.CyclicRecord, cycle_timedepth=djelm monthly_active_user_count: int = esdsl.mapped_field(esdsl.Long()) -class PublicItemUsageReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY): +class Es8PublicItemUsageReport(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = 2 UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'item_osfid') # where noted, fields are meant to correspond to defined terms from COUNTER @@ -368,7 +327,9 @@ class PublicItemUsageReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY) cumulative_download_session_count: int = esdsl.mapped_field(esdsl.Long()) -class PrivateSpamMetricsReport(djelme.CyclicRecord, cycle_timedepth=djelme.MONTHLY): +class Es8PrivateSpamMetricsReport(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = 2 + node_oopspam_flagged: int node_oopspam_hammed: int node_akismet_flagged: int diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py new file mode 100644 index 00000000000..2afca72174b --- /dev/null +++ b/osf_tests/metrics/test_es8_metrics.py @@ -0,0 +1,42 @@ +from datetime import datetime + +from osf.metrics.es8_metrics import ( + Es8DownloadCountReport, + Es8UserSummaryReport, + OsfCountedUsageRecord, + PageviewInfo +) + + +class TestEs8Metrics: + def test_import_all_reports(self): + assert True + + def test_instantiate_of_reports(self): + download_report = Es8DownloadCountReport() + assert hasattr(download_report, 'daily_file_downloads') + assert download_report.daily_file_downloads is None + + user_report = Es8UserSummaryReport() + assert hasattr(user_report, 'active') + assert user_report.active is None + + def test_nested_pageview(self): + usage = OsfCountedUsageRecord( + pageview_info={ + "page_url": "https://example.com", + "referer_url": "https://google.com", + } + ) + assert usage.pageview_info is not None + + def test_pageview_info_autofill(self): + obj = PageviewInfo( + page_url="https://example.com/path/test", + referer_url="https://google.com", + timestamp=datetime(2024, 1, 1, 15, 0), + ) + + assert obj.page_path == "/path/test" + assert obj.referer_domain == "google.com" + assert obj.hour_of_day == 15 diff --git a/poetry.lock b/poetry.lock index f0dca07d95c..5bbe2ae1f49 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"] [package.source] type = "git" url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git" -reference = "bb1c84c148ac1d2b1079b2b113e52a01a861c8a6" -resolved_reference = "bb1c84c148ac1d2b1079b2b113e52a01a861c8a6" +reference = "1b644bb927cfb28e3a23b28ad625279749d859e5" +resolved_reference = "1b644bb927cfb28e3a23b28ad625279749d859e5" [[package]] name = "django-extensions" @@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.1" python-versions = "^3.12" -content-hash = "c3108a036ae092e35f7696ffe230e271b774bb12e546db77bb8b12b5fb6eca7d" +content-hash = "3a5ea0758a65dac062ba307a2f29bdb8d637c2b0a78a2f68fea86c39516c6922" diff --git a/pyproject.toml b/pyproject.toml index b40cdc704ab..375b8cacd25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,7 +91,7 @@ datacite = "1.1.3" rdflib = "7.0.0" colorlog = "6.8.2" # Metrics -django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "bb1c84c148ac1d2b1079b2b113e52a01a861c8a6"} +django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "1b644bb927cfb28e3a23b28ad625279749d859e5"} # Impact Metrics CSV Export djangorestframework-csv = "3.0.2" gevent = "24.2.1" From e4bec9dfa9f5e760bc11df3ce991af32a56e300a Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Thu, 9 Apr 2026 10:45:01 +0300 Subject: [PATCH 05/31] add imports to init, flake8 --- osf/metrics/__init__.py | 18 ++++++++++++++++++ osf/metrics/es8_metrics.py | 9 ++++++--- osf_tests/metrics/test_es8_metrics.py | 12 ++++++------ 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/osf/metrics/__init__.py b/osf/metrics/__init__.py index 0e7b1a1cf32..b2c8af54999 100644 --- a/osf/metrics/__init__.py +++ b/osf/metrics/__init__.py @@ -18,6 +18,17 @@ UserSummaryReport, ) +from .es8_metrics import ( + Es8DownloadCountReport, + Es8UserSummaryReport, + Es8NodeSummaryReport, + Es8SpamSummaryReport, + Es8InstitutionSummaryReport, + Es8NewUserDomainReport, + Es8OsfstorageFileCountReport, + Es8StorageAddonUsage, +) + DAILY_REPORTS = ( DownloadCountReport, InstitutionSummaryReport, @@ -27,6 +38,13 @@ PreprintSummaryReport, StorageAddonUsage, UserSummaryReport, + Es8DownloadCountReport, + Es8InstitutionSummaryReport, + Es8NewUserDomainReport, + Es8NodeSummaryReport, + Es8OsfstorageFileCountReport, + Es8StorageAddonUsage, + Es8UserSummaryReport ) diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py index 666f9bae359..020a9c72c80 100644 --- a/osf/metrics/es8_metrics.py +++ b/osf/metrics/es8_metrics.py @@ -57,11 +57,14 @@ class PageviewInfo(esdsl.InnerDoc): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.page_path: str = '' - if self.page_url: self.page_path = urlsplit(self.page_url).path.rstrip('/') + if self.page_url: + self.page_path = urlsplit(self.page_url).path.rstrip('/') self.referer_domain: str = '' - if self.referer_url: self.referer_domain = urlsplit(self.referer_url).netloc + if self.referer_url: + self.referer_domain = urlsplit(self.referer_url).netloc self.hour_of_day: int = 0 - if self.timestamp: self.hour_of_day = self.timestamp.hour + if self.timestamp: + self.hour_of_day = self.timestamp.hour # fields that should be provided referer_url: str diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py index 2afca72174b..1158836b688 100644 --- a/osf_tests/metrics/test_es8_metrics.py +++ b/osf_tests/metrics/test_es8_metrics.py @@ -24,19 +24,19 @@ def test_instantiate_of_reports(self): def test_nested_pageview(self): usage = OsfCountedUsageRecord( pageview_info={ - "page_url": "https://example.com", - "referer_url": "https://google.com", + 'page_url': 'https://example.com', + 'referer_url': 'https://google.com', } ) assert usage.pageview_info is not None def test_pageview_info_autofill(self): obj = PageviewInfo( - page_url="https://example.com/path/test", - referer_url="https://google.com", + page_url='https://example.com/path/test', + referer_url='https://google.com', timestamp=datetime(2024, 1, 1, 15, 0), ) - assert obj.page_path == "/path/test" - assert obj.referer_domain == "google.com" + assert obj.page_path == '/path/tes' + assert obj.referer_domain == 'google.com' assert obj.hour_of_day == 15 From ee515ef615b2363724aba1d445ef7e4e15f89c4a Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Thu, 9 Apr 2026 16:39:44 +0300 Subject: [PATCH 06/31] fix test, imports, flake8 --- .docker-compose.env | 2 +- osf/metrics/__init__.py | 1 - osf_tests/metrics/test_es8_metrics.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.docker-compose.env b/.docker-compose.env index 449c9747adf..444788ecb46 100644 --- a/.docker-compose.env +++ b/.docker-compose.env @@ -6,7 +6,7 @@ DOMAIN=http://localhost:5000/ INTERNAL_DOMAIN=http://192.168.168.167:5000/ API_DOMAIN=http://localhost:8000/ ELASTIC_URI=192.168.168.167:9200 -ELASTIC6_URI=192.168.168.167:9201 +ELASTIC6_URI=http://192.168.168.167:9201 ELASTIC8_URI=http://192.168.168.167:9202 OSF_DB_HOST=192.168.168.167 DB_HOST=192.168.168.167 diff --git a/osf/metrics/__init__.py b/osf/metrics/__init__.py index b2c8af54999..6cef14f5cf9 100644 --- a/osf/metrics/__init__.py +++ b/osf/metrics/__init__.py @@ -22,7 +22,6 @@ Es8DownloadCountReport, Es8UserSummaryReport, Es8NodeSummaryReport, - Es8SpamSummaryReport, Es8InstitutionSummaryReport, Es8NewUserDomainReport, Es8OsfstorageFileCountReport, diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py index 1158836b688..3d48a3d35c4 100644 --- a/osf_tests/metrics/test_es8_metrics.py +++ b/osf_tests/metrics/test_es8_metrics.py @@ -37,6 +37,6 @@ def test_pageview_info_autofill(self): timestamp=datetime(2024, 1, 1, 15, 0), ) - assert obj.page_path == '/path/tes' + assert obj.page_path == '/path/test' assert obj.referer_domain == 'google.com' assert obj.hour_of_day == 15 From ca60b58e0dc08d9f81ca085df45f43792d3ed252 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Fri, 10 Apr 2026 17:23:02 +0300 Subject: [PATCH 07/31] add security, flake8, fixes, add to test-build.yml --- .docker-compose.env | 5 ++++- .github/workflows/test-build.yml | 18 ++++++++++++++++++ api/base/settings/defaults.py | 8 +++++++- docker-compose.yml | 13 ++++++++++++- website/settings/defaults.py | 5 ++++- 5 files changed, 45 insertions(+), 4 deletions(-) diff --git a/.docker-compose.env b/.docker-compose.env index 444788ecb46..2542d16e841 100644 --- a/.docker-compose.env +++ b/.docker-compose.env @@ -6,8 +6,11 @@ DOMAIN=http://localhost:5000/ INTERNAL_DOMAIN=http://192.168.168.167:5000/ API_DOMAIN=http://localhost:8000/ ELASTIC_URI=192.168.168.167:9200 -ELASTIC6_URI=http://192.168.168.167:9201 +ELASTIC6_URI=192.168.168.167:9201 ELASTIC8_URI=http://192.168.168.167:9202 +ELASTIC8_CERT_PATH=/elastic8_certs/ca/ca.crt +ELASTIC8_USERNAME=elastic +ELASTIC8_SECRET=secretsecret OSF_DB_HOST=192.168.168.167 DB_HOST=192.168.168.167 REDIS_HOST=redis://192.168.168.167:6379 diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index 33942968529..0e8541acf2a 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -64,6 +64,14 @@ jobs: permissions: checks: write services: + elasticsearch8: + image: elasticsearch:8.19.11 + ports: + - 9202:9200 + env: + xpack.security.enabled: false + node.name: singlenode + cluster.initial_master_nodes: singlenode postgres: image: postgres env: @@ -84,6 +92,8 @@ jobs: - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report + env: + ELASTIC8_URL: http://localhost:9202 api1_and_js: runs-on: ubuntu-22.04 @@ -208,6 +218,14 @@ jobs: checks: write needs: build-cache services: + elasticsearch8: + image: elasticsearch:8.19.11 + ports: + - 9202:9200 + env: + xpack.security.enabled: false + node.name: singlenode + cluster.initial_master_nodes: singlenode postgres: image: postgres diff --git a/api/base/settings/defaults.py b/api/base/settings/defaults.py index 816586ffcfb..42e8d9bd495 100644 --- a/api/base/settings/defaults.py +++ b/api/base/settings/defaults.py @@ -316,7 +316,7 @@ HASHIDS_SALT = 'pinkhimalayan' # django-elasticsearch-metrics -DJELME_AUTOSETUP = True +# DJELME_AUTOSETUP = True DJELME_BACKENDS = { 'osfmetrics_es6': { 'elasticsearch_metrics.imps.elastic6': { @@ -327,6 +327,12 @@ 'osfmetrics_es8': { 'elasticsearch_metrics.imps.elastic8': { 'hosts': osf_settings.ELASTIC8_URI, + 'ca_certs': osf_settings.ELASTIC8_CERT_PATH, + 'basic_auth': ( + (osf_settings.ELASTIC8_USERNAME, osf_settings.ELASTIC8_SECRET) + if osf_settings.ELASTIC8_SECRET is not None + else None + ), }, }, } diff --git a/docker-compose.yml b/docker-compose.yml index f26c3617b67..09aedd58247 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -82,8 +82,19 @@ services: image: docker.elastic.co/elasticsearch/elasticsearch:8.19.11 platform: linux/arm64 environment: - - xpack.security.enabled=false + - ELASTIC_PASSWORD=secretsecret + - node.name=singlenode - discovery.type=single-node + - xpack.security.enabled=true + - xpack.security.http.ssl.enabled=true + - xpack.security.http.ssl.key=/elastic8_certs/singlenode/singlenode.key + - xpack.security.http.ssl.certificate=/elastic8_certs/singlenode/singlenode.crt + - xpack.security.http.ssl.certificate_authorities=/elastic8_certs/ca/ca.crt + - xpack.security.transport.ssl.enabled=true + - xpack.security.transport.ssl.key=/elastic8_certs/singlenode/singlenode.key + - xpack.security.transport.ssl.certificate=/elastic8_certs/singlenode/singlenode.crt + - xpack.security.transport.ssl.certificate_authorities=/elastic8_certs/ca/ca.crt + - xpack.security.transport.ssl.verification_mode=certificate ports: - 9202:9200 volumes: diff --git a/website/settings/defaults.py b/website/settings/defaults.py index d0ae58dc863..1e8032cc95c 100644 --- a/website/settings/defaults.py +++ b/website/settings/defaults.py @@ -113,7 +113,10 @@ def parent_dir(path): SEARCH_ENGINE = 'elastic' # Can be 'elastic', or None ELASTIC_URI = '127.0.0.1:9200' ELASTIC6_URI = os.environ.get('ELASTIC6_URI', '127.0.0.1:9201') -ELASTIC8_URI = os.environ.get('ELASTIC8_URI', '127.0.0.1:9202') +ELASTIC8_URI = os.environ.get('ELASTIC8_URI') +ELASTIC8_CERT_PATH = os.environ.get('ELASTIC8_CERT_PATH') +ELASTIC8_USERNAME = os.environ.get('ELASTIC8_USERNAME', 'elastic') +ELASTIC8_SECRET = os.environ.get('ELASTIC8_SECRET') ELASTIC_TIMEOUT = 10 ELASTIC_INDEX = 'website' ELASTIC_KWARGS = { From 080daf69dbcd839ed7d712c7f78053b13097b1e1 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Sat, 11 Apr 2026 00:01:31 +0300 Subject: [PATCH 08/31] test-build update --- .github/workflows/test-build.yml | 41 +++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index 0e8541acf2a..6aa39e39800 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -12,6 +12,7 @@ env: OSF_DB_PORT: 5432 OSF_DB_PASSWORD: postgres GITHUB_ACTIONS: true + ELASTIC8_URL: http://localhost:9202 jobs: build-cache: @@ -37,6 +38,14 @@ jobs: permissions: checks: write services: + elasticsearch8: + image: elasticsearch:8.19.11 + ports: + - 9202:9200 + env: + xpack.security.enabled: false + node.name: singlenode + cluster.initial_master_nodes: singlenode postgres: image: postgres env: @@ -57,6 +66,8 @@ jobs: - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report + env: + ELASTIC8_URL: ${{ env.OSF_DB_PASSWORD }} website: runs-on: ubuntu-22.04 @@ -93,7 +104,7 @@ jobs: if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report env: - ELASTIC8_URL: http://localhost:9202 + ELASTIC8_URL: ${{ env.OSF_DB_PASSWORD }} api1_and_js: runs-on: ubuntu-22.04 @@ -101,6 +112,14 @@ jobs: permissions: checks: write services: + elasticsearch8: + image: elasticsearch:8.19.11 + ports: + - 9202:9200 + env: + xpack.security.enabled: false + node.name: singlenode + cluster.initial_master_nodes: singlenode postgres: image: postgres env: @@ -123,6 +142,8 @@ jobs: - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report + env: + ELASTIC8_URL: ${{ env.OSF_DB_PASSWORD }} api2: runs-on: ubuntu-22.04 @@ -130,6 +151,14 @@ jobs: permissions: checks: write services: + elasticsearch8: + image: elasticsearch:8.19.11 + ports: + - 9202:9200 + env: + xpack.security.enabled: false + node.name: singlenode + cluster.initial_master_nodes: singlenode postgres: image: postgres env: @@ -150,6 +179,8 @@ jobs: - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report + env: + ELASTIC8_URL: ${{ env.OSF_DB_PASSWORD }} api3_and_osf: runs-on: ubuntu-22.04 @@ -218,14 +249,6 @@ jobs: checks: write needs: build-cache services: - elasticsearch8: - image: elasticsearch:8.19.11 - ports: - - 9202:9200 - env: - xpack.security.enabled: false - node.name: singlenode - cluster.initial_master_nodes: singlenode postgres: image: postgres From fde32a4ee09debee75af5523088fd8c3c921f713 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Sat, 11 Apr 2026 01:19:07 +0300 Subject: [PATCH 09/31] test-build fix url --- .github/workflows/test-build.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index 6aa39e39800..60d056de001 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -67,7 +67,7 @@ jobs: if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report env: - ELASTIC8_URL: ${{ env.OSF_DB_PASSWORD }} + ELASTIC8_URL: ${{ env.ELASTIC8_URL }} website: runs-on: ubuntu-22.04 @@ -104,7 +104,7 @@ jobs: if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report env: - ELASTIC8_URL: ${{ env.OSF_DB_PASSWORD }} + ELASTIC8_URL: ${{ env.ELASTIC8_URL }} api1_and_js: runs-on: ubuntu-22.04 @@ -143,7 +143,7 @@ jobs: if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report env: - ELASTIC8_URL: ${{ env.OSF_DB_PASSWORD }} + ELASTIC8_URL: ${{ env.ELASTIC8_URL }} api2: runs-on: ubuntu-22.04 @@ -180,7 +180,7 @@ jobs: if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report env: - ELASTIC8_URL: ${{ env.OSF_DB_PASSWORD }} + ELASTIC8_URL: ${{ env.ELASTIC8_URL }} api3_and_osf: runs-on: ubuntu-22.04 From e6da70bbf73cbbf348fddf148d91b03472913e9e Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Sat, 11 Apr 2026 01:39:47 +0300 Subject: [PATCH 10/31] test-build fix naming --- .github/workflows/test-build.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index 60d056de001..fdfd4c190b3 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -12,7 +12,7 @@ env: OSF_DB_PORT: 5432 OSF_DB_PASSWORD: postgres GITHUB_ACTIONS: true - ELASTIC8_URL: http://localhost:9202 + ELASTIC8_URI: http://localhost:9202 jobs: build-cache: @@ -67,7 +67,7 @@ jobs: if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report env: - ELASTIC8_URL: ${{ env.ELASTIC8_URL }} + ELASTIC8_URI: ${{ env.ELASTIC8_URI }} website: runs-on: ubuntu-22.04 @@ -104,7 +104,7 @@ jobs: if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report env: - ELASTIC8_URL: ${{ env.ELASTIC8_URL }} + ELASTIC8_URI: ${{ env.ELASTIC8_URI }} api1_and_js: runs-on: ubuntu-22.04 @@ -143,7 +143,7 @@ jobs: if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report env: - ELASTIC8_URL: ${{ env.ELASTIC8_URL }} + ELASTIC8_URI: ${{ env.ELASTIC8_URI }} api2: runs-on: ubuntu-22.04 @@ -180,7 +180,7 @@ jobs: if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report env: - ELASTIC8_URL: ${{ env.ELASTIC8_URL }} + ELASTIC8_URI: ${{ env.ELASTIC8_URI }} api3_and_osf: runs-on: ubuntu-22.04 From 2b8a81c10b13e687c29144acf55b699d73ac3a2d Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Sat, 11 Apr 2026 08:55:02 +0300 Subject: [PATCH 11/31] update test --- osf_tests/metrics/test_es8_metrics.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py index 3d48a3d35c4..28dedd01eb4 100644 --- a/osf_tests/metrics/test_es8_metrics.py +++ b/osf_tests/metrics/test_es8_metrics.py @@ -15,11 +15,9 @@ def test_import_all_reports(self): def test_instantiate_of_reports(self): download_report = Es8DownloadCountReport() assert hasattr(download_report, 'daily_file_downloads') - assert download_report.daily_file_downloads is None user_report = Es8UserSummaryReport() assert hasattr(user_report, 'active') - assert user_report.active is None def test_nested_pageview(self): usage = OsfCountedUsageRecord( From 6167778672af9f5d87ede22d31ce71b0863d09fc Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Mon, 13 Apr 2026 12:24:40 +0300 Subject: [PATCH 12/31] add wait --- .github/workflows/test-build.yml | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index fdfd4c190b3..844ca93fb15 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -61,13 +61,23 @@ jobs: steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build + - name: Wait for Elasticsearch + run: | + echo "Waiting for Elasticsearch..." + for i in {1..30}; do + if curl -sf http://localhost:9202/_cluster/health?wait_for_status=yellow; then + echo "Elasticsearch is ready" + exit 0 + fi + sleep 2 + done + echo "Elasticsearch failed" + exit 1 - name: Run tests run: poetry run python3 -m invoke test-ci-addons --junit - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report - env: - ELASTIC8_URI: ${{ env.ELASTIC8_URI }} website: runs-on: ubuntu-22.04 @@ -103,8 +113,6 @@ jobs: - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report - env: - ELASTIC8_URI: ${{ env.ELASTIC8_URI }} api1_and_js: runs-on: ubuntu-22.04 @@ -142,8 +150,6 @@ jobs: - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report - env: - ELASTIC8_URI: ${{ env.ELASTIC8_URI }} api2: runs-on: ubuntu-22.04 @@ -179,8 +185,6 @@ jobs: - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report - env: - ELASTIC8_URI: ${{ env.ELASTIC8_URI }} api3_and_osf: runs-on: ubuntu-22.04 From eb0a5d9efe8cc6c683550acf99be5047b766e07e Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Mon, 13 Apr 2026 17:57:22 +0300 Subject: [PATCH 13/31] remove wait --- .github/workflows/test-build.yml | 12 ------------ poetry.lock | 6 +++--- pyproject.toml | 2 +- 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index 844ca93fb15..d6bf817b7c6 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -61,18 +61,6 @@ jobs: steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build - - name: Wait for Elasticsearch - run: | - echo "Waiting for Elasticsearch..." - for i in {1..30}; do - if curl -sf http://localhost:9202/_cluster/health?wait_for_status=yellow; then - echo "Elasticsearch is ready" - exit 0 - fi - sleep 2 - done - echo "Elasticsearch failed" - exit 1 - name: Run tests run: poetry run python3 -m invoke test-ci-addons --junit - name: Upload report diff --git a/poetry.lock b/poetry.lock index 5bbe2ae1f49..d524525f564 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"] [package.source] type = "git" url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git" -reference = "1b644bb927cfb28e3a23b28ad625279749d859e5" -resolved_reference = "1b644bb927cfb28e3a23b28ad625279749d859e5" +reference = "c43abd63c623cdfbfaf87da6194d2a6f74ac2dd5" +resolved_reference = "c43abd63c623cdfbfaf87da6194d2a6f74ac2dd5" [[package]] name = "django-extensions" @@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.1" python-versions = "^3.12" -content-hash = "3a5ea0758a65dac062ba307a2f29bdb8d637c2b0a78a2f68fea86c39516c6922" +content-hash = "fd91980689d1fa7c440e0c81a0b0e9543445821350cb154f18c63f236c0898be" diff --git a/pyproject.toml b/pyproject.toml index 375b8cacd25..fb008eb2c41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,7 +91,7 @@ datacite = "1.1.3" rdflib = "7.0.0" colorlog = "6.8.2" # Metrics -django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "1b644bb927cfb28e3a23b28ad625279749d859e5"} +django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "c43abd63c623cdfbfaf87da6194d2a6f74ac2dd5"} # Impact Metrics CSV Export djangorestframework-csv = "3.0.2" gevent = "24.2.1" From 78ed96fdc13cfb70d509d9e55f2028e9f789adf5 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Tue, 14 Apr 2026 12:50:11 +0300 Subject: [PATCH 14/31] cleanup --- .github/workflows/test-build.yml | 37 +++++++++++--------------------- api/base/settings/defaults.py | 1 - 2 files changed, 12 insertions(+), 26 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index d6bf817b7c6..8d1d3ebb318 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -12,7 +12,6 @@ env: OSF_DB_PORT: 5432 OSF_DB_PASSWORD: postgres GITHUB_ACTIONS: true - ELASTIC8_URI: http://localhost:9202 jobs: build-cache: @@ -61,8 +60,20 @@ jobs: steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build + - name: Wait for Elasticsearch + run: | + echo "Waiting for ES8 health..." + sleep 5 + until curl -sf http://localhost:9202/_cluster/health?wait_for_status=yellow; do + echo "ES8 not ready yet..." + sleep 5 + done + + echo "ES8 started successfully!" - name: Run tests run: poetry run python3 -m invoke test-ci-addons --junit + env: + ELASTIC8_URI: http://localhost:9202 - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report @@ -73,14 +84,6 @@ jobs: permissions: checks: write services: - elasticsearch8: - image: elasticsearch:8.19.11 - ports: - - 9202:9200 - env: - xpack.security.enabled: false - node.name: singlenode - cluster.initial_master_nodes: singlenode postgres: image: postgres env: @@ -108,14 +111,6 @@ jobs: permissions: checks: write services: - elasticsearch8: - image: elasticsearch:8.19.11 - ports: - - 9202:9200 - env: - xpack.security.enabled: false - node.name: singlenode - cluster.initial_master_nodes: singlenode postgres: image: postgres env: @@ -145,14 +140,6 @@ jobs: permissions: checks: write services: - elasticsearch8: - image: elasticsearch:8.19.11 - ports: - - 9202:9200 - env: - xpack.security.enabled: false - node.name: singlenode - cluster.initial_master_nodes: singlenode postgres: image: postgres env: diff --git a/api/base/settings/defaults.py b/api/base/settings/defaults.py index 42e8d9bd495..72e169c25a1 100644 --- a/api/base/settings/defaults.py +++ b/api/base/settings/defaults.py @@ -316,7 +316,6 @@ HASHIDS_SALT = 'pinkhimalayan' # django-elasticsearch-metrics -# DJELME_AUTOSETUP = True DJELME_BACKENDS = { 'osfmetrics_es6': { 'elasticsearch_metrics.imps.elastic6': { From 70cf5e2442fb8d8c271a1f8ab7d1b8b63191c0d0 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Tue, 14 Apr 2026 13:13:36 +0300 Subject: [PATCH 15/31] add wait, downgrade djelme, flake8 --- .github/workflows/test-build.yml | 20 +++++++++++++++++++- poetry.lock | 6 +++--- pyproject.toml | 2 +- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index 8d1d3ebb318..09fbbb5b319 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -68,7 +68,6 @@ jobs: echo "ES8 not ready yet..." sleep 5 done - echo "ES8 started successfully!" - name: Run tests run: poetry run python3 -m invoke test-ci-addons --junit @@ -111,6 +110,14 @@ jobs: permissions: checks: write services: + elasticsearch8: + image: elasticsearch:8.19.11 + ports: + - 9202:9200 + env: + xpack.security.enabled: false + node.name: singlenode + cluster.initial_master_nodes: singlenode postgres: image: postgres env: @@ -128,8 +135,19 @@ jobs: - uses: ./.github/actions/start-build - name: NVM & yarn install run: poetry run python3 -m invoke assets --dev + - name: Wait for Elasticsearch + run: | + echo "Waiting for ES8 health..." + sleep 5 + until curl -sf http://localhost:9202/_cluster/health?wait_for_status=yellow; do + echo "ES8 not ready yet..." + sleep 5 + done + echo "ES8 started successfully!" - name: Run test run: poetry run python3 -m invoke test-ci-api1-and-js --junit + env: + ELASTIC8_URI: http://localhost:9202 - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report diff --git a/poetry.lock b/poetry.lock index d524525f564..5bbe2ae1f49 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"] [package.source] type = "git" url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git" -reference = "c43abd63c623cdfbfaf87da6194d2a6f74ac2dd5" -resolved_reference = "c43abd63c623cdfbfaf87da6194d2a6f74ac2dd5" +reference = "1b644bb927cfb28e3a23b28ad625279749d859e5" +resolved_reference = "1b644bb927cfb28e3a23b28ad625279749d859e5" [[package]] name = "django-extensions" @@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.1" python-versions = "^3.12" -content-hash = "fd91980689d1fa7c440e0c81a0b0e9543445821350cb154f18c63f236c0898be" +content-hash = "3a5ea0758a65dac062ba307a2f29bdb8d637c2b0a78a2f68fea86c39516c6922" diff --git a/pyproject.toml b/pyproject.toml index fb008eb2c41..375b8cacd25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,7 +91,7 @@ datacite = "1.1.3" rdflib = "7.0.0" colorlog = "6.8.2" # Metrics -django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "c43abd63c623cdfbfaf87da6194d2a6f74ac2dd5"} +django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "1b644bb927cfb28e3a23b28ad625279749d859e5"} # Impact Metrics CSV Export djangorestframework-csv = "3.0.2" gevent = "24.2.1" From 3e35fee6522c4ad7e23da83a8915fde74455bebf Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Tue, 14 Apr 2026 16:09:27 +0300 Subject: [PATCH 16/31] add elastic8 --- .github/workflows/test-build.yml | 19 +++++++++++++++++++ osf_tests/metrics/test_es8_metrics.py | 6 ++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index 09fbbb5b319..0f2e101c408 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -158,6 +158,14 @@ jobs: permissions: checks: write services: + elasticsearch8: + image: elasticsearch:8.19.11 + ports: + - 9202:9200 + env: + xpack.security.enabled: false + node.name: singlenode + cluster.initial_master_nodes: singlenode postgres: image: postgres env: @@ -173,8 +181,19 @@ jobs: steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build + - name: Wait for Elasticsearch + run: | + echo "Waiting for ES8 health..." + sleep 5 + until curl -sf http://localhost:9202/_cluster/health?wait_for_status=yellow; do + echo "ES8 not ready yet..." + sleep 5 + done + echo "ES8 started successfully!" - name: Run tests run: poetry run python3 -m invoke test-ci-api2 --junit + env: + ELASTIC8_URI: http://localhost:9202 - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py index 28dedd01eb4..07705825f86 100644 --- a/osf_tests/metrics/test_es8_metrics.py +++ b/osf_tests/metrics/test_es8_metrics.py @@ -13,14 +13,15 @@ def test_import_all_reports(self): assert True def test_instantiate_of_reports(self): - download_report = Es8DownloadCountReport() + download_report = Es8DownloadCountReport(cycle_coverage='2026-01-01') assert hasattr(download_report, 'daily_file_downloads') - user_report = Es8UserSummaryReport() + user_report = Es8UserSummaryReport(cycle_coverage='2026-01-01') assert hasattr(user_report, 'active') def test_nested_pageview(self): usage = OsfCountedUsageRecord( + cycle_coverage='2026-01-01', pageview_info={ 'page_url': 'https://example.com', 'referer_url': 'https://google.com', @@ -30,6 +31,7 @@ def test_nested_pageview(self): def test_pageview_info_autofill(self): obj = PageviewInfo( + cycle_coverage='2026-01-01', page_url='https://example.com/path/test', referer_url='https://google.com', timestamp=datetime(2024, 1, 1, 15, 0), From a2363420c43c40a55b69630a096fb549cc49a71a Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Tue, 14 Apr 2026 16:46:59 +0300 Subject: [PATCH 17/31] fix test --- osf_tests/metrics/test_es8_metrics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py index 07705825f86..d6b3d4c4434 100644 --- a/osf_tests/metrics/test_es8_metrics.py +++ b/osf_tests/metrics/test_es8_metrics.py @@ -13,15 +13,15 @@ def test_import_all_reports(self): assert True def test_instantiate_of_reports(self): - download_report = Es8DownloadCountReport(cycle_coverage='2026-01-01') + download_report = Es8DownloadCountReport(cycle_coverage='2026.01.01') assert hasattr(download_report, 'daily_file_downloads') - user_report = Es8UserSummaryReport(cycle_coverage='2026-01-01') + user_report = Es8UserSummaryReport(cycle_coverage='2026.01.01') assert hasattr(user_report, 'active') def test_nested_pageview(self): usage = OsfCountedUsageRecord( - cycle_coverage='2026-01-01', + cycle_coverage='2026.01.01', pageview_info={ 'page_url': 'https://example.com', 'referer_url': 'https://google.com', @@ -31,7 +31,7 @@ def test_nested_pageview(self): def test_pageview_info_autofill(self): obj = PageviewInfo( - cycle_coverage='2026-01-01', + cycle_coverage='2026.01.01', page_url='https://example.com/path/test', referer_url='https://google.com', timestamp=datetime(2024, 1, 1, 15, 0), From 00b055b5a13db955fdf1eab1a558f2bc5b64f33f Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 14 Apr 2026 12:36:35 -0400 Subject: [PATCH 18/31] timedepth constants --- osf/metrics/es8_metrics.py | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py index 020a9c72c80..85c9141fba8 100644 --- a/osf/metrics/es8_metrics.py +++ b/osf/metrics/es8_metrics.py @@ -1,8 +1,10 @@ import datetime import enum +from urllib.parse import urlsplit + import elasticsearch8.dsl as esdsl +from elasticsearch_metrics import DAILY, MONTHLY import elasticsearch_metrics.imps.elastic8 as djelme -from urllib.parse import urlsplit from osf.metrics.utils import YearMonth @@ -183,19 +185,19 @@ class UsageByStorageAddon(esdsl.InnerDoc): class Es8StorageAddonUsage(djelme.CyclicRecord): - CYCLE_TIMEDEPTH = 3 + CYCLE_TIMEDEPTH = DAILY usage_by_addon: list[UsageByStorageAddon] class Es8DownloadCountReport(djelme.CyclicRecord): - CYCLE_TIMEDEPTH = 3 + CYCLE_TIMEDEPTH = DAILY daily_file_downloads: int class Es8InstitutionSummaryReport(djelme.CyclicRecord): - CYCLE_TIMEDEPTH = 3 + CYCLE_TIMEDEPTH = DAILY UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id',) institution_id: str @@ -208,7 +210,7 @@ class Es8InstitutionSummaryReport(djelme.CyclicRecord): class Es8NewUserDomainReport(djelme.CyclicRecord): - CYCLE_TIMEDEPTH = 3 + CYCLE_TIMEDEPTH = DAILY UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'domain_name',) domain_name: str @@ -216,7 +218,7 @@ class Es8NewUserDomainReport(djelme.CyclicRecord): class Es8NodeSummaryReport(djelme.CyclicRecord): - CYCLE_TIMEDEPTH = 3 + CYCLE_TIMEDEPTH = DAILY nodes: NodeRunningTotals projects: NodeRunningTotals @@ -225,13 +227,13 @@ class Es8NodeSummaryReport(djelme.CyclicRecord): class Es8OsfstorageFileCountReport(djelme.CyclicRecord): - CYCLE_TIMEDEPTH = 3 + CYCLE_TIMEDEPTH = DAILY files: FileRunningTotals class Es8PreprintSummaryReport(djelme.CyclicRecord): - CYCLE_TIMEDEPTH = 3 + CYCLE_TIMEDEPTH = DAILY UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'provider_key',) provider_key: str @@ -239,7 +241,7 @@ class Es8PreprintSummaryReport(djelme.CyclicRecord): class Es8UserSummaryReport(djelme.CyclicRecord): - CYCLE_TIMEDEPTH = 3 + CYCLE_TIMEDEPTH = DAILY active: int deactivated: int @@ -250,7 +252,7 @@ class Es8UserSummaryReport(djelme.CyclicRecord): class Es8SpamSummaryReport(djelme.CyclicRecord): - CYCLE_TIMEDEPTH = 2 + CYCLE_TIMEDEPTH = MONTHLY node_confirmed_spam: int node_confirmed_ham: int @@ -266,7 +268,7 @@ class Es8SpamSummaryReport(djelme.CyclicRecord): class Es8InstitutionalUserReport(djelme.CyclicRecord): - CYCLE_TIMEDEPTH = 2 + CYCLE_TIMEDEPTH = MONTHLY UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', 'user_id',) institution_id: str @@ -289,7 +291,7 @@ class Es8InstitutionalUserReport(djelme.CyclicRecord): class Es8InstitutionMonthlySummaryReport(djelme.CyclicRecord): - CYCLE_TIMEDEPTH = 2 + CYCLE_TIMEDEPTH = MONTHLY UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', ) institution_id: str @@ -306,7 +308,7 @@ class Es8InstitutionMonthlySummaryReport(djelme.CyclicRecord): class Es8PublicItemUsageReport(djelme.CyclicRecord): - CYCLE_TIMEDEPTH = 2 + CYCLE_TIMEDEPTH = MONTHLY UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'item_osfid') # where noted, fields are meant to correspond to defined terms from COUNTER @@ -331,7 +333,7 @@ class Es8PublicItemUsageReport(djelme.CyclicRecord): class Es8PrivateSpamMetricsReport(djelme.CyclicRecord): - CYCLE_TIMEDEPTH = 2 + CYCLE_TIMEDEPTH = MONTHLY node_oopspam_flagged: int node_oopspam_hammed: int From dddc94e791d7de76f487d1a00ac767848279ce87 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 14 Apr 2026 12:36:14 -0400 Subject: [PATCH 19/31] tidy gh actions with yaml anchors, health checks --- .github/workflows/test-build.yml | 131 ++++--------------------------- 1 file changed, 17 insertions(+), 114 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index 0f2e101c408..3433e689a42 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -37,15 +37,19 @@ jobs: permissions: checks: write services: - elasticsearch8: - image: elasticsearch:8.19.11 + elasticsearch8: &ES8_SERVICE + image: elasticsearch:8.19.14 ports: - 9202:9200 env: + discovery.type: single-node xpack.security.enabled: false - node.name: singlenode - cluster.initial_master_nodes: singlenode - postgres: + options: >- + --health-cmd "curl -sf http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=30s" + --health-interval 10s + --health-timeout 30s + --health-retries 5 + postgres: &POSTGRES_SERVICE image: postgres env: POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} @@ -60,15 +64,6 @@ jobs: steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build - - name: Wait for Elasticsearch - run: | - echo "Waiting for ES8 health..." - sleep 5 - until curl -sf http://localhost:9202/_cluster/health?wait_for_status=yellow; do - echo "ES8 not ready yet..." - sleep 5 - done - echo "ES8 started successfully!" - name: Run tests run: poetry run python3 -m invoke test-ci-addons --junit env: @@ -83,18 +78,7 @@ jobs: permissions: checks: write services: - postgres: - image: postgres - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + postgres: *POSTGRES_SERVICE steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build @@ -110,40 +94,13 @@ jobs: permissions: checks: write services: - elasticsearch8: - image: elasticsearch:8.19.11 - ports: - - 9202:9200 - env: - xpack.security.enabled: false - node.name: singlenode - cluster.initial_master_nodes: singlenode - postgres: - image: postgres - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + elasticsearch8: *ES8_SERVICE + postgres: *POSTGRES_SERVICE steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build - name: NVM & yarn install run: poetry run python3 -m invoke assets --dev - - name: Wait for Elasticsearch - run: | - echo "Waiting for ES8 health..." - sleep 5 - until curl -sf http://localhost:9202/_cluster/health?wait_for_status=yellow; do - echo "ES8 not ready yet..." - sleep 5 - done - echo "ES8 started successfully!" - name: Run test run: poetry run python3 -m invoke test-ci-api1-and-js --junit env: @@ -158,26 +115,8 @@ jobs: permissions: checks: write services: - elasticsearch8: - image: elasticsearch:8.19.11 - ports: - - 9202:9200 - env: - xpack.security.enabled: false - node.name: singlenode - cluster.initial_master_nodes: singlenode - postgres: - image: postgres - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + elasticsearch8: *ES8_SERVICE + postgres: *POSTGRES_SERVICE steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build @@ -204,19 +143,7 @@ jobs: checks: write needs: build-cache services: - postgres: - image: postgres - - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + postgres: *POSTGRES_SERVICE steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build @@ -232,19 +159,7 @@ jobs: checks: write needs: build-cache services: - postgres: - image: postgres - - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + postgres: *POSTGRES_SERVICE mailhog: image: mailhog/mailhog ports: @@ -265,19 +180,7 @@ jobs: checks: write needs: build-cache services: - postgres: - image: postgres - - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + postgres: *POSTGRES_SERVICE steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build From 46a934f901b1c685aa33b9a34ff204b370abcdf5 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 14 Apr 2026 13:59:10 -0400 Subject: [PATCH 20/31] simplify local elasticsearch8 config --- docker-compose.yml | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 09aedd58247..83e8fd27483 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -79,26 +79,22 @@ services: stdin_open: true elasticsearch8: - image: docker.elastic.co/elasticsearch/elasticsearch:8.19.11 - platform: linux/arm64 + image: elasticsearch:8.19.14 environment: - - ELASTIC_PASSWORD=secretsecret - - node.name=singlenode - discovery.type=single-node - - xpack.security.enabled=true - - xpack.security.http.ssl.enabled=true - - xpack.security.http.ssl.key=/elastic8_certs/singlenode/singlenode.key - - xpack.security.http.ssl.certificate=/elastic8_certs/singlenode/singlenode.crt - - xpack.security.http.ssl.certificate_authorities=/elastic8_certs/ca/ca.crt - - xpack.security.transport.ssl.enabled=true - - xpack.security.transport.ssl.key=/elastic8_certs/singlenode/singlenode.key - - xpack.security.transport.ssl.certificate=/elastic8_certs/singlenode/singlenode.crt - - xpack.security.transport.ssl.certificate_authorities=/elastic8_certs/ca/ca.crt - - xpack.security.transport.ssl.verification_mode=certificate + - xpack.security.enabled=false + - ES_JAVA_OPTS=-Xms512m -Xmx512m # reduce memory usage + - xpack.ml.enabled=false ports: - 9202:9200 volumes: - elasticsearch8_data_vol:/usr/share/elasticsearch/data + healthcheck: + start_period: 15s + test: ["CMD", "curl", "-sf", "http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=30s"] + interval: 10s + timeout: 30s + retries: 5 stdin_open: true postgres: From 49f925945a2ea913dd56755fb9ac1d9efb905eb4 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 14 Apr 2026 15:57:53 -0400 Subject: [PATCH 21/31] bump djelme to get fixes --- poetry.lock | 6 +++--- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/poetry.lock b/poetry.lock index 5bbe2ae1f49..90665bce81f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"] [package.source] type = "git" url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git" -reference = "1b644bb927cfb28e3a23b28ad625279749d859e5" -resolved_reference = "1b644bb927cfb28e3a23b28ad625279749d859e5" +reference = "8025d58e23b4e0c562e1d59c98b10ec936eb56e6" +resolved_reference = "8025d58e23b4e0c562e1d59c98b10ec936eb56e6" [[package]] name = "django-extensions" @@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.1" python-versions = "^3.12" -content-hash = "3a5ea0758a65dac062ba307a2f29bdb8d637c2b0a78a2f68fea86c39516c6922" +content-hash = "ef1d6d327f5557e43482793b276ccb6c5fd07989f27367af3a3736a8547b4d1a" diff --git a/pyproject.toml b/pyproject.toml index 375b8cacd25..013df3f448d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,7 +91,7 @@ datacite = "1.1.3" rdflib = "7.0.0" colorlog = "6.8.2" # Metrics -django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "1b644bb927cfb28e3a23b28ad625279749d859e5"} +django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "8025d58e23b4e0c562e1d59c98b10ec936eb56e6"} # Impact Metrics CSV Export djangorestframework-csv = "3.0.2" gevent = "24.2.1" From 29839b975f440d1bbbe962d7e1ee0fce813e16c5 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 14 Apr 2026 17:03:45 -0400 Subject: [PATCH 22/31] tests passing with djelme es8 - use elasticsearch_metrics.test.util - move "Es8" prefix to suffix - autofill fields on `save`, not `__init__` (to work with how esdsl loads search results) --- conftest.py | 22 +++----- osf/metrics/__init__.py | 18 +------ osf/metrics/es8_metrics.py | 57 +++++++++++---------- osf/metrics/reports.py | 2 + osf_tests/metrics/test_es8_metrics.py | 74 +++++++++++++++------------ 5 files changed, 82 insertions(+), 91 deletions(-) diff --git a/conftest.py b/conftest.py index 232b788c0fb..6eafa1b7a55 100644 --- a/conftest.py +++ b/conftest.py @@ -6,7 +6,7 @@ from django.db import transaction from elasticsearch6_dsl.connections import connections from website import settings as osf_settings -from elasticsearch_metrics.tests._test_util import RealElasticTestCase +from elasticsearch_metrics.tests.util import djelme_test_backends from faker import Factory import pytest import responses @@ -146,19 +146,9 @@ def _es_metrics_marker(request): yield return - connections.create_connection( - alias='osfmetrics_es6', - hosts=osf_settings.ELASTIC6_URI, - ) - - class _Es6TestCase(RealElasticTestCase, autosetup_djelme_backends=True): - ... - es6_test_case = _Es6TestCase() - es6_test_case.setUp() - try: + with djelme_test_backends(): yield - finally: - es6_test_case.tearDown() + @pytest.fixture def mock_share_responses(): @@ -356,6 +346,6 @@ def mock_gravy_valet_get_verified_links(): yield mock_get_verified_links -@pytest.fixture(autouse=True) -def load_notification_types(db, *args, **kwargs): - populate_notification_types(*args, **kwargs) +# @pytest.fixture(autouse=True) +# def load_notification_types(db, *args, **kwargs): +# populate_notification_types(*args, **kwargs) diff --git a/osf/metrics/__init__.py b/osf/metrics/__init__.py index 6cef14f5cf9..6056e6d92f3 100644 --- a/osf/metrics/__init__.py +++ b/osf/metrics/__init__.py @@ -17,16 +17,8 @@ StorageAddonUsage, UserSummaryReport, ) +from . import es8_metrics -from .es8_metrics import ( - Es8DownloadCountReport, - Es8UserSummaryReport, - Es8NodeSummaryReport, - Es8InstitutionSummaryReport, - Es8NewUserDomainReport, - Es8OsfstorageFileCountReport, - Es8StorageAddonUsage, -) DAILY_REPORTS = ( DownloadCountReport, @@ -37,13 +29,6 @@ PreprintSummaryReport, StorageAddonUsage, UserSummaryReport, - Es8DownloadCountReport, - Es8InstitutionSummaryReport, - Es8NewUserDomainReport, - Es8NodeSummaryReport, - Es8OsfstorageFileCountReport, - Es8StorageAddonUsage, - Es8UserSummaryReport ) @@ -53,4 +38,5 @@ 'PreprintView', 'PreprintDownload', 'RegistriesModerationMetrics', + 'es8_metrics', ) diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py index 85c9141fba8..436a1c62d46 100644 --- a/osf/metrics/es8_metrics.py +++ b/osf/metrics/es8_metrics.py @@ -56,18 +56,6 @@ class PageviewInfo(esdsl.InnerDoc): for CountedAuthUsage generated by viewing a web page """ - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.page_path: str = '' - if self.page_url: - self.page_path = urlsplit(self.page_url).path.rstrip('/') - self.referer_domain: str = '' - if self.referer_url: - self.referer_domain = urlsplit(self.referer_url).netloc - self.hour_of_day: int = 0 - if self.timestamp: - self.hour_of_day = self.timestamp.hour - # fields that should be provided referer_url: str page_url: str @@ -78,10 +66,12 @@ def __init__(self, *args, **kwargs): }, )) + # fields auto-filled page_path: str referer_domain: str hour_of_day: int + ### # Event records @@ -103,6 +93,19 @@ class OsfCountedUsageRecord(djelme.CountedUsageRecord): action_labels: list[str] pageview_info: PageviewInfo + def save(self, *args, **kwargs): + # autofill pageview_info fields + if self.pageview_info: + self.pageview_info.hour_of_day = self.timestamp.hour + _url = self.pageview_info.page_url + if _url: + self.pageview_info.page_path = urlsplit(_url).path.rstrip('/') + _ref_url = self.pageview_info.referer_url + if _ref_url: + self.pageview_info.referer_domain = urlsplit(_ref_url).netloc + super().save(*args, **kwargs) + + class ActionLabel(enum.Enum): SEARCH = 'search' # counter:Search VIEW = 'view' # counter:Investigation @@ -111,7 +114,7 @@ class ActionLabel(enum.Enum): API = 'api' # counter:TDM (aka "non-web api usage") -class Es8RegistriesModerationMetrics(djelme.EventRecord): +class RegistriesModerationMetricsEs8(djelme.EventRecord): registration_id: str provider_id: str trigger: str @@ -184,19 +187,19 @@ class UsageByStorageAddon(esdsl.InnerDoc): # Cyclic reports -class Es8StorageAddonUsage(djelme.CyclicRecord): +class StorageAddonUsageEs8(djelme.CyclicRecord): CYCLE_TIMEDEPTH = DAILY usage_by_addon: list[UsageByStorageAddon] -class Es8DownloadCountReport(djelme.CyclicRecord): +class DownloadCountReportEs8(djelme.CyclicRecord): CYCLE_TIMEDEPTH = DAILY daily_file_downloads: int -class Es8InstitutionSummaryReport(djelme.CyclicRecord): +class InstitutionSummaryReportEs8(djelme.CyclicRecord): CYCLE_TIMEDEPTH = DAILY UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id',) @@ -209,15 +212,15 @@ class Es8InstitutionSummaryReport(djelme.CyclicRecord): registered_projects: RegistrationRunningTotals -class Es8NewUserDomainReport(djelme.CyclicRecord): +class NewUserDomainReportEs8(djelme.CyclicRecord): CYCLE_TIMEDEPTH = DAILY UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'domain_name',) domain_name: str - domain_name: int + new_user_count: int -class Es8NodeSummaryReport(djelme.CyclicRecord): +class NodeSummaryReportEs8(djelme.CyclicRecord): CYCLE_TIMEDEPTH = DAILY nodes: NodeRunningTotals @@ -226,13 +229,13 @@ class Es8NodeSummaryReport(djelme.CyclicRecord): registered_projects: RegistrationRunningTotals -class Es8OsfstorageFileCountReport(djelme.CyclicRecord): +class OsfstorageFileCountReportEs8(djelme.CyclicRecord): CYCLE_TIMEDEPTH = DAILY files: FileRunningTotals -class Es8PreprintSummaryReport(djelme.CyclicRecord): +class PreprintSummaryReportEs8(djelme.CyclicRecord): CYCLE_TIMEDEPTH = DAILY UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'provider_key',) @@ -240,7 +243,7 @@ class Es8PreprintSummaryReport(djelme.CyclicRecord): preprint_count: int -class Es8UserSummaryReport(djelme.CyclicRecord): +class UserSummaryReportEs8(djelme.CyclicRecord): CYCLE_TIMEDEPTH = DAILY active: int @@ -251,7 +254,7 @@ class Es8UserSummaryReport(djelme.CyclicRecord): unconfirmed: int -class Es8SpamSummaryReport(djelme.CyclicRecord): +class SpamSummaryReportEs8(djelme.CyclicRecord): CYCLE_TIMEDEPTH = MONTHLY node_confirmed_spam: int @@ -267,7 +270,7 @@ class Es8SpamSummaryReport(djelme.CyclicRecord): user_marked_as_ham: int -class Es8InstitutionalUserReport(djelme.CyclicRecord): +class InstitutionalUserReportEs8(djelme.CyclicRecord): CYCLE_TIMEDEPTH = MONTHLY UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', 'user_id',) @@ -290,7 +293,7 @@ class Es8InstitutionalUserReport(djelme.CyclicRecord): storage_byte_count: int = esdsl.mapped_field(esdsl.Long()) -class Es8InstitutionMonthlySummaryReport(djelme.CyclicRecord): +class InstitutionMonthlySummaryReportEs8(djelme.CyclicRecord): CYCLE_TIMEDEPTH = MONTHLY UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', ) @@ -307,7 +310,7 @@ class Es8InstitutionMonthlySummaryReport(djelme.CyclicRecord): monthly_active_user_count: int = esdsl.mapped_field(esdsl.Long()) -class Es8PublicItemUsageReport(djelme.CyclicRecord): +class PublicItemUsageReportEs8(djelme.CyclicRecord): CYCLE_TIMEDEPTH = MONTHLY UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'item_osfid') @@ -332,7 +335,7 @@ class Es8PublicItemUsageReport(djelme.CyclicRecord): cumulative_download_session_count: int = esdsl.mapped_field(esdsl.Long()) -class Es8PrivateSpamMetricsReport(djelme.CyclicRecord): +class PrivateSpamMetricsReportEs8(djelme.CyclicRecord): CYCLE_TIMEDEPTH = MONTHLY node_oopspam_flagged: int diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py index 9d71ea7e8c2..62479e359cd 100644 --- a/osf/metrics/reports.py +++ b/osf/metrics/reports.py @@ -120,6 +120,8 @@ def save(self, *args, **kwargs): @receiver(metrics_pre_save) def set_report_id(sender, instance, **kwargs): + if not issubclass(sender, metrics.Metric): + return # skip es8 record types try: _unique_together_fields = instance.UNIQUE_TOGETHER_FIELDS except AttributeError: diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py index d6b3d4c4434..68d767fca89 100644 --- a/osf_tests/metrics/test_es8_metrics.py +++ b/osf_tests/metrics/test_es8_metrics.py @@ -1,42 +1,52 @@ from datetime import datetime +from elasticsearch_metrics.tests.util import djelme_test_backends +import pytest + from osf.metrics.es8_metrics import ( - Es8DownloadCountReport, - Es8UserSummaryReport, + PageviewInfo, + DownloadCountReportEs8, OsfCountedUsageRecord, - PageviewInfo ) class TestEs8Metrics: - def test_import_all_reports(self): - assert True - - def test_instantiate_of_reports(self): - download_report = Es8DownloadCountReport(cycle_coverage='2026.01.01') - assert hasattr(download_report, 'daily_file_downloads') - - user_report = Es8UserSummaryReport(cycle_coverage='2026.01.01') - assert hasattr(user_report, 'active') - - def test_nested_pageview(self): - usage = OsfCountedUsageRecord( - cycle_coverage='2026.01.01', - pageview_info={ - 'page_url': 'https://example.com', - 'referer_url': 'https://google.com', - } - ) - assert usage.pageview_info is not None - - def test_pageview_info_autofill(self): - obj = PageviewInfo( - cycle_coverage='2026.01.01', - page_url='https://example.com/path/test', - referer_url='https://google.com', + """smoke tests to check that djelme records can be saved and searched""" + @pytest.fixture(autouse=True) + def _real_elastic(self): + with djelme_test_backends(): + yield + + def test_nested_pageview_autofill(self): + usage = OsfCountedUsageRecord.record( timestamp=datetime(2024, 1, 1, 15, 0), + sessionhour_id='blah', + database_iri='https://osf.example/provider', + item_iri='https://osf.example/itemm', + item_osfid='itemm', + item_public=True, + item_type='https://osf.example/Preprint', + platform_iri='https://osf.example', + user_is_authenticated=False, + pageview_info=PageviewInfo( + page_url="https://example.com/path/test", + referer_url="https://google.com", + route_name='foo.bar', + page_title='title title', + ), ) - - assert obj.page_path == '/path/test' - assert obj.referer_domain == 'google.com' - assert obj.hour_of_day == 15 + assert usage.pageview_info.page_path == "/path/test" + assert usage.pageview_info.referer_domain == "google.com" + assert usage.pageview_info.hour_of_day == 15 + + def test_save_report(self): + _saved = DownloadCountReportEs8.record( + cycle_coverage="2026.1.1", + daily_file_downloads=17, + ) + DownloadCountReportEs8.refresh_timeseries_indexes() + _response = DownloadCountReportEs8.search().execute() + (_fetched,) = _response + assert _fetched.meta.id == _saved.meta.id + assert _fetched.cycle_coverage == '2026.1.1' + assert _fetched.daily_file_downloads == 17 From 619cac7cca77df36e2d04f37dd55a060d36e4f75 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 14 Apr 2026 17:12:45 -0400 Subject: [PATCH 23/31] fix(test): patch check_index_template --- osf_tests/metrics/test_daily_report.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/osf_tests/metrics/test_daily_report.py b/osf_tests/metrics/test_daily_report.py index 9301cdb114f..5228e2342c5 100644 --- a/osf_tests/metrics/test_daily_report.py +++ b/osf_tests/metrics/test_daily_report.py @@ -10,8 +10,9 @@ class TestDailyReportKey: @pytest.fixture def mock_save(self): - with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: - yield mock_save + with mock.patch('elasticsearch_metrics.imps.elastic6.BaseMetric.check_index_template'): + with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: + yield mock_save def test_default(self, mock_save): # only one of this type of report per day From 8cec095a5b604a9f97abd4297af96774c7e585ac Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 14 Apr 2026 17:20:45 -0400 Subject: [PATCH 24/31] uncomment autouse fixture --- conftest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conftest.py b/conftest.py index 6eafa1b7a55..7adf6bdeba6 100644 --- a/conftest.py +++ b/conftest.py @@ -346,6 +346,6 @@ def mock_gravy_valet_get_verified_links(): yield mock_get_verified_links -# @pytest.fixture(autouse=True) -# def load_notification_types(db, *args, **kwargs): -# populate_notification_types(*args, **kwargs) +@pytest.fixture(autouse=True) +def load_notification_types(db, *args, **kwargs): + populate_notification_types(*args, **kwargs) From c24430fff7b2fdca860be0bb216bad20108a67ab Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 14 Apr 2026 17:25:56 -0400 Subject: [PATCH 25/31] remove unnecessary loop --- .github/workflows/test-build.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index 3433e689a42..011b621cca9 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -120,15 +120,6 @@ jobs: steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build - - name: Wait for Elasticsearch - run: | - echo "Waiting for ES8 health..." - sleep 5 - until curl -sf http://localhost:9202/_cluster/health?wait_for_status=yellow; do - echo "ES8 not ready yet..." - sleep 5 - done - echo "ES8 started successfully!" - name: Run tests run: poetry run python3 -m invoke test-ci-api2 --junit env: From cd3282786f5fc2c715f8bd0387903e6ba2d44d9a Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 14 Apr 2026 17:28:40 -0400 Subject: [PATCH 26/31] plac8 flake8 --- conftest.py | 1 - osf_tests/metrics/test_es8_metrics.py | 11 ++++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/conftest.py b/conftest.py index 7adf6bdeba6..9d8861a1e97 100644 --- a/conftest.py +++ b/conftest.py @@ -5,7 +5,6 @@ from django.db import transaction from elasticsearch6_dsl.connections import connections -from website import settings as osf_settings from elasticsearch_metrics.tests.util import djelme_test_backends from faker import Factory import pytest diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py index 68d767fca89..e93579628dc 100644 --- a/osf_tests/metrics/test_es8_metrics.py +++ b/osf_tests/metrics/test_es8_metrics.py @@ -12,6 +12,7 @@ class TestEs8Metrics: """smoke tests to check that djelme records can be saved and searched""" + @pytest.fixture(autouse=True) def _real_elastic(self): with djelme_test_backends(): @@ -29,19 +30,19 @@ def test_nested_pageview_autofill(self): platform_iri='https://osf.example', user_is_authenticated=False, pageview_info=PageviewInfo( - page_url="https://example.com/path/test", - referer_url="https://google.com", + page_url='https://example.com/path/test', + referer_url='https://google.com', route_name='foo.bar', page_title='title title', ), ) - assert usage.pageview_info.page_path == "/path/test" - assert usage.pageview_info.referer_domain == "google.com" + assert usage.pageview_info.page_path == '/path/test' + assert usage.pageview_info.referer_domain == 'google.com' assert usage.pageview_info.hour_of_day == 15 def test_save_report(self): _saved = DownloadCountReportEs8.record( - cycle_coverage="2026.1.1", + cycle_coverage='2026.1.1', daily_file_downloads=17, ) DownloadCountReportEs8.refresh_timeseries_indexes() From db938be047d4df29e2d01ee18b923f9c681eaa35 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 14 Apr 2026 17:47:24 -0400 Subject: [PATCH 27/31] remove unused local env vars --- .docker-compose.env | 2 -- 1 file changed, 2 deletions(-) diff --git a/.docker-compose.env b/.docker-compose.env index 2542d16e841..80eebc8707b 100644 --- a/.docker-compose.env +++ b/.docker-compose.env @@ -8,9 +8,7 @@ API_DOMAIN=http://localhost:8000/ ELASTIC_URI=192.168.168.167:9200 ELASTIC6_URI=192.168.168.167:9201 ELASTIC8_URI=http://192.168.168.167:9202 -ELASTIC8_CERT_PATH=/elastic8_certs/ca/ca.crt ELASTIC8_USERNAME=elastic -ELASTIC8_SECRET=secretsecret OSF_DB_HOST=192.168.168.167 DB_HOST=192.168.168.167 REDIS_HOST=redis://192.168.168.167:6379 From 52a2bc94935057d874e9fc3cdf28b6f5d0e9e684 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 14 Apr 2026 18:16:12 -0400 Subject: [PATCH 28/31] better use waffle switch ELASTICSEARCH_METRICS --- .../test_registries_moderation_metrics.py | 12 ------------ conftest.py | 18 ++++++++++++------ osf/models/registrations.py | 5 ++++- osf_tests/metrics/test_monthly_report.py | 5 +++-- 4 files changed, 19 insertions(+), 21 deletions(-) diff --git a/api_tests/metrics/test_registries_moderation_metrics.py b/api_tests/metrics/test_registries_moderation_metrics.py index 0f3dddb79b6..f5d3a047b10 100644 --- a/api_tests/metrics/test_registries_moderation_metrics.py +++ b/api_tests/metrics/test_registries_moderation_metrics.py @@ -1,7 +1,5 @@ import pytest -from waffle.testutils import override_switch -from osf import features from osf_tests.factories import RegistrationFactory, AuthUserFactory from osf.utils.workflows import RegistrationModerationStates, RegistrationModerationTriggers from osf.metrics import RegistriesModerationMetrics @@ -17,11 +15,6 @@ class TestRegistrationModerationMetrics: def registration(self): return RegistrationFactory() - @pytest.fixture(autouse=True) - def enable_elasticsearch_metrics(self): - with override_switch(features.ELASTICSEARCH_METRICS, active=True): - yield - @pytest.mark.es_metrics def test_record_transitions(self, registration): with capture_notifications(): @@ -50,11 +43,6 @@ class TestRegistrationModerationMetricsView: def registration(self): return RegistrationFactory() - @pytest.fixture(autouse=True) - def enable_elasticsearch_metrics(self): - with override_switch(features.ELASTICSEARCH_METRICS, active=True): - yield - @pytest.fixture def user(self): user = AuthUserFactory() diff --git a/conftest.py b/conftest.py index 9d8861a1e97..0c944957661 100644 --- a/conftest.py +++ b/conftest.py @@ -10,12 +10,15 @@ import pytest import responses import xml.etree.ElementTree as ET +from waffle.testutils import override_switch from api_tests.share import _utils as shtrove_test_utils from framework.celery_tasks import app as celery_app from osf.external.spam import tasks as spam_tasks from website import settings as website_settings from osf.management.commands.populate_notification_types import populate_notification_types +from osf import features + def pytest_configure(config): if not os.getenv('GITHUB_ACTIONS') == 'true': @@ -141,12 +144,15 @@ def _es_metrics_marker(request): """ marker = request.node.get_closest_marker('es_metrics') - if not marker: - yield - return - - with djelme_test_backends(): - yield + if marker: + with ( + override_switch(features.ELASTICSEARCH_METRICS, active=True), + djelme_test_backends(), + ): + yield + else: + with override_switch(features.ELASTICSEARCH_METRICS, active=False): + yield @pytest.fixture diff --git a/osf/models/registrations.py b/osf/models/registrations.py index e1d819b43bf..e9114355649 100644 --- a/osf/models/registrations.py +++ b/osf/models/registrations.py @@ -14,9 +14,11 @@ UserObjectPermissionBase, ) from dirtyfields import DirtyFieldsMixin +import waffle from framework.auth import Auth from framework.exceptions import PermissionsError +from osf import features from osf.models import Identifier from osf.utils.fields import NonNaiveDateTimeField, LowercaseCharField from osf.utils.permissions import ADMIN, READ, WRITE @@ -782,7 +784,8 @@ def _write_registration_action(self, from_state, to_state, initiated_by, comment comment=comment ) action.save() - RegistriesModerationMetrics.record_transitions(action) + if waffle.switch_is_active(features.ELASTICSEARCH_METRICS): + RegistriesModerationMetrics.record_transitions(action) moderation_notifications = { RegistrationModerationTriggers.SUBMIT: notify.notify_submit, diff --git a/osf_tests/metrics/test_monthly_report.py b/osf_tests/metrics/test_monthly_report.py index cc8c4137cb2..9d0980cd5b8 100644 --- a/osf_tests/metrics/test_monthly_report.py +++ b/osf_tests/metrics/test_monthly_report.py @@ -11,8 +11,9 @@ class TestMonthlyReportKey: @pytest.fixture def mock_save(self): - with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: - yield mock_save + with mock.patch('elasticsearch_metrics.imps.elastic6.BaseMetric.check_index_template'): + with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: + yield mock_save def test_default(self, mock_save): # only one of this type of report per month From 82de65b8ed8c2eb20e30fcb09eb139e40e7cbcd9 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 14 Apr 2026 18:18:46 -0400 Subject: [PATCH 29/31] mock check mock save --- api_tests/metrics/test_counted_usage.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/api_tests/metrics/test_counted_usage.py b/api_tests/metrics/test_counted_usage.py index 568d663be9e..e2cb7040037 100644 --- a/api_tests/metrics/test_counted_usage.py +++ b/api_tests/metrics/test_counted_usage.py @@ -38,8 +38,9 @@ def assert_saved_with(mock_save, *, expected_doc_id=None, expected_attrs): @pytest.fixture def mock_save(): - with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: - yield mock_save + with mock.patch('elasticsearch_metrics.imps.elastic6.BaseMetric.check_index_template'): + with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: + yield mock_save @pytest.mark.django_db From b33280df27eee0082eebcf9f037b9eea62e0df07 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Wed, 15 Apr 2026 15:57:03 +0300 Subject: [PATCH 30/31] remove the override --- conftest.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/conftest.py b/conftest.py index 0c944957661..198316f1cc4 100644 --- a/conftest.py +++ b/conftest.py @@ -144,15 +144,15 @@ def _es_metrics_marker(request): """ marker = request.node.get_closest_marker('es_metrics') - if marker: - with ( - override_switch(features.ELASTICSEARCH_METRICS, active=True), - djelme_test_backends(), - ): - yield - else: - with override_switch(features.ELASTICSEARCH_METRICS, active=False): - yield + if not marker: + yield + return + + with ( + override_switch(features.ELASTICSEARCH_METRICS, active=True), + djelme_test_backends(), + ): + yield @pytest.fixture From 1cef7d335c8a00677f6e37ddb975bd14619e02d6 Mon Sep 17 00:00:00 2001 From: Bohdan Odintsov Date: Wed, 15 Apr 2026 16:55:34 +0300 Subject: [PATCH 31/31] fix failing test --- osf_tests/metrics/test_monthly_report.py | 1 + 1 file changed, 1 insertion(+) diff --git a/osf_tests/metrics/test_monthly_report.py b/osf_tests/metrics/test_monthly_report.py index 9d0980cd5b8..ba981e997d6 100644 --- a/osf_tests/metrics/test_monthly_report.py +++ b/osf_tests/metrics/test_monthly_report.py @@ -80,6 +80,7 @@ class Meta: @pytest.mark.es_metrics +@pytest.mark.django_db class TestLastMonthReport: @pytest.fixture def osfid(self):