diff --git a/.docker-compose.env b/.docker-compose.env index 449c9747adf..80eebc8707b 100644 --- a/.docker-compose.env +++ b/.docker-compose.env @@ -8,6 +8,7 @@ API_DOMAIN=http://localhost:8000/ ELASTIC_URI=192.168.168.167:9200 ELASTIC6_URI=192.168.168.167:9201 ELASTIC8_URI=http://192.168.168.167:9202 +ELASTIC8_USERNAME=elastic OSF_DB_HOST=192.168.168.167 DB_HOST=192.168.168.167 REDIS_HOST=redis://192.168.168.167:6379 diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index 33942968529..011b621cca9 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -37,7 +37,19 @@ jobs: permissions: checks: write services: - postgres: + elasticsearch8: &ES8_SERVICE + image: elasticsearch:8.19.14 + ports: + - 9202:9200 + env: + discovery.type: single-node + xpack.security.enabled: false + options: >- + --health-cmd "curl -sf http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=30s" + --health-interval 10s + --health-timeout 30s + --health-retries 5 + postgres: &POSTGRES_SERVICE image: postgres env: POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} @@ -54,6 +66,8 @@ jobs: - uses: ./.github/actions/start-build - name: Run tests run: poetry run python3 -m invoke test-ci-addons --junit + env: + ELASTIC8_URI: http://localhost:9202 - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report @@ -64,18 +78,7 @@ jobs: permissions: checks: write services: - postgres: - image: postgres - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + postgres: *POSTGRES_SERVICE steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build @@ -91,18 +94,8 @@ jobs: permissions: checks: write services: - postgres: - image: postgres - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + elasticsearch8: *ES8_SERVICE + postgres: *POSTGRES_SERVICE steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build @@ -110,6 +103,8 @@ jobs: run: poetry run python3 -m invoke assets --dev - name: Run test run: poetry run python3 -m invoke test-ci-api1-and-js --junit + env: + ELASTIC8_URI: http://localhost:9202 - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report @@ -120,23 +115,15 @@ jobs: permissions: checks: write services: - postgres: - image: postgres - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + elasticsearch8: *ES8_SERVICE + postgres: *POSTGRES_SERVICE steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build - name: Run tests run: poetry run python3 -m invoke test-ci-api2 --junit + env: + ELASTIC8_URI: http://localhost:9202 - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report @@ -147,19 +134,7 @@ jobs: checks: write needs: build-cache services: - postgres: - image: postgres - - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + postgres: *POSTGRES_SERVICE steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build @@ -175,19 +150,7 @@ jobs: checks: write needs: build-cache services: - postgres: - image: postgres - - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + postgres: *POSTGRES_SERVICE mailhog: image: mailhog/mailhog ports: @@ -208,19 +171,7 @@ jobs: checks: write needs: build-cache services: - postgres: - image: postgres - - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + postgres: *POSTGRES_SERVICE steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build diff --git a/api/base/settings/defaults.py b/api/base/settings/defaults.py index 816586ffcfb..72e169c25a1 100644 --- a/api/base/settings/defaults.py +++ b/api/base/settings/defaults.py @@ -316,7 +316,6 @@ HASHIDS_SALT = 'pinkhimalayan' # django-elasticsearch-metrics -DJELME_AUTOSETUP = True DJELME_BACKENDS = { 'osfmetrics_es6': { 'elasticsearch_metrics.imps.elastic6': { @@ -327,6 +326,12 @@ 'osfmetrics_es8': { 'elasticsearch_metrics.imps.elastic8': { 'hosts': osf_settings.ELASTIC8_URI, + 'ca_certs': osf_settings.ELASTIC8_CERT_PATH, + 'basic_auth': ( + (osf_settings.ELASTIC8_USERNAME, osf_settings.ELASTIC8_SECRET) + if osf_settings.ELASTIC8_SECRET is not None + else None + ), }, }, } diff --git a/api_tests/metrics/test_counted_usage.py b/api_tests/metrics/test_counted_usage.py index 568d663be9e..e2cb7040037 100644 --- a/api_tests/metrics/test_counted_usage.py +++ b/api_tests/metrics/test_counted_usage.py @@ -38,8 +38,9 @@ def assert_saved_with(mock_save, *, expected_doc_id=None, expected_attrs): @pytest.fixture def mock_save(): - with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: - yield mock_save + with mock.patch('elasticsearch_metrics.imps.elastic6.BaseMetric.check_index_template'): + with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: + yield mock_save @pytest.mark.django_db diff --git a/api_tests/metrics/test_registries_moderation_metrics.py b/api_tests/metrics/test_registries_moderation_metrics.py index 0f3dddb79b6..f5d3a047b10 100644 --- a/api_tests/metrics/test_registries_moderation_metrics.py +++ b/api_tests/metrics/test_registries_moderation_metrics.py @@ -1,7 +1,5 @@ import pytest -from waffle.testutils import override_switch -from osf import features from osf_tests.factories import RegistrationFactory, AuthUserFactory from osf.utils.workflows import RegistrationModerationStates, RegistrationModerationTriggers from osf.metrics import RegistriesModerationMetrics @@ -17,11 +15,6 @@ class TestRegistrationModerationMetrics: def registration(self): return RegistrationFactory() - @pytest.fixture(autouse=True) - def enable_elasticsearch_metrics(self): - with override_switch(features.ELASTICSEARCH_METRICS, active=True): - yield - @pytest.mark.es_metrics def test_record_transitions(self, registration): with capture_notifications(): @@ -50,11 +43,6 @@ class TestRegistrationModerationMetricsView: def registration(self): return RegistrationFactory() - @pytest.fixture(autouse=True) - def enable_elasticsearch_metrics(self): - with override_switch(features.ELASTICSEARCH_METRICS, active=True): - yield - @pytest.fixture def user(self): user = AuthUserFactory() diff --git a/conftest.py b/conftest.py index 232b788c0fb..198316f1cc4 100644 --- a/conftest.py +++ b/conftest.py @@ -5,18 +5,20 @@ from django.db import transaction from elasticsearch6_dsl.connections import connections -from website import settings as osf_settings -from elasticsearch_metrics.tests._test_util import RealElasticTestCase +from elasticsearch_metrics.tests.util import djelme_test_backends from faker import Factory import pytest import responses import xml.etree.ElementTree as ET +from waffle.testutils import override_switch from api_tests.share import _utils as shtrove_test_utils from framework.celery_tasks import app as celery_app from osf.external.spam import tasks as spam_tasks from website import settings as website_settings from osf.management.commands.populate_notification_types import populate_notification_types +from osf import features + def pytest_configure(config): if not os.getenv('GITHUB_ACTIONS') == 'true': @@ -146,19 +148,12 @@ def _es_metrics_marker(request): yield return - connections.create_connection( - alias='osfmetrics_es6', - hosts=osf_settings.ELASTIC6_URI, - ) - - class _Es6TestCase(RealElasticTestCase, autosetup_djelme_backends=True): - ... - es6_test_case = _Es6TestCase() - es6_test_case.setUp() - try: + with ( + override_switch(features.ELASTICSEARCH_METRICS, active=True), + djelme_test_backends(), + ): yield - finally: - es6_test_case.tearDown() + @pytest.fixture def mock_share_responses(): diff --git a/docker-compose.yml b/docker-compose.yml index f26c3617b67..83e8fd27483 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -79,15 +79,22 @@ services: stdin_open: true elasticsearch8: - image: docker.elastic.co/elasticsearch/elasticsearch:8.19.11 - platform: linux/arm64 + image: elasticsearch:8.19.14 environment: - - xpack.security.enabled=false - discovery.type=single-node + - xpack.security.enabled=false + - ES_JAVA_OPTS=-Xms512m -Xmx512m # reduce memory usage + - xpack.ml.enabled=false ports: - 9202:9200 volumes: - elasticsearch8_data_vol:/usr/share/elasticsearch/data + healthcheck: + start_period: 15s + test: ["CMD", "curl", "-sf", "http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=30s"] + interval: 10s + timeout: 30s + retries: 5 stdin_open: true postgres: diff --git a/osf/metrics/__init__.py b/osf/metrics/__init__.py index 0e7b1a1cf32..6056e6d92f3 100644 --- a/osf/metrics/__init__.py +++ b/osf/metrics/__init__.py @@ -17,6 +17,8 @@ StorageAddonUsage, UserSummaryReport, ) +from . import es8_metrics + DAILY_REPORTS = ( DownloadCountReport, @@ -36,4 +38,5 @@ 'PreprintView', 'PreprintDownload', 'RegistriesModerationMetrics', + 'es8_metrics', ) diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py new file mode 100644 index 00000000000..436a1c62d46 --- /dev/null +++ b/osf/metrics/es8_metrics.py @@ -0,0 +1,348 @@ +import datetime +import enum +from urllib.parse import urlsplit + +import elasticsearch8.dsl as esdsl +from elasticsearch_metrics import DAILY, MONTHLY +import elasticsearch_metrics.imps.elastic8 as djelme + +from osf.metrics.utils import YearMonth + + +### +# custom dsl fields + +class YearmonthField(esdsl.Date): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs, format='strict_year_month') + + def deserialize(self, data): + if isinstance(data, int): + # elasticsearch stores dates in milliseconds since the unix epoch + _as_datetime = datetime.datetime.fromtimestamp(data // 1000) + return YearMonth.from_date(_as_datetime) + elif data is None: + return None + try: + return YearMonth.from_any(data) + except ValueError: + raise ValueError(f'unsure how to deserialize "{data}" (of type {type(data)}) to YearMonth') + + def serialize(self, data): + if isinstance(data, str): + return data + elif isinstance(data, YearMonth): + return str(data) + elif isinstance(data, (datetime.datetime, datetime.date)): + return str(YearMonth.from_date(data)) + elif data is None: + return None + else: + raise ValueError(f'unsure how to serialize "{data}" (of type {type(data)}) as YYYY-MM') + + +### +# inner objects for events + +route_prefix_analyzer = esdsl.analyzer( + 'route_prefix_analyzer', + tokenizer=esdsl.tokenizer('route_prefix_tokenizer', 'path_hierarchy', delimiter='.'), +) + + +class PageviewInfo(esdsl.InnerDoc): + """PageviewInfo + + for CountedAuthUsage generated by viewing a web page + """ + + # fields that should be provided + referer_url: str + page_url: str + page_title: str + route_name: str = esdsl.mapped_field(esdsl.Keyword( + fields={ + 'by_prefix': esdsl.Text(analyzer=route_prefix_analyzer), + }, + )) + + # fields auto-filled + page_path: str + referer_domain: str + hour_of_day: int + + +### +# Event records + +class OsfCountedUsageRecord(djelme.CountedUsageRecord): + ''' + + inherited fields: + platform_iri: str + database_iri: str + item_iri: str + sessionhour_id: str + within_iris: list[str] + ''' + # osf-specific fields + item_osfid: str + item_type: str + item_public: bool + user_is_authenticated: bool + action_labels: list[str] + pageview_info: PageviewInfo + + def save(self, *args, **kwargs): + # autofill pageview_info fields + if self.pageview_info: + self.pageview_info.hour_of_day = self.timestamp.hour + _url = self.pageview_info.page_url + if _url: + self.pageview_info.page_path = urlsplit(_url).path.rstrip('/') + _ref_url = self.pageview_info.referer_url + if _ref_url: + self.pageview_info.referer_domain = urlsplit(_ref_url).netloc + super().save(*args, **kwargs) + + +class ActionLabel(enum.Enum): + SEARCH = 'search' # counter:Search + VIEW = 'view' # counter:Investigation + DOWNLOAD = 'download' # counter:Request + WEB = 'web' # counter:Regular (aka "pageview") + API = 'api' # counter:TDM (aka "non-web api usage") + + +class RegistriesModerationMetricsEs8(djelme.EventRecord): + registration_id: str + provider_id: str + trigger: str + from_state: str + to_state: str + user_id: str + comment: str + + class Index: + settings = { + 'number_of_shards': 1, + 'number_of_replicas': 1, + 'refresh_interval': '1s', + } + + +### +# Reusable inner objects for reports + +class RunningTotal(esdsl.InnerDoc): + total: int + total_daily: int + + +class FileRunningTotals(esdsl.InnerDoc): + total: int + public: int + private: int + total_daily: int + public_daily: int + private_daily: int + + +class NodeRunningTotals(esdsl.InnerDoc): + total: int + total_excluding_spam: int + public: int + private: int + total_daily: int + total_daily_excluding_spam: int + public_daily: int + private_daily: int + + +class RegistrationRunningTotals(esdsl.InnerDoc): + total: int + public: int + embargoed: int + embargoed_v2: int + withdrawn: int + total_daily: int + public_daily: int + embargoed_daily: int + embargoed_v2_daily: int + withdrawn_daily: int + + +class UsageByStorageAddon(esdsl.InnerDoc): + addon_shortname: str + enabled_usersettings: RunningTotal + linked_usersettings: RunningTotal + deleted_usersettings: RunningTotal + usersetting_links: RunningTotal + connected_nodesettings: RunningTotal + disconnected_nodesettings: RunningTotal + deleted_nodesettings: RunningTotal + + +### +# Cyclic reports + + +class StorageAddonUsageEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = DAILY + + usage_by_addon: list[UsageByStorageAddon] + + +class DownloadCountReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = DAILY + + daily_file_downloads: int + + +class InstitutionSummaryReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = DAILY + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id',) + + institution_id: str + institution_name: str + users: RunningTotal + nodes: NodeRunningTotals + projects: NodeRunningTotals + registered_nodes: RegistrationRunningTotals + registered_projects: RegistrationRunningTotals + + +class NewUserDomainReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = DAILY + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'domain_name',) + + domain_name: str + new_user_count: int + + +class NodeSummaryReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = DAILY + + nodes: NodeRunningTotals + projects: NodeRunningTotals + registered_nodes: RegistrationRunningTotals + registered_projects: RegistrationRunningTotals + + +class OsfstorageFileCountReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = DAILY + + files: FileRunningTotals + + +class PreprintSummaryReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = DAILY + + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'provider_key',) + provider_key: str + preprint_count: int + + +class UserSummaryReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = DAILY + + active: int + deactivated: int + merged: int + new_users_daily: int + new_users_with_institution_daily: int + unconfirmed: int + + +class SpamSummaryReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = MONTHLY + + node_confirmed_spam: int + node_confirmed_ham: int + node_flagged: int + registration_confirmed_spam: int + registration_confirmed_ham: int + registration_flagged: int + preprint_confirmed_spam: int + preprint_confirmed_ham: int + preprint_flagged: int + user_marked_as_spam: int + user_marked_as_ham: int + + +class InstitutionalUserReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = MONTHLY + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', 'user_id',) + + institution_id: str + # user info: + user_id: str + user_name: str + department_name: str + month_last_login = YearmonthField() + month_last_active = YearmonthField() + account_creation_date = YearmonthField() + orcid_id: str + # counts: + public_project_count: int + private_project_count: int + public_registration_count: int + embargoed_registration_count: int + published_preprint_count: int + public_file_count: int = esdsl.mapped_field(esdsl.Long()) + storage_byte_count: int = esdsl.mapped_field(esdsl.Long()) + + +class InstitutionMonthlySummaryReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = MONTHLY + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', ) + + institution_id: str + user_count: int + public_project_count: int + private_project_count: int + public_registration_count: int + embargoed_registration_count: int + published_preprint_count: int + storage_byte_count: int = esdsl.mapped_field(esdsl.Long()) + public_file_count: int = esdsl.mapped_field(esdsl.Long()) + monthly_logged_in_user_count: int = esdsl.mapped_field(esdsl.Long()) + monthly_active_user_count: int = esdsl.mapped_field(esdsl.Long()) + + +class PublicItemUsageReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = MONTHLY + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'item_osfid') + + # where noted, fields are meant to correspond to defined terms from COUNTER + # https://cop5.projectcounter.org/en/5.1/appendices/a-glossary-of-terms.html + # https://coprd.countermetrics.org/en/1.0.1/appendices/a-glossary.html + item_osfid: str + item_type: list[str] # counter:Data-Type + provider_id: list[str] # counter:Database(?) + platform_iri: list[str] # counter:Platform + + # view counts include views on components or files contained by this item + view_count: int = esdsl.mapped_field(esdsl.Long()) + view_session_count: int = esdsl.mapped_field(esdsl.Long()) + cumulative_view_count: int = esdsl.mapped_field(esdsl.Long()) + cumulative_view_session_count: int = esdsl.mapped_field(esdsl.Long()) + + # download counts of this item only (not including contained components or files) + download_count: int = esdsl.mapped_field(esdsl.Long()) + download_session_count: int = esdsl.mapped_field(esdsl.Long()) + cumulative_download_count: int = esdsl.mapped_field(esdsl.Long()) + cumulative_download_session_count: int = esdsl.mapped_field(esdsl.Long()) + + +class PrivateSpamMetricsReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = MONTHLY + + node_oopspam_flagged: int + node_oopspam_hammed: int + node_akismet_flagged: int + node_akismet_hammed: int + preprint_oopspam_flagged: int + preprint_oopspam_hammed: int + preprint_akismet_flagged: int + preprint_akismet_hammed: int diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py index 9d71ea7e8c2..62479e359cd 100644 --- a/osf/metrics/reports.py +++ b/osf/metrics/reports.py @@ -120,6 +120,8 @@ def save(self, *args, **kwargs): @receiver(metrics_pre_save) def set_report_id(sender, instance, **kwargs): + if not issubclass(sender, metrics.Metric): + return # skip es8 record types try: _unique_together_fields = instance.UNIQUE_TOGETHER_FIELDS except AttributeError: diff --git a/osf/models/registrations.py b/osf/models/registrations.py index e1d819b43bf..e9114355649 100644 --- a/osf/models/registrations.py +++ b/osf/models/registrations.py @@ -14,9 +14,11 @@ UserObjectPermissionBase, ) from dirtyfields import DirtyFieldsMixin +import waffle from framework.auth import Auth from framework.exceptions import PermissionsError +from osf import features from osf.models import Identifier from osf.utils.fields import NonNaiveDateTimeField, LowercaseCharField from osf.utils.permissions import ADMIN, READ, WRITE @@ -782,7 +784,8 @@ def _write_registration_action(self, from_state, to_state, initiated_by, comment comment=comment ) action.save() - RegistriesModerationMetrics.record_transitions(action) + if waffle.switch_is_active(features.ELASTICSEARCH_METRICS): + RegistriesModerationMetrics.record_transitions(action) moderation_notifications = { RegistrationModerationTriggers.SUBMIT: notify.notify_submit, diff --git a/osf_tests/metrics/test_daily_report.py b/osf_tests/metrics/test_daily_report.py index 9301cdb114f..5228e2342c5 100644 --- a/osf_tests/metrics/test_daily_report.py +++ b/osf_tests/metrics/test_daily_report.py @@ -10,8 +10,9 @@ class TestDailyReportKey: @pytest.fixture def mock_save(self): - with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: - yield mock_save + with mock.patch('elasticsearch_metrics.imps.elastic6.BaseMetric.check_index_template'): + with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: + yield mock_save def test_default(self, mock_save): # only one of this type of report per day diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py new file mode 100644 index 00000000000..e93579628dc --- /dev/null +++ b/osf_tests/metrics/test_es8_metrics.py @@ -0,0 +1,53 @@ +from datetime import datetime + +from elasticsearch_metrics.tests.util import djelme_test_backends +import pytest + +from osf.metrics.es8_metrics import ( + PageviewInfo, + DownloadCountReportEs8, + OsfCountedUsageRecord, +) + + +class TestEs8Metrics: + """smoke tests to check that djelme records can be saved and searched""" + + @pytest.fixture(autouse=True) + def _real_elastic(self): + with djelme_test_backends(): + yield + + def test_nested_pageview_autofill(self): + usage = OsfCountedUsageRecord.record( + timestamp=datetime(2024, 1, 1, 15, 0), + sessionhour_id='blah', + database_iri='https://osf.example/provider', + item_iri='https://osf.example/itemm', + item_osfid='itemm', + item_public=True, + item_type='https://osf.example/Preprint', + platform_iri='https://osf.example', + user_is_authenticated=False, + pageview_info=PageviewInfo( + page_url='https://example.com/path/test', + referer_url='https://google.com', + route_name='foo.bar', + page_title='title title', + ), + ) + assert usage.pageview_info.page_path == '/path/test' + assert usage.pageview_info.referer_domain == 'google.com' + assert usage.pageview_info.hour_of_day == 15 + + def test_save_report(self): + _saved = DownloadCountReportEs8.record( + cycle_coverage='2026.1.1', + daily_file_downloads=17, + ) + DownloadCountReportEs8.refresh_timeseries_indexes() + _response = DownloadCountReportEs8.search().execute() + (_fetched,) = _response + assert _fetched.meta.id == _saved.meta.id + assert _fetched.cycle_coverage == '2026.1.1' + assert _fetched.daily_file_downloads == 17 diff --git a/osf_tests/metrics/test_monthly_report.py b/osf_tests/metrics/test_monthly_report.py index cc8c4137cb2..ba981e997d6 100644 --- a/osf_tests/metrics/test_monthly_report.py +++ b/osf_tests/metrics/test_monthly_report.py @@ -11,8 +11,9 @@ class TestMonthlyReportKey: @pytest.fixture def mock_save(self): - with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: - yield mock_save + with mock.patch('elasticsearch_metrics.imps.elastic6.BaseMetric.check_index_template'): + with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: + yield mock_save def test_default(self, mock_save): # only one of this type of report per month @@ -79,6 +80,7 @@ class Meta: @pytest.mark.es_metrics +@pytest.mark.django_db class TestLastMonthReport: @pytest.fixture def osfid(self): diff --git a/poetry.lock b/poetry.lock index f0dca07d95c..90665bce81f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1101,8 +1101,8 @@ elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"] [package.source] type = "git" url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git" -reference = "bb1c84c148ac1d2b1079b2b113e52a01a861c8a6" -resolved_reference = "bb1c84c148ac1d2b1079b2b113e52a01a861c8a6" +reference = "8025d58e23b4e0c562e1d59c98b10ec936eb56e6" +resolved_reference = "8025d58e23b4e0c562e1d59c98b10ec936eb56e6" [[package]] name = "django-extensions" @@ -4711,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.1" python-versions = "^3.12" -content-hash = "c3108a036ae092e35f7696ffe230e271b774bb12e546db77bb8b12b5fb6eca7d" +content-hash = "ef1d6d327f5557e43482793b276ccb6c5fd07989f27367af3a3736a8547b4d1a" diff --git a/pyproject.toml b/pyproject.toml index b40cdc704ab..013df3f448d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,7 +91,7 @@ datacite = "1.1.3" rdflib = "7.0.0" colorlog = "6.8.2" # Metrics -django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "bb1c84c148ac1d2b1079b2b113e52a01a861c8a6"} +django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "8025d58e23b4e0c562e1d59c98b10ec936eb56e6"} # Impact Metrics CSV Export djangorestframework-csv = "3.0.2" gevent = "24.2.1" diff --git a/website/settings/defaults.py b/website/settings/defaults.py index d0ae58dc863..1e8032cc95c 100644 --- a/website/settings/defaults.py +++ b/website/settings/defaults.py @@ -113,7 +113,10 @@ def parent_dir(path): SEARCH_ENGINE = 'elastic' # Can be 'elastic', or None ELASTIC_URI = '127.0.0.1:9200' ELASTIC6_URI = os.environ.get('ELASTIC6_URI', '127.0.0.1:9201') -ELASTIC8_URI = os.environ.get('ELASTIC8_URI', '127.0.0.1:9202') +ELASTIC8_URI = os.environ.get('ELASTIC8_URI') +ELASTIC8_CERT_PATH = os.environ.get('ELASTIC8_CERT_PATH') +ELASTIC8_USERNAME = os.environ.get('ELASTIC8_USERNAME', 'elastic') +ELASTIC8_SECRET = os.environ.get('ELASTIC8_SECRET') ELASTIC_TIMEOUT = 10 ELASTIC_INDEX = 'website' ELASTIC_KWARGS = {