diff --git a/.docker-compose.env b/.docker-compose.env index 9cb7a59e274..80eebc8707b 100644 --- a/.docker-compose.env +++ b/.docker-compose.env @@ -7,6 +7,8 @@ INTERNAL_DOMAIN=http://192.168.168.167:5000/ API_DOMAIN=http://localhost:8000/ ELASTIC_URI=192.168.168.167:9200 ELASTIC6_URI=192.168.168.167:9201 +ELASTIC8_URI=http://192.168.168.167:9202 +ELASTIC8_USERNAME=elastic OSF_DB_HOST=192.168.168.167 DB_HOST=192.168.168.167 REDIS_HOST=redis://192.168.168.167:6379 diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml index 33942968529..011b621cca9 100644 --- a/.github/workflows/test-build.yml +++ b/.github/workflows/test-build.yml @@ -37,7 +37,19 @@ jobs: permissions: checks: write services: - postgres: + elasticsearch8: &ES8_SERVICE + image: elasticsearch:8.19.14 + ports: + - 9202:9200 + env: + discovery.type: single-node + xpack.security.enabled: false + options: >- + --health-cmd "curl -sf http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=30s" + --health-interval 10s + --health-timeout 30s + --health-retries 5 + postgres: &POSTGRES_SERVICE image: postgres env: POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} @@ -54,6 +66,8 @@ jobs: - uses: ./.github/actions/start-build - name: Run tests run: poetry run python3 -m invoke test-ci-addons --junit + env: + ELASTIC8_URI: http://localhost:9202 - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report @@ -64,18 +78,7 @@ jobs: permissions: checks: write services: - postgres: - image: postgres - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + postgres: *POSTGRES_SERVICE steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build @@ -91,18 +94,8 @@ jobs: permissions: checks: write services: - postgres: - 
image: postgres - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + elasticsearch8: *ES8_SERVICE + postgres: *POSTGRES_SERVICE steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build @@ -110,6 +103,8 @@ jobs: run: poetry run python3 -m invoke assets --dev - name: Run test run: poetry run python3 -m invoke test-ci-api1-and-js --junit + env: + ELASTIC8_URI: http://localhost:9202 - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report @@ -120,23 +115,15 @@ jobs: permissions: checks: write services: - postgres: - image: postgres - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + elasticsearch8: *ES8_SERVICE + postgres: *POSTGRES_SERVICE steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build - name: Run tests run: poetry run python3 -m invoke test-ci-api2 --junit + env: + ELASTIC8_URI: http://localhost:9202 - name: Upload report if: (success() || failure()) # run this step even if previous step failed uses: ./.github/actions/gen-report @@ -147,19 +134,7 @@ jobs: checks: write needs: build-cache services: - postgres: - image: postgres - - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + postgres: *POSTGRES_SERVICE steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build @@ -175,19 +150,7 @@ jobs: checks: write needs: build-cache services: - postgres: - image: postgres - - env: - 
POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + postgres: *POSTGRES_SERVICE mailhog: image: mailhog/mailhog ports: @@ -208,19 +171,7 @@ jobs: checks: write needs: build-cache services: - postgres: - image: postgres - - env: - POSTGRES_PASSWORD: ${{ env.OSF_DB_PASSWORD }} - options: >- - --health-cmd pg_isready - --health-interval 10s - --health-timeout 5s - --health-retries 5 - ports: - # Maps tcp port 5432 on service container to the host - - 5432:5432 + postgres: *POSTGRES_SERVICE steps: - uses: actions/checkout@v2 - uses: ./.github/actions/start-build diff --git a/addons/base/views.py b/addons/base/views.py index 5ff3d6e7093..ebcd662966b 100644 --- a/addons/base/views.py +++ b/addons/base/views.py @@ -14,7 +14,7 @@ import waffle from django.db import transaction from django.contrib.contenttypes.models import ContentType -from elasticsearch import exceptions as es_exceptions +from elasticsearch6 import exceptions as es_exceptions from rest_framework import status as http_status from api.caching.tasks import update_storage_usage_with_size diff --git a/api/base/elasticsearch_dsl_views.py b/api/base/elasticsearch_dsl_views.py index 6199fd82d0e..ecf2825d4e8 100644 --- a/api/base/elasticsearch_dsl_views.py +++ b/api/base/elasticsearch_dsl_views.py @@ -3,7 +3,7 @@ import datetime import typing -import elasticsearch_dsl as edsl +import elasticsearch6_dsl as edsl from rest_framework import generics, exceptions as drf_exceptions from rest_framework.settings import api_settings as drf_settings from api.base.settings.defaults import REPORT_FILENAME_FORMAT @@ -23,7 +23,7 @@ class ElasticsearchListView(FilterMixin, JSONAPIBaseView, generics.ListAPIView, abc.ABC): - '''abstract view class using `elasticsearch_dsl.Search` as a queryset-analogue + '''abstract view class using 
`elasticsearch6_dsl.Search` as a queryset-analogue builds a `Search` based on `self.get_default_search()` and the request's query parameters for filtering, sorting, and pagination -- fetches only @@ -36,7 +36,7 @@ class ElasticsearchListView(FilterMixin, JSONAPIBaseView, generics.ListAPIView, @abc.abstractmethod def get_default_search(self) -> edsl.Search | None: - '''the base `elasticsearch_dsl.Search` for this list, based on url path + '''the base `elasticsearch6_dsl.Search` for this list, based on url path (common jsonapi query parameters will be considered automatically) ''' @@ -95,7 +95,7 @@ def finalize_response(self, request, response, *args, **kwargs): # (filtering handled in-view to reuse logic from FilterMixin) filter_backends = () - # note: because elasticsearch_dsl.Search supports slicing and gives results when iterated on, + # note: because elasticsearch6_dsl.Search supports slicing and gives results when iterated on, # it works fine with default pagination # override rest_framework.generics.GenericAPIView diff --git a/api/base/settings/defaults.py b/api/base/settings/defaults.py index efddf2484b8..72e169c25a1 100644 --- a/api/base/settings/defaults.py +++ b/api/base/settings/defaults.py @@ -316,10 +316,23 @@ HASHIDS_SALT = 'pinkhimalayan' # django-elasticsearch-metrics -ELASTICSEARCH_DSL = { - 'default': { - 'hosts': osf_settings.ELASTIC6_URI, - 'retry_on_timeout': True, +DJELME_BACKENDS = { + 'osfmetrics_es6': { + 'elasticsearch_metrics.imps.elastic6': { + 'hosts': osf_settings.ELASTIC6_URI, + 'retry_on_timeout': True, + }, + }, + 'osfmetrics_es8': { + 'elasticsearch_metrics.imps.elastic8': { + 'hosts': osf_settings.ELASTIC8_URI, + 'ca_certs': osf_settings.ELASTIC8_CERT_PATH, + 'basic_auth': ( + (osf_settings.ELASTIC8_USERNAME, osf_settings.ELASTIC8_SECRET) + if osf_settings.ELASTIC8_SECRET is not None + else None + ), + }, }, } # Store yearly indices for time-series metrics diff --git a/api/metrics/views.py b/api/metrics/views.py index 
daaa684d13a..c6e4d56c9b9 100644 --- a/api/metrics/views.py +++ b/api/metrics/views.py @@ -6,8 +6,8 @@ from django.http import JsonResponse, HttpResponse, Http404 from django.utils import timezone -from elasticsearch.exceptions import NotFoundError, RequestError -from elasticsearch_dsl.connections import get_connection +from elasticsearch6.exceptions import NotFoundError, RequestError +from elasticsearch6_dsl.connections import get_connection from framework.auth.oauth_scopes import CoreScopes diff --git a/api_tests/institutions/views/test_institution_department_list.py b/api_tests/institutions/views/test_institution_department_list.py index c2a5c0fcf99..8b785504756 100644 --- a/api_tests/institutions/views/test_institution_department_list.py +++ b/api_tests/institutions/views/test_institution_department_list.py @@ -44,7 +44,7 @@ def populate_counts(self, user, user2, user3, user4, admin, institution): department_name='Old Department', public_project_count=1, private_project_count=1, - ).save(refresh=True) + ).save() _this_month = YearMonth.from_date(datetime.date.today()) @@ -56,7 +56,7 @@ def populate_counts(self, user, user2, user3, user4, admin, institution): department_name='New Department', public_project_count=1, private_project_count=1, - ).save(refresh=True) + ).save() # A second user entered the department InstitutionalUserReport( @@ -66,7 +66,7 @@ def populate_counts(self, user, user2, user3, user4, admin, institution): department_name='New Department', public_project_count=1, private_project_count=1, - ).save(refresh=True) + ).save() # A new department with a single user to test sorting InstitutionalUserReport( @@ -76,7 +76,7 @@ def populate_counts(self, user, user2, user3, user4, admin, institution): department_name='Smaller Department', public_project_count=1, private_project_count=1, - ).save(refresh=True) + ).save() # A user with no department InstitutionalUserReport( @@ -85,7 +85,7 @@ def populate_counts(self, user, user2, user3, user4, admin, 
institution): institution_id=institution._id, public_project_count=1, private_project_count=1, - ).save(refresh=True) + ).save() @pytest.fixture() def admin(self, institution): @@ -113,6 +113,7 @@ def test_auth(self, app, url, user, admin): assert resp.json['data'] == [] def test_get(self, app, url, admin, institution, populate_counts): + InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern) resp = app.get(url, auth=admin.auth) assert resp.json['data'] == [{ diff --git a/api_tests/institutions/views/test_institution_summary_metrics.py b/api_tests/institutions/views/test_institution_summary_metrics.py index 41983458d2e..6dd6c5bbda3 100644 --- a/api_tests/institutions/views/test_institution_summary_metrics.py +++ b/api_tests/institutions/views/test_institution_summary_metrics.py @@ -84,6 +84,7 @@ def test_get_empty(self, app, url, institutional_admin): assert resp.json['meta'] == {'version': '2.0'} def test_get_report(self, app, url, institutional_admin, institution, reports, unshown_reports): + InstitutionMonthlySummaryReport._get_connection().indices.refresh(InstitutionMonthlySummaryReport._template_pattern) resp = app.get(url, auth=institutional_admin.auth) assert resp.status_code == 200 @@ -149,6 +150,7 @@ def test_get_report_with_multiple_months_and_institutions( monthly_logged_in_user_count=270, monthly_active_user_count=260, ) + InstitutionMonthlySummaryReport._get_connection().indices.refresh(InstitutionMonthlySummaryReport._template_pattern) resp = app.get(url, auth=institutional_admin.auth) assert resp.status_code == 200 @@ -189,6 +191,7 @@ def test_get_with_valid_report_dates(self, app, url, institution, institutional_ institution, user_count=4133, ) + InstitutionMonthlySummaryReport._get_connection().indices.refresh(InstitutionMonthlySummaryReport._template_pattern) resp = app.get(f'{url}?report_yearmonth=2024-08', auth=institutional_admin.auth) assert resp.status_code == 200 @@ -213,6 +216,7 @@ def 
test_get_with_invalid_report_date(self, app, url, institution, institutional institution, user_count=999, ) + InstitutionMonthlySummaryReport._get_connection().indices.refresh(InstitutionMonthlySummaryReport._template_pattern) # Request with an invalid report_date format resp = app.get(f'{url}?report_yearmonth=invalid-date', auth=institutional_admin.auth) @@ -233,6 +237,7 @@ def test_get_without_report_date_uses_most_recent(self, app, url, institution, i institution, user_count=999, ) + InstitutionMonthlySummaryReport._get_connection().indices.refresh(InstitutionMonthlySummaryReport._template_pattern) resp = app.get(url, auth=institutional_admin.auth) assert resp.status_code == 200 @@ -247,5 +252,5 @@ def _summary_report_factory(yearmonth, institution, **kwargs): institution_id=institution._id, **kwargs, ) - report.save(refresh=True) + report.save() return report diff --git a/api_tests/institutions/views/test_institution_user_metric_list.py b/api_tests/institutions/views/test_institution_user_metric_list.py index 0826dcd0161..d2b99da435f 100644 --- a/api_tests/institutions/views/test_institution_user_metric_list.py +++ b/api_tests/institutions/views/test_institution_user_metric_list.py @@ -89,6 +89,7 @@ def test_get_empty(self, app, url, institutional_admin): assert _resp.json['data'] == [] def test_get_reports(self, app, url, institutional_admin, institution, reports, unshown_reports): + InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern) _resp = app.get(url, auth=institutional_admin.auth) assert _resp.status_code == 200 assert len(_resp.json['data']) == len(reports) @@ -100,6 +101,7 @@ def test_get_reports(self, app, url, institutional_admin, institution, reports, assert len(response_object['attributes']['contacts']) == 0 def test_filter_reports(self, app, url, institutional_admin, institution, reports, unshown_reports): + 
InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern) for _query, _expected_user_ids in ( ({'filter[department]': 'nunavum'}, set()), ({'filter[department]': 'incidentally'}, set()), @@ -135,6 +137,7 @@ def test_filter_reports(self, app, url, institutional_admin, institution, report assert set(_user_ids(_resp)) == _expected_user_ids def test_sort_reports(self, app, url, institutional_admin, institution, reports, unshown_reports): + InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern) for _query, _expected_user_id_list in ( ({'sort': 'storage_byte_count'}, ['u_sparse', 'u_orc', 'u_blargl', 'u_orcomma']), ({'sort': '-storage_byte_count'}, ['u_orcomma', 'u_blargl', 'u_orc', 'u_sparse']), @@ -144,6 +147,7 @@ def test_sort_reports(self, app, url, institutional_admin, institution, reports, assert list(_user_ids(_resp)) == _expected_user_id_list def test_paginate_reports(self, app, url, institutional_admin, institution, reports, unshown_reports): + InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern) for _query, _expected_user_id_list in ( ({'sort': 'storage_byte_count', 'page[size]': 2}, ['u_sparse', 'u_orc']), ({'sort': 'storage_byte_count', 'page[size]': 2, 'page': 2}, ['u_blargl', 'u_orcomma']), @@ -178,6 +182,7 @@ def test_get_report_formats_csv_tsv(self, app, url, institutional_admin, institu month_last_active='2018-02', month_last_login='2018-02', ) + InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern) resp = app.get(f'{url}?format={format_type}', auth=institutional_admin.auth) assert resp.status_code == 200 @@ -281,6 +286,7 @@ def test_csv_tsv_ignores_pagination(self, app, url, institutional_admin, institu str(736662999298 + i), f'Jalen Hurts #{i}', ]) + InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern) # Make request for CSV 
format with page[size]=10 resp = app.get(f'{url}?format={format_type}', auth=institutional_admin.auth) @@ -346,6 +352,7 @@ def test_get_report_format_table_json(self, app, url, institutional_admin, insti month_last_active='2018-02', month_last_login='2018-02', ) + InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern) resp = app.get(f'{url}?format=json_report', auth=institutional_admin.auth) assert resp.status_code == 200 @@ -411,6 +418,7 @@ def test_correct_number_of_contact_messages(self, app, url, institutional_admin, department_name='a department, or so, that happens, incidentally, to have commas', storage_byte_count=736662999298, ) + InstitutionalUserReport._get_connection().indices.refresh(InstitutionalUserReport._template_pattern) receiver = user1 with capture_notifications(): @@ -477,5 +485,5 @@ def _report_factory(yearmonth, institution, **kwargs): institution_id=institution._id, **kwargs, ) - _report.save(refresh=True) + _report.save() return _report diff --git a/api_tests/metrics/test_composite_query.py b/api_tests/metrics/test_composite_query.py index 0cd0b3bb180..016677c3a11 100644 --- a/api_tests/metrics/test_composite_query.py +++ b/api_tests/metrics/test_composite_query.py @@ -1,4 +1,3 @@ -import time import pytest from datetime import datetime from osf_tests.factories import ( @@ -75,7 +74,7 @@ def test_elasticsearch_agg_query(self, app, user, base_url, preprint): path=preprint.primary_file.path, timestamp=datetime(year=2020, month=2, day=1) ) - time.sleep(1) # gives ES some time to update + PreprintDownload._get_connection().indices.refresh(PreprintDownload._template_pattern) resp = app.post_json_api(post_url, payload, auth=user.auth) assert resp.status_code == 200 diff --git a/api_tests/metrics/test_counted_usage.py b/api_tests/metrics/test_counted_usage.py index 568d663be9e..e2cb7040037 100644 --- a/api_tests/metrics/test_counted_usage.py +++ b/api_tests/metrics/test_counted_usage.py @@ -38,8 +38,9 
@@ def assert_saved_with(mock_save, *, expected_doc_id=None, expected_attrs): @pytest.fixture def mock_save(): - with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: - yield mock_save + with mock.patch('elasticsearch_metrics.imps.elastic6.BaseMetric.check_index_template'): + with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: + yield mock_save @pytest.mark.django_db diff --git a/api_tests/metrics/test_preprint_metrics.py b/api_tests/metrics/test_preprint_metrics.py index 1bde8719b75..cd9b8041c2d 100644 --- a/api_tests/metrics/test_preprint_metrics.py +++ b/api_tests/metrics/test_preprint_metrics.py @@ -8,7 +8,7 @@ from django.utils import timezone from waffle.testutils import override_switch -from elasticsearch.exceptions import RequestError +from elasticsearch6.exceptions import RequestError from osf import features from api.base.settings import API_PRIVATE_BASE as API_BASE diff --git a/api_tests/metrics/test_registries_moderation_metrics.py b/api_tests/metrics/test_registries_moderation_metrics.py index 93469b1b3b5..f5d3a047b10 100644 --- a/api_tests/metrics/test_registries_moderation_metrics.py +++ b/api_tests/metrics/test_registries_moderation_metrics.py @@ -1,8 +1,5 @@ import pytest -from waffle.testutils import override_switch -import time -from osf import features from osf_tests.factories import RegistrationFactory, AuthUserFactory from osf.utils.workflows import RegistrationModerationStates, RegistrationModerationTriggers from osf.metrics import RegistriesModerationMetrics @@ -18,11 +15,6 @@ class TestRegistrationModerationMetrics: def registration(self): return RegistrationFactory() - @pytest.fixture(autouse=True) - def enable_elasticsearch_metrics(self): - with override_switch(features.ELASTICSEARCH_METRICS, active=True): - yield - @pytest.mark.es_metrics def test_record_transitions(self, registration): with capture_notifications(): @@ -32,7 +24,7 @@ def test_record_transitions(self, registration): 
registration.creator, 'Metrics is easy' ) - time.sleep(1) + RegistriesModerationMetrics._get_connection().indices.refresh(RegistriesModerationMetrics._template_pattern) assert RegistriesModerationMetrics.search().count() == 1 data = RegistriesModerationMetrics.search().execute()['hits']['hits'][0]['_source'] @@ -51,11 +43,6 @@ class TestRegistrationModerationMetricsView: def registration(self): return RegistrationFactory() - @pytest.fixture(autouse=True) - def enable_elasticsearch_metrics(self): - with override_switch(features.ELASTICSEARCH_METRICS, active=True): - yield - @pytest.fixture def user(self): user = AuthUserFactory() @@ -81,7 +68,7 @@ def test_registries_moderation_view(self, app, user, base_url, registration): registration.creator, 'Metrics is easy' ) - time.sleep(1) + RegistriesModerationMetrics._get_connection().indices.refresh(RegistriesModerationMetrics._template_pattern) res = app.get(base_url, auth=user.auth, expect_errors=True) data = res.json diff --git a/conftest.py b/conftest.py index 9494e3d296e..198316f1cc4 100644 --- a/conftest.py +++ b/conftest.py @@ -1,24 +1,24 @@ -import contextlib from unittest import mock import logging import os import re -from django.core.management import call_command from django.db import transaction -from elasticsearch import exceptions as es_exceptions -from elasticsearch_dsl.connections import connections -from elasticsearch_metrics.registry import registry as es_metrics_registry +from elasticsearch6_dsl.connections import connections +from elasticsearch_metrics.tests.util import djelme_test_backends from faker import Factory import pytest import responses import xml.etree.ElementTree as ET +from waffle.testutils import override_switch from api_tests.share import _utils as shtrove_test_utils from framework.celery_tasks import app as celery_app from osf.external.spam import tasks as spam_tasks from website import settings as website_settings from osf.management.commands.populate_notification_types import 
populate_notification_types +from osf import features + def pytest_configure(config): if not os.getenv('GITHUB_ACTIONS') == 'true': @@ -138,45 +138,20 @@ def es6_client(setup_connections): @pytest.fixture(scope='function', autouse=True) -def _es_metrics_marker(request, worker_id): +def _es_metrics_marker(request): """Clear out all indices and index templates before and after tests marked with `es_metrics`. """ marker = request.node.get_closest_marker('es_metrics') - if marker: - es6_client = request.getfixturevalue('es6_client') - _temp_prefix = 'temp_metrics_' - _temp_wildcard = f'{_temp_prefix}-{worker_id}*' - - def _teardown_es_temps(): - es6_client.indices.delete(index=_temp_wildcard) - try: - es6_client.indices.delete_template(_temp_wildcard) - except es_exceptions.NotFoundError: - pass - - @contextlib.contextmanager - def _mock_metric_names(): - with contextlib.ExitStack() as _exit: - for _metric_class in es_metrics_registry.get_metrics(): - _exit.enter_context(mock.patch.object( - _metric_class, - '_template_name', # also used to construct index names - f'{_temp_prefix}-{worker_id}{_metric_class._template_name}', - )) - _exit.enter_context(mock.patch.object( - _metric_class, - '_template', # a wildcard string for indexes and templates - f'{_temp_prefix}-{worker_id}{_metric_class._template}', - )) - yield - - _teardown_es_temps() - with _mock_metric_names(): - call_command('sync_metrics') - yield - _teardown_es_temps() - else: + + if not marker: + yield + return + + with ( + override_switch(features.ELASTICSEARCH_METRICS, active=True), + djelme_test_backends(), + ): yield diff --git a/docker-compose.yml b/docker-compose.yml index f00b589f7e0..83e8fd27483 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -13,6 +13,8 @@ volumes: external: false elasticsearch6_data_vol: external: false + elasticsearch8_data_vol: + external: false rabbitmq_vol: external: false ember_osf_web_dist_vol: @@ -76,6 +78,25 @@ services: - 
elasticsearch6_data_vol:/usr/share/elasticsearch/data stdin_open: true + elasticsearch8: + image: elasticsearch:8.19.14 + environment: + - discovery.type=single-node + - xpack.security.enabled=false + - ES_JAVA_OPTS=-Xms512m -Xmx512m # reduce memory usage + - xpack.ml.enabled=false + ports: + - 9202:9200 + volumes: + - elasticsearch8_data_vol:/usr/share/elasticsearch/data + healthcheck: + start_period: 15s + test: ["CMD", "curl", "-sf", "http://localhost:9200/_cluster/health?wait_for_status=yellow&timeout=30s"] + interval: 10s + timeout: 30s + retries: 5 + stdin_open: true + postgres: image: postgres:15.4 command: diff --git a/osf/management/commands/monthly_reporters_go.py b/osf/management/commands/monthly_reporters_go.py index 83ed5f6d985..218b45da1df 100644 --- a/osf/management/commands/monthly_reporters_go.py +++ b/osf/management/commands/monthly_reporters_go.py @@ -3,7 +3,7 @@ from django.core.management.base import BaseCommand from django.db import OperationalError as DjangoOperationalError -from elasticsearch.exceptions import ConnectionError as ElasticConnectionError +from elasticsearch6.exceptions import ConnectionError as ElasticConnectionError from psycopg2 import OperationalError as PostgresOperationalError from framework.celery_tasks import app as celery_app diff --git a/osf/management/commands/reindex_es6.py b/osf/management/commands/reindex_es6.py index c37d0e34f2c..8961ea6fff1 100644 --- a/osf/management/commands/reindex_es6.py +++ b/osf/management/commands/reindex_es6.py @@ -4,7 +4,7 @@ import logging from django.core.management.base import BaseCommand -from elasticsearch_dsl import connections +from elasticsearch6_dsl import connections from elasticsearch_metrics.registry import registry logger = logging.getLogger(__name__) diff --git a/osf/metrics/__init__.py b/osf/metrics/__init__.py index 0e7b1a1cf32..6056e6d92f3 100644 --- a/osf/metrics/__init__.py +++ b/osf/metrics/__init__.py @@ -17,6 +17,8 @@ StorageAddonUsage, UserSummaryReport, ) +from . 
import es8_metrics + + DAILY_REPORTS = ( DownloadCountReport, @@ -36,4 +38,5 @@ 'PreprintView', 'PreprintDownload', 'RegistriesModerationMetrics', + 'es8_metrics', ) diff --git a/osf/metrics/counted_usage.py b/osf/metrics/counted_usage.py index 39b3b74129b..41ea012fda5 100644 --- a/osf/metrics/counted_usage.py +++ b/osf/metrics/counted_usage.py @@ -4,7 +4,7 @@ from urllib.parse import urlsplit from elasticsearch6_dsl import InnerDoc, analyzer, tokenizer -from elasticsearch_metrics import metrics +import elasticsearch_metrics.imps.elastic6 as metrics from elasticsearch_metrics.signals import pre_save from django.dispatch import receiver import pytz diff --git a/osf/metrics/es8_metrics.py b/osf/metrics/es8_metrics.py new file mode 100644 index 00000000000..436a1c62d46 --- /dev/null +++ b/osf/metrics/es8_metrics.py @@ -0,0 +1,348 @@ +import datetime +import enum +from urllib.parse import urlsplit + +import elasticsearch8.dsl as esdsl +from elasticsearch_metrics import DAILY, MONTHLY +import elasticsearch_metrics.imps.elastic8 as djelme + +from osf.metrics.utils import YearMonth + + +### +# custom dsl fields + +class YearmonthField(esdsl.Date): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs, format='strict_year_month') + + def deserialize(self, data): + if isinstance(data, int): + # elasticsearch stores dates in milliseconds since the unix epoch (utc), so convert in utc -- not server-local time, which could shift a first-of-month instant into the previous month + _as_datetime = datetime.datetime.fromtimestamp(data // 1000, tz=datetime.timezone.utc) + return YearMonth.from_date(_as_datetime) + elif data is None: + return None + try: + return YearMonth.from_any(data) + except ValueError: + raise ValueError(f'unsure how to deserialize "{data}" (of type {type(data)}) to YearMonth') + + def serialize(self, data): + if isinstance(data, str): + return data + elif isinstance(data, YearMonth): + return str(data) + elif isinstance(data, (datetime.datetime, datetime.date)): + return str(YearMonth.from_date(data)) + elif data is None: + return None + else: + raise ValueError(f'unsure how to 
serialize "{data}" (of type {type(data)}) as YYYY-MM') + + +### +# inner objects for events + +route_prefix_analyzer = esdsl.analyzer( + 'route_prefix_analyzer', + tokenizer=esdsl.tokenizer('route_prefix_tokenizer', 'path_hierarchy', delimiter='.'), +) + + +class PageviewInfo(esdsl.InnerDoc): + """PageviewInfo + + for the `pageview_info` of an OsfCountedUsageRecord generated by viewing a web page + """ + + # fields that should be provided + referer_url: str + page_url: str + page_title: str + route_name: str = esdsl.mapped_field(esdsl.Keyword( + fields={ + 'by_prefix': esdsl.Text(analyzer=route_prefix_analyzer), + }, + )) + + # fields auto-filled + page_path: str + referer_domain: str + hour_of_day: int + + +### +# Event records + +class OsfCountedUsageRecord(djelme.CountedUsageRecord): + '''counted-usage record with osf-specific fields (derived `pageview_info` fields are autofilled in `save`) + + inherited fields: + platform_iri: str + database_iri: str + item_iri: str + sessionhour_id: str + within_iris: list[str] + ''' + # osf-specific fields + item_osfid: str + item_type: str + item_public: bool + user_is_authenticated: bool + action_labels: list[str] + pageview_info: PageviewInfo + + def save(self, *args, **kwargs): + # autofill pageview_info fields + if self.pageview_info: + self.pageview_info.hour_of_day = self.timestamp.hour + _url = self.pageview_info.page_url + if _url: + self.pageview_info.page_path = urlsplit(_url).path.rstrip('/') + _ref_url = self.pageview_info.referer_url + if _ref_url: + self.pageview_info.referer_domain = urlsplit(_ref_url).netloc + super().save(*args, **kwargs) + + +class ActionLabel(enum.Enum): + SEARCH = 'search' # counter:Search + VIEW = 'view' # counter:Investigation + DOWNLOAD = 'download' # counter:Request + WEB = 'web' # counter:Regular (aka "pageview") + API = 'api' # counter:TDM (aka "non-web api usage") + + +class RegistriesModerationMetricsEs8(djelme.EventRecord): + registration_id: str + provider_id: str + trigger: str + from_state: str + to_state: str + user_id: str + comment: str + + class Index: + settings = { + 'number_of_shards': 1, + 
'number_of_replicas': 1, + 'refresh_interval': '1s', + } + + +### +# Reusable inner objects for reports + +class RunningTotal(esdsl.InnerDoc): + total: int + total_daily: int + + +class FileRunningTotals(esdsl.InnerDoc): + total: int + public: int + private: int + total_daily: int + public_daily: int + private_daily: int + + +class NodeRunningTotals(esdsl.InnerDoc): + total: int + total_excluding_spam: int + public: int + private: int + total_daily: int + total_daily_excluding_spam: int + public_daily: int + private_daily: int + + +class RegistrationRunningTotals(esdsl.InnerDoc): + total: int + public: int + embargoed: int + embargoed_v2: int + withdrawn: int + total_daily: int + public_daily: int + embargoed_daily: int + embargoed_v2_daily: int + withdrawn_daily: int + + +class UsageByStorageAddon(esdsl.InnerDoc): + addon_shortname: str + enabled_usersettings: RunningTotal + linked_usersettings: RunningTotal + deleted_usersettings: RunningTotal + usersetting_links: RunningTotal + connected_nodesettings: RunningTotal + disconnected_nodesettings: RunningTotal + deleted_nodesettings: RunningTotal + + +### +# Cyclic reports + + +class StorageAddonUsageEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = DAILY + + usage_by_addon: list[UsageByStorageAddon] + + +class DownloadCountReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = DAILY + + daily_file_downloads: int + + +class InstitutionSummaryReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = DAILY + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id',) + + institution_id: str + institution_name: str + users: RunningTotal + nodes: NodeRunningTotals + projects: NodeRunningTotals + registered_nodes: RegistrationRunningTotals + registered_projects: RegistrationRunningTotals + + +class NewUserDomainReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = DAILY + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'domain_name',) + + domain_name: str + new_user_count: int + + +class NodeSummaryReportEs8(djelme.CyclicRecord): + 
CYCLE_TIMEDEPTH = DAILY + + nodes: NodeRunningTotals + projects: NodeRunningTotals + registered_nodes: RegistrationRunningTotals + registered_projects: RegistrationRunningTotals + + +class OsfstorageFileCountReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = DAILY + + files: FileRunningTotals + + +class PreprintSummaryReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = DAILY + + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'provider_key',) + provider_key: str + preprint_count: int + + +class UserSummaryReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = DAILY + + active: int + deactivated: int + merged: int + new_users_daily: int + new_users_with_institution_daily: int + unconfirmed: int + + +class SpamSummaryReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = MONTHLY + + node_confirmed_spam: int + node_confirmed_ham: int + node_flagged: int + registration_confirmed_spam: int + registration_confirmed_ham: int + registration_flagged: int + preprint_confirmed_spam: int + preprint_confirmed_ham: int + preprint_flagged: int + user_marked_as_spam: int + user_marked_as_ham: int + + +class InstitutionalUserReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = MONTHLY + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', 'user_id',) + + institution_id: str + # user info: + user_id: str + user_name: str + department_name: str + month_last_login = YearmonthField() + month_last_active = YearmonthField() + account_creation_date = YearmonthField() + orcid_id: str + # counts: + public_project_count: int + private_project_count: int + public_registration_count: int + embargoed_registration_count: int + published_preprint_count: int + public_file_count: int = esdsl.mapped_field(esdsl.Long()) + storage_byte_count: int = esdsl.mapped_field(esdsl.Long()) + + +class InstitutionMonthlySummaryReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = MONTHLY + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'institution_id', ) + + institution_id: str + user_count: int + 
public_project_count: int + private_project_count: int + public_registration_count: int + embargoed_registration_count: int + published_preprint_count: int + storage_byte_count: int = esdsl.mapped_field(esdsl.Long()) + public_file_count: int = esdsl.mapped_field(esdsl.Long()) + monthly_logged_in_user_count: int = esdsl.mapped_field(esdsl.Long()) + monthly_active_user_count: int = esdsl.mapped_field(esdsl.Long()) + + +class PublicItemUsageReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = MONTHLY + UNIQUE_TOGETHER_FIELDS = ('cycle_coverage', 'item_osfid') + + # where noted, fields are meant to correspond to defined terms from COUNTER + # https://cop5.projectcounter.org/en/5.1/appendices/a-glossary-of-terms.html + # https://coprd.countermetrics.org/en/1.0.1/appendices/a-glossary.html + item_osfid: str + item_type: list[str] # counter:Data-Type + provider_id: list[str] # counter:Database(?) + platform_iri: list[str] # counter:Platform + + # view counts include views on components or files contained by this item + view_count: int = esdsl.mapped_field(esdsl.Long()) + view_session_count: int = esdsl.mapped_field(esdsl.Long()) + cumulative_view_count: int = esdsl.mapped_field(esdsl.Long()) + cumulative_view_session_count: int = esdsl.mapped_field(esdsl.Long()) + + # download counts of this item only (not including contained components or files) + download_count: int = esdsl.mapped_field(esdsl.Long()) + download_session_count: int = esdsl.mapped_field(esdsl.Long()) + cumulative_download_count: int = esdsl.mapped_field(esdsl.Long()) + cumulative_download_session_count: int = esdsl.mapped_field(esdsl.Long()) + + +class PrivateSpamMetricsReportEs8(djelme.CyclicRecord): + CYCLE_TIMEDEPTH = MONTHLY + + node_oopspam_flagged: int + node_oopspam_hammed: int + node_akismet_flagged: int + node_akismet_hammed: int + preprint_oopspam_flagged: int + preprint_oopspam_hammed: int + preprint_akismet_flagged: int + preprint_akismet_hammed: int diff --git a/osf/metrics/metric_mixin.py 
b/osf/metrics/metric_mixin.py index 724ab1958da..df87d5123b1 100644 --- a/osf/metrics/metric_mixin.py +++ b/osf/metrics/metric_mixin.py @@ -2,7 +2,7 @@ from django.db import models from django.utils import timezone -from elasticsearch.exceptions import NotFoundError +from elasticsearch6.exceptions import NotFoundError import pytz diff --git a/osf/metrics/preprint_metrics.py b/osf/metrics/preprint_metrics.py index 9d02ec191a2..d284d80827e 100644 --- a/osf/metrics/preprint_metrics.py +++ b/osf/metrics/preprint_metrics.py @@ -1,5 +1,5 @@ -from elasticsearch.exceptions import NotFoundError -from elasticsearch_metrics import metrics +from elasticsearch6.exceptions import NotFoundError +import elasticsearch_metrics.imps.elastic6 as metrics from .metric_mixin import MetricMixin diff --git a/osf/metrics/registry_metrics.py b/osf/metrics/registry_metrics.py index 475dca28673..9c779fe8c0b 100644 --- a/osf/metrics/registry_metrics.py +++ b/osf/metrics/registry_metrics.py @@ -1,4 +1,4 @@ -from elasticsearch_metrics import metrics +import elasticsearch_metrics.imps.elastic6 as metrics from osf.utils.workflows import RegistrationModerationTriggers, RegistrationModerationStates from .metric_mixin import MetricMixin diff --git a/osf/metrics/reporters/public_item_usage.py b/osf/metrics/reporters/public_item_usage.py index cc401d50bd7..7df405d385f 100644 --- a/osf/metrics/reporters/public_item_usage.py +++ b/osf/metrics/reporters/public_item_usage.py @@ -4,7 +4,7 @@ import waffle if typing.TYPE_CHECKING: - import elasticsearch_dsl as edsl + import elasticsearch6_dsl as edsl import osf.features from osf.metadata.osf_gathering import OsfmapPartition diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py index ffbcfb4c9b8..62479e359cd 100644 --- a/osf/metrics/reports.py +++ b/osf/metrics/reports.py @@ -4,7 +4,7 @@ from django.dispatch import receiver from elasticsearch6_dsl import InnerDoc -from elasticsearch_metrics import metrics +import elasticsearch_metrics.imps.elastic6 as 
metrics from elasticsearch_metrics.signals import pre_save as metrics_pre_save from osf.metrics.utils import stable_key, YearMonth @@ -120,6 +120,8 @@ def save(self, *args, **kwargs): @receiver(metrics_pre_save) def set_report_id(sender, instance, **kwargs): + if not issubclass(sender, metrics.Metric): + return # skip es8 record types try: _unique_together_fields = instance.UNIQUE_TOGETHER_FIELDS except AttributeError: diff --git a/osf/models/registrations.py b/osf/models/registrations.py index e1d819b43bf..e9114355649 100644 --- a/osf/models/registrations.py +++ b/osf/models/registrations.py @@ -14,9 +14,11 @@ UserObjectPermissionBase, ) from dirtyfields import DirtyFieldsMixin +import waffle from framework.auth import Auth from framework.exceptions import PermissionsError +from osf import features from osf.models import Identifier from osf.utils.fields import NonNaiveDateTimeField, LowercaseCharField from osf.utils.permissions import ADMIN, READ, WRITE @@ -782,7 +784,8 @@ def _write_registration_action(self, from_state, to_state, initiated_by, comment comment=comment ) action.save() - RegistriesModerationMetrics.record_transitions(action) + if waffle.switch_is_active(features.ELASTICSEARCH_METRICS): + RegistriesModerationMetrics.record_transitions(action) moderation_notifications = { RegistrationModerationTriggers.SUBMIT: notify.notify_submit, diff --git a/osf_tests/management_commands/test_reindex_es6.py b/osf_tests/management_commands/test_reindex_es6.py index 5e01be656a8..36158c18da6 100644 --- a/osf_tests/management_commands/test_reindex_es6.py +++ b/osf_tests/management_commands/test_reindex_es6.py @@ -10,7 +10,7 @@ AuthUserFactory ) -from elasticsearch_metrics.field import Keyword +from elasticsearch6_dsl import Keyword from tests.json_api_test_app import JSONAPITestApp diff --git a/osf_tests/metrics/test_daily_report.py b/osf_tests/metrics/test_daily_report.py index 46375184f95..5228e2342c5 100644 --- a/osf_tests/metrics/test_daily_report.py +++ 
b/osf_tests/metrics/test_daily_report.py @@ -2,7 +2,7 @@ from unittest import mock import pytest -from elasticsearch_metrics import metrics +import elasticsearch_metrics.imps.elastic6 as metrics from osf.metrics.reports import DailyReport, ReportInvalid @@ -10,8 +10,9 @@ class TestDailyReportKey: @pytest.fixture def mock_save(self): - with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: - yield mock_save + with mock.patch('elasticsearch_metrics.imps.elastic6.BaseMetric.check_index_template'): + with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: + yield mock_save def test_default(self, mock_save): # only one of this type of report per day diff --git a/osf_tests/metrics/test_es8_metrics.py b/osf_tests/metrics/test_es8_metrics.py new file mode 100644 index 00000000000..e93579628dc --- /dev/null +++ b/osf_tests/metrics/test_es8_metrics.py @@ -0,0 +1,53 @@ +from datetime import datetime + +from elasticsearch_metrics.tests.util import djelme_test_backends +import pytest + +from osf.metrics.es8_metrics import ( + PageviewInfo, + DownloadCountReportEs8, + OsfCountedUsageRecord, +) + + +class TestEs8Metrics: + """smoke tests to check that djelme records can be saved and searched""" + + @pytest.fixture(autouse=True) + def _real_elastic(self): + with djelme_test_backends(): + yield + + def test_nested_pageview_autofill(self): + usage = OsfCountedUsageRecord.record( + timestamp=datetime(2024, 1, 1, 15, 0), + sessionhour_id='blah', + database_iri='https://osf.example/provider', + item_iri='https://osf.example/itemm', + item_osfid='itemm', + item_public=True, + item_type='https://osf.example/Preprint', + platform_iri='https://osf.example', + user_is_authenticated=False, + pageview_info=PageviewInfo( + page_url='https://example.com/path/test', + referer_url='https://google.com', + route_name='foo.bar', + page_title='title title', + ), + ) + assert usage.pageview_info.page_path == '/path/test' + assert 
usage.pageview_info.referer_domain == 'google.com' + assert usage.pageview_info.hour_of_day == 15 + + def test_save_report(self): + _saved = DownloadCountReportEs8.record( + cycle_coverage='2026.1.1', + daily_file_downloads=17, + ) + DownloadCountReportEs8.refresh_timeseries_indexes() + _response = DownloadCountReportEs8.search().execute() + (_fetched,) = _response + assert _fetched.meta.id == _saved.meta.id + assert _fetched.cycle_coverage == '2026.1.1' + assert _fetched.daily_file_downloads == 17 diff --git a/osf_tests/metrics/test_metric_mixin.py b/osf_tests/metrics/test_metric_mixin.py index 4a2c32f7e71..ec9b2d302de 100644 --- a/osf_tests/metrics/test_metric_mixin.py +++ b/osf_tests/metrics/test_metric_mixin.py @@ -1,6 +1,6 @@ from unittest import mock import pytest -from elasticsearch_metrics import metrics +import elasticsearch_metrics.imps.elastic6 as metrics from osf.metrics.metric_mixin import MetricMixin from osf.models import OSFUser diff --git a/osf_tests/metrics/test_monthly_report.py b/osf_tests/metrics/test_monthly_report.py index 3c841e6555c..ba981e997d6 100644 --- a/osf_tests/metrics/test_monthly_report.py +++ b/osf_tests/metrics/test_monthly_report.py @@ -2,7 +2,7 @@ from unittest import mock import pytest -from elasticsearch_metrics import metrics +import elasticsearch_metrics.imps.elastic6 as metrics from osf.metrics.reports import MonthlyReport, ReportInvalid, PublicItemUsageReport from osf.metrics.utils import YearMonth @@ -11,8 +11,9 @@ class TestMonthlyReportKey: @pytest.fixture def mock_save(self): - with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: - yield mock_save + with mock.patch('elasticsearch_metrics.imps.elastic6.BaseMetric.check_index_template'): + with mock.patch('elasticsearch6_dsl.Document.save', autospec=True) as mock_save: + yield mock_save def test_default(self, mock_save): # only one of this type of report per month @@ -79,6 +80,7 @@ class Meta: @pytest.mark.es_metrics +@pytest.mark.django_db 
class TestLastMonthReport: @pytest.fixture def osfid(self): diff --git a/poetry.lock b/poetry.lock index 83ca13f7a00..90665bce81f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.2.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.3.2 and should not be changed by hand. [[package]] name = "amqp" @@ -1085,27 +1085,24 @@ Django = ">=2.0" [[package]] name = "django-elasticsearch-metrics" -version = "2022.0.6" +version = "2026.0.3" description = "Django app for storing time-series metrics in Elasticsearch." optional = false -python-versions = "*" +python-versions = ">=3.10,<4" groups = ["main"] files = [] develop = false -[package.dependencies] -elasticsearch6-dsl = ">=6.3.0,<7.0.0" - [package.extras] -dev = ["factory-boy (==2.11.1)", "flake8 (==5.0.4)", "flake8-bugbear (==18.8.0)", "konch (>=3.0.0)", "mock", "pre-commit (==2.17.0)", "pytest", "pytest-django (==3.10.0)", "tox"] -lint = ["flake8 (==5.0.4)", "flake8-bugbear (==18.8.0)", "pre-commit (==2.17.0)"] -tests = ["factory-boy (==2.11.1)", "mock", "pytest", "pytest-django (==3.10.0)"] +anydjango = ["django"] +elastic6 = ["elasticsearch6-dsl (>=6.3.0,<7.0.0)"] +elastic8 = ["elasticsearch8 (>=8.0.0,<9.0.0)"] [package.source] type = "git" url = "https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git" -reference = "f5b9312914154e213aa01731e934c593e3434269" -resolved_reference = "f5b9312914154e213aa01731e934c593e3434269" +reference = "8025d58e23b4e0c562e1d59c98b10ec936eb56e6" +resolved_reference = "8025d58e23b4e0c562e1d59c98b10ec936eb56e6" [[package]] name = "django-extensions" @@ -1189,7 +1186,7 @@ files = [ [package.dependencies] autopep8 = "*" Django = ">=3.2" -gprof2dot = ">=2017.09.19" +gprof2dot = ">=2017.9.19" sqlparse = "*" [[package]] @@ -1361,14 +1358,14 @@ stone = ">=2" [[package]] name = "elastic-transport" -version = "8.13.0" +version = "8.17.1" description = "Transport classes and 
utilities shared among Python Elastic client libraries" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" groups = ["main"] files = [ - {file = "elastic-transport-8.13.0.tar.gz", hash = "sha256:2410ec1ff51221e8b3a01c0afa9f0d0498e1386a269283801f5c12f98e42dc45"}, - {file = "elastic_transport-8.13.0-py3-none-any.whl", hash = "sha256:aec890afdddd057762b27ff3553b0be8fa4673ec1a4fd922dfbd00325874bb3d"}, + {file = "elastic_transport-8.17.1-py3-none-any.whl", hash = "sha256:192718f498f1d10c5e9aa8b9cf32aed405e469a7f0e9d6a8923431dbb2c59fb8"}, + {file = "elastic_transport-8.17.1.tar.gz", hash = "sha256:5edef32ac864dca8e2f0a613ef63491ee8d6b8cfb52881fa7313ba9290cac6d2"}, ] [package.dependencies] @@ -1376,46 +1373,7 @@ certifi = "*" urllib3 = ">=1.26.2,<3" [package.extras] -develop = ["aiohttp", "furo", "httpx", "mock", "opentelemetry-api", "opentelemetry-sdk", "orjson", "pytest", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests", "respx", "sphinx (>2)", "sphinx-autodoc-typehints", "trustme"] - -[[package]] -name = "elasticsearch" -version = "6.8.2" -description = "Python client for Elasticsearch" -optional = false -python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*, <4" -groups = ["main"] -files = [ - {file = "elasticsearch-6.8.2-py2.py3-none-any.whl", hash = "sha256:1aedf00b73f5d1e77cb4df70fec58f2efb664be4ce2686374239aa6c0373c65c"}, - {file = "elasticsearch-6.8.2.tar.gz", hash = "sha256:c3a560bb83e4981b5a5c82080d2ceb99686d33692ef53365656129478aa5ddb2"}, -] - -[package.dependencies] -urllib3 = ">=1.21.1" - -[package.extras] -develop = ["coverage", "mock", "nose", "nosexcover", "numpy", "pandas", "pyyaml", "requests (>=2.0.0,<3.0.0)", "sphinx (<1.7)", "sphinx-rtd-theme"] -requests = ["requests (>=2.4.0,<3.0.0)"] - -[[package]] -name = "elasticsearch-dsl" -version = "6.4.0" -description = "Python client for Elasticsearch" -optional = false -python-versions = "*" -groups = ["main"] -files = [ - {file = 
"elasticsearch-dsl-6.4.0.tar.gz", hash = "sha256:26416f4dd46ceca43d62ef74970d9de4bdd6f4b0f163316f0b432c9e61a08bec"}, - {file = "elasticsearch_dsl-6.4.0-py2.py3-none-any.whl", hash = "sha256:f60aea7fd756ac1fbe7ce114bbf4949aefbf495dfe8896640e787c67344f12f6"}, -] - -[package.dependencies] -elasticsearch = ">=6.0.0,<7.0.0" -python-dateutil = "*" -six = "*" - -[package.extras] -develop = ["coverage (<5.0.0)", "mock", "pytest (>=3.0.0)", "pytest-cov", "pytz", "sphinx", "sphinx-rtd-theme"] +develop = ["aiohttp", "furo", "httpx", "opentelemetry-api", "opentelemetry-sdk", "orjson", "pytest", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests", "respx", "sphinx (>2)", "sphinx-autodoc-typehints", "trustme"] [[package]] name = "elasticsearch2" @@ -1471,6 +1429,32 @@ six = "*" [package.extras] develop = ["coverage (<5.0.0)", "mock", "pytest (>=3.0.0)", "pytest-cov", "pytz", "sphinx", "sphinx-rtd-theme"] +[[package]] +name = "elasticsearch8" +version = "8.19.3" +description = "Python client for Elasticsearch" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "elasticsearch8-8.19.3-py3-none-any.whl", hash = "sha256:4b52e59e68aea6f59bf37c28f6f4512333302dd8a52e26c17d0f10c076d833a1"}, + {file = "elasticsearch8-8.19.3.tar.gz", hash = "sha256:7effe95b360241b6d56ef68219037a90ad0f56723614db54bbe57d33058402f4"}, +] + +[package.dependencies] +elastic-transport = ">=8.15.1,<9" +python-dateutil = "*" +typing-extensions = "*" + +[package.extras] +async = ["aiohttp (>=3,<4)"] +dev = ["aiohttp", "black", "build", "coverage", "isort", "jinja2", "mapbox-vector-tile", "mypy", "nox", "numpy", "orjson", "pandas", "pyarrow ; python_version < \"3.14\"", "pyright", "pytest", "pytest-asyncio", "pytest-cov", "pytest-mock", "python-dateutil", "pyyaml (>=5.4)", "requests (>=2,<3)", "simsimd", "tqdm", "twine", "types-python-dateutil", "types-tqdm", "unasync"] +docs = ["sphinx", "sphinx-autodoc-typehints", "sphinx-rtd-theme (>=2.0)"] +orjson = 
["orjson (>=3)"] +pyarrow = ["pyarrow (>=1)"] +requests = ["requests (>=2.4.0,!=2.32.2,<3.0.0)"] +vectorstore-mmr = ["numpy (>=1)", "simsimd (>=3)"] + [[package]] name = "email-validator" version = "2.1.1" @@ -1755,12 +1739,12 @@ files = [ [package.dependencies] google-auth = ">=2.14.1,<3.0.dev0" googleapis-common-protos = ">=1.56.2,<2.0.dev0" -proto-plus = ">=1.22.3,<2.0.0dev" +proto-plus = ">=1.22.3,<2.0.0.dev0" protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<6.0.0.dev0" requests = ">=2.18.0,<3.0.0.dev0" [package.extras] -grpc = ["grpcio (>=1.33.2,<2.0dev)", "grpcio (>=1.49.1,<2.0dev) ; python_version >= \"3.11\"", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0) ; python_version >= \"3.11\""] +grpc = ["grpcio (>=1.33.2,<2.0.dev0)", "grpcio (>=1.49.1,<2.0.dev0) ; python_version >= \"3.11\"", "grpcio-status (>=1.33.2,<2.0.dev0)", "grpcio-status (>=1.49.1,<2.0.dev0) ; python_version >= \"3.11\""] grpcgcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] @@ -1836,11 +1820,11 @@ files = [ ] [package.dependencies] -google-api-core = ">=1.31.6,<2.0.dev0 || >2.3.0,<3.0.0dev" -google-auth = ">=1.25.0,<3.0dev" +google-api-core = ">=1.31.6,<2.0.dev0 || >2.3.0,<3.0.0.dev0" +google-auth = ">=1.25.0,<3.0.dev0" [package.extras] -grpc = ["grpcio (>=1.38.0,<2.0dev)", "grpcio-status (>=1.38.0,<2.0.dev0)"] +grpc = ["grpcio (>=1.38.0,<2.0.dev0)", "grpcio-status (>=1.38.0,<2.0.dev0)"] [[package]] name = "google-cloud-storage" @@ -1855,15 +1839,15 @@ files = [ ] [package.dependencies] -google-api-core = ">=2.15.0,<3.0.0dev" -google-auth = ">=2.26.1,<3.0dev" -google-cloud-core = ">=2.3.0,<3.0dev" -google-crc32c = ">=1.0,<2.0dev" +google-api-core = ">=2.15.0,<3.0.0.dev0" +google-auth = ">=2.26.1,<3.0.dev0" +google-cloud-core = ">=2.3.0,<3.0.dev0" +google-crc32c = ">=1.0,<2.0.dev0" google-resumable-media 
= ">=2.6.0" -requests = ">=2.18.0,<3.0.0dev" +requests = ">=2.18.0,<3.0.0.dev0" [package.extras] -protobuf = ["protobuf (<5.0.0dev)"] +protobuf = ["protobuf (<5.0.0.dev0)"] [[package]] name = "google-crc32c" @@ -1918,11 +1902,11 @@ files = [ ] [package.dependencies] -google-crc32c = ">=1.0,<2.0dev" +google-crc32c = ">=1.0,<2.0.dev0" [package.extras] -aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)", "google-auth (>=1.22.0,<2.0dev)"] -requests = ["requests (>=2.18.0,<3.0.0dev)"] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "google-auth (>=1.22.0,<2.0.dev0)"] +requests = ["requests (>=2.18.0,<3.0.0.dev0)"] [[package]] name = "googleapis-common-protos" @@ -2301,7 +2285,7 @@ files = [ [package.dependencies] attrs = ">=22.2.0" -jsonschema-specifications = ">=2023.03.6" +jsonschema-specifications = ">=2023.3.6" referencing = ">=0.28.4" rpds-py = ">=0.7.1" @@ -3058,7 +3042,7 @@ files = [ ] [package.dependencies] -protobuf = ">=3.19.0,<6.0.0dev" +protobuf = ">=3.19.0,<6.0.0.dev0" [package.extras] testing = ["google-api-core (>=1.31.5)"] @@ -4088,10 +4072,10 @@ files = [ ] [package.dependencies] -botocore = ">=1.33.2,<2.0a.0" +botocore = ">=1.33.2,<2.0a0" [package.extras] -crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] +crt = ["botocore[crt] (>=1.33.2,<2.0a0)"] [[package]] name = "schema" @@ -4412,6 +4396,18 @@ files = [ {file = "types_python_dateutil-2.9.0.20240906-py3-none-any.whl", hash = "sha256:27c8cc2d058ccb14946eebcaaa503088f4f6dbc4fb6093d3d456a49aef2753f6"}, ] +[[package]] +name = "typing-extensions" +version = "4.15.0" +description = "Backported and Experimental Type Hints for Python 3.9+" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, + {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, +] + [[package]] name = "tzdata" version = "2024.1" 
@@ -4715,4 +4711,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.1" python-versions = "^3.12" -content-hash = "2bc7e95f03d05e8b3335514e887b590acdab5cb2a44fc47bde870bdf8e465bf2" +content-hash = "ef1d6d327f5557e43482793b276ccb6c5fd07989f27367af3a3736a8547b4d1a" diff --git a/pyproject.toml b/pyproject.toml index b1646584209..013df3f448d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,9 +31,10 @@ Markupsafe = "2.1.5" blinker = "1.7.0" furl = "2.1.3" elasticsearch2 = "2.5.1" -elasticsearch = "6.8.2" # max version to support elasticsearch6 -elasticsearch-dsl = "6.4.0" # max version to support elasticsearch6 -elastic-transport = "8.13.0" +elasticsearch6= "6.8.2" +elasticsearch6-dsl = "6.4.0" +elasticsearch8 = "8.19.3" +elastic-transport = "8.17.1" google-api-python-client = "2.123.0" google-auth = "2.29.0" Babel = "2.14.0" @@ -90,7 +91,7 @@ datacite = "1.1.3" rdflib = "7.0.0" colorlog = "6.8.2" # Metrics -django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "f5b9312914154e213aa01731e934c593e3434269"} # branch is feature/pin-esdsl +django-elasticsearch-metrics = {git ="https://github.com/CenterForOpenScience/django-elasticsearch-metrics.git", rev = "8025d58e23b4e0c562e1d59c98b10ec936eb56e6"} # Impact Metrics CSV Export djangorestframework-csv = "3.0.2" gevent = "24.2.1" diff --git a/website/settings/defaults.py b/website/settings/defaults.py index 2d3dcecba3b..1e8032cc95c 100644 --- a/website/settings/defaults.py +++ b/website/settings/defaults.py @@ -113,6 +113,10 @@ def parent_dir(path): SEARCH_ENGINE = 'elastic' # Can be 'elastic', or None ELASTIC_URI = '127.0.0.1:9200' ELASTIC6_URI = os.environ.get('ELASTIC6_URI', '127.0.0.1:9201') +ELASTIC8_URI = os.environ.get('ELASTIC8_URI') +ELASTIC8_CERT_PATH = os.environ.get('ELASTIC8_CERT_PATH') +ELASTIC8_USERNAME = os.environ.get('ELASTIC8_USERNAME', 'elastic') +ELASTIC8_SECRET = 
os.environ.get('ELASTIC8_SECRET') ELASTIC_TIMEOUT = 10 ELASTIC_INDEX = 'website' ELASTIC_KWARGS = {