diff --git a/.github/workflows/test_djelme.yml b/.github/workflows/test_djelme.yml index 22191ae..2a12a1b 100644 --- a/.github/workflows/test_djelme.yml +++ b/.github/workflows/test_djelme.yml @@ -12,11 +12,11 @@ jobs: lint: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 id: setup-py with: - python-version: '3.11' + python-version: '3.13' - run: pip install poetry - run: poetry install --no-root --with=lint - run: poetry run python -m elasticsearch_metrics.tests --lint @@ -27,7 +27,6 @@ jobs: matrix: python: ['3.10', '3.11', '3.12', '3.13', '3.14'] django: ['4.2', '5.1', '5.2'] - # TODO: elasticsearch: ['6', '7', '8', '9'] services: elasticsearch6: image: elasticsearch:6.8.23 @@ -42,8 +41,8 @@ jobs: node.name: singlenode cluster.initial_master_nodes: singlenode steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + - uses: actions/checkout@v6 + - uses: actions/setup-python@v6 id: setup-py with: python-version: ${{ matrix.python }} diff --git a/README.md b/README.md index 8ee185d..49091c3 100644 --- a/README.md +++ b/README.md @@ -61,15 +61,9 @@ class UsageRecord(EventRecord): using = "my-es8-backend" # backend name -- required if multiple backends use the same imp ``` -Either enable autosetup... -```python -# ... in your django project settings file ... -DJELME_AUTOSETUP = True -``` - -...or be sure to run the `djelme_backend_setup` management command before trying to store anything. 
+and be sure to run the `djelme_backend_setup` management command before trying to store anything: ```shell -# This will create an index template for usagerecord timeseries indexes +# This will create an index template for each timeseries record type python manage.py djelme_backend_setup ``` @@ -100,18 +94,18 @@ UsageRecord.search_timeseries_range(datetime.date(2030, 1, 1), datetime.date(203 By default, behind the scenes, a new elasticsearch index is created for each record type for each day in which a record is saved (using UTC timezone). You can change this for a record type by setting -`Meta.timedepth`, or change the default timedepth with the setting `DJELME_DEFAULT_TIMEDEPTH` (see below). +`Meta.timeseries_index_timedepth`, or change the default timedepth with the setting `DJELME_DEFAULT_TIMEDEPTH` (see below). ```python class MyEventWithMonthlyIndexes(EventRecord): class Meta: - timedepth = 2 # year and month + timeseries_index_timedepth = 2 # year and month ``` -- index per year: `timedepth = 1` -- index per month: `timedepth = 2` -- index per day: `timedepth = 3` (default) -- index per hour: `timedepth = 4` +- index per year: `timeseries_index_timedepth = 1` +- index per month: `timeseries_index_timedepth = 2` +- index per day: `timeseries_index_timedepth = 3` (default) +- index per hour: `timeseries_index_timedepth = 4` ## Index settings @@ -192,10 +186,6 @@ class UsageRecord(MyBaseMetric): } ``` -* `DJELME_AUTOSETUP`: Optional feature, default `False` -- - set `True` to run backend setup automatically when your django app starts - (like creating index templates in elasticsearch, if they don't already exist) - * `DJELME_DEFAULT_TIMEDEPTH`: Set the granularity of timeseries indexes by the number of "time parts" in index names ``` DJELME_DEFAULT_TIMEDEPTH = 1 # yearly indexes; YYYY @@ -203,7 +193,7 @@ class UsageRecord(MyBaseMetric): DJELME_DEFAULT_TIMEDEPTH = 3 # daily indexes; YYYY.MM.DD (this is the default) DJELME_DEFAULT_TIMEDEPTH = 4 # hourly indexes; 
YYYY.MM.DD.HH ``` - you can also set `Meta.timedepth` on a specific record type; this will take precedence + you can also set `Meta.timeseries_index_timedepth` on a specific record type; this will take precedence ## Management commands diff --git a/elasticsearch_metrics/apps.py b/elasticsearch_metrics/apps.py index e44d24c..caa6ff1 100644 --- a/elasticsearch_metrics/apps.py +++ b/elasticsearch_metrics/apps.py @@ -1,38 +1,25 @@ import collections from django.apps import AppConfig -from django.conf import settings from django.utils.module_loading import autodiscover_modules from elasticsearch_metrics.registry import djelme_registry -AUTOSETUP_SETTING = "DJELME_AUTOSETUP" - class ElasticsearchMetricsConfig(AppConfig): name = "elasticsearch_metrics" def ready(self) -> None: - # load backends settings - _backend_names_by_module = collections.defaultdict(list) - for ( - _backend_name, - _imp_module_name, - _, - ) in djelme_registry.each_backend_settings(): - _backend_names_by_module[_imp_module_name].append(_backend_name) # discover any `foo.metrics` in installed apps autodiscover_modules("metrics") - # call `djelme_when_ready` for each imp module (only once) - for _imp_module_name, _backend_names in _backend_names_by_module.items(): + # call `djelme_when_ready` once for each djelme imp module used by a backend + _backends_by_imp: dict[str, list[str]] = collections.defaultdict(list) + for _backend_name, _imp_name, _ in djelme_registry.each_backend_settings(): + _backends_by_imp[_imp_name].append(_backend_name) + for _imp_module_name, _backend_names in _backends_by_imp.items(): _imp_module = djelme_registry.get_imp_module(_imp_module_name) _imp_module.djelme_when_ready( backends=[ djelme_registry.get_backend(_name) for _name in _backend_names ] ) - # autosetup? 
(default no) - if getattr(settings, AUTOSETUP_SETTING, False) is True: - _types_by_backend = djelme_registry.recordtypes_by_backend() - for _backend_name, _recordtypes in _types_by_backend.items(): - djelme_registry.get_backend(_backend_name).djelme_setup(_recordtypes) diff --git a/elasticsearch_metrics/exceptions.py b/elasticsearch_metrics/exceptions.py index ca80ff6..8b97f97 100644 --- a/elasticsearch_metrics/exceptions.py +++ b/elasticsearch_metrics/exceptions.py @@ -2,17 +2,33 @@ class DjelmeError(Exception): """Base class from which all django-elasticsearch-metrics -related exceptions inherit.""" -class TimeseriesSetupError(DjelmeError): +class DjelmeSetupError(DjelmeError): """for errors that might be solved by `djelme_backend_setup`""" -class IndexTemplateNotFoundError(TimeseriesSetupError): +class IndexNotFoundError(DjelmeSetupError): + """specific index not found""" + + def __init__(self, message, client_error): + self.client_error = client_error + super().__init__(message, client_error) + + +class IndexOutOfSyncError(DjelmeSetupError): + """specific index has different mappings than expected""" + + +class IndexTemplateNotFoundError(DjelmeSetupError): + """index template not found""" + def __init__(self, message, client_error): self.client_error = client_error super().__init__(message, client_error) -class IndexTemplateOutOfSyncError(TimeseriesSetupError): +class IndexTemplateOutOfSyncError(DjelmeSetupError): + """index template has different mappings, settings, or patterns than expected""" + def __init__(self, message, mappings_in_sync, patterns_in_sync, settings_in_sync): self.mappings_in_sync = mappings_in_sync self.patterns_in_sync = patterns_in_sync diff --git a/elasticsearch_metrics/imps/elastic6.py b/elasticsearch_metrics/imps/elastic6.py index ae4cd17..5933fc0 100644 --- a/elasticsearch_metrics/imps/elastic6.py +++ b/elasticsearch_metrics/imps/elastic6.py @@ -8,6 +8,7 @@ from collections.abc import Iterator import dataclasses import datetime 
+import functools import logging from django.apps import apps @@ -137,12 +138,12 @@ def construct_index(cls, opts, bases): @property def _template_name(self): - _prefix = self.get_timeseries_name_prefix() + _prefix = self.get_index_name_prefix() return f"{_prefix}{self.__template_name}" @property def _template_pattern(self): - _prefix = self.get_timeseries_name_prefix() + _prefix = self.get_index_name_prefix() return f"{_prefix}{self.__template_pattern}" @@ -188,7 +189,7 @@ def sync_index_template(cls, using=None): return index_template @classmethod - def check_index_template(cls, using: str | None = None) -> bool: + def check_index_template(cls, using: str | None = None) -> None: """Check if class is in sync with index template in Elasticsearch. :raise: IndexTemplateNotFoundError if index template does not exist. @@ -204,7 +205,7 @@ def check_index_template(cls, using: str | None = None) -> bool: template_name = cls._template_name metric_name = cls.__name__ raise exceptions.IndexTemplateNotFoundError( - "{template_name} does not exist for {metric_name}".format(**locals()), + f"Index template {template_name!r} does not exist for {metric_name}", client_error=client_error, ) from client_error else: @@ -241,18 +242,21 @@ def check_index_template(cls, using: str | None = None) -> bool: [key for key, value in word_map.items() if not value] ) raise exceptions.IndexTemplateOutOfSyncError( - "{template_name} is out of sync with {metric_name} ({out_of_sync})".format( - **locals() - ), + f"Index template {template_name!r} is out of sync with {metric_name} ({out_of_sync})", mappings_in_sync=mappings_in_sync, patterns_in_sync=patterns_in_sync, settings_in_sync=settings_in_sync, ) - return True @classmethod - def check_djelme_setup(cls, using: str | None = None) -> bool: - return cls.check_index_template(using) + def check_djelme_setup(cls, using: str | None = None) -> None: + cls.check_index_template(using) + + @classmethod + @functools.cache + def require_been_setup(cls, 
using: str | None = None) -> None: + """check setup once -- raise on failure, remember success""" + cls.check_djelme_setup(using) @classmethod def get_timeseries_index_template(cls): @@ -262,7 +266,7 @@ def get_timeseries_index_template(cls): ) @classmethod - def get_timeseries_name_prefix(cls) -> str: + def get_index_name_prefix(cls) -> str: return "" @classmethod @@ -302,6 +306,7 @@ def save(self, using=None, index=None, validate=True, **kwargs): """Same as `Document.save`, except will save into the index determined by the metric's timestamp field. """ + self.require_been_setup(using=using) # prevent automapped indexes self.timestamp = self.timestamp or timezone.now() if not index: index = self.get_index_name(date=self.timestamp) @@ -351,7 +356,7 @@ class DjelmeElastic6Backend: imp_kwargs: dict[str, str] @property - def elastic6_client(self): + def elastic_client(self): # assumes `connections.configure` was already called return connections.get_connection(self.backend_name) @@ -373,9 +378,10 @@ def djelme_teardown(self, recordtypes: collections.abc.Iterable[type]) -> None: logger.info("tearing down %r", _metric_type) _indexname_wildcard = _metric_type._template_pattern _templatename = _metric_type._template_name - self.elastic6_client.indices.delete(index=_indexname_wildcard) + _client = self.elastic_client + _client.indices.delete(index=_indexname_wildcard) try: - self.elastic6_client.indices.delete_template(_templatename) + _client.indices.delete_template(_templatename) except NotFoundError: pass diff --git a/elasticsearch_metrics/imps/elastic8.py b/elasticsearch_metrics/imps/elastic8.py index d03aacb..799078b 100644 --- a/elasticsearch_metrics/imps/elastic8.py +++ b/elasticsearch_metrics/imps/elastic8.py @@ -22,10 +22,10 @@ import collections import dataclasses import datetime +import functools import logging import typing -import django from django.core.exceptions import ImproperlyConfigured from django.conf import settings from elasticsearch8.exceptions 
import NotFoundError @@ -42,9 +42,9 @@ ProtoDjelmeRecord, ) from elasticsearch_metrics.util import timeseries_naming +from elasticsearch_metrics.util.django import find_app_label_for_module from elasticsearch_metrics.util.timeparts import format_full_timeparts, format_timeparts -from elasticsearch_metrics.util.unique_together import get_unique_id -from elasticsearch_metrics.util.anon_enough import opaque_sessionhour_id +from elasticsearch_metrics.util.anon_enough import opaque_key, opaque_sessionhour_id logger = logging.getLogger(__name__) @@ -62,25 +62,38 @@ def utcnow() -> datetime.datetime: # change default mapping for `str` annotations from Text to Keyword: esdsl.document_base.DocumentOptions.type_annotation_map[str] = (esdsl.Keyword, {}) +# change default timezone for `datetime` annotations from "local" to UTC: +esdsl.document_base.DocumentOptions.type_annotation_map[datetime.datetime] = ( + esdsl.Date, + {"default_timezone": "UTC"}, +) # changes to document metaclass behavior -class _DjelmeRecordtypeMetaclass(IndexMeta): - """Metaclass for the base `DjelmeRecordtype` class. +class _DjelmeRecordMetaclass(IndexMeta): + """Metaclass for the base `BaseDjelmeRecord` class. 
- overrides behavior in elasticsearch-py's `IndexMeta` to allow - additional config in a type's `class Meta` + extend elasticsearch-py's `IndexMeta` to: + - belong to a django app (identified by `app_label` property) + - register concrete types with elasticsearch_metrics.registry.djelme_registry + - allow abstract record types (similar to django's abstract models, with `Meta.abstract`) """ Meta: type - # override IndexMeta.__new__, to do a few things differently def __new__(mcls, name, bases, attrs): # noqa: B902 + """create a new BaseDjelmeRecord subclass + + extend elasticsearch-py's `IndexMeta.__new__` to: + - register concrete types with elasticsearch_metrics.registry.djelme_registry + - preserve a type's `class Meta` so it can hold additional custom config + - inherit field defaults (bug workaround?) + """ # save `class Meta` to un-remove it later _cls_meta = attrs.get("Meta") or type("Meta", (), {}) # call IndexMeta.__new__ _cls = super().__new__(mcls, name, bases, attrs) - assert isinstance(_cls, _DjelmeRecordtypeMetaclass) + assert isinstance(_cls, _DjelmeRecordMetaclass) # un-remove `class Meta` for later use if "Meta" in _cls.__dict__: for _attrname, _attrval in _cls_meta.__dict__.items(): @@ -88,13 +101,13 @@ def __new__(mcls, name, bases, attrs): # noqa: B902 setattr(_cls.Meta, _attrname, _attrval) else: # guarantee non-inherited Meta _cls.Meta = _cls_meta - # workaround elasticsearch.dsl inheriting only fields, not defaults + # workaround elasticsearch8.dsl inheriting only fields, not defaults for _b in bases: for _fieldname, _default in getattr(_b, "_defaults", {}).items(): _cls._defaults.setdefault(_fieldname, _default) # and register concrete record types with the djelme registry if not _cls.is_abstract: - assert issubclass(_cls, DjelmeRecordtype) + assert issubclass(_cls, BaseDjelmeRecord) _given_using = _cls._index._using _default_backend = ( _given_using @@ -110,6 +123,32 @@ def __new__(mcls, name, bases, attrs): # noqa: B902 ) return _cls + 
@classmethod + def construct_index(cls, opts, bases): + """ + Extend IndexMeta.construct_index so a new Index is created for each class + and Index.settings, Index.analyzers, and Index.using are inherited + (but not Index.name or Index.aliases) + """ + _base_index_configs = collections.ChainMap( + *(base._index.to_dict() for base in bases if hasattr(base, "_index")) + ) + _index_opts = opts or type("Index", (), {}) + if not hasattr(_index_opts, "settings") and ( + _inherited_settings := _base_index_configs.get("settings") + ): + _index_opts.settings = _inherited_settings + if not hasattr(_index_opts, "analyzers") and ( + _inherited_analyzers := _base_index_configs.get("analyzers") + ): + _index_opts.analyzers = _inherited_analyzers + if not hasattr(_index_opts, "using"): + _inherited_using = _base_index_configs.get("using") + if _inherited_using and (_inherited_using != "default"): + _index_opts.using = _inherited_using + assert _index_opts is not None # guarantee separate index per class + return super().construct_index(_index_opts, bases) + def _get_meta_attr(self, attr_name: str, default: typing.Any = None) -> typing.Any: _meta = getattr(self, "Meta", None) return getattr(_meta, attr_name, default) @@ -125,10 +164,10 @@ def app_label(self) -> str: return _app_label -class DjelmeRecordtype(esdsl.Document, metaclass=_DjelmeRecordtypeMetaclass): +class BaseDjelmeRecord(esdsl.Document, metaclass=_DjelmeRecordMetaclass): """a subclass of elasticsearch8.dsl.Document, with conveniences - >>> class MyAbstractRecord(DjelmeRecordtype): + >>> class MyAbstractRecord(BaseDjelmeRecord): ... foo: int ... class Meta: ... 
abstract = True @@ -153,29 +192,59 @@ class DjelmeRecordtype(esdsl.Document, metaclass=_DjelmeRecordtypeMetaclass): """ UNIQUE_TOGETHER_FIELDS: typing.ClassVar[collections.abc.Iterable[str]] = () - unique_id: str = esdsl.mapped_field(esdsl.Keyword(), default="") # filled on save class Meta: abstract = True + @classmethod + def check_djelme_setup(cls, using: str | None = None) -> None: + raise NotImplementedError # expected on subclasses + + @classmethod + def do_teardown(cls, es_client): + raise NotImplementedError # expected on subclasses + @classmethod def record( - cls, *, using: str | None = None, **kwargs: typing.Any + cls, + *, + using: str | typing.Literal[False] | None = None, + **kwargs: typing.Any, ) -> "typing.Self": # typing.Self added in py 3.11 -- str annotation until 3.10 eol - """Persist a record in Elasticsearch.""" + """Construct a record instance and save it in Elasticsearch. + + Keyword args: + using -- name of the djelme backend or elasticsearch8.dsl connection + to use to save, or `False` to skip saving (e.g. for use in a bulk operation) + all other kwargs passed thru to the class constructor + """ + assert not cls.is_abstract _instance = cls(**kwargs) - _instance.save(using=using) + if using is not False: + _instance.save(using=using) return _instance @classmethod - def check_djelme_setup(cls, using: str | None = None) -> bool: - # this base class has only a single index -- does it exist? 
- return bool(cls._index.get(using=using)) + @functools.cache + def require_been_setup(cls, using: str | None = None) -> None: + """check setup once -- raise on failure, remember success""" + cls.check_djelme_setup(using) + + @classmethod + def get_index_name_prefix(cls) -> str: + _name_prefix = ( + cls._get_meta_attr("index_name_prefix") + or cls._get_djelme_backend().default_index_name_prefix + or "" + ) + assert isinstance(_name_prefix, str) + return _name_prefix @classmethod - def _djelme_teardown(cls, es_client): - # this base class has only a single index -- delete it - cls._index.delete(using=es_client) + def _get_djelme_backend(cls) -> "DjelmeElastic8Backend": + _backend = djelme_registry.get_backend_for_recordtype(cls) + assert isinstance(_backend, DjelmeElastic8Backend) + return _backend @classmethod def _get_using( @@ -183,13 +252,13 @@ def _get_using( ) -> str | Elastic8Client: """get the elasticsearch8 connection name to use - overrides elasticsearch8.Document._get_using to allow - getting connection name from a djelme backend and default - to the first configured backend that uses this imp module + extend `elasticsearch8.dsl.Document._get_using` to: + - recognize djelme backend names in `using` (given or configured) + - if no `using` given or configured, find a djelme backend by imp module """ _backend: ProtoDjelmeBackend | None = None if using in (None, "default"): - _backend = djelme_registry.get_backend_for_recordtype(cls) + _backend = cls._get_djelme_backend() elif isinstance(using, str) and (using in djelme_registry.all_backends): _backend = djelme_registry.get_backend(using) if _backend is not None: @@ -197,50 +266,141 @@ def _get_using( return _backend._elastic8dsl_connection_name return super()._get_using(using) + def _get_index(self, index: str | None = None, required: bool = True) -> str | None: + """get the index name for this record + + extend elasticsearch8.dsl's DocumentBase._get_index to default to djelme_index_name() + """ + return 
super()._get_index(index or self.djelme_index_name(), required) + + def clean(self) -> None: + """fill fields based on other fields, before saving + + extend elasticsearch8.dsl's DocumentBase.clean to use the djelme index name + + subclasses that need to autofill or transform fields should further extend this method + """ + self._populate_unique_id() + super().clean() + def save( self, + *, using: str | Elastic8Client | None = None, index: str | None = None, - validate: bool = True, - skip_empty: bool = True, - return_doc_meta: bool = False, **kwargs: typing.Any, ) -> typing.Any: """save the record - overrides `save` to populate document_id and send pre_save/post_save signals + extend `elasticsearch8.dsl.Document.save` to: + - populate document id based on UNIQUE_TOGETHER_FIELDS + - send pre_save/post_save signals """ - self._populate_unique_id() + assert not self.__class__.is_abstract + self.require_been_setup(using=using) # prevent automapped indexes signals.pre_save.send(self.__class__, instance=self, using=using, index=index) - _saved = super().save( - using=using, index=index, validate=validate, skip_empty=skip_empty, **kwargs - ) + _saved = super().save(using=using, index=index, **kwargs) signals.post_save.send(self.__class__, instance=self, using=using, index=index) return _saved def _populate_unique_id(self) -> None: - _unique_id = self._get_unique_id() - assert _unique_id or not self.UNIQUE_TOGETHER_FIELDS - self.unique_id = _unique_id or "" - # make it unique by setting doc id in elasticsearch - if _unique_id: - self.meta.id = _unique_id - - def _get_unique_id(self) -> str | None: """ - Get a unique document id by hashing values of "unique together" + Set a unique document id by hashing values of "unique together" fields for "ON CONFLICT UPDATE" behavior -- if the document already exists, it will be replaced rather than duplicated. Cannot detect/avoid conflicts this way, but that's ok. 
""" - if not self.UNIQUE_TOGETHER_FIELDS: - return None - return get_unique_id( - (getattr(self, _field_name) for _field_name in self.UNIQUE_TOGETHER_FIELDS) - ) + _unique_together = self._get_unique_together_values() + assert _unique_together or not self.UNIQUE_TOGETHER_FIELDS + if _unique_together: + # make it unique by setting doc id in elasticsearch + self.meta.id = opaque_key(_unique_together) + + def _get_unique_together_values(self) -> list: + return [ + getattr(self, _field_name) + for _field_name in (self.UNIQUE_TOGETHER_FIELDS or ()) + ] + + +class _SimpleRecordMetaclass(_DjelmeRecordMetaclass): + @property + def _index(self): + if self.is_abstract: + return self.__index + # return a copy with `name` and `using` freshly computed + try: + _backend = self._get_djelme_backend() + except LookupError: + _using = None # may not be registered yet, is ok + _index_name = "" + else: + _using = _backend._elastic8dsl_connection_name + _index_name = self.djelme_index_name() + return self.__index.clone(name=_index_name, using=_using) + + @_index.setter + def _index(self, val): + # stash in a private attr so only the property getter can reach it + self.__index = val + + +class SimpleRecord(BaseDjelmeRecord, metaclass=_SimpleRecordMetaclass): + class Meta: + abstract = True + + @classmethod + def djelme_index_name(cls) -> str: + """the index name for this record type + for ProtoDjelmeRecord + """ + assert not cls.is_abstract + _app_label = find_app_label_for_module(cls.__module__) + _index_name = f"{_app_label.lower()}_{cls.__name__.lower()}" + _prefix = cls.get_index_name_prefix() + if not _index_name.startswith(_prefix): + _index_name = f"{_prefix}{_index_name}" + return _index_name -class TimeseriesRecord(DjelmeRecordtype): + @classmethod + def check_djelme_setup(cls, using: str | None = None) -> None: + """Check if class is in sync with index mappings in Elasticsearch. + + :raise: IndexNotFoundError if index does not exist. 
+ :raise: IndexOutOfSyncError if mappings are out of sync. + :return: if index exists and mappings are in sync. + """ + assert not cls.is_abstract + _client = cls._get_connection(using) + _index_name = cls.djelme_index_name() + try: + _index_response = _client.indices.get(index=_index_name) + except NotFoundError as client_error: + raise exceptions.IndexNotFoundError( + f"Index {_index_name} does not exist for {cls.__name__}", + client_error=client_error, + ) from client_error + else: + _current_mappings = _index_response[_index_name]["mappings"] + _expected_mappings = cls._index.to_dict()["mappings"] + if _current_mappings != _expected_mappings: + raise exceptions.IndexOutOfSyncError( + f"Index {_index_name} has mappings out of sync with {cls.__name__}", + ) + + @classmethod + def do_teardown(cls, es_client): + assert not cls.is_abstract + cls._index.delete(using=es_client, ignore_unavailable=True) + + @classmethod + def refresh(cls, using: str | None = None) -> None: + assert not cls.is_abstract + cls._index.refresh(using=using) + + +class TimeseriesRecord(BaseDjelmeRecord): # the 'version' field type allows range queries on semver-like strings # that fit perfectly with "timeparts" representation of a UTC datetime # as a sequence of integers -- helps to avoid time zones and date math @@ -257,8 +417,7 @@ class Meta: def init(cls, index=None, using=None) -> None: """Create an index template with mappings for timeseries indexes - overrides elasticsearch.Document.init - (but doesn't call super().init(), which would create a "now" index) + override `elasticsearch8.dsl.Document.init` to create a template instead of an index """ assert not cls.is_abstract cls.sync_index_template(using=using) @@ -271,6 +430,7 @@ def search( ) -> esdsl.Search[ "typing.Self" ]: # typing.Self added in py 3.11 -- str annotation until 3.10 eol + assert not cls.is_abstract return super().search( using=using, index=(index or cls.format_timeseries_index_pattern()), @@ -280,8 +440,7 @@ def 
search( def search_timeseries_range( cls, from_when: tuple[int, ...] | datetime.date, - until_when: tuple[int, ...] | datetime.date | None, - **kwargs: typing.Any, + until_when: tuple[int, ...] | datetime.date, ) -> typing.Any: _index_pattern = cls.format_timeseries_index_pattern_for_range( from_when, until_when @@ -295,7 +454,8 @@ def search_timeseries_range( return cls.search(index=_index_pattern).filter(_timeseries_q) @classmethod - def refresh_timeseries_indexes(cls, using: str | None = None) -> None: + def refresh(cls, using: str | None = None) -> None: + assert not cls.is_abstract cls._get_connection(using).indices.refresh( index=cls.format_timeseries_index_pattern() ) @@ -313,7 +473,8 @@ def each_timeseries_index( yield _index_name, _index_info @classmethod - def _djelme_teardown(cls, es8_client: Elastic8Client) -> None: + def do_teardown(cls, es8_client: Elastic8Client) -> None: + assert not cls.is_abstract _indexname_wildcard = cls.format_timeseries_index_pattern() _indices = es8_client.indices.get(index=_indexname_wildcard, features=",") for _index_name in _indices.keys(): @@ -326,6 +487,7 @@ def _djelme_teardown(cls, es8_client: Elastic8Client) -> None: @classmethod def get_timeseries_template(cls) -> esdsl.ComposableIndexTemplate: + assert not cls.is_abstract return cls._index.as_composable_template( template_name=cls.get_timeseries_template_name(), pattern=cls.format_timeseries_index_pattern(), @@ -336,7 +498,7 @@ def get_timeseries_template_name(cls) -> str: _template_name = timeseries_naming.format_template_name( cls.app_label, cls.get_timeseries_recordtype_name() ) - return "".join((cls.get_timeseries_name_prefix(), _template_name)) + return "".join((cls.get_index_name_prefix(), _template_name)) @classmethod def get_timeseries_recordtype_name(cls) -> str: @@ -346,21 +508,15 @@ def get_timeseries_recordtype_name(cls) -> str: assert isinstance(_recordtype_name, str) return _recordtype_name - @classmethod - def get_timeseries_name_prefix(cls) -> str: - 
_name_prefix = cls._get_meta_attr("timeseries_name_prefix") or "" - assert isinstance(_name_prefix, str) - return _name_prefix - @classmethod def format_timeseries_index_pattern(cls, timeparts: tuple[int, ...] = ()) -> str: _pattern = timeseries_naming.format_index_pattern( app_label=cls.app_label, recordtype=cls.get_timeseries_recordtype_name(), timeparts=timeparts, - max_timedepth=cls.get_timedepth(), + max_timedepth=cls.get_timeseries_index_timedepth(), ) - return "".join((cls.get_timeseries_name_prefix(), _pattern)) + return "".join((cls.get_index_name_prefix(), _pattern)) @classmethod def format_timeseries_index_pattern_for_range( @@ -373,16 +529,18 @@ def format_timeseries_index_pattern_for_range( cls.get_timeseries_recordtype_name(), from_when, until_when or utcnow(), - timedepth=cls.get_timedepth(), + timedepth=cls.get_timeseries_index_timedepth(), ) - return "".join((cls.get_timeseries_name_prefix(), _pattern)) + return "".join((cls.get_index_name_prefix(), _pattern)) @classmethod - def get_timedepth(cls) -> int: + def get_timeseries_index_timedepth(cls) -> int: _default_timedepth = getattr( settings, _DEFAULT_TIMEDEPTH_SETTING, _DEFAULT_TIMEDEPTH ) - _timedepth = cls._get_meta_attr("timedepth", _default_timedepth) + _timedepth = cls._get_meta_attr( + "timeseries_index_timedepth", _default_timedepth + ) assert isinstance(_timedepth, int) return _timedepth @@ -409,7 +567,7 @@ def sync_index_template(cls, using=None): # -> ComposableIndexTemplate: return _template @classmethod - def check_djelme_setup(cls, using: str | Elastic8Client | None = None) -> bool: + def check_djelme_setup(cls, using: str | Elastic8Client | None = None) -> None: """Check if class is in sync with index template in Elasticsearch. :raise: IndexTemplateNotFoundError if index template does not exist. 
@@ -418,7 +576,6 @@ def check_djelme_setup(cls, using: str | Elastic8Client | None = None) -> bool: :return: True if index template exsits and mappings, settings, and index patterns are in sync. """ - super().check_djelme_setup() client = cls._get_connection(using) try: _template_response = client.indices.get_index_template( @@ -465,16 +622,10 @@ def check_djelme_setup(cls, using: str | Elastic8Client | None = None) -> bool: patterns_in_sync=patterns_in_sync, settings_in_sync=settings_in_sync, ) - return True ### # instance methods - def __init__(self, *args, **kwargs): - assert not self.__class__.is_abstract - super().__init__(*args, **kwargs) - self.timeseries_timeparts = self.get_timeseries_timeparts() - def get_timeseries_timeparts(self) -> str: """semverlike string of timeparts, used to choose a timeseries index""" raise NotImplementedError( @@ -486,32 +637,21 @@ def djelme_index_name(self) -> str: for ProtoDjelmeRecord """ - assert self.timeseries_timeparts _index_name = timeseries_naming.format_index_name( app_label=self.__class__.app_label, recordtype=self.get_timeseries_recordtype_name(), - timeparts=self.timeseries_timeparts, - max_timedepth=self.get_timedepth(), + timeparts=self.timeseries_timeparts or self.get_timeseries_timeparts(), + max_timedepth=self.get_timeseries_index_timedepth(), ) - return "".join((self.get_timeseries_name_prefix(), _index_name)) + return "".join((self.get_index_name_prefix(), _index_name)) - def save( - self, - using: str | Elastic8Client | None = None, - index: str | None = None, - validate: bool = True, - skip_empty: bool = True, - return_doc_meta: bool = False, - **kwargs: typing.Any, - ) -> typing.Any: + def clean(self) -> None: """save the record to a timeseries index - overrides `Document.save` to choose a specific timeseries index + extend `elasticsearch8.dsl.Document.save` to choose a specific timeseries index """ - if index is None: - index = self.djelme_index_name() - ret = super().save(using=using, index=index, 
**kwargs) - return ret + super().clean() + self.timeseries_timeparts = self.get_timeseries_timeparts() # TODO: EventRecord expiration @@ -553,32 +693,22 @@ def record( *, # each usage record needs a sessionhour_id -- for migrating old data, can set explicitly... sessionhour_id: str = "", - # ...but when saving new data, give either the dirty identifying strings: + # ...but when saving new data, give the dirty identifying strings + # (which won't be stored, but used to create an opaque sessionhour_id) user_id: str = "", - session_id: str = "", + client_session_id: str = "", request_host: str = "", request_useragent: str = "", - # ...or a django request to infer from - django_request: django.http.HttpRequest | None = None, # additional kwargs presumed to give field values **kwargs: typing.Any, ) -> "typing.Self": # typing.Self added in py 3.11 -- str annotation until 3.10 eol """CountedUsageRecord.record(...): construct and save a record""" - _useragent = ( - request_useragent - if (request_useragent or (django_request is None)) - else django_request.META.get("HTTP_USER_AGENT", "") - ) - _host = ( - request_host - if (request_host or (django_request is None)) - else django_request.get_host() - ) - _sessionhour_id = kwargs.pop("sessionhour_id", None) or opaque_sessionhour_id( - client_session_id=session_id, + _sessionhour_id = sessionhour_id or opaque_sessionhour_id( + client_session_id=client_session_id, user_id=user_id, - request_host=_useragent, - request_useragent=_host, + request_host=request_host, + request_useragent=request_useragent, + timestamp=kwargs.get("timestamp"), ) _new_record = super().record(**kwargs, sessionhour_id=_sessionhour_id) assert isinstance(_new_record, cls) @@ -627,6 +757,10 @@ def get_timeseries_timeparts(self) -> str: class DjelmeElastic8Backend: """DjelmeElastic8Backend: elastic8 backend for djelme (for use by generic djelme code)""" + _NON_PASSTHRU_KWARGS: typing.ClassVar[collections.abc.Collection[str]] = { + 
"djelme_default_index_name_prefix", + } + backend_name: str imp_kwargs: dict[str, str] # pass-thru to elasticsearch connection kwargs @@ -637,7 +771,11 @@ def djelme_imp_kwargs(self) -> dict[str, str]: # for ProtoDjelmeBackend return self.imp_kwargs @property - def elastic8_client(self) -> Elastic8Client: + def default_index_name_prefix(self) -> str: + return self.imp_kwargs.get("djelme_default_index_name_prefix", "") + + @property + def elastic_client(self) -> Elastic8Client: # assumes `connections.configure` was already called return esdsl.connections.get_connection(self._elastic8dsl_connection_name) @@ -647,27 +785,31 @@ def _elastic8dsl_connection_name(self) -> str: @property def _elastic8dsl_connection_kwargs(self) -> dict[str, typing.Any]: - return self.imp_kwargs + return { + _key: _val + for _key, _val in self.imp_kwargs.items() + if _key not in self._NON_PASSTHRU_KWARGS + } def djelme_setup(self, recordtypes: collections.abc.Iterable[type]) -> None: # for ProtoDjelmeBackend for _recordtype in recordtypes: # TODO: logger.info - assert issubclass(_recordtype, DjelmeRecordtype) + assert issubclass(_recordtype, BaseDjelmeRecord) _recordtype.init(using=self._elastic8dsl_connection_name) def djelme_teardown(self, recordtypes: collections.abc.Iterable[type]) -> None: # for ProtoDjelmeBackend for _recordtype in recordtypes: - assert issubclass(_recordtype, DjelmeRecordtype) - _recordtype._djelme_teardown(self.elastic8_client) + assert issubclass(_recordtype, BaseDjelmeRecord) + _recordtype.do_teardown(self.elastic_client) if __debug__ and typing.TYPE_CHECKING: # for static type-checking; verify intent _: type[ProtoCountedUsage] = CountedUsageRecord __: type[ProtoDjelmeBackend] = DjelmeElastic8Backend - ___: type[ProtoDjelmeRecord] = DjelmeRecordtype + ___: type[ProtoDjelmeRecord] = BaseDjelmeRecord ### # names expected by ProtoDjelmeImp diff --git a/elasticsearch_metrics/management/commands/djelme_backend_check.py 
b/elasticsearch_metrics/management/commands/djelme_backend_check.py index 9df68f8..ed039e9 100644 --- a/elasticsearch_metrics/management/commands/djelme_backend_check.py +++ b/elasticsearch_metrics/management/commands/djelme_backend_check.py @@ -1,6 +1,5 @@ -import sys import logging -from django.core.management.base import BaseCommand +from django.core.management.base import BaseCommand, CommandError from django.utils.termcolors import colorize @@ -31,10 +30,7 @@ def handle(self, *args, **options): for _recordtype in djelme_registry.each_recordtype(app_label=_app_label): try: _recordtype.check_djelme_setup() - except ( - exceptions.IndexTemplateNotFoundError, - exceptions.IndexTemplateOutOfSyncError, - ) as error: + except exceptions.DjelmeSetupError as error: self.stdout.write(" " + error.args[0]) out_of_sync_count += 1 @@ -45,6 +41,6 @@ def handle(self, *args, **options): ) cmd = colorize("python manage.py djelme_backend_setup", opts=("bold",)) self.stdout.write("Run {cmd} to set up index templates.".format(cmd=cmd)) - sys.exit(1) + raise CommandError(1) else: self.stdout.write("All djelme recordtypes set up.", style.SUCCESS) diff --git a/elasticsearch_metrics/protocols.py b/elasticsearch_metrics/protocols.py index 632525f..2379bc5 100644 --- a/elasticsearch_metrics/protocols.py +++ b/elasticsearch_metrics/protocols.py @@ -48,7 +48,8 @@ def record( ... @classmethod - def check_djelme_setup(cls, using: str | None = None) -> bool: ... 
+ def check_djelme_setup(cls, using: str | None = None) -> None: + """Check backend setup for this record type; raise helpful exceptions""" @classmethod def search_timeseries_range( diff --git a/elasticsearch_metrics/registry.py b/elasticsearch_metrics/registry.py index a473b38..6104148 100644 --- a/elasticsearch_metrics/registry.py +++ b/elasticsearch_metrics/registry.py @@ -14,7 +14,7 @@ ProtoDjelmeRecord, ProtoDjelmeImp, ) -from elasticsearch_metrics.util.django import find_app_label_for_type +from elasticsearch_metrics.util.django import find_app_label_for_module from elasticsearch_metrics.util.timeseries_naming import format_namepart __all__ = ("djelme_registry",) @@ -73,7 +73,7 @@ def register_recordtype( default_backend: str = "", ) -> None: """Add a record type to the registry.""" - _app_label = app_label or find_app_label_for_type(recordtype) + _app_label = app_label or find_app_label_for_module(recordtype.__module__) app_recordtypes = self._all_recordtypes[_app_label] recordtype_name = format_namepart(recordtype.__name__) if recordtype_name in app_recordtypes: @@ -162,10 +162,8 @@ def get_backend_name_for_recordtype( try: (_backend_name,) = _each_backend_name except StopIteration as _e: # no backends - breakpoint() raise LookupError(f"no backends for recordtype {recordtype!r}") from _e except ValueError as _e: # too many backends - breakpoint() raise LookupError( f"more than one backend for recordtype {recordtype!r}, must be set explicitly" ) from _e @@ -218,15 +216,13 @@ def recordtypes_by_backend( self, app_label: str = "" ) -> dict[str, collections.abc.Iterable[type[ProtoDjelmeRecord]]]: apps.check_apps_ready() # ensure django setup done - _by_backend_name: dict[str, list[type[ProtoDjelmeRecord]]] = ( - collections.defaultdict(list) - ) + _by_backend_name: dict[str, list[type[ProtoDjelmeRecord]]] = {} _app_labels = [app_label] if app_label else self._all_recordtypes.keys() for _app_label in _app_labels: for _recordtype in 
self._get_recordtypes_for_app(_app_label).values(): _backend_name = self.get_backend_name_for_recordtype(_recordtype) - _by_backend_name[_backend_name].append(_recordtype) - return dict(_by_backend_name.items()) + _by_backend_name.setdefault(_backend_name, []).append(_recordtype) + return _by_backend_name ### # private methods diff --git a/elasticsearch_metrics/tests/__main__.py b/elasticsearch_metrics/tests/__main__.py index 08be2c8..f211166 100644 --- a/elasticsearch_metrics/tests/__main__.py +++ b/elasticsearch_metrics/tests/__main__.py @@ -7,6 +7,7 @@ import collections import os import subprocess +import sys _parser = argparse.ArgumentParser() _parser.add_argument("--lint", action="store_true") # _args.lint @@ -40,7 +41,11 @@ def print_header(*args: str) -> None: def _run(*args: str, header: str = "") -> None: print_header(header) if header else print_header(*args) - subprocess.run(args, check=True) # stop on error + try: + subprocess.run(args, check=True) # stop on error + except subprocess.CalledProcessError as _e: + print(f"\n\n^^ errored ({_e.returncode}) ^^") + sys.exit(1) def run_lint() -> None: diff --git a/elasticsearch_metrics/tests/_test_util.py b/elasticsearch_metrics/tests/_test_util.py deleted file mode 100644 index ab5d3c1..0000000 --- a/elasticsearch_metrics/tests/_test_util.py +++ /dev/null @@ -1,97 +0,0 @@ -from io import StringIO -from unittest import mock -import types -import typing -import uuid - -from django.core.management import call_command -from django.core.management.base import BaseCommand -from django.test import SimpleTestCase - -from elasticsearch_metrics.registry import djelme_registry - - -class SimpleDjelmeTestCase(SimpleTestCase): - """SimpleDjelmeTestCase: base test case with djelme-specific conveniences""" - - def enterContext(self, context_manager): - # unittest.TestCase.enterContext added in python3.11 -- implementing here until 3.10 eol - result = context_manager.__enter__() - self.addCleanup(lambda: 
context_manager.__exit__(None, None, None)) - return result - - def run_mgmt_command( - self, cmd: str | BaseCommand | types.ModuleType, *args: str, **options: str - ) -> tuple[str, str]: - """run a django management command, return (stdout, stderr) tuple - - wraps django.core.management.call_command to handle string-io and - also accept a management command module - """ - _cmd = cmd.Command() if isinstance(cmd, types.ModuleType) else cmd - _out, _err = StringIO(), StringIO() - call_command(_cmd, *args, **options, stdout=_out, stderr=_err) - return _out.getvalue(), _err.getvalue() - - -class RealElasticTestCase(SimpleDjelmeTestCase): - """RealElasticTestCase: base test case with actual elasticsearch running""" - - __autosetup_backends: bool - __autoteardown_backends: bool - - def __init_subclass__( - cls, - /, # kwargs on class creation e.g. `Foo(RealElasticTestCase, autosetup_djelme_backends=False) - autosetup_djelme_backends: bool, # required - autoteardown_djelme_backends: bool = True, - **kwargs: typing.Any, - ): - super().__init_subclass__(**kwargs) - cls.__autosetup_backends = autosetup_djelme_backends - cls.__autoteardown_backends = autoteardown_djelme_backends - - def setUp(self): - super().setUp() - _name_prefix = f"{uuid.uuid4().hex}_" - self.enterContext( - mock.patch( - "elasticsearch_metrics.imps.elastic8.TimeseriesRecord.get_timeseries_name_prefix", - return_value=_name_prefix, - ) - ) - self.enterContext( - mock.patch( - "elasticsearch_metrics.imps.elastic6.BaseMetric.get_timeseries_name_prefix", - return_value=_name_prefix, - ), - ) - if self.__autosetup_backends: - self.setup_backends() - - def tearDown(self): - if self.__autoteardown_backends: - self.teardown_backends() - super().tearDown() - - def setup_backends(self): - # backends based on settings in django.conf.settings.DJELME_BACKENDS - _types_by_backend = djelme_registry.recordtypes_by_backend() - for _backend_name, _recordtypes in _types_by_backend.items(): - 
djelme_registry.get_backend(_backend_name).djelme_setup(_recordtypes) - - def teardown_backends(self): - # backends based on settings in django.conf.settings.DJELME_BACKENDS - _types_by_backend = djelme_registry.recordtypes_by_backend() - for _backend_name, _recordtypes in _types_by_backend.items(): - djelme_registry.get_backend(_backend_name).djelme_teardown(_recordtypes) - - -class MockSaveTestCase(SimpleDjelmeTestCase): - def setUp(self): - self.mocked_es6_save = self.enterContext( - mock.patch("elasticsearch_metrics.imps.elastic6.Document.save"), - ) - self.mocked_es8_save = self.enterContext( - mock.patch("elasticsearch_metrics.imps.elastic8.esdsl.Document.save"), - ) diff --git a/elasticsearch_metrics/tests/dummy8app/metrics.py b/elasticsearch_metrics/tests/dummy8app/metrics.py index ec3035e..c3f4e76 100644 --- a/elasticsearch_metrics/tests/dummy8app/metrics.py +++ b/elasticsearch_metrics/tests/dummy8app/metrics.py @@ -1,56 +1,54 @@ -from elasticsearch8.dsl import Text, mapped_field, analyzer, tokenizer +from elasticsearch8 import dsl as esdsl from elasticsearch_metrics.imps import elastic8 as djelme -dot_path_analyzer = analyzer( +dot_path_analyzer = esdsl.analyzer( "dot_path_analyzer", - tokenizer=tokenizer("dot_path_tokenizer", "path_hierarchy", delimiter="."), + tokenizer=esdsl.tokenizer("dot_path_tokenizer", "path_hierarchy", delimiter="."), ) class Dummy8Event(djelme.EventRecord): intensity: int - class Index: - using = "my_elastic8_events" - class Monthly8Event(djelme.EventRecord): intenzity: int class Meta: - timeseries_name_prefix = "dummy8evenz" + index_name_prefix = "dummy8evenz" timeseries_recordtype_name = "eventlog" - timedepth = 2 - - class Index: - using = "my_elastic8_events" + timeseries_index_timedepth = 2 class ThingHappened(djelme.EventRecord): thing_id: str = "" happen_code: str | None = None - dot_path: str | None = mapped_field(Text(analyzer=dot_path_analyzer), default=None) - commentary: str | None = mapped_field(Text(), default=None) 
+ dot_path: str | None = esdsl.mapped_field( + esdsl.Text(analyzer=dot_path_analyzer), default=None + ) + commentary: str | None = esdsl.mapped_field(esdsl.Text(), default=None) class Index: settings = {"refresh_interval": "-1"} - using = "my_elastic8_events" class Meta: timeseries_recordtype_name = "happen" - timedepth = 1 # yearly timeseries indexes + timeseries_index_timedepth = 1 # yearly timeseries indexes -# TODO: tests using ThingHappeningsReport class ThingHappeningsReport(djelme.CyclicRecord): CYCLE_TIMEDEPTH = 2 + UNIQUE_TOGETHER_FIELDS = ("cycle_coverage", "thing_id") thing_id: str happen_count: int - class Index: - using = "my_elastic8_reports" - class Meta: - timeseries_name_prefix = "blarg_" - timedepth = 2 # monthly timeseries indexes + index_name_prefix = "blarg_" + timeseries_index_timedepth = 2 # monthly timeseries indexes + + +class SimpleKV(djelme.SimpleRecord): + UNIQUE_TOGETHER_FIELDS = ("key",) + key: str + val: int diff --git a/elasticsearch_metrics/tests/settings.py b/elasticsearch_metrics/tests/settings.py index 4b77c82..8de980e 100644 --- a/elasticsearch_metrics/tests/settings.py +++ b/elasticsearch_metrics/tests/settings.py @@ -13,19 +13,13 @@ MIDDLEWARE_CLASSES = [] DATABASES = {"default": {"ENGINE": "django.db.backends.sqlite3", "NAME": "test_djelme"}} -DJELME_AUTOSETUP = True DJELME_BACKENDS = { "my_elastic6": { "elasticsearch_metrics.imps.elastic6": { "hosts": os.environ.get("ELASTICSEARCH6_URL", ""), }, }, - "my_elastic8_events": { - "elasticsearch_metrics.imps.elastic8": { - "hosts": os.environ.get("ELASTICSEARCH8_URL", ""), - }, - }, - "my_elastic8_reports": { + "my_elastic8": { "elasticsearch_metrics.imps.elastic8": { "hosts": os.environ.get("ELASTICSEARCH8_URL", ""), }, diff --git a/elasticsearch_metrics/tests/test_autodiscovery.py b/elasticsearch_metrics/tests/test_autodiscovery.py index fae22f4..b5aa6de 100644 --- a/elasticsearch_metrics/tests/test_autodiscovery.py +++ b/elasticsearch_metrics/tests/test_autodiscovery.py @@ 
-1,5 +1,5 @@ from elasticsearch_metrics.registry import djelme_registry -from elasticsearch_metrics.tests._test_util import SimpleDjelmeTestCase +from elasticsearch_metrics.tests.util import SimpleDjelmeTestCase class TestAutodiscovery(SimpleDjelmeTestCase): diff --git a/elasticsearch_metrics/tests/test_imps_elastic6.py b/elasticsearch_metrics/tests/test_imps_elastic6.py index e4db776..f7de7aa 100644 --- a/elasticsearch_metrics/tests/test_imps_elastic6.py +++ b/elasticsearch_metrics/tests/test_imps_elastic6.py @@ -1,4 +1,5 @@ import unittest +from unittest import mock import datetime as dt from django.utils import timezone @@ -17,10 +18,11 @@ IndexTemplateNotFoundError, IndexTemplateOutOfSyncError, ) -from elasticsearch_metrics.tests._test_util import ( +from elasticsearch_metrics.tests.util import ( SimpleDjelmeTestCase, - MockSaveTestCase, + MockConnectionTestCase, RealElasticTestCase, + NoSetupRealElasticTestCase, ) from elasticsearch_metrics.tests.dummy6app.metrics import ( Dummy6Metric, @@ -188,29 +190,29 @@ class Meta: assert doc["_source"]["enabled"] is True -class TestRecord(MockSaveTestCase): - def test_calls_save(self): +class TestRecord(MockConnectionTestCase): + def test_calls_index(self): timestamp = dt.datetime(2017, 8, 21) p = PreprintView.record(timestamp=timestamp, provider_id="abc12") - assert self.mocked_es6_save.call_count == 1 + assert self.mock_es6_connection.index.call_count == 1 assert p.timestamp == timestamp assert p.provider_id == "abc12" - @unittest.mock.patch.object(timezone, "now") + @mock.patch.object(timezone, "now") def test_defaults_timestamp_to_now(self, mock_now): fake_now = dt.datetime(2016, 8, 21) mock_now.return_value = fake_now p = PreprintView.record(provider_id="abc12") - assert self.mocked_es6_save.call_count == 1 + assert self.mock_es6_connection.index.call_count == 1 assert p.timestamp == fake_now -class TestSignals(MockSaveTestCase): - @unittest.mock.patch.object(PreprintView, "get_timeseries_index_template") +class 
TestSignals(MockConnectionTestCase): + @mock.patch.object(PreprintView, "get_timeseries_index_template") def test_create_metric_sends_signals(self, mock_get_index_template): - mock_pre_index_template_listener = unittest.mock.Mock() - mock_post_index_template_listener = unittest.mock.Mock() + mock_pre_index_template_listener = mock.Mock() + mock_post_index_template_listener = mock.Mock() signals.pre_index_template_create.connect(mock_pre_index_template_listener) signals.post_index_template_create.connect(mock_post_index_template_listener) PreprintView.sync_index_template() @@ -225,8 +227,8 @@ def test_create_metric_sends_signals(self, mock_get_index_template): assert "using" in post_call_kwargs def test_save_sends_signals(self): - mock_pre_save_listener = unittest.mock.Mock() - mock_post_save_listener = unittest.mock.Mock() + mock_pre_save_listener = mock.Mock() + mock_post_save_listener = mock.Mock() signals.pre_save.connect(mock_pre_save_listener, sender=PreprintView) signals.post_save.connect(mock_post_save_listener, sender=PreprintView) @@ -253,7 +255,7 @@ def test_save_sends_signals(self): assert post_save_kwargs["sender"] is PreprintView -class TestIntegration(RealElasticTestCase, autosetup_djelme_backends=True): +class TestIntegration(RealElasticTestCase): @property def es6_client(self): return connections.get_connection("my_elastic6") @@ -279,7 +281,7 @@ def test_create_document(self): assert properties["preprint_id"] == {"type": "keyword"} -class TestIntegrationSetup(RealElasticTestCase, autosetup_djelme_backends=False): +class TestIntegrationSetup(NoSetupRealElasticTestCase): @property def es6_client(self): return connections.get_connection("my_elastic6") @@ -296,20 +298,20 @@ def test_init(self): def test_check_djelme_setup(self): with self.assertRaises(IndexTemplateNotFoundError): - assert PreprintView.check_djelme_setup() is False + PreprintView.check_djelme_setup() PreprintView.sync_index_template() - assert PreprintView.check_djelme_setup() is True + 
assert PreprintView.check_djelme_setup() is None # When settings change, template is out of sync PreprintView._index.settings( **{"refresh_interval": "1s", "number_of_shards": 1, "number_of_replicas": 2} ) with self.assertRaises(IndexTemplateOutOfSyncError) as excinfo: - assert PreprintView.check_djelme_setup() is False + PreprintView.check_djelme_setup() error = excinfo.exception assert error.settings_in_sync is False assert error.mappings_in_sync is True assert error.patterns_in_sync is True PreprintView.sync_index_template() - assert PreprintView.check_djelme_setup() is True + assert PreprintView.check_djelme_setup() is None diff --git a/elasticsearch_metrics/tests/test_imps_elastic8.py b/elasticsearch_metrics/tests/test_imps_elastic8.py index bd3b273..410d7a2 100644 --- a/elasticsearch_metrics/tests/test_imps_elastic8.py +++ b/elasticsearch_metrics/tests/test_imps_elastic8.py @@ -1,4 +1,4 @@ -import unittest +from unittest import mock import datetime as dt import elasticsearch8 @@ -14,25 +14,27 @@ IndexTemplateOutOfSyncError, ) from elasticsearch_metrics.registry import djelme_registry -from elasticsearch_metrics.tests._test_util import ( +from elasticsearch_metrics.tests.util import ( SimpleDjelmeTestCase, - MockSaveTestCase, + MockConnectionTestCase, RealElasticTestCase, + NoSetupRealElasticTestCase, ) from elasticsearch_metrics.tests.dummy8app.metrics import ( Dummy8Event, Monthly8Event, ThingHappened, ThingHappeningsReport, + SimpleKV, ) def _es8_client( - backend_name: str = "my_elastic8_events", + backend_name: str = "my_elastic8", ) -> elasticsearch8.Elasticsearch: _backend = djelme_registry.get_backend(backend_name) assert isinstance(_backend, djelme.DjelmeElastic8Backend) - return _backend.elastic8_client + return _backend.elastic_client class TestNamesAndPatterns(SimpleDjelmeTestCase): @@ -63,6 +65,11 @@ def test_index_name(self): _thingreport.djelme_index_name(), "blarg_dummy8app_thinghappeningsreport_1999.3.", ) + _kv = SimpleKV(key="wha", val=0) + 
self.assertEqual( + _kv.djelme_index_name(), + "dummy8app_simplekv", + ) def test_format_index_name_respects_date_format_setting(self): _stamp = dt.datetime(2020, 2, 14) @@ -222,9 +229,6 @@ def test_get_index_template_default_template_name(self): def test_get_index_template_uses_app_label_in_class_meta(self): class MyRecord(djelme.TimeseriesRecord): - class Index: - using = "my_elastic8_events" - class Meta: app_label = "myapp" @@ -246,7 +250,6 @@ class MyBaseRecord(djelme.TimeseriesRecord): class Index: settings = {"number_of_shards": 2} - using = "my_elastic8_events" class Meta: abstract = True @@ -261,9 +264,6 @@ class Meta: def test_source_may_be_enabled(self): class MyRecord(djelme.TimeseriesRecord): - class Index: - using = "my_elastic8_events" - class Meta: app_label = "dummy8app" timeseries_recordtype_name = "myrecord" @@ -275,30 +275,29 @@ class Meta: assert doc["_source"]["enabled"] is True -class TestRecord(MockSaveTestCase): +class TestRecord(MockConnectionTestCase): def test_calls_save(self): - timestamp = dt.datetime(2017, 8, 21) + timestamp = dt.datetime(2017, 8, 21, tzinfo=dt.timezone.utc) p = ThingHappened.record(timestamp=timestamp, thing_id="abc12") - assert self.mocked_es8_save.call_count == 1 + assert self.mock_es8_connection.index.call_count == 1 assert p.timestamp == timestamp assert p.timeseries_timeparts == "2017.8.21.0.0.0" assert p.thing_id == "abc12" def test_defaults_timestamp_to_now(self): _fake_now = dt.datetime(2016, 8, 21, tzinfo=dt.timezone.utc) - with unittest.mock.patch( + with mock.patch( "elasticsearch_metrics.imps.elastic8.utcnow", return_value=_fake_now ): p = ThingHappened.record(thing_id="abc12") - assert self.mocked_es8_save.call_count == 1 + assert self.mock_es8_connection.index.call_count == 1 assert p.timestamp == _fake_now -class TestSignals(MockSaveTestCase): - @unittest.mock.patch.object(ThingHappened, "get_timeseries_template") - def test_create_record_sends_signals(self, mock_timeseries_template): - 
mock_pre_index_template_listener = unittest.mock.Mock() - mock_post_index_template_listener = unittest.mock.Mock() +class TestSignals(MockConnectionTestCase): + def test_create_record_sends_signals(self): + mock_pre_index_template_listener = mock.Mock() + mock_post_index_template_listener = mock.Mock() signals.pre_index_template_create.connect(mock_pre_index_template_listener) signals.post_index_template_create.connect(mock_post_index_template_listener) ThingHappened.sync_index_template() @@ -313,8 +312,8 @@ def test_create_record_sends_signals(self, mock_timeseries_template): assert "using" in post_call_kwargs def test_save_sends_signals(self): - mock_pre_save_listener = unittest.mock.Mock() - mock_post_save_listener = unittest.mock.Mock() + mock_pre_save_listener = mock.Mock() + mock_post_save_listener = mock.Mock() signals.pre_save.connect(mock_pre_save_listener, sender=ThingHappened) signals.post_save.connect(mock_post_save_listener, sender=ThingHappened) @@ -338,7 +337,7 @@ def test_save_sends_signals(self): assert post_save_kwargs["sender"] is ThingHappened -class TestRealCreate(RealElasticTestCase, autosetup_djelme_backends=True): +class TestRealCreate(RealElasticTestCase): def test_save(self): _thing_id = "12345" _happen_code = "zyxwv" @@ -385,7 +384,44 @@ def test_record(self): assert properties["happen_code"] == {"type": "keyword"} -class TestInit(RealElasticTestCase, autosetup_djelme_backends=False): +class TestWithoutAutosetup(NoSetupRealElasticTestCase): + def test_cannot_save_without_template(self): + _event = Dummy8Event(intensity=2) + with self.assertRaises(IndexTemplateNotFoundError): + _event.save() + + def test_cannot_save_with_wrong_template_pattern(self): + with mock.patch.object( + Dummy8Event, + "format_timeseries_index_pattern", + return_value="wrong_pattern_haha_*", + ): + Dummy8Event.init() + with self.assertRaises(IndexTemplateOutOfSyncError): + Dummy8Event.record(intensity=2) + + def test_cannot_save_with_extra_property_mapping(self): + 
# create template with extra property mapping + _template = Dummy8Event.get_timeseries_template().to_dict() + _template["template"]["mappings"]["properties"]["foo"] = {"type": "keyword"} + _es8_client().indices.put_index_template( + name=Dummy8Event.get_timeseries_template_name(), + **_template, + ) + with self.assertRaises(IndexTemplateOutOfSyncError): + Dummy8Event.record(intensity=2) + + def test_cannot_save_with_missing_property_mapping(self): + # create template with missing property mapping + _template = Dummy8Event.get_timeseries_template().to_dict() + del _template["template"]["mappings"]["properties"]["intensity"] + _es8_client().indices.put_index_template( + name=Dummy8Event.get_timeseries_template_name(), + **_template, + ) + with self.assertRaises(IndexTemplateOutOfSyncError): + Dummy8Event.record(intensity=2) + def test_init(self): ThingHappened.init() _client = _es8_client() @@ -407,26 +443,26 @@ def test_init(self): def test_check_djelme_setup(self): with self.assertRaises(IndexTemplateNotFoundError): - assert ThingHappened.check_djelme_setup() is False + ThingHappened.check_djelme_setup() ThingHappened.sync_index_template() - assert ThingHappened.check_djelme_setup() is True + assert ThingHappened.check_djelme_setup() is None # When settings change, template is out of sync ThingHappened._index.settings( **{"refresh_interval": "1s", "number_of_shards": 1, "number_of_replicas": 2} ) with self.assertRaises(IndexTemplateOutOfSyncError) as excinfo: - assert ThingHappened.check_djelme_setup() is False + ThingHappened.check_djelme_setup() error = excinfo.exception assert error.settings_in_sync is False assert error.mappings_in_sync is True assert error.patterns_in_sync is True ThingHappened.sync_index_template() - assert ThingHappened.check_djelme_setup() is True + assert ThingHappened.check_djelme_setup() is None -class TestDailyIndexes(RealElasticTestCase, autosetup_djelme_backends=True): +class TestDailyIndexes(RealElasticTestCase): def setUp(self): 
super().setUp() Dummy8Event.record(timestamp=dt.datetime(1234, 5, 6), intensity=1) @@ -436,7 +472,7 @@ def setUp(self): Dummy8Event.record(timestamp=dt.datetime(1235, 5, 6), intensity=111) Dummy8Event.record(timestamp=dt.datetime(2345, 6, 9), intensity=11) Dummy8Event.record(timestamp=dt.datetime(2345, 7, 9), intensity=13) - Dummy8Event.refresh_timeseries_indexes() + Dummy8Event.refresh() def test_indexes(self): _index_names = { @@ -488,7 +524,7 @@ def _assert_intens(hits, expected_intensities): ) -class TestMonthlyIndexes(RealElasticTestCase, autosetup_djelme_backends=True): +class TestMonthlyIndexes(RealElasticTestCase): def setUp(self): super().setUp() Monthly8Event.record(timestamp=dt.datetime(1234, 5, 6), intenzity=1) @@ -498,7 +534,7 @@ def setUp(self): Monthly8Event.record(timestamp=dt.datetime(1235, 5, 6), intenzity=111) Monthly8Event.record(timestamp=dt.datetime(2345, 6, 9), intenzity=11) Monthly8Event.record(timestamp=dt.datetime(2345, 7, 9), intenzity=13) - Monthly8Event.refresh_timeseries_indexes() + Monthly8Event.refresh() def test_indexes(self): _index_names = { @@ -549,7 +585,7 @@ def _assert_intenz(hits, expected_intenzities): ) -class TestYearlyIndexes(RealElasticTestCase, autosetup_djelme_backends=True): +class TestYearlyIndexes(RealElasticTestCase): def setUp(self): super().setUp() ThingHappened.record(timestamp=dt.datetime(1234, 5, 6), thing_id="a") @@ -559,7 +595,7 @@ def setUp(self): ThingHappened.record(timestamp=dt.datetime(1235, 5, 6), thing_id="e") ThingHappened.record(timestamp=dt.datetime(2345, 6, 9), thing_id="f") ThingHappened.record(timestamp=dt.datetime(2345, 7, 9), thing_id="g") - ThingHappened.refresh_timeseries_indexes() + ThingHappened.refresh() def test_indexes(self): _index_names = { @@ -608,6 +644,91 @@ def _assert_things(hits, expected_thing_ids): ) +class TestCyclicRecord(RealElasticTestCase): + def setUp(self): + super().setUp() + ThingHappeningsReport.record( + cycle_coverage="2000.1", thing_id="a", happen_count=2 + ) + 
ThingHappeningsReport.record( + cycle_coverage="2000.1", thing_id="b", happen_count=3 + ) + ThingHappeningsReport.record( + cycle_coverage="2000.1", thing_id="c", happen_count=4 + ) + ThingHappeningsReport.record( + cycle_coverage="2000.2", thing_id="a", happen_count=5 + ) + ThingHappeningsReport.record( # this duplicate gets overwritten + cycle_coverage="2000.2", thing_id="b", happen_count=67 + ) + ThingHappeningsReport.record( # this duplicate overwrites + cycle_coverage="2000.2", thing_id="b", happen_count=6 + ) + ThingHappeningsReport.record( + cycle_coverage="2000.2", thing_id="c", happen_count=7 + ) + ThingHappeningsReport.refresh() + + def test_indexes(self): + _index_names = { + _strip_test_prefix(_name) + for _name, _ in ThingHappeningsReport.each_timeseries_index() + } + self.assertEqual( + _index_names, + { + "dummy8app_thinghappeningsreport_2000.1.", + "dummy8app_thinghappeningsreport_2000.2.", + }, + ) + + def test_search(self): + _b_search = ( + ThingHappeningsReport.search() + .query({"term": {"thing_id": "b"}}) + .sort("cycle_coverage") + ) + (_actual_1, _actual_2) = _b_search + self.assertEqual(_actual_1.cycle_coverage, "2000.1") + self.assertEqual(_actual_1.thing_id, "b") + self.assertEqual(_actual_1.happen_count, 3) + self.assertEqual(_actual_1.timeseries_timeparts, "2000.1") + self.assertEqual(_actual_2.cycle_coverage, "2000.2") + self.assertEqual(_actual_2.thing_id, "b") + self.assertEqual(_actual_2.happen_count, 6) + self.assertEqual(_actual_2.timeseries_timeparts, "2000.2") + + def test_search_range(self): + _b_search = ThingHappeningsReport.search_timeseries_range( + (2000, 2), (2001,) + ).query({"term": {"thing_id": "b"}}) + (_actual_2,) = _b_search + self.assertEqual(_actual_2.cycle_coverage, "2000.2") + self.assertEqual(_actual_2.thing_id, "b") + self.assertEqual(_actual_2.happen_count, 6) + self.assertEqual(_actual_2.timeseries_timeparts, "2000.2") + + +class TestSingleIndex(RealElasticTestCase): + def test_index_name(self): + 
self.assertEqual( + _strip_test_prefix(SimpleKV._index._name), + "dummy8app_simplekv", + ) + + def test_search(self): + SimpleKV.record(key="hello", val=2) + SimpleKV.record(key="goodbye", val=-2) + SimpleKV.refresh() + (_hello,) = SimpleKV.search().query({"term": {"key": "hello"}}).execute() + self.assertEqual(_hello.key, "hello") + self.assertEqual(_hello.val, 2) + (_goodbye,) = SimpleKV.search().query({"term": {"key": "goodbye"}}).execute() + self.assertEqual(_goodbye.key, "goodbye") + self.assertEqual(_goodbye.val, -2) + + def _strip_test_prefix(index_name: str) -> str: # strip uuid test prefix (_, _, _without_prefix) = index_name.partition("_") diff --git a/elasticsearch_metrics/tests/test_management_commands/test_djelme_check.py b/elasticsearch_metrics/tests/test_management_commands/test_djelme_check.py index 3872e29..06c93be 100644 --- a/elasticsearch_metrics/tests/test_management_commands/test_djelme_check.py +++ b/elasticsearch_metrics/tests/test_management_commands/test_djelme_check.py @@ -1,9 +1,11 @@ from unittest import mock +from django.core.management.base import CommandError + from elasticsearch_metrics import exceptions from elasticsearch_metrics.management.commands import djelme_backend_check from elasticsearch_metrics.registry import registry -from elasticsearch_metrics.tests._test_util import SimpleDjelmeTestCase +from elasticsearch_metrics.tests.util import SimpleDjelmeTestCase class TestCheckRecordtypes(SimpleDjelmeTestCase): @@ -11,11 +13,16 @@ def setUp(self): self.mock6_check_djelme_setup = self.enterContext( mock.patch("elasticsearch_metrics.imps.elastic6.Metric.check_djelme_setup"), ) - self.mock8_check_djelme_setup = self.enterContext( + self.mock8_timeseries_check_djelme_setup = self.enterContext( mock.patch( "elasticsearch_metrics.imps.elastic8.TimeseriesRecord.check_djelme_setup" ), ) + self.mock8_simple_check_djelme_setup = self.enterContext( + mock.patch( + "elasticsearch_metrics.imps.elastic8.SimpleRecord.check_djelme_setup" + ), + 
) def test_exits_with_error_if_out_of_sync_6(self): self.mock6_check_djelme_setup.side_effect = ( @@ -23,24 +30,30 @@ def test_exits_with_error_if_out_of_sync_6(self): "Index template does not exist", client_error=None ) ) - with self.assertRaises(SystemExit): + with self.assertRaises(CommandError): self.run_mgmt_command(djelme_backend_check) def test_exits_with_error_if_out_of_sync_8(self): - self.mock8_check_djelme_setup.side_effect = ( + self.mock8_timeseries_check_djelme_setup.side_effect = ( exceptions.IndexTemplateNotFoundError( "Index template does not exist", client_error=None ) ) - with self.assertRaises(SystemExit): + with self.assertRaises(CommandError): + self.run_mgmt_command(djelme_backend_check) + + def test_exits_with_error_if_out_of_sync_8_simplerec(self): + self.mock8_simple_check_djelme_setup.side_effect = ( + exceptions.IndexNotFoundError("Index does not exist", client_error=None) + ) + with self.assertRaises(CommandError): self.run_mgmt_command(djelme_backend_check) def test_exits_with_success(self): - self.mock6_check_djelme_setup.return_value = True - self.mock8_check_djelme_setup.return_value = True self.run_mgmt_command(djelme_backend_check) _call_count = ( self.mock6_check_djelme_setup.call_count - + self.mock8_check_djelme_setup.call_count + + self.mock8_timeseries_check_djelme_setup.call_count + + self.mock8_simple_check_djelme_setup.call_count ) assert _call_count == len(list(registry.each_recordtype())) diff --git a/elasticsearch_metrics/tests/test_management_commands/test_djelme_setup.py b/elasticsearch_metrics/tests/test_management_commands/test_djelme_setup.py index 51b105b..dcd1729 100644 --- a/elasticsearch_metrics/tests/test_management_commands/test_djelme_setup.py +++ b/elasticsearch_metrics/tests/test_management_commands/test_djelme_setup.py @@ -5,31 +5,32 @@ from elasticsearch_metrics.management.commands import djelme_backend_setup from elasticsearch_metrics.imps import elastic6 from elasticsearch_metrics.registry import 
djelme_registry -from elasticsearch_metrics.tests._test_util import SimpleDjelmeTestCase +from elasticsearch_metrics.tests.util import SimpleDjelmeTestCase class TestDjelmeSetup(SimpleDjelmeTestCase): - mock6_sync_index_template: unittest.mock.Mock - mock8_sync_index_template: unittest.mock.Mock + mock_inits: list[unittest.mock.Mock] def setUp(self): - self.mock6_sync_index_template = self.enterContext( - unittest.mock.patch( - "elasticsearch_metrics.imps.elastic6.Metric.sync_index_template" + self.mock_inits = [ + self.enterContext( + unittest.mock.patch("elasticsearch_metrics.imps.elastic6.Metric.init"), ), - ) - self.mock8_sync_index_template = self.enterContext( - unittest.mock.patch( - "elasticsearch_metrics.imps.elastic8.TimeseriesRecord.sync_index_template" + self.enterContext( + unittest.mock.patch( + "elasticsearch_metrics.imps.elastic8.TimeseriesRecord.init" + ), ), - ) + self.enterContext( + unittest.mock.patch( + "elasticsearch_metrics.imps.elastic8.SimpleRecord.init" + ), + ), + ] def test_without_args(self): out, err = self.run_mgmt_command(djelme_backend_setup) - _call_count = ( - self.mock6_sync_index_template.call_count - + self.mock8_sync_index_template.call_count - ) + _call_count = sum(_mock.call_count for _mock in self.mock_inits) assert _call_count == len(list(djelme_registry.each_recordtype())) assert "Synchronized recordtypes." 
in out @@ -44,8 +45,5 @@ class Meta: app_label = "dummyapp2" out, err = self.run_mgmt_command(djelme_backend_setup, "dummyapp2") - _call_count = ( - self.mock6_sync_index_template.call_count - + self.mock8_sync_index_template.call_count - ) + _call_count = sum(_mock.call_count for _mock in self.mock_inits) assert _call_count == 1 diff --git a/elasticsearch_metrics/tests/test_management_commands/test_djelme_types.py b/elasticsearch_metrics/tests/test_management_commands/test_djelme_types.py index f94b88c..e1ab535 100644 --- a/elasticsearch_metrics/tests/test_management_commands/test_djelme_types.py +++ b/elasticsearch_metrics/tests/test_management_commands/test_djelme_types.py @@ -1,5 +1,5 @@ from elasticsearch_metrics.management.commands import djelme_backend_types -from elasticsearch_metrics.tests._test_util import SimpleDjelmeTestCase +from elasticsearch_metrics.tests.util import SimpleDjelmeTestCase class TestDjelmeTypesCommand(SimpleDjelmeTestCase): diff --git a/elasticsearch_metrics/tests/util.py b/elasticsearch_metrics/tests/util.py new file mode 100644 index 0000000..4845e2e --- /dev/null +++ b/elasticsearch_metrics/tests/util.py @@ -0,0 +1,145 @@ +import contextlib +from io import StringIO +import types +from unittest import mock +import uuid + +from django.core.management import call_command +from django.core.management.base import BaseCommand +from django.test import SimpleTestCase + +from elasticsearch_metrics.registry import djelme_registry + + +@contextlib.contextmanager +def djelme_test_backends(): + """context manager to wrap tests that use djelme with actual elasticsearch + + gives index names a unique prefix + + sets up and tears down all backends configured in django.conf.settings.DJELME_BACKENDS + """ + clear_setup_check_caches() + with prefixed_index_names(): + setup_backends() + try: + yield + finally: + teardown_backends() + + +class SimpleDjelmeTestCase(SimpleTestCase): + """SimpleDjelmeTestCase: base test case with djelme-specific 
conveniences""" + + def setUp(self): + super().setUp() + clear_setup_check_caches() + + def enterContext(self, context_manager): + # unittest.TestCase.enterContext added in python3.11 -- implementing here until 3.10 eol + result = context_manager.__enter__() + self.addCleanup(lambda: context_manager.__exit__(None, None, None)) + return result + + def run_mgmt_command( + self, cmd: str | BaseCommand | types.ModuleType, *args: str, **options: str + ) -> tuple[str, str]: + """run a django management command, return (stdout, stderr) tuple + + wraps django.core.management.call_command to handle string-io and + also accept a management command module + """ + _cmd = cmd.Command() if isinstance(cmd, types.ModuleType) else cmd + _out, _err = StringIO(), StringIO() + call_command(_cmd, *args, **options, stdout=_out, stderr=_err) + return _out.getvalue(), _err.getvalue() + + +class RealElasticTestCase(SimpleDjelmeTestCase): + """RealElasticTestCase: base test case with actual elasticsearch running""" + + def setUp(self): + self.enterContext(djelme_test_backends()) + + +class NoSetupRealElasticTestCase(SimpleDjelmeTestCase): + """NoSetupRealElasticTestCase: base test case with actual elasticsearch running + + same as RealElasticTestCase but skip djelme backend setup + """ + + def setUp(self): + clear_setup_check_caches() + self.enterContext(prefixed_index_names()) + super().setUp() + + def tearDown(self): + teardown_backends() + super().tearDown() + + +class MockConnectionTestCase(SimpleDjelmeTestCase): + def setUp(self): + super().setUp() + clear_setup_check_caches() + self.mock_es6_connection = mock.Mock() + self.mock_es8_connection = mock.Mock() + self.mock_es6_connection.index.return_value = {"result": "created"} + self.mock_es8_connection.index.return_value = {"result": "created"} + self.enterContext( + mock.patch( + "elasticsearch_metrics.imps.elastic6.Document._get_connection", + return_value=self.mock_es6_connection, + ), + ) + self.enterContext( + mock.patch( + 
"elasticsearch_metrics.imps.elastic8.esdsl.Document._get_connection", + return_value=self.mock_es8_connection, + ), + ) + self.mock_es6_require_been_setup = self.enterContext( + mock.patch("elasticsearch_metrics.imps.elastic6.Metric.require_been_setup"), + ) + self.mock_es8_require_been_setup = self.enterContext( + mock.patch( + "elasticsearch_metrics.imps.elastic8.BaseDjelmeRecord.require_been_setup" + ), + ) + + +@contextlib.contextmanager +def prefixed_index_names(prefix: str = ""): + _name_prefix = prefix or f"testrun{uuid.uuid4().hex}_" + with ( + mock.patch( + "elasticsearch_metrics.imps.elastic8.BaseDjelmeRecord.get_index_name_prefix", + return_value=_name_prefix, + ), + mock.patch( + "elasticsearch_metrics.imps.elastic6.BaseMetric.get_index_name_prefix", + return_value=_name_prefix, + ), + ): + yield + + +def clear_setup_check_caches(): + from elasticsearch_metrics.imps import elastic6, elastic8 + + elastic6.Metric.require_been_setup.cache_clear() + elastic8.BaseDjelmeRecord.require_been_setup.cache_clear() + + +def setup_backends(): + # backends based on settings in django.conf.settings.DJELME_BACKENDS + _types_by_backend = djelme_registry.recordtypes_by_backend() + for _backend_name, _recordtypes in _types_by_backend.items(): + djelme_registry.get_backend(_backend_name).djelme_setup(_recordtypes) + + +def teardown_backends(): + # backends based on settings in django.conf.settings.DJELME_BACKENDS + _types_by_backend = djelme_registry.recordtypes_by_backend() + for _backend_name, _recordtypes in _types_by_backend.items(): + djelme_registry.get_backend(_backend_name).djelme_teardown(_recordtypes) diff --git a/elasticsearch_metrics/util/anon_enough.py b/elasticsearch_metrics/util/anon_enough.py index 1deb22b..2c97684 100644 --- a/elasticsearch_metrics/util/anon_enough.py +++ b/elasticsearch_metrics/util/anon_enough.py @@ -21,6 +21,9 @@ def opaque_key( >>> opaque_key(['hello', 'hello', 'hello']) 
'*58u_=`?3#G!N(%j!3kqU7#Npt>Xvj=|3<75BRoi$0j;F-*3V+Cc?P1FvcVW76T_`5^NaI*_3787SsBn' """ + _parts = [str(_part) for _part in key_parts] + if not _parts: + raise ValueError("opaque_key expects at least one key part") _plain_key = json.dumps([str(_part) for _part in key_parts]) return base64.b85encode( hashlib.blake2b(bytes(_plain_key, encoding="utf")).digest() @@ -33,23 +36,24 @@ def opaque_sessionhour_id( user_id: str = "", request_host: str = "", request_useragent: str = "", + timestamp: datetime.datetime | None = None, ) -> str: """opaque_sessionhour_id: get a hashed id for a "user session" compatible with COUNTER code of practice: https://cop5.projectcounter.org/en/5.0.2/07-processing/03-counting-unique-items.html """ - _now = timezone.now().astimezone(datetime.timezone.utc) + _now = timestamp or timezone.now().astimezone(datetime.timezone.utc) _today_str = _now.date().isoformat() # "A user session is defined any of the following ways: ..." (quotes out of order) - if client_session_id: - # "...by a logged user cookie + transaction date + hour of day..." - _session_id_parts = [client_session_id, _today_str, _now.hour] - elif user_id: + if user_id: # "...by a logged user ID (if users log in with personal accounts) # + transaction date + hour of day (day is divided into 24 one-hour slices) ..." _session_id_parts = [user_id, _today_str, _now.hour] + elif client_session_id: + # "...by a logged user cookie + transaction date + hour of day..." + _session_id_parts = [client_session_id, _today_str, _now.hour] elif request_host and request_useragent: # "...or by a combination of IP address + user agent + transaction date + hour of day." 
_session_id_parts = [request_host, request_useragent, _today_str, _now.hour] @@ -65,8 +69,10 @@ def opaque_sessionhour_id( >>> _now_patcher.start() and None >>> opaque_sessionhour_id(client_session_id='foo') 'R26L*vmd?|G}S5AZ}ONXq>^B*T-!TLCE`uboEXF-LpK8Hysi$nUve^2aG~PTWiX<6BDv}wQtowotKSdV' ->>> opaque_sessionhour_id(client_session_id='feh', user_id='blah') -'JfeGFBfil1y$8fnmhi)8LU4}9vUBX6VfHmDPiVfiB~0nT&%3tKWsTTF_z2wynPj}`EF=}Y6=?}e5nDK0' +>>> opaque_sessionhour_id(user_id='blah') +'vToyLf@So{-(dZ*?<`d{f{|w+2j^OaNw{yYKEJ6}q4#288|8^lai=Hy@F-c?19rafSrA{mb&$z*p6AQ>' +>>> opaque_sessionhour_id(user_id='blah', client_session_id='feh') +'vToyLf@So{-(dZ*?<`d{f{|w+2j^OaNw{yYKEJ6}q4#288|8^lai=Hy@F-c?19rafSrA{mb&$z*p6AQ>' >>> opaque_sessionhour_id(request_host='999.999.999.999', request_useragent='hehe') 'Q^-x^v~@WQRHrWsbbji+pNmz)1`sp3SywCJ4n`W_aoY0tfbL6byxqUpw#DXoqU3>DtZC*^D@qjc7EmO=' >>> _now_patcher.stop() or None diff --git a/elasticsearch_metrics/util/django.py b/elasticsearch_metrics/util/django.py index 1f213d3..78ac57c 100644 --- a/elasticsearch_metrics/util/django.py +++ b/elasticsearch_metrics/util/django.py @@ -3,17 +3,15 @@ from django.apps import apps from django.core import exceptions -__all__ = ("find_app_label_for_type",) +__all__ = ("find_app_label_for_module",) -def find_app_label_for_type(given_type: type) -> str: - # look for an installed django app the given type is defined within - _given_module_name = given_type.__module__ +def find_app_label_for_module(module_name: str) -> str: _containing_app_configs = ( _app_config for _app_config in apps.get_app_configs() if (_app_config.module is not None) - and _given_module_name.startswith(_app_config.module.__name__) + and module_name.startswith(_app_config.module.__name__) ) _nearest_containing_app_config = max( _containing_app_configs, @@ -24,9 +22,7 @@ def find_app_label_for_type(given_type: type) -> str: ) if _nearest_containing_app_config is None: raise exceptions.ImproperlyConfigured( - f"type 
{given_type.__module__}.{given_type.__qualname__} " - "doesn't declare an explicit app_label and isn't in an " - "application in INSTALLED_APPS." + f"module {module_name} isn't in an application in INSTALLED_APPS." ) _label = _nearest_containing_app_config.label if not (_label and isinstance(_label, str)): diff --git a/elasticsearch_metrics/util/unique_together.py b/elasticsearch_metrics/util/unique_together.py deleted file mode 100644 index 0906d76..0000000 --- a/elasticsearch_metrics/util/unique_together.py +++ /dev/null @@ -1,16 +0,0 @@ -import collections - -from elasticsearch_metrics.util.anon_enough import opaque_key - - -def get_unique_id(unique_together_field_values: collections.abc.Iterable[str]) -> str: - # Set the document id to a hash of "unique together" fields - # for "ON CONFLICT UPDATE" behavior -- if the document - # already exists, it will be updated rather than duplicated. - # Cannot detect/avoid conflicts this way, but that's ok. - _key_values = [] - for _field_value in unique_together_field_values: - if not isinstance(_field_value, str): - raise ValueError(f"expected str, got {_field_value!r}") - _key_values.append(_field_value) - return opaque_key(_key_values) diff --git a/pyproject.toml b/pyproject.toml index d317014..b729aaf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "django-elasticsearch-metrics" -version = "2026.0.3" +version = "2026.0.4" description = "Django app for storing time-series metrics in Elasticsearch." authors = [ {name = "CenterForOpenScience", email = "support@cos.io"}