From 321b56b991d391a8b65e7bc14941d1a6a052afc9 Mon Sep 17 00:00:00 2001 From: docktermj Date: Mon, 23 Jun 2025 11:05:41 -0400 Subject: [PATCH] #298 Add punchlines --- src/senzing/szconfig.py | 11 ++-- src/senzing/szconfigmanager.py | 31 ++++++----- src/senzing/szdiagnostic.py | 10 ++-- src/senzing/szengine.py | 94 ++++++++++++++++++++++------------ src/senzing/szproduct.py | 7 ++- 5 files changed, 92 insertions(+), 61 deletions(-) diff --git a/src/senzing/szconfig.py b/src/senzing/szconfig.py index 387a18e1..a40ad266 100644 --- a/src/senzing/szconfig.py +++ b/src/senzing/szconfig.py @@ -35,7 +35,7 @@ class SzConfig(ABC): @abstractmethod def add_data_source(self, data_source_code: str) -> str: """ - The `add_data_source` method adds a data source to an existing in-memory configuration. + The `add_data_source` method adds a new data source to this instance. Args: data_source_code (str): Name of data source code to add. @@ -62,7 +62,7 @@ def add_data_source(self, data_source_code: str) -> str: @abstractmethod def delete_data_source(self, data_source_code: str) -> str: """ - The `delete_data_source` method removes a data source from an existing in-memory configuration. + The `delete_data_source` method deletes a data source from this instance. Args: data_source_code (str): Name of data source code to delete. @@ -86,7 +86,7 @@ def delete_data_source(self, data_source_code: str) -> str: @abstractmethod def export(self) -> str: """ - The `export` method creates a JSON string representation of the Senzing SzConfig object. + The `export` method retrieves the configuration definition for this instance. Args: @@ -112,8 +112,7 @@ def export(self) -> str: @abstractmethod def get_data_sources(self) -> str: """ - The `get_data_sources` method returns a JSON document of data sources - contained in an in-memory configuration. + The `get_data_sources` method gets the data sources for this instance. 
Args: @@ -142,7 +141,7 @@ def get_data_sources(self) -> str: def help(self, method_name: str = "") -> str: """ - Return the help for a particular message. + The `help` method returns help for a particular method. Args: method_name (str): The name of the method. (e.g. "init"). If empty, a list of methods and descriptions is returned. diff --git a/src/senzing/szconfigmanager.py b/src/senzing/szconfigmanager.py index 82e1fd8d..4bd138f1 100644 --- a/src/senzing/szconfigmanager.py +++ b/src/senzing/szconfigmanager.py @@ -36,8 +36,7 @@ class SzConfigManager(ABC): @abstractmethod def create_config_from_config_id(self, config_id: int) -> SzConfig: """ - The `create_config_from_config_id` method creates an in-memory Senzing configuration - from a specific Senzing configuration stored in the Senzing database. + The `create_config_from_config_id` method creates a new SzConfig instance for a configuration ID. Args: config_id (int): The configuration identifier of the desired Senzing configuration to retrieve. @@ -64,8 +63,7 @@ def create_config_from_config_id(self, config_id: int) -> SzConfig: @abstractmethod def create_config_from_string(self, config_definition: str) -> SzConfig: """ - The `create_config_from_string` method creates an in-memory Senzing configuration - from the given Senzing configuration JSON document. + The `create_config_from_string` method creates a new SzConfig instance from a configuration definition. Args: config_definition (str): The Senzing configuration JSON document. @@ -92,8 +90,10 @@ def create_config_from_string(self, config_definition: str) -> SzConfig: @abstractmethod def create_config_from_template(self) -> SzConfig: """ - The `create_config_from_template` method creates an in-memory Senzing configuration - from the template Senzing configuration JSON document located at PIPELINE.RESOURCEPATH/templates/g2config.json + The `create_config_from_template` method creates a new SzConfig instance + from the template configuration definition. 
+ + The template configuration is located at PIPELINE.RESOURCEPATH/templates/g2config.json Args: config_definition (str): The Senzing configuration JSON document. @@ -120,7 +120,7 @@ def create_config_from_template(self) -> SzConfig: @abstractmethod def get_config_registry(self) -> str: """ - The `get_config_registry` method retrieves a list of Senzing configurations from the Senzing database. + The `get_config_registry` method gets the configuration registry. Returns: str: A JSON document containing Senzing configurations. @@ -144,7 +144,7 @@ def get_config_registry(self) -> str: @abstractmethod def get_default_config_id(self) -> int: """ - The `get_default_config_id` method retrieves from the Senzing database the configuration identifier of the default Senzing configuration. + The `get_default_config_id` method gets the default configuration ID for the repository. Returns: int: A configuration identifier which identifies the current configuration in use. @@ -168,7 +168,7 @@ def get_default_config_id(self) -> int: @abstractmethod def register_config(self, config_definition: str, config_comment: str) -> int: """ - The `register_config` method adds a Senzing configuration JSON document to the Senzing database. + The `register_config` method registers a configuration definition with the repository. Args: config_definition (str): The Senzing configuration JSON document. @@ -196,7 +196,9 @@ def register_config(self, config_definition: str, config_comment: str) -> int: @abstractmethod def replace_default_config_id(self, current_default_config_id: int, new_default_config_id: int) -> None: """ - The `replace_default_config_id` method replaces the old configuration identifier with a new configuration identifier in the Senzing database. + The `replace_default_config_id` method replaces the existing default configuration ID with a new + configuration ID. + It is like a "compare-and-swap" instruction to serialize concurrent editing of configuration. 
If `current_default_config_id` is no longer the "current configuration identifier", the operation will fail. To simply set the default configuration ID, use `set_default_config_id`. @@ -218,7 +220,9 @@ def replace_default_config_id(self, current_default_config_id: int, new_default_ @abstractmethod def set_default_config(self, config_definition: str, config_comment: str) -> int: """ - The `set_default_config` method replaces the current default Senzing configuration in the Senzing database. + The `set_default_config` method registers a configuration with the repository and sets its ID as the default + for the repository. + To serialize modifying of the configuration identifier, see `replace_default_config_id`. Args: @@ -244,7 +248,8 @@ def set_default_config(self, config_definition: str, config_comment: str) -> int @abstractmethod def set_default_config_id(self, config_id: int) -> None: """ - The `set_default_config_id` method replaces and sets a new configuration identifier in the Senzing database. + The `set_default_config_id` method sets the default configuration ID. + To serialize modifying of the configuration identifier, see `replace_default_config_id`. Args: @@ -266,7 +271,7 @@ def set_default_config_id(self, config_id: int) -> None: def help(self, method_name: str = "") -> str: """ - Return the help for a particular message. + The `help` method returns help for a particular method. Args: method_name (str): The name of the method. (e.g. "init"). If empty, a list of methods and descriptions is returned. diff --git a/src/senzing/szdiagnostic.py b/src/senzing/szdiagnostic.py index 2f8cd5c3..f541ac31 100644 --- a/src/senzing/szdiagnostic.py +++ b/src/senzing/szdiagnostic.py @@ -32,7 +32,7 @@ class SzDiagnostic(ABC): @abstractmethod def check_datastore_performance(self, seconds_to_run: int) -> str: """ - The `check_datastore_performance` method performs inserts to determine rate of insertion. 
+ The `check_datastore_performance` method conducts a rudimentary datastore test to gauge I/O performance. Args: seconds_to_run (int): Duration of the test in seconds. @@ -60,7 +60,7 @@ def check_datastore_performance(self, seconds_to_run: int) -> str: @abstractmethod def get_datastore_info(self) -> str: """ - The `get_datastore_info` method returns a JSON document with details of the datastore + The `get_datastore_info` method returns overview information about the datastore currently in use by Senzing. Raises: @@ -82,13 +82,13 @@ def get_datastore_info(self) -> str: # NOTE This is experimental and for internal diagnostics, not to be documented @abstractmethod def get_feature(self, feature_id: int) -> str: # pylint: disable=empty-docstring - """""" + """Experimental/internal for Senzing support use only.""" @abstractmethod def purge_repository(self) -> None: """ **Warning:** - The `purge_repository` method removes every record in the Senzing repository. + The `purge_repository` method purges all entity data in the entire repository. Before calling `purge_repository` all other instances of the Senzing API MUST be destroyed or shutdown. @@ -108,7 +108,7 @@ def purge_repository(self) -> None: def help(self, method_name: str = "") -> str: """ - Return the help for a particular message. + The `help` method returns help for a particular method. Args: method_name (str): The name of the method. (e.g. "init"). If empty, a list of methods and descriptions is returned. diff --git a/src/senzing/szengine.py b/src/senzing/szengine.py index b5cb85cf..e3aaa540 100644 --- a/src/senzing/szengine.py +++ b/src/senzing/szengine.py @@ -43,7 +43,8 @@ def add_record( flags: int = SzEngineFlags.SZ_ADD_RECORD_DEFAULT_FLAGS, ) -> str: """ - The `add_record` method adds a record into the Senzing repository. + The `add_record` method loads a record into the repository. + Can be called as many times as desired and from multiple threads at the same time. 
Args: @@ -73,7 +74,8 @@ def add_record( @abstractmethod def close_export(self, export_handle: int) -> None: """ - The `close_export` method closes the exported document created by `export_json_entity_report`. + The `close_export` method closes an export handle of a previous export operation. + It is part of the `export_json_entity_report`, `fetch_next`, `close_export` lifecycle of a list of sized entities. @@ -98,7 +100,7 @@ def close_export(self, export_handle: int) -> None: @abstractmethod def count_redo_records(self) -> int: """ - The `count_redo_records` method returns the number of records in need of redo-ing. + The `count_redo_records` method gets the number of redo records pending processing. Returns: int: The number of redo records in Senzing's redo queue. @@ -126,7 +128,8 @@ def delete_record( flags: int = SzEngineFlags.SZ_DELETE_RECORD_DEFAULT_FLAGS, ) -> str: """ - The `delete_record` method deletes a record from the Senzing repository. + The `delete_record` method deletes a record from the repository. + Can be called as many times as desired and from multiple threads at the same time. Args: @@ -158,10 +161,11 @@ def export_csv_entity_report( flags: int = SzEngineFlags.SZ_EXPORT_DEFAULT_FLAGS, ) -> int: """ + The `export_csv_entity_report` method initiates an export of entity data in CSV format. + **Warning:** `export_csv_entity_report` is not recommended for large systems as it does not scale. It is recommended larger systems implement real-time replication to a data warehouse. - The `export_csv_entity_report` method initializes a cursor over a document of exported entities. It is part of the `export_csv_entity_report`, `fetch_next`, `close_export` lifecycle of a list of entities to export. @@ -198,10 +202,11 @@ def export_csv_entity_report( @abstractmethod def export_json_entity_report(self, flags: int = SzEngineFlags.SZ_EXPORT_DEFAULT_FLAGS) -> int: """ + The `export_json_entity_report` method initiates an export of entity data in JSON format. 
+ **Warning:** `export_json_entity_report` is not recommended for large systems as it does not scale. It is recommended larger systems implement real-time replication to a data warehouse. - The `export_json_entity_report` method initializes a cursor over a document of exported entities. It is part of the `export_json_entity_report`, `fetch_next`, `close_export` lifecycle of a list of entities to export. @@ -229,7 +234,8 @@ def export_json_entity_report(self, flags: int = SzEngineFlags.SZ_EXPORT_DEFAULT @abstractmethod def fetch_next(self, export_handle: int) -> str: """ - The `fetch_next` method is used to scroll through an exported document one entity at a time. + The `fetch_next` method fetches the next line of entity data from an open export operation. + Successive calls of `fetch_next` will export successive rows of entity data until there is no more. It is part of the `export_json_entity_report` or `export_json_entity_report`, `fetch_next`, `close_export` lifecycle of a list of exported entities. @@ -261,7 +267,11 @@ def fetch_next(self, export_handle: int) -> str: def find_interesting_entities_by_entity_id( self, entity_id: int, flags: int = SzEngineFlags.SZ_FIND_INTERESTING_ENTITIES_DEFAULT_FLAGS ) -> str: - """""" + """ + Experimental method. + + Contact Senzing support. + """ # NOTE - Not to be documented or examples, early adaptor feature, needs manual additions to config @abstractmethod @@ -271,7 +281,11 @@ def find_interesting_entities_by_record_id( record_id: str, flags: int = SzEngineFlags.SZ_FIND_INTERESTING_ENTITIES_DEFAULT_FLAGS, ) -> str: - """""" + """ + Experimental method. + + Contact Senzing support. + """ # pylint: enable=empty-docstring @@ -285,7 +299,9 @@ def find_network_by_entity_id( flags: int = SzEngineFlags.SZ_FIND_NETWORK_DEFAULT_FLAGS, ) -> str: """ - The `find_network_by_entity_id` method finds all entities surrounding a requested set of entities. 
+ The `find_network_by_entity_id` method discovers a network of entity relationships among entities + based on entity IDs. + This includes the requested entities, paths between them, and relations to other nearby entities. Returns a JSON document that identifies the path between the each set of search entities (if the path exists), and the information for the entities in the path. @@ -325,7 +341,9 @@ def find_network_by_record_id( flags: int = SzEngineFlags.SZ_FIND_NETWORK_DEFAULT_FLAGS, ) -> str: """ - The `find_network_by_record_id` method finds all entities surrounding a requested set of entities by their RECORD_ID values. + The `find_network_by_record_id` method discovers a network of entity relationships among entities + based on record IDs. + This includes the requested entities, paths between them, and relations to other nearby entities. Returns a JSON document that identifies the path between the each set of search entities (if the path exists), and the information for the entities in the path. @@ -366,7 +384,10 @@ def find_path_by_entity_id( flags: int = SzEngineFlags.SZ_FIND_PATH_DEFAULT_FLAGS, ) -> str: """ - The `find_path_by_entity_id` method finds the most efficient relationship between two entities path based on the parameters + The `find_path_by_entity_id` method searches for an entity relationship path between two entities based + on entity IDs. + + It finds the most efficient relationship path between two entities based on the parameters and returns a JSON document with an ENTITY_PATHS section that details the path between the entities. The ENTITIES sections details information on the entities. Paths are found using known relationships with other entities. Paths are found using known relationships with other entities. 
@@ -410,7 +431,10 @@ def find_path_by_record_id( flags: int = SzEngineFlags.SZ_FIND_PATH_DEFAULT_FLAGS, ) -> str: """ - The `find_path_by_record_id` method finds the most efficient relationship between + The `find_path_by_record_id` method searches for an entity relationship path between two entities + based on record IDs. + + It finds the most efficient relationship between two entities path based on the parameters by RECORD_ID values and returns a JSON document with an ENTITY_PATHS section that details the path between the entities. The ENTITIES sections details information on the entities. @@ -448,7 +472,7 @@ def find_path_by_record_id( @abstractmethod def get_active_config_id(self) -> int: """ - The `get_active_config_id` method returns the identifier of the currently active Senzing engine configuration. + The `get_active_config_id` method gets the currently active configuration ID. Returns: int: The identifier of the active Senzing Engine configuration. @@ -475,7 +499,7 @@ def get_entity_by_entity_id( flags: int = SzEngineFlags.SZ_ENTITY_DEFAULT_FLAGS, ) -> str: """ - The `get_entity_by_entity_id` method returns entity data based on the ID of a resolved identity. + The `get_entity_by_entity_id` method retrieves information about an entity based on entity ID. Args: entity_id (int): The unique identifier of an entity. @@ -507,7 +531,7 @@ def get_entity_by_record_id( flags: int = SzEngineFlags.SZ_ENTITY_DEFAULT_FLAGS, ) -> str: """ - The `get_entity_by_record_id` method returns entity data based on the ID of a record which is a member of the entity. + The `get_entity_by_record_id` method retrieves information about an entity based on record ID. Args: data_source_code (str): Identifies the provenance of the data. @@ -540,7 +564,8 @@ def get_record( flags: int = SzEngineFlags.SZ_RECORD_DEFAULT_FLAGS, ) -> str: """ - The `get_record` method returns a JSON document of a single record from the Senzing repository. 
+ The `get_record` method gets the record definition for a record. + Can be called as many times as desired and from multiple threads at the same time. Args: @@ -569,7 +594,8 @@ def get_record( @abstractmethod def get_redo_record(self) -> str: """ - The `get_redo_record` method returns the next internally queued redo record from the Senzing repository. + The `get_redo_record` method retrieves a pending redo record from the reevaluation queue. + The `process_redo_record` method is called to process the redo record retrieved by `get_redo_record`. Returns: @@ -593,7 +619,8 @@ def get_redo_record(self) -> str: @abstractmethod def get_stats(self) -> str: """ - The `get_stats` method retrieves workload statistics for the current process. + The `get_stats` method gets the internal engine workload statistics for the current process. + These statistics will automatically reset after retrieval. Returns: @@ -621,8 +648,9 @@ def get_virtual_entity_by_record_id( flags: int = SzEngineFlags.SZ_VIRTUAL_ENTITY_DEFAULT_FLAGS, ) -> str: """ - The `get_virtual_entity_by_record_id` method creates a view of a virtual entity - using a list of existing loaded records. + The `get_virtual_entity_by_record_id` method describes what an entity would look like + for a given set of records. + The virtual entity is composed of only those records and their features. Entity resolution is not performed. @@ -655,7 +683,7 @@ def how_entity_by_entity_id( flags: int = SzEngineFlags.SZ_HOW_ENTITY_DEFAULT_FLAGS, ) -> str: """ - The `how_entity_by_entity_id` method determines and details steps-by-step *how* records resolved to a single entity. + The `how_entity_by_entity_id` method describes how an entity was constructed from its constituent records. In most cases, *how* provides more detailed information than *why* as the resolution is detailed step-by-step. 
@@ -688,7 +716,7 @@ def preprocess_record( flags: int = SzEngineFlags.SZ_PREPROCESS_RECORD_DEFAULT_FLAGS, ) -> str: """ - The `preprocess_record` method tests adding a record into the Senzing datastore. + The `preprocess_record` method describes the features resulting from the hypothetical load of a record. Args: record_definition (str): A JSON document containing the record to be tested. @@ -715,8 +743,9 @@ def preprocess_record( @abstractmethod def prime_engine(self) -> None: """ - The `prime_engine` method initializes high resource consumption components of Senzing - used in some functions. If this call is not made, these resources are initialized the + The `prime_engine` method pre-loads engine resources to reduce latency of initial entity resolution processing. + + If this call is not made, these resources are initialized the first time they are needed and can cause unusually long processing times the first time a function is called that requires these resources. @@ -732,7 +761,7 @@ def prime_engine(self) -> None: @abstractmethod def process_redo_record(self, redo_record: str, flags: int = 0) -> str: """ - The `process_redo_record` method is called to process the redo record retrieved by `get_redo_record`. + The `process_redo_record` method processes a redo record retrieved by `get_redo_record`. Args: redo_record (str): A redo record retrieved from get_redo_record. @@ -815,7 +844,7 @@ def search_by_attributes( search_profile: str = "", ) -> str: """ - The `search_by_attributes` method retrieves entity data based on a user-specified set of entity attributes. + The `search_by_attributes` method searches for entities that match or relate to the provided attributes. Args: attributes (str): A JSON document with the attribute data to search for. @@ -848,7 +877,7 @@ def why_entities( flags: int = SzEngineFlags.SZ_WHY_ENTITIES_DEFAULT_FLAGS, ) -> str: """ - The `why_entities` method determines why entities did not resolve or why they do relate. 
+ The `why_entities` method describes the ways two entities relate to each other. Args: entity_id_1 (int): The entity ID for the starting entity of the search path. @@ -881,7 +910,7 @@ def why_record_in_entity( flags: int = SzEngineFlags.SZ_WHY_RECORD_IN_ENTITY_DEFAULT_FLAGS, ) -> str: """ - The `why_record_in_entity` method determines why a record is included in an entity. + The `why_record_in_entity` method describes why a record is in its respective entity. Args: data_source_code (str): Identifies the provenance of the data. @@ -916,7 +945,7 @@ def why_records( flags: int = SzEngineFlags.SZ_WHY_RECORDS_DEFAULT_FLAGS, ) -> str: """ - The `why_records` determines if any two records can or cannot resolve together, or if they relate. + The `why_records` method describes the ways two records relate to each other. Args: data_source_code_1 (str): Identifies the provenance of the data. @@ -952,8 +981,7 @@ def why_search( search_profile: str = "", ) -> str: """ - The `why_search` method retrieves entity data based on a specific entity Id and a - user-specified set of entity attributes. + The `why_search` method describes why an entity did not match or relate to a set of search attributes. Args: attributes (str): A JSON document with the attribute data to search for. @@ -985,7 +1013,7 @@ def why_search( def help(self, method_name: str = "") -> str: """ - Return the help for a particular message. + The `help` method returns help for a particular method. Args: method_name (str): The name of the method. (e.g. "init"). If empty, a list of methods and descriptions is returned. diff --git a/src/senzing/szproduct.py b/src/senzing/szproduct.py index fe6b5ca6..cbbec25e 100644 --- a/src/senzing/szproduct.py +++ b/src/senzing/szproduct.py @@ -34,7 +34,7 @@ class SzProduct(ABC): @abstractmethod def get_license(self) -> str: """ - The `get_license` method retrieves information about the currently used license. + The `get_license` method gets the product license details. 
Returns: str: A JSON document containing Senzing license metadata. @@ -55,8 +55,7 @@ def get_license(self) -> str: @abstractmethod def get_version(self) -> str: """ - The `get_version` method returns the version of Senzing. - + The `get_version` method gets the product version details. Returns: str: A JSON document containing metadata about the Senzing Engine version being used. @@ -79,7 +78,7 @@ def get_version(self) -> str: def help(self, method_name: str = "") -> str: """ - Return the help for a particular message. + The `help` method returns help for a particular method. Args: method_name (str): The name of the method. (e.g. "init"). If empty, a list of methods and descriptions is returned.