diff --git a/docs/source/publishing/ogcapi-features.rst b/docs/source/publishing/ogcapi-features.rst index 162f1921f..b95158917 100644 --- a/docs/source/publishing/ogcapi-features.rst +++ b/docs/source/publishing/ogcapi-features.rst @@ -703,11 +703,27 @@ These are optional and if not specified, the default from the engine will be use # Number of seconds after which a TCP keepalive message that is not # acknowledged by the server should be retransmitted. keepalives_interval: 1 + # SQLAlchemy connection-pool tuning (optional). Defaults match + # SQLAlchemy's QueuePool and preserve previous behaviour. + # Persistent connections held open per worker process. + pool_size: 5 + # Extra short-lived connections allowed above pool_size. + max_overflow: 10 + # Recreate connections older than this many seconds. -1 (the + # default) never recycles; set a finite value (e.g. 300) so + # pooled connections cannot sit IDLE on the server indefinitely. + pool_recycle: -1 + # Seconds to wait for a connection from the pool before erroring. + pool_timeout: 30 + # Test connections with a lightweight ping before use. + pool_pre_ping: true id_field: osm_id table: hotosm_bdi_waterways geom_field: foo_geom count: true # Optional; Default true; Enable/disable count for improved performance. +``get_engine()`` is cached per worker process, so providers that share the same database connection should use identical pool options to keep sharing a single engine; differing pool options intentionally create separate engines. + The PostgreSQL provider is also able to connect to Cloud SQL databases. ..
code-block:: yaml diff --git a/pygeoapi/provider/sql.py b/pygeoapi/provider/sql.py index 868ee4f88..121f48ac2 100644 --- a/pygeoapi/provider/sql.py +++ b/pygeoapi/provider/sql.py @@ -615,6 +615,11 @@ def store_db_parameters( connection_data.get('search_path') or options.pop('search_path', ['public']) ) + # Connection-pool tuning keys (pool_size, max_overflow, pool_recycle, + # pool_timeout, pool_pre_ping) are intentionally left in ``options`` and + # flow through ``db_options`` to get_engine(), which separates them from + # the DBAPI connect_args. Their types are validated by the config JSON + # Schema, so no coercion is performed here. self.db_options = { k: v for k, v in options.items() @@ -622,6 +627,20 @@ def store_db_parameters( } +#: Connection-pool tuning keys recognised by get_engine(). These configure +#: SQLAlchemy's QueuePool rather than the DBAPI, so get_engine() separates +#: them from connect_args. The defaults reproduce pygeoapi's previous +#: behaviour exactly: SQLAlchemy's own QueuePool defaults, except for +#: pool_pre_ping, which was previously hardcoded to True. +POOL_OPTION_DEFAULTS = { + 'pool_size': 5, + 'max_overflow': 10, + 'pool_recycle': -1, # SQLAlchemy default; never recycles connections + 'pool_timeout': 30, + 'pool_pre_ping': True, +} + + @functools.cache def get_engine( driver_name: str, @@ -643,7 +662,11 @@ def get_engine( :param user: database user :param password: database password :param conn_str: optional connection URL - :param connect_args: custom connection arguments to pass to create_engine() + :param connect_args: keyword arguments forwarded from the provider's + ``options`` block. Connection-pool tuning keys (see + POOL_OPTION_DEFAULTS) are extracted and applied to + the engine's pool; any remaining keys are passed to + the DBAPI as connect_args. :returns: SQL Alchemy engine """ @@ -657,11 +680,28 @@ def get_engine( database=database ) + # Separate connection-pool tuning from DBAPI connect args. 
Pool keys are + # applied to create_engine() directly; everything left in connect_args is + # forwarded to the DBAPI. get_engine() stays functools.cache()-able + # because connect_args values are hashable scalars, so engine sharing per + # process is preserved; providers with differing pool config (or any + # other option) correctly get distinct engines. + pool_options = { + key: connect_args.pop(key, default) + for key, default in POOL_OPTION_DEFAULTS.items() + } + engine = create_engine( - conn_str, connect_args=connect_args, pool_pre_ping=True + conn_str, + connect_args=connect_args, + **pool_options + ) + + LOGGER.debug( + f'Created engine for {repr(engine.url)} ' + f'with pool options {pool_options}.' ) - LOGGER.debug(f'Created engine for {repr(engine.url)}.') return engine diff --git a/pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml b/pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml index 4d0b77dd8..73190d22c 100644 --- a/pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml +++ b/pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml @@ -734,8 +734,23 @@ definitions: type: integer keepalives_interval: type: integer + pool_size: + type: integer + description: persistent connections held open per worker process + max_overflow: + type: integer + description: extra short-lived connections allowed above pool_size + pool_recycle: + type: integer + description: recreate connections older than this many seconds (-1 never recycles) + pool_timeout: + type: integer + description: seconds to wait for a connection from the pool before erroring + pool_pre_ping: + type: boolean + description: test connections with a lightweight ping before use required: - server - logging - metadata - - resources + - resources \ No newline at end of file diff --git a/tests/provider/test_sql_pool_options.py b/tests/provider/test_sql_pool_options.py new file mode 100644 index 000000000..3c3419c2a --- /dev/null +++ b/tests/provider/test_sql_pool_options.py @@ -0,0 
+1,56 @@ +# ================================================================= +# +# Authors: Edward Lewis +# +# Copyright (c) 2026 Edward Lewis +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# ================================================================= +# Test that get_engine() separates SQLAlchemy connection-pool tuning +# options from DBAPI connect_args. This is the contract introduced by +# the configurable-pool change; it needs no live database. 
+# =================================================================
+
+from unittest import mock
+
+from pygeoapi.provider import sql
+
+
+@mock.patch.object(sql, 'create_engine')
+def test_get_engine_separates_pool_options_from_connect_args(mock_create):
+    sql.get_engine.cache_clear()
+    sql.get_engine(
+        'postgresql+psycopg2', 'h', 5432, 'd', 'u', 'p', None,
+        pool_size=2, pool_recycle=300, connect_timeout=10,
+    )
+    sql.get_engine.cache_clear()  # never leave the mocked engine cached
+    _, kwargs = mock_create.call_args
+    # pool keys are applied to the engine (QueuePool), with overrides
+    # honoured and unset pool keys falling back to the documented defaults
+    assert kwargs['pool_size'] == 2
+    assert kwargs['pool_recycle'] == 300
+    assert kwargs['max_overflow'] == 10
+    assert kwargs['pool_timeout'] == 30
+    assert kwargs['pool_pre_ping'] is True
+    # genuine DBAPI args are forwarded via connect_args; pool keys are not
+    assert kwargs['connect_args'] == {'connect_timeout': 10}