Skip to content
Merged
16 changes: 16 additions & 0 deletions docs/source/publishing/ogcapi-features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -703,11 +703,27 @@ These are optional and if not specified, the default from the engine will be use
# Number of seconds after which a TCP keepalive message that is not
# acknowledged by the server should be retransmitted.
keepalives_interval: 1
# SQLAlchemy connection-pool tuning (optional). Defaults match
# SQLAlchemy's QueuePool, except pool_pre_ping (enabled by default
# here), and preserve previous behaviour.
# Persistent connections held open per worker process.
pool_size: 5
# Extra short-lived connections allowed above pool_size.
max_overflow: 10
# Recreate connections older than this many seconds. -1 (the
# default) never recycles; set a finite value (e.g. 300) so
# pooled connections cannot sit IDLE on the server indefinitely.
pool_recycle: -1
# Seconds to wait for a connection from the pool before erroring.
pool_timeout: 30
# Test connections with a lightweight ping before use.
pool_pre_ping: true
Comment thread
KoalaGeo marked this conversation as resolved.
id_field: osm_id
table: hotosm_bdi_waterways
geom_field: foo_geom
count: true # Optional; Default true; Enable/disable count for improved performance.

``get_engine()`` is cached per worker process, so providers that share the same database connection should use identical pool options to keep sharing a single engine; differing pool options intentionally create separate engines.

The PostgreSQL provider is also able to connect to Cloud SQL databases.

.. code-block:: yaml
Expand Down
46 changes: 43 additions & 3 deletions pygeoapi/provider/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,13 +615,32 @@ def store_db_parameters(
connection_data.get('search_path') or
options.pop('search_path', ['public'])
)
# Connection-pool tuning keys (pool_size, max_overflow, pool_recycle,
# pool_timeout, pool_pre_ping) are intentionally left in ``options`` and
# flow through ``db_options`` to get_engine(), which separates them from
# the DBAPI connect_args. Their types are validated by the config JSON
# Schema, so no coercion is performed here.
self.db_options = {
k: v
for k, v in options.items()
if not isinstance(v, dict)
}


#: Names of the connection-pool tuning options understood by get_engine(),
#: mapped to the value used when a provider does not supply one.
#: get_engine() pulls these keys out of connect_args and hands them to
#: create_engine() as pool settings instead of forwarding them to the DBAPI.
#: The values keep pygeoapi's earlier behaviour: SQLAlchemy's QueuePool
#: defaults, plus pool_pre_ping=True, which used to be hardcoded.
POOL_OPTION_DEFAULTS = dict(
    pool_size=5,
    max_overflow=10,
    pool_recycle=-1,  # SQLAlchemy default; connections are never recycled
    pool_timeout=30,
    pool_pre_ping=True,
)


@functools.cache
def get_engine(
driver_name: str,
Expand All @@ -643,7 +662,11 @@ def get_engine(
:param user: database user
:param password: database password
:param conn_str: optional connection URL
:param connect_args: custom connection arguments to pass to create_engine()
:param connect_args: keyword arguments forwarded from the provider's
``options`` block. Connection-pool tuning keys (see
POOL_OPTION_DEFAULTS) are extracted and applied to
the engine's pool; any remaining keys are passed to
the DBAPI as connect_args.

:returns: SQL Alchemy engine
"""
Expand All @@ -657,11 +680,28 @@ def get_engine(
database=database
)

# Separate connection-pool tuning from DBAPI connect args. Pool keys are
# applied to create_engine() directly; everything left in connect_args is
# forwarded to the DBAPI. get_engine() stays functools.cache()-able
# because connect_args values are hashable scalars, so engine sharing per
# process is preserved; providers with differing pool config (or any
# other option) correctly get distinct engines.
pool_options = {
key: connect_args.pop(key, default)
for key, default in POOL_OPTION_DEFAULTS.items()
}

engine = create_engine(
conn_str, connect_args=connect_args, pool_pre_ping=True
conn_str,
connect_args=connect_args,
**pool_options
)

LOGGER.debug(
f'Created engine for {repr(engine.url)} '
f'with pool options {pool_options}.'
)

LOGGER.debug(f'Created engine for {repr(engine.url)}.')
return engine


Expand Down
17 changes: 16 additions & 1 deletion pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml
Original file line number Diff line number Diff line change
Expand Up @@ -734,8 +734,23 @@ definitions:
type: integer
keepalives_interval:
type: integer
pool_size:
type: integer
description: persistent connections held open per worker process
max_overflow:
type: integer
description: extra short-lived connections allowed above pool_size
pool_recycle:
type: integer
description: recreate connections older than this many seconds (-1 never recycles)
pool_timeout:
type: integer
description: seconds to wait for a connection from the pool before erroring
pool_pre_ping:
type: boolean
description: test connections with a lightweight ping before use
required:
- server
- logging
- metadata
- resources
- resources
56 changes: 56 additions & 0 deletions tests/provider/test_sql_pool_options.py
Comment thread
KoalaGeo marked this conversation as resolved.
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# =================================================================
#
# Authors: Edward Lewis <eddlewis85@gmail.com>
#
# Copyright (c) 2026 Edward Lewis
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without
# restriction, including without limitation the rights to use,
# copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# =================================================================
# Test that get_engine() separates SQLAlchemy connection-pool tuning
# options from DBAPI connect_args. This is the contract introduced by
# the configurable-pool change; it needs no live database.
# =================================================================

from unittest import mock

from pygeoapi.provider import sql


@mock.patch.object(sql, 'create_engine')
def test_get_engine_separates_pool_options_from_connect_args(mock_create):
    """
    Verify that get_engine() splits pool tuning keys from DBAPI args.

    create_engine() is patched, so no database is contacted; the test only
    inspects the keyword arguments get_engine() passes to it.

    :param mock_create: mock standing in for sql.create_engine
    """
    # get_engine() is wrapped in functools.cache: start from an empty cache
    # so the patched create_engine() is really invoked for these arguments.
    sql.get_engine.cache_clear()
    try:
        sql.get_engine(
            'postgresql+psycopg2', 'h', 5432, 'd', 'u', 'p', None,
            pool_size=2, pool_recycle=300, connect_timeout=10,
        )
    finally:
        # The cached return value is the mock's, not a real engine; drop it
        # so it cannot be served to other tests in the same process.
        sql.get_engine.cache_clear()

    _, kwargs = mock_create.call_args
    # pool keys are applied to the engine (QueuePool), with overrides
    # honoured and unset pool keys falling back to the documented defaults
    assert kwargs['pool_size'] == 2
    assert kwargs['pool_recycle'] == 300
    assert kwargs['max_overflow'] == 10
    assert kwargs['pool_timeout'] == 30
    assert kwargs['pool_pre_ping'] is True
    # genuine DBAPI args are forwarded via connect_args; pool keys are not
    assert kwargs['connect_args'] == {'connect_timeout': 10}
Loading