Skip to content

Commit 4eaef8e

Browse files
authored
Pooling (#2345)
1 parent 27ec435 commit 4eaef8e

4 files changed

Lines changed: 131 additions & 4 deletions

File tree

docs/source/publishing/ogcapi-features.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -703,11 +703,27 @@ These are optional and if not specified, the default from the engine will be use
703703
# Number of seconds after which a TCP keepalive message that is not
704704
# acknowledged by the server should be retransmitted.
705705
keepalives_interval: 1
706+
# SQLAlchemy connection-pool tuning (optional). Defaults preserve previous
707+
# behaviour: SQLAlchemy's QueuePool defaults, except pool_pre_ping (true).
708+
# Persistent connections held open per worker process.
709+
pool_size: 5
710+
# Extra short-lived connections allowed above pool_size.
711+
max_overflow: 10
712+
# Recreate connections older than this many seconds. -1 (the
713+
# default) never recycles; set a finite value (e.g. 300) so
714+
# pooled connections cannot sit IDLE on the server indefinitely.
715+
pool_recycle: -1
716+
# Seconds to wait for a connection from the pool before erroring.
717+
pool_timeout: 30
718+
# Test connections with a lightweight ping before use.
719+
pool_pre_ping: true
706720
id_field: osm_id
707721
table: hotosm_bdi_waterways
708722
geom_field: foo_geom
709723
count: true # Optional; Default true; Enable/disable count for improved performance.
710724
725+
``get_engine()`` is cached per worker process, so providers that share the same database connection should use identical pool options to keep sharing a single engine; providers with differing pool options intentionally get separate engines.
726+
711727
The PostgreSQL provider is also able to connect to Cloud SQL databases.
712728

713729
.. code-block:: yaml

pygeoapi/provider/sql.py

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -615,13 +615,32 @@ def store_db_parameters(
615615
connection_data.get('search_path') or
616616
options.pop('search_path', ['public'])
617617
)
618+
# Connection-pool tuning keys (pool_size, max_overflow, pool_recycle,
619+
# pool_timeout, pool_pre_ping) are intentionally left in ``options`` and
620+
# flow through ``db_options`` to get_engine(), which separates them from
621+
# the DBAPI connect_args. Their types are validated by the config JSON
622+
# Schema, so no coercion is performed here.
618623
self.db_options = {
619624
k: v
620625
for k, v in options.items()
621626
if not isinstance(v, dict)
622627
}
623628

624629

630+
#: Connection-pool tuning keys recognised by get_engine(). These configure
631+
#: SQLAlchemy's QueuePool rather than the DBAPI, so get_engine() separates
632+
#: them from connect_args. The defaults reproduce pygeoapi's previous
633+
#: behaviour exactly: SQLAlchemy's own QueuePool defaults, except for
634+
#: pool_pre_ping, which was previously hardcoded to True.
635+
POOL_OPTION_DEFAULTS = {
636+
'pool_size': 5,  # persistent connections held open per worker process
637+
'max_overflow': 10,  # extra short-lived connections allowed above pool_size
638+
'pool_recycle': -1, # SQLAlchemy default; never recycles connections
639+
'pool_timeout': 30,  # seconds to wait for a pooled connection before erroring
640+
'pool_pre_ping': True,  # test connections with a lightweight ping before use
641+
}
642+
643+
625644
@functools.cache
626645
def get_engine(
627646
driver_name: str,
@@ -643,7 +662,11 @@ def get_engine(
643662
:param user: database user
644663
:param password: database password
645664
:param conn_str: optional connection URL
646-
:param connect_args: custom connection arguments to pass to create_engine()
665+
:param connect_args: keyword arguments forwarded from the provider's
666+
``options`` block. Connection-pool tuning keys (see
667+
POOL_OPTION_DEFAULTS) are extracted and applied to
668+
the engine's pool; any remaining keys are passed to
669+
the DBAPI as connect_args.
647670
648671
:returns: SQL Alchemy engine
649672
"""
@@ -657,11 +680,28 @@ def get_engine(
657680
database=database
658681
)
659682

683+
# Separate connection-pool tuning from DBAPI connect args. Pool keys are
684+
# applied to create_engine() directly; everything left in connect_args is
685+
# forwarded to the DBAPI. get_engine() stays functools.cache()-able
686+
# because connect_args values are hashable scalars, so engine sharing per
687+
# process is preserved; providers with differing pool config (or any
688+
# other option) correctly get distinct engines.
689+
pool_options = {
690+
key: connect_args.pop(key, default)
691+
for key, default in POOL_OPTION_DEFAULTS.items()
692+
}
693+
660694
engine = create_engine(
661-
conn_str, connect_args=connect_args, pool_pre_ping=True
695+
conn_str,
696+
connect_args=connect_args,
697+
**pool_options
698+
)
699+
700+
LOGGER.debug(
701+
f'Created engine for {repr(engine.url)} '
702+
f'with pool options {pool_options}.'
662703
)
663704

664-
LOGGER.debug(f'Created engine for {repr(engine.url)}.')
665705
return engine
666706

667707

pygeoapi/resources/schemas/config/pygeoapi-config-0.x.yml

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -734,8 +734,23 @@ definitions:
734734
type: integer
735735
keepalives_interval:
736736
type: integer
737+
pool_size:
738+
type: integer
739+
description: persistent connections held open per worker process
740+
max_overflow:
741+
type: integer
742+
description: extra short-lived connections allowed above pool_size
743+
pool_recycle:
744+
type: integer
745+
description: recreate connections older than this many seconds (-1 never recycles)
746+
pool_timeout:
747+
type: integer
748+
description: seconds to wait for a connection from the pool before erroring
749+
pool_pre_ping:
750+
type: boolean
751+
description: test connections with a lightweight ping before use
737752
required:
738753
- server
739754
- logging
740755
- metadata
741-
- resources
756+
- resources
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
# =================================================================
2+
#
3+
# Authors: Edward Lewis <eddlewis85@gmail.com>
4+
#
5+
# Copyright (c) 2026 Edward Lewis
6+
#
7+
# Permission is hereby granted, free of charge, to any person
8+
# obtaining a copy of this software and associated documentation
9+
# files (the "Software"), to deal in the Software without
10+
# restriction, including without limitation the rights to use,
11+
# copy, modify, merge, publish, distribute, sublicense, and/or sell
12+
# copies of the Software, and to permit persons to whom the
13+
# Software is furnished to do so, subject to the following
14+
# conditions:
15+
#
16+
# The above copyright notice and this permission notice shall be
17+
# included in all copies or substantial portions of the Software.
18+
#
19+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
21+
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
23+
# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24+
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25+
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
26+
# OTHER DEALINGS IN THE SOFTWARE.
27+
#
28+
# =================================================================
29+
# Test that get_engine() separates SQLAlchemy connection-pool tuning
30+
# options from DBAPI connect_args. This is the contract introduced by
31+
# the configurable-pool change; it needs no live database.
32+
# =================================================================
33+
34+
from unittest import mock
35+
36+
from pygeoapi.provider import sql
37+
38+
39+
@mock.patch.object(sql, 'create_engine')
def test_get_engine_separates_pool_options_from_connect_args(mock_create):
    """
    get_engine() must route pool tuning keys to create_engine() itself and
    forward only the remaining keys to the DBAPI via connect_args.

    create_engine() is mocked, so no live database is required.
    """

    # get_engine() is memoised with functools.cache: start from a clean
    # cache so the mocked create_engine() is guaranteed to be invoked
    sql.get_engine.cache_clear()
    try:
        sql.get_engine(
            'postgresql+psycopg2', 'h', 5432, 'd', 'u', 'p', None,
            pool_size=2, pool_recycle=300, connect_timeout=10,
        )
    finally:
        # the value cached for these arguments is the mock's return value;
        # drop it so it cannot leak into tests that run afterwards
        sql.get_engine.cache_clear()

    mock_create.assert_called_once()
    _, kwargs = mock_create.call_args

    # pool keys are applied to the engine (QueuePool), with overrides
    # honoured and unset pool keys falling back to the documented defaults
    expected_pool_options = {
        'pool_size': 2,
        'pool_recycle': 300,
        'max_overflow': 10,
        'pool_timeout': 30,
    }
    for key, expected in expected_pool_options.items():
        assert kwargs[key] == expected
    assert kwargs['pool_pre_ping'] is True

    # genuine DBAPI args are forwarded via connect_args; pool keys are not
    assert kwargs['connect_args'] == {'connect_timeout': 10}

0 commit comments

Comments
 (0)