From c92bf27426485c3eb9d026fedd291ecf346d82ad Mon Sep 17 00:00:00 2001 From: Bishara Hodali Date: Fri, 24 Apr 2026 14:28:23 +0200 Subject: [PATCH 1/3] routing: add PostgreSQL database name validator Introduces is_valid_postgresql_db_name, a permissive validator that rejects database names that would break a CREATE DATABASE statement or corrupt a connection string (quotes, semicolons, whitespace, null and control characters, or names longer than PostgreSQL's 63-byte identifier limit). The validator is intentionally permissive: names that are legal only as quoted identifiers (e.g. 'test-product', '1team', 'user') are accepted because CodeChecker will quote the identifier when issuing CREATE DATABASE. See the companion commit wiring the validator into addProduct() and fixing the CREATE DATABASE statement. Unit-tested in test_request_routing.py. --- web/server/codechecker_server/routing.py | 31 ++++++++++ web/server/tests/unit/test_request_routing.py | 62 +++++++++++++++++++ 2 files changed, 93 insertions(+) diff --git a/web/server/codechecker_server/routing.py b/web/server/codechecker_server/routing.py index 813d5dd7d3..cc21cc93a0 100644 --- a/web/server/codechecker_server/routing.py +++ b/web/server/codechecker_server/routing.py @@ -63,6 +63,37 @@ def is_valid_product_endpoint(uripart): return True +def is_valid_postgresql_db_name(db_name): + """ + Returns whether or not the given string is a safe PostgreSQL database + name for CodeChecker to use. + + CodeChecker quotes the database identifier when issuing CREATE DATABASE, + so dashes, leading digits, and PostgreSQL reserved keywords are all + allowed (e.g. "test-product", "1team", "user" are accepted). However, + characters that would break even a quoted identifier, or that are + plainly dangerous in an SQL context, are rejected here so we fail fast + with a clear error rather than producing broken SQL or an unusable + product. + """ + if not db_name or not isinstance(db_name, str): + return False + + # PostgreSQL identifiers (even quoted) cannot exceed 63 bytes by + # default. Names longer than this are silently truncated by the + # server, which would produce a product that cannot be reconnected + # to under the name the user provided. Reject them outright. + if len(db_name.encode('utf-8')) > 63: + return False + + # Forbidden characters: anything that would prematurely terminate + # the quoted identifier, embed a statement separator, or corrupt the + # connection string. Whitespace is also rejected because a name with + # spaces is almost certainly a typo rather than an intent. + forbidden = set('"\'\\;\x00\r\n\t ') + return not any(c in forbidden for c in db_name) + + def is_supported_version(version): """ Returns whether or not the given version tag is supported by the current diff --git a/web/server/tests/unit/test_request_routing.py b/web/server/tests/unit/test_request_routing.py index 9a392b09a1..32c3974972 100644 --- a/web/server/tests/unit/test_request_routing.py +++ b/web/server/tests/unit/test_request_routing.py @@ -13,6 +13,7 @@ from codechecker_server.routing import split_client_GET_request from codechecker_server.routing import split_client_POST_request +from codechecker_server.routing import is_valid_postgresql_db_name def get(path, host="http://localhost:8001/"): @@ -61,3 +62,64 @@ def test_post(self): self.assertEqual(post('/DummyProduct/v6.0/FoobarService'), ('DummyProduct', '6.0', 'FoobarService')) + + +class PostgresqlDbNameValidationTest(unittest.TestCase): + """ + Tests for is_valid_postgresql_db_name, which guards against dangerous + or unusable PostgreSQL database names before CodeChecker issues a + CREATE DATABASE statement. + """ + + def test_accepts_plain_names(self): + """Names that are legal even as unquoted identifiers are allowed.""" + self.assertTrue(is_valid_postgresql_db_name('myapp')) + self.assertTrue(is_valid_postgresql_db_name('codechecker_test')) + self.assertTrue(is_valid_postgresql_db_name('db2')) + self.assertTrue(is_valid_postgresql_db_name('_internal')) + + def test_accepts_names_needing_quoting(self): + """ + Names that are legal only as quoted identifiers are allowed, since + CodeChecker always quotes the identifier when creating the database. + These are the cases reported in the bug ticket. + """ + self.assertTrue(is_valid_postgresql_db_name('test-product')) + self.assertTrue(is_valid_postgresql_db_name('1team')) + self.assertTrue(is_valid_postgresql_db_name('my-app-prod')) + # "user" is a PostgreSQL reserved word but valid when quoted. + self.assertTrue(is_valid_postgresql_db_name('user')) + + def test_rejects_empty_or_none(self): + """Empty string and non-string inputs are rejected.""" + self.assertFalse(is_valid_postgresql_db_name('')) + self.assertFalse(is_valid_postgresql_db_name(None)) + self.assertFalse(is_valid_postgresql_db_name(123)) + + def test_rejects_dangerous_characters(self): + """ + Characters that could break out of a quoted identifier or embed + an SQL statement are rejected. + """ + self.assertFalse(is_valid_postgresql_db_name('foo"bar')) + self.assertFalse(is_valid_postgresql_db_name("foo'bar")) + self.assertFalse(is_valid_postgresql_db_name('foo;bar')) + self.assertFalse(is_valid_postgresql_db_name('foo\\bar')) + self.assertFalse(is_valid_postgresql_db_name('foo\x00bar')) + + def test_rejects_whitespace(self): + """Names containing any whitespace are rejected.""" + self.assertFalse(is_valid_postgresql_db_name('foo bar')) + self.assertFalse(is_valid_postgresql_db_name('foo\tbar')) + self.assertFalse(is_valid_postgresql_db_name('foo\nbar')) + self.assertFalse(is_valid_postgresql_db_name('foo\rbar')) + + def test_rejects_overlong_names(self): + """ + PostgreSQL silently truncates identifiers longer than 63 bytes, + which would produce a database CodeChecker cannot reconnect to. + """ + self.assertTrue(is_valid_postgresql_db_name('a' * 63)) + self.assertFalse(is_valid_postgresql_db_name('a' * 64)) + # 63 characters but more than 63 bytes due to multi-byte encoding. + self.assertFalse(is_valid_postgresql_db_name('é' * 32)) From 4e19a75bfb01a33273c796358db3c5b5d3ce60e7 Mon Sep 17 00:00:00 2001 From: Bishara Hodali Date: Fri, 24 Apr 2026 23:21:32 +0200 Subject: [PATCH 2/3] product: fix CREATE DATABASE failure on special characters The PostgreSQL path of _create_database() interpolated the user-supplied database name directly into a CREATE DATABASE statement via an f-string. This caused syntax errors for any name that is not a legal unquoted PostgreSQL identifier - in particular names containing a dash (e.g. 'test-product') or starting with a digit (e.g. '1team') - both reported by users via the GUI's product creation dialog. SQLAlchemy does not auto-quote identifiers in free-form text() clauses, so the fix has two parts: * Quote the identifier explicitly using the dialect's IdentifierPreparer before embedding it in the statement. This produces a properly double-quoted name such as CREATE DATABASE "test-product", which PostgreSQL accepts. * Validate the database name in addProduct() using the new is_valid_postgresql_db_name() helper, so that inputs containing quotes, semicolons, whitespace, control characters, or that exceed PostgreSQL's 63-byte identifier limit are rejected with a clear error message before any SQL is issued, rather than crashing later with an opaque driver error. --- .../codechecker_server/api/product_server.py | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/web/server/codechecker_server/api/product_server.py b/web/server/codechecker_server/api/product_server.py index 51f69ca800..3649319124 100644 --- a/web/server/codechecker_server/api/product_server.py +++ b/web/server/codechecker_server/api/product_server.py @@ -31,7 +31,7 @@ from .. import permissions from ..database.config_db_model import IDENTIFIER, Product, ProductPermission from ..database.database import DBSession, SQLServer, conv, escape_like -from ..routing import is_valid_product_endpoint +from ..routing import is_valid_product_endpoint, is_valid_postgresql_db_name from .thrift_enum_helper import confidentiality_enum, \ confidentiality_str @@ -368,7 +368,10 @@ def __create_product_database(self, product): with engine.connect() as conn: conn.execute(text("commit")) LOG.info("Creating database '%s'", db_name) - conn.execute(text(f"CREATE DATABASE {db_name}")) + quoted_db_name = engine.dialect.identifier_preparer \ + .quote_identifier(db_name) + conn.execute( + text(f"CREATE DATABASE {quoted_db_name}")) conn.close() except exc.ProgrammingError as e: LOG.error("ProgrammingError occurred: %s", str(e)) @@ -411,6 +414,19 @@ def addProduct(self, product): codechecker_api_shared.ttypes.ErrorCode.GENERAL, msg) + if dbc.engine == 'postgresql' \ + and not is_valid_postgresql_db_name(dbc.database): + msg = ( + f"The specified PostgreSQL database name " + f"'{dbc.database}' contains characters that are " + "not allowed (quotes, semicolons, whitespace, or " + "control characters), or is empty, or exceeds " + "PostgreSQL's 63-byte identifier limit.") + LOG.error(msg) + raise codechecker_api_shared.ttypes.RequestFailed( + codechecker_api_shared.ttypes.ErrorCode.DATABASE, + msg) + if self.__server.get_product(product.endpoint): msg = \ f"A product endpoint '/{product.endpoint}' is already " \ From b10562916b1007656c91d1ca8a727c416b6f198e Mon Sep 17 00:00:00 2001 From: Bishara Hodali Date: Mon, 27 Apr 2026 01:17:29 +0200 Subject: [PATCH 3/3] address review: move validator to codechecker_common.util Per #4830 review feedback: is_valid_postgresql_db_name belongs with the other generic helpers in codechecker_common.util, not in the web server's routing module. The function and its tests are unchanged; only the import paths in product_server.py and test_request_routing.py are adjusted. --- codechecker_common/util.py | 23 ++++++ .../codechecker_server/api/product_server.py | 5 +- web/server/codechecker_server/routing.py | 31 -------- .../tests/unit/test_db_name_validation.py | 74 +++++++++++++++++++ web/server/tests/unit/test_request_routing.py | 62 ---------------- 5 files changed, 100 insertions(+), 95 deletions(-) create mode 100644 web/server/tests/unit/test_db_name_validation.py diff --git a/codechecker_common/util.py b/codechecker_common/util.py index b7ef94bdaf..bf3a9e5d4b 100644 --- a/codechecker_common/util.py +++ b/codechecker_common/util.py @@ -251,3 +251,26 @@ def format_size(num: float, suffix: str = 'B') -> str: return f"{num:3.1f} {unit}{suffix}" num /= 1024.0 return f"{num:.1f} Qi{suffix}" + + +def is_valid_postgresql_db_name(db_name): + """ + Returns whether or not the given string is a safe PostgreSQL database + name for CodeChecker to use. + + CodeChecker quotes the database identifier when issuing CREATE DATABASE, + so dashes, leading digits, and PostgreSQL reserved keywords are all + allowed. However, characters that would break even a quoted + identifier, or that are + plainly dangerous in an SQL context, are rejected here so we fail fast + with a clear error rather than producing broken SQL or an unusable + product. + """ + if not db_name or not isinstance(db_name, str): + return False + + if len(db_name.encode('utf-8')) > 63: + return False + + forbidden = set('"\'\\;\x00\r\n\t ') + return not any(c in forbidden for c in db_name) diff --git a/web/server/codechecker_server/api/product_server.py b/web/server/codechecker_server/api/product_server.py index 3649319124..5b5116c089 100644 --- a/web/server/codechecker_server/api/product_server.py +++ b/web/server/codechecker_server/api/product_server.py @@ -23,7 +23,8 @@ from codechecker_api.ProductManagement_v6 import ttypes from codechecker_common.logger import get_logger -from codechecker_common.util import path_for_fake_root +from codechecker_common.util import is_valid_postgresql_db_name, \ + path_for_fake_root from codechecker_server.profiler import timeit from codechecker_web.shared import convert @@ -31,7 +32,7 @@ from .. import permissions from ..database.config_db_model import IDENTIFIER, Product, ProductPermission from ..database.database import DBSession, SQLServer, conv, escape_like -from ..routing import is_valid_product_endpoint, is_valid_postgresql_db_name +from ..routing import is_valid_product_endpoint from .thrift_enum_helper import confidentiality_enum, \ confidentiality_str diff --git a/web/server/codechecker_server/routing.py b/web/server/codechecker_server/routing.py index cc21cc93a0..813d5dd7d3 100644 --- a/web/server/codechecker_server/routing.py +++ b/web/server/codechecker_server/routing.py @@ -63,37 +63,6 @@ def is_valid_product_endpoint(uripart): return True -def is_valid_postgresql_db_name(db_name): - """ - Returns whether or not the given string is a safe PostgreSQL database - name for CodeChecker to use. - - CodeChecker quotes the database identifier when issuing CREATE DATABASE, - so dashes, leading digits, and PostgreSQL reserved keywords are all - allowed (e.g. "test-product", "1team", "user" are accepted). However, - characters that would break even a quoted identifier, or that are - plainly dangerous in an SQL context, are rejected here so we fail fast - with a clear error rather than producing broken SQL or an unusable - product. - """ - if not db_name or not isinstance(db_name, str): - return False - - # PostgreSQL identifiers (even quoted) cannot exceed 63 bytes by - # default. Names longer than this are silently truncated by the - # server, which would produce a product that cannot be reconnected - # to under the name the user provided. Reject them outright. - if len(db_name.encode('utf-8')) > 63: - return False - - # Forbidden characters: anything that would prematurely terminate - # the quoted identifier, embed a statement separator, or corrupt the - # connection string. Whitespace is also rejected because a name with - # spaces is almost certainly a typo rather than an intent. - forbidden = set('"\'\\;\x00\r\n\t ') - return not any(c in forbidden for c in db_name) - - def is_supported_version(version): """ Returns whether or not the given version tag is supported by the current diff --git a/web/server/tests/unit/test_db_name_validation.py b/web/server/tests/unit/test_db_name_validation.py new file mode 100644 index 0000000000..0e4a40e430 --- /dev/null +++ b/web/server/tests/unit/test_db_name_validation.py @@ -0,0 +1,74 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" Unit tests for the PostgreSQL database name validator. """ + + +import unittest + +from codechecker_common.util import is_valid_postgresql_db_name + + +class PostgresqlDbNameValidationTest(unittest.TestCase): + """ + Tests for is_valid_postgresql_db_name, which guards against dangerous + or unusable PostgreSQL database names before CodeChecker issues a + CREATE DATABASE statement. + """ + + def test_accepts_plain_names(self): + """Names that are legal even as unquoted identifiers are allowed.""" + self.assertTrue(is_valid_postgresql_db_name('myapp')) + self.assertTrue(is_valid_postgresql_db_name('codechecker_test')) + self.assertTrue(is_valid_postgresql_db_name('db2')) + self.assertTrue(is_valid_postgresql_db_name('_internal')) + + def test_accepts_names_needing_quoting(self): + """ + Names that are legal only as quoted identifiers are allowed, since + CodeChecker always quotes the identifier when creating the database. + These are the cases reported in the bug ticket. + """ + self.assertTrue(is_valid_postgresql_db_name('test-product')) + self.assertTrue(is_valid_postgresql_db_name('1team')) + self.assertTrue(is_valid_postgresql_db_name('my-app-prod')) + # "user" is a PostgreSQL reserved word but valid when quoted. + self.assertTrue(is_valid_postgresql_db_name('user')) + + def test_rejects_empty_or_none(self): + """Empty string and non-string inputs are rejected.""" + self.assertFalse(is_valid_postgresql_db_name('')) + self.assertFalse(is_valid_postgresql_db_name(None)) + self.assertFalse(is_valid_postgresql_db_name(123)) + + def test_rejects_dangerous_characters(self): + """ + Characters that could break out of a quoted identifier or embed + an SQL statement are rejected. + """ + self.assertFalse(is_valid_postgresql_db_name('foo"bar')) + self.assertFalse(is_valid_postgresql_db_name("foo'bar")) + self.assertFalse(is_valid_postgresql_db_name('foo;bar')) + self.assertFalse(is_valid_postgresql_db_name('foo\\bar')) + self.assertFalse(is_valid_postgresql_db_name('foo\x00bar')) + + def test_rejects_whitespace(self): + """Names containing any whitespace are rejected.""" + self.assertFalse(is_valid_postgresql_db_name('foo bar')) + self.assertFalse(is_valid_postgresql_db_name('foo\tbar')) + self.assertFalse(is_valid_postgresql_db_name('foo\nbar')) + self.assertFalse(is_valid_postgresql_db_name('foo\rbar')) + + def test_rejects_overlong_names(self): + """ + PostgreSQL silently truncates identifiers longer than 63 bytes, + which would produce a database CodeChecker cannot reconnect to. + """ + self.assertTrue(is_valid_postgresql_db_name('a' * 63)) + self.assertFalse(is_valid_postgresql_db_name('a' * 64)) + # 63 characters but more than 63 bytes due to multi-byte encoding. + self.assertFalse(is_valid_postgresql_db_name('é' * 32)) diff --git a/web/server/tests/unit/test_request_routing.py b/web/server/tests/unit/test_request_routing.py index 32c3974972..9a392b09a1 100644 --- a/web/server/tests/unit/test_request_routing.py +++ b/web/server/tests/unit/test_request_routing.py @@ -13,7 +13,6 @@ from codechecker_server.routing import split_client_GET_request from codechecker_server.routing import split_client_POST_request -from codechecker_server.routing import is_valid_postgresql_db_name def get(path, host="http://localhost:8001/"): @@ -62,64 +61,3 @@ def test_post(self): self.assertEqual(post('/DummyProduct/v6.0/FoobarService'), ('DummyProduct', '6.0', 'FoobarService')) - - -class PostgresqlDbNameValidationTest(unittest.TestCase): - """ - Tests for is_valid_postgresql_db_name, which guards against dangerous - or unusable PostgreSQL database names before CodeChecker issues a - CREATE DATABASE statement. - """ - - def test_accepts_plain_names(self): - """Names that are legal even as unquoted identifiers are allowed.""" - self.assertTrue(is_valid_postgresql_db_name('myapp')) - self.assertTrue(is_valid_postgresql_db_name('codechecker_test')) - self.assertTrue(is_valid_postgresql_db_name('db2')) - self.assertTrue(is_valid_postgresql_db_name('_internal')) - - def test_accepts_names_needing_quoting(self): - """ - Names that are legal only as quoted identifiers are allowed, since - CodeChecker always quotes the identifier when creating the database. - These are the cases reported in the bug ticket. - """ - self.assertTrue(is_valid_postgresql_db_name('test-product')) - self.assertTrue(is_valid_postgresql_db_name('1team')) - self.assertTrue(is_valid_postgresql_db_name('my-app-prod')) - # "user" is a PostgreSQL reserved word but valid when quoted. - self.assertTrue(is_valid_postgresql_db_name('user')) - - def test_rejects_empty_or_none(self): - """Empty string and non-string inputs are rejected.""" - self.assertFalse(is_valid_postgresql_db_name('')) - self.assertFalse(is_valid_postgresql_db_name(None)) - self.assertFalse(is_valid_postgresql_db_name(123)) - - def test_rejects_dangerous_characters(self): - """ - Characters that could break out of a quoted identifier or embed - an SQL statement are rejected. - """ - self.assertFalse(is_valid_postgresql_db_name('foo"bar')) - self.assertFalse(is_valid_postgresql_db_name("foo'bar")) - self.assertFalse(is_valid_postgresql_db_name('foo;bar')) - self.assertFalse(is_valid_postgresql_db_name('foo\\bar')) - self.assertFalse(is_valid_postgresql_db_name('foo\x00bar')) - - def test_rejects_whitespace(self): - """Names containing any whitespace are rejected.""" - self.assertFalse(is_valid_postgresql_db_name('foo bar')) - self.assertFalse(is_valid_postgresql_db_name('foo\tbar')) - self.assertFalse(is_valid_postgresql_db_name('foo\nbar')) - self.assertFalse(is_valid_postgresql_db_name('foo\rbar')) - - def test_rejects_overlong_names(self): - """ - PostgreSQL silently truncates identifiers longer than 63 bytes, - which would produce a database CodeChecker cannot reconnect to. - """ - self.assertTrue(is_valid_postgresql_db_name('a' * 63)) - self.assertFalse(is_valid_postgresql_db_name('a' * 64)) - # 63 characters but more than 63 bytes due to multi-byte encoding. - self.assertFalse(is_valid_postgresql_db_name('é' * 32))