Skip to content

Commit e8751fb

Browse files
authored
Fix PostgreSQL product creation with special characters in DB name (#4830)
* routing: add PostgreSQL database name validator Introduces is_valid_postgresql_db_name, a permissive validator that rejects database names that would break a CREATE DATABASE statement or corrupt a connection string (quotes, semicolons, whitespace, null and control characters, or names longer than PostgreSQL's 63-byte identifier limit). The validator is intentionally permissive: names that are legal only as quoted identifiers (e.g. 'test-product', '1team', 'user') are accepted because CodeChecker will quote the identifier when issuing CREATE DATABASE. See the companion commit wiring the validator into addProduct() and fixing the CREATE DATABASE statement. Unit-tested in test_request_routing.py. * product: fix CREATE DATABASE failure on special characters The PostgreSQL path of _create_database() interpolated the user-supplied database name directly into a CREATE DATABASE statement via an f-string. This caused syntax errors for any name that is not a legal unquoted PostgreSQL identifier - in particular names containing a dash (e.g. 'test-product') or starting with a digit (e.g. '1team') - both reported by users via the GUI's product creation dialog. SQLAlchemy does not auto-quote identifiers in free-form text() clauses, so the fix has two parts: * Quote the identifier explicitly using the dialect's IdentifierPreparer before embedding it in the statement. This produces a properly double-quoted name such as CREATE DATABASE "test-product", which PostgreSQL accepts. * Validate the database name in addProduct() using the new is_valid_postgresql_db_name() helper, so that inputs containing quotes, semicolons, whitespace, control characters, or that exceed PostgreSQL's 63-byte identifier limit are rejected with a clear error message before any SQL is issued, rather than crashing later with an opaque driver error. * address review: move validator to codechecker_common.util Per #4830 review feedback: is_valid_postgresql_db_name belongs with the other generic helpers in codechecker_common.util, not in the web server's routing module. The function and its tests are unchanged; only the import paths in product_server.py and test_request_routing.py are adjusted.
1 parent 621ca85 commit e8751fb

3 files changed

Lines changed: 116 additions & 2 deletions

File tree

codechecker_common/util.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,3 +251,26 @@ def format_size(num: float, suffix: str = 'B') -> str:
251251
return f"{num:3.1f} {unit}{suffix}"
252252
num /= 1024.0
253253
return f"{num:.1f} Qi{suffix}"
254+
255+
256+
def is_valid_postgresql_db_name(db_name):
257+
"""
258+
Returns whether or not the given string is a safe PostgreSQL database
259+
name for CodeChecker to use.
260+
261+
CodeChecker quotes the database identifier when issuing CREATE DATABASE,
262+
so dashes, leading digits, and PostgreSQL reserved keywords are all
263+
allowed. However, characters that would break even a quoted
264+
identifier, or that are
265+
plainly dangerous in an SQL context, are rejected here so we fail fast
266+
with a clear error rather than producing broken SQL or an unusable
267+
product.
268+
"""
269+
if not db_name or not isinstance(db_name, str):
270+
return False
271+
272+
if len(db_name.encode('utf-8')) > 63:
273+
return False
274+
275+
forbidden = set('"\'\\;\x00\r\n\t ')
276+
return not any(c in forbidden for c in db_name)

web/server/codechecker_server/api/product_server.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
from codechecker_api.ProductManagement_v6 import ttypes
2424

2525
from codechecker_common.logger import get_logger
26-
from codechecker_common.util import path_for_fake_root
26+
from codechecker_common.util import is_valid_postgresql_db_name, \
27+
path_for_fake_root
2728

2829
from codechecker_server.profiler import timeit
2930
from codechecker_web.shared import convert
@@ -367,7 +368,10 @@ def __create_product_database(self, product):
367368
with engine.connect() as conn:
368369
conn.execute(text("commit"))
369370
LOG.info("Creating database '%s'", db_name)
370-
conn.execute(text(f"CREATE DATABASE {db_name}"))
371+
quoted_db_name = engine.dialect.identifier_preparer \
372+
.quote_identifier(db_name)
373+
conn.execute(
374+
text(f"CREATE DATABASE {quoted_db_name}"))
371375
conn.close()
372376
except exc.ProgrammingError as e:
373377
LOG.error("ProgrammingError occurred: %s", str(e))
@@ -410,6 +414,19 @@ def addProduct(self, product):
410414
codechecker_api_shared.ttypes.ErrorCode.GENERAL,
411415
msg)
412416

417+
if dbc.engine == 'postgresql' \
418+
and not is_valid_postgresql_db_name(dbc.database):
419+
msg = (
420+
f"The specified PostgreSQL database name "
421+
f"'{dbc.database}' contains characters that are "
422+
"not allowed (quotes, semicolons, whitespace, or "
423+
"control characters), or is empty, or exceeds "
424+
"PostgreSQL's 63-byte identifier limit.")
425+
LOG.error(msg)
426+
raise codechecker_api_shared.ttypes.RequestFailed(
427+
codechecker_api_shared.ttypes.ErrorCode.DATABASE,
428+
msg)
429+
413430
if self.__server.get_product(product.endpoint):
414431
msg = \
415432
f"A product endpoint '/{product.endpoint}' is already " \
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
# -------------------------------------------------------------------------
2+
#
3+
# Part of the CodeChecker project, under the Apache License v2.0 with
4+
# LLVM Exceptions. See LICENSE for license information.
5+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
#
7+
# -------------------------------------------------------------------------
8+
""" Unit tests for the PostgreSQL database name validator. """
9+
10+
11+
import unittest
12+
13+
from codechecker_common.util import is_valid_postgresql_db_name
14+
15+
16+
class PostgresqlDbNameValidationTest(unittest.TestCase):
17+
"""
18+
Tests for is_valid_postgresql_db_name, which guards against dangerous
19+
or unusable PostgreSQL database names before CodeChecker issues a
20+
CREATE DATABASE statement.
21+
"""
22+
23+
def test_accepts_plain_names(self):
24+
"""Names that are legal even as unquoted identifiers are allowed."""
25+
self.assertTrue(is_valid_postgresql_db_name('myapp'))
26+
self.assertTrue(is_valid_postgresql_db_name('codechecker_test'))
27+
self.assertTrue(is_valid_postgresql_db_name('db2'))
28+
self.assertTrue(is_valid_postgresql_db_name('_internal'))
29+
30+
def test_accepts_names_needing_quoting(self):
31+
"""
32+
Names that are legal only as quoted identifiers are allowed, since
33+
CodeChecker always quotes the identifier when creating the database.
34+
These are the cases reported in the bug ticket.
35+
"""
36+
self.assertTrue(is_valid_postgresql_db_name('test-product'))
37+
self.assertTrue(is_valid_postgresql_db_name('1team'))
38+
self.assertTrue(is_valid_postgresql_db_name('my-app-prod'))
39+
# "user" is a PostgreSQL reserved word but valid when quoted.
40+
self.assertTrue(is_valid_postgresql_db_name('user'))
41+
42+
def test_rejects_empty_or_none(self):
43+
"""Empty string and non-string inputs are rejected."""
44+
self.assertFalse(is_valid_postgresql_db_name(''))
45+
self.assertFalse(is_valid_postgresql_db_name(None))
46+
self.assertFalse(is_valid_postgresql_db_name(123))
47+
48+
def test_rejects_dangerous_characters(self):
49+
"""
50+
Characters that could break out of a quoted identifier or embed
51+
an SQL statement are rejected.
52+
"""
53+
self.assertFalse(is_valid_postgresql_db_name('foo"bar'))
54+
self.assertFalse(is_valid_postgresql_db_name("foo'bar"))
55+
self.assertFalse(is_valid_postgresql_db_name('foo;bar'))
56+
self.assertFalse(is_valid_postgresql_db_name('foo\\bar'))
57+
self.assertFalse(is_valid_postgresql_db_name('foo\x00bar'))
58+
59+
def test_rejects_whitespace(self):
60+
"""Names containing any whitespace are rejected."""
61+
self.assertFalse(is_valid_postgresql_db_name('foo bar'))
62+
self.assertFalse(is_valid_postgresql_db_name('foo\tbar'))
63+
self.assertFalse(is_valid_postgresql_db_name('foo\nbar'))
64+
self.assertFalse(is_valid_postgresql_db_name('foo\rbar'))
65+
66+
def test_rejects_overlong_names(self):
67+
"""
68+
PostgreSQL silently truncates identifiers longer than 63 bytes,
69+
which would produce a database CodeChecker cannot reconnect to.
70+
"""
71+
self.assertTrue(is_valid_postgresql_db_name('a' * 63))
72+
self.assertFalse(is_valid_postgresql_db_name('a' * 64))
73+
# 63 characters but more than 63 bytes due to multi-byte encoding.
74+
self.assertFalse(is_valid_postgresql_db_name('é' * 32))

0 commit comments

Comments
 (0)