Skip to content

Commit c92bf27

Browse files
committed
routing: add PostgreSQL database name validator
Introduces is_valid_postgresql_db_name, a permissive validator that rejects database names that would break a CREATE DATABASE statement or corrupt a connection string (quotes, backslashes, semicolons, whitespace, null and control characters, or names longer than PostgreSQL's 63-byte identifier limit). The validator is intentionally permissive: names that are legal only as quoted identifiers (e.g. 'test-product', '1team', 'user') are accepted because CodeChecker will quote the identifier when issuing CREATE DATABASE. See the companion commit wiring the validator into addProduct() and fixing the CREATE DATABASE statement. Unit-tested in test_request_routing.py.
1 parent a14dff8 commit c92bf27

2 files changed

Lines changed: 93 additions & 0 deletions

File tree

web/server/codechecker_server/routing.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,37 @@ def is_valid_product_endpoint(uripart):
6363
return True
6464

6565

66+
def is_valid_postgresql_db_name(db_name):
    """
    Returns whether or not the given string is a safe PostgreSQL database
    name for CodeChecker to use.

    CodeChecker quotes the database identifier when issuing CREATE DATABASE,
    so dashes, leading digits, and PostgreSQL reserved keywords are all
    allowed (e.g. "test-product", "1team", "user" are accepted). However,
    characters that would break even a quoted identifier, or that are
    plainly dangerous in an SQL context, are rejected here so we fail fast
    with a clear error rather than producing broken SQL or an unusable
    product.

    A name is rejected (False is returned) when it:
      * is not a 'str', or is the empty string;
      * cannot be encoded as UTF-8 (e.g. contains a lone surrogate);
      * is longer than 63 bytes once encoded as UTF-8;
      * contains a quote, backslash or semicolon;
      * contains any whitespace character (as reported by str.isspace());
      * contains a control character (U+0000-U+001F, U+007F-U+009F).

    This function never raises; every invalid input yields False.
    """
    # Check the type first so that truthiness is only ever evaluated on a
    # real string and never on an arbitrary caller-supplied object.
    if not isinstance(db_name, str) or not db_name:
        return False

    # PostgreSQL identifiers (even quoted) cannot exceed 63 bytes by
    # default. Names longer than this are silently truncated by the
    # server, which would produce a product that cannot be reconnected
    # to under the name the user provided. Reject them outright.
    try:
        encoded_length = len(db_name.encode('utf-8'))
    except UnicodeEncodeError:
        # Lone surrogates cannot be encoded, hence can never be sent to
        # the server as a database name.
        return False

    if encoded_length > 63:
        return False

    # Forbidden punctuation: anything that would prematurely terminate
    # the quoted identifier, embed a statement separator, or corrupt the
    # connection string.
    forbidden_punctuation = frozenset('"\'\\;')

    for char in db_name:
        if char in forbidden_punctuation:
            return False

        # Whitespace is rejected because a name with spaces is almost
        # certainly a typo rather than an intent. str.isspace() also
        # covers vertical tab, form feed and the Unicode space characters,
        # not only ' ', '\t', '\r' and '\n'.
        if char.isspace():
            return False

        # NUL and every other C0/C1 control character, plus DEL.
        code_point = ord(char)
        if code_point < 0x20 or 0x7f <= code_point <= 0x9f:
            return False

    return True
95+
96+
6697
def is_supported_version(version):
6798
"""
6899
Returns whether or not the given version tag is supported by the current

web/server/tests/unit/test_request_routing.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from codechecker_server.routing import split_client_GET_request
1515
from codechecker_server.routing import split_client_POST_request
16+
from codechecker_server.routing import is_valid_postgresql_db_name
1617

1718

1819
def get(path, host="http://localhost:8001/"):
@@ -61,3 +62,64 @@ def test_post(self):
6162

6263
self.assertEqual(post('/DummyProduct/v6.0/FoobarService'),
6364
('DummyProduct', '6.0', 'FoobarService'))
65+
66+
67+
class PostgresqlDbNameValidationTest(unittest.TestCase):
    """
    Exercises is_valid_postgresql_db_name, the check applied to a
    PostgreSQL database name before CodeChecker issues a CREATE DATABASE
    statement for it.
    """

    def _expect_accepted(self, candidates):
        """Assert that the validator returns a truthy value for each name."""
        for candidate in candidates:
            with self.subTest(db_name=candidate):
                self.assertTrue(is_valid_postgresql_db_name(candidate))

    def _expect_rejected(self, candidates):
        """Assert that the validator returns a falsy value for each name."""
        for candidate in candidates:
            with self.subTest(db_name=candidate):
                self.assertFalse(is_valid_postgresql_db_name(candidate))

    def test_accepts_plain_names(self):
        """Names usable without any quoting pass validation."""
        self._expect_accepted([
            'myapp',
            'codechecker_test',
            'db2',
            '_internal',
        ])

    def test_accepts_names_needing_quoting(self):
        """
        Names that PostgreSQL only accepts as quoted identifiers pass
        validation, because CodeChecker always quotes the identifier when
        creating the database. These are the cases from the bug ticket.
        """
        self._expect_accepted([
            'test-product',
            '1team',
            'my-app-prod',
            # "user" is a PostgreSQL reserved word but valid when quoted.
            'user',
        ])

    def test_rejects_empty_or_none(self):
        """The empty string and non-string values fail validation."""
        self._expect_rejected(['', None, 123])

    def test_rejects_dangerous_characters(self):
        """
        Quotes, semicolons, backslashes and NUL, which could escape a
        quoted identifier or smuggle in an SQL statement, fail validation.
        """
        self._expect_rejected([
            'foo"bar',
            "foo'bar",
            'foo;bar',
            'foo\\bar',
            'foo\x00bar',
        ])

    def test_rejects_whitespace(self):
        """Space, tab, newline and carriage return fail validation."""
        self._expect_rejected([
            'foo bar',
            'foo\tbar',
            'foo\nbar',
            'foo\rbar',
        ])

    def test_rejects_overlong_names(self):
        """
        Identifiers longer than 63 bytes are silently truncated by
        PostgreSQL, leaving a database CodeChecker cannot reconnect to, so
        only names of at most 63 bytes pass validation.
        """
        self._expect_accepted(['a' * 63])
        self._expect_rejected([
            'a' * 64,
            # 63 characters but more than 63 bytes due to multi-byte encoding.
            'é' * 32,
        ])

0 commit comments

Comments
 (0)