diff --git a/codechecker_common/util.py b/codechecker_common/util.py index b7ef94bdaf..bf3a9e5d4b 100644 --- a/codechecker_common/util.py +++ b/codechecker_common/util.py @@ -251,3 +251,26 @@ def format_size(num: float, suffix: str = 'B') -> str: return f"{num:3.1f} {unit}{suffix}" num /= 1024.0 return f"{num:.1f} Qi{suffix}" + + +def is_valid_postgresql_db_name(db_name): + """ + Returns whether or not the given string is a safe PostgreSQL database + name for CodeChecker to use. + + CodeChecker quotes the database identifier when issuing CREATE DATABASE, + so dashes, leading digits, and PostgreSQL reserved keywords are all + allowed. However, characters that would break even a quoted + identifier, or that are + plainly dangerous in an SQL context, are rejected here so we fail fast + with a clear error rather than producing broken SQL or an unusable + product. + """ + if not db_name or not isinstance(db_name, str): + return False + + if len(db_name.encode('utf-8')) > 63: + return False + + forbidden = set('"\'\\;\x00\r\n\t ') + return not any(c in forbidden for c in db_name) diff --git a/web/server/codechecker_server/api/product_server.py b/web/server/codechecker_server/api/product_server.py index 51f69ca800..5b5116c089 100644 --- a/web/server/codechecker_server/api/product_server.py +++ b/web/server/codechecker_server/api/product_server.py @@ -23,7 +23,8 @@ from codechecker_api.ProductManagement_v6 import ttypes from codechecker_common.logger import get_logger -from codechecker_common.util import path_for_fake_root +from codechecker_common.util import is_valid_postgresql_db_name, \ + path_for_fake_root from codechecker_server.profiler import timeit from codechecker_web.shared import convert @@ -368,7 +369,10 @@ def __create_product_database(self, product): with engine.connect() as conn: conn.execute(text("commit")) LOG.info("Creating database '%s'", db_name) - conn.execute(text(f"CREATE DATABASE {db_name}")) + quoted_db_name = engine.dialect.identifier_preparer \ + .quote_identifier(db_name) + conn.execute( + text(f"CREATE DATABASE {quoted_db_name}")) conn.close() except exc.ProgrammingError as e: LOG.error("ProgrammingError occurred: %s", str(e)) @@ -411,6 +415,19 @@ def addProduct(self, product): codechecker_api_shared.ttypes.ErrorCode.GENERAL, msg) + if dbc.engine == 'postgresql' \ + and not is_valid_postgresql_db_name(dbc.database): + msg = ( + f"The specified PostgreSQL database name " + f"'{dbc.database}' contains characters that are " + "not allowed (quotes, semicolons, whitespace, or " + "control characters), or is empty, or exceeds " + "PostgreSQL's 63-byte identifier limit.") + LOG.error(msg) + raise codechecker_api_shared.ttypes.RequestFailed( + codechecker_api_shared.ttypes.ErrorCode.DATABASE, + msg) + if self.__server.get_product(product.endpoint): msg = \ f"A product endpoint '/{product.endpoint}' is already " \ diff --git a/web/server/tests/unit/test_db_name_validation.py b/web/server/tests/unit/test_db_name_validation.py new file mode 100644 index 0000000000..0e4a40e430 --- /dev/null +++ b/web/server/tests/unit/test_db_name_validation.py @@ -0,0 +1,74 @@ +# ------------------------------------------------------------------------- +# +# Part of the CodeChecker project, under the Apache License v2.0 with +# LLVM Exceptions. See LICENSE for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ------------------------------------------------------------------------- +""" Unit tests for the PostgreSQL database name validator. """ + + +import unittest + +from codechecker_common.util import is_valid_postgresql_db_name + + +class PostgresqlDbNameValidationTest(unittest.TestCase): + """ + Tests for is_valid_postgresql_db_name, which guards against dangerous + or unusable PostgreSQL database names before CodeChecker issues a + CREATE DATABASE statement. + """ + + def test_accepts_plain_names(self): + """Names that are legal even as unquoted identifiers are allowed.""" + self.assertTrue(is_valid_postgresql_db_name('myapp')) + self.assertTrue(is_valid_postgresql_db_name('codechecker_test')) + self.assertTrue(is_valid_postgresql_db_name('db2')) + self.assertTrue(is_valid_postgresql_db_name('_internal')) + + def test_accepts_names_needing_quoting(self): + """ + Names that are legal only as quoted identifiers are allowed, since + CodeChecker always quotes the identifier when creating the database. + These are the cases reported in the bug ticket. + """ + self.assertTrue(is_valid_postgresql_db_name('test-product')) + self.assertTrue(is_valid_postgresql_db_name('1team')) + self.assertTrue(is_valid_postgresql_db_name('my-app-prod')) + # "user" is a PostgreSQL reserved word but valid when quoted. + self.assertTrue(is_valid_postgresql_db_name('user')) + + def test_rejects_empty_or_none(self): + """Empty string and non-string inputs are rejected.""" + self.assertFalse(is_valid_postgresql_db_name('')) + self.assertFalse(is_valid_postgresql_db_name(None)) + self.assertFalse(is_valid_postgresql_db_name(123)) + + def test_rejects_dangerous_characters(self): + """ + Characters that could break out of a quoted identifier or embed + an SQL statement are rejected. + """ + self.assertFalse(is_valid_postgresql_db_name('foo"bar')) + self.assertFalse(is_valid_postgresql_db_name("foo'bar")) + self.assertFalse(is_valid_postgresql_db_name('foo;bar')) + self.assertFalse(is_valid_postgresql_db_name('foo\\bar')) + self.assertFalse(is_valid_postgresql_db_name('foo\x00bar')) + + def test_rejects_whitespace(self): + """Names containing any whitespace are rejected.""" + self.assertFalse(is_valid_postgresql_db_name('foo bar')) + self.assertFalse(is_valid_postgresql_db_name('foo\tbar')) + self.assertFalse(is_valid_postgresql_db_name('foo\nbar')) + self.assertFalse(is_valid_postgresql_db_name('foo\rbar')) + + def test_rejects_overlong_names(self): + """ + PostgreSQL silently truncates identifiers longer than 63 bytes, + which would produce a database CodeChecker cannot reconnect to. + """ + self.assertTrue(is_valid_postgresql_db_name('a' * 63)) + self.assertFalse(is_valid_postgresql_db_name('a' * 64)) + # 63 characters but more than 63 bytes due to multi-byte encoding. + self.assertFalse(is_valid_postgresql_db_name('é' * 32))