From 42cbeb79b2d22904db96045b9c0b0f09d838086e Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Mon, 1 Jun 2026 12:04:23 -0400 Subject: [PATCH 1/4] CBG-5425 avoid writing zero sized files into sgcollect --- tools-tests/tasks_test.py | 52 +++++++++++++++++++++++++++++++++++++++ tools/tasks.py | 3 +++ 2 files changed, 55 insertions(+) diff --git a/tools-tests/tasks_test.py b/tools-tests/tasks_test.py index 6de62eb50a..8bb85796e8 100644 --- a/tools-tests/tasks_test.py +++ b/tools-tests/tasks_test.py @@ -12,6 +12,7 @@ import pathlib import sys import unittest +import unittest.mock import password_remover import pytest @@ -78,6 +79,30 @@ def test_add_file_task(tmp_path, tag_userdata): assert expected in fh.read() +def test_add_file_task_zero_size(tmp_path): + filename = "empty.json" + empty_file = tmp_path / filename + empty_file.write_text("") + task = tasks.add_file_task( + sourcefile_path=empty_file, + output_path=empty_file.name, + ) + task.no_header = True + output_dir = tmp_path / "output" + output_dir.mkdir() + runner = tasks.TaskRunner( + verbosity=VERBOSE, + default_name="sg.log", + tmp_dir=output_dir, + ) + runner.run(task) + runner.close_all_files() + + output_path = pathlib.Path(runner.tmpdir) / filename + assert output_path.exists() + assert os.path.getsize(output_path) == 0 + + def test_make_curl_task(tmpdir, httpserver): output = "curltask" httpserver.expect_request("/").respond_with_json(json.loads(INPUT_CONFIG)) @@ -107,6 +132,33 @@ def test_make_curl_task(tmpdir, httpserver): httpserver.check() +def test_make_curl_task_zero_size(tmpdir, httpserver): + output = "curltask_empty" + httpserver.expect_request("/").respond_with_data("") + task = tasks.make_curl_task( + "curltask_empty", + httpserver.url_for("/"), + auth_headers={}, + log_file=output, + ) + task.no_header = True + + output_dir = tmpdir.mkdir("output") + runner = tasks.TaskRunner( + verbosity=VERBOSE, + default_name="sg.log", + tmp_dir=output_dir, + ) + runner.run(task) + runner.close_all_files() + + output_path = pathlib.Path(runner.tmpdir) / output + assert output_path.exists() + assert os.path.getsize(output_path) == 0 + + httpserver.check() + + def test_task_print_literal(tmp_path): task = tasks.AllOsTask("test_task", ["notacommand"], literal="literal") runner = tasks.TaskRunner(tmp_dir=tmp_path) diff --git a/tools/tasks.py b/tools/tasks.py index 83677ea497..a3b1b899df 100644 --- a/tools/tasks.py +++ b/tools/tasks.py @@ -229,6 +229,9 @@ def execute(self, fp): print("log_file: {0}. ".format(self.log_file)) try: result = self.callable() + if not result: + return 0 + try: fp.write(result.encode()) except (UnicodeEncodeError, AttributeError): From b1cd8dc2d35f9b79e5aff7be7dab2a32e7a59355 Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Thu, 18 Jun 2026 11:49:37 -0400 Subject: [PATCH 2/4] fix code to return error messages into sync_gateway.log --- tools-tests/tasks_test.py | 135 ++++++++++++++++++++++++++++++++++++-- tools/tasks.py | 50 +++++++++----- 2 files changed, 162 insertions(+), 23 deletions(-) diff --git a/tools-tests/tasks_test.py b/tools-tests/tasks_test.py index 8bb85796e8..60ee66f203 100644 --- a/tools-tests/tasks_test.py +++ b/tools-tests/tasks_test.py @@ -7,17 +7,25 @@ # the file licenses/APL2.txt. import gzip +import io import json import os import pathlib import sys import unittest import unittest.mock +from zipfile import ZipFile import password_remover import pytest import tasks + +def assert_zip_all_nonempty(zip_path: str): + with ZipFile(zip_path) as zf: + for info in zf.infolist(): + assert info.file_size > 0, f"{info.filename} is zero-sized in zip" + VERBOSE = 2 INPUT_CONFIG = """\ @@ -78,6 +86,10 @@ def test_add_file_task(tmp_path, tag_userdata): with open(pathlib.Path(runner.tmpdir) / filename) as fh: assert expected in fh.read() + zip_path = str(tmp_path / "out.zip") + runner.zip(zip_path, "sg", "node1") + assert_zip_all_nonempty(zip_path) + def test_add_file_task_zero_size(tmp_path): filename = "empty.json" @@ -98,9 +110,10 @@ def test_add_file_task_zero_size(tmp_path): runner.run(task) runner.close_all_files() - output_path = pathlib.Path(runner.tmpdir) / filename - assert output_path.exists() - assert os.path.getsize(output_path) == 0 + zip_path = str(tmp_path / "out.zip") + runner.zip(zip_path, "sg", "node1") + with ZipFile(zip_path) as zf: + assert not any(filename in name for name in zf.namelist()) def test_make_curl_task(tmpdir, httpserver): @@ -129,6 +142,10 @@ def test_make_curl_task(tmpdir, httpserver): with open(pathlib.Path(runner.tmpdir) / output) as fh: assert REDACTED_OUTPUT in fh.read() + zip_path = str(pathlib.Path(tmpdir) / "out.zip") + runner.zip(zip_path, "sg", "node1") + assert_zip_all_nonempty(zip_path) + httpserver.check() @@ -152,9 +169,10 @@ def test_make_curl_task_zero_size(tmpdir, httpserver): runner.run(task) runner.close_all_files() - output_path = pathlib.Path(runner.tmpdir) / output - assert output_path.exists() - assert os.path.getsize(output_path) == 0 + zip_path = str(pathlib.Path(tmpdir) / "out.zip") + runner.zip(zip_path, "sg", "node1") + with ZipFile(zip_path) as zf: + assert not any(output in name for name in zf.namelist()) httpserver.check() @@ -289,3 +307,108 @@ def test_log_redact_file(tmp_path): redacted_text = gzip.open(redacted_file).read() assert redacted_text == updated_text + + +# --------------------------------------------------------------------------- +# PythonTask.execute() — direct return-value tests +# --------------------------------------------------------------------------- + + +@pytest.fixture +def python_task_fp(): + """A BytesIO buffer standing in for the log file descriptor.""" + return io.BytesIO() + + +def _make_python_task(callable, **kwargs): + return tasks.PythonTask("my task", callable, log_file="out.log", **kwargs) + + +def test_python_task_execute_no_output(python_task_fp): + task = _make_python_task(lambda: b"") + exit_code, message = task.execute(python_task_fp) + assert exit_code == 1 + assert message == "task my task had no output" + assert python_task_fp.getvalue() == b"" + + +def test_python_task_execute_bytes(python_task_fp): + task = _make_python_task(lambda: b"hello") + exit_code, message = task.execute(python_task_fp) + assert exit_code == 0 + assert message == "" + assert python_task_fp.getvalue() == b"hello" + + +def test_python_task_execute_str(python_task_fp): + task = _make_python_task(lambda: "hello") + exit_code, message = task.execute(python_task_fp) + assert exit_code == 0 + assert message == "" + assert python_task_fp.getvalue() == b"hello" + + +def test_python_task_execute_exception_silent(python_task_fp): + task = _make_python_task(lambda: (_ for _ in ()).throw(ValueError("boom")), log_exception=False) + exit_code, message = task.execute(python_task_fp) + assert exit_code == 1 + assert message == "" + + +def test_python_task_execute_exception_logged(python_task_fp, capsys): + task = _make_python_task(lambda: (_ for _ in ()).throw(ValueError("boom")), log_exception=True) + exit_code, message = task.execute(python_task_fp) + assert exit_code == 1 + assert message == "" + assert "Exception executing python task: boom" in capsys.readouterr().out + + +# --------------------------------------------------------------------------- +# TaskRunner.log_result() — stderr output +# --------------------------------------------------------------------------- + + +def test_log_result_ok(tmp_path, capsys): + runner = tasks.TaskRunner(tmp_dir=tmp_path) + runner.log_result((0, "")) + assert "OK" in capsys.readouterr().err + + +def test_log_result_with_message(tmp_path, capsys): + runner = tasks.TaskRunner(tmp_dir=tmp_path) + runner.log_result((1, "task my task had no output")) + assert "Exit code 1: task my task had no output" in capsys.readouterr().err + + +def test_log_result_no_message(tmp_path, capsys): + runner = tasks.TaskRunner(tmp_dir=tmp_path) + runner.log_result((127, "")) + assert "Exit code 127" in capsys.readouterr().err + + +# --------------------------------------------------------------------------- +# Task.execute() — return-code assertions +# --------------------------------------------------------------------------- + + +def test_task_execute_literal_return(): + task = tasks.AllOsTask("test", ["notacommand"], literal="hello") + exit_code, message = task.execute(io.BytesIO()) + assert exit_code == 0 + assert message == "" + + +def test_task_execute_popen_exception_return(): + task = tasks.AllOsTask("test", ["notacommand"]) + with unittest.mock.patch("subprocess.Popen") as popen: + popen.side_effect = OSError("Boom!") + exit_code, message = task.execute(io.BytesIO()) + assert exit_code == 127 + assert message == "" + + +def test_task_execute_timeout_return(): + task = tasks.AllOsTask("test", ["sleep", "5"], timeout=0.01) + exit_code, message = task.execute(io.BytesIO()) + assert exit_code != 0 + assert message == "" diff --git a/tools/tasks.py b/tools/tasks.py index a3b1b899df..b574df8deb 100644 --- a/tools/tasks.py +++ b/tools/tasks.py @@ -128,14 +128,19 @@ def __init__(self, description, command, timeout=None, **kwargs): self.timeout = timeout self.__dict__.update(kwargs) - def execute(self, fp): - """Run the task""" + def execute(self, fp) -> tuple[int, str]: + """Run the task. + + Returns (exit_code, error_message). exit_code is 0 on success. + error_message is empty on success; on failure it describes the problem + and is also written to fp so it appears in the collected log. + """ import subprocess use_shell = not isinstance(self.command, list) if "literal" in self.__dict__: fp.write(self.literal.encode("utf-8")) - return 0 + return 0, "" env = None if "addenv" in self.__dict__: @@ -159,7 +164,7 @@ def execute(self, fp): # automatically handle things like "failed to fork due # to some system limit". fp.write(f"Failed to execute {self.command}: {e}".encode("utf-8")) - return 127 + return 127, "" p.stdin.close() timer = None @@ -196,7 +201,7 @@ def on_timeout(): ) ) - return p.wait() + return p.wait(), "" def will_run(self): """Determine if this task will run on this platform.""" @@ -224,23 +229,29 @@ def __init__(self, description, callable, timeout=None, **kwargs): ) self.__dict__.update(kwargs) - def execute(self, fp): - """Run the task""" + def execute(self, fp) -> tuple[int, str]: + """Run the task. + + Returns (exit_code, error_message). exit_code is 0 on success. + error_message is empty on success; on failure it describes the problem + and is also printed to stdout. + """ print("log_file: {0}. ".format(self.log_file)) try: result = self.callable() if not result: - return 0 - - try: - fp.write(result.encode()) - except (UnicodeEncodeError, AttributeError): + msg = f"task {self.description} had no output" + print(msg) + return 1, msg + if isinstance(result, bytes): fp.write(result) - return 0 + else: + fp.write(result.encode()) + return 0, "" except Exception as e: if self.log_exception: print("Exception executing python task: {0}".format(e)) - return 1 + return 1, "" def will_run(self): """Determine if this task will run on this platform.""" @@ -316,11 +327,14 @@ def header(self, fp, title, subtitle): message = f"{separator}\n{title}\n{subtitle}\n{separator}\n" fp.write(message.encode()) - def log_result(self, result): - if result == 0: + def log_result(self, result: tuple[int, str]): + exit_code, message = result + if exit_code == 0: log("OK") + elif message: + log(f"Exit code {exit_code}: {message}") else: - log("Exit code %d" % result) + log(f"Exit code {exit_code}") def run(self, task): """Run a task with a file descriptor corresponding to its log file""" @@ -396,6 +410,8 @@ def __make_zip(prefix, filename, files): zf = ZipFile(filename, mode="w", compression=ZIP_DEFLATED) try: for name in files: + if os.path.getsize(name) == 0: + continue zf.write(name, f"{prefix}/{os.path.basename(name)}") finally: zf.close() From 63cc5214021a9fdb8485ea46db19067163ff6586 Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Thu, 18 Jun 2026 13:23:09 -0400 Subject: [PATCH 3/4] remove type checking --- tools/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/tasks.py b/tools/tasks.py index b574df8deb..f8030f01b1 100644 --- a/tools/tasks.py +++ b/tools/tasks.py @@ -128,7 +128,7 @@ def __init__(self, description, command, timeout=None, **kwargs): self.timeout = timeout self.__dict__.update(kwargs) - def execute(self, fp) -> tuple[int, str]: + def execute(self, fp): """Run the task. Returns (exit_code, error_message). exit_code is 0 on success. @@ -229,7 +229,7 @@ def __init__(self, description, callable, timeout=None, **kwargs): ) self.__dict__.update(kwargs) - def execute(self, fp) -> tuple[int, str]: + def execute(self, fp): """Run the task. Returns (exit_code, error_message). exit_code is 0 on success. From 40b56996f9642520c5f2f0a42fce0412e80bc947 Mon Sep 17 00:00:00 2001 From: Tor Colvin Date: Tue, 23 Jun 2026 13:45:47 -0400 Subject: [PATCH 4/4] fix format --- tools-tests/tasks_test.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools-tests/tasks_test.py b/tools-tests/tasks_test.py index 60ee66f203..247d10992b 100644 --- a/tools-tests/tasks_test.py +++ b/tools-tests/tasks_test.py @@ -26,6 +26,7 @@ def assert_zip_all_nonempty(zip_path: str): for info in zf.infolist(): assert info.file_size > 0, f"{info.filename} is zero-sized in zip" + VERBOSE = 2 INPUT_CONFIG = """\ @@ -349,14 +350,18 @@ def test_python_task_execute_str(python_task_fp): def test_python_task_execute_exception_silent(python_task_fp): - task = _make_python_task(lambda: (_ for _ in ()).throw(ValueError("boom")), log_exception=False) + task = _make_python_task( + lambda: (_ for _ in ()).throw(ValueError("boom")), log_exception=False + ) exit_code, message = task.execute(python_task_fp) assert exit_code == 1 assert message == "" def test_python_task_execute_exception_logged(python_task_fp, capsys): - task = _make_python_task(lambda: (_ for _ in ()).throw(ValueError("boom")), log_exception=True) + task = _make_python_task( + lambda: (_ for _ in ()).throw(ValueError("boom")), log_exception=True + ) exit_code, message = task.execute(python_task_fp) assert exit_code == 1 assert message == ""