Skip to content

Commit 674d880

Browse files
committed
RSPEED-2465: add character pattern validation to rlsapi v1 fields
Add Pydantic pattern validators to RlsapiV1SystemInfo (os, version, arch, system_id) and RlsapiV1CLA (nevra, version) to restrict the character set on fields that flow into Splunk telemetry. Prevents injection of control characters, newlines, and HTML/script tags. Signed-off-by: Major Hayden <major@redhat.com>
1 parent 71b93ea commit 674d880

2 files changed

Lines changed: 200 additions & 6 deletions

File tree

src/models/rlsapi/requests.py

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,22 @@
44

55
from models.config import ConfigurationBase
66

7+
# Character validation patterns for fields flowing into Splunk telemetry.
8+
# Restrict the character set to prevent injection of control characters,
9+
# HTML/script tags, or other malicious content into the telemetry pipeline.
10+
11+
# ASCII letters and digits, dots, underscores, spaces, and hyphens.
12+
_SAFE_TEXT_PATTERN = r"^[a-zA-Z0-9._ \-]*$"
13+
14+
# Machine IDs: ASCII letters and digits, dots, underscores, and hyphens (no spaces).
15+
_MACHINE_ID_PATTERN = r"^[a-zA-Z0-9._\-]*$"
16+
17+
# NEVRA (Name-Epoch:Version-Release.Arch): also allows ':', '+', and '~'; no spaces.
18+
_NEVRA_PATTERN = r"^[a-zA-Z0-9._:+~\-]*$"
19+
20+
# Version strings: ASCII letters and digits, dots, underscores, and hyphens.
21+
_VERSION_PATTERN = r"^[a-zA-Z0-9._\-]*$"
22+
723

824
class RlsapiV1Attachment(ConfigurationBase):
925
"""Attachment data from rlsapi v1 context.
@@ -49,16 +65,28 @@ class RlsapiV1SystemInfo(ConfigurationBase):
4965
system_id: The id of the client machine.
5066
"""
5167

52-
os: str = Field(default="", description="Operating system name", examples=["RHEL"])
68+
os: str = Field(
69+
default="",
70+
pattern=_SAFE_TEXT_PATTERN,
71+
description="Operating system name",
72+
examples=["RHEL"],
73+
)
5374
version: str = Field(
54-
default="", description="Operating system version", examples=["9.3", "8.10"]
75+
default="",
76+
pattern=_SAFE_TEXT_PATTERN,
77+
description="Operating system version",
78+
examples=["9.3", "8.10"],
5579
)
5680
arch: str = Field(
57-
default="", description="System architecture", examples=["x86_64", "aarch64"]
81+
default="",
82+
pattern=_SAFE_TEXT_PATTERN,
83+
description="System architecture",
84+
examples=["x86_64", "aarch64"],
5885
)
5986
system_id: str = Field(
6087
default="",
6188
alias="id",
89+
pattern=_MACHINE_ID_PATTERN,
6290
description="Client machine ID",
6391
examples=["01JDKR8N7QW9ZMXVGK3PB5TQWZ"],
6492
)
@@ -76,11 +104,13 @@ class RlsapiV1CLA(ConfigurationBase):
76104

77105
nevra: str = Field(
78106
default="",
107+
pattern=_NEVRA_PATTERN,
79108
description="CLA NEVRA identifier",
80109
examples=["command-line-assistant-0:0.2.0-1.el9.noarch"],
81110
)
82111
version: str = Field(
83112
default="",
113+
pattern=_VERSION_PATTERN,
84114
description="Command line assistant version",
85115
examples=["0.2.0"],
86116
)

tests/unit/models/rlsapi/test_requests.py

Lines changed: 167 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -431,9 +431,173 @@ def test_input_combinations(
431431
)
432432
assert request.get_input_source() == expected
433433

434-
# -------------------------------------------------------------------------
435-
# Edge cases
436-
# -------------------------------------------------------------------------
434+
435+
# ---------------------------------------------------------------------------
436+
# Character pattern validation tests — RlsapiV1SystemInfo
437+
# ---------------------------------------------------------------------------
438+
439+
440+
class TestSystemInfoCharacterValidation:
    """Exercise the character-set patterns on RlsapiV1SystemInfo fields."""

    # (field, value) pairs that the os/version/arch patterns must let through.
    _ACCEPTED_FIELD_VALUES = [
        pytest.param("os", "RHEL", id="os-simple"),
        pytest.param("os", "Red Hat Enterprise Linux", id="os-with-spaces"),
        pytest.param("os", "CentOS-Stream", id="os-with-hyphen"),
        pytest.param("os", "RHEL_9", id="os-with-underscore"),
        pytest.param("os", "", id="os-empty-default"),
        pytest.param("version", "9.3", id="version-major-minor"),
        pytest.param("version", "8.10", id="version-two-digit-minor"),
        pytest.param("version", "9.3.0", id="version-three-part"),
        pytest.param("arch", "x86_64", id="arch-x86"),
        pytest.param("arch", "aarch64", id="arch-arm"),
        pytest.param("arch", "ppc64le", id="arch-ppc"),
        pytest.param("arch", "s390x", id="arch-s390"),
    ]

    # (field, value) pairs carrying markup, control characters, or shell
    # punctuation; each one must fail validation.
    _REJECTED_FIELD_VALUES = [
        pytest.param("os", "<script>alert('xss')</script>", id="os-html-script"),
        pytest.param("os", "RHEL\n", id="os-newline"),
        pytest.param("os", "RHEL\t", id="os-tab"),
        pytest.param("os", "RHEL\x00", id="os-null-byte"),
        pytest.param("version", "9.3<br>", id="version-html-tag"),
        pytest.param("version", "9.3\r\n", id="version-crlf"),
        pytest.param("arch", "x86_64; rm -rf /", id="arch-semicolon"),
        pytest.param("arch", "x86_64\x0b", id="arch-vertical-tab"),
    ]

    # Machine IDs that must be accepted (ULID, UUID, hostname-like, empty).
    _ACCEPTED_SYSTEM_IDS = [
        pytest.param("01JDKR8N7QW9ZMXVGK3PB5TQWZ", id="ulid"),
        pytest.param("550e8400-e29b-41d4-a716-446655440000", id="uuid"),
        pytest.param("machine-001", id="hostname-style"),
        pytest.param("", id="empty-default"),
    ]

    # Machine IDs that must be refused (spaces, markup, newline).
    _REJECTED_SYSTEM_IDS = [
        pytest.param("machine id with spaces", id="spaces"),
        pytest.param("id<script>", id="html-tag"),
        pytest.param("id\ninjection", id="newline"),
    ]

    @pytest.mark.parametrize(("field", "value"), _ACCEPTED_FIELD_VALUES)
    def test_valid_values_accepted(self, field: str, value: str) -> None:
        """Build the model with one allowed value and read it back unchanged."""
        model = RlsapiV1SystemInfo(**{field: value})
        assert getattr(model, field) == value

    @pytest.mark.parametrize(("field", "value"), _REJECTED_FIELD_VALUES)
    def test_invalid_values_rejected(self, field: str, value: str) -> None:
        """Expect a pattern ValidationError for a value with forbidden characters."""
        with pytest.raises(ValidationError, match="String should match pattern"):
            RlsapiV1SystemInfo(**{field: value})

    @pytest.mark.parametrize("value", _ACCEPTED_SYSTEM_IDS)
    def test_valid_system_id(self, value: str) -> None:
        """Check that an allowed machine ID is stored on system_id as given."""
        model = RlsapiV1SystemInfo(
            system_id=value  # pyright: ignore[reportCallIssue]
        )
        assert model.system_id == value

    @pytest.mark.parametrize("value", _REJECTED_SYSTEM_IDS)
    def test_invalid_system_id(self, value: str) -> None:
        """Expect a pattern ValidationError for a machine ID with forbidden characters."""
        with pytest.raises(ValidationError, match="String should match pattern"):
            RlsapiV1SystemInfo(system_id=value)  # pyright: ignore[reportCallIssue]
511+
512+
513+
# ---------------------------------------------------------------------------
514+
# Character pattern validation tests — RlsapiV1CLA
515+
# ---------------------------------------------------------------------------
516+
517+
518+
class TestCLACharacterValidation:
    """Exercise the character-set patterns on RlsapiV1CLA fields."""

    # (field, value) pairs that the nevra/version patterns must let through.
    _ACCEPTED_FIELD_VALUES = [
        pytest.param(
            "nevra",
            "command-line-assistant-0:0.2.0-1.el9.noarch",
            id="nevra-with-epoch",
        ),
        pytest.param(
            "nevra",
            "command-line-assistant-0.1.0-1.el9.noarch",
            id="nevra-without-epoch",
        ),
        pytest.param(
            "nevra",
            "pkg~pre1+post1-0:1.0-1.el9.x86_64",
            id="nevra-tilde-plus",
        ),
        pytest.param("nevra", "", id="nevra-empty-default"),
        pytest.param("version", "0.2.0", id="version-semver"),
        pytest.param("version", "1.0.0-rc1", id="version-prerelease"),
        pytest.param("version", "0.2.0.dev1", id="version-dev"),
        pytest.param("version", "", id="version-empty-default"),
    ]

    # (field, value) pairs carrying markup, whitespace, or control characters;
    # each one must fail validation.
    _REJECTED_FIELD_VALUES = [
        pytest.param("nevra", "pkg<script>", id="nevra-html-tag"),
        pytest.param("nevra", "pkg\nnewline", id="nevra-newline"),
        pytest.param("nevra", "pkg name spaces", id="nevra-spaces"),
        pytest.param("version", "0.2.0\x00", id="version-null-byte"),
        pytest.param("version", "0.2.0<br>", id="version-html"),
        pytest.param("version", "0.2.0\t", id="version-tab"),
    ]

    @pytest.mark.parametrize(("field", "value"), _ACCEPTED_FIELD_VALUES)
    def test_valid_values_accepted(self, field: str, value: str) -> None:
        """Build the model with one allowed value and read it back unchanged."""
        model = RlsapiV1CLA(**{field: value})
        assert getattr(model, field) == value

    @pytest.mark.parametrize(("field", "value"), _REJECTED_FIELD_VALUES)
    def test_invalid_values_rejected(self, field: str, value: str) -> None:
        """Expect a pattern ValidationError for a value with forbidden characters."""
        with pytest.raises(ValidationError, match="String should match pattern"):
            RlsapiV1CLA(**{field: value})
566+
567+
568+
# ---------------------------------------------------------------------------
569+
# get_input_source() tests (continued edge cases)
570+
# ---------------------------------------------------------------------------
571+
572+
573+
class TestGetInputSourceEdgeCases:
574+
"""Edge case tests for RlsapiV1InferRequest.get_input_source()."""
575+
576+
@pytest.fixture(name="make_request")
577+
def make_request_fixture(self) -> Any:
578+
"""Factory fixture to build requests with specific context values."""
579+
580+
class _RequestBuilder: # pylint: disable=too-few-public-methods
581+
"""Helper to construct requests with variable context."""
582+
583+
@staticmethod
584+
def build(
585+
question: str = "q",
586+
stdin: str = "",
587+
attachment: str = "",
588+
terminal: str = "",
589+
) -> RlsapiV1InferRequest:
590+
"""Build an RlsapiV1InferRequest with specified context values."""
591+
return RlsapiV1InferRequest(
592+
question=question,
593+
context=RlsapiV1Context(
594+
stdin=stdin,
595+
attachments=RlsapiV1Attachment(contents=attachment),
596+
terminal=RlsapiV1Terminal(output=terminal),
597+
),
598+
)
599+
600+
return _RequestBuilder
437601

438602
def test_preserves_content_formatting(self, make_request: Any) -> None:
439603
"""Test that content formatting (newlines, special chars) is preserved."""

0 commit comments

Comments
 (0)