owasp-modsecurity · fzipi · Aug 25, 2025 · Jul 1, 2026 · Jul 1, 2026 · Jul 1, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -100,7 +100,7 @@ jobs:
       - name: Setup Dependencies
         run: |
           sudo apt-get update -y -qq
-          sudo apt-get install -y --no-install-recommends apache2-dev libxml2-dev liblua5.1-0-dev libcurl4-gnutls-dev libpcre2-dev libpcre3-dev libpcre3 pkg-config libyajl-dev apache2 apache2-bin apache2-data
+          sudo apt-get install -y --no-install-recommends apache2-dev libxml2-dev liblua5.1-0-dev libcurl4-gnutls-dev libpcre2-dev libpcre3-dev libpcre3 pkg-config libyajl-dev apache2 apache2-bin apache2-data python3 python3-venv
       - uses: actions/checkout@v4
         with:
           submodules: recursive
@@ -114,8 +114,14 @@ jobs:
         run: make -j `nproc`
       - name: install module
         run: sudo make install
-      - name: run tests
-        run: make test
+      - name: build msc_test
+        run: make -C tests msc_test
+      - name: install Python test dependencies
+        run: |
+          python3 -m venv .venv
+          .venv/bin/pip install -r tests/requirements.txt
+      - name: run op/tfn unit tests
+        run: .venv/bin/pytest tests/test_operators tests/test_transformations
 
   test-regression-linux:
     runs-on: ${{ matrix.os }}
@@ -130,7 +136,7 @@ jobs:
       - name: Setup Dependencies
         run: |
           sudo apt-get update -y -qq
-          sudo apt-get install -y --no-install-recommends apache2-dev libxml2-dev liblua5.1-0-dev libcurl4-gnutls-dev libpcre2-dev libpcre3-dev libpcre3 pkg-config libyajl-dev apache2 apache2-bin apache2-data perl libwww-perl ssdeep libfuzzy-dev libfuzzy2
+          sudo apt-get install -y --no-install-recommends apache2-dev libxml2-dev liblua5.1-0-dev libcurl4-gnutls-dev libpcre2-dev libpcre3-dev libpcre3 pkg-config libyajl-dev apache2 apache2-bin apache2-data perl libwww-perl ssdeep libfuzzy-dev libfuzzy2 python3 python3-venv
       - uses: actions/checkout@v4
         with:
           submodules: recursive
@@ -144,8 +150,19 @@ jobs:
         run: make -j `nproc`
       - name: install module
         run: sudo make install
+      - name: install Python test dependencies
+        run: |
+          python3 -m venv .venv
+          .venv/bin/pip install -r tests/requirements.txt
+      # dump_regression_fixtures.pl needs libwww-perl (installed above) to
+      # eval the .t files' HTTP::Request objects - fails loudly if the .t
+      # sources and the checked-in tests/regression/fixtures/*.json drift.
+      - name: check regression fixtures are up to date
+        run: |
+          bash tests/regenerate_regression_fixtures.sh /tmp/fixtures-check
+          diff -r tests/regression/fixtures /tmp/fixtures-check
       - name: run regression tests
-        run: make test-regression
+        run: .venv/bin/pytest tests/test_regression
 
   cppcheck:
     runs-on: [ubuntu-24.04]

diff --git a/.gitignore b/.gitignore
@@ -113,7 +113,10 @@ Makefile
 # tests
 tests/regression/server_root/**
 tests/*.pl
+!tests/dump_unit_fixtures.pl
+!tests/dump_regression_fixtures.pl
 tests/*.trs
 tests/*.log
+tests/msc_test
 
 
diff --git a/.sonarcloud.properties b/.sonarcloud.properties
@@ -0,0 +1,13 @@
+# Project identity (key/organization) is bound via SonarCloud's GitHub App
+# integration for Automatic Analysis - not set here, since automatic
+# analysis doesn't support overriding it from this file.
+#
+# tests/test_operators/ and tests/test_transformations/ contain literal test
+# data for IP-matching operators (@ipMatch, @geoLookup) - S1313 ("hardcoded
+# IP addresses") is a false positive there, since the whole point of the
+# test is to exercise those exact literals. All other rules still apply.
+sonar.issue.ignore.multicriteria=e1,e2
+sonar.issue.ignore.multicriteria.e1.ruleKey=python:S1313
+sonar.issue.ignore.multicriteria.e1.resourceKey=tests/test_operators/*.py
+sonar.issue.ignore.multicriteria.e2.ruleKey=python:S1313
+sonar.issue.ignore.multicriteria.e2.resourceKey=tests/test_transformations/*.py
diff --git a/configure.ac b/configure.ac
@@ -945,10 +945,7 @@ if test -e "$PERL"; then
     if test "$build_mlogc" -ne 0; then
         AC_CONFIG_FILES([mlogc/mlogc-batch-load.pl], [chmod +x mlogc/mlogc-batch-load.pl])
     fi
-    AC_CONFIG_FILES([tests/run-unit-tests.pl], [chmod +x tests/run-unit-tests.pl])
     AC_CONFIG_FILES([tests/run-regression-tests.pl], [chmod +x tests/run-regression-tests.pl])
-    AC_CONFIG_FILES([tests/gen_rx-pm.pl], [chmod +x tests/gen_rx-pm.pl])
-    AC_CONFIG_FILES([tests/csv_rx-pm.pl], [chmod +x tests/csv_rx-pm.pl])
     AC_CONFIG_FILES([tests/regression/server_root/conf/httpd.conf])
 
     # Perl based tools

diff --git a/tests/Makefile.am b/tests/Makefile.am
@@ -74,9 +74,6 @@ msc_test_LDFLAGS = @APR_LDFLAGS@ \
     @YAJL_LDFLAGS@ \
     @SSDEEP_LDFLAGS@
 
-check_SCRIPTS = run-unit-tests.pl
-TESTS = $(check_SCRIPTS)
-
 test: check
 
 test-regression: run-regression-tests.pl

diff --git a/tests/README.md b/tests/README.md
@@ -0,0 +1,157 @@
+# ModSecurity Pytest Testing Framework
+
+Python/pytest front end for ModSecurity v2's tests. There are two independent suites:
+
+- **Unit tests** (`test_operators/`, `test_transformations/`): exercise individual `@operator`s and
+  `t:transformation`s directly through the `msc_test` C binary. No Apache required. These files are
+  plain, hand-maintained pytest parametrize tables - add or edit cases directly.
+- **Regression tests** (`test_regression/`): exercise full `SecRule`/config behavior against a real
+  Apache + `mod_security2.so`, driven by `conftest.py`'s `apache_server`/`modsec_test` fixtures. This
+  suite is still generated from Perl `.t` data (`tests/regression/*/*.t`) rather than hand-ported -
+  see "How the regression test data flows" below for why and how.
+
+Unit tests were originally migrated the same way (`tests/op/*.t`/`tests/tfn/*.t` fed through a Perl
+dumper into generated `.py` files), but that data is flat literal strings with no ongoing need for
+a Perl round-trip, so the `.t` files, the dumper, and the converter script were retired once the
+`.py` files existed - the generated files are now the source of truth and are edited directly.
+
+## Prerequisites
+
+1. Build the module and, for unit tests, the `msc_test` binary, from the repo root:
+   ```bash
+   ./autogen.sh
+   ./configure --with-apxs=/path/to/apxs2   # add --with-yajl/--with-ssdeep for full coverage
+   make -j
+   make -C tests msc_test
+   ```
+   Regression tests need a real Apache; `tests/regression/server_root/conf/httpd.conf` is
+   generated by `./configure` from `httpd.conf.in` and already points at the module you just
+   built in `apache2/.libs/` - no `make install` needed.
+2. Python 3.8+, then install test dependencies:
+   ```bash
+   cd tests
+   pip install -r requirements.txt
+   ```
+3. Regenerating regression fixtures after editing a `regression/*/*.t` file also needs Perl with
+   `LWP::UserAgent` (`libwww-perl` on Debian/Ubuntu) - not needed just to run the tests.
+
+## Directory structure
+
+```
+tests/
+├── conftest.py                      # Pytest fixtures (apache_server, modsec_test, unit_test, ...)
+├── apache_server.py                 # Apache lifecycle: reuses regression/server_root/conf/httpd.conf
+├── modsec_test.py                   # LogMatcher/ResponseMatcher/ModSecurityTestCase/UnitTestRunner
+├── regression_fixtures.py           # Loads tests/regression/fixtures/*.json into Python objects
+│
+├── test_operators/                  # Hand-maintained pytest files, one per operator
+│   └── ...
+├── test_transformations/            # Hand-maintained pytest files, one per transformation
+│   └── ...
+├── op/pmFromFile-01.dat             # Runtime data file @pmFromFile's test reads - not test *code*,
+│                                     #   kept even though the op/*.t definitions that once lived
+│                                     #   alongside it are gone (see git history if you need them)
+│
+├── regression/*/*.t                 # Source of truth for regression tests (Perl data, unchanged format)
+├── dump_regression_fixtures.pl      # Evals a regression .t file the way run-regression-tests.pl does
+├── regenerate_regression_fixtures.sh  # Runs the dumper over every regression/*/*.t file
+├── regression/fixtures/*/*.json     # Generated output - one JSON file per .t file, checked in
+├── test_regression/
+│   └── test_fixtures.py             # Generic loader: one pytest case per entry in every fixture file
+│
+├── requirements.txt
+├── pytest.ini
+└── README.md                        # This file
+```
+
+## Running tests
+
+```bash
+cd tests
+
+# Everything
+pytest
+
+# Unit tests only (no Apache needed)
+pytest test_operators test_transformations
+
+# Regression tests only (needs Apache + the built module)
+pytest test_regression
+
+# A single case, by test ID (see `pytest --collect-only` for exact IDs)
+pytest "test_regression/test_fixtures.py::test_regression_fixture[action/00-disruptive-actions.t#0:pass in phase:1]"
+
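+# Parallel (unit tests only - regression tests share a fixed port, see Troubleshooting)
+pytest -n auto test_operators test_transformations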
+```
+
+Markers: `unit`, `regression`, `apache` (needs Apache), `slow`. `pytest -m unit` / `pytest -m regression`
+work the same as passing the directory.
+
+## Adding a unit test
+
+`test_operators/`/`test_transformations/` are plain pytest files - add a new parametrize tuple (or
+a whole new `test_<name>.py`, following an existing file's pattern) directly. `param`/`input_data`/
+`expected_output` accept either a `str` (for real Unicode text) or a `bytes` literal (for exact
+byte sequences, including invalid UTF-8 or embedded NULs - see `test_transformations/test_base64decode.py`
+for an example of both). `unit_test.unit_runner.run_operator_test()`/`run_transformation_test()`
+drive the `msc_test` binary directly; see `modsec_test.py`'s `UnitTestRunner` for the exact contract
+(`msc_test.c`'s own `-t op|tfn -n <name> -p <param> -r <expected_ret>`, stdin = input).
+
+## How the regression test data flows
+
+`tests/regression/*/*.t` files use real Perl syntax (`qr//` regexes with flags,
+`HTTP::Request->new(...)`, `$ENV{...}` interpolation, `\xHH` string escapes, occasional
+`conf => sub {...}` coderefs) that a text/regex-based Python parser cannot reliably reproduce.
+Instead, `dump_regression_fixtures.pl` sets up the same `%ENV` as `run-regression-tests.pl`, lets
+Perl itself evaluate the `.t` file (the same `@C = (...)` trick `run-regression-tests.pl` uses), and
+serializes the result to JSON: `qr//` → `{pattern, flags}`, `HTTP::Request` → `{method, uri, headers,
+content}`, `conf => sub {...}` coderefs (executed and captured), etc. - no Perl semantics need to be
+reimplemented in Python. `test_regression/test_fixtures.py` then discovers every
+`tests/regression/fixtures/*/*.json` file and parametrizes one pytest case per entry - there is no
+per-`.t`-file Python code to keep in sync. After editing a `regression/*/*.t` file, regenerate the fixtures from the repo root:
+
+```bash
+tests/regenerate_regression_fixtures.sh
+```
+
+CI fails if the checked-in fixtures are stale (it regenerates into a temp dir and diffs).
+
+A `request`/`test`/`prerun` field that's a real (non-inert) Perl coderef can't be captured this way:
+the dumper marks it `unsupported` instead of silently dropping it, and the corresponding pytest
+case shows up as skipped with a message pointing at the `.t` file entry, not as a false pass.
+
+## Known skips
+
+A handful of regression cases are intentionally skipped rather than failed:
+
+- One `test => sub {...}` in `config/10-misc-directives.t` does real file-upload verification via a
+  Perl coderef this migration doesn't execute (see "unsupported" above).
+- Four `target/00-targets.t` cases check the exact byte content of ModSecurity's `FULL_REQUEST`/
+  `FULL_REQUEST_LENGTH` variables, which depends on the exact HTTP header set/order the *client*
+  sends. The original Perl harness uses `LWP::UserAgent`; these tests use Python's `requests`
+  (backed by `urllib3`, which injects its own `Accept-Encoding` beneath the header layer `requests`
+  exposes) - a client-library fingerprint difference, not a ModSecurity behavior gap.
+
+## Continuous integration
+
+`.github/workflows/ci.yml`'s `test-linux` job builds `msc_test` and runs the unit-test suite for
+every configure permutation in its matrix; `test-regression-linux` builds the module, checks the
+regression fixtures aren't stale, and runs the regression suite once, against a single representative
+configure (the other permutations are C-level build variations rarely relevant at this layer).
+
+## Troubleshooting
+
+- **`msc_test binary not found`**: run `make -C tests msc_test` from the repo root first (or set
+  `MSC_TEST_PATH`).
+- **Regression tests fail to start Apache**: confirm `./configure` has been run (it generates
+  `tests/regression/server_root/conf/httpd.conf`), and that `apache2/.libs/mod_security2.so` exists
+  (`make` builds it).
+- **`dump_regression_fixtures.pl`/`regenerate_regression_fixtures.sh` fail**: install
+  `LWP::UserAgent` (`libwww-perl` on Debian/Ubuntu, or `cpanm LWP::UserAgent` elsewhere). Not needed
+  to just run the tests - only to regenerate fixtures after editing a `.t` file.
+- **Port conflicts**: regression tests use the fixed port 8088 (matching
+  `run-regression-tests.pl`'s own default) and are not currently safe to run with `pytest-xdist`
+  parallelism against each other for that reason.
+- Logs from the most recent regression test run live under `tests/regression/server_root/logs/`
+  (`error.log`, `modsec_debug.log`, `modsec_audit.log`).
diff --git a/tests/__init__.py b/tests/__init__.py
@@ -0,0 +1,9 @@
+"""
+ModSecurity Pytest Testing Framework
+
+This package contains the modernized pytest-based testing framework for ModSecurity,
+converted from the original Perl-based system.
+"""
+
+__version__ = "2.0.0"
+__author__ = "ModSecurity Team"