Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,4 @@ uv.lock
/plots/
/test/testdrive/types.parquet*
/test/mz-deploy/**/target/
target-fuzz/
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,11 @@ exclude = [
"misc/wasm/*",
# Ignore any Rust dependencies that python packages might pull in.
"misc/python/venv/*",
# The `src/*/fuzz` cargo-fuzz crates need no entry here: each sets
# `package.workspace = "../../../test/cargo-fuzz"`, attaching it to the fuzz
# workspace, and a crate nested under a workspace member is never
# auto-included in the root workspace. They build on a nightly toolchain
# (libFuzzer) via `cargo +nightly fuzz run ...` or `ci/test/cargo-fuzz.sh`.
]

# Use Cargo's new feature resolver, which can handle target-specific features,
Expand Down
8 changes: 8 additions & 0 deletions ci/builder/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,14 @@ RUN mkdir rust \
&& cargo install --root /usr/local --version "=0.1.61" --locked --features=vendored-openssl cargo-udeps \
&& cargo install --root /usr/local --version "=0.4.0" --locked cargo-binutils \
&& cargo install --root /usr/local --version "=0.13.1" --locked wasm-pack \
&& if [ "$RUST_VERSION" = "nightly" ]; then \
# NOTE: no --locked, unlike the installs above. cargo-fuzz 0.13.1's \
# bundled Cargo.lock pins deps that fail on the pinned nightly \
# (yanked futures-util/zip, plus a crate using the perma-unstable \
# `rustc_layout_scalar_valid_range_*` attribute). Let cargo resolve \
# compatible versions instead. \
cargo install --root /usr/local --version "=0.13.1" cargo-fuzz; \
fi \
&& rm -rf /cargo/registry /cargo/git

# Shims for sanitizers
Expand Down
1 change: 1 addition & 0 deletions ci/plugins/mzcompose/hooks/command
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,7 @@ cleanup() {
killall -9 -q clusterd || true # There might be remaining processes from a cargo-test run

if [ ! -s services.log ] \
&& [ "$BUILDKITE_LABEL" != ":rust: cargo-fuzz" ] \
&& [ "$BUILDKITE_LABEL" != "Maelstrom coverage of persist" ] \
&& [ "$BUILDKITE_LABEL" != "Long single-node Maelstrom coverage of persist" ] \
&& [ "$BUILDKITE_LABEL" != "Maelstrom coverage of txn-wal" ] \
Expand Down
2 changes: 2 additions & 0 deletions ci/plugins/mzcompose/plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,7 @@ configuration:
type: string
composition:
type: string
ci_builder:
type: string
required: ["composition"]
additionalProperties: false
23 changes: 23 additions & 0 deletions ci/release-qualification/pipeline.template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,29 @@ steps:
composition: sqlsmith
args: [--max-joins=15, --explain-only, --runtime=6000]

- id: cargo-fuzz
label: ":rust: cargo-fuzz"
depends_on: []
timeout_in_minutes: 1440
agents:
queue: hetzner-x86-64-dedi-48cpu-192gb
sanitizer: skip
plugins:
- ./ci/plugins/mzcompose:
composition: cargo-fuzz
ci_builder: nightly
args:
- --profile=fruitful
- --max-seconds=86400
- --wall-budget=84600
# Step hard-times out at 1440min (86400s). --wall-budget ends fuzzing
# at 84600s, leaving 1800s; cap minimize at 1200s so the corpus
# upload has ~600s of headroom before the kill.
- --minimize-timeout=1200
- --corpus-sync
artifact_paths:
- src/*/fuzz/artifacts/**/*

- id: test-preflight-check-rollback
label: Test with preflight check and rollback
depends_on: []
Expand Down
27 changes: 27 additions & 0 deletions ci/test/lint-buf/generate-buf-config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@

SOURCE_DIR = "src/"
PROTO_FILE_GLOB = f"{SOURCE_DIR}**/*.proto"
# Each fuzz crate (`src/<crate>/fuzz`) is its own cargo `[workspace]`, so
# building it standalone creates `src/<crate>/fuzz/target/`. Build-script deps
# (e.g. `protobuf-src`) extract `.proto` files into that tree, which buf must
# not scan. We exclude every fuzz crate's `target/` instead of hand-listing them.
FUZZ_CRATE_GLOB = f"{SOURCE_DIR}*/fuzz"

GENERATION_COMMENT = "File generated by generate-buf-config.py - DO NOT EDIT"
BUF_INSTRUCTION_PREFIX = "// buf breaking:"
Expand All @@ -37,6 +42,11 @@ def is_ignore(self) -> bool:
def collect_proto_files() -> list[ProtoFile]:
    """Collect every `.proto` file matched by `PROTO_FILE_GLOB`.

    Prints the current working directory first, since the glob is resolved
    relative to it.

    Returns:
        One `ProtoFile` per matching path, in the order the glob yields them.
        Paths containing the substring `/target/` are skipped: those are
        build artifacts (for example the `.proto` files that a build-script
        dependency such as `protobuf-src` vendors into a fuzz crate's
        `target/` tree), not source that buf should scan.
    """
    print(f"Working dir: {os.getcwd()}")

    proto_files = []
    for path in glob.glob(PROTO_FILE_GLOB, recursive=True):
        # Build output, not checked-in source.
        if "/target/" in path:
            continue
        proto_files.append(ProtoFile(path))
    return proto_files


Expand Down Expand Up @@ -82,6 +92,20 @@ def generate_buf_ignore_section(ignored_files: list[ProtoFile]) -> str:
return "\n".join(ignore_entry_lines).strip()


def generate_fuzz_target_excludes() -> str:
    """Render the exclude entries for the `target/` directory of each fuzz crate.

    Every directory matching `FUZZ_CRATE_GLOB` contributes one list entry,
    in sorted order, holding the crate path relative to `SOURCE_DIR` with
    `/target` appended (e.g. "src/transform/fuzz" -> "transform/fuzz/target").
    If no directory matches, a single "# none" comment line is emitted
    instead.

    Returns:
        The entries joined by newlines, with leading and trailing whitespace
        of the whole text stripped (so the first entry carries no indent).
    """
    entries = [
        f" - {crate_dir.removeprefix(SOURCE_DIR)}/target"
        for crate_dir in sorted(filter(os.path.isdir, glob.glob(FUZZ_CRATE_GLOB)))
    ]

    if not entries:
        entries = [" # none"]

    return "\n".join(entries).strip()


def write_buf_configuration(
template_path: str, target_path: str, ignored_files: list[ProtoFile]
) -> None:
Expand All @@ -92,6 +116,9 @@ def write_buf_configuration(
content = content.replace(
"${ignore-entries}", generate_buf_ignore_section(ignored_files)
)
content = content.replace(
"${fuzz-target-excludes}", generate_fuzz_target_excludes()
)

with open(target_path, "w") as output_file:
output_file.write(content)
Expand Down
3 changes: 3 additions & 0 deletions misc/python/materialize/cli/ci_annotate_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@
# \s\S is any character including newlines, so this matches multiline strings
# non-greedy using ? so that we don't match all the result comparison issues into one block
| ----------\ RESULT\ COMPARISON\ ISSUE\ START\ ----------[\s\S]*?----------\ RESULT\ COMPARISON\ ISSUE\ END\ ------------
# cargo-fuzz crash, emitted by the cargo-fuzz mzcompose runner (one block
# per failing target, with the crash input and a reproduce command)
| ----------\ CARGO-FUZZ\ FAILURE\ START\ ----------[\s\S]*?----------\ CARGO-FUZZ\ FAILURE\ END\ ----------
# output consistency tests
# | possibly\ invalid\ operation\ specification # disabled
# for miri test summary
Expand Down
5 changes: 5 additions & 0 deletions src/avro/fuzz/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
target/
corpus/
artifacts/
coverage/
Cargo.lock
43 changes: 43 additions & 0 deletions src/avro/fuzz/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Fuzz crate for mz-avro decoders. Avro bytes arrive from Kafka, so a
# decoder bug here is a crash/poisoning risk for source ingestion.
#
# Kept out of the main workspace (see `package.workspace` below) because libFuzzer requires nightly Rust.
# Run via the repo-wide runner: `bin/ci-builder run nightly ci/test/cargo-fuzz.sh`,
# or locally:
# cd src/avro/fuzz
# cargo +nightly fuzz run reader_decode -- -max_total_time=60

[package]
workspace = "../../../test/cargo-fuzz"
name = "mz-avro-fuzz"
version = "0.0.0"
publish = false
edition = "2021"

[package.metadata]
cargo-fuzz = true

[dependencies]
libfuzzer-sys = "0.4"
mz-avro = { path = ".." }

[[bin]]
name = "reader_decode"
path = "fuzz_targets/reader_decode.rs"
test = false
doc = false
bench = false

[[bin]]
name = "schema_resolve"
path = "fuzz_targets/schema_resolve.rs"
test = false
doc = false
bench = false

[[bin]]
name = "avro_schema_parse"
path = "fuzz_targets/avro_schema_parse.rs"
test = false
doc = false
bench = false
44 changes: 44 additions & 0 deletions src/avro/fuzz/corpus.dict
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# libFuzzer dictionary for the Avro reader_decode target.
#
# An Avro object-container file starts with the 4-byte magic "Obj\x01" followed
# by a map of metadata (notably "avro.schema" and "avro.codec") and a 16-byte
# sync marker. Without the magic the decoder bails immediately, so the most
# valuable token by far is the magic itself; the rest let the mutator build
# plausible headers and schemas.

# Object-container magic.
"Obj\x01"
# Header metadata keys / codecs.
"avro.schema"
"avro.codec"
"null"
"deflate"
"snappy"
"zstandard"
"bzip2"
# Schema JSON tokens.
"\"type\""
"\"name\""
"\"fields\""
"\"record\""
"\"enum\""
"\"array\""
"\"map\""
"\"union\""
"\"fixed\""
"\"symbols\""
"\"items\""
"\"values\""
"\"size\""
"\"namespace\""
"\"default\""
"\"logicalType\""
"\"string\""
"\"bytes\""
"\"int\""
"\"long\""
"\"float\""
"\"double\""
"\"boolean\""
"decimal"
"timestamp-millis"
Loading
Loading