Skip to content

Commit a5f6672

Browse files
committed
Made the Arrow-Flight transport optional behind a flight feature.
With `flight` off there is no `tonic` / `arrow-flight` and the in-memory transport is the default, so distributed plans still run. The integration suite runs over the in-memory transport in both build configurations: `start_localhost_context` builds an `InMemoryWorkerTransport` cluster instead of a gRPC one, and the gRPC harness moved to `start_localhost_flight_context`. A `unit-test-flight-transport` job sets `DATAFUSION_DISTRIBUTED_TEST_TRANSPORT=flight` to keep full Flight coverage. For the no-flight suite to run, not just build, the `tpch` / `tpcds` / `clickbench` / `stateful_data_cleanup` dataset tests move into the benchmarks crate: they pulled the benchmarks crate in as a dev-dependency of the library, and that dev-dependency re-enabled `flight` on every test build through feature unification. With the dev-dependency gone, `cargo test --no-default-features --features integration --lib --tests` is genuinely Flight-free, and a `unit-test-no-flight` job runs it. The only tests still gated on `flight` are the ones that need a real wire: the `URLEmitter` routing tests, which assert per-URL worker identity, and the network-boundary connection-metrics tests (`bytes_transferred`, latency). The rest, including the worker-hook and metrics tests, register `InMemoryWorkerTransport` directly and run either way.
1 parent 29eeae8 commit a5f6672

45 files changed

Lines changed: 666 additions & 173 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/workflows/ci.yml

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ jobs:
3232
components: clippy
3333
- run: cargo clippy --all-targets --all-features -- -D warnings
3434

35+
# The generic suite runs over the in-memory transport (what `start_localhost_context` builds by
36+
# default), so it covers the distributed paths without a gRPC server.
3537
unit-test:
3638
runs-on: ubuntu-latest
3739
steps:
@@ -41,12 +43,50 @@ jobs:
4143
- uses: ./.github/actions/setup
4244
- run: cargo test --features integration
4345

46+
# The same suite over the Arrow-Flight gRPC transport. `start_localhost_context` switches to it
47+
# when `DATAFUSION_DISTRIBUTED_TEST_TRANSPORT=flight`, so both transports get full coverage.
48+
unit-test-flight-transport:
49+
runs-on: ubuntu-latest
50+
env:
51+
DATAFUSION_DISTRIBUTED_TEST_TRANSPORT: flight
52+
steps:
53+
- uses: actions/checkout@v4
54+
with:
55+
lfs: true
56+
- uses: ./.github/actions/setup
57+
- run: cargo test --features integration
58+
59+
# Builds the lib and the suite with `flight` off: no `tonic` / `arrow-flight`, in-memory transport
60+
# as the default. The dataset suites moved to the benchmarks crate, so the suite no longer drags
61+
# the benchmarks dev-dependency in (which re-enabled `flight` through feature unification).
62+
# Examples are left out: they demo the Flight cluster, so `--all-features` clippy covers them.
63+
no-flight:
64+
runs-on: ubuntu-latest
65+
steps:
66+
- uses: actions/checkout@v4
67+
- uses: ./.github/actions/setup
68+
with:
69+
components: clippy
70+
- run: cargo build --no-default-features --features integration
71+
- run: cargo clippy --no-default-features --features integration --lib --tests -- -D warnings
72+
73+
# The integration suite over the in-memory transport with `flight` off, so the no-flight runtime
74+
# path is run, not just built. Flight-specific tests gate themselves out.
75+
unit-test-no-flight:
76+
runs-on: ubuntu-latest
77+
steps:
78+
- uses: actions/checkout@v4
79+
with:
80+
lfs: true
81+
- uses: ./.github/actions/setup
82+
- run: cargo test --no-default-features --features integration --lib --tests
83+
4484
tpch-test:
4585
runs-on: ubuntu-latest
4686
steps:
4787
- uses: actions/checkout@v4
4888
- uses: ./.github/actions/setup
49-
- run: cargo test --features tpch --test 'tpch_*'
89+
- run: cargo test -p datafusion-distributed-benchmarks --features tpch --test 'tpch_*'
5090

5191
tpcds-correctness-test:
5292
runs-on: ubuntu-latest
@@ -59,9 +99,9 @@ jobs:
5999
- uses: ./.github/actions/setup
60100
- uses: actions/cache@v4
61101
with:
62-
path: testdata/tpcds/main.zip
102+
path: benchmarks/testdata/tpcds/main.zip
63103
key: "main.zip"
64-
- run: cargo test --features tpcds --test tpcds_correctness_test shard${{ matrix.shard }}
104+
- run: cargo test -p datafusion-distributed-benchmarks --features tpcds --test tpcds_correctness_test shard${{ matrix.shard }}
65105

66106
tpcds-plans-test:
67107
runs-on: ubuntu-latest
@@ -70,9 +110,9 @@ jobs:
70110
- uses: ./.github/actions/setup
71111
- uses: actions/cache@v4
72112
with:
73-
path: testdata/tpcds/main.zip
113+
path: benchmarks/testdata/tpcds/main.zip
74114
key: "main.zip"
75-
- run: cargo test --features tpcds --test tpcds_plans_test
115+
- run: cargo test -p datafusion-distributed-benchmarks --features tpcds --test tpcds_plans_test
76116

77117
clickbench-test:
78118
runs-on: ubuntu-latest
@@ -81,9 +121,9 @@ jobs:
81121
- uses: ./.github/actions/setup
82122
- uses: actions/cache@v4
83123
with:
84-
path: testdata/clickbench/
124+
path: benchmarks/testdata/clickbench/
85125
key: "data"
86-
- run: cargo test --features clickbench --test 'clickbench_*'
126+
- run: cargo test -p datafusion-distributed-benchmarks --features clickbench --test 'clickbench_*'
87127

88128
format-check:
89129
runs-on: ubuntu-latest

Cargo.lock

Lines changed: 2 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@ datafusion = { workspace = true, features = [
2323
"datetime_expressions",
2424
] }
2525
datafusion-proto = { workspace = true }
26-
arrow-flight = "58"
26+
arrow-flight = { version = "58", optional = true }
2727
arrow-select = "58"
2828
arrow-ipc = { version = "58", features = ["zstd"] }
2929
async-trait = "0.1.89"
3030
tokio = { version = "1.48", features = ["full"] }
31-
tonic = { version = "0.14.1", features = ["transport"] }
32-
tower = "0.5.2"
31+
tonic = { version = "0.14.1", features = ["transport"], optional = true }
32+
tower = { version = "0.5.2", optional = true }
3333
http = "1.3.1"
3434
itertools = "0.14.0"
3535
futures = "0.3.31"
@@ -49,7 +49,7 @@ crossbeam-queue = "0.3"
4949
sysinfo = { version = "0.30", optional = true }
5050
sketches-ddsketch = { version = "0.3", features = ["use_serde"] }
5151
bincode = "1"
52-
tonic-prost = "0.14.2"
52+
tonic-prost = { version = "0.14.2", optional = true }
5353

5454
# integration_tests deps
5555
insta = { version = "1.46.0", features = ["filters"], optional = true }
@@ -58,19 +58,22 @@ arrow = { version = "58", optional = true, features = ["test_utils"] }
5858
hyper-util = { version = "0.1.16", optional = true }
5959

6060
[features]
61+
default = ["flight"]
62+
# Arrow Flight gRPC transport. When off, the crate builds with no `tonic` / `arrow-flight`
63+
# and the in-memory transport becomes the default: distributed plans run inside the current
64+
# process. Multi-process execution then needs an embedder-registered transport.
65+
flight = ["dep:arrow-flight", "dep:tonic", "dep:tonic-prost", "dep:tower"]
6166
avro = ["datafusion/avro"]
67+
# Independent of `flight` so the integration suite runs both with the gRPC transport compiled in
68+
# (`--features integration`) and without it (`--no-default-features --features integration`).
69+
# The Flight-specific tests gate themselves on `flight`.
6270
integration = ["insta", "parquet", "arrow", "hyper-util"]
6371

6472
system-metrics = ["sysinfo"]
6573

66-
tpch = ["integration"]
67-
tpcds = ["integration"]
68-
clickbench = ["integration"]
69-
slow-tests = []
7074
sysinfo = ["dep:sysinfo"]
7175

7276
[dev-dependencies]
73-
datafusion-distributed-benchmarks = { path = "benchmarks" }
7477
structopt = "0.3"
7578
insta = { version = "1.46.0", features = ["filters"] }
7679
parquet = "58"

benchmarks/Cargo.toml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,20 @@ aws-sdk-ec2 = "1"
3333
openssl = { version = "0.10", features = ["vendored"] } # Keep this. Necessary for the remote benchmarks worker.
3434
mimalloc = "0.1"
3535

36+
[features]
37+
# Gates for the dataset test suites under `tests/`. They live here instead of in the library
38+
# because they need this crate: as a dev-dependency of the library it re-enabled `flight` on every
39+
# test build through feature unification, which made the no-flight config untestable.
40+
tpch = []
41+
tpcds = []
42+
clickbench = []
43+
slow-tests = []
44+
3645
[dev-dependencies]
3746
criterion = "0.5"
3847
sysinfo = "0.30"
48+
pretty_assertions = "1.4"
49+
test-case = "3.3.1"
3950

4051
[build-dependencies]
4152
built = { version = "0.8", features = ["git2", "chrono"] }

tests/clickbench_correctness_test.rs renamed to benchmarks/tests/clickbench_correctness_test.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#[cfg(all(feature = "integration", feature = "clickbench", test))]
1+
#[cfg(all(feature = "clickbench", test))]
22
mod tests {
33
use datafusion::arrow::array::RecordBatch;
44
use datafusion::common::plan_err;
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#[cfg(all(feature = "integration", feature = "clickbench", test))]
1+
#[cfg(all(feature = "clickbench", test))]
22
mod tests {
33
use datafusion::error::Result;
44
use datafusion_distributed::test_utils::in_memory_channel_resolver::start_in_memory_context;
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#[cfg(all(feature = "integration", feature = "tpch", test))]
1+
#[cfg(all(feature = "tpch", test))]
22
mod tests {
33
use datafusion::common::instant::Instant;
44
use datafusion::error::Result;

tests/tpcds_correctness_test.rs renamed to benchmarks/tests/tpcds_correctness_test.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#[cfg(all(feature = "integration", feature = "tpcds", test))]
1+
#[cfg(all(feature = "tpcds", test))]
22
mod tests {
33
use datafusion::arrow::array::RecordBatch;
44
use datafusion::common::plan_err;
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#[cfg(all(feature = "integration", feature = "tpcds", test))]
1+
#[cfg(all(feature = "tpcds", test))]
22
mod tests {
33
use datafusion::error::Result;
44
use datafusion_distributed::test_utils::in_memory_channel_resolver::start_in_memory_context;
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#[cfg(all(feature = "integration", feature = "tpch", test))]
1+
#[cfg(all(feature = "tpch", test))]
22
mod tests {
33
use datafusion::physical_plan::execute_stream;
44
use datafusion::prelude::SessionContext;

0 commit comments

Comments
 (0)