Skip to content

Commit e1a1889

Browse files
authored
Auto start testcontainers for datafusion-cli (apache#16644)
1 parent a45a4c4 commit e1a1889

7 files changed

Lines changed: 120 additions & 61 deletions

File tree

.github/workflows/rust.yml

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -314,18 +314,6 @@ jobs:
314314
fetch-depth: 1
315315
- name: Setup Rust toolchain
316316
run: rustup toolchain install stable
317-
- name: Setup Minio - S3-compatible storage
318-
run: |
319-
docker run -d --name minio-container \
320-
-p 9000:9000 \
321-
-e MINIO_ROOT_USER=TEST-DataFusionLogin -e MINIO_ROOT_PASSWORD=TEST-DataFusionPassword \
322-
-v $(pwd)/datafusion/core/tests/data:/source quay.io/minio/minio \
323-
server /data
324-
docker exec minio-container /bin/sh -c "\
325-
mc ready local
326-
mc alias set localminio http://localhost:9000 TEST-DataFusionLogin TEST-DataFusionPassword && \
327-
mc mb localminio/data && \
328-
mc cp -r /source/* localminio/data"
329317
- name: Run tests (excluding doctests)
330318
env:
331319
RUST_BACKTRACE: 1
@@ -337,9 +325,6 @@ jobs:
337325
run: cargo test --profile ci -p datafusion-cli --lib --tests --bins
338326
- name: Verify Working Directory Clean
339327
run: git diff --exit-code
340-
- name: Minio Output
341-
if: ${{ !cancelled() }}
342-
run: docker logs minio-container
343328

344329

345330
linux-test-example:

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ rstest = "0.25.0"
174174
serde_json = "1"
175175
sqlparser = { version = "0.55.0", default-features = false, features = ["std", "visitor"] }
176176
tempfile = "3"
177+
testcontainers = { version = "0.24", features = ["default"] }
178+
testcontainers-modules = { version = "0.12" }
177179
tokio = { version = "1.46", features = ["macros", "rt", "sync"] }
178180
url = "2.5.4"
179181

datafusion-cli/CONTRIBUTING.md

Lines changed: 14 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -29,47 +29,26 @@ cargo test
2929

3030
## Running Storage Integration Tests
3131

32-
By default, storage integration tests are not run. To run them you will need to set `TEST_STORAGE_INTEGRATION=1` and
33-
then provide the necessary configuration for that object store.
32+
By default, storage integration tests are not run. These tests use the `testcontainers` crate to start up a local MinIO server using Docker on port 9000.
3433

35-
For some of the tests, [snapshots](https://datafusion.apache.org/contributor-guide/testing.html#snapshot-testing) are used.
36-
37-
### AWS
38-
39-
To test the S3 integration against [Minio](https://github.com/minio/minio)
40-
41-
First start up a container with Minio and load test files.
34+
To run them you will need to set `TEST_STORAGE_INTEGRATION`:
4235

4336
```shell
44-
docker run -d \
45-
--name datafusion-test-minio \
46-
-p 9000:9000 \
47-
-e MINIO_ROOT_USER=TEST-DataFusionLogin \
48-
-e MINIO_ROOT_PASSWORD=TEST-DataFusionPassword \
49-
-v $(pwd)/../datafusion/core/tests/data:/source \
50-
quay.io/minio/minio server /data
51-
52-
docker exec datafusion-test-minio /bin/sh -c "\
53-
mc ready local
54-
mc alias set localminio http://localhost:9000 TEST-DataFusionLogin TEST-DataFusionPassword && \
55-
mc mb localminio/data && \
56-
mc cp -r /source/* localminio/data"
37+
TEST_STORAGE_INTEGRATION=1 cargo test
5738
```
5839

59-
Setup environment
40+
For some of the tests, [snapshots](https://datafusion.apache.org/contributor-guide/testing.html#snapshot-testing) are used.
6041

61-
```shell
62-
export TEST_STORAGE_INTEGRATION=1
63-
export AWS_ACCESS_KEY_ID=TEST-DataFusionLogin
64-
export AWS_SECRET_ACCESS_KEY=TEST-DataFusionPassword
65-
export AWS_ENDPOINT=http://127.0.0.1:9000
66-
export AWS_ALLOW_HTTP=true
67-
```
42+
### AWS
6843

69-
Note that `AWS_ENDPOINT` is set without slash at the end.
44+
S3 integration is tested against [Minio](https://github.com/minio/minio) with [TestContainers](https://github.com/testcontainers/testcontainers-rs).
45+
This requires Docker to be running on your machine and port 9000 to be free.
7046

71-
Run tests
47+
If you see an error mentioning "failed to load IMDS session token" such as
7248

73-
```shell
74-
cargo test
75-
```
49+
> ---- object_storage::tests::s3_object_store_builder_resolves_region_when_none_provided stdout ----
50+
> Error: ObjectStore(Generic { store: "S3", source: "Error getting credentials from provider: an error occurred while loading credentials: failed to load IMDS session token" })
51+
52+
You may need to disable fetching S3 credentials from the environment by setting the `AWS_EC2_METADATA_DISABLED` environment variable, for example:
53+
54+
> $ AWS_EC2_METADATA_DISABLED=true TEST_STORAGE_INTEGRATION=1 cargo test

datafusion-cli/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,5 @@ insta = { workspace = true }
7272
insta-cmd = "0.6.0"
7373
predicates = "3.0"
7474
rstest = { workspace = true }
75+
testcontainers = { workspace = true }
76+
testcontainers-modules = { workspace = true, features = ["minio"] }

datafusion-cli/tests/cli_integration.rs

Lines changed: 98 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,12 @@ use rstest::rstest;
2121

2222
use insta::{glob, Settings};
2323
use insta_cmd::{assert_cmd_snapshot, get_cargo_bin};
24+
use std::path::PathBuf;
2425
use std::{env, fs};
26+
use testcontainers::core::{CmdWaitFor, ExecCommand, Mount};
27+
use testcontainers::runners::AsyncRunner;
28+
use testcontainers::{ContainerAsync, ImageExt, TestcontainersError};
29+
use testcontainers_modules::minio;
2530

2631
fn cli() -> Command {
2732
Command::new(get_cargo_bin("datafusion-cli"))
@@ -35,6 +40,83 @@ fn make_settings() -> Settings {
3540
settings
3641
}
3742

43+
async fn setup_minio_container() -> ContainerAsync<minio::MinIO> {
44+
const MINIO_ROOT_USER: &str = "TEST-DataFusionLogin";
45+
const MINIO_ROOT_PASSWORD: &str = "TEST-DataFusionPassword";
46+
47+
let data_path =
48+
PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../datafusion/core/tests/data");
49+
50+
let absolute_data_path = data_path
51+
.canonicalize()
52+
.expect("Failed to get absolute path for test data");
53+
54+
let container = minio::MinIO::default()
55+
.with_env_var("MINIO_ROOT_USER", MINIO_ROOT_USER)
56+
.with_env_var("MINIO_ROOT_PASSWORD", MINIO_ROOT_PASSWORD)
57+
.with_mount(Mount::bind_mount(
58+
absolute_data_path.to_str().unwrap(),
59+
"/source",
60+
))
61+
.start()
62+
.await;
63+
64+
match container {
65+
Ok(container) => {
66+
// We wait for MinIO to be healthy and prepare test files. We do it via CLI to avoid an S3 dependency
67+
let commands = [
68+
ExecCommand::new(["/usr/bin/mc", "ready", "local"]),
69+
ExecCommand::new([
70+
"/usr/bin/mc",
71+
"alias",
72+
"set",
73+
"localminio",
74+
"http://localhost:9000",
75+
MINIO_ROOT_USER,
76+
MINIO_ROOT_PASSWORD,
77+
]),
78+
ExecCommand::new(["/usr/bin/mc", "mb", "localminio/data"]),
79+
ExecCommand::new([
80+
"/usr/bin/mc",
81+
"cp",
82+
"-r",
83+
"/source/",
84+
"localminio/data/",
85+
]),
86+
];
87+
88+
for command in commands {
89+
let command =
90+
command.with_cmd_ready_condition(CmdWaitFor::Exit { code: Some(0) });
91+
92+
let cmd_ref = format!("{command:?}");
93+
94+
if let Err(e) = container.exec(command).await {
95+
let stdout = container.stdout_to_vec().await.unwrap_or_default();
96+
let stderr = container.stderr_to_vec().await.unwrap_or_default();
97+
98+
panic!(
99+
"Failed to execute command: {}\nError: {}\nStdout: {:?}\nStderr: {:?}",
100+
cmd_ref,
101+
e,
102+
String::from_utf8_lossy(&stdout),
103+
String::from_utf8_lossy(&stderr)
104+
);
105+
}
106+
}
107+
108+
container
109+
}
110+
111+
Err(TestcontainersError::Client(e)) => {
112+
panic!("Failed to start MinIO container. Ensure Docker is running and accessible: {e}");
113+
}
114+
Err(e) => {
115+
panic!("Failed to start MinIO container: {e}");
116+
}
117+
}
118+
}
119+
38120
#[cfg(test)]
39121
#[ctor::ctor]
40122
fn init() {
@@ -165,12 +247,22 @@ async fn test_cli() {
165247
return;
166248
}
167249

250+
let container = setup_minio_container().await;
251+
168252
let settings = make_settings();
169253
let _bound = settings.bind_to_scope();
170254

255+
let port = container.get_host_port_ipv4(9000).await.unwrap();
256+
171257
glob!("sql/integration/*.sql", |path| {
172258
let input = fs::read_to_string(path).unwrap();
173-
assert_cmd_snapshot!(cli().pass_stdin(input))
259+
assert_cmd_snapshot!(cli()
260+
.env_clear()
261+
.env("AWS_ACCESS_KEY_ID", "TEST-DataFusionLogin")
262+
.env("AWS_SECRET_ACCESS_KEY", "TEST-DataFusionPassword")
263+
.env("AWS_ENDPOINT", format!("http://localhost:{port}"))
264+
.env("AWS_ALLOW_HTTP", "true")
265+
.pass_stdin(input))
174266
});
175267
}
176268

@@ -186,20 +278,17 @@ async fn test_aws_options() {
186278
let settings = make_settings();
187279
let _bound = settings.bind_to_scope();
188280

189-
let access_key_id =
190-
env::var("AWS_ACCESS_KEY_ID").expect("AWS_ACCESS_KEY_ID is not set");
191-
let secret_access_key =
192-
env::var("AWS_SECRET_ACCESS_KEY").expect("AWS_SECRET_ACCESS_KEY is not set");
193-
let endpoint_url = env::var("AWS_ENDPOINT").expect("AWS_ENDPOINT is not set");
281+
let container = setup_minio_container().await;
282+
let port = container.get_host_port_ipv4(9000).await.unwrap();
194283

195284
let input = format!(
196285
r#"CREATE EXTERNAL TABLE CARS
197286
STORED AS CSV
198287
LOCATION 's3://data/cars.csv'
199288
OPTIONS(
200-
'aws.access_key_id' '{access_key_id}',
201-
'aws.secret_access_key' '{secret_access_key}',
202-
'aws.endpoint' '{endpoint_url}',
289+
'aws.access_key_id' 'TEST-DataFusionLogin',
290+
'aws.secret_access_key' 'TEST-DataFusionPassword',
291+
'aws.endpoint' 'http://localhost:{port}',
203292
'aws.allow_http' 'true'
204293
);
205294

datafusion/sqllogictest/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ rust_decimal = { version = "1.37.2", features = ["tokio-pg"] }
6060
sqllogictest = "0.28.3"
6161
sqlparser = { workspace = true }
6262
tempfile = { workspace = true }
63-
testcontainers = { version = "0.24", features = ["default"], optional = true }
64-
testcontainers-modules = { version = "0.12", features = ["postgres"], optional = true }
63+
testcontainers = { workspace = true, optional = true }
64+
testcontainers-modules = { workspace = true, features = ["postgres"], optional = true }
6565
thiserror = "2.0.12"
6666
tokio = { workspace = true }
6767
tokio-postgres = { version = "0.7.12", optional = true }

0 commit comments

Comments
 (0)