Skip to content

Commit dd49525

Browse files
authored
Merge pull request #2820 from ClickHouse/04/06/26/apache_arrow_experimental
[client-v2] Apache Arrow Example
2 parents 34cbe82 + dfa279d commit dd49525

12 files changed

Lines changed: 779 additions & 0 deletions

File tree

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#
2+
# https://help.github.com/articles/dealing-with-line-endings/
3+
#
4+
# Linux start script should use lf
5+
/gradlew text eol=lf
6+
7+
# These are Windows script files and should use crlf
8+
*.bat text eol=crlf
9+
10+
# Binary files should be left untouched
11+
*.jar binary
12+
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Ignore Gradle project-specific cache directory
2+
.gradle
3+
4+
# Ignore Gradle build output directory
5+
build
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Client V2 Apache Arrow Example
2+
3+
## Overview
4+
5+
This module contains a runnable example demonstrating how to use `client-v2`
6+
to insert and read data using the [Apache Arrow Stream](https://clickhouse.com/docs/en/interfaces/formats/#arrowstream)
7+
format (`ClickHouseFormat.ArrowStream`).
8+
9+
The example shows:
10+
11+
- writing a batch into ClickHouse from an Arrow `VectorSchemaRoot` via
12+
`ArrowStreamWriter`, using `Decimal256Vector` and `TimeStampMilliVector`;
13+
- reading rows back from ClickHouse with `ArrowStreamReader` and copying them
14+
into another table by streaming the same `VectorSchemaRoot` straight back to
15+
`client.insert(...)`.
16+
17+
Unlike the other examples in this repository, this one is built with **Gradle**
18+
(JDK 17 toolchain) and is not part of the Maven multi-module build.
19+
20+
## Requirements
21+
22+
- JDK 17 or newer (the Gradle toolchain will fetch one if it is missing).
23+
- A running ClickHouse server reachable from the machine running the example.
24+
25+
Apache Arrow needs access to direct memory and a few internal JDK APIs. The
26+
required `--add-opens` flags are already wired into `applicationDefaultJvmArgs`
27+
in `build.gradle.kts`, so running the example through Gradle just works:
28+
29+
```text
30+
--add-opens=java.base/java.nio=ALL-UNNAMED
31+
--add-opens=java.base/sun.nio.ch=ALL-UNNAMED
32+
--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED
33+
```
34+
35+
If you run the produced jar manually, pass these flags to the JVM yourself.
36+
37+
## How to Run
38+
39+
From this directory:
40+
41+
```shell
42+
./gradlew run
43+
```
44+
45+
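Connection properties can be supplied as JVM system properties. Note that `./gradlew run` launches the example in a forked JVM, so `-D` flags passed to `gradlew` take effect only if the build forwards them to the `run` task: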
46+
47+
- `-DchEndpoint` - URL of the ClickHouse endpoint to connect to (default: `http://localhost:8123`)
48+
- `-DchUser` - ClickHouse user name (default: `default`)
49+
- `-DchPassword` - ClickHouse user password (default: empty)
50+
- `-DchDatabase` - ClickHouse database name (default: `default`)
51+
52+
Example with custom connection properties:
53+
54+
```shell
55+
./gradlew run \
56+
-DchEndpoint=http://localhost:8123 \
57+
-DchUser=default \
58+
-DchPassword= \
59+
-DchDatabase=default
60+
```
61+
62+
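To see the wire-level data flow, set the SLF4J log level to `DEBUG` (the default, `INFO`, is set via `applicationDefaultJvmArgs` in `build.gradle.kts` and can also be changed there):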
63+
64+
```shell
65+
./gradlew run -Dorg.slf4j.simpleLogger.defaultLogLevel=DEBUG
66+
```
67+
68+
## Executable Example
69+
70+
`com.clickhouse.examples.arrow_format.ReadWriteArrow`
71+
72+
- Creates table `arrow_example (ts DateTime64, val1 Decimal(76,39))` and
73+
inserts a batch of 10 000 rows from Arrow vectors using
74+
`ClickHouseFormat.ArrowStream`.
75+
- Creates tables `arrow_read_example` and `arrow_read_example_copy`
76+
(`ts DateTime64(3), val1 Decimal(76,62)`), populates the first one, reads it
77+
back with `ArrowStreamReader`, and streams each batch into the copy table.
78+
79+
The example truncates the demo tables on every run, so it is safe to rerun.
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
/*
2+
* This file was generated by the Gradle 'init' task.
3+
*
4+
* This generated file contains a sample Java application project to get you started.
5+
* For more details on building Java & JVM projects, please refer to https://docs.gradle.org/9.2.1/userguide/building_java_projects.html in the Gradle documentation.
6+
*/
7+
8+
plugins {
9+
// Apply the application plugin to add support for building a CLI application in Java.
10+
application
11+
}
12+
13+
repositories {
14+
// Use Maven Central for resolving dependencies.
15+
mavenCentral()
16+
mavenLocal()
17+
}
18+
19+
dependencies {
20+
// Use TestNG framework, also requires calling test.useTestNG() below
21+
testImplementation(libs.testng)
22+
23+
implementation(libs.clickhouseClient)
24+
25+
// BOM used to keep versions consistent across all Arrow libraries
26+
implementation(platform(libs.arrowBom))
27+
implementation(libs.arrowVector)
28+
implementation(libs.arrowMemory)
29+
implementation(libs.arrowCompression)
30+
31+
// Logging
32+
implementation(libs.slf4jSimple)
33+
implementation(libs.slf4jApi)
34+
35+
}
36+
37+
// Apply a specific Java toolchain to ease working on different environments.
38+
java {
39+
toolchain {
40+
languageVersion = JavaLanguageVersion.of(17)
41+
}
42+
}
43+
44+
application {
45+
// Define the main class for the application.
46+
mainClass = "com.clickhouse.examples.arrow_format.ReadWriteArrow"
47+
applicationDefaultJvmArgs = listOf(
48+
"-Dorg.slf4j.simpleLogger.defaultLogLevel=INFO", // change to DEBUG to see wire data flow
49+
"--add-opens=java.base/java.nio=ALL-UNNAMED", // needed for Apache Arrow
50+
"--add-opens=java.base/sun.nio.ch=ALL-UNNAMED", // needed for Apache Arrow
51+
"--add-opens=java.base/jdk.internal.misc=ALL-UNNAMED" // needed for Apache Arrow
52+
)
53+
}
54+
55+
tasks.named<Test>("test") {
56+
// Use TestNG for unit tests.
57+
useTestNG()
58+
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# This file was generated by the Gradle 'init' task.
2+
# https://docs.gradle.org/current/userguide/build_environment.html#sec:gradle_configuration_properties
3+
4+
org.gradle.configuration-cache=true
5+
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# This file was generated by the Gradle 'init' task.
2+
# https://docs.gradle.org/current/userguide/platforms.html#sub::toml-dependencies-format
3+
4+
[versions]
5+
testng = "7.5.1"
6+
arrowBom = "19.0.0"
7+
slf4j = "2.0.17"
8+
clickhouseClient = "0.9.8"
9+
10+
[libraries]
11+
testng = { module = "org.testng:testng", version.ref = "testng" }
12+
clickhouseClient = { module = "com.clickhouse:client-v2", version.ref = "clickhouseClient" }
13+
14+
# Arrow support
15+
arrowBom = {module = "org.apache.arrow:arrow-bom", version.ref = "arrowBom"}
16+
arrowVector = {module = "org.apache.arrow:arrow-vector" }
17+
arrowMemory = {module = "org.apache.arrow:arrow-memory-netty" }
18+
arrowCompression = { module = "org.apache.arrow:arrow-compression" }
19+
20+
# Logging
21+
slf4jSimple = { module = "org.slf4j:slf4j-simple", version.ref = "slf4j"}
22+
slf4jApi = { module = "org.slf4j:slf4j-api", version.ref = "slf4j"}
Binary file not shown.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
distributionBase=GRADLE_USER_HOME
2+
distributionPath=wrapper/dists
3+
distributionUrl=https\://services.gradle.org/distributions/gradle-9.2.1-bin.zip
4+
networkTimeout=10000
5+
validateDistributionUrl=true
6+
zipStoreBase=GRADLE_USER_HOME
7+
zipStorePath=wrapper/dists

0 commit comments

Comments
 (0)