Skip to content

Commit 01d2bad

Browse files
committed
feature: flow-php/arrow-ext
Introduce arrow-ext and expose parquet reader/writer. Introduce ParquetEngine abstraction into flow-php/parquet. Make flow-php/parquet use engines based on arrow extension availability.
1 parent: ee89a3c · commit: 01d2bad

139 files changed

Lines changed: 12344 additions & 2002 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.
.github/workflows/job-arrow-extension.yml

Lines changed: 84 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,84 @@
1+
name: Arrow Extension
2+
3+
on:
4+
workflow_call:
5+
secrets:
6+
CODECOV_TOKEN:
7+
required: false
8+
9+
jobs:
10+
build:
11+
name: Build and Test Extension
12+
runs-on: ${{ matrix.os }}
13+
strategy:
14+
fail-fast: false
15+
matrix:
16+
os: ['ubuntu-latest', 'macos-latest']
17+
php: ['8.3', '8.4', '8.5']
18+
19+
steps:
20+
- uses: actions/checkout@v5
21+
with:
22+
submodules: true
23+
24+
- name: Setup PHP Environment
25+
uses: ./.github/actions/setup-php-env
26+
with:
27+
php-version: ${{ matrix.php }}
28+
dependencies: locked
29+
extensions: ':psr, bcmath, dom, hash, json, mbstring, xml, xmlwriter, xmlreader, zlib'
30+
tools: 'composer:v2, php-config'
31+
install-dependencies: 'false'
32+
33+
- name: Install Rust toolchain
34+
uses: dtolnay/rust-toolchain@stable
35+
36+
- name: Cache cargo registry and build
37+
uses: actions/cache@v4
38+
with:
39+
path: |
40+
~/.cargo/registry
41+
~/.cargo/git
42+
src/extension/arrow-ext/target
43+
key: ${{ runner.os }}-php${{ matrix.php }}-cargo-${{ hashFiles('src/extension/arrow-ext/Cargo.lock') }}
44+
restore-keys: |
45+
${{ runner.os }}-php${{ matrix.php }}-cargo-
46+
47+
- name: Install build dependencies (Ubuntu)
48+
if: runner.os == 'Linux'
49+
run: |
50+
sudo apt-get update
51+
sudo apt-get install -y build-essential clang libclang-dev
52+
53+
- name: Install build dependencies (macOS)
54+
if: runner.os == 'macOS'
55+
run: |
56+
brew install llvm
57+
echo "LIBCLANG_PATH=$(brew --prefix llvm)/lib" >> $GITHUB_ENV
58+
59+
- name: Set LIBCLANG_PATH (Ubuntu)
60+
if: runner.os == 'Linux'
61+
run: |
62+
echo "LIBCLANG_PATH=$(llvm-config --libdir)" >> $GITHUB_ENV
63+
64+
- name: Build extension
65+
working-directory: src/extension/arrow-ext
66+
run: make build
67+
68+
- name: Run PHPT tests
69+
working-directory: src/extension/arrow-ext
70+
run: make test
71+
72+
- name: Install extension and verify
73+
working-directory: src/extension/arrow-ext
74+
run: |
75+
EXT_DIR=$(php -r 'echo ini_get("extension_dir");')
76+
sudo cp ext/modules/arrow.so "$EXT_DIR/"
77+
echo "extension=arrow.so" | sudo tee -a "$(php -r 'echo php_ini_loaded_file();')"
78+
php -m | grep arrow
79+
80+
- name: Install Composer Dependencies
81+
run: composer install --no-interaction --no-progress
82+
83+
- name: Run Parquet Integration Tests with Arrow
84+
run: tools/phpunit/vendor/bin/phpunit --testsuite lib-parquet-integration --group apache-parquet-testing

.github/workflows/job-tests.yml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,8 @@ jobs:
6363
steps:
6464
- name: "Checkout"
6565
uses: "actions/checkout@v5"
66+
with:
67+
submodules: true
6668

6769
- name: "Start OpenTelemetry Collector"
6870
run: |

.github/workflows/monorepo-split.yml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,8 @@ jobs:
103103

104104
- local_path: 'src/extension/pg-query-ext'
105105
split_repository: 'pg-query-ext'
106+
- local_path: 'src/extension/arrow-ext'
107+
split_repository: 'arrow-ext'
106108

107109
steps:
108110
- uses: actions/checkout@v5

.github/workflows/test-suite.yml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,10 @@ jobs:
4545
uses: ./.github/workflows/job-pg-query-extension.yml
4646
secrets: inherit
4747

48+
arrow-extension:
49+
uses: ./.github/workflows/job-arrow-extension.yml
50+
secrets: inherit
51+
4852
windows-tests:
4953
uses: ./.github/workflows/job-windows-tests.yml
5054

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
[submodule "src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/Fixtures/parquet-testing"]
2+
path = src/lib/parquet/tests/Flow/Parquet/Tests/Integration/IO/Fixtures/parquet-testing
3+
url = https://github.com/apache/parquet-testing.git

.nix/pkgs/flow-php/package.nix

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,12 @@
55
php-brotli,
66
php-zstd,
77
php-pg-query-ext,
8+
php-arrow-ext,
89
with-pcov ? true,
910
with-xdebug ? false,
1011
with-blackfire ? false,
1112
with-pg-query-ext ? false,
13+
with-arrow-ext ? false,
1214
with-grpc ? false
1315
}:
1416

@@ -34,6 +36,7 @@ let
3436
++ (if with-pcov then [pcov] else [])
3537
++ (if with-blackfire then [blackfire] else [])
3638
++ (if with-pg-query-ext then [(php-pg-query-ext.override { inherit php; })] else [])
39+
++ (if with-arrow-ext then [(php-arrow-ext.override { inherit php; })] else [])
3740
++ (if with-grpc then [grpc] else [])
3841
);
3942
in
Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,64 @@
1+
{
2+
php,
3+
lib,
4+
stdenv,
5+
rustPlatform,
6+
clang,
7+
llvmPackages,
8+
}:
9+
10+
let
11+
extSrc = builtins.path {
12+
path = ../../../src/extension/arrow-ext;
13+
name = "arrow-ext-src";
14+
filter = path: type:
15+
let baseName = baseNameOf path;
16+
in !(
17+
baseName == "target" ||
18+
baseName == "vendor" ||
19+
baseName == "ext" ||
20+
baseName == ".gitignore" ||
21+
baseName == ".gitattributes" ||
22+
baseName == "composer.json" ||
23+
baseName == "composer.lock"
24+
);
25+
};
26+
pkg = rustPlatform.buildRustPackage {
27+
pname = "php-arrow-ext";
28+
version = "0.1.0";
29+
30+
src = extSrc;
31+
32+
cargoLock.lockFile = ../../../src/extension/arrow-ext/Cargo.lock;
33+
34+
nativeBuildInputs = [
35+
clang
36+
llvmPackages.libclang
37+
php.unwrapped
38+
php.unwrapped.dev
39+
];
40+
41+
env = {
42+
LIBCLANG_PATH = "${llvmPackages.libclang.lib}/lib";
43+
PHP_CONFIG = "${php.unwrapped.dev}/bin/php-config";
44+
PHP = "${php.unwrapped}/bin/php";
45+
};
46+
47+
installPhase = let
48+
targetDir = "target/${stdenv.hostPlatform.rust.rustcTargetSpec}/release";
49+
in ''
50+
runHook preInstall
51+
mkdir -p $out/lib/php/extensions
52+
cp ${targetDir}/libarrow${stdenv.hostPlatform.extensions.sharedLibrary} $out/lib/php/extensions/arrow.so
53+
runHook postInstall
54+
'';
55+
56+
doCheck = false;
57+
58+
meta = with lib; {
59+
description = "Apache Arrow PHP extension powered by Rust";
60+
license = licenses.mit;
61+
};
62+
};
63+
in
64+
pkg // { extensionName = "arrow"; }

composer.json

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -160,7 +160,8 @@
160160
"src/lib/snappy/src/Flow",
161161
"src/lib/telemetry/src/Flow",
162162
"src/lib/types/src/Flow",
163-
"src/tools/documentation/src/Flow"
163+
"src/tools/documentation/src/Flow",
164+
"src/extension/arrow-ext/php/Flow"
164165
],
165166
"Flow\\Doctrine\\Bulk\\": [
166167
"src/lib/doctrine-dbal-bulk/src/Flow/Doctrine/Bulk"
@@ -559,11 +560,14 @@
559560
"Google\\Task\\Composer::cleanup"
560561
],
561562
"post-install-cmd": [
563+
"@submodules:init",
562564
"@tools:install"
563565
],
564566
"post-update-cmd": [
567+
"@submodules:init",
565568
"@tools:update"
566569
],
570+
"submodules:init": "git submodule update --init",
567571
"tools:install": [
568572
"composer install --working-dir=./tools/blackfire",
569573
"composer install --working-dir=./tools/box",

0 commit comments

Comments
 (0)