Skip to content

Commit 3b6030a

Browse files
authored
Merge pull request #450 from influxdata/crepererum/numpy
feat: numpy
2 parents 3fb7674 + 7f3baaf commit 3b6030a

7 files changed

Lines changed: 68 additions & 8 deletions

File tree

guests/python/Justfile

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -331,15 +331,24 @@ link-lib profile: (build-lib profile) download-wasi-sdk download-wasi-adapter
331331
-L{{PYTHON_SDK_DIR}} \
332332
-lpython{{PYTHON_VERSION_HALF}}
333333
334-
# Link everything together.
335-
wasm-tools component link \
334+
# Link everything together:
335+
#
336+
# - Python compiled extensions (mapped to the correct place inside the guest file system)
337+
# - libdl stub/mock for `dlopen`/`dlsym`
338+
# - our Rust-based payload
339+
# - CPython
340+
# - WASI SDK libc/libc++ and emulated functionality/stubs
341+
find "{{PYTHON_SITE_PACKAGES}}" -iname '*.so' | sed -E 's|^({{PYTHON_SDK_DIR}})(/.*)$|--dl-openable=\2=\1\2|' | xargs -- \
342+
wasm-tools component link \
336343
--adapt={{WASI_ADAPTER}} \
337344
--output=$target/datafusion_udf_wasm_python.full.wasm \
338345
--use-built-in-libdl \
339346
--validate=true \
340347
$target/libdatafusion_udf_wasm_python.so \
341348
{{PYTHON_SDK_DIR}}/libpython{{PYTHON_VERSION_HALF}}.so \
342349
{{WASI_SYSROOT}}/lib/wasm32-wasip2/libc.so \
350+
{{WASI_SYSROOT}}/lib/wasm32-wasip2/libc++.so \
351+
{{WASI_SYSROOT}}/lib/wasm32-wasip2/libc++abi.so \
343352
{{WASI_SYSROOT}}/lib/wasm32-wasip2/libwasi-emulated-getpid.so \
344353
{{WASI_SYSROOT}}/lib/wasm32-wasip2/libwasi-emulated-process-clocks.so \
345354
{{WASI_SYSROOT}}/lib/wasm32-wasip2/libwasi-emulated-signal.so

guests/python/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ Currently we bundle the following libraries:
7373

7474
- [`certifi`] (not really used though, see ["I/O > HTTP"](#http))
7575
- [`charset-normalizer`]
76+
- [`numpy`]
7677
- [`requests`]
7778
- [`urllib3`]
7879

@@ -261,6 +262,7 @@ There is NO other I/O available that escapes the sandbox.
261262
[`Int64`]: https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html#variant.Int64
262263
[`None`]: https://docs.python.org/3/library/constants.html#None
263264
[`Null`]: https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html#variant.Null
265+
[`numpy`]: https://numpy.org/
264266
[`time`]: https://docs.python.org/3/library/datetime.html#datetime.time
265267
[`Time64`]: https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html#variant.Time64
266268
[`timedelta`]: https://docs.python.org/3/library/datetime.html#datetime.timedelta

guests/python/build.rs

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@
1111
use std::{fs::File, io::Write, path::PathBuf};
1212

1313
/// File endings that should be skipped when bundling up the Python lib.
14-
const SKIP_ENDINGS: &[&str] = &[".a", ".pyc", ".so", ".wasm"];
14+
const SKIP_ENDINGS: &[&str] = &[".a", ".pyc", ".wasm"];
15+
16+
/// File endings that are mocked as empty files.
17+
const MOCK_ENDINGS: &[&str] = &[".so"];
1518

1619
fn main() {
1720
println!("cargo:rerun-if-changed=build.rs");
@@ -48,7 +51,17 @@ fn bundle_python_lib() {
4851
continue;
4952
}
5053

51-
archive.append_path_with_name(path_abs, path_rel).unwrap();
54+
if MOCK_ENDINGS.iter().any(|ending| path_str.ends_with(ending)) {
55+
const MOCK: &[u8] = b"";
56+
57+
let mut header = tar::Header::new_gnu();
58+
header.set_size(MOCK.len() as _);
59+
header.set_cksum();
60+
61+
archive.append_data(&mut header, path_rel, MOCK).unwrap();
62+
} else {
63+
archive.append_path_with_name(path_abs, path_rel).unwrap();
64+
}
5265
}
5366
archive.finish().unwrap();
5467
archive.into_inner().unwrap().flush().unwrap();

guests/python/requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,6 @@ requests==2.33.0 --hash=sha256:3324635456fa185245e24865e810cecec7b4caf933d7eb133
55

66
# See https://github.com/influxdata/urllib3/pull/1 .
77
urllib3 @ https://github.com/influxdata/urllib3/releases/download/2.6.103/urllib3-2.6.103-py3-none-any.whl --hash=sha256:bd06baffa5df72a88f8b0edaeb13cf55b1880bcbcd67c62b391f3ec124add2c3
8+
9+
# our own WASI-native wheels
10+
numpy @ https://github.com/influxdata/python-wasm-wheels/releases/download/v1/numpy-1.26.0-py2.py3-none-any.whl --hash=sha256:ae15276be55efbeaf1de0c82468940aa799024f87f0b679d28351362ff96c466

host/tests/integration_tests/python/runtime/dependencies.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,36 @@ def foo(x: int) -> int:
8181
&Int64Array::from_iter([Some(11), Some(22), Some(11)]) as &dyn Array,
8282
);
8383
}
84+
85+
#[tokio::test]
86+
async fn numpy() {
87+
const CODE: &str = "
88+
import numpy as np
89+
90+
def foo(x: int) -> int:
91+
x = np.int64(x)
92+
x = np.sqrt(x)
93+
return int(x)
94+
";
95+
96+
let udf = python_scalar_udf(CODE).await.unwrap();
97+
let array = udf
98+
.invoke_async_with_args(ScalarFunctionArgs {
99+
args: vec![ColumnarValue::Array(Arc::new(Int64Array::from_iter([
100+
Some(1),
101+
Some(4),
102+
Some(9),
103+
])))],
104+
arg_fields: vec![Arc::new(Field::new("a1", DataType::Int64, true))],
105+
number_rows: 3,
106+
return_field: Arc::new(Field::new("r", DataType::Int64, true)),
107+
config_options: Arc::new(ConfigOptions::default()),
108+
})
109+
.await
110+
.unwrap()
111+
.unwrap_array();
112+
assert_eq!(
113+
array.as_ref(),
114+
&Int64Array::from_iter([Some(1), Some(2), Some(3)]) as &dyn Array,
115+
);
116+
}

host/tests/integration_tests/python/runtime/fs.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def listdir(cwd: str | None, dir: str) -> str:
3939
TestCase {
4040
cwd: None,
4141
dir: "/",
42-
results: &["LICENSE", "lib"],
42+
results: &["LICENSE", "lib", "libpython3.14.so"],
4343
},
4444
TestCase {
4545
cwd: None,
@@ -82,7 +82,7 @@ def listdir(cwd: str | None, dir: str) -> str:
8282
TestCase {
8383
cwd: Some("/lib"),
8484
dir: "..",
85-
results: &["LICENSE", "lib"],
85+
results: &["LICENSE", "lib", "libpython3.14.so"],
8686
},
8787
];
8888

host/tests/integration_tests/python/test_utils.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ use crate::integration_tests::test_utils::FullError;
88

99
/// Memory limit in bytes.
1010
///
11-
/// 100MB.
12-
const MEMORY_LIMIT: usize = 100_000_000;
11+
/// 500MB.
12+
const MEMORY_LIMIT: usize = 500_000_000;
1313

1414
/// Static precompiled Python WASM component for tests
1515
static COMPONENT: OnceCell<WasmComponentPrecompiled> = OnceCell::const_new();

0 commit comments

Comments
 (0)