-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy path_plugin.py
More file actions
95 lines (75 loc) · 2.95 KB
/
Copy path_plugin.py
File metadata and controls
95 lines (75 loc) · 2.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Copyright (c) QuantCo 2025-2026
# SPDX-License-Identifier: BSD-3-Clause
from collections.abc import Iterable
from pathlib import Path
from typing import TypeAlias
import polars as pl
from polars.plugins import register_plugin_function
# Directory containing this module. It is handed to `register_plugin_function` as
# `plugin_path` (together with `use_abs_path=True`) by every wrapper in this file.
PLUGIN_PATH = Path(__file__).parent
# Anything accepted where a rule expression is expected: a polars expression or a
# plain string (presumably a column name -- confirm against the plugin's callers).
IntoExpr: TypeAlias = pl.Expr | str
def all_rules_horizontal(rules: IntoExpr | Iterable[IntoExpr]) -> pl.Expr:
    """Evaluate a set of rules row by row, like :func:`~polars.all_horizontal`.

    Compared to :func:`~polars.all_horizontal`, this implementation is more
    efficient and yields better errors.

    Args:
        rules: The rules to evaluate.

    Returns:
        A boolean expression with one result per row.
    """
    row_wise_result = register_plugin_function(
        args=rules,
        function_name="all_rules_horizontal",
        plugin_path=PLUGIN_PATH,
        # Each output row depends only on the matching input row.
        is_elementwise=True,
        use_abs_path=True,
    )
    return row_wise_result
def all_rules(rules: IntoExpr | Iterable[IntoExpr]) -> pl.Expr:
    """Reduce a set of rules to a single boolean in one plugin call.

    Combines :func:`~polars.all_horizontal` with a subsequent ``.all()``. Doing both
    in one step is more efficient than running the two operations one after the
    other.

    Args:
        rules: The rules to evaluate.

    Returns:
        A scalar boolean expression.
    """
    reduced_result = register_plugin_function(
        args=rules,
        function_name="all_rules",
        plugin_path=PLUGIN_PATH,
        returns_scalar=True,
        use_abs_path=True,
    )
    return reduced_result
def all_rules_required(
    rules: IntoExpr | Iterable[IntoExpr],
    *,
    null_is_valid: bool = True,
    schema_name: str,
) -> pl.Expr:
    """Evaluate a set of rules and fail at execution time if any rule is violated.

    Like :func:`all_rules`, this combines :func:`~polars.all_horizontal` with
    ``.all()``, but it differs in two ways:

    - A :class:`~polars.exceptions.ComputeError` with a helpful error message is
      raised at execution time if any rule indicates a validation failure.
    - The resulting boolean is broadcast to the length of the input. This allows
      element-wise evaluation and keeps the operation non-blocking on the streaming
      engine.

    Args:
        rules: The rules to evaluate.
        null_is_valid: Whether to treat null values as valid (i.e., `true`).
        schema_name: The name of the schema being validated. This is used to produce
            better error messages.

    Returns:
        A boolean expression holding conceptually a single value, broadcast to the
        length of the input.
    """
    # Options forwarded to the plugin function itself.
    plugin_kwargs = {"null_is_valid": null_is_valid, "schema_name": schema_name}
    return register_plugin_function(
        args=rules,
        kwargs=plugin_kwargs,
        function_name="all_rules_required",
        plugin_path=PLUGIN_PATH,
        use_abs_path=True,
        # NOTE: Conceptually this is a reduction to one boolean. It is declared
        #  element-wise anyway so that the plugin does not become blocking on the
        #  streaming engine: the single value is simply broadcast, and `validate`
        #  does not need to find every validation failure (it fails fast).
        is_elementwise=True,
    )