Skip to content

Commit ace5c72

Browse files
committed
Expose contains for ExprTk, docs and tests
Signed-off-by: Andrew Stein <steinlink@gmail.com>
1 parent 1bc0929 commit ace5c72

7 files changed

Lines changed: 192 additions & 3 deletions

File tree

rust/perspective-client/src/rust/config/expressions.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ pub struct CompletionItemSuggestion {
259259
}
260260

261261
#[doc(hidden)]
262-
pub static COMPLETIONS: [CompletionItemSuggestion; 78] = [
262+
pub static COMPLETIONS: [CompletionItemSuggestion; 79] = [
263263
CompletionItemSuggestion {
264264
label: "var",
265265
insert_text: "var ${1:x := 1}",
@@ -542,6 +542,11 @@ pub static COMPLETIONS: [CompletionItemSuggestion; 78] = [
542542
insert_text: "coalesce(${1:x}, ${2:y})",
543543
documentation: "Returns the first non-null argument.",
544544
},
545+
CompletionItemSuggestion {
546+
label: "contains",
547+
insert_text: "contains(${1:x}, ${2:'substr'})",
548+
documentation: "Whether the string column or value contains the literal substring.",
549+
},
545550
CompletionItemSuggestion {
546551
label: "not",
547552
insert_text: "not(${1:x})",
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
2+
// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃
3+
// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃
4+
// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃
5+
// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃
6+
// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫
7+
// ┃ Copyright (c) 2017, the Perspective Authors. ┃
8+
// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃
9+
// ┃ This file is part of the Perspective library, distributed under the terms ┃
10+
// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃
11+
// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
12+
13+
import { test, expect } from "@perspective-dev/test";
14+
import perspective from "../perspective_client";
15+
16+
// Concrete use cases from issue #1527 ("Better string functions in
17+
// expressions"). The original issue body sketched these in a hypothetical
18+
// dialect; the tests below port them to the dialect Perspective actually
19+
// implements. Differences from the issue's pseudocode:
20+
//
21+
// - `find(str, substr) -> int` does not exist. The closest function is
22+
// `indexof(col, regex, out_vec) -> bool`, which performs a *regex* search,
23+
// writes [start, end] of the first capturing group into `out_vec`, and
24+
// requires the regex to have at least one capturing group (else it
25+
// returns STATUS_CLEAR). The tests therefore wrap the literal char in a
26+
// capturing group: `' '` -> `'( )'`, `','` -> `'(,)'`, `'$'` -> `'([$])'`.
27+
// - `null()` is not a function call; `null` is a literal.
28+
// - `strlen(s)` -> `length(s)`.
29+
// - `substring(s, start, count)` takes a *count*, not an end index, and
30+
// returns null if `start + count > length(s)`.
31+
// - String literals pass through ExprTK's `cleanup_escapes`, which strips
32+
// the backslash from unrecognized escape sequences (`\s` -> `s`, `\.` -> `.`).
33+
34+
((perspective) => {
35+
test.describe("Issue 1527 use cases", function () {
36+
test("contains literal substring", async function () {
37+
const table = await perspective.table({
38+
a: ["abcdef", "xyz", "abXabY", null, "abc"],
39+
});
40+
41+
const view = await table.view({
42+
expressions: {
43+
has_ab: "contains(\"a\", 'ab')",
44+
},
45+
});
46+
47+
const result = await view.to_columns();
48+
const schema = await view.expression_schema();
49+
expect(schema["has_ab"]).toEqual("boolean");
50+
expect(result["has_ab"]).toEqual([true, false, true, null, true]);
51+
view.delete();
52+
table.delete();
53+
});
54+
55+
// Parse "USD $1000"-style strings into Currency (string) and Value
56+
// (float) columns, tolerant of malformed rows.
57+
test("split currency/value string column", async function () {
58+
const table = await perspective.table({
59+
"Bad Column": [
60+
"USD $1000",
61+
"EUR $250",
62+
"malformed",
63+
null,
64+
"GBP $42",
65+
],
66+
});
67+
const view = await table.view({
68+
expressions: {
69+
Currency: `var v[2];
70+
if (indexof("Bad Column", '( )', v)) { substring("Bad Column", 0, v[0]) } else { null }`,
71+
Value: `var v[2];
72+
if (indexof("Bad Column", '([$])', v)) { float(substring("Bad Column", v[0] + 1)) } else { null }`,
73+
},
74+
});
75+
const result = await view.to_columns();
76+
const schema = await view.expression_schema();
77+
expect(schema["Currency"]).toEqual("string");
78+
expect(schema["Value"]).toEqual("float");
79+
expect(result["Currency"]).toEqual([
80+
"USD",
81+
"EUR",
82+
null,
83+
null,
84+
"GBP",
85+
]);
86+
expect(result["Value"]).toEqual([1000, 250, null, null, 42]);
87+
view.delete();
88+
table.delete();
89+
});
90+
91+
// Parse "(123, 456)"-style strings into Longitude and Latitude
92+
// float columns.
93+
test("split longitude/latitude string column", async function () {
94+
const table = await perspective.table({
95+
Coords: [
96+
"(123, 456)",
97+
"(1.5, -2.25)",
98+
"broken",
99+
null,
100+
"(0, 0)",
101+
],
102+
});
103+
const view = await table.view({
104+
expressions: {
105+
Longitude: `var v[2];
106+
if (indexof("Coords", '(,)', v)) { float(substring("Coords", 1, v[0] - 1)) } else { null }`,
107+
Latitude: `var v[2];
108+
if (indexof("Coords", '(,)', v)) { float(substring("Coords", v[0] + 1, length("Coords") - v[0] - 2)) } else { null }`,
109+
},
110+
});
111+
const result = await view.to_columns();
112+
const schema = await view.expression_schema();
113+
expect(schema["Longitude"]).toEqual("float");
114+
expect(schema["Latitude"]).toEqual("float");
115+
expect(result["Longitude"]).toEqual([123, 1.5, null, null, 0]);
116+
expect(result["Latitude"]).toEqual([456, -2.25, null, null, 0]);
117+
view.delete();
118+
table.delete();
119+
});
120+
121+
// Normalize spelling variants by stripping dots and whitespace.
122+
test("replace_all regex strips dots/whitespace", async function () {
123+
const table = await perspective.table({
124+
State: ["NC", "N.C.", "N. C.", "N .C.", "VA"],
125+
});
126+
const view = await table.view({
127+
expressions: {
128+
Normalized: `replace_all("State", '[. ]', '')`,
129+
},
130+
});
131+
const result = await view.to_columns();
132+
expect(result["Normalized"]).toEqual([
133+
"NC",
134+
"NC",
135+
"NC",
136+
"NC",
137+
"VA",
138+
]);
139+
view.delete();
140+
table.delete();
141+
});
142+
});
143+
})(perspective);

rust/perspective-server/cpp/perspective/src/cpp/computed_expression.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ computed_function::max_fn t_computed_expression_parser::MAX_FN =
3737
computed_function::coalesce t_computed_expression_parser::COALESCE_FN =
3838
computed_function::coalesce();
3939

40+
computed_function::contains t_computed_expression_parser::CONTAINS_FN =
41+
computed_function::contains();
42+
4043
computed_function::diff3 t_computed_expression_parser::diff3 =
4144
computed_function::diff3();
4245

@@ -540,6 +543,9 @@ t_computed_function_store::register_computed_functions(
540543
sym_table.add_function(
541544
"coalesce", t_computed_expression_parser::COALESCE_FN
542545
);
546+
sym_table.add_function(
547+
"contains", t_computed_expression_parser::CONTAINS_FN
548+
);
543549
sym_table.add_reserved_function(
544550
"diff3", t_computed_expression_parser::diff3
545551
);

rust/perspective-server/cpp/perspective/src/cpp/computed_function.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,36 @@ match_all::operator()(t_parameter_list parameters) {
485485
return rval;
486486
}
487487

488+
contains::contains() : exprtk::igeneric_function<t_tscalar>("TS") {}
489+
490+
contains::~contains() = default;
491+
492+
t_tscalar
493+
contains::operator()(t_parameter_list parameters) {
494+
t_tscalar rval;
495+
rval.clear();
496+
rval.m_type = DTYPE_BOOL;
497+
498+
t_scalar_view str_view(parameters[0]);
499+
t_string_view needle_view(parameters[1]);
500+
501+
t_tscalar str = str_view();
502+
std::string needle =
503+
std::string(needle_view.begin(), needle_view.end());
504+
505+
if (str.get_dtype() != DTYPE_STR || str.m_status == STATUS_CLEAR) {
506+
rval.m_status = STATUS_CLEAR;
507+
return rval;
508+
}
509+
510+
if (!str.is_valid()) {
511+
return rval;
512+
}
513+
514+
rval.set(str.to_string().find(needle) != std::string::npos);
515+
return rval;
516+
}
517+
488518
search::search(
489519
t_expression_vocab& expression_vocab,
490520
t_regex_mapping& regex_mapping,

rust/perspective-server/cpp/perspective/src/cpp/server.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,7 @@ re_intern_strings(std::string&& expression) {
334334
static auto
335335
re_unintern_some_exprs(std::string&& expression) {
336336
static const RE2 interned_param(
337-
"(?:match|match_all|search|indexof|replace|replace_all)\\("
337+
"(?:match|match_all|search|indexof|replace|replace_all|contains)\\("
338338
"(?:.*?,\\s*(intern\\(('.*?')\\)))"
339339
);
340340
static const RE2 intern_match("intern\\(('.*?')\\)");

rust/perspective-server/cpp/perspective/src/include/perspective/computed_expression.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ class PERSPECTIVE_EXPORT t_computed_expression_parser {
118118
static computed_function::min_fn MIN_FN;
119119
static computed_function::max_fn MAX_FN;
120120
static computed_function::coalesce COALESCE_FN;
121+
static computed_function::contains CONTAINS_FN;
121122
static computed_function::diff3 diff3;
122123
static computed_function::norm3 norm3;
123124
static computed_function::cross_product3 cross_product3;

rust/perspective-server/cpp/perspective/src/include/perspective/computed_function.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,11 @@ namespace computed_function {
128128
* @brief Given a string column and a non-regex string literal, check
129129
* whether each row in the string column contains the string literal.
130130
*/
131-
STRING_FUNCTION_HEADER(contains)
131+
struct contains final : public exprtk::igeneric_function<t_tscalar> {
132+
contains();
133+
~contains();
134+
t_tscalar operator()(t_parameter_list parameters) override;
135+
};
132136

133137
/**
134138
* @brief match(string, pattern) => True if the string or a substring

0 commit comments

Comments
 (0)