|
| 1 | +// ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ |
| 2 | +// ┃ ██████ ██████ ██████ █ █ █ █ █ █▄ ▀███ █ ┃ |
| 3 | +// ┃ ▄▄▄▄▄█ █▄▄▄▄▄ ▄▄▄▄▄█ ▀▀▀▀▀█▀▀▀▀▀ █ ▀▀▀▀▀█ ████████▌▐███ ███▄ ▀█ █ ▀▀▀▀▀ ┃ |
| 4 | +// ┃ █▀▀▀▀▀ █▀▀▀▀▀ █▀██▀▀ ▄▄▄▄▄ █ ▄▄▄▄▄█ ▄▄▄▄▄█ ████████▌▐███ █████▄ █ ▄▄▄▄▄ ┃ |
| 5 | +// ┃ █ ██████ █ ▀█▄ █ ██████ █ ███▌▐███ ███████▄ █ ┃ |
| 6 | +// ┣━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┫ |
| 7 | +// ┃ Copyright (c) 2017, the Perspective Authors. ┃ |
| 8 | +// ┃ ╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌ ┃ |
| 9 | +// ┃ This file is part of the Perspective library, distributed under the terms ┃ |
| 10 | +// ┃ of the [Apache License 2.0](https://www.apache.org/licenses/LICENSE-2.0). ┃ |
| 11 | +// ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ |
| 12 | + |
| 13 | +import { test, expect } from "@perspective-dev/test"; |
| 14 | +import perspective from "../perspective_client"; |
| 15 | + |
| 16 | +// Concrete use cases from issue #1527 ("Better string functions in |
| 17 | +// expressions"). The original issue body sketched these in a hypothetical |
| 18 | +// dialect; the tests below port them to the dialect Perspective actually |
| 19 | +// implements. Differences from the issue's pseudocode: |
| 20 | +// |
| 21 | +// - `find(str, substr) -> int` does not exist. The closest function is |
| 22 | +// `indexof(col, regex, out_vec) -> bool`, which performs a *regex* search, |
| 23 | +// writes [start, end] of the first capturing group into `out_vec`, and |
| 24 | +// requires the regex to have at least one capturing group (else it |
| 25 | +// returns STATUS_CLEAR). The tests therefore wrap the literal char in a |
| 26 | +// capturing group: `' '` -> `'( )'`, `','` -> `'(,)'`, `'$'` -> `'([$])'`. |
| 27 | +// - `null()` is not a function call; `null` is a literal. |
| 28 | +// - `strlen(s)` -> `length(s)`. |
| 29 | +// - `substring(s, start, count)` takes a *count*, not an end index, and |
| 30 | +// returns null if `start + count > length(s)`. |
| 31 | +// - String literals pass through ExprTK's `cleanup_escapes`, which drops |
| 32 | +// unrecognized escape characters (`\s` -> `s`, `\.` -> `.`). |
| 33 | + |
((perspective) => {
    /**
     * Runs `body` against a view opened over a freshly created table, then
     * releases both handles.
     *
     * Cleanup sits in `finally` blocks so that a failing assertion still
     * deletes the view and the table, and each `delete()` is awaited so that
     * an error raised during teardown is reported by the test that caused it
     * instead of escaping as a floating promise. The view is deleted before
     * the table it was opened on.
     *
     * @param {Object} data - Column-oriented data passed to `perspective.table`.
     * @param {Object} config - View config passed to `table.view`.
     * @param {Function} body - Async callback that receives the open view.
     * @returns {Promise<void>} Settles once `body` and both deletes finish.
     */
    async function withView(data, config, body) {
        const table = await perspective.table(data);
        try {
            const view = await table.view(config);
            try {
                await body(view);
            } finally {
                await view.delete();
            }
        } finally {
            await table.delete();
        }
    }

    test.describe("Issue 1527 use cases", function () {
        test("contains literal substring", async function () {
            await withView(
                {
                    a: ["abcdef", "xyz", "abXabY", null, "abc"],
                },
                {
                    expressions: {
                        has_ab: "contains(\"a\", 'ab')",
                    },
                },
                async (view) => {
                    const result = await view.to_columns();
                    const schema = await view.expression_schema();
                    expect(schema["has_ab"]).toEqual("boolean");
                    // A null input row yields a null result, not `false`.
                    expect(result["has_ab"]).toEqual([
                        true,
                        false,
                        true,
                        null,
                        true,
                    ]);
                },
            );
        });

        // Parse "USD $1000"-style strings into Currency (string) and Value
        // (float) columns, tolerant of malformed rows.
        test("split currency/value string column", async function () {
            await withView(
                {
                    "Bad Column": [
                        "USD $1000",
                        "EUR $250",
                        "malformed",
                        null,
                        "GBP $42",
                    ],
                },
                {
                    // The expression bodies are multi-line template literals;
                    // the second line starts at column 0 so the string sent
                    // to the engine carries no extra leading whitespace.
                    expressions: {
                        Currency: `var v[2];
if (indexof("Bad Column", '( )', v)) { substring("Bad Column", 0, v[0]) } else { null }`,
                        Value: `var v[2];
if (indexof("Bad Column", '([$])', v)) { float(substring("Bad Column", v[0] + 1)) } else { null }`,
                    },
                },
                async (view) => {
                    const result = await view.to_columns();
                    const schema = await view.expression_schema();
                    expect(schema["Currency"]).toEqual("string");
                    expect(schema["Value"]).toEqual("float");
                    // Rows without a space / dollar sign, and the null row,
                    // fall through to the `else { null }` branch.
                    expect(result["Currency"]).toEqual([
                        "USD",
                        "EUR",
                        null,
                        null,
                        "GBP",
                    ]);
                    expect(result["Value"]).toEqual([
                        1000,
                        250,
                        null,
                        null,
                        42,
                    ]);
                },
            );
        });

        // Parse "(123, 456)"-style strings into Longitude and Latitude
        // float columns.
        test("split longitude/latitude string column", async function () {
            await withView(
                {
                    Coords: [
                        "(123, 456)",
                        "(1.5, -2.25)",
                        "broken",
                        null,
                        "(0, 0)",
                    ],
                },
                {
                    // Longitude: skip the opening paren and take everything
                    // up to the comma. Latitude: start after the comma and
                    // stop one character short so the closing paren is
                    // excluded.
                    expressions: {
                        Longitude: `var v[2];
if (indexof("Coords", '(,)', v)) { float(substring("Coords", 1, v[0] - 1)) } else { null }`,
                        Latitude: `var v[2];
if (indexof("Coords", '(,)', v)) { float(substring("Coords", v[0] + 1, length("Coords") - v[0] - 2)) } else { null }`,
                    },
                },
                async (view) => {
                    const result = await view.to_columns();
                    const schema = await view.expression_schema();
                    expect(schema["Longitude"]).toEqual("float");
                    expect(schema["Latitude"]).toEqual("float");
                    expect(result["Longitude"]).toEqual([
                        123,
                        1.5,
                        null,
                        null,
                        0,
                    ]);
                    expect(result["Latitude"]).toEqual([
                        456,
                        -2.25,
                        null,
                        null,
                        0,
                    ]);
                },
            );
        });

        // Normalize spelling variants by stripping dots and whitespace.
        test("replace_all regex strips dots/whitespace", async function () {
            await withView(
                {
                    State: ["NC", "N.C.", "N. C.", "N .C.", "VA"],
                },
                {
                    expressions: {
                        Normalized: `replace_all("State", '[. ]', '')`,
                    },
                },
                async (view) => {
                    const result = await view.to_columns();
                    expect(result["Normalized"]).toEqual([
                        "NC",
                        "NC",
                        "NC",
                        "NC",
                        "VA",
                    ]);
                },
            );
        });
    });
})(perspective);
0 commit comments