Skip to content

Commit d30c2d5

Browse files
committed
Support pushdown for re2 extension
ClickHouse is working on a Postgres extension that uses re2 for regular expressions. This addresses the incompatibility between ClickHouse and PostgreSQL regex handling, where the regex engine that evaluates an expression depends on whether or not the function is pushed down.
1 parent 63f01bd commit d30c2d5

8 files changed

Lines changed: 689 additions & 0 deletions

File tree

src/custom_types.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,40 @@ chfdw_check_for_ordered_aggregate(Aggref * agg)
160160
return STR_EQUAL(extname, "pg_clickhouse");
161161
}
162162

163+
/*
164+
* Map sans-prefix pg_re2 function names to ClickHouse
165+
* case-sensitive names. Must be kept in lexicographic order.
*
* Each row is {lowercase name with the "re2" prefix removed, ClickHouse
* spelling of that function}. The final {NULL, NULL} row is the
* end-of-table sentinel that the scan in re2_func_name() stops on, so it
* must stay last. A name with no row here is returned by re2_func_name()
* as-is (minus the prefix), so only names whose ClickHouse spelling
* differs in case need an entry.
*
* NOTE(review): the lookup in re2_func_name() is a linear scan, so the
* ordering rule reads as a maintenance convention rather than something
* that lookup depends on -- confirm before relying on it.
166+
*/
167+
static char *re2_func_map[][2] = {
168+
{"countmatches", "countMatches"},
169+
{"countmatchescaseinsensitive", "countMatchesCaseInsensitive"},
170+
{"extractall", "extractAll"},
171+
{"extractgroups", "extractGroups"},
172+
{"multimatchallindices", "multiMatchAllIndices"},
173+
{"multimatchany", "multiMatchAny"},
174+
{"multimatchanyindex", "multiMatchAnyIndex"},
175+
{"regexpextract", "regexpExtract"},
176+
{"replaceregexpall", "replaceRegexpAll"},
177+
{"replaceregexpone", "replaceRegexpOne"},
178+
{NULL, NULL},
179+
};
180+
181+
inline static char *
182+
re2_func_name(char *proname)
183+
{
184+
Assert(strncmp(proname, "re2", 3) == 0);
185+
char *stripped = proname + 3;
186+
size_t i = 0;
187+
188+
while (re2_func_map[i][0] != NULL)
189+
{
190+
if (STR_EQUAL(re2_func_map[i][0], stripped))
191+
return re2_func_map[i][1];
192+
i++;
193+
}
194+
return stripped;
195+
}
196+
163197
/*
164198
* Map pg_clickhouse pushdown function names to ClickHouse case-sensitive
165199
* names. Must be kept in lexicographic order.
@@ -503,6 +537,12 @@ chfdw_check_for_custom_function(Oid funcid)
503537
strcpy(entry->custom_name, "indexOf");
504538
}
505539
}
540+
else if (STR_EQUAL(extname, "re2"))
541+
{
542+
/* pg_re2: 1:1 pushdown to ClickHouse RE2 functions. */
543+
entry->cf_type = CF_CH_FUNCTION;
544+
strlcpy(entry->custom_name, re2_func_name(proname), NAMEDATALEN);
545+
}
506546
else if (STR_EQUAL(extname, "pg_clickhouse"))
507547
{
508548
/* pg_clickhouse custom functions. */

test/expected/re2_functions.out

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
CREATE SERVER re2_svr FOREIGN DATA WRAPPER clickhouse_fdw OPTIONS(dbname 're2_test');
2+
CREATE USER MAPPING FOR CURRENT_USER SERVER re2_svr;
3+
SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS re2_test');
4+
clickhouse_raw_query
5+
----------------------
6+
7+
(1 row)
8+
9+
SELECT clickhouse_raw_query('CREATE DATABASE re2_test');
10+
clickhouse_raw_query
11+
----------------------
12+
13+
(1 row)
14+
15+
SELECT clickhouse_raw_query($$
16+
CREATE TABLE re2_test.t1 (
17+
id Int32,
18+
val String
19+
) ENGINE = MergeTree ORDER BY id
20+
$$);
21+
clickhouse_raw_query
22+
----------------------
23+
24+
(1 row)
25+
26+
SELECT clickhouse_raw_query($$
27+
INSERT INTO re2_test.t1 VALUES
28+
(1, 'POSIX uses BRE and ERE'),
29+
(2, 're2 uses finite automata'),
30+
(3, 'PCRE supports backtracking')
31+
$$);
32+
clickhouse_raw_query
33+
----------------------
34+
35+
(1 row)
36+
37+
CREATE SCHEMA re2_test;
38+
IMPORT FOREIGN SCHEMA re2_test FROM SERVER re2_svr INTO re2_test;
39+
SET search_path = re2_test, public;
40+
CREATE EXTENSION re2;
41+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2match(val, 're2');
42+
QUERY PLAN
43+
-------------------------------------------------------------------------
44+
Foreign Scan on re2_test.t1
45+
Output: id, val
46+
Remote SQL: SELECT id, val FROM re2_test.t1 WHERE (match(val, 're2'))
47+
(3 rows)
48+
49+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2extract(val, '(re2)') = 're2';
50+
QUERY PLAN
51+
---------------------------------------------------------------------------------------
52+
Foreign Scan on re2_test.t1
53+
Output: id, val
54+
Remote SQL: SELECT id, val FROM re2_test.t1 WHERE ((extract(val, '(re2)') = 're2'))
55+
(3 rows)
56+
57+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2extractall(val, '[A-Z]+') = ARRAY['POSIX','BRE','ERE'];
58+
QUERY PLAN
59+
-----------------------------------------------------------------------------------------------------------
60+
Foreign Scan on re2_test.t1
61+
Output: id, val
62+
Remote SQL: SELECT id, val FROM re2_test.t1 WHERE ((extractAll(val, '[A-Z]+') = ['POSIX','BRE','ERE']))
63+
(3 rows)
64+
65+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2regexpextract(val, '(re2)', 1) = 're2';
66+
QUERY PLAN
67+
------------------------------------------------------------------------------------------------
68+
Foreign Scan on re2_test.t1
69+
Output: id, val
70+
Remote SQL: SELECT id, val FROM re2_test.t1 WHERE ((regexpExtract(val, '(re2)', 1) = 're2'))
71+
(3 rows)
72+
73+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2extractgroups(val, '(POSIX) uses (BRE)') = ARRAY['POSIX','BRE'];
74+
QUERY PLAN
75+
--------------------------------------------------------------------------------------------------------------------
76+
Foreign Scan on re2_test.t1
77+
Output: id, val
78+
Remote SQL: SELECT id, val FROM re2_test.t1 WHERE ((extractGroups(val, '(POSIX) uses (BRE)') = ['POSIX','BRE']))
79+
(3 rows)
80+
81+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2replaceregexpone(val, 'POSIX', 're2') = 're2 uses BRE and ERE';
82+
QUERY PLAN
83+
------------------------------------------------------------------------------------------------------------------------
84+
Foreign Scan on re2_test.t1
85+
Output: id, val
86+
Remote SQL: SELECT id, val FROM re2_test.t1 WHERE ((replaceRegexpOne(val, 'POSIX', 're2') = 're2 uses BRE and ERE'))
87+
(3 rows)
88+
89+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2replaceregexpall(val, ' ', '-') = 're2-uses-finite-automata';
90+
QUERY PLAN
91+
----------------------------------------------------------------------------------------------------------------------
92+
Foreign Scan on re2_test.t1
93+
Output: id, val
94+
Remote SQL: SELECT id, val FROM re2_test.t1 WHERE ((replaceRegexpAll(val, ' ', '-') = 're2-uses-finite-automata'))
95+
(3 rows)
96+
97+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2countmatches(val, 'e') > 0;
98+
QUERY PLAN
99+
------------------------------------------------------------------------------------
100+
Foreign Scan on re2_test.t1
101+
Output: id, val
102+
Remote SQL: SELECT id, val FROM re2_test.t1 WHERE ((countMatches(val, 'e') > 0))
103+
(3 rows)
104+
105+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2countmatchescaseinsensitive(val, 'E') > 0;
106+
QUERY PLAN
107+
---------------------------------------------------------------------------------------------------
108+
Foreign Scan on re2_test.t1
109+
Output: id, val
110+
Remote SQL: SELECT id, val FROM re2_test.t1 WHERE ((countMatchesCaseInsensitive(val, 'E') > 0))
111+
(3 rows)
112+
113+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2multimatchany(val, ARRAY['POSIX','PCRE']);
114+
QUERY PLAN
115+
--------------------------------------------------------------------------------------------
116+
Foreign Scan on re2_test.t1
117+
Output: id, val
118+
Remote SQL: SELECT id, val FROM re2_test.t1 WHERE (multiMatchAny(val, ['POSIX','PCRE']))
119+
(3 rows)
120+
121+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2multimatchanyindex(val, ARRAY['POSIX','PCRE']) > 0;
122+
QUERY PLAN
123+
-------------------------------------------------------------------------------------------------------
124+
Foreign Scan on re2_test.t1
125+
Output: id, val
126+
Remote SQL: SELECT id, val FROM re2_test.t1 WHERE ((multiMatchAnyIndex(val, ['POSIX','PCRE']) > 0))
127+
(3 rows)
128+
129+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2multimatchallindices(val, ARRAY['POSIX','PCRE']) = ARRAY[1];
130+
QUERY PLAN
131+
-----------------------------------------------------------------------------------------------------------
132+
Foreign Scan on re2_test.t1
133+
Output: id, val
134+
Remote SQL: SELECT id, val FROM re2_test.t1 WHERE ((multiMatchAllIndices(val, ['POSIX','PCRE']) = [1]))
135+
(3 rows)
136+
137+
DROP EXTENSION re2;
138+
DROP USER MAPPING FOR CURRENT_USER SERVER re2_svr;
139+
SELECT clickhouse_raw_query('DROP DATABASE re2_test');
140+
clickhouse_raw_query
141+
----------------------
142+
143+
(1 row)
144+
145+
DROP SERVER re2_svr CASCADE;
146+
NOTICE: drop cascades to foreign table t1

test/expected/re2_functions_1.out

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
CREATE SERVER re2_svr FOREIGN DATA WRAPPER clickhouse_fdw OPTIONS(dbname 're2_test');
2+
CREATE USER MAPPING FOR CURRENT_USER SERVER re2_svr;
3+
SELECT clickhouse_raw_query('DROP DATABASE IF EXISTS re2_test');
4+
clickhouse_raw_query
5+
----------------------
6+
7+
(1 row)
8+
9+
SELECT clickhouse_raw_query('CREATE DATABASE re2_test');
10+
clickhouse_raw_query
11+
----------------------
12+
13+
(1 row)
14+
15+
SELECT clickhouse_raw_query($$
16+
CREATE TABLE re2_test.t1 (
17+
id Int32,
18+
val String
19+
) ENGINE = MergeTree ORDER BY id
20+
$$);
21+
clickhouse_raw_query
22+
----------------------
23+
24+
(1 row)
25+
26+
SELECT clickhouse_raw_query($$
27+
INSERT INTO re2_test.t1 VALUES
28+
(1, 'POSIX uses BRE and ERE'),
29+
(2, 're2 uses finite automata'),
30+
(3, 'PCRE supports backtracking')
31+
$$);
32+
clickhouse_raw_query
33+
----------------------
34+
35+
(1 row)
36+
37+
CREATE SCHEMA re2_test;
38+
IMPORT FOREIGN SCHEMA re2_test FROM SERVER re2_svr INTO re2_test;
39+
SET search_path = re2_test, public;
40+
CREATE EXTENSION re2;
41+
ERROR: extension "re2" is not available
42+
HINT: The extension must first be installed on the system where PostgreSQL is running.
43+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2match(val, 're2');
44+
ERROR: function re2match(text, unknown) does not exist
45+
LINE 1: ...PLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2match(v...
46+
^
47+
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
48+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2extract(val, '(re2)') = 're2';
49+
ERROR: function re2extract(text, unknown) does not exist
50+
LINE 1: ...PLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2extract...
51+
^
52+
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
53+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2extractall(val, '[A-Z]+') = ARRAY['POSIX','BRE','ERE'];
54+
ERROR: function re2extractall(text, unknown) does not exist
55+
LINE 1: ...PLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2extract...
56+
^
57+
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
58+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2regexpextract(val, '(re2)', 1) = 're2';
59+
ERROR: function re2regexpextract(text, unknown, integer) does not exist
60+
LINE 1: ...PLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2regexpe...
61+
^
62+
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
63+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2extractgroups(val, '(POSIX) uses (BRE)') = ARRAY['POSIX','BRE'];
64+
ERROR: function re2extractgroups(text, unknown) does not exist
65+
LINE 1: ...PLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2extract...
66+
^
67+
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
68+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2replaceregexpone(val, 'POSIX', 're2') = 're2 uses BRE and ERE';
69+
ERROR: function re2replaceregexpone(text, unknown, unknown) does not exist
70+
LINE 1: ...PLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2replace...
71+
^
72+
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
73+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2replaceregexpall(val, ' ', '-') = 're2-uses-finite-automata';
74+
ERROR: function re2replaceregexpall(text, unknown, unknown) does not exist
75+
LINE 1: ...PLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2replace...
76+
^
77+
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
78+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2countmatches(val, 'e') > 0;
79+
ERROR: function re2countmatches(text, unknown) does not exist
80+
LINE 1: ...PLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2countma...
81+
^
82+
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
83+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2countmatchescaseinsensitive(val, 'E') > 0;
84+
ERROR: function re2countmatchescaseinsensitive(text, unknown) does not exist
85+
LINE 1: ...PLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2countma...
86+
^
87+
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
88+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2multimatchany(val, ARRAY['POSIX','PCRE']);
89+
ERROR: function re2multimatchany(text, text[]) does not exist
90+
LINE 1: ...PLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2multima...
91+
^
92+
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
93+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2multimatchanyindex(val, ARRAY['POSIX','PCRE']) > 0;
94+
ERROR: function re2multimatchanyindex(text, text[]) does not exist
95+
LINE 1: ...PLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2multima...
96+
^
97+
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
98+
EXPLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2multimatchallindices(val, ARRAY['POSIX','PCRE']) = ARRAY[1];
99+
ERROR: function re2multimatchallindices(text, text[]) does not exist
100+
LINE 1: ...PLAIN (VERBOSE, COSTS OFF) SELECT * FROM t1 WHERE re2multima...
101+
^
102+
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
103+
DROP EXTENSION re2;
104+
ERROR: extension "re2" does not exist
105+
DROP USER MAPPING FOR CURRENT_USER SERVER re2_svr;
106+
SELECT clickhouse_raw_query('DROP DATABASE re2_test');
107+
clickhouse_raw_query
108+
----------------------
109+
110+
(1 row)
111+
112+
DROP SERVER re2_svr CASCADE;
113+
NOTICE: drop cascades to foreign table t1

0 commit comments

Comments
 (0)