|
| 1 | +# |
| 2 | +# MDEV-39520: Improve REGEXP_INSTR for MySQL 8.0 compatibility |
| 3 | +# |
| 4 | +# 1. Basic 2-argument form |
| 5 | +SELECT REGEXP_INSTR('abba', 'b{2}'); |
| 6 | +REGEXP_INSTR('abba', 'b{2}') |
| 7 | +2 |
| 8 | +SELECT REGEXP_INSTR('abba', 'x'); |
| 9 | +REGEXP_INSTR('abba', 'x') |
| 10 | +0 |
| 11 | +SELECT REGEXP_INSTR('hello world', 'world'); |
| 12 | +REGEXP_INSTR('hello world', 'world') |
| 13 | +7 |
| 14 | +SELECT REGEXP_INSTR('hello', ''); |
| 15 | +REGEXP_INSTR('hello', '') |
| 16 | +1 |
| 17 | +SELECT REGEXP_INSTR('', 'a'); |
| 18 | +REGEXP_INSTR('', 'a') |
| 19 | +0 |
| 20 | +SELECT REGEXP_INSTR('', ''); |
| 21 | +REGEXP_INSTR('', '') |
| 22 | +1 |
| 23 | +# 2. Three arguments: pos |
| 24 | +SELECT REGEXP_INSTR('abba', 'b{2}', 2); |
| 25 | +REGEXP_INSTR('abba', 'b{2}', 2) |
| 26 | +2 |
| 27 | +SELECT REGEXP_INSTR('abba', 'b{2}', 3); |
| 28 | +REGEXP_INSTR('abba', 'b{2}', 3) |
| 29 | +0 |
| 30 | +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1); |
| 31 | +REGEXP_INSTR('abbabba', 'b{2}', 1) |
| 32 | +2 |
| 33 | +SELECT REGEXP_INSTR('aabba', 'b', 3); |
| 34 | +REGEXP_INSTR('aabba', 'b', 3) |
| 35 | +3 |
| 36 | +SELECT REGEXP_INSTR('xyzabc', 'abc', 4); |
| 37 | +REGEXP_INSTR('xyzabc', 'abc', 4) |
| 38 | +4 |
| 39 | +SELECT REGEXP_INSTR('abc', 'c', 3); |
| 40 | +REGEXP_INSTR('abc', 'c', 3) |
| 41 | +3 |
| 42 | +SELECT REGEXP_INSTR('abc', 'c', 4); |
| 43 | +REGEXP_INSTR('abc', 'c', 4) |
| 44 | +0 |
| 45 | +# 3. Four arguments: occurrence |
| 46 | +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 1); |
| 47 | +REGEXP_INSTR('abbabba', 'b{2}', 1, 1) |
| 48 | +2 |
| 49 | +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 2); |
| 50 | +REGEXP_INSTR('abbabba', 'b{2}', 1, 2) |
| 51 | +5 |
| 52 | +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 3); |
| 53 | +REGEXP_INSTR('abbabba', 'b{2}', 1, 3) |
| 54 | +0 |
| 55 | +SELECT REGEXP_INSTR('abbabba', 'b{2}', 2, 1); |
| 56 | +REGEXP_INSTR('abbabba', 'b{2}', 2, 1) |
| 57 | +2 |
| 58 | +SELECT REGEXP_INSTR('abbabba', 'b{2}', 2, 2); |
| 59 | +REGEXP_INSTR('abbabba', 'b{2}', 2, 2) |
| 60 | +5 |
| 61 | +SELECT REGEXP_INSTR('abbabba', 'b{2}', 3, 2); |
| 62 | +REGEXP_INSTR('abbabba', 'b{2}', 3, 2) |
| 63 | +0 |
| 64 | +SELECT REGEXP_INSTR('aaa', 'a', 1, 1); |
| 65 | +REGEXP_INSTR('aaa', 'a', 1, 1) |
| 66 | +1 |
| 67 | +SELECT REGEXP_INSTR('aaa', 'a', 1, 2); |
| 68 | +REGEXP_INSTR('aaa', 'a', 1, 2) |
| 69 | +2 |
| 70 | +SELECT REGEXP_INSTR('aaa', 'a', 1, 3); |
| 71 | +REGEXP_INSTR('aaa', 'a', 1, 3) |
| 72 | +3 |
| 73 | +SELECT REGEXP_INSTR('aaa', 'a', 1, 4); |
| 74 | +REGEXP_INSTR('aaa', 'a', 1, 4) |
| 75 | +0 |
| 76 | +# 4. Five arguments: return_option |
| 77 | +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 1, 0); |
| 78 | +REGEXP_INSTR('abbabba', 'b{2}', 1, 1, 0) |
| 79 | +2 |
| 80 | +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 1, 1); |
| 81 | +REGEXP_INSTR('abbabba', 'b{2}', 1, 1, 1) |
| 82 | +4 |
| 83 | +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 2, 0); |
| 84 | +REGEXP_INSTR('abbabba', 'b{2}', 1, 2, 0) |
| 85 | +5 |
| 86 | +SELECT REGEXP_INSTR('abbabba', 'b{2}', 1, 2, 1); |
| 87 | +REGEXP_INSTR('abbabba', 'b{2}', 1, 2, 1) |
| 88 | +7 |
| 89 | +SELECT REGEXP_INSTR('abcabc', 'b', 1, 1, 0); |
| 90 | +REGEXP_INSTR('abcabc', 'b', 1, 1, 0) |
| 91 | +2 |
| 92 | +SELECT REGEXP_INSTR('abcabc', 'b', 1, 1, 1); |
| 93 | +REGEXP_INSTR('abcabc', 'b', 1, 1, 1) |
| 94 | +3 |
| 95 | +SELECT REGEXP_INSTR('abcabc', 'b', 1, 2, 0); |
| 96 | +REGEXP_INSTR('abcabc', 'b', 1, 2, 0) |
| 97 | +5 |
| 98 | +SELECT REGEXP_INSTR('abcabc', 'b', 1, 2, 1); |
| 99 | +REGEXP_INSTR('abcabc', 'b', 1, 2, 1) |
| 100 | +6 |
| 101 | +SELECT REGEXP_INSTR('abcabc', 'z', 1, 1, 1); |
| 102 | +REGEXP_INSTR('abcabc', 'z', 1, 1, 1) |
| 103 | +0 |
| 104 | +# 5. Six arguments: match_type |
| 105 | +SELECT REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'i'); |
| 106 | +REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'i') |
| 107 | +1 |
| 108 | +SELECT REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'c'); |
| 109 | +REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'c') |
| 110 | +0 |
| 111 | +SELECT REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'ci'); |
| 112 | +REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'ci') |
| 113 | +1 |
| 114 | +SELECT REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'ic'); |
| 115 | +REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, 'ic') |
| 116 | +0 |
| 117 | +SELECT REGEXP_INSTR('a\nb\nc', '^b$', 1, 1, 0, 'm'); |
| 118 | +REGEXP_INSTR('a\nb\nc', '^b$', 1, 1, 0, 'm') |
| 119 | +3 |
| 120 | +SELECT REGEXP_INSTR('a\nb\nc', '^b$', 1, 1, 0, ''); |
| 121 | +REGEXP_INSTR('a\nb\nc', '^b$', 1, 1, 0, '') |
| 122 | +0 |
| 123 | +SELECT REGEXP_INSTR('a\nb\nc', 'a.b', 1, 1, 0, 'n'); |
| 124 | +REGEXP_INSTR('a\nb\nc', 'a.b', 1, 1, 0, 'n') |
| 125 | +1 |
| 126 | +SELECT REGEXP_INSTR('a\nb\nc', 'a.b', 1, 1, 0, ''); |
| 127 | +REGEXP_INSTR('a\nb\nc', 'a.b', 1, 1, 0, '') |
| 128 | +0 |
| 129 | +SELECT REGEXP_INSTR('a\nb', '^b$', 1, 1, 0, 'mu'); |
| 130 | +REGEXP_INSTR('a\nb', '^b$', 1, 1, 0, 'mu') |
| 131 | +3 |
| 132 | +SELECT REGEXP_INSTR('a\nB\nc', '^b$', 1, 1, 0, 'im'); |
| 133 | +REGEXP_INSTR('a\nB\nc', '^b$', 1, 1, 0, 'im') |
| 134 | +3 |
| 135 | +# 6. Multibyte characters |
| 136 | +SET NAMES utf8mb4; |
| 137 | +SELECT REGEXP_INSTR('áéí', 'é'); |
| 138 | +REGEXP_INSTR('áéí', 'é') |
| 139 | +2 |
| 140 | +SELECT REGEXP_INSTR('áéí', 'í'); |
| 141 | +REGEXP_INSTR('áéí', 'í') |
| 142 | +3 |
| 143 | +SELECT REGEXP_INSTR('αβγδ', 'γ'); |
| 144 | +REGEXP_INSTR('αβγδ', 'γ') |
| 145 | +3 |
| 146 | +SELECT REGEXP_INSTR('áéíó', 'í', 2); |
| 147 | +REGEXP_INSTR('áéíó', 'í', 2) |
| 148 | +3 |
| 149 | +SELECT REGEXP_INSTR('αβγδ', 'β', 2); |
| 150 | +REGEXP_INSTR('αβγδ', 'β', 2) |
| 151 | +2 |
| 152 | +SELECT REGEXP_INSTR('αβγδ', 'β', 1, 1, 1); |
| 153 | +REGEXP_INSTR('αβγδ', 'β', 1, 1, 1) |
| 154 | +3 |
| 155 | +# 7. NULL propagation |
| 156 | +SELECT REGEXP_INSTR(NULL, 'a'); |
| 157 | +REGEXP_INSTR(NULL, 'a') |
| 158 | +NULL |
| 159 | +SELECT REGEXP_INSTR('abc', NULL); |
| 160 | +REGEXP_INSTR('abc', NULL) |
| 161 | +NULL |
| 162 | +SELECT REGEXP_INSTR('abc', 'a', NULL); |
| 163 | +REGEXP_INSTR('abc', 'a', NULL) |
| 164 | +NULL |
| 165 | +SELECT REGEXP_INSTR('abc', 'a', 1, NULL); |
| 166 | +REGEXP_INSTR('abc', 'a', 1, NULL) |
| 167 | +NULL |
| 168 | +SELECT REGEXP_INSTR('abc', 'a', 1, 1, NULL); |
| 169 | +REGEXP_INSTR('abc', 'a', 1, 1, NULL) |
| 170 | +NULL |
| 171 | +SELECT REGEXP_INSTR('abc', 'a', 1, 1, 0, NULL); |
| 172 | +REGEXP_INSTR('abc', 'a', 1, 1, 0, NULL) |
| 173 | +NULL |
| 174 | +# 8. Edge cases |
| 175 | +SELECT REGEXP_INSTR('abc', '', 1, 1); |
| 176 | +REGEXP_INSTR('abc', '', 1, 1) |
| 177 | +1 |
| 178 | +SELECT REGEXP_INSTR('abc', '', 1, 2); |
| 179 | +REGEXP_INSTR('abc', '', 1, 2) |
| 180 | +2 |
| 181 | +SELECT REGEXP_INSTR('abc', '', 1, 3); |
| 182 | +REGEXP_INSTR('abc', '', 1, 3) |
| 183 | +3 |
| 184 | +SELECT REGEXP_INSTR('abc', '', 1, 4); |
| 185 | +REGEXP_INSTR('abc', '', 1, 4) |
| 186 | +4 |
| 187 | +SELECT REGEXP_INSTR('abc', '', 1, 5); |
| 188 | +REGEXP_INSTR('abc', '', 1, 5) |
| 189 | +0 |
| 190 | +SELECT REGEXP_INSTR('abcabc', '^abc'); |
| 191 | +REGEXP_INSTR('abcabc', '^abc') |
| 192 | +1 |
| 193 | +SELECT REGEXP_INSTR('abcabc', 'abc$'); |
| 194 | +REGEXP_INSTR('abcabc', 'abc$') |
| 195 | +4 |
| 196 | +SELECT REGEXP_INSTR('abcabc', '^abc$'); |
| 197 | +REGEXP_INSTR('abcabc', '^abc$') |
| 198 | +0 |
| 199 | +SELECT REGEXP_INSTR('foo bar baz', 'bar|baz', 1, 1); |
| 200 | +REGEXP_INSTR('foo bar baz', 'bar|baz', 1, 1) |
| 201 | +5 |
| 202 | +SELECT REGEXP_INSTR('foo bar baz', 'bar|baz', 1, 2); |
| 203 | +REGEXP_INSTR('foo bar baz', 'bar|baz', 1, 2) |
| 204 | +9 |
| 205 | +SELECT REGEXP_INSTR('aabbaabb', '(a+)(b+)\\1', 1, 1); |
| 206 | +REGEXP_INSTR('aabbaabb', '(a+)(b+)\\1', 1, 1) |
| 207 | +1 |
| 208 | +SELECT REGEXP_INSTR('aabbaabb', '(a+)(b+)\\1', 1, 2); |
| 209 | +REGEXP_INSTR('aabbaabb', '(a+)(b+)\\1', 1, 2) |
| 210 | +0 |
| 211 | +SELECT REGEXP_INSTR(REPEAT('x', 1000), 'x{5}', 1, 1); |
| 212 | +REGEXP_INSTR(REPEAT('x', 1000), 'x{5}', 1, 1) |
| 213 | +1 |
| 214 | +SELECT REGEXP_INSTR(REPEAT('x', 1000), 'x{5}', 1, 200); |
| 215 | +REGEXP_INSTR(REPEAT('x', 1000), 'x{5}', 1, 200) |
| 216 | +996 |
| 217 | +SELECT REGEXP_INSTR('Test-abc-abc-abc', 'AB', 1, 3, 0); |
| 218 | +REGEXP_INSTR('Test-abc-abc-abc', 'AB', 1, 3, 0) |
| 219 | +14 |
| 220 | +SELECT REGEXP_INSTR('Test-abc-abc-abc', 'AB', 1, 3, 0, 'c'); |
| 221 | +REGEXP_INSTR('Test-abc-abc-abc', 'AB', 1, 3, 0, 'c') |
| 222 | +0 |
| 223 | +# Non-constant match_type with constant pattern |
| 224 | +CREATE TABLE foo (cond VARCHAR(50)); |
| 225 | +INSERT INTO foo VALUES ('c'), ('ic'); |
| 226 | +SELECT REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, cond) FROM foo; |
| 227 | +REGEXP_INSTR('Abba', 'ABBA', 1, 1, 0, cond) |
| 228 | +0 |
| 229 | +0 |
| 230 | +DROP TABLE foo; |
0 commit comments