Skip to content

Commit 8126367

Browse files
authored
Error handling for dot-containing field names (opensearch-project#4907)
1 parent 2942d4b commit 8126367

4 files changed

Lines changed: 384 additions & 7 deletions

File tree

docs/user/ppl/limitations/limitations.rst

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,27 @@ For the following functionalities, the query will be forwarded to the V2 query e
106106
* ``show datasources`` command
107107

108108
* Commands with ``fetch_size`` parameter
109+
110+
Malformed Field Names in Object Fields
111+
======================================
112+
113+
OpenSearch normally rejects field names containing problematic dot patterns (such as ``.``, ``..``, ``.a``, ``a.``, or ``a..b``). However, when an object field is mapped with ``enabled: false``, OpenSearch bypasses field name validation for the contents of that field and allows storing documents with any subfield names.
114+
115+
If a document contains malformed field names inside an object field, PPL ignores those malformed fields: they are omitted from the returned object. Other valid fields in the document are returned normally.
116+
117+
**Example of affected data:**
118+
119+
.. code-block:: json
120+
121+
{
122+
"log": {
123+
".": "value1",
124+
".a": "value2",
125+
"a.": "value3",
126+
"a..b": "value4"
127+
}
128+
}
129+
130+
When ``log`` is an object field with ``enabled: false``, subfields with malformed names are ignored.
131+
132+
**Recommendation:** Avoid using field names that contain leading dots, trailing dots, consecutive dots, or consist only of dots. This aligns with OpenSearch's default field naming requirements.
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
# Issue: https://github.com/opensearch-project/sql/issues/4896
2+
# ArrayIndexOutOfBoundsException when querying index with malformed field names in disabled object
3+
#
4+
# Root cause: When a document has a field name with problematic dot patterns (e.g., ".", "..", ".a",
5+
# "a.", "a..b"), the JsonPath parsing logic fails because String.split("\\.") produces empty strings or, for dot-only names, an empty array (hence "Index 0 out of bounds for length 0").
6+
#
7+
# This can happen when an index has a disabled object field (enabled: false), which allows storing
8+
# documents without validating inner field names. Normal OpenSearch indices reject such field names.
9+
#
10+
# Fix: The query engine now detects malformed field names and returns null for those fields,
11+
# allowing the rest of the document to be processed normally.
12+
13+
setup:
14+
- do:
15+
query.settings:
16+
body:
17+
transient:
18+
plugins.calcite.enabled: true
19+
# Create index with disabled object field to allow unusual field names
20+
- do:
21+
indices.create:
22+
index: test_malformed_fields_4896
23+
body:
24+
mappings:
25+
properties:
26+
log:
27+
type: object
28+
enabled: false
29+
"@timestamp":
30+
type: date
31+
message:
32+
type: text
33+
status:
34+
type: keyword
35+
36+
# Use bulk indexing to create all test documents
37+
- do:
38+
bulk:
39+
index: test_malformed_fields_4896
40+
refresh: true
41+
body:
42+
- '{"index": {"_id": "1"}}'
43+
- '{"@timestamp": "2025-11-26T17:10:00.000Z", "message": "single dot test", "status": "ok", "log": {".": "dot only value", "valid": "normal value"}}'
44+
- '{"index": {"_id": "2"}}'
45+
- '{"@timestamp": "2025-11-26T17:11:00.000Z", "message": "double dot test", "status": "ok", "log": {"..": "double dot value", "valid": "normal value"}}'
46+
- '{"index": {"_id": "3"}}'
47+
- '{"@timestamp": "2025-11-26T17:12:00.000Z", "message": "triple dot test", "status": "ok", "log": {"...": "triple dot value", "valid": "normal value"}}'
48+
- '{"index": {"_id": "4"}}'
49+
- '{"@timestamp": "2025-11-26T17:13:00.000Z", "message": "leading dot test", "status": "ok", "log": {".a": "leading dot value", "valid": "normal value"}}'
50+
- '{"index": {"_id": "5"}}'
51+
- '{"@timestamp": "2025-11-26T17:14:00.000Z", "message": "trailing dot test", "status": "ok", "log": {"a.": "trailing dot value", "valid": "normal value"}}'
52+
- '{"index": {"_id": "6"}}'
53+
- '{"@timestamp": "2025-11-26T17:15:00.000Z", "message": "consecutive dots test", "status": "ok", "log": {"a..b": "consecutive dots value", "valid": "normal value"}}'
54+
- '{"index": {"_id": "7"}}'
55+
- '{"@timestamp": "2025-11-26T17:16:00.000Z", "message": "multiple malformed test", "status": "ok", "log": {".": "dot1", "..": "dot2", ".leading": "dot3", "trailing.": "dot4", "mid..dle": "dot5", "valid1": "normal1", "valid2": "normal2"}}'
56+
- '{"index": {"_id": "8"}}'
57+
- '{"@timestamp": "2025-11-26T17:17:00.000Z", "message": "valid nested test", "status": "ok", "log": {"nested.field": "nested value"}}'
58+
59+
---
60+
teardown:
61+
- do:
62+
query.settings:
63+
body:
64+
transient:
65+
plugins.calcite.enabled: false
66+
- do:
67+
indices.delete:
68+
index: test_malformed_fields_4896
69+
70+
---
71+
"Query all documents with unusual field names succeeds":
72+
- skip:
73+
features:
74+
- headers
75+
# Before the fix: ArrayIndexOutOfBoundsException: Index 0 out of bounds for length 0
76+
# After the fix: Query succeeds for all documents
77+
- do:
78+
headers:
79+
Content-Type: 'application/json'
80+
ppl:
81+
body:
82+
query: source=test_malformed_fields_4896 | fields @timestamp, message, status | sort @timestamp
83+
- match: { "total": 8 }
84+
- match: { "datarows.0.0": "2025-11-26 17:10:00" }
85+
- match: { "datarows.0.1": "single dot test" }
86+
- match: { "datarows.7.0": "2025-11-26 17:17:00" }
87+
- match: { "datarows.7.1": "valid nested test" }
88+
89+
---
90+
"Single dot field name returns null for malformed field":
91+
- skip:
92+
features:
93+
- headers
94+
- do:
95+
headers:
96+
Content-Type: 'application/json'
97+
ppl:
98+
body:
99+
query: source=test_malformed_fields_4896 | where message = "single dot test" | fields log
100+
- match: { "total": 1 }
101+
# The "." field returns null, so log contains only the valid field
102+
- match: { "datarows.0.0": {"valid": "normal value"} }
103+
104+
---
105+
"Multiple dots field name returns null for malformed field":
106+
- skip:
107+
features:
108+
- headers
109+
- do:
110+
headers:
111+
Content-Type: 'application/json'
112+
ppl:
113+
body:
114+
query: source=test_malformed_fields_4896 | where message = "double dot test" | fields log
115+
- match: { "total": 1 }
116+
# The ".." field returns null, so log contains only the valid field
117+
- match: { "datarows.0.0": {"valid": "normal value"} }
118+
119+
- do:
120+
headers:
121+
Content-Type: 'application/json'
122+
ppl:
123+
body:
124+
query: source=test_malformed_fields_4896 | where message = "triple dot test" | fields log
125+
- match: { "total": 1 }
126+
# The "..." field returns null, so log contains only the valid field
127+
- match: { "datarows.0.0": {"valid": "normal value"} }
128+
129+
---
130+
"Leading dot field name returns null for malformed field":
131+
- skip:
132+
features:
133+
- headers
134+
- do:
135+
headers:
136+
Content-Type: 'application/json'
137+
ppl:
138+
body:
139+
query: source=test_malformed_fields_4896 | where message = "leading dot test" | fields log
140+
- match: { "total": 1 }
141+
# The ".a" field returns null, so log contains only the valid field
142+
- match: { "datarows.0.0": {"valid": "normal value"} }
143+
144+
---
145+
"Trailing dot field name returns null for malformed field":
146+
- skip:
147+
features:
148+
- headers
149+
- do:
150+
headers:
151+
Content-Type: 'application/json'
152+
ppl:
153+
body:
154+
query: source=test_malformed_fields_4896 | where message = "trailing dot test" | fields log
155+
- match: { "total": 1 }
156+
# The "a." field returns null, so log contains only the valid field
157+
- match: { "datarows.0.0": {"valid": "normal value"} }
158+
159+
---
160+
"Consecutive dots field name returns null for malformed field":
161+
- skip:
162+
features:
163+
- headers
164+
- do:
165+
headers:
166+
Content-Type: 'application/json'
167+
ppl:
168+
body:
169+
query: source=test_malformed_fields_4896 | where message = "consecutive dots test" | fields log
170+
- match: { "total": 1 }
171+
# The "a..b" field returns null, so log contains only the valid field
172+
- match: { "datarows.0.0": {"valid": "normal value"} }
173+
174+
---
175+
"Multiple malformed fields coexist with valid fields":
176+
- skip:
177+
features:
178+
- headers
179+
- do:
180+
headers:
181+
Content-Type: 'application/json'
182+
ppl:
183+
body:
184+
query: source=test_malformed_fields_4896 | where message = "multiple malformed test" | fields log
185+
- match: { "total": 1 }
186+
# All malformed fields return null, only valid fields remain
187+
- match: { "datarows.0.0": {"valid1": "normal1", "valid2": "normal2"} }
188+
189+
---
190+
"Valid nested field still works (issue #3477 compatibility)":
191+
- skip:
192+
features:
193+
- headers
194+
# This tests that the fix for #4896 doesn't break the flattening behavior from #3477
195+
- do:
196+
headers:
197+
Content-Type: 'application/json'
198+
ppl:
199+
body:
200+
query: source=test_malformed_fields_4896 | where message = "valid nested test" | fields log
201+
- match: { "total": 1 }
202+
# Valid nested field "nested.field" is properly expanded to nested structure
203+
- match: { "datarows.0.0": {"nested": {"field": "nested value"}} }
204+

opensearch/src/main/java/org/opensearch/sql/opensearch/data/value/OpenSearchExprValueFactory.java

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272

7373
/** Construct ExprValue from OpenSearch response. */
7474
public class OpenSearchExprValueFactory {
75+
7576
/** The Mapping of Field and ExprType. */
7677
private final Map<String, OpenSearchDataType> typeMapping;
7778

@@ -373,18 +374,58 @@ private ExprValue parseStruct(Content content, String prefix, boolean supportArr
373374
content
374375
.map()
375376
.forEachRemaining(
376-
entry ->
377+
entry -> {
378+
String fieldKey = entry.getKey();
379+
String fullFieldPath = makeField(prefix, fieldKey);
380+
// Check for malformed field names before creating JsonPath.
381+
// See isFieldNameMalformed() for details on what constitutes a malformed field name.
382+
if (isFieldNameMalformed(fieldKey)) {
383+
result.tupleValue().put(fieldKey, ExprNullValue.of());
384+
} else {
377385
populateValueRecursive(
378386
result,
379-
new JsonPath(entry.getKey()),
380-
parse(
381-
entry.getValue(),
382-
makeField(prefix, entry.getKey()),
383-
type(makeField(prefix, entry.getKey())),
384-
supportArrays)));
387+
new JsonPath(fieldKey),
388+
parse(entry.getValue(), fullFieldPath, type(fullFieldPath), supportArrays));
389+
}
390+
});
385391
return result;
386392
}
387393

394+
/**
395+
* Check if a field name is malformed and cannot be processed by JsonPath.
396+
*
397+
* <p>A field name is malformed if it contains dot patterns that would cause String.split("\\.")
398+
* to produce empty strings. This includes:
399+
*
400+
* <ul>
401+
* <li>Dot-only field names: ".", "..", "..."
402+
* <li>Leading dots: ".a", "..a"
403+
* <li>Trailing dots: "a.", "a.."
404+
* <li>Consecutive dots: "a..b", "a...b"
405+
* </ul>
406+
*
407+
* <p>Such field names can occur in disabled object fields (enabled: false) which bypass
408+
* OpenSearch's field name validation. Normal OpenSearch indices reject these field names.
409+
*
410+
* @param fieldName The field name to check.
411+
* @return true if the field name is malformed, false otherwise.
412+
*/
413+
static boolean isFieldNameMalformed(String fieldName) {
414+
// Use -1 limit to preserve trailing empty strings (e.g., "a." -> ["a", ""])
415+
String[] parts = fieldName.split("\\.", -1);
416+
// Dot-only field names produce empty array
417+
if (parts.length == 0) {
418+
return true;
419+
}
420+
// Check for empty parts which indicate leading, trailing, or consecutive dots
421+
for (String part : parts) {
422+
if (part.isEmpty()) {
423+
return true;
424+
}
425+
}
426+
return false;
427+
}
428+
388429
/**
389430
* Populate the current ExprTupleValue recursively.
390431
*

0 commit comments

Comments
 (0)