55
66package org .opensearch .sql .calcite .remote ;
77
8- import static org .junit .jupiter .api .Assertions .assertEquals ;
98import static org .junit .jupiter .api .Assertions .assertTrue ;
9+ import static org .opensearch .sql .legacy .TestsConstants .TEST_INDEX_BANK ;
1010import static org .opensearch .sql .legacy .TestsConstants .TEST_INDEX_WEBLOGS ;
1111import static org .opensearch .sql .util .MatcherUtils .rows ;
1212import static org .opensearch .sql .util .MatcherUtils .schema ;
@@ -27,6 +27,9 @@ public void init() throws Exception {
2727 enableCalcite ();
2828
2929 loadIndex (Index .WEBLOG );
30+ loadIndex (Index .TIME_TEST_DATA );
31+ loadIndex (Index .TIME_TEST_DATA_WITH_NULL );
32+ loadIndex (Index .BANK );
3033 appendDataForBadResponse ();
3134 }
3235
@@ -250,122 +253,180 @@ public void testCaseWhenInSubquery() throws IOException {
250253 }
251254
252255 @ Test
253- public void testCaseRangeAggregationPushdown () throws IOException {
254- // Test CASE expression that can be optimized to range aggregation
255- // Note: This has an implicit ELSE NULL, so it won't be optimized
256- // But it should still work correctly
257- JSONObject actual =
256+ public void testCaseCanBePushedDownAsRangeQuery () throws IOException {
257+ // CASE 1: Range - Metric
258+ // 1.1 Range - Metric
259+ JSONObject actual1 =
258260 executeQuery (
259261 String .format (
260- "source=%s | eval range_bucket = case("
261- + " cast(bytes as int) < 1000, 'small',"
262- + " cast(bytes as int) >= 1000 AND cast(bytes as int) < 5000, 'medium',"
263- + " cast(bytes as int) >= 5000, 'large'"
264- + ") | stats count() as total by range_bucket | sort range_bucket" ,
265- TEST_INDEX_WEBLOGS ));
262+ "source=%s | eval age_range = case(age < 30, 'u30', age < 40, 'u40' else 'u100') |"
263+ + " stats avg(age) as avg_age by age_range" ,
264+ TEST_INDEX_BANK ));
265+ verifySchema (actual1 , schema ("avg_age" , "double" ), schema ("age_range" , "string" ));
266+ verifyDataRows (actual1 , rows (28.0 , "u30" ), rows (35.0 , "u40" ));
266267
267- verifySchema (actual , schema ("range_bucket" , "string" ), schema ("total" , "bigint" ));
268-
269- // This should work but won't be optimized due to implicit NULL bucket
270- assertTrue (actual .getJSONArray ("datarows" ).length () > 0 );
271- }
272-
273- @ Test
274- public void testCaseRangeAggregationWithMetrics () throws IOException {
275- // Test CASE-to-range with additional aggregations
276- JSONObject actual =
268+ // 1.2 Range - Metric (explicit closed-open bounds, un-aliased avg(age) column)
269+ JSONObject actual2 =
277270 executeQuery (
278271 String .format (
279- "source=%s | eval size_category = case( cast(bytes as int) < 2000 , 'small', "
280- + " cast(bytes as int) >= 2000 AND cast(bytes as int) < 5000, 'medium', "
281- + " cast(bytes as int) >= 5000, 'large') | stats count() as total,"
282- + " avg(cast(bytes as int)) as avg_bytes by size_category | sort size_category" ,
283- TEST_INDEX_WEBLOGS ));
272+ "source=%s | eval age_range = case(age < 30, 'u30', age >= 30 and age < 40 , 'u40' "
273+ + " else 'u100') | stats avg(age) by age_range" ,
274+ TEST_INDEX_BANK ));
275+ verifySchema ( actual2 , schema ( " avg(age)" , "double" ), schema ( "age_range" , "string" ));
276+ verifyDataRows ( actual2 , rows ( 28.0 , "u30" ), rows ( 35.0 , "u40" ));
284277
278+ // 1.3 Range - Range - Metric
279+ JSONObject actual3 =
280+ executeQuery (
281+ String .format (
282+ "source=%s | eval age_range = case(age < 30, 'u30', age < 40, 'u40' else 'u100'),"
283+ + " balance_range = case(balance < 20000, 'medium' else 'high') | stats"
284+ + " avg(balance) as avg_balance by age_range, balance_range" ,
285+ TEST_INDEX_BANK ));
285286 verifySchema (
286- actual ,
287- schema ("size_category " , "string " ),
288- schema ("total " , "bigint " ),
289- schema ("avg_bytes " , "double " ));
290-
291- // Verify we get results for each category
292- // The exact values may vary based on test data, but structure should be correct
293- assertEquals ( 3 , actual . getJSONArray ( "datarows" ). length ());
294- }
287+ actual3 ,
288+ schema ("avg_balance " , "double " ),
289+ schema ("age_range " , "string " ),
290+ schema ("balance_range " , "string " ));
291+ verifyDataRows (
292+ actual3 ,
293+ rows ( 32838.0 , "u30" , "high" ),
294+ rows ( 8761.333333333334 , "u40" , "medium" ),
295+ rows ( 42617.0 , "u40" , "high" ));
295296
296- @ Test
297- public void testCaseRangeAggregationWithElse () throws IOException {
298- // Test CASE with explicit ELSE clause
299- JSONObject actual =
297+ // 1.4 Range - Metric (With null & discontinuous ranges)
298+ JSONObject actual4 =
300299 executeQuery (
301300 String .format (
302- "source=%s | eval status_category = case( cast(response as int) < 300, 'success', "
303- + " cast(response as int) >= 300 AND cast(response as int) < 400, 'redirect', "
304- + " cast(response as int) >= 400 AND cast(response as int) < 500,"
305- + " 'client_error', cast(response as int) >= 500, 'server_error' else"
306- + " 'unknown') | stats count() by status_category | sort status_category" ,
307- TEST_INDEX_WEBLOGS ));
308-
309- verifySchema (actual , schema ("status_category" , "string" ), schema ("count()" , "bigint" ));
310-
311- // Should handle the ELSE case for null/non-numeric responses
312- assertTrue (actual .getJSONArray ("datarows" ).length () > 0 );
313- }
301+ "source=%s | eval age_range = case(age < 30, 'u30', (age >= 35 and age < 40) or age"
302+ + " >= 80, '30-40 or >=80') | stats avg(balance) by age_range" ,
303+ TEST_INDEX_BANK ));
304+ verifySchema (actual4 , schema ("avg(balance)" , "double" ), schema ("age_range" , "string" ));
305+ verifyDataRows (
306+ actual4 ,
307+ rows (32838.0 , "u30" ),
308+ rows (30497.0 , "null" ),
309+ rows (20881.333333333332 , "30-40 or >=80" ));
314310
315- @ Test
316- public void testNonOptimizableCaseExpression () throws IOException {
317- // Test CASE that cannot be optimized (different fields)
318- JSONObject actual =
311+ // 1.5 Should not be pushed because the range is not closed-open
312+ JSONObject actual5 =
319313 executeQuery (
320314 String .format (
321- "source=%s | eval mixed_condition = case("
322- + " cast(bytes as int) < 1000, 'small_bytes',"
323- + " cast(response as int) >= 400, 'error_response'"
324- + " else 'other'"
325- + ") | stats count() by mixed_condition" ,
326- TEST_INDEX_WEBLOGS ));
327-
328- verifySchema (actual , schema ("mixed_condition" , "string" ), schema ("count()" , "bigint" ));
315+ "source=%s | eval age_range = case(age < 30, 'u30', age >= 30 and age <= 40, 'u40'"
316+ + " else 'u100') | stats avg(age) as avg_age by age_range" ,
317+ TEST_INDEX_BANK ));
318+ verifySchema (actual5 , schema ("avg_age" , "double" ), schema ("age_range" , "string" ));
319+ verifyDataRows (actual5 , rows (35.0 , "u40" ), rows (28.0 , "u30" ));
329320
330- // This should work but won't be optimized
331- assertTrue (actual .getJSONArray ("datarows" ).length () > 0 );
332- }
333-
334- @ Test
335- public void testCaseWithNonLiteralResult () throws IOException {
336- // Test CASE that cannot be optimized (non-literal results)
337- JSONObject actual =
321+ // CASE 2: Composite - Range - Metric
322+ // 2.1 Composite (term) - Range - Metric
323+ JSONObject actual6 =
338324 executeQuery (
339325 String .format (
340- "source=%s | eval computed_result = case("
341- + " cast(bytes as int) < 1000, concat('small_', host),"
342- + " cast(bytes as int) >= 1000, concat('large_', host)"
343- + ") | stats count() by computed_result | head 3" ,
344- TEST_INDEX_WEBLOGS ));
345-
346- verifySchema (actual , schema ("computed_result" , "string" ), schema ("count()" , "bigint" ));
326+ "source=%s | eval age_range = case(age < 30, 'u30' else 'a30') | stats avg(balance)"
327+ + " by state, age_range" ,
328+ TEST_INDEX_BANK ));
329+ verifySchema (
330+ actual6 ,
331+ schema ("avg(balance)" , "double" ),
332+ schema ("state" , "string" ),
333+ schema ("age_range" , "string" ));
334+ verifyDataRows (
335+ actual6 ,
336+ rows (39225.0 , "IL" , "a30" ),
337+ rows (48086.0 , "IN" , "a30" ),
338+ rows (4180.0 , "MD" , "a30" ),
339+ rows (40540.0 , "PA" , "a30" ),
340+ rows (5686.0 , "TN" , "a30" ),
341+ rows (32838.0 , "VA" , "u30" ),
342+ rows (16418.0 , "WA" , "a30" ));
347343
348- // This should work but won't be optimized to range aggregation
349- assertTrue (actual .getJSONArray ("datarows" ).length () > 0 );
350- }
344+ // 2.2 Composite (date histogram) - Range - Metric
345+ JSONObject actual7 =
346+ executeQuery (
347+ "source=opensearch-sql_test_index_time_data | eval value_range = case(value < 7000,"
348+ + " 'small' else 'large') | stats avg(value) by value_range, span(@timestamp,"
349+ + " 1h)" );
350+ verifySchema (
351+ actual7 ,
352+ schema ("avg(value)" , "double" ),
353+ schema ("span(@timestamp,1h)" , "timestamp" ),
354+ schema ("value_range" , "string" ));
355+ // Verify the exact number of (value_range, 1h span) buckets returned
356+ assertTrue (actual7 .getJSONArray ("datarows" ).length () == 100 );
357+ // Spot-check that both range labels and both dates appear somewhere in the serialized result
358+ String resultStr = actual7 .toString ();
359+ assertTrue (resultStr .contains ("small" ) && resultStr .contains ("large" ));
360+ assertTrue (resultStr .contains ("2025-07-28" ) && resultStr .contains ("2025-07-29" ));
351361
352- @ Test
353- public void testOptimizableCaseRangeAggregation () throws IOException {
354- // Test CASE that could be optimized if all ranges are covered with explicit ELSE
355- JSONObject actual =
362+ // 2.3 Composite(2 fields) - Range - Metric (with count)
363+ JSONObject actual8 =
356364 executeQuery (
357365 String .format (
358- "source=%s | eval size_bucket = case("
359- + " cast(bytes as int) < 2000, 'small',"
360- + " cast(bytes as int) >= 2000 AND cast(bytes as int) < 5000, 'medium',"
361- + " cast(bytes as int) >= 5000, 'large'"
362- + " else 'unknown'"
363- + ") | stats count() by size_bucket | sort size_bucket" ,
364- TEST_INDEX_WEBLOGS ));
366+ "source=%s | eval age_range = case(age < 30, 'u30' else 'a30') | stats"
367+ + " avg(balance), count() by age_range, state, gender" ,
368+ TEST_INDEX_BANK ));
369+ verifySchema (
370+ actual8 ,
371+ schema ("avg(balance)" , "double" ),
372+ schema ("count()" , "bigint" ),
373+ schema ("age_range" , "string" ),
374+ schema ("state" , "string" ),
375+ schema ("gender" , "string" ));
376+ verifyDataRows (
377+ actual8 ,
378+ rows (5686.0 , 1 , "a30" , "TN" , "M" ),
379+ rows (16418.0 , 1 , "a30" , "WA" , "M" ),
380+ rows (40540.0 , 1 , "a30" , "PA" , "F" ),
381+ rows (4180.0 , 1 , "a30" , "MD" , "M" ),
382+ rows (32838.0 , 1 , "u30" , "VA" , "F" ),
383+ rows (39225.0 , 1 , "a30" , "IL" , "M" ),
384+ rows (48086.0 , 1 , "a30" , "IN" , "F" ));
365385
366- verifySchema (actual , schema ("size_bucket" , "string" ), schema ("count()" , "bigint" ));
386+ // 2.4 Composite (2 fields) - Range - Range - Metric (with count)
387+ JSONObject actual9 =
388+ executeQuery (
389+ String .format (
390+ "source=%s | eval age_range = case(age < 35, 'u35' else 'a35'), balance_range ="
391+ + " case(balance < 20000, 'medium' else 'high') | stats avg(balance) as"
392+ + " avg_balance by age_range, balance_range, state" ,
393+ TEST_INDEX_BANK ));
394+ verifySchema (
395+ actual9 ,
396+ schema ("avg_balance" , "double" ),
397+ schema ("age_range" , "string" ),
398+ schema ("balance_range" , "string" ),
399+ schema ("state" , "string" ));
400+ verifyDataRows (
401+ actual9 ,
402+ rows (39225.0 , "u35" , "high" , "IL" ),
403+ rows (48086.0 , "u35" , "high" , "IN" ),
404+ rows (4180.0 , "u35" , "medium" , "MD" ),
405+ rows (40540.0 , "a35" , "high" , "PA" ),
406+ rows (5686.0 , "a35" , "medium" , "TN" ),
407+ rows (32838.0 , "u35" , "high" , "VA" ),
408+ rows (16418.0 , "a35" , "medium" , "WA" ));
367409
368- // This should work - the explicit ELSE makes it potentially optimizable
369- assertTrue (actual .getJSONArray ("datarows" ).length () > 0 );
410+ // 2.5 Should not be pushed because case result expression is not constant
411+ JSONObject actual10 =
412+ executeQuery (
413+ String .format (
414+ "source=%s | eval age_range = case(age < 35, 'u35' else email) | stats avg(balance)"
415+ + " as avg_balance by age_range, state" ,
416+ TEST_INDEX_BANK ));
417+ verifySchema (
418+ actual10 ,
419+ schema ("avg_balance" , "double" ),
420+ schema ("age_range" , "string" ),
421+ schema ("state" , "string" ));
422+ verifyDataRows (
423+ actual10 ,
424+ rows (32838.0 , "u35" , "VA" ),
425+ rows (4180.0 , "u35" , "MD" ),
426+ rows (48086.0 , "u35" , "IN" ),
427+ rows (40540.0 , "virginiaayala@filodyne.com" , "PA" ),
428+ rows (39225.0 , "u35" , "IL" ),
429+ rows (5686.0 , "hattiebond@netagy.com" , "TN" ),
430+ rows (16418.0 , "elinorratliff@scentric.com" , "WA" ));
370431 }
371432}
0 commit comments