-
Notifications
You must be signed in to change notification settings - Fork 40
Fix client-side maxRows enforcement in ResultSet.next() #1448
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
f5dcb2a
424da6d
e05f37b
6be661b
3ee53b0
b07ae45
4a20981
60e4d07
ac403aa
2976c11
7554dd6
c88d16d
e23ef7e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -82,6 +82,15 @@ enum ResultSetType { | |
| // for the lifetime of a result set. | ||
| private final TelemetryCollector cachedTelemetryCollector; | ||
|
|
||
| // Client-side maxRows enforcement. This provides a single, implementation-agnostic | ||
| // truncation point in next(). Per-implementation maxRows enforcement (e.g. in | ||
| // StreamingInlineArrowResult, StreamingColumnarResult) is retained as defense-in-depth. | ||
| private final long maxRowsLimit; | ||
| private long rowsReturned = 0; | ||
| // Flag to bypass maxRows check during getUpdateCount() internal iteration, | ||
| // which calls next() to sum affected-row counts for DML statements. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [F7 — Medium] Using a mutable instance field to alter the semantics of Fix: Extract a private — code-review-squad (language + maintainability + architecture)
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bigger change, can be done in followup PR |
||
| private boolean countingUpdateRows = false; | ||
|
|
||
| // Constructor for SEA result set | ||
| public DatabricksResultSet( | ||
| StatementStatus statementStatus, | ||
|
|
@@ -121,6 +130,7 @@ public DatabricksResultSet( | |
| this.updateCount = null; | ||
| this.parentStatement = parentStatement; | ||
| this.cachedTelemetryCollector = resolveTelemetryCollector(parentStatement); | ||
| this.maxRowsLimit = resolveMaxRowsLimit(parentStatement); | ||
| this.isClosed = false; | ||
| this.wasNull = false; | ||
| } | ||
|
|
@@ -142,6 +152,7 @@ public DatabricksResultSet( | |
| this.updateCount = null; | ||
| this.parentStatement = parentStatement; | ||
| this.cachedTelemetryCollector = resolveTelemetryCollector(parentStatement); | ||
| this.maxRowsLimit = resolveMaxRowsLimit(parentStatement); | ||
| this.isClosed = false; | ||
| this.wasNull = false; | ||
| this.complexDatatypeSupport = complexDatatypeSupport; | ||
|
|
@@ -189,6 +200,7 @@ public DatabricksResultSet( | |
| this.updateCount = null; | ||
| this.parentStatement = parentStatement; | ||
| this.cachedTelemetryCollector = resolveTelemetryCollector(parentStatement); | ||
| this.maxRowsLimit = resolveMaxRowsLimit(parentStatement); | ||
| this.isClosed = false; | ||
| this.wasNull = false; | ||
| } | ||
|
|
@@ -220,6 +232,7 @@ public DatabricksResultSet( | |
| this.updateCount = null; | ||
| this.parentStatement = null; | ||
| this.cachedTelemetryCollector = null; | ||
| this.maxRowsLimit = 0; | ||
| this.isClosed = false; | ||
| this.wasNull = false; | ||
| } | ||
|
|
@@ -251,6 +264,7 @@ public DatabricksResultSet( | |
| this.updateCount = null; | ||
| this.parentStatement = null; | ||
| this.cachedTelemetryCollector = null; | ||
| this.maxRowsLimit = 0; | ||
| this.isClosed = false; | ||
| this.wasNull = false; | ||
| } | ||
|
|
@@ -271,14 +285,45 @@ public DatabricksResultSet( | |
| this.updateCount = null; | ||
| this.parentStatement = null; | ||
| this.cachedTelemetryCollector = null; | ||
| this.maxRowsLimit = 0; | ||
| this.isClosed = false; | ||
| this.wasNull = false; | ||
| } | ||
|
|
||
| /** | ||
| * Advances the cursor to the next row of the result set. Returns {@code false} when no more rows | ||
| * are available or when the client-side {@code maxRows} limit has been reached. | ||
| * | ||
| * <p>{@code rowsReturned} tracks how many rows have been delivered to the caller through this | ||
| * ResultSet instance. It is only incremented during normal (non-DML-counting) iteration and | ||
| * assumes single-threaded access, which is the standard JDBC contract. | ||
| * | ||
| * @return {@code true} if the new current row is valid; {@code false} if there are no more rows | ||
| * @throws SQLException if the result set is closed or an error occurs | ||
| */ | ||
| @Override | ||
| public boolean next() throws SQLException { | ||
| checkIfClosed(); | ||
| // Client-side maxRows truncation: stop before delegating to the underlying result | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [F10 — Medium] "Defense-in-depth" comment is misleading 🟨 The comment claims per-implementation enforcement is retained as defense-in-depth. Verified by grep: Fix: Either (a) reword the comment to reflect reality — "central enforcement; some streaming impls also enforce locally", or (b) consider removing per-impl enforcement now that tier-1 covers all paths (after integration tests prove it). Single source of truth is cleaner. — code-review-squad (maintainability + architecture + devils-advocate)
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated |
||
| // implementation when the limit has been reached. This is skipped during | ||
| // getUpdateCount() internal iteration (countingUpdateRows) to avoid breaking DML | ||
| // row counting. Individual result-set implementations (e.g. StreamingInlineArrowResult, | ||
| // StreamingColumnarResult) also enforce maxRows independently as defense-in-depth, | ||
| // so truncation still works even if this central check is somehow bypassed. | ||
| if (maxRowsLimit > 0 && rowsReturned >= maxRowsLimit && !countingUpdateRows) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [F3 — Critical] After client-side truncation, Live-verified against PECO: This is a new contract divergence for Fix: Track truncation explicitly. Add public boolean isAfterLast() throws SQLException {
checkIfClosed();
return truncated || executionResult.getCurrentRow() >= resultSetMetaData.getTotalRows();
}Similarly update — code-review-squad (consensus: architecture + devils-advocate; live confirmed)
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
| LOGGER.debug( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [F9 — Medium] Truncation DEBUG log lacks In a connection-pooled environment with thousands of concurrent queries, this log line can't be tied back to a specific statement. The Fix: LOGGER.debug("maxRows limit ({}) reached for statement {}; returning false from next() after {} rows",
maxRowsLimit, statementId, rowsReturned);— code-review-squad (ops)
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
| "maxRows limit ({}) reached; returning false from next() after {} rows", | ||
| maxRowsLimit, | ||
| rowsReturned); | ||
| return false; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [F1 — Critical] Result-set consumption telemetry is silently lost on every truncated query 🟥 The early Live-verified via mitmproxy: ran setMaxRows(3) on Fix: Call if (maxRowsLimit > 0 && rowsReturned >= maxRowsLimit && !countingUpdateRows) {
LOGGER.debug("maxRows limit ({}) reached for statement {}; returning false from next() after {} rows",
maxRowsLimit, statementId, rowsReturned);
if (cachedTelemetryCollector != null) {
cachedTelemetryCollector.recordResultSetIteration(
statementId.toSQLExecStatementId(), resultSetMetaData.getChunkCount(), false);
}
return false;
}— from code-review-squad (multi-reviewer consensus: ops, architecture, devils-advocate)
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Addressed |
||
| } | ||
| boolean hasNext = this.executionResult.next(); | ||
| // Only count rows for customer iteration, not internal DML counting | ||
| // (getUpdateCount() sets countingUpdateRows=true to iterate over affected-row counts | ||
| // without inflating the user-visible row counter). | ||
| if (hasNext && !countingUpdateRows) { | ||
| rowsReturned++; | ||
| } | ||
| if (cachedTelemetryCollector != null) { | ||
| cachedTelemetryCollector.recordResultSetIteration( | ||
| statementId.toSQLExecStatementId(), resultSetMetaData.getChunkCount(), hasNext); | ||
|
|
@@ -312,6 +357,17 @@ private static TelemetryCollector resolveTelemetryCollector( | |
| return null; | ||
| } | ||
|
|
||
| private static long resolveMaxRowsLimit(IDatabricksStatementInternal parentStatement) { | ||
| try { | ||
| if (parentStatement != null) { | ||
| return parentStatement.getMaxRows(); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [F2 — Critical]
Live-verified against PECO warehouse:
Fix:
— code-review-squad (multi-reviewer consensus: 7/7 reviewers; live + wire confirmed) |
||
| } | ||
| } catch (Exception e) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [F4 — High]
Fix: Narrow the catch, log at WARN with statementId: private static long resolveMaxRowsLimit(IDatabricksStatementInternal parentStatement, StatementId statementId) {
if (parentStatement == null) return 0;
try {
return parentStatement.getLargeMaxRows(); // also fixes F2
} catch (DatabricksSQLException e) {
LOGGER.warn("Failed to resolve maxRows for statement {}; client-side truncation disabled", statementId, e);
return 0;
}
}Let unchecked exceptions propagate — they indicate real bugs that should surface, not silent feature loss. — code-review-squad (5/7 reviewers) |
||
| LOGGER.trace("Error resolving maxRows limit: {}", e.getMessage()); | ||
| } | ||
| return 0; | ||
| } | ||
|
|
||
| @Override | ||
| public boolean wasNull() throws SQLException { | ||
| checkIfClosed(); | ||
|
|
@@ -1949,8 +2005,13 @@ public long getUpdateCount() throws SQLException { | |
| updateCount = 0L; | ||
| } else if (hasUpdateCount()) { | ||
| long rowsUpdated = 0; | ||
| while (next()) { | ||
| rowsUpdated += this.getLong(AFFECTED_ROWS_COUNT); | ||
| countingUpdateRows = true; | ||
| try { | ||
| while (next()) { | ||
| rowsUpdated += this.getLong(AFFECTED_ROWS_COUNT); | ||
| } | ||
| } finally { | ||
| countingUpdateRows = false; | ||
| } | ||
| updateCount = rowsUpdated; | ||
| } else { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,6 +6,8 @@ | |
| import static org.mockito.ArgumentMatchers.anyInt; | ||
| import static org.mockito.Mockito.doReturn; | ||
| import static org.mockito.Mockito.mock; | ||
| import static org.mockito.Mockito.times; | ||
| import static org.mockito.Mockito.verify; | ||
| import static org.mockito.Mockito.when; | ||
|
|
||
| import com.databricks.jdbc.api.ExecutionState; | ||
|
|
@@ -1407,4 +1409,159 @@ void testTelemetryCollectorCachedOnceNotPerRow() throws SQLException { | |
| TelemetryCollectorManager.getInstance().getOrCreateCollector(verifyContext); | ||
| assertNotNull(collector); | ||
| } | ||
|
|
||
| // --- maxRows truncation tests --- | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [F6 — High] No test exercises the actual production paths the PR claims to fix 🟧 All 9 new tests mock Verified: grep shows zero Existing integration tests don't cover this either:
Fix: Add at least one fakeservice integration test that returns more rows than |
||
|
|
||
| private DatabricksResultSet getResultSetWithMaxRows(int maxRows, IExecutionResult executionResult) | ||
| throws Exception { | ||
| IDatabricksStatementInternal stmt = mock(IDatabricksStatementInternal.class); | ||
| when(stmt.getMaxRows()).thenReturn(maxRows); | ||
| return new DatabricksResultSet( | ||
| new StatementStatus().setState(StatementState.SUCCEEDED), | ||
| STATEMENT_ID, | ||
| StatementType.QUERY, | ||
| stmt, | ||
| executionResult, | ||
| mockedResultSetMetadata, | ||
| false); | ||
| } | ||
|
|
||
| @Test | ||
| void testNextRespectsMaxRows() throws Exception { | ||
| when(mockedExecutionResult.next()).thenReturn(true); | ||
| DatabricksResultSet resultSet = getResultSetWithMaxRows(3, mockedExecutionResult); | ||
|
|
||
| assertTrue(resultSet.next()); // row 1 | ||
| assertTrue(resultSet.next()); // row 2 | ||
| assertTrue(resultSet.next()); // row 3 | ||
| assertFalse(resultSet.next()); // limit reached | ||
| assertFalse(resultSet.next()); // still false | ||
| } | ||
|
|
||
| @Test | ||
| void testNextMaxRowsZeroNoLimit() throws Exception { | ||
| // maxRows=0 means no limit; all 100 next() calls should succeed | ||
| when(mockedExecutionResult.next()).thenReturn(true); | ||
| DatabricksResultSet resultSet = getResultSetWithMaxRows(0, mockedExecutionResult); | ||
|
|
||
| for (int i = 0; i < 100; i++) { | ||
| assertTrue(resultSet.next(), "next() should return true at iteration " + i); | ||
| } | ||
| } | ||
|
|
||
| @Test | ||
| void testNextMaxRowsNullParentNoLimit() throws Exception { | ||
| // parentStatement=null means maxRowsLimit=0 (no limit) | ||
| when(mockedExecutionResult.next()).thenReturn(true); | ||
| DatabricksResultSet resultSet = | ||
| new DatabricksResultSet( | ||
| new StatementStatus().setState(StatementState.SUCCEEDED), | ||
| STATEMENT_ID, | ||
| StatementType.QUERY, | ||
| null, // null parentStatement | ||
| mockedExecutionResult, | ||
| mockedResultSetMetadata, | ||
| false); | ||
|
|
||
| for (int i = 0; i < 100; i++) { | ||
| assertTrue(resultSet.next(), "next() should return true at iteration " + i); | ||
| } | ||
| } | ||
|
|
||
| @Test | ||
| void testNextMaxRowsOneEdge() throws Exception { | ||
| when(mockedExecutionResult.next()).thenReturn(true); | ||
| DatabricksResultSet resultSet = getResultSetWithMaxRows(1, mockedExecutionResult); | ||
|
|
||
| assertTrue(resultSet.next()); // row 1 | ||
| assertFalse(resultSet.next()); // limit reached | ||
| } | ||
|
|
||
| @Test | ||
| void testNextMaxRowsDoesNotCallExecutionResultAfterLimit() throws Exception { | ||
| when(mockedExecutionResult.next()).thenReturn(true); | ||
| DatabricksResultSet resultSet = getResultSetWithMaxRows(3, mockedExecutionResult); | ||
|
|
||
| resultSet.next(); // row 1 | ||
| resultSet.next(); // row 2 | ||
| resultSet.next(); // row 3 | ||
| // These should NOT delegate to executionResult | ||
| resultSet.next(); | ||
| resultSet.next(); | ||
|
|
||
| // executionResult.next() should have been called exactly 3 times | ||
| verify(mockedExecutionResult, times(3)).next(); | ||
| } | ||
|
|
||
| @Test | ||
| void testNextMaxRowsWithEmptyResultSet() throws Exception { | ||
| // maxRows > 0 but the underlying result set is empty; next() should return false immediately | ||
| when(mockedExecutionResult.next()).thenReturn(false); | ||
| DatabricksResultSet resultSet = getResultSetWithMaxRows(5, mockedExecutionResult); | ||
|
|
||
| assertFalse(resultSet.next(), "next() should return false on an empty result set"); | ||
| assertFalse( | ||
| resultSet.next(), | ||
| "next() should still return false on subsequent calls to an empty result set"); | ||
| // executionResult.next() should have been called because rowsReturned (0) < maxRows (5) | ||
| verify(mockedExecutionResult, times(2)).next(); | ||
| } | ||
|
|
||
| @Test | ||
| void testNextMaxRowsGreaterThanActualRows() throws Exception { | ||
| // maxRows=10 but only 3 rows exist; result set should be naturally exhausted before the limit | ||
| when(mockedExecutionResult.next()).thenReturn(true, true, true, false); | ||
| DatabricksResultSet resultSet = getResultSetWithMaxRows(10, mockedExecutionResult); | ||
|
|
||
| assertTrue(resultSet.next()); // row 1 | ||
| assertTrue(resultSet.next()); // row 2 | ||
| assertTrue(resultSet.next()); // row 3 | ||
| assertFalse(resultSet.next(), "next() should return false when data is exhausted before limit"); | ||
| } | ||
|
|
||
| @Test | ||
| void testNextMaxRowsIdempotenceAfterLimit() throws Exception { | ||
| // Calling next() many times after the limit is reached should always return false | ||
| when(mockedExecutionResult.next()).thenReturn(true); | ||
| DatabricksResultSet resultSet = getResultSetWithMaxRows(2, mockedExecutionResult); | ||
|
|
||
| assertTrue(resultSet.next()); // row 1 | ||
| assertTrue(resultSet.next()); // row 2 (limit reached) | ||
| // All subsequent calls must consistently return false (idempotent behavior) | ||
| for (int i = 0; i < 10; i++) { | ||
| assertFalse(resultSet.next(), "next() must return false on repeated call #" + (i + 1)); | ||
| } | ||
| // executionResult.next() should have been called exactly 2 times (the limit) | ||
| verify(mockedExecutionResult, times(2)).next(); | ||
| } | ||
|
|
||
| @Test | ||
| void testGetUpdateCountBypassesMaxRows() throws Exception { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [F8 — Medium] The test asserts Fix: Either (a) chain an additional — code-review-squad (test) |
||
| // Mock an UPDATE result set with maxRows=2 but 5 affected rows across 5 result rows. | ||
| // getUpdateCount() should sum all 5 rows (returning 5), proving the | ||
| // countingUpdateRows flag bypasses the maxRows limit during internal iteration. | ||
| InlineJsonResult mockExec = mock(InlineJsonResult.class); | ||
| when(mockExec.next()).thenReturn(true, true, true, true, true, false); | ||
| when(mockExec.getObject(0)).thenReturn(1L, 1L, 1L, 1L, 1L); | ||
|
|
||
| DatabricksResultSetMetaData mockMeta = mock(DatabricksResultSetMetaData.class); | ||
| when(mockMeta.getColumnType(1)).thenReturn(Types.BIGINT); | ||
| when(mockMeta.getColumnNameIndex(AFFECTED_ROWS_COUNT)).thenReturn(1); | ||
|
|
||
| IDatabricksStatementInternal stmt = mock(IDatabricksStatementInternal.class); | ||
| when(stmt.getMaxRows()).thenReturn(2); | ||
|
|
||
| DatabricksResultSet resultSet = | ||
| new DatabricksResultSet( | ||
| new StatementStatus().setState(StatementState.SUCCEEDED), | ||
| STATEMENT_ID, | ||
| StatementType.UPDATE, | ||
| stmt, | ||
| mockExec, | ||
| mockMeta, | ||
| false); | ||
|
|
||
| // getUpdateCount() must iterate all 5 rows despite maxRows=2 | ||
| assertEquals(5L, resultSet.getUpdateCount()); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[F5 — High]
setMaxRows()called afterexecuteQuery()is silently ignored 🟧maxRowsLimitisfinaland captured at construction. PostgreSQL JDBC and major drivers readmaxRowslazily on eachnext(). ORMs (Hibernate, JdbcTemplate) commonly callstmt.setMaxRows(N)after obtaining the ResultSet — and this PR cements snapshot-at-construction as canonical behavior.Live-verified against PECO:
The pre-existing per-impl enforcement had the same flaw, but the PR was an opportunity to fix it for
InlineJsonResult/ArrowStreamResult(the new paths).Fix options (either is acceptable):
parentStatement.getLargeMaxRows()lazily insidenext()— one virtual call per row, negligible cost.setMaxRowsmust be called beforeexecuteQuery.— code-review-squad (architecture + devils-advocate; live confirmed)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
JDBC spec says setMaxRows affects the next execution, not an already-open ResultSet.