-
Notifications
You must be signed in to change notification settings - Fork 40
Fix client-side maxRows enforcement in ResultSet.next() #1448
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f5dcb2a
424da6d
e05f37b
6be661b
3ee53b0
b07ae45
4a20981
60e4d07
ac403aa
2976c11
7554dd6
c88d16d
e23ef7e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -82,6 +82,16 @@ enum ResultSetType { | |
| // for the lifetime of a result set. | ||
| private final TelemetryCollector cachedTelemetryCollector; | ||
|
|
||
| // Client-side maxRows enforcement. This central check in next() is the single | ||
| // enforcement point using the full long precision from getLargeMaxRows(). | ||
| // No per-impl maxRows enforcement exists — all implementations delegate to this check. | ||
| private final long maxRowsLimit; | ||
| private long rowsReturned = 0; | ||
| private boolean truncatedByMaxRows = false; // tracks client-side truncation for cursor methods | ||
| // Flag to bypass maxRows check during getUpdateCount() internal iteration, | ||
| // which calls next() to sum affected-row counts for DML statements. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [F7 — Medium] Using a mutable instance field to alter the semantics of Fix: Extract a private — code-review-squad (language + maintainability + architecture)
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Bigger change, can be done in followup PR |
||
| private boolean countingUpdateRows = false; | ||
|
|
||
| // Constructor for SEA result set | ||
| public DatabricksResultSet( | ||
| StatementStatus statementStatus, | ||
|
|
@@ -121,6 +131,7 @@ public DatabricksResultSet( | |
| this.updateCount = null; | ||
| this.parentStatement = parentStatement; | ||
| this.cachedTelemetryCollector = resolveTelemetryCollector(parentStatement); | ||
| this.maxRowsLimit = resolveMaxRowsLimit(parentStatement); | ||
| this.isClosed = false; | ||
| this.wasNull = false; | ||
| } | ||
|
|
@@ -142,6 +153,7 @@ public DatabricksResultSet( | |
| this.updateCount = null; | ||
| this.parentStatement = parentStatement; | ||
| this.cachedTelemetryCollector = resolveTelemetryCollector(parentStatement); | ||
| this.maxRowsLimit = resolveMaxRowsLimit(parentStatement); | ||
| this.isClosed = false; | ||
| this.wasNull = false; | ||
| this.complexDatatypeSupport = complexDatatypeSupport; | ||
|
|
@@ -189,6 +201,7 @@ public DatabricksResultSet( | |
| this.updateCount = null; | ||
| this.parentStatement = parentStatement; | ||
| this.cachedTelemetryCollector = resolveTelemetryCollector(parentStatement); | ||
| this.maxRowsLimit = resolveMaxRowsLimit(parentStatement); | ||
| this.isClosed = false; | ||
| this.wasNull = false; | ||
| } | ||
|
|
@@ -220,6 +233,7 @@ public DatabricksResultSet( | |
| this.updateCount = null; | ||
| this.parentStatement = null; | ||
| this.cachedTelemetryCollector = null; | ||
| this.maxRowsLimit = 0; | ||
| this.isClosed = false; | ||
| this.wasNull = false; | ||
| } | ||
|
|
@@ -251,6 +265,7 @@ public DatabricksResultSet( | |
| this.updateCount = null; | ||
| this.parentStatement = null; | ||
| this.cachedTelemetryCollector = null; | ||
| this.maxRowsLimit = 0; | ||
| this.isClosed = false; | ||
| this.wasNull = false; | ||
| } | ||
|
|
@@ -271,14 +286,52 @@ public DatabricksResultSet( | |
| this.updateCount = null; | ||
| this.parentStatement = null; | ||
| this.cachedTelemetryCollector = null; | ||
| this.maxRowsLimit = 0; | ||
| this.isClosed = false; | ||
| this.wasNull = false; | ||
| } | ||
|
|
||
| /** | ||
| * Advances the cursor to the next row of the result set. Returns {@code false} when no more rows | ||
| * are available or when the client-side {@code maxRows} limit has been reached. | ||
| * | ||
| * <p>{@code rowsReturned} tracks how many rows have been delivered to the caller through this | ||
| * ResultSet instance. It is only incremented during normal (non-DML-counting) iteration and | ||
| * assumes single-threaded access, which is the standard JDBC contract. | ||
| * | ||
| * @return {@code true} if the new current row is valid; {@code false} if there are no more rows | ||
| * @throws SQLException if the result set is closed or an error occurs | ||
| */ | ||
| @Override | ||
| public boolean next() throws SQLException { | ||
| checkIfClosed(); | ||
| // Client-side maxRows truncation: stop before delegating to the underlying result | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [F10 — Medium] "Defense-in-depth" comment is misleading 🟨 The comment claims per-implementation enforcement is retained as defense-in-depth. Verified by grep: Fix: Either (a) reword the comment to reflect reality — "central enforcement; some streaming impls also enforce locally", or (b) consider removing per-impl enforcement now that tier-1 covers all paths (after integration tests prove it). Single source of truth is cleaner. — code-review-squad (maintainability + architecture + devils-advocate)
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated |
||
| // implementation when the limit has been reached. This is skipped during | ||
| // getUpdateCount() internal iteration (countingUpdateRows) to avoid breaking DML | ||
| // row counting. This is the single maxRows enforcement point for all implementations. | ||
| if (maxRowsLimit > 0 && rowsReturned >= maxRowsLimit && !countingUpdateRows) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [F3 — Critical] After client-side truncation, Live-verified against PECO: This is a new contract divergence for Fix: Track truncation explicitly. Add public boolean isAfterLast() throws SQLException {
checkIfClosed();
return truncated || executionResult.getCurrentRow() >= resultSetMetaData.getTotalRows();
}Similarly update — code-review-squad (consensus: architecture + devils-advocate; live confirmed)
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
| LOGGER.debug( | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [F9 — Medium] Truncation DEBUG log lacks In a connection-pooled environment with thousands of concurrent queries, this log line can't be tied back to a specific statement. The Fix: LOGGER.debug("maxRows limit ({}) reached for statement {}; returning false from next() after {} rows",
maxRowsLimit, statementId, rowsReturned);— code-review-squad (ops)
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||
| "maxRows limit ({}) reached for statement {}; returning false after {} rows", | ||
| maxRowsLimit, | ||
| statementId, | ||
| rowsReturned); | ||
| if (!truncatedByMaxRows) { | ||
| truncatedByMaxRows = true; | ||
| // Record telemetry for truncated queries so dashboards reflect the truncation | ||
| if (cachedTelemetryCollector != null) { | ||
| cachedTelemetryCollector.recordResultSetIteration( | ||
| statementId.toSQLExecStatementId(), resultSetMetaData.getChunkCount(), false); | ||
| } | ||
| } | ||
| return false; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [F1 — Critical] Result-set consumption telemetry is silently lost on every truncated query 🟥 The early Live-verified via mitmproxy: ran setMaxRows(3) on Fix: Call if (maxRowsLimit > 0 && rowsReturned >= maxRowsLimit && !countingUpdateRows) {
LOGGER.debug("maxRows limit ({}) reached for statement {}; returning false from next() after {} rows",
maxRowsLimit, statementId, rowsReturned);
if (cachedTelemetryCollector != null) {
cachedTelemetryCollector.recordResultSetIteration(
statementId.toSQLExecStatementId(), resultSetMetaData.getChunkCount(), false);
}
return false;
}— from code-review-squad (multi-reviewer consensus: ops, architecture, devils-advocate)
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Addressed |
||
| } | ||
| boolean hasNext = this.executionResult.next(); | ||
| // Only count rows for customer iteration, not internal DML counting | ||
| // (getUpdateCount() sets countingUpdateRows=true to iterate over affected-row counts | ||
| // without inflating the user-visible row counter). | ||
| if (hasNext && !countingUpdateRows) { | ||
| rowsReturned++; | ||
| } | ||
| if (cachedTelemetryCollector != null) { | ||
| cachedTelemetryCollector.recordResultSetIteration( | ||
| statementId.toSQLExecStatementId(), resultSetMetaData.getChunkCount(), hasNext); | ||
|
|
@@ -321,6 +374,20 @@ private static TelemetryCollector resolveTelemetryCollector( | |
| return null; | ||
| } | ||
|
|
||
| private static long resolveMaxRowsLimit(IDatabricksStatementInternal parentStatement) { | ||
| try { | ||
| if (parentStatement != null) { | ||
| // Use getLargeMaxRows() to preserve full long precision. | ||
| // getMaxRows() returns int and silently truncates values > Integer.MAX_VALUE. | ||
| return parentStatement.getLargeMaxRows(); | ||
| } | ||
| } catch (SQLException e) { | ||
| // Narrow to SQLException (the only checked exception from getLargeMaxRows). | ||
| LOGGER.warn("Error resolving maxRows limit: {}", e.getMessage()); | ||
| } | ||
| return 0; | ||
| } | ||
|
|
||
| @Override | ||
| public boolean wasNull() throws SQLException { | ||
| checkIfClosed(); | ||
|
|
@@ -662,7 +729,9 @@ public boolean isBeforeFirst() throws SQLException { | |
| @Override | ||
| public boolean isAfterLast() throws SQLException { | ||
| checkIfClosed(); | ||
| return executionResult.getCurrentRow() >= resultSetMetaData.getTotalRows(); | ||
| // Account for client-side maxRows truncation | ||
| return truncatedByMaxRows | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [F3 / HIGH] This change correctly adds executionResult.getCurrentRow() >= 0 && !executionResult.hasNext()After client-side truncation, the underlying execution result still has more rows → Per the JDBC spec, Recommendation: also add the truncation check to if (maxRowsLimit > 0 && rowsReturned >= maxRowsLimit) {
return true; // cursor is on the last row we will deliver
}…at the top of both branches of Flagged by 4 reviewers (architecture, language, maintainability, devils-advocate).
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Addressed. Added maxRows truncation check in |
||
| || executionResult.getCurrentRow() >= resultSetMetaData.getTotalRows(); | ||
| } | ||
|
|
||
| @Override | ||
|
|
@@ -691,6 +760,11 @@ public boolean isFirst() throws SQLException { | |
| @Override | ||
| public boolean isLast() throws SQLException { | ||
| checkIfClosed(); | ||
| // Account for client-side maxRows truncation: if the next next() call would | ||
| // hit the limit, this is the last row the caller will see. | ||
| if (maxRowsLimit > 0 && rowsReturned >= maxRowsLimit) { | ||
| return true; | ||
| } | ||
| if (executionResult instanceof LazyThriftResult | ||
| || executionResult instanceof StreamingColumnarResult | ||
| || executionResult instanceof LazyThriftInlineArrowResult | ||
|
|
@@ -731,6 +805,10 @@ public boolean last() throws SQLException { | |
| @Override | ||
| public int getRow() throws SQLException { | ||
| checkIfClosed(); | ||
| // JDBC spec: getRow() returns 0 when cursor is not on a valid row (after last) | ||
| if (truncatedByMaxRows) { | ||
| return 0; | ||
| } | ||
| return (int) executionResult.getCurrentRow() + 1; | ||
| } | ||
|
|
||
|
|
@@ -1958,8 +2036,13 @@ public long getUpdateCount() throws SQLException { | |
| updateCount = 0L; | ||
| } else if (hasUpdateCount()) { | ||
| long rowsUpdated = 0; | ||
| while (next()) { | ||
| rowsUpdated += this.getLong(AFFECTED_ROWS_COUNT); | ||
| countingUpdateRows = true; | ||
| try { | ||
| while (next()) { | ||
| rowsUpdated += this.getLong(AFFECTED_ROWS_COUNT); | ||
| } | ||
| } finally { | ||
| countingUpdateRows = false; | ||
| } | ||
| updateCount = rowsUpdated; | ||
| } else { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[F5 — High]
setMaxRows()called afterexecuteQuery()is silently ignored 🟧maxRowsLimitisfinaland captured at construction. PostgreSQL JDBC and major drivers readmaxRowslazily on eachnext(). ORMs (Hibernate, JdbcTemplate) commonly callstmt.setMaxRows(N)after obtaining the ResultSet — and this PR cements snapshot-at-construction as canonical behavior.Live-verified against PECO:
The pre-existing per-impl enforcement had the same flaw, but the PR was an opportunity to fix it for
InlineJsonResult/ArrowStreamResult(the new paths).Fix options (either is acceptable):
parentStatement.getLargeMaxRows()lazily insidenext()— one virtual call per row, negligible cost.setMaxRowsmust be called beforeexecuteQuery.— code-review-squad (architecture + devils-advocate; live confirmed)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
JDBC spec says setMaxRows affects the next execution, not an already-open ResultSet.