Skip to content

Commit 1e1076c

Browse files
committed
feat(firestore): 16MB docs support
WIP: Adds support for 16MB docs in Firestore. Changes: - Increases the gRPC message receive size limit from 4MB to 17MB (16MB doc + overhead). - Implements a workaround for the `CursorWindow` 2MB row read limit when reading large docs from local storage. If a row's content is larger than 1MB, a separate query reads the content in chunks using `SUBSTR`, with a chunk size smaller than the row read limit. This is a slow operation for 16MB docs, taking ~200ms in my testing on the local emulator. - Debug logs are now truncated if they exceed 128KB, because formatting proto responses that contain large documents as strings consumed a huge amount of memory. In one extreme case, formatting a response containing a doc with a field holding 16MB of emojis caused an OOM on emulators. - Adds tests explicitly for large documents.
1 parent 534ef3b commit 1e1076c

5 files changed

Lines changed: 499 additions & 8 deletions

File tree

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
// Copyright 2026 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package com.google.firebase.firestore;
16+
17+
import static com.google.firebase.firestore.testutil.IntegrationTestUtil.testFirestore;
18+
import static com.google.firebase.firestore.testutil.IntegrationTestUtil.waitFor;
19+
import static org.junit.Assert.assertEquals;
20+
import static org.junit.Assert.assertTrue;
21+
22+
import androidx.test.ext.junit.runners.AndroidJUnit4;
23+
import com.google.android.gms.tasks.Task;
24+
import java.util.Arrays;
25+
import org.junit.After;
26+
import org.junit.Test;
27+
import org.junit.runner.RunWith;
28+
29+
@RunWith(AndroidJUnit4.class)
30+
public class LargeDocumentTest {
31+
32+
// These tests require a pre-seeded database containing specific large documents.
33+
private static final String SEED_COLLECTION = "serverSdkTests";
34+
private static final String DOC_15_9MB_UNICODE = "doc_15_9MB_unicode";
35+
private static final String COL_LARGE_DOCS = "col_large_docs";
36+
37+
// Extended timeout required for large document tests due to gRPC flow control
38+
// window defaults (64KB), which result in longer read times over the network.
39+
private static final int TIMEOUT_MS = 60000;
40+
41+
@After
42+
public void tearDown() {
43+
com.google.firebase.firestore.testutil.IntegrationTestUtil.tearDown();
44+
}
45+
46+
private String generateString(int sizeInBytes) {
47+
char[] chars = new char[sizeInBytes];
48+
Arrays.fill(chars, 'a');
49+
return new String(chars);
50+
}
51+
52+
@Test(timeout = TIMEOUT_MS)
53+
public void testReadAndCacheLargeUnicodeDocument() {
54+
FirebaseFirestore db = testFirestore();
55+
DocumentReference docRef = db.collection(SEED_COLLECTION).document(DOC_15_9MB_UNICODE);
56+
57+
DocumentSnapshot serverSnapshot = waitFor(docRef.get(Source.SERVER));
58+
assertTrue(serverSnapshot.exists());
59+
60+
waitFor(db.disableNetwork());
61+
62+
DocumentSnapshot cacheSnapshot = waitFor(docRef.get(Source.CACHE));
63+
assertTrue(cacheSnapshot.exists());
64+
65+
assertEquals(serverSnapshot.getData(), cacheSnapshot.getData());
66+
67+
waitFor(db.enableNetwork());
68+
}
69+
70+
@Test(timeout = TIMEOUT_MS)
71+
public void testCacheIntegrityWithMultipleLargeDocuments() {
72+
FirebaseFirestore db = testFirestore();
73+
74+
// Copy existing test environment settings but set a normal cache size
75+
// to ensure we don't accidentally trigger async GC during the test.
76+
FirebaseFirestoreSettings existingSettings = db.getFirestoreSettings();
77+
FirebaseFirestoreSettings settings =
78+
new FirebaseFirestoreSettings.Builder(existingSettings)
79+
.setLocalCacheSettings(
80+
PersistentCacheSettings.newBuilder().setSizeBytes(104857600).build()) // 100MB
81+
.build();
82+
db.setFirestoreSettings(settings);
83+
84+
CollectionReference colRef = db.collection(COL_LARGE_DOCS);
85+
DocumentReference docA = colRef.document("doc_a");
86+
DocumentReference docB = colRef.document("doc_b");
87+
88+
waitFor(docA.get(Source.SERVER));
89+
waitFor(docB.get(Source.SERVER));
90+
91+
waitFor(db.disableNetwork());
92+
93+
DocumentSnapshot cacheSnapshotA = waitFor(docA.get(Source.CACHE));
94+
DocumentSnapshot cacheSnapshotB = waitFor(docB.get(Source.CACHE));
95+
96+
assertTrue("docA should exist in cache", cacheSnapshotA.exists());
97+
assertTrue("docB should exist in cache", cacheSnapshotB.exists());
98+
99+
// Sanity check
100+
assertTrue(cacheSnapshotA.getData().size() > 0);
101+
assertTrue(cacheSnapshotB.getData().size() > 0);
102+
103+
waitFor(db.enableNetwork());
104+
}
105+
106+
@Test(timeout = TIMEOUT_MS)
107+
public void testWatchStreamInitializationAndDiff() throws Exception {
108+
FirebaseFirestore db = testFirestore();
109+
DocumentReference docRef = db.collection(SEED_COLLECTION).document(DOC_15_9MB_UNICODE);
110+
111+
// Verify that the initial snapshot of a large document is received successfully
112+
// without triggering stream cancellation loops.
113+
Task<DocumentSnapshot> firstSnapshotTask = docRef.get(Source.SERVER);
114+
DocumentSnapshot firstSnapshot = waitFor(firstSnapshotTask);
115+
assertTrue(firstSnapshot.exists());
116+
117+
// TODO: Enable the differential update assertions below once client SDK write streams
118+
// support the 16MB limit.
119+
/*
120+
Map<String, Object> updateData = new HashMap<>();
121+
updateData.put("differential_field", "updated_value");
122+
waitFor(docRef.update(updateData));
123+
124+
// Wait for the snapshot listener to fire a second time to verify stream continuity.
125+
*/
126+
}
127+
128+
// TODO: Enable this test. Currently it times out after not receiving a response from the backend.
129+
/*
130+
@Test(timeout = TIMEOUT_MS)
131+
public void testOversizedPayloadRejection() {
132+
FirebaseFirestore db = testFirestore();
133+
DocumentReference docRef = db.collection(SEED_COLLECTION).document("temp_oversized_doc");
134+
135+
Map<String, Object> data = new HashMap<>();
136+
// 16.1MB payload
137+
int oversizedPayloadBytes = (16 * 1024 * 1024) + 102400;
138+
data.put("largeField", generateString(oversizedPayloadBytes));
139+
140+
try {
141+
waitFor(docRef.set(data));
142+
fail("Setting a document exceeding the maximum size limit should fail.");
143+
} catch (Exception e) {
144+
assertTrue(e.getCause() instanceof FirebaseFirestoreException);
145+
FirebaseFirestoreException firestoreException = (FirebaseFirestoreException) e.getCause();
146+
147+
assertEquals(Code.INVALID_ARGUMENT, firestoreException.getCode());
148+
}
149+
}
150+
*/
151+
152+
@Test(timeout = TIMEOUT_MS)
153+
public void testTransactionReadModifyWrite() {
154+
FirebaseFirestore db = testFirestore();
155+
DocumentReference docRef = db.collection(SEED_COLLECTION).document(DOC_15_9MB_UNICODE);
156+
157+
Task<Void> transactionTask =
158+
db.runTransaction(
159+
transaction -> {
160+
DocumentSnapshot snapshot = transaction.get(docRef);
161+
assertTrue(snapshot.exists());
162+
163+
transaction.update(docRef, "transaction_timestamp", System.currentTimeMillis());
164+
return null;
165+
});
166+
167+
waitFor(transactionTask);
168+
}
169+
170+
@Test(timeout = TIMEOUT_MS)
171+
public void testQueryLargeDocuments() {
172+
FirebaseFirestore db = testFirestore();
173+
CollectionReference colRef = db.collection(COL_LARGE_DOCS);
174+
175+
Query query = colRef.whereIn(FieldPath.documentId(), Arrays.asList("doc_a", "doc_b"));
176+
177+
QuerySnapshot serverSnapshot = waitFor(query.get(Source.SERVER));
178+
assertEquals(
179+
"Query should return exactly 2 large documents from server", 2, serverSnapshot.size());
180+
181+
waitFor(db.disableNetwork());
182+
183+
QuerySnapshot cacheSnapshot = waitFor(query.get(Source.CACHE));
184+
assertEquals(
185+
"Query should return exactly 2 large documents from cache", 2, cacheSnapshot.size());
186+
187+
assertEquals(
188+
"Cached query payload should exactly match server query payload",
189+
serverSnapshot.getDocuments().get(0).getData(),
190+
cacheSnapshot.getDocuments().get(0).getData());
191+
192+
waitFor(db.enableNetwork());
193+
}
194+
195+
@Test(timeout = TIMEOUT_MS)
196+
public void testQueryLargeDocumentsForcesLocalScan() {
197+
FirebaseFirestore db = testFirestore();
198+
CollectionReference colRef = db.collection(COL_LARGE_DOCS);
199+
200+
waitFor(colRef.document("doc_a").get(Source.SERVER));
201+
waitFor(colRef.document("doc_b").get(Source.SERVER));
202+
203+
waitFor(db.disableNetwork());
204+
205+
Query query = colRef.orderBy(FieldPath.documentId()).limit(2);
206+
207+
// Execute the query offline
208+
QuerySnapshot cacheSnapshot = waitFor(query.get(Source.CACHE));
209+
210+
assertEquals(
211+
"Query should find and return exactly 2 large documents from cache",
212+
2,
213+
cacheSnapshot.size());
214+
215+
assertTrue(
216+
"Payload should not be empty", cacheSnapshot.getDocuments().get(0).getData().size() > 0);
217+
218+
waitFor(db.enableNetwork());
219+
}
220+
}

firebase-firestore/src/main/java/com/google/firebase/firestore/local/SQLiteRemoteDocumentCache.java

Lines changed: 84 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ final class SQLiteRemoteDocumentCache implements RemoteDocumentCache {
5454
/** The number of bind args per collection group in {@link #getAll(String, IndexOffset, int)} */
5555
@VisibleForTesting static final int BINDS_PER_STATEMENT = 9;
5656

57+
/** The largest blob size, in bytes (1 MB), that is read directly through a CursorWindow row; larger documents are fetched in chunks. */
58+
@VisibleForTesting static int SAFE_CURSOR_LIMIT = 1024 * 1024;
59+
5760
private final SQLitePersistence db;
5861
private final LocalSerializer serializer;
5962
private IndexManager indexManager;
@@ -163,7 +166,12 @@ public Map<DocumentKey, MutableDocument> getAll(Iterable<DocumentKey> documentKe
163166
SQLitePersistence.LongQuery longQuery =
164167
new SQLitePersistence.LongQuery(
165168
db,
166-
"SELECT contents, read_time_seconds, read_time_nanos, document_type, path "
169+
"SELECT "
170+
+ "CASE WHEN LENGTH(contents) <= "
171+
+ SAFE_CURSOR_LIMIT
172+
+ " THEN contents ELSE NULL END, "
173+
+ "read_time_seconds, read_time_nanos, document_type, path, "
174+
+ "LENGTH(contents) "
167175
+ "FROM remote_documents "
168176
+ "WHERE path IN (",
169177
bindVars,
@@ -173,7 +181,21 @@ public Map<DocumentKey, MutableDocument> getAll(Iterable<DocumentKey> documentKe
173181
while (longQuery.hasMoreSubqueries()) {
174182
longQuery
175183
.performNextSubquery()
176-
.forEach(row -> processRowInBackground(backgroundQueue, results, row, /*filter*/ null));
184+
.forEach(
185+
row -> {
186+
// Attempt to get the payload (NULL if > 1MB)
187+
byte[] payload = row.getBlob(0);
188+
189+
int blobLength = row.getInt(5);
190+
191+
if (payload == null && blobLength > 0) {
192+
// This is a massive document, fetch it in chunks.
193+
String path = row.getString(4);
194+
payload = fetchMassiveDocumentInChunks(db, path, blobLength, SAFE_CURSOR_LIMIT);
195+
}
196+
197+
processRowInBackground(backgroundQueue, results, row, payload, /*filter*/ null);
198+
});
177199
}
178200
backgroundQueue.drain();
179201

@@ -186,6 +208,46 @@ public Map<DocumentKey, MutableDocument> getAll(Iterable<DocumentKey> documentKe
186208
}
187209
}
188210

211+
/** Fetches a large document in chunks using SQLite's SUBSTR to bypass CursorWindow limits. */
212+
@VisibleForTesting
213+
byte[] fetchMassiveDocumentInChunks(
214+
SQLitePersistence db, String path, int totalLength, int chunkSize) {
215+
216+
byte[] fullPayload = new byte[totalLength];
217+
218+
int currentOffset = 1; // SQLite SUBSTR is 1-indexed for offsets
219+
int destPos = 0;
220+
221+
while (currentOffset <= totalLength) {
222+
byte[] chunk =
223+
db.query("SELECT SUBSTR(contents, ?, ?) FROM remote_documents WHERE path = ?")
224+
.binding(currentOffset, chunkSize, path)
225+
.firstValue(row -> row.getBlob(0));
226+
227+
if (chunk != null && chunk.length > 0) {
228+
System.arraycopy(chunk, 0, fullPayload, destPos, chunk.length);
229+
destPos += chunk.length;
230+
currentOffset += chunk.length;
231+
} else {
232+
throw new IllegalStateException(
233+
"Failed to fetch chunk for massive document at path: " + path);
234+
}
235+
}
236+
237+
if (destPos != totalLength) {
238+
throw new IllegalStateException(
239+
"Failed to fetch all chunks for massive document at path: "
240+
+ path
241+
+ ". Expected "
242+
+ totalLength
243+
+ " bytes, but read "
244+
+ destPos
245+
+ " bytes.");
246+
}
247+
248+
return fullPayload;
249+
}
250+
189251
@Override
190252
public Map<DocumentKey, MutableDocument> getAll(
191253
String collectionGroup, IndexOffset offset, int limit) {
@@ -230,7 +292,12 @@ private Map<DocumentKey, MutableDocument> getAll(
230292

231293
StringBuilder sql =
232294
repeatSequence(
233-
"SELECT contents, read_time_seconds, read_time_nanos, document_type, path "
295+
"SELECT "
296+
+ "CASE WHEN LENGTH(contents) <= "
297+
+ SAFE_CURSOR_LIMIT
298+
+ " THEN contents ELSE NULL END, "
299+
+ "read_time_seconds, read_time_nanos, document_type, path, "
300+
+ "LENGTH(contents) "
234301
+ "FROM remote_documents "
235302
+ "WHERE path >= ? AND path < ? AND path_length = ? "
236303
+ (tryFilterDocumentType == null
@@ -270,7 +337,19 @@ private Map<DocumentKey, MutableDocument> getAll(
270337
int cnt =
271338
db.query(sql.toString())
272339
.binding(bindVars)
273-
.forEach(row -> processRowInBackground(backgroundQueue, results, row, filter));
340+
.forEach(
341+
row -> {
342+
byte[] payload = row.getBlob(0);
343+
int blobLength = row.getInt(5);
344+
345+
if (payload == null && blobLength > 0) {
346+
// Massive document detected, fetch it in chunks
347+
String path = row.getString(4);
348+
payload = fetchMassiveDocumentInChunks(db, path, blobLength, SAFE_CURSOR_LIMIT);
349+
}
350+
351+
processRowInBackground(backgroundQueue, results, row, payload, filter);
352+
});
274353
if (context != null) {
275354
context.incrementDocumentReadCount(cnt);
276355
}
@@ -298,8 +377,8 @@ private void processRowInBackground(
298377
BackgroundQueue backgroundQueue,
299378
Map<DocumentKey, MutableDocument> results,
300379
Cursor row,
380+
byte[] rawDocument,
301381
@Nullable Function<MutableDocument, Boolean> filter) {
302-
byte[] rawDocument = row.getBlob(0);
303382
int readTimeSeconds = row.getInt(1);
304383
int readTimeNanos = row.getInt(2);
305384
boolean documentTypeIsNull = row.isNull(3);

0 commit comments

Comments
 (0)