Skip to content

Commit 37db307

Browse files
committed
large file streaming and hamt forest index
1 parent 5be9d45 commit 37db307

File tree

9 files changed

+2384
-65
lines changed

9 files changed

+2384
-65
lines changed
File renamed without changes.

README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,41 @@ Raw S3 tools (AWS CLI, boto3) do NOT encrypt data - they upload plaintext that g
281281

282282
See [docs/PRIVACY.md](docs/PRIVACY.md) for full privacy policy.
283283

284+
### Large File Support (WNFS-inspired)
285+
286+
For files larger than 5MB, use chunked upload for better memory efficiency and partial read support:
287+
288+
```rust
289+
use fula_client::EncryptedClient;
290+
291+
// Large file - use chunked upload
292+
let large_data = std::fs::read("movie.mp4")?;
293+
if EncryptedClient::should_use_chunked(large_data.len()) {
294+
client.put_object_chunked(
295+
"my-bucket",
296+
"/videos/movie.mp4",
297+
&large_data,
298+
Some(512 * 1024), // 512KB chunks (optional)
299+
).await?;
300+
}
301+
302+
// Partial read - only downloads needed chunks
303+
let partial = client.get_object_range(
304+
"my-bucket",
305+
"/videos/movie.mp4",
306+
1024 * 1024, // offset: 1MB
307+
1024 * 1024, // length: 1MB
308+
).await?;
309+
```
310+
311+
**Benefits:**
312+
- Memory efficient: processes one chunk at a time
313+
- Partial reads: download only the bytes you need
314+
- Resumable: failed uploads can restart from last chunk
315+
- Integrity: Bao hash tree for verified streaming
316+
317+
See [docs/wnfs-borrowed-features.md](docs/wnfs-borrowed-features.md) for implementation details.
318+
284319
## Production Deployment
285320

286321
For production Ubuntu deployments with security hardening:

crates/fula-client/src/encryption.rs

Lines changed: 309 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -940,6 +940,7 @@ impl EncryptedClient {
940940
let forest = self.load_forest(bucket).await?;
941941

942942
let files: Vec<FileMetadata> = forest.list_all_files()
943+
.into_iter()
943944
.map(|entry| FileMetadata {
944945
storage_key: entry.storage_key.clone(),
945946
original_key: entry.path.clone(),
@@ -1271,6 +1272,314 @@ impl EncryptedClient {
12711272

12721273
Ok(report)
12731274
}
1275+
1276+
// ═══════════════════════════════════════════════════════════════════════════
1277+
// STREAMING ENCRYPTION FOR LARGE FILES (WNFS-inspired)
1278+
// Block-level encryption with index object pattern
1279+
// ═══════════════════════════════════════════════════════════════════════════
1280+
1281+
/// Upload a large file using chunked/streaming encryption
1282+
///
1283+
/// This method splits large files into encrypted chunks, uploads each
1284+
/// chunk as a separate object, and creates an index object with metadata.
1285+
/// Inspired by WNFS's "file = encrypted blocks + index" pattern.
1286+
///
1287+
/// # Arguments
1288+
/// * `bucket` - Target bucket
1289+
/// * `key` - Original file path/key
1290+
/// * `data` - File content
1291+
/// * `chunk_size` - Size of each chunk (default 256KB)
1292+
///
1293+
/// # Returns
1294+
/// Result with the storage key for the index object
1295+
pub async fn put_object_chunked(
1296+
&self,
1297+
bucket: &str,
1298+
key: &str,
1299+
data: &[u8],
1300+
chunk_size: Option<usize>,
1301+
) -> Result<PutObjectResult> {
1302+
use fula_crypto::chunked::{ChunkedEncoder, ChunkedFileMetadata, DEFAULT_CHUNK_SIZE};
1303+
1304+
let chunk_size = chunk_size.unwrap_or(DEFAULT_CHUNK_SIZE);
1305+
let dek = self.encryption.key_manager.generate_dek();
1306+
1307+
// Generate storage key using obfuscation (same as put_object_encrypted)
1308+
let storage_key = if self.encryption.metadata_privacy {
1309+
let path_dek = self.encryption.key_manager.derive_path_key(key);
1310+
obfuscate_key(key, &path_dek, self.encryption.obfuscation_mode.clone())
1311+
} else {
1312+
key.to_string()
1313+
};
1314+
1315+
// Create chunked encoder
1316+
let mut encoder = ChunkedEncoder::with_chunk_size(dek.clone(), chunk_size);
1317+
1318+
// Process all data and collect chunks
1319+
let mut chunks = encoder.update(data)?;
1320+
let (final_chunk, mut metadata, outboard) = encoder.finalize()?;
1321+
1322+
if let Some(chunk) = final_chunk {
1323+
chunks.push(chunk);
1324+
}
1325+
1326+
// Upload each chunk as a separate object
1327+
for chunk in &chunks {
1328+
let chunk_key = ChunkedFileMetadata::chunk_key(&storage_key, chunk.index);
1329+
1330+
self.inner.put_object_with_metadata(
1331+
bucket,
1332+
&chunk_key,
1333+
chunk.ciphertext.clone(),
1334+
Some(ObjectMetadata::new()
1335+
.with_content_type("application/octet-stream")
1336+
.with_metadata("x-fula-chunk", "true")
1337+
.with_metadata("x-fula-chunk-index", &chunk.index.to_string())),
1338+
).await?;
1339+
}
1340+
1341+
// Update metadata with content type detection
1342+
metadata.content_type = Some(
1343+
mime_guess::from_path(key)
1344+
.first_or_octet_stream()
1345+
.to_string()
1346+
);
1347+
1348+
// Wrap the DEK with HPKE
1349+
let encryptor = Encryptor::new(self.encryption.key_manager.public_key());
1350+
let wrapped_dek = encryptor.encrypt_dek(&dek)?;
1351+
1352+
// Create index object metadata
1353+
let kek_version = self.encryption.key_manager.version();
1354+
let enc_metadata = serde_json::json!({
1355+
"version": 3,
1356+
"format": "streaming-v1",
1357+
"algorithm": "AES-256-GCM",
1358+
"wrapped_key": serde_json::to_value(&wrapped_dek).unwrap(),
1359+
"kek_version": kek_version,
1360+
"chunked": metadata,
1361+
"bao_outboard": base64::Engine::encode(&base64::engine::general_purpose::STANDARD, outboard.to_bytes()),
1362+
});
1363+
1364+
// Upload index object (small, contains metadata only)
1365+
let index_metadata = ObjectMetadata::new()
1366+
.with_content_type("application/json")
1367+
.with_metadata("x-fula-encrypted", "true")
1368+
.with_metadata("x-fula-chunked", "true")
1369+
.with_metadata("x-fula-encryption", &enc_metadata.to_string());
1370+
1371+
let result = self.inner.put_object_with_metadata(
1372+
bucket,
1373+
&storage_key,
1374+
Bytes::from(b"CHUNKED".to_vec()), // Marker content
1375+
Some(index_metadata),
1376+
).await?;
1377+
1378+
// Update forest cache if we have one
1379+
if let Ok(mut cache) = self.forest_cache.write() {
1380+
if let Some(forest) = cache.get_mut(bucket) {
1381+
let now = chrono::Utc::now().timestamp();
1382+
forest.upsert_file(ForestFileEntry {
1383+
path: key.to_string(),
1384+
storage_key: storage_key.clone(),
1385+
size: data.len() as u64,
1386+
content_type: metadata.content_type.clone(),
1387+
created_at: now,
1388+
modified_at: now,
1389+
user_metadata: HashMap::new(),
1390+
content_hash: None,
1391+
});
1392+
}
1393+
}
1394+
1395+
Ok(result)
1396+
}
1397+
1398+
/// Download and decrypt a chunked file
1399+
///
1400+
/// Fetches the index object, then downloads and decrypts chunks as needed.
1401+
pub async fn get_object_chunked(
1402+
&self,
1403+
bucket: &str,
1404+
key: &str,
1405+
) -> Result<Bytes> {
1406+
use fula_crypto::chunked::{ChunkedDecoder, ChunkedFileMetadata};
1407+
1408+
// Resolve path to storage key (same as get_object_decrypted)
1409+
let storage_key = if self.encryption.metadata_privacy {
1410+
let path_dek = self.encryption.key_manager.derive_path_key(key);
1411+
obfuscate_key(key, &path_dek, self.encryption.obfuscation_mode.clone())
1412+
} else {
1413+
key.to_string()
1414+
};
1415+
1416+
// Fetch index object
1417+
let index_result = self.inner.get_object_with_metadata(bucket, &storage_key).await?;
1418+
1419+
// Check if chunked
1420+
let is_chunked = index_result.metadata
1421+
.get("x-fula-chunked")
1422+
.map(|v| v == "true")
1423+
.unwrap_or(false);
1424+
1425+
if !is_chunked {
1426+
// Fall back to regular decryption
1427+
return self.get_object_decrypted(bucket, key).await;
1428+
}
1429+
1430+
// Parse encryption metadata
1431+
let enc_metadata_str = index_result.metadata
1432+
.get("x-fula-encryption")
1433+
.ok_or_else(|| ClientError::Encryption(
1434+
fula_crypto::CryptoError::Decryption("Missing encryption metadata".to_string())
1435+
))?;
1436+
1437+
let enc_metadata: serde_json::Value = serde_json::from_str(enc_metadata_str)
1438+
.map_err(|e| ClientError::Encryption(
1439+
fula_crypto::CryptoError::Decryption(e.to_string())
1440+
))?;
1441+
1442+
// Unwrap DEK
1443+
let wrapped_dek: EncryptedData = serde_json::from_value(enc_metadata["wrapped_key"].clone())
1444+
.map_err(|e| ClientError::Encryption(
1445+
fula_crypto::CryptoError::Decryption(e.to_string())
1446+
))?;
1447+
1448+
let decryptor = Decryptor::new(self.encryption.key_manager.keypair());
1449+
let dek = decryptor.decrypt_dek(&wrapped_dek)?;
1450+
1451+
// Parse chunked metadata
1452+
let chunked_meta: ChunkedFileMetadata = serde_json::from_value(enc_metadata["chunked"].clone())
1453+
.map_err(|e| ClientError::Encryption(
1454+
fula_crypto::CryptoError::Decryption(e.to_string())
1455+
))?;
1456+
1457+
// Create decoder
1458+
let mut decoder = ChunkedDecoder::new(dek, chunked_meta.clone());
1459+
1460+
// Download and decrypt each chunk
1461+
for chunk_idx in 0..chunked_meta.num_chunks {
1462+
let chunk_key = ChunkedFileMetadata::chunk_key(&storage_key, chunk_idx);
1463+
let chunk_data = self.inner.get_object(bucket, &chunk_key).await?;
1464+
decoder.decrypt_chunk(chunk_idx, &chunk_data)?;
1465+
}
1466+
1467+
// Finalize and return
1468+
decoder.finalize()
1469+
.map_err(ClientError::Encryption)
1470+
}
1471+
1472+
/// Download a byte range from a chunked file (partial read)
1473+
///
1474+
/// Only downloads the chunks needed for the requested range.
1475+
pub async fn get_object_range(
1476+
&self,
1477+
bucket: &str,
1478+
key: &str,
1479+
offset: u64,
1480+
length: u64,
1481+
) -> Result<Bytes> {
1482+
use fula_crypto::chunked::ChunkedFileMetadata;
1483+
1484+
// Resolve path to storage key
1485+
let storage_key = if self.encryption.metadata_privacy {
1486+
let path_dek = self.encryption.key_manager.derive_path_key(key);
1487+
obfuscate_key(key, &path_dek, self.encryption.obfuscation_mode.clone())
1488+
} else {
1489+
key.to_string()
1490+
};
1491+
1492+
// Fetch index object
1493+
let index_result = self.inner.get_object_with_metadata(bucket, &storage_key).await?;
1494+
1495+
// Check if chunked
1496+
let is_chunked = index_result.metadata
1497+
.get("x-fula-chunked")
1498+
.map(|v| v == "true")
1499+
.unwrap_or(false);
1500+
1501+
if !is_chunked {
1502+
// Fall back to full download and slice
1503+
let full = self.get_object_decrypted(bucket, key).await?;
1504+
let start = offset as usize;
1505+
let end = (offset + length) as usize;
1506+
return Ok(full.slice(start.min(full.len())..end.min(full.len())));
1507+
}
1508+
1509+
// Parse encryption metadata
1510+
let enc_metadata_str = index_result.metadata
1511+
.get("x-fula-encryption")
1512+
.ok_or_else(|| ClientError::Encryption(
1513+
fula_crypto::CryptoError::Decryption("Missing encryption metadata".to_string())
1514+
))?;
1515+
1516+
let enc_metadata: serde_json::Value = serde_json::from_str(enc_metadata_str)
1517+
.map_err(|e| ClientError::Encryption(
1518+
fula_crypto::CryptoError::Decryption(e.to_string())
1519+
))?;
1520+
1521+
// Unwrap DEK
1522+
let wrapped_dek: EncryptedData = serde_json::from_value(enc_metadata["wrapped_key"].clone())
1523+
.map_err(|e| ClientError::Encryption(
1524+
fula_crypto::CryptoError::Decryption(e.to_string())
1525+
))?;
1526+
1527+
let decryptor = Decryptor::new(self.encryption.key_manager.keypair());
1528+
let dek = decryptor.decrypt_dek(&wrapped_dek)?;
1529+
1530+
// Parse chunked metadata
1531+
let chunked_meta: ChunkedFileMetadata = serde_json::from_value(enc_metadata["chunked"].clone())
1532+
.map_err(|e| ClientError::Encryption(
1533+
fula_crypto::CryptoError::Decryption(e.to_string())
1534+
))?;
1535+
1536+
// Determine which chunks we need
1537+
let needed_chunks = chunked_meta.chunks_for_range(offset, length);
1538+
1539+
// Download and decrypt only needed chunks
1540+
let mut decrypted_chunks = Vec::new();
1541+
1542+
for chunk_idx in needed_chunks {
1543+
let chunk_key = ChunkedFileMetadata::chunk_key(&storage_key, chunk_idx);
1544+
let chunk_data = self.inner.get_object(bucket, &chunk_key).await?;
1545+
1546+
let nonce = chunked_meta.get_chunk_nonce(chunk_idx)
1547+
.map_err(ClientError::Encryption)?;
1548+
1549+
// Decrypt this chunk with the DEK
1550+
let aead = Aead::new_default(&dek);
1551+
let plaintext = aead.decrypt(&nonce, &chunk_data)
1552+
.map_err(ClientError::Encryption)?;
1553+
1554+
decrypted_chunks.push((chunk_idx, plaintext));
1555+
}
1556+
1557+
// Extract the requested range from decrypted chunks
1558+
let chunk_size = chunked_meta.chunk_size as u64;
1559+
let mut result = Vec::with_capacity(length as usize);
1560+
1561+
for (chunk_idx, chunk_data) in decrypted_chunks {
1562+
let chunk_start = chunk_idx as u64 * chunk_size;
1563+
let chunk_end = chunk_start + chunk_data.len() as u64;
1564+
1565+
// Calculate overlap with requested range
1566+
let range_start = offset.max(chunk_start);
1567+
let range_end = (offset + length).min(chunk_end);
1568+
1569+
if range_start < range_end {
1570+
let local_start = (range_start - chunk_start) as usize;
1571+
let local_end = (range_end - chunk_start) as usize;
1572+
result.extend_from_slice(&chunk_data[local_start..local_end]);
1573+
}
1574+
}
1575+
1576+
Ok(Bytes::from(result))
1577+
}
1578+
1579+
/// Check if a file should use chunked upload based on size
1580+
pub fn should_use_chunked(size: usize) -> bool {
1581+
fula_crypto::should_use_chunked(size)
1582+
}
12741583
}
12751584

12761585
/// File metadata (without file content) - optimized for file managers

0 commit comments

Comments
 (0)