|
7 | 7 | //! |
8 | 8 | //! Each line represents one log entry and the keys it maps to. |
9 | 9 | //! Empty lines (entries with no keys) are skipped. |
| 10 | +//! |
| 11 | +//! On startup, the WAL is validated and truncated to match the expected |
| 12 | +//! tree size from the database. This prevents duplicate entries after a crash. |
10 | 13 |
|
11 | 14 | use crate::error::{Error, Result}; |
12 | 15 | use crate::types::LogIndex; |
13 | 16 | use std::fs::{File, OpenOptions}; |
14 | | -use std::io::{BufRead, BufReader, BufWriter, Write}; |
| 17 | +use std::io::{BufRead, BufReader, BufWriter, Read, Write}; |
15 | 18 | use std::path::Path; |
16 | 19 |
|
17 | 20 | use super::IndexKey; |
@@ -60,11 +63,18 @@ impl WalWriter { |
60 | 63 | Ok(()) |
61 | 64 | } |
62 | 65 |
|
63 | | - /// Flush the WAL to disk. |
| 66 | + /// Flush the WAL to disk with fsync for durability. |
64 | 67 | pub fn flush(&mut self) -> Result<()> { |
65 | 68 | self.writer |
66 | 69 | .flush() |
67 | 70 | .map_err(|e| Error::Internal(format!("failed to flush WAL: {}", e)))?; |
| 71 | + |
| 72 | + // fsync to ensure data is persisted to disk (not just OS buffer) |
| 73 | + self.writer |
| 74 | + .get_ref() |
| 75 | + .sync_data() |
| 76 | + .map_err(|e| Error::Internal(format!("failed to sync WAL to disk: {}", e)))?; |
| 77 | + |
68 | 78 | Ok(()) |
69 | 79 | } |
70 | 80 | } |
@@ -106,6 +116,106 @@ impl WalReader { |
106 | 116 | } |
107 | 117 | } |
108 | 118 |
|
| 119 | +/// Validate and truncate the WAL file to match the expected tree size. |
| 120 | +/// |
| 121 | +/// This function reads the WAL backwards to find the last complete entry, |
| 122 | +/// and truncates any entries with index >= expected_tree_size. |
| 123 | +/// This is critical for crash recovery: if the WAL was flushed but the database |
| 124 | +/// wasn't updated before a crash, we need to truncate the WAL to match the |
| 125 | +/// database state to avoid duplicate entries. |
| 126 | +/// |
| 127 | +/// Returns the actual tree size found in the WAL (may be less than expected if WAL is behind). |
| 128 | +pub fn validate_and_truncate_wal(path: impl AsRef<Path>, expected_tree_size: u64) -> Result<u64> { |
| 129 | + let path = path.as_ref(); |
| 130 | + |
| 131 | + if !path.exists() { |
| 132 | + return Ok(0); |
| 133 | + } |
| 134 | + |
| 135 | + let mut file = OpenOptions::new() |
| 136 | + .read(true) |
| 137 | + .write(true) |
| 138 | + .open(path) |
| 139 | + .map_err(|e| Error::Internal(format!("failed to open WAL for validation: {}", e)))?; |
| 140 | + |
| 141 | + let file_size = file |
| 142 | + .metadata() |
| 143 | + .map_err(|e| Error::Internal(format!("failed to get WAL metadata: {}", e)))? |
| 144 | + .len(); |
| 145 | + |
| 146 | + if file_size == 0 { |
| 147 | + return Ok(0); |
| 148 | + } |
| 149 | + |
| 150 | + // Read the entire file to find all entries and their positions |
| 151 | + let mut content = String::new(); |
| 152 | + file.read_to_string(&mut content) |
| 153 | + .map_err(|e| Error::Internal(format!("failed to read WAL: {}", e)))?; |
| 154 | + |
| 155 | + // Find all line boundaries and parse entries |
| 156 | + let mut last_valid_pos: u64 = 0; |
| 157 | + let mut max_valid_idx: Option<u64> = None; |
| 158 | + let mut current_pos: u64 = 0; |
| 159 | + |
| 160 | + for line in content.lines() { |
| 161 | + let line_len = line.len() as u64 + 1; // +1 for newline |
| 162 | + |
| 163 | + if line.trim().is_empty() { |
| 164 | + current_pos += line_len; |
| 165 | + continue; |
| 166 | + } |
| 167 | + |
| 168 | + match parse_wal_line(line) { |
| 169 | + Ok((idx, _)) => { |
| 170 | + let idx_val = idx.value(); |
| 171 | + if expected_tree_size == 0 || idx_val < expected_tree_size { |
| 172 | + // This entry is within bounds |
| 173 | + last_valid_pos = current_pos + line_len; |
| 174 | + max_valid_idx = Some(match max_valid_idx { |
| 175 | + Some(prev) => prev.max(idx_val), |
| 176 | + None => idx_val, |
| 177 | + }); |
| 178 | + } else { |
| 179 | + // Entry is beyond expected tree size - stop here |
| 180 | + tracing::warn!( |
| 181 | + "WAL entry {} >= expected tree size {}, truncating", |
| 182 | + idx_val, |
| 183 | + expected_tree_size |
| 184 | + ); |
| 185 | + break; |
| 186 | + } |
| 187 | + } |
| 188 | + Err(e) => { |
| 189 | + tracing::warn!( |
| 190 | + "Failed to parse WAL line, truncating at position {}: {}", |
| 191 | + current_pos, |
| 192 | + e |
| 193 | + ); |
| 194 | + break; |
| 195 | + } |
| 196 | + } |
| 197 | + |
| 198 | + current_pos += line_len; |
| 199 | + } |
| 200 | + |
| 201 | + // Truncate file if needed |
| 202 | + if last_valid_pos < file_size { |
| 203 | + tracing::info!( |
| 204 | + "Truncating WAL from {} to {} bytes (removing {} bytes)", |
| 205 | + file_size, |
| 206 | + last_valid_pos, |
| 207 | + file_size - last_valid_pos |
| 208 | + ); |
| 209 | + file.set_len(last_valid_pos) |
| 210 | + .map_err(|e| Error::Internal(format!("failed to truncate WAL: {}", e)))?; |
| 211 | + file.sync_all() |
| 212 | + .map_err(|e| Error::Internal(format!("failed to sync truncated WAL: {}", e)))?; |
| 213 | + } |
| 214 | + |
| 215 | + // Return the tree size based on max index found + 1 (since indices are 0-based) |
| 216 | + Ok(max_valid_idx.map(|idx| idx + 1).unwrap_or(0)) |
| 217 | +} |
| 218 | + |
109 | 219 | /// Parse a single WAL line. |
110 | 220 | fn parse_wal_line(line: &str) -> Result<(LogIndex, Vec<IndexKey>)> { |
111 | 221 | let line = line.trim(); |
@@ -205,4 +315,101 @@ mod tests { |
205 | 315 | assert_eq!(idx.value(), 123); |
206 | 316 | assert_eq!(keys.len(), 0); |
207 | 317 | } |
| 318 | + |
| 319 | + #[test] |
| 320 | + fn test_validate_and_truncate_wal_no_truncation_needed() { |
| 321 | + let temp_file = NamedTempFile::new().unwrap(); |
| 322 | + let path = temp_file.path(); |
| 323 | + |
| 324 | + // Write entries 0-4 |
| 325 | + { |
| 326 | + let mut writer = WalWriter::open(path).unwrap(); |
| 327 | + let key = [1u8; 32]; |
| 328 | + for i in 0..5 { |
| 329 | + writer.append(LogIndex::new(i), &[key]).unwrap(); |
| 330 | + } |
| 331 | + writer.flush().unwrap(); |
| 332 | + } |
| 333 | + |
| 334 | + // Validate with expected size 5 - no truncation needed |
| 335 | + let actual_size = validate_and_truncate_wal(path, 5).unwrap(); |
| 336 | + assert_eq!(actual_size, 5); |
| 337 | + |
| 338 | + // Verify all entries still present |
| 339 | + let mut reader = WalReader::open(path).unwrap(); |
| 340 | + for i in 0..5 { |
| 341 | + let (idx, _) = reader.next_entry().unwrap().unwrap(); |
| 342 | + assert_eq!(idx.value(), i); |
| 343 | + } |
| 344 | + assert!(reader.next_entry().unwrap().is_none()); |
| 345 | + } |
| 346 | + |
| 347 | + #[test] |
| 348 | + fn test_validate_and_truncate_wal_truncates_excess() { |
| 349 | + let temp_file = NamedTempFile::new().unwrap(); |
| 350 | + let path = temp_file.path(); |
| 351 | + |
| 352 | + // Write entries 0-9 |
| 353 | + { |
| 354 | + let mut writer = WalWriter::open(path).unwrap(); |
| 355 | + let key = [1u8; 32]; |
| 356 | + for i in 0..10 { |
| 357 | + writer.append(LogIndex::new(i), &[key]).unwrap(); |
| 358 | + } |
| 359 | + writer.flush().unwrap(); |
| 360 | + } |
| 361 | + |
| 362 | + // Validate with expected size 5 - should truncate entries 5-9 |
| 363 | + let actual_size = validate_and_truncate_wal(path, 5).unwrap(); |
| 364 | + assert_eq!(actual_size, 5); |
| 365 | + |
| 366 | + // Verify only entries 0-4 remain |
| 367 | + let mut reader = WalReader::open(path).unwrap(); |
| 368 | + for i in 0..5 { |
| 369 | + let (idx, _) = reader.next_entry().unwrap().unwrap(); |
| 370 | + assert_eq!(idx.value(), i); |
| 371 | + } |
| 372 | + assert!(reader.next_entry().unwrap().is_none()); |
| 373 | + } |
| 374 | + |
| 375 | + #[test] |
| 376 | + fn test_validate_and_truncate_wal_empty_file() { |
| 377 | + let temp_file = NamedTempFile::new().unwrap(); |
| 378 | + let path = temp_file.path(); |
| 379 | + |
| 380 | + // Create empty file |
| 381 | + File::create(path).unwrap(); |
| 382 | + |
| 383 | + let actual_size = validate_and_truncate_wal(path, 5).unwrap(); |
| 384 | + assert_eq!(actual_size, 0); |
| 385 | + } |
| 386 | + |
| 387 | + #[test] |
| 388 | + fn test_validate_and_truncate_wal_nonexistent_file() { |
| 389 | + let temp_dir = tempfile::tempdir().unwrap(); |
| 390 | + let path = temp_dir.path().join("nonexistent.wal"); |
| 391 | + |
| 392 | + let actual_size = validate_and_truncate_wal(&path, 5).unwrap(); |
| 393 | + assert_eq!(actual_size, 0); |
| 394 | + } |
| 395 | + |
| 396 | + #[test] |
| 397 | + fn test_validate_and_truncate_wal_wal_behind_expected() { |
| 398 | + let temp_file = NamedTempFile::new().unwrap(); |
| 399 | + let path = temp_file.path(); |
| 400 | + |
| 401 | + // Write only entries 0-2 (WAL is behind expected) |
| 402 | + { |
| 403 | + let mut writer = WalWriter::open(path).unwrap(); |
| 404 | + let key = [1u8; 32]; |
| 405 | + for i in 0..3 { |
| 406 | + writer.append(LogIndex::new(i), &[key]).unwrap(); |
| 407 | + } |
| 408 | + writer.flush().unwrap(); |
| 409 | + } |
| 410 | + |
| 411 | + // Validate with expected size 10 - WAL is behind, no truncation |
| 412 | + let actual_size = validate_and_truncate_wal(path, 10).unwrap(); |
| 413 | + assert_eq!(actual_size, 3); |
| 414 | + } |
208 | 415 | } |
0 commit comments