2323import org .apache .fluss .exception .LogSegmentOffsetOverflowException ;
2424import org .apache .fluss .exception .LogStorageException ;
2525import org .apache .fluss .metadata .LogFormat ;
26- import org .apache .fluss .server .exception .CorruptIndexException ;
2726import org .apache .fluss .utils .FlussPaths ;
2827import org .apache .fluss .utils .types .Tuple2 ;
2928
@@ -137,6 +136,7 @@ public LoadedLogOffsets load() throws IOException {
137136 */
138137 private Tuple2 <Long , Long > recoverLog () throws IOException {
139138 if (!isCleanShutdown ) {
139+ long recoverLogStart = System .currentTimeMillis ();
140140 List <LogSegment > unflushed =
141141 logSegments .values (recoveryPointCheckpoint , Long .MAX_VALUE );
142142 int numUnflushed = unflushed .size ();
@@ -153,50 +153,57 @@ private Tuple2<Long, Long> recoverLog() throws IOException {
153153 numUnflushed ,
154154 logSegments .getTableBucket ());
155155
156+ int truncatedBytes = -1 ;
156157 try {
157- segment .sanityCheck ();
158- } catch (NoSuchFileException | CorruptIndexException e ) {
158+ truncatedBytes = recoverSegment (segment );
159+ } catch (InvalidOffsetException e ) {
160+ long startOffset = segment .getBaseOffset ();
159161 LOG .warn (
160- "Found invalid index file corresponding log file {} for bucket {}, "
161- + "recovering segment and rebuilding index files..." ,
162- segment .getFileLogRecords ().file ().getAbsoluteFile (),
162+ "Found invalid offset during recovery for bucket {}. Deleting the corrupt segment "
163+ + "and creating an empty one with starting offset {}" ,
163164 logSegments .getTableBucket (),
164- e );
165-
166- int truncatedBytes = -1 ;
167- try {
168- truncatedBytes = recoverSegment (segment );
169- } catch (InvalidOffsetException invalidOffsetException ) {
170- long startOffset = segment .getBaseOffset ();
171- LOG .warn (
172- "Found invalid offset during recovery for bucket {}. Deleting the corrupt segment "
173- + "and creating an empty one with starting offset {}" ,
174- logSegments .getTableBucket (),
175- startOffset );
176- truncatedBytes = segment .truncateTo (startOffset );
177- }
165+ startOffset );
166+ truncatedBytes = segment .truncateTo (startOffset );
167+ }
178168
179- if (truncatedBytes > 0 ) {
180- // we had an invalid message, delete all remaining log
181- LOG .warn (
182- "Corruption found in segment {} for bucket {}, truncating to offset {}" ,
183- segment .getBaseOffset (),
184- logSegments .getTableBucket (),
185- segment .readNextOffset ());
186- removeAndDeleteSegments (unflushedIter );
187- truncated = true ;
188- }
169+ if (truncatedBytes > 0 ) {
170+ // we had an invalid message, delete all remaining log
171+ LOG .warn (
172+ "Corruption found in segment {} for bucket {}, truncating to offset {}" ,
173+ segment .getBaseOffset (),
174+ logSegments .getTableBucket (),
175+ segment .readNextOffset ());
176+ removeAndDeleteSegments (unflushedIter );
177+ truncated = true ;
178+ } else {
179+ numFlushed += 1 ;
189180 }
190- numFlushed += 1 ;
191181 }
182+ long recoverLogEnd = System .currentTimeMillis ();
183+ LOG .info (
184+ "Log recovery completed for bucket {} in {} ms" ,
185+ logSegments .getTableBucket (),
186+ recoverLogEnd - recoverLogStart );
192187 }
193188
194- // TODO truncate log to recover maybe unflush segments.
195189 if (logSegments .isEmpty ()) {
190+ // TODO: use logStartOffset if issue https://github.com/apache/fluss/issues/744 ready
196191 logSegments .add (LogSegment .open (logTabletDir , 0L , conf , logFormat ));
197192 }
193+
198194 long logEndOffset = logSegments .lastSegment ().get ().readNextOffset ();
199- return Tuple2 .of (recoveryPointCheckpoint , logEndOffset );
195+
196+ // Update the recovery point if there was a clean shutdown and did not perform any changes
197+ // to the segment. Otherwise, we just ensure that the recovery point is not ahead of the log
198+ // end offset. To ensure correctness and to make it easier to reason about, it's best to
199+ // only advance the recovery point when the log is flushed. If we advanced the recovery
200+ // point here, we could skip recovery for unflushed segments if the server crashed after we
201+ // checkpointed the recovery point and before we flush the segment.
202+ if (isCleanShutdown ) {
203+ return Tuple2 .of (logEndOffset , logEndOffset );
204+ } else {
205+ return Tuple2 .of (Math .min (recoveryPointCheckpoint , logEndOffset ), logEndOffset );
206+ }
200207 }
201208
202209 /**
@@ -294,6 +301,20 @@ private void loadSegmentFiles() throws IOException {
294301 long baseOffset = FlussPaths .offsetFromFile (file );
295302 LogSegment segment =
296303 LogSegment .open (logTabletDir , baseOffset , conf , true , 0 , logFormat );
304+
305+ try {
306+ segment .sanityCheck ();
307+ } catch (NoSuchFileException e ) {
308+ if (isCleanShutdown
309+ || segment .getBaseOffset () < recoveryPointCheckpoint ) {
310+ LOG .error (
311+ "Could not find offset index file corresponding to log file {} "
312+ + "for bucket {}, recovering segment and rebuilding index files..." ,
313+ segment .getFileLogRecords ().file ().getAbsoluteFile (),
314+ logSegments .getTableBucket ());
315+ }
316+ recoverSegment (segment );
317+ }
297318 logSegments .add (segment );
298319 }
299320 }
0 commit comments