root-project · jblomer · Jun 2, 2026 · Jun 2, 2026 · Jun 2, 2026 · Jun 4, 2026
@@ -276,6 +276,9 @@ class TFile : public TDirectoryFile {
    };
    enum ERelativeTo { kBeg = 0, kCur = 1, kEnd = 2 };
    enum { kStartBigFile  = 2000000000 };
+   enum {
+      kMaxGapSize = 2000000000
+   }; // Maximum absolute value of the free segment on-disk size marker
    /// File type
    enum EFileType {
 // clang++ <v20 (-Wshadow) complains about shadowing TSystem.h global enum ESendRecvOptions. Let's silence warning:

@@ -21,6 +21,8 @@ class TBuffer;
 #include "TBuffer.h"
 #endif
 
+#include <array>
+
 class TBrowser;
 class TDirectory;
 class TFile;
@@ -38,6 +40,11 @@ class TKey : public TNamed {
    Int_t UnzipBuffer(char *targetBuffer, const char *compressedBuffer) const;
 
 protected:
+   // After a key that has been placed in a gap larger than the key itself, one or very rarely two marker bytes
+   // follow to restore the linked list of segments. The two char buffers are meant to be written to disk, i.e.
+   // they should contain big-endian encoded negative integer values for the size of a gap, or zero if unused.
+   using GapHeaderBuf_t = std::array<std::array<char, sizeof(Int_t)>, 2>;
+
    Int_t       fVersion;     ///< Key version identifier
    Int_t       fNbytes;      ///< Number of bytes for the whole key on file (key header and data)
    Int_t       fObjlen;      ///< Length of uncompressed object in bytes
@@ -47,7 +54,7 @@ class TKey : public TNamed {
    Long64_t    fSeekKey;     ///< Location of object on file
    Long64_t    fSeekPdir;    ///< Location of parent directory on file
    TString     fClassName;   ///< Object Class name
-   Int_t       fLeft;        ///< Number of bytes left in current segment
+   Long64_t    fLeft;        ///< Number of bytes left in current segment
    char       *fBuffer;      ///< Object buffer
    TBuffer    *fBufferRef;   ///< Pointer to the TBuffer object
    UShort_t    fPidOffset;   ///<!Offset to be added to the pid index in this key/buffer.  This is actually saved in the high bits of fSeekPdir
@@ -58,6 +65,7 @@ class TKey : public TNamed {
            void     Build(TDirectory* motherDir, const char* classname, Long64_t filepos);
            void     Reset(); // Currently only for the use of TBasket.
    virtual Int_t    WriteFileKeepBuffer(TFile *f = nullptr);
+           UShort_t FillGapHeaderBuffers(TFile &f, GapHeaderBuf_t &buffers) const;
 
  public:
    TKey();

@@ -173,6 +173,8 @@ The structure of a directory is shown in TDirectoryFile::TDirectoryFile
 #include "ROOT/RConcurrentHashColl.hxx"
 #include <memory>
 #include <cinttypes>
+#include <cassert>
+#include <algorithm>
 
 #ifdef R__FBSD
 #include <sys/extattr.h>
@@ -1504,27 +1506,58 @@ Bool_t TFile::IsOpen() const
 
 void TFile::MakeFree(Long64_t first, Long64_t last)
 {
-   TFree *f1      = (TFree*)fFree->First();
-   if (!f1) return;
-   TFree *newfree = f1->AddFree(fFree,first,last);
-   if(!newfree) return;
-   Long64_t nfirst = newfree->GetFirst();
-   Long64_t nlast  = newfree->GetLast();
-   Long64_t nbytesl= nlast-nfirst+1;
-   if (nbytesl > 2000000000) nbytesl = 2000000000;
-   Int_t nbytes    = -Int_t (nbytesl);
-   char buffer[sizeof(Int_t)];
-   char *pbuffer = buffer;
-   tobuf(pbuffer, nbytes);
-   if (last == fEND-1) fEND = nfirst;
-   Seek(nfirst);
-   // We could not update the meta data for this block on the file.
-   // This is not fatal as this only means that we won't get it 'right'
-   // if we ever need to Recover the file before the block is actually
-   // (attempted to be reused.
-   // coverity[unchecked_value]
-   WriteBuffer(buffer, sizeof(buffer));
-   if (fMustFlush) Flush();
+   assert(0 < first && first < last && last < fEND);
+
+   TFree *f1 = static_cast<TFree *>(fFree->First());
+   assert(f1); // There must always be at least the virtual free segment at the end of the file
+
+   TFree *newfree = f1->AddFree(fFree, first, last);
+   assert(newfree); // AddFree() always succeeds
+
+   const Long64_t nfirst = newfree->GetFirst();
+   const Long64_t nlast = newfree->GetLast();
+   assert(nfirst > 0 && nfirst <= first && nlast >= last);
+   Long64_t nbytesl = std::min(nlast, fEND) - nfirst + 1;
+   assert(nbytesl >= static_cast<Long64_t>(sizeof(Int_t)));
+
+   auto fnWriteGapHeader = [this](ULong64_t offset, ULong64_t gapSize) {
+      assert((gapSize <= TFile::kMaxGapSize) && (fEND > 0) &&
+             ((offset + sizeof(Int_t)) <= static_cast<ULong64_t>(fEND)));
+
+      auto nbytes = -static_cast<Int_t>(gapSize);
+      char buffer[sizeof(Int_t)];
+      char *pbuffer = buffer;
+      tobuf(pbuffer, nbytes);
+
+      Seek(offset);
+      if (WriteBuffer(buffer, sizeof(buffer)) != 0) {
+         // Not fatal, this only means that we won't get it 'right'
+         // if we ever need to Recover the file before the block is actually
+         // attempted to be reused.
+         Warning("TFile::MakeFree()", "failed to write free segment header");
+      }
+   };
+
+   Long64_t offset = nfirst;
+   while (nbytesl > TFile::kMaxGapSize) {
+      // For gaps larger than 2GB, link several consecutive gaps together. This has to be done because the size
+      // marker on disk is 32 bits. The free list, however, will still have one large gap because the free list
+      // uses 64 bit [first..last] pairs to represent gaps. File recovery will merge consecutive gaps.
+
+      // Make sure that the second gap is large enough to write its size on disk
+      Long64_t gapSize = TFile::kMaxGapSize - sizeof(Int_t);
+      fnWriteGapHeader(offset, gapSize);
+
+      nbytesl -= gapSize;
+      offset += gapSize;
+   }
+   fnWriteGapHeader(offset, nbytesl);
+
+   if (last == fEND - 1)
+      fEND = nfirst;
+
+   if (fMustFlush)
+      Flush();
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -2125,7 +2158,14 @@ Int_t TFile::Recover()
 
    fEND = Long64_t(size);
 
-   if (fWritable && !fFree) fFree  = new TList;
+   if (fWritable) {
+      if (fFree) {
+         // Remove an existing free list because we will recover it from the chain of segments
+         fFree->Delete();
+         delete fFree;
+      }
+      fFree = new TList();
+   }
 
    Int_t nrecov = 0;
    nwheader = 1024;
@@ -2148,9 +2188,19 @@ Int_t TFile::Recover()
          break;
       }
       if (nbytes < 0) {
+         if ((-nbytes < static_cast<Int_t>(sizeof(Int_t))) || (-nbytes > static_cast<Int_t>(TFile::kMaxGapSize))) {
+            Error("Recover", "Address = %lld\tNbytes = %d\t=====E R R O R=======", idcur, nbytes);
+            break;
+         }
+         if (fWritable) {
+            const Long64_t last = idcur - nbytes - 1;
+            if (fFree->Last() && static_cast<TFree *>(fFree->Last())->GetLast() + 1 == idcur) {
+               static_cast<TFree *>(fFree->Last())->SetLast(last);
+            } else {
+               new TFree(fFree, idcur, last);
+            }
+         }
          idcur -= nbytes;
-         if (fWritable) new TFree(fFree,idcur,idcur-nbytes-1);
-         Seek(idcur);
          continue;
       }
       Version_t versionkey;
@@ -2196,15 +2246,18 @@ Int_t TFile::Recover()
       idcur += nbytes;
    }
    if (fWritable) {
-      Long64_t max_file_size = Long64_t(kStartBigFile);
-      if (max_file_size < fEND) max_file_size = fEND+1000000000;
-      TFree *last = (TFree*)fFree->Last();
-      if (last) {
-         last->AddFree(fFree,fEND,max_file_size);
-      } else {
-         new TFree(fFree,fEND,max_file_size);
+      if (fFree->Last() && static_cast<TFree *>(fFree->Last())->GetLast() == idcur - 1) {
+         // If the last recovered segment is a free segment, remove it and replace it by a newly created artificial one
+         fEND = static_cast<TFree *>(fFree->Last())->GetFirst();
+         delete fFree->Last();
+         fFree->Remove(fFree->LastLink());
       }
-      if (nrecov) Write();
+      Long64_t max_file_size = Long64_t(kStartBigFile);
+      if (max_file_size < fEND)
+         max_file_size = fEND + 1000000000;
+      new TFree(fFree, fEND, max_file_size);
+      if (nrecov)
+         Write();
    }
    return nrecov;
 }

@@ -92,7 +92,10 @@ TFree *TFree::AddFree(TList *lfree, Long64_t first, Long64_t last)
       }
       idcur = (TFree*)lfree->After(idcur);
    }
-   return 0;
+
+   // never here
+   assert(false);
+   return nullptr;
 }
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -126,7 +129,6 @@ void TFree::FillBuffer(char *&buffer)
 TFree *TFree::GetBestFree(TList *lfree, Int_t nbytes)
 {
    TFree *idcur = this;
-   if (idcur == 0) return 0;
    TFree *idcur1 = 0;
    do {
       Long64_t nleft = Long64_t(idcur->fLast - idcur->fFirst +1);
@@ -186,4 +188,3 @@ Int_t TFree::Sizeof() const
    if (fLast > TFile::kStartBigFile) return 18;
    else                              return 10;
 }
-
@@ -540,7 +540,7 @@ void TKey::Create(Int_t nbytes, TFile* externFile)
       fLeft   = -1;
       if (!fBuffer) fBuffer = new char[nsize];
    } else {
-      fLeft = Int_t(bestfree->GetLast() - fSeekKey - nsize + 1);
+      fLeft = bestfree->GetLast() - fSeekKey - nsize + 1;
    }
 //*-*----------------- Case where new object fills exactly a deleted gap
    fNbytes = nsize;
@@ -554,11 +554,11 @@ void TKey::Create(Int_t nbytes, TFile* externFile)
 //*-*----------------- Case where new object is placed in a deleted gap larger than itself
    if (fLeft > 0) {    // found a bigger segment
       if (!fBuffer) {
+         // We reserve space for the new free segment size marker but we don't fill the buffer yet.
+         // We have to check (on writing, i.e. when we do I/O) if we are writing into a large gap (>2GB),
+         // in which case we need to read the first link size first.
          fBuffer = new char[nsize+sizeof(Int_t)];
       }
-      char *buffer  = fBuffer+nsize;
-      Int_t nbytesleft = -fLeft;  // set header of remaining record
-      tobuf(buffer, nbytesleft);
       bestfree->SetFirst(fSeekKey+nsize);
    }
 
@@ -1480,6 +1480,74 @@ void TKey::Streamer(TBuffer &b)
    }
 }
 
+/// Fills zero, one, or two (rare) free segment header buffers into the provided `buffers` parameter.
+/// If the key fits exactly in a provided gap or is at the end of the file, no buffer is filled. Otherwise,
+/// the buffer for the integer immediately after the key data is filled with the size of the remaining, shortened gap.
+/// In rare cases, it can be necessary to fill the second buffer with another free segment link.
+/// Returns the number of buffers filled.
+UShort_t TKey::FillGapHeaderBuffers(TFile &f, GapHeaderBuf_t &buffers) const
+{
+   if (fLeft <= 0)
+      return 0;
+
+   // We must fill at least one free segment header
+
+   if (fLeft <= TFile::kMaxGapSize) {
+      char *pbuf = buffers[0].data();
+      tobuf(pbuf, -static_cast<Int_t>(fLeft));
+      return 1;
+   }
+
+   // Large gap, we need to read the free segment headers that are going to be overwritten by the key to find the
+   // first free segment header beyond the key. In fact, we must find a free segment header far enough beyond the
+   // key so that we can inject another, new free segment header between the key end and the existing one (which means
+   // key end + sizeof(int))
+   const auto newGapOffset = fSeekKey + fNbytes;
+   auto existingGapOffset = fSeekKey;
+   auto prevGapOffset = 0;
+   do {
+      char readBuf[sizeof(Int_t)];
+      Int_t header = 0;
+
+      f.Seek(existingGapOffset);
+      if (f.ReadBuffer(readBuf, sizeof(Int_t))) {
+         Error("FillGapHeaderBuffers", "cannot read free segment link size at %lld", existingGapOffset);
+         return 0;
+      }
+
+      char *pbuf = readBuf;
+      frombuf(pbuf, &header);
+      const Int_t gapSize = -header;
+      if (gapSize < static_cast<Int_t>(sizeof(Int_t)) || gapSize > TFile::kMaxGapSize ||
+          gapSize > (newGapOffset - existingGapOffset + fLeft)) {
+         Error("FillGapHeaderBuffers", "invalid free segment header size %d at %lld", gapSize, existingGapOffset);
+         return 0;
+      }
+
+      prevGapOffset = existingGapOffset;
+      existingGapOffset += gapSize;
+   } while (existingGapOffset <= newGapOffset + static_cast<Long64_t>(sizeof(Int_t)));
+
+   if ((prevGapOffset < newGapOffset) || (existingGapOffset - newGapOffset) <= TFile::kMaxGapSize) {
+      // Normal case: writing the new free segment header won't overwrite an existing one beyond the key.
+      // Rare case: the new free segment header will overwrite (part of) and existing one beyond the key.
+      //            We can then lengthen the gap beteween [prevGapOffset, newGapOffset] except if this has already
+      //            the maximum length.
+      char *pbuf = buffers[0].data();
+      tobuf(pbuf, -static_cast<Int_t>(existingGapOffset - newGapOffset));
+      return 1;
+   }
+
+   // Very rare case: we need two free segment headers after the key
+   char *pbuf = buffers[0].data();
+   tobuf(pbuf, -static_cast<Int_t>(sizeof(Int_t)));
+   const auto newGapSize = existingGapOffset - newGapOffset - sizeof(Int_t);
+   assert(newGapSize <= TFile::kMaxGapSize);
+   pbuf = buffers[1].data();
+   tobuf(pbuf, -static_cast<Int_t>(newGapSize));
+   return 2;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 /// Write the encoded object supported by this key.
 /// The function returns the number of bytes committed to the file.
@@ -1498,9 +1566,21 @@ Int_t TKey::WriteFile(Int_t cycle, TFile* f)
       buffer = fBuffer;
    }
 
-   if (fLeft > 0) nsize += sizeof(Int_t);
+   GapHeaderBuf_t gapHeaderBuffers;
+   auto nGaps = FillGapHeaderBuffers(*f, gapHeaderBuffers);
+   assert(nGaps <= 2);
+   if (nGaps > 0) {
+      std::memcpy(fBuffer + fNbytes, gapHeaderBuffers[0].data(), sizeof(Int_t));
+      nsize += sizeof(Int_t);
+   }
+
    f->Seek(fSeekKey);
    Bool_t result = f->WriteBuffer(buffer,nsize);
+
+   if (nGaps == 2) {
+      f->WriteBuffer(gapHeaderBuffers[1].data(), sizeof(Int_t));
+      nsize += sizeof(Int_t);
+   }
    //f->Flush(); Flushing takes too much time.
    //            Let user flush the file when they want.
    if (gDebug) {
@@ -1525,9 +1605,21 @@ Int_t TKey::WriteFileKeepBuffer(TFile *f)
    Int_t nsize  = fNbytes;
    char *buffer = fBuffer;
 
-   if (fLeft > 0) nsize += sizeof(Int_t);
+   GapHeaderBuf_t gapHeaderBuffers;
+   auto nGaps = FillGapHeaderBuffers(*f, gapHeaderBuffers);
+   assert(nGaps <= 2);
+   if (nGaps > 0) {
+      std::memcpy(fBuffer + fNbytes, gapHeaderBuffers[0].data(), sizeof(Int_t));
+      nsize += sizeof(Int_t);
+   }
+
    f->Seek(fSeekKey);
    Bool_t result = f->WriteBuffer(buffer,nsize);
+
+   if (nGaps == 2) {
+      f->WriteBuffer(gapHeaderBuffers[1].data(), sizeof(Int_t));
+      nsize += sizeof(Int_t);
+   }
    //f->Flush(); Flushing takes too much time.
    //            Let user flush the file when they want.
    if (gDebug) {