Skip to content
Open
3 changes: 3 additions & 0 deletions io/io/inc/TFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,9 @@ class TFile : public TDirectoryFile {
};
enum ERelativeTo { kBeg = 0, kCur = 1, kEnd = 2 };
enum { kStartBigFile = 2000000000 };
enum {
kMaxGapSize = 2000000000
}; // Maximum absolute value of the free segment on-disk size marker

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might wanna stress out (via a static_assert or comment) that this MUST be less than numeric_limits<Int_t>::max()

/// File type
enum EFileType {
// clang++ <v20 (-Wshadow) complains about shadowing TSystem.h global enum ESendRecvOptions. Let's silence warning:
Expand Down
10 changes: 9 additions & 1 deletion io/io/inc/TKey.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ class TBuffer;
#include "TBuffer.h"
#endif

#include <array>

class TBrowser;
class TDirectory;
class TFile;
Expand All @@ -38,6 +40,11 @@ class TKey : public TNamed {
Int_t UnzipBuffer(char *targetBuffer, const char *compressedBuffer) const;

protected:
// After a key that has been placed in a gap larger than the key itself, one or very rarely two marker bytes
// follow to restore the linked list of segments. The two char buffers are meant to be written to disk, i.e.
// they should contain big-endian encoded negative integer values for the size of a gap, or zero if unused.
using GapHeaderBuf_t = std::array<std::array<char, sizeof(Int_t)>, 2>;

Int_t fVersion; ///< Key version identifier
Int_t fNbytes; ///< Number of bytes for the whole key on file (key header and data)
Int_t fObjlen; ///< Length of uncompressed object in bytes
Expand All @@ -47,7 +54,7 @@ class TKey : public TNamed {
Long64_t fSeekKey; ///< Location of object on file
Long64_t fSeekPdir; ///< Location of parent directory on file
TString fClassName; ///< Object Class name
Int_t fLeft; ///< Number of bytes left in current segment
Long64_t fLeft; ///< Number of bytes left in current segment
char *fBuffer; ///< Object buffer
TBuffer *fBufferRef; ///< Pointer to the TBuffer object
UShort_t fPidOffset; ///<!Offset to be added to the pid index in this key/buffer. This is actually saved in the high bits of fSeekPdir
Expand All @@ -58,6 +65,7 @@ class TKey : public TNamed {
void Build(TDirectory* motherDir, const char* classname, Long64_t filepos);
void Reset(); // Currently only for the use of TBasket.
virtual Int_t WriteFileKeepBuffer(TFile *f = nullptr);
UShort_t FillGapHeaderBuffers(TFile &f, GapHeaderBuf_t &buffers) const;

public:
TKey();
Expand Down
117 changes: 85 additions & 32 deletions io/io/src/TFile.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,8 @@ The structure of a directory is shown in TDirectoryFile::TDirectoryFile
#include "ROOT/RConcurrentHashColl.hxx"
#include <memory>
#include <cinttypes>
#include <cassert>
#include <algorithm>

#ifdef R__FBSD
#include <sys/extattr.h>
Expand Down Expand Up @@ -1504,27 +1506,58 @@ Bool_t TFile::IsOpen() const

void TFile::MakeFree(Long64_t first, Long64_t last)
{
TFree *f1 = (TFree*)fFree->First();
if (!f1) return;
TFree *newfree = f1->AddFree(fFree,first,last);
if(!newfree) return;
Long64_t nfirst = newfree->GetFirst();
Long64_t nlast = newfree->GetLast();
Long64_t nbytesl= nlast-nfirst+1;
if (nbytesl > 2000000000) nbytesl = 2000000000;
Int_t nbytes = -Int_t (nbytesl);
char buffer[sizeof(Int_t)];
char *pbuffer = buffer;
tobuf(pbuffer, nbytes);
if (last == fEND-1) fEND = nfirst;
Seek(nfirst);
// We could not update the meta data for this block on the file.
// This is not fatal as this only means that we won't get it 'right'
// if we ever need to Recover the file before the block is actually
// (attempted to be reused.
// coverity[unchecked_value]
WriteBuffer(buffer, sizeof(buffer));
if (fMustFlush) Flush();
assert(0 < first && first < last && last < fEND);

TFree *f1 = static_cast<TFree *>(fFree->First());
assert(f1); // There must always be at least the virtual free segment at the end of the file

TFree *newfree = f1->AddFree(fFree, first, last);
assert(newfree); // AddFree() always succeeds

const Long64_t nfirst = newfree->GetFirst();
const Long64_t nlast = newfree->GetLast();
assert(nfirst > 0 && nfirst <= first && nlast >= last);
Long64_t nbytesl = std::min(nlast, fEND) - nfirst + 1;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this change to std::min be already part of the previous commit?

assert(nbytesl >= static_cast<Long64_t>(sizeof(Int_t)));

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these assertions valid even if the current TFile comes from opening a "corrupted" file?
If yes, probably a comment explaining why would be beneficial; if no, this should be an exception


auto fnWriteGapHeader = [this](ULong64_t offset, ULong64_t gapSize) {
assert((gapSize <= TFile::kMaxGapSize) && (fEND > 0) &&
((offset + sizeof(Int_t)) <= static_cast<ULong64_t>(fEND)));

auto nbytes = -static_cast<Int_t>(gapSize);
char buffer[sizeof(Int_t)];
char *pbuffer = buffer;
tobuf(pbuffer, nbytes);

Seek(offset);
if (WriteBuffer(buffer, sizeof(buffer)) != 0) {
// Not fatal, this only means that we won't get it 'right'
// if we ever need to Recover the file before the block is actually
// attempted to be reused.
Warning("TFile::MakeFree()", "failed to write free segment header");
}
};

Long64_t offset = nfirst;
while (nbytesl > TFile::kMaxGapSize) {
// For gaps larger than 2GB, link several consecutive gaps together. This has to be done because the size
// marker on disk is 32 bits. The free list, however, will still have one large gap because the free list
// uses 64 bit [first..last] pairs to represent gaps. File recovery will merge consecutive gaps.

// Make sure that the second gap is large enough to write its size on disk
Long64_t gapSize = TFile::kMaxGapSize - sizeof(Int_t);
fnWriteGapHeader(offset, gapSize);

nbytesl -= gapSize;
offset += gapSize;
}
fnWriteGapHeader(offset, nbytesl);

if (last == fEND - 1)

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I understand this check is not influenced by the while above, so in principle couldn't this be done before it and, if positive, used to skip the loop altogether? (since in this case we are truncating the file before the whole free chain if I understand correctly)

fEND = nfirst;

if (fMustFlush)
Flush();
}

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -2125,7 +2158,14 @@ Int_t TFile::Recover()

fEND = Long64_t(size);

if (fWritable && !fFree) fFree = new TList;
if (fWritable) {
if (fFree) {
// Remove an existing free list because we will recover it from the chain of segments
fFree->Delete();
delete fFree;
}
fFree = new TList();
}

Int_t nrecov = 0;
nwheader = 1024;
Expand All @@ -2148,9 +2188,19 @@ Int_t TFile::Recover()
break;
}
if (nbytes < 0) {
if ((-nbytes < static_cast<Int_t>(sizeof(Int_t))) || (-nbytes > static_cast<Int_t>(TFile::kMaxGapSize))) {
Error("Recover", "Address = %lld\tNbytes = %d\t=====E R R O R=======", idcur, nbytes);
break;
}
Comment thread
jblomer marked this conversation as resolved.
if (fWritable) {
const Long64_t last = idcur - nbytes - 1;
if (fFree->Last() && static_cast<TFree *>(fFree->Last())->GetLast() + 1 == idcur) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It feels like rather than equality a greater than would be more 'stable', isn't it?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I'd prefer the strict equality because we construct it this way in the loop before. I.e., we want to pass the condition exactly when we added a free segment in the last loop iteration and in this case we constructed idcur to point just after the last free byte.

static_cast<TFree *>(fFree->Last())->SetLast(last);
} else {
new TFree(fFree, idcur, last);
}
}
idcur -= nbytes;
if (fWritable) new TFree(fFree,idcur,idcur-nbytes-1);
Seek(idcur);
continue;
}
Version_t versionkey;
Expand Down Expand Up @@ -2196,15 +2246,18 @@ Int_t TFile::Recover()
idcur += nbytes;
}
if (fWritable) {
Long64_t max_file_size = Long64_t(kStartBigFile);
if (max_file_size < fEND) max_file_size = fEND+1000000000;
TFree *last = (TFree*)fFree->Last();
if (last) {
last->AddFree(fFree,fEND,max_file_size);
} else {
new TFree(fFree,fEND,max_file_size);
if (fFree->Last() && static_cast<TFree *>(fFree->Last())->GetLast() == idcur - 1) {
// If the last recovered segment is a free segment, remove it and replace it by a newly created artificial one
fEND = static_cast<TFree *>(fFree->Last())->GetFirst();
delete fFree->Last();
fFree->Remove(fFree->LastLink());
}
if (nrecov) Write();
Long64_t max_file_size = Long64_t(kStartBigFile);
if (max_file_size < fEND)
max_file_size = fEND + 1000000000;
Comment thread
pcanal marked this conversation as resolved.
Comment thread
pcanal marked this conversation as resolved.
new TFree(fFree, fEND, max_file_size);
if (nrecov)
Write();
}
return nrecov;
}
Expand Down
7 changes: 4 additions & 3 deletions io/io/src/TFree.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,10 @@ TFree *TFree::AddFree(TList *lfree, Long64_t first, Long64_t last)
}
idcur = (TFree*)lfree->After(idcur);
}
return 0;

// never here
assert(false);
return nullptr;
}

////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -126,7 +129,6 @@ void TFree::FillBuffer(char *&buffer)
TFree *TFree::GetBestFree(TList *lfree, Int_t nbytes)
{
TFree *idcur = this;
if (idcur == 0) return 0;
TFree *idcur1 = 0;
do {
Long64_t nleft = Long64_t(idcur->fLast - idcur->fFirst +1);
Expand Down Expand Up @@ -186,4 +188,3 @@ Int_t TFree::Sizeof() const
if (fLast > TFile::kStartBigFile) return 18;
else return 10;
}

104 changes: 98 additions & 6 deletions io/io/src/TKey.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ void TKey::Create(Int_t nbytes, TFile* externFile)
fLeft = -1;
if (!fBuffer) fBuffer = new char[nsize];
} else {
fLeft = Int_t(bestfree->GetLast() - fSeekKey - nsize + 1);
fLeft = bestfree->GetLast() - fSeekKey - nsize + 1;
}
//*-*----------------- Case where new object fills exactly a deleted gap
fNbytes = nsize;
Expand All @@ -554,11 +554,11 @@ void TKey::Create(Int_t nbytes, TFile* externFile)
//*-*----------------- Case where new object is placed in a deleted gap larger than itself
if (fLeft > 0) { // found a bigger segment
if (!fBuffer) {
// We reserve space for the new free segment size marker but we don't fill the buffer yet.
// We have to check (on writing, i.e. when we do I/O) if we are writing into a large gap (>2GB),
// in which case we need to read the first link size first.
fBuffer = new char[nsize+sizeof(Int_t)];
}
char *buffer = fBuffer+nsize;
Int_t nbytesleft = -fLeft; // set header of remaining record
tobuf(buffer, nbytesleft);
bestfree->SetFirst(fSeekKey+nsize);
}

Expand Down Expand Up @@ -1480,6 +1480,74 @@ void TKey::Streamer(TBuffer &b)
}
}

/// Fills zero, one, or two (rare) free segment header buffers into the provided `buffers` parameter.
/// If the key fits exactly in a provided gap or is at the end of the file, no buffer is filled. Otherwise,
/// the buffer for the integer immediately after the key data is filled with the size of the remaining, shortened gap.
/// In rare cases, it can be necessary to fill the second buffer with another free segment link.
/// Returns the number of buffers filled.
UShort_t TKey::FillGapHeaderBuffers(TFile &f, GapHeaderBuf_t &buffers) const
{
if (fLeft <= 0)
return 0;

// We must fill at least one free segment header

if (fLeft <= TFile::kMaxGapSize) {
char *pbuf = buffers[0].data();
tobuf(pbuf, -static_cast<Int_t>(fLeft));
return 1;
}

// Large gap, we need to read the free segment headers that are going to be overwritten by the key to find the
// first free segment header beyond the key. In fact, we must find a free segment header far enough beyond the
// key so that we can inject another, new free segment header between the key end and the existing one (which means
// key end + sizeof(int))
const auto newGapOffset = fSeekKey + fNbytes;
auto existingGapOffset = fSeekKey;
auto prevGapOffset = 0;
do {
char readBuf[sizeof(Int_t)];
Int_t header = 0;

f.Seek(existingGapOffset);
if (f.ReadBuffer(readBuf, sizeof(Int_t))) {
Error("FillGapHeaderBuffers", "cannot read free segment link size at %lld", existingGapOffset);
return 0;
}

char *pbuf = readBuf;
frombuf(pbuf, &header);
const Int_t gapSize = -header;
if (gapSize < static_cast<Int_t>(sizeof(Int_t)) || gapSize > TFile::kMaxGapSize ||
gapSize > (newGapOffset - existingGapOffset + fLeft)) {
Error("FillGapHeaderBuffers", "invalid free segment header size %d at %lld", gapSize, existingGapOffset);
return 0;
}

prevGapOffset = existingGapOffset;
existingGapOffset += gapSize;
} while (existingGapOffset <= newGapOffset + static_cast<Long64_t>(sizeof(Int_t)));

if ((prevGapOffset < newGapOffset) || (existingGapOffset - newGapOffset) <= TFile::kMaxGapSize) {
// Normal case: writing the new free segment header won't overwrite an existing one beyond the key.
// Rare case: the new free segment header will overwrite (part of) and existing one beyond the key.
// We can then lengthen the gap beteween [prevGapOffset, newGapOffset] except if this has already
// the maximum length.
char *pbuf = buffers[0].data();
tobuf(pbuf, -static_cast<Int_t>(existingGapOffset - newGapOffset));
return 1;
}

// Very rare case: we need two free segment headers after the key
char *pbuf = buffers[0].data();
tobuf(pbuf, -static_cast<Int_t>(sizeof(Int_t)));
const auto newGapSize = existingGapOffset - newGapOffset - sizeof(Int_t);
assert(newGapSize <= TFile::kMaxGapSize);
pbuf = buffers[1].data();
tobuf(pbuf, -static_cast<Int_t>(newGapSize));
return 2;
}

////////////////////////////////////////////////////////////////////////////////
/// Write the encoded object supported by this key.
/// The function returns the number of bytes committed to the file.
Expand All @@ -1498,9 +1566,21 @@ Int_t TKey::WriteFile(Int_t cycle, TFile* f)
buffer = fBuffer;
}

if (fLeft > 0) nsize += sizeof(Int_t);
GapHeaderBuf_t gapHeaderBuffers;
auto nGaps = FillGapHeaderBuffers(*f, gapHeaderBuffers);
assert(nGaps <= 2);
if (nGaps > 0) {
std::memcpy(fBuffer + fNbytes, gapHeaderBuffers[0].data(), sizeof(Int_t));
nsize += sizeof(Int_t);
}

f->Seek(fSeekKey);
Bool_t result = f->WriteBuffer(buffer,nsize);

if (nGaps == 2) {
f->WriteBuffer(gapHeaderBuffers[1].data(), sizeof(Int_t));
nsize += sizeof(Int_t);
}
//f->Flush(); Flushing takes too much time.
// Let user flush the file when they want.
if (gDebug) {
Expand All @@ -1525,9 +1605,21 @@ Int_t TKey::WriteFileKeepBuffer(TFile *f)
Int_t nsize = fNbytes;
char *buffer = fBuffer;

if (fLeft > 0) nsize += sizeof(Int_t);
GapHeaderBuf_t gapHeaderBuffers;
auto nGaps = FillGapHeaderBuffers(*f, gapHeaderBuffers);
assert(nGaps <= 2);
if (nGaps > 0) {
std::memcpy(fBuffer + fNbytes, gapHeaderBuffers[0].data(), sizeof(Int_t));
nsize += sizeof(Int_t);
}

f->Seek(fSeekKey);
Bool_t result = f->WriteBuffer(buffer,nsize);

if (nGaps == 2) {
f->WriteBuffer(gapHeaderBuffers[1].data(), sizeof(Int_t));

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't we need a f->Seek(end_of_first_gap) here?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because for just wrote buffer before which included the first gap.

nsize += sizeof(Int_t);
}
//f->Flush(); Flushing takes too much time.
// Let user flush the file when they want.
if (gDebug) {
Expand Down
Loading
Loading