-
Notifications
You must be signed in to change notification settings - Fork 1.5k
[io] Several fixes to handling of gaps and recovery #22481
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
c4dc0ca
19fd388
a198575
897fed9
4c58a12
da9bcf2
d6678be
17f2698
d983243
86a8bfd
0989124
1d071c7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -173,6 +173,8 @@ The structure of a directory is shown in TDirectoryFile::TDirectoryFile | |
| #include "ROOT/RConcurrentHashColl.hxx" | ||
| #include <memory> | ||
| #include <cinttypes> | ||
| #include <cassert> | ||
| #include <algorithm> | ||
|
|
||
| #ifdef R__FBSD | ||
| #include <sys/extattr.h> | ||
|
|
@@ -1504,27 +1506,58 @@ Bool_t TFile::IsOpen() const | |
|
|
||
| void TFile::MakeFree(Long64_t first, Long64_t last) | ||
| { | ||
| TFree *f1 = (TFree*)fFree->First(); | ||
| if (!f1) return; | ||
| TFree *newfree = f1->AddFree(fFree,first,last); | ||
| if(!newfree) return; | ||
| Long64_t nfirst = newfree->GetFirst(); | ||
| Long64_t nlast = newfree->GetLast(); | ||
| Long64_t nbytesl= nlast-nfirst+1; | ||
| if (nbytesl > 2000000000) nbytesl = 2000000000; | ||
| Int_t nbytes = -Int_t (nbytesl); | ||
| char buffer[sizeof(Int_t)]; | ||
| char *pbuffer = buffer; | ||
| tobuf(pbuffer, nbytes); | ||
| if (last == fEND-1) fEND = nfirst; | ||
| Seek(nfirst); | ||
| // We could not update the meta data for this block on the file. | ||
| // This is not fatal as this only means that we won't get it 'right' | ||
| // if we ever need to Recover the file before the block is actually | ||
| // (attempted to be reused. | ||
| // coverity[unchecked_value] | ||
| WriteBuffer(buffer, sizeof(buffer)); | ||
| if (fMustFlush) Flush(); | ||
| assert(0 < first && first < last && last < fEND); | ||
|
|
||
| TFree *f1 = static_cast<TFree *>(fFree->First()); | ||
| assert(f1); // There must always be at least the virtual free segment at the end of the file | ||
|
|
||
| TFree *newfree = f1->AddFree(fFree, first, last); | ||
| assert(newfree); // AddFree() always succeeds | ||
|
|
||
| const Long64_t nfirst = newfree->GetFirst(); | ||
| const Long64_t nlast = newfree->GetLast(); | ||
| assert(nfirst > 0 && nfirst <= first && nlast >= last); | ||
| Long64_t nbytesl = std::min(nlast, fEND) - nfirst + 1; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this change to |
||
| assert(nbytesl >= static_cast<Long64_t>(sizeof(Int_t))); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are these assertions valid even if the current TFile comes from opening a "corrupted" file? |
||
|
|
||
| auto fnWriteGapHeader = [this](ULong64_t offset, ULong64_t gapSize) { | ||
| assert((gapSize <= TFile::kMaxGapSize) && (fEND > 0) && | ||
| ((offset + sizeof(Int_t)) <= static_cast<ULong64_t>(fEND))); | ||
|
|
||
| auto nbytes = -static_cast<Int_t>(gapSize); | ||
| char buffer[sizeof(Int_t)]; | ||
| char *pbuffer = buffer; | ||
| tobuf(pbuffer, nbytes); | ||
|
|
||
| Seek(offset); | ||
| if (WriteBuffer(buffer, sizeof(buffer)) != 0) { | ||
| // Not fatal, this only means that we won't get it 'right' | ||
| // if we ever need to Recover the file before the block is actually | ||
| // attempted to be reused. | ||
| Warning("TFile::MakeFree()", "failed to write free segment header"); | ||
| } | ||
| }; | ||
|
|
||
| Long64_t offset = nfirst; | ||
| while (nbytesl > TFile::kMaxGapSize) { | ||
| // For gaps larger than 2GB, link several consecutive gaps together. This has to be done because the size | ||
| // marker on disk is 32 bits. The free list, however, will still have one large gap because the free list | ||
| // uses 64 bit [first..last] pairs to represent gaps. File recovery will merge consecutive gaps. | ||
|
|
||
| // Make sure that the second gap is large enough to write its size on disk | ||
| Long64_t gapSize = TFile::kMaxGapSize - sizeof(Int_t); | ||
| fnWriteGapHeader(offset, gapSize); | ||
|
|
||
| nbytesl -= gapSize; | ||
| offset += gapSize; | ||
| } | ||
| fnWriteGapHeader(offset, nbytesl); | ||
|
|
||
| if (last == fEND - 1) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As far as I understand this check is not influenced by the |
||
| fEND = nfirst; | ||
|
|
||
| if (fMustFlush) | ||
| Flush(); | ||
| } | ||
|
|
||
| //////////////////////////////////////////////////////////////////////////////// | ||
|
|
@@ -2125,7 +2158,14 @@ Int_t TFile::Recover() | |
|
|
||
| fEND = Long64_t(size); | ||
|
|
||
| if (fWritable && !fFree) fFree = new TList; | ||
| if (fWritable) { | ||
| if (fFree) { | ||
| // Remove an existing free list because we will recover it from the chain of segments | ||
| fFree->Delete(); | ||
| delete fFree; | ||
| } | ||
| fFree = new TList(); | ||
| } | ||
|
|
||
| Int_t nrecov = 0; | ||
| nwheader = 1024; | ||
|
|
@@ -2148,9 +2188,19 @@ Int_t TFile::Recover() | |
| break; | ||
| } | ||
| if (nbytes < 0) { | ||
| if ((-nbytes < static_cast<Int_t>(sizeof(Int_t))) || (-nbytes > static_cast<Int_t>(TFile::kMaxGapSize))) { | ||
| Error("Recover", "Address = %lld\tNbytes = %d\t=====E R R O R=======", idcur, nbytes); | ||
| break; | ||
| } | ||
|
jblomer marked this conversation as resolved.
|
||
| if (fWritable) { | ||
| const Long64_t last = idcur - nbytes - 1; | ||
| if (fFree->Last() && static_cast<TFree *>(fFree->Last())->GetLast() + 1 == idcur) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It feels like rather than equality a greater than would be more 'stable', isn't it?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I'd prefer the strict equality because we construct it this way in the loop before. I.e., we want to pass the condition exactly when we added a free segment in the last loop iteration and in this case we constructed |
||
| static_cast<TFree *>(fFree->Last())->SetLast(last); | ||
| } else { | ||
| new TFree(fFree, idcur, last); | ||
| } | ||
| } | ||
| idcur -= nbytes; | ||
| if (fWritable) new TFree(fFree,idcur,idcur-nbytes-1); | ||
| Seek(idcur); | ||
| continue; | ||
| } | ||
| Version_t versionkey; | ||
|
|
@@ -2196,15 +2246,18 @@ Int_t TFile::Recover() | |
| idcur += nbytes; | ||
| } | ||
| if (fWritable) { | ||
| Long64_t max_file_size = Long64_t(kStartBigFile); | ||
| if (max_file_size < fEND) max_file_size = fEND+1000000000; | ||
| TFree *last = (TFree*)fFree->Last(); | ||
| if (last) { | ||
| last->AddFree(fFree,fEND,max_file_size); | ||
| } else { | ||
| new TFree(fFree,fEND,max_file_size); | ||
| if (fFree->Last() && static_cast<TFree *>(fFree->Last())->GetLast() == idcur - 1) { | ||
| // If the last recovered segment is a free segment, remove it and replace it by a newly created artificial one | ||
| fEND = static_cast<TFree *>(fFree->Last())->GetFirst(); | ||
| delete fFree->Last(); | ||
| fFree->Remove(fFree->LastLink()); | ||
| } | ||
| if (nrecov) Write(); | ||
| Long64_t max_file_size = Long64_t(kStartBigFile); | ||
| if (max_file_size < fEND) | ||
| max_file_size = fEND + 1000000000; | ||
|
pcanal marked this conversation as resolved.
pcanal marked this conversation as resolved.
|
||
| new TFree(fFree, fEND, max_file_size); | ||
| if (nrecov) | ||
| Write(); | ||
| } | ||
| return nrecov; | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -540,7 +540,7 @@ void TKey::Create(Int_t nbytes, TFile* externFile) | |
| fLeft = -1; | ||
| if (!fBuffer) fBuffer = new char[nsize]; | ||
| } else { | ||
| fLeft = Int_t(bestfree->GetLast() - fSeekKey - nsize + 1); | ||
| fLeft = bestfree->GetLast() - fSeekKey - nsize + 1; | ||
| } | ||
| //*-*----------------- Case where new object fills exactly a deleted gap | ||
| fNbytes = nsize; | ||
|
|
@@ -554,11 +554,11 @@ void TKey::Create(Int_t nbytes, TFile* externFile) | |
| //*-*----------------- Case where new object is placed in a deleted gap larger than itself | ||
| if (fLeft > 0) { // found a bigger segment | ||
| if (!fBuffer) { | ||
| // We reserve space for the new free segment size marker but we don't fill the buffer yet. | ||
| // We have to check (on writing, i.e. when we do I/O) if we are writing into a large gap (>2GB), | ||
| // in which case we need to read the first link size first. | ||
| fBuffer = new char[nsize+sizeof(Int_t)]; | ||
| } | ||
| char *buffer = fBuffer+nsize; | ||
| Int_t nbytesleft = -fLeft; // set header of remaining record | ||
| tobuf(buffer, nbytesleft); | ||
| bestfree->SetFirst(fSeekKey+nsize); | ||
| } | ||
|
|
||
|
|
@@ -1480,6 +1480,74 @@ void TKey::Streamer(TBuffer &b) | |
| } | ||
| } | ||
|
|
||
| /// Fills zero, one, or two (rare) free segment header buffers into the provided `buffers` parameter. | ||
| /// If the key fits exactly in a provided gap or is at the end of the file, no buffer is filled. Otherwise, | ||
| /// the buffer for the integer immediately after the key data is filled with the size of the remaining, shortened gap. | ||
| /// In rare cases, it can be necessary to fill the second buffer with another free segment link. | ||
| /// Returns the number of buffers filled. | ||
| UShort_t TKey::FillGapHeaderBuffers(TFile &f, GapHeaderBuf_t &buffers) const | ||
| { | ||
| if (fLeft <= 0) | ||
| return 0; | ||
|
|
||
| // We must fill at least one free segment header | ||
|
|
||
| if (fLeft <= TFile::kMaxGapSize) { | ||
| char *pbuf = buffers[0].data(); | ||
| tobuf(pbuf, -static_cast<Int_t>(fLeft)); | ||
| return 1; | ||
| } | ||
|
|
||
| // Large gap, we need to read the free segment headers that are going to be overwritten by the key to find the | ||
| // first free segment header beyond the key. In fact, we must find a free segment header far enough beyond the | ||
| // key so that we can inject another, new free segment header between the key end and the existing one (which means | ||
| // key end + sizeof(int)) | ||
| const auto newGapOffset = fSeekKey + fNbytes; | ||
| auto existingGapOffset = fSeekKey; | ||
| auto prevGapOffset = 0; | ||
| do { | ||
| char readBuf[sizeof(Int_t)]; | ||
| Int_t header = 0; | ||
|
|
||
| f.Seek(existingGapOffset); | ||
| if (f.ReadBuffer(readBuf, sizeof(Int_t))) { | ||
| Error("FillGapHeaderBuffers", "cannot read free segment link size at %lld", existingGapOffset); | ||
| return 0; | ||
| } | ||
|
|
||
| char *pbuf = readBuf; | ||
| frombuf(pbuf, &header); | ||
| const Int_t gapSize = -header; | ||
| if (gapSize < static_cast<Int_t>(sizeof(Int_t)) || gapSize > TFile::kMaxGapSize || | ||
| gapSize > (newGapOffset - existingGapOffset + fLeft)) { | ||
| Error("FillGapHeaderBuffers", "invalid free segment header size %d at %lld", gapSize, existingGapOffset); | ||
| return 0; | ||
| } | ||
|
|
||
| prevGapOffset = existingGapOffset; | ||
| existingGapOffset += gapSize; | ||
| } while (existingGapOffset <= newGapOffset + static_cast<Long64_t>(sizeof(Int_t))); | ||
|
|
||
| if ((prevGapOffset < newGapOffset) || (existingGapOffset - newGapOffset) <= TFile::kMaxGapSize) { | ||
| // Normal case: writing the new free segment header won't overwrite an existing one beyond the key. | ||
| // Rare case: the new free segment header will overwrite (part of) and existing one beyond the key. | ||
| // We can then lengthen the gap beteween [prevGapOffset, newGapOffset] except if this has already | ||
| // the maximum length. | ||
| char *pbuf = buffers[0].data(); | ||
| tobuf(pbuf, -static_cast<Int_t>(existingGapOffset - newGapOffset)); | ||
| return 1; | ||
| } | ||
|
|
||
| // Very rare case: we need two free segment headers after the key | ||
| char *pbuf = buffers[0].data(); | ||
| tobuf(pbuf, -static_cast<Int_t>(sizeof(Int_t))); | ||
| const auto newGapSize = existingGapOffset - newGapOffset - sizeof(Int_t); | ||
| assert(newGapSize <= TFile::kMaxGapSize); | ||
| pbuf = buffers[1].data(); | ||
| tobuf(pbuf, -static_cast<Int_t>(newGapSize)); | ||
| return 2; | ||
| } | ||
|
|
||
| //////////////////////////////////////////////////////////////////////////////// | ||
| /// Write the encoded object supported by this key. | ||
| /// The function returns the number of bytes committed to the file. | ||
|
|
@@ -1498,9 +1566,21 @@ Int_t TKey::WriteFile(Int_t cycle, TFile* f) | |
| buffer = fBuffer; | ||
| } | ||
|
|
||
| if (fLeft > 0) nsize += sizeof(Int_t); | ||
| GapHeaderBuf_t gapHeaderBuffers; | ||
| auto nGaps = FillGapHeaderBuffers(*f, gapHeaderBuffers); | ||
| assert(nGaps <= 2); | ||
| if (nGaps > 0) { | ||
| std::memcpy(fBuffer + fNbytes, gapHeaderBuffers[0].data(), sizeof(Int_t)); | ||
| nsize += sizeof(Int_t); | ||
| } | ||
|
|
||
| f->Seek(fSeekKey); | ||
| Bool_t result = f->WriteBuffer(buffer,nsize); | ||
|
|
||
| if (nGaps == 2) { | ||
| f->WriteBuffer(gapHeaderBuffers[1].data(), sizeof(Int_t)); | ||
| nsize += sizeof(Int_t); | ||
| } | ||
| //f->Flush(); Flushing takes too much time. | ||
| // Let user flush the file when they want. | ||
| if (gDebug) { | ||
|
|
@@ -1525,9 +1605,21 @@ Int_t TKey::WriteFileKeepBuffer(TFile *f) | |
| Int_t nsize = fNbytes; | ||
| char *buffer = fBuffer; | ||
|
|
||
| if (fLeft > 0) nsize += sizeof(Int_t); | ||
| GapHeaderBuf_t gapHeaderBuffers; | ||
| auto nGaps = FillGapHeaderBuffers(*f, gapHeaderBuffers); | ||
| assert(nGaps <= 2); | ||
| if (nGaps > 0) { | ||
| std::memcpy(fBuffer + fNbytes, gapHeaderBuffers[0].data(), sizeof(Int_t)); | ||
| nsize += sizeof(Int_t); | ||
| } | ||
|
|
||
| f->Seek(fSeekKey); | ||
| Bool_t result = f->WriteBuffer(buffer,nsize); | ||
|
|
||
| if (nGaps == 2) { | ||
| f->WriteBuffer(gapHeaderBuffers[1].data(), sizeof(Int_t)); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why don't we need a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because for just wrote |
||
| nsize += sizeof(Int_t); | ||
| } | ||
| //f->Flush(); Flushing takes too much time. | ||
| // Let user flush the file when they want. | ||
| if (gDebug) { | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We might wanna stress out (via a
static_assertor comment) that this MUST be less thannumeric_limits<Int_t>::max()