Skip to content

Commit dc1cda1

Browse files
committed
[io] fix creating key in large gap
If a new key is created in a free segment such that the remaining size is larger than 2G, we have to first read the original free segment header before we update the new, smaller header. Along the way, we change TKey::fLeft from Int_t to Long64_t.
1 parent 0989124 commit dc1cda1

4 files changed

Lines changed: 267 additions & 13 deletions

File tree

io/io/inc/TKey.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@ class TKey : public TNamed {
3838
Int_t UnzipBuffer(char *targetBuffer, const char *compressedBuffer) const;
3939

4040
protected:
41+
// After a key that has been placed in a gap larger than the key itself, one or very rarely two marker bytes
42+
// follow to restore the linked list of segments. The two char buffers are meant to be written to disk, i.e.
43+
// they should contain big-endian encoded negative integer values for the size of a gap, or zero if unused.
44+
using GapHeaderBuf_t = std::array<std::array<char, sizeof(Int_t)>, 2>;
45+
4146
Int_t fVersion; ///< Key version identifier
4247
Int_t fNbytes; ///< Number of bytes for the whole key on file (key header and data)
4348
Int_t fObjlen; ///< Length of uncompressed object in bytes
@@ -47,7 +52,7 @@ class TKey : public TNamed {
4752
Long64_t fSeekKey; ///< Location of object on file
4853
Long64_t fSeekPdir; ///< Location of parent directory on file
4954
TString fClassName; ///< Object Class name
50-
Int_t fLeft; ///< Number of bytes left in current segment
55+
Long64_t fLeft; ///< Number of bytes left in current segment
5156
char *fBuffer; ///< Object buffer
5257
TBuffer *fBufferRef; ///< Pointer to the TBuffer object
5358
UShort_t fPidOffset; ///<!Offset to be added to the pid index in this key/buffer. This is actually saved in the high bits of fSeekPdir
@@ -58,6 +63,7 @@ class TKey : public TNamed {
5863
void Build(TDirectory* motherDir, const char* classname, Long64_t filepos);
5964
void Reset(); // Currently only for the use of TBasket.
6065
virtual Int_t WriteFileKeepBuffer(TFile *f = nullptr);
66+
UShort_t FillGapHeaderBuffers(TFile &f, GapHeaderBuf_t &buffers) const;
6167

6268
public:
6369
TKey();

io/io/src/TKey.cxx

Lines changed: 98 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -540,7 +540,7 @@ void TKey::Create(Int_t nbytes, TFile* externFile)
540540
fLeft = -1;
541541
if (!fBuffer) fBuffer = new char[nsize];
542542
} else {
543-
fLeft = Int_t(bestfree->GetLast() - fSeekKey - nsize + 1);
543+
fLeft = bestfree->GetLast() - fSeekKey - nsize + 1;
544544
}
545545
//*-*----------------- Case where new object fills exactly a deleted gap
546546
fNbytes = nsize;
@@ -554,11 +554,11 @@ void TKey::Create(Int_t nbytes, TFile* externFile)
554554
//*-*----------------- Case where new object is placed in a deleted gap larger than itself
555555
if (fLeft > 0) { // found a bigger segment
556556
if (!fBuffer) {
557+
// We reserve space for the new free segment size marker but we don't fill the buffer yet.
558+
// We have to check (on writing, i.e. when we do I/O) if we are writing into a large gap (>2GB),
559+
// in which case we need to read the first link size first.
557560
fBuffer = new char[nsize+sizeof(Int_t)];
558561
}
559-
char *buffer = fBuffer+nsize;
560-
Int_t nbytesleft = -fLeft; // set header of remaining record
561-
tobuf(buffer, nbytesleft);
562562
bestfree->SetFirst(fSeekKey+nsize);
563563
}
564564

@@ -1480,6 +1480,74 @@ void TKey::Streamer(TBuffer &b)
14801480
}
14811481
}
14821482

1483+
/// Fills zero, one, or two (rare) free segment header buffers into the provided `buffers` parameter.
1484+
/// If the key fits exactly in a provided gap or is at the end of the file, no buffer is filled. Otherwise,
1485+
/// the buffer for the integer immediately after the key data is filled with the size of the remaining, shortened gap.
1486+
/// In rare cases, it can be necessary to fill the second buffer with another free segment link.
1487+
/// Returns the number of buffers filled.
1488+
UShort_t TKey::FillGapHeaderBuffers(TFile &f, GapHeaderBuf_t &buffers) const
1489+
{
1490+
if (fLeft <= 0)
1491+
return 0;
1492+
1493+
// We must fill at least one free segment header
1494+
1495+
if (fLeft <= TFile::kMaxGapSize) {
1496+
char *pbuf = buffers[0].data();
1497+
tobuf(pbuf, -static_cast<Int_t>(fLeft));
1498+
return 1;
1499+
}
1500+
1501+
// Large gap, we need to read the free segment headers that are going to be overwritten by the key to find the
1502+
// first free segment header beyond the key. In fact, we must find a free segment header far enough beyond the
1503+
// key so that we can inject another, new free segment header between the key end and the existing one (which means
1504+
// key end + sizeof(int))
1505+
const auto newGapOffset = fSeekKey + fNbytes;
1506+
auto existingGapOffset = fSeekKey;
1507+
auto prevGapOffset = 0;
1508+
do {
1509+
char readBuf[sizeof(Int_t)];
1510+
Int_t header = 0;
1511+
1512+
f.Seek(existingGapOffset);
1513+
if (f.ReadBuffer(readBuf, sizeof(Int_t))) {
1514+
Error("FillGapHeaderBuffers", "cannot read free segment link size at %lld", existingGapOffset);
1515+
return 0;
1516+
}
1517+
1518+
char *pbuf = readBuf;
1519+
frombuf(pbuf, &header);
1520+
const Int_t gapSize = -header;
1521+
if (gapSize < static_cast<Int_t>(sizeof(Int_t)) || gapSize > TFile::kMaxGapSize ||
1522+
gapSize > (newGapOffset - existingGapOffset + fLeft)) {
1523+
Error("FillGapHeaderBuffers", "invalid free segment header size %d at %lld", gapSize, existingGapOffset);
1524+
return 0;
1525+
}
1526+
1527+
prevGapOffset = existingGapOffset;
1528+
existingGapOffset += gapSize;
1529+
} while (existingGapOffset <= newGapOffset + static_cast<Long64_t>(sizeof(Int_t)));
1530+
1531+
if ((prevGapOffset < newGapOffset) || (existingGapOffset - newGapOffset) <= TFile::kMaxGapSize) {
1532+
// Normal case: writing the new free segment header won't overwrite an existing one beyond the key.
1533+
// Rare case: the new free segment header will overwrite (part of) and existing one beyond the key.
1534+
// We can then lengthen the gap beteween [prevGapOffset, newGapOffset] except if this has already
1535+
// the maximum length.
1536+
char *pbuf = buffers[0].data();
1537+
tobuf(pbuf, -static_cast<Int_t>(existingGapOffset - newGapOffset));
1538+
return 1;
1539+
}
1540+
1541+
// Very rare case: we need two free segment headers after the key
1542+
char *pbuf = buffers[0].data();
1543+
tobuf(pbuf, -static_cast<Int_t>(sizeof(Int_t)));
1544+
const auto newGapSize = existingGapOffset - newGapOffset - sizeof(Int_t);
1545+
assert(newGapSize <= TFile::kMaxGapSize);
1546+
pbuf = buffers[1].data();
1547+
tobuf(pbuf, -static_cast<Int_t>(newGapSize));
1548+
return 2;
1549+
}
1550+
14831551
////////////////////////////////////////////////////////////////////////////////
14841552
/// Write the encoded object supported by this key.
14851553
/// The function returns the number of bytes committed to the file.
@@ -1498,9 +1566,21 @@ Int_t TKey::WriteFile(Int_t cycle, TFile* f)
14981566
buffer = fBuffer;
14991567
}
15001568

1501-
if (fLeft > 0) nsize += sizeof(Int_t);
1569+
GapHeaderBuf_t gapHeaderBuffers;
1570+
auto nGaps = FillGapHeaderBuffers(*f, gapHeaderBuffers);
1571+
assert(nGaps <= 2);
1572+
if (nGaps > 0) {
1573+
std::memcpy(fBuffer + fNbytes, gapHeaderBuffers[0].data(), sizeof(Int_t));
1574+
nsize += sizeof(Int_t);
1575+
}
1576+
15021577
f->Seek(fSeekKey);
15031578
Bool_t result = f->WriteBuffer(buffer,nsize);
1579+
1580+
if (nGaps == 2) {
1581+
f->WriteBuffer(gapHeaderBuffers[1].data(), sizeof(Int_t));
1582+
nsize += sizeof(Int_t);
1583+
}
15041584
//f->Flush(); Flushing takes too much time.
15051585
// Let user flush the file when they want.
15061586
if (gDebug) {
@@ -1525,9 +1605,21 @@ Int_t TKey::WriteFileKeepBuffer(TFile *f)
15251605
Int_t nsize = fNbytes;
15261606
char *buffer = fBuffer;
15271607

1528-
if (fLeft > 0) nsize += sizeof(Int_t);
1608+
GapHeaderBuf_t gapHeaderBuffers;
1609+
auto nGaps = FillGapHeaderBuffers(*f, gapHeaderBuffers);
1610+
assert(nGaps <= 2);
1611+
if (nGaps > 0) {
1612+
std::memcpy(fBuffer + fNbytes, gapHeaderBuffers[0].data(), sizeof(Int_t));
1613+
nsize += sizeof(Int_t);
1614+
}
1615+
15291616
f->Seek(fSeekKey);
15301617
Bool_t result = f->WriteBuffer(buffer,nsize);
1618+
1619+
if (nGaps == 2) {
1620+
f->WriteBuffer(gapHeaderBuffers[1].data(), sizeof(Int_t));
1621+
nsize += sizeof(Int_t);
1622+
}
15311623
//f->Flush(); Flushing takes too much time.
15321624
// Let user flush the file when they want.
15331625
if (gDebug) {

io/io/test/TFileTests.cxx

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,161 @@ TEST(TFile, DeleteKey)
480480
EXPECT_FALSE(f->TestBit(TFile::kRecovered));
481481
}
482482

483+
TEST(TFile, WriteInLargeGap)
484+
{
485+
// The following tests run out of memory on 32bit platforms
486+
if (sizeof(std::size_t) == 4) {
487+
GTEST_SKIP() << "Skipping test on 32bit platform.";
488+
}
489+
490+
ROOT::TestSupport::FileRaii fileGuard("tfile_test_large_gap_at_end.root");
491+
auto f = std::unique_ptr<TFile>(TFile::Open(fileGuard.GetPath().c_str(), "RECREATE"));
492+
f->SetCompressionSettings(0);
493+
std::vector<char> v;
494+
v.resize(1000 * 1000 * 1000 - 100, 'x'); // almost 1GB
495+
f->WriteObject(&v, "v0");
496+
f->WriteObject(&v, "v1");
497+
f->WriteObject(&v, "v2");
498+
f->Write();
499+
f->Close();
500+
501+
f = std::unique_ptr<TFile>(TFile::Open(fileGuard.GetPath().c_str(), "UPDATE"));
502+
EXPECT_GT(f->GetEND(), TFile::kStartBigFile);
503+
f->Delete("v0;*");
504+
f->Delete("v1;*");
505+
f->Delete("v2;*");
506+
v.clear();
507+
f->WriteObject(&v, "small"); //< This should not crash, i.e. the new key should be placed in the large gap
508+
509+
int nConsecutiveGapsAfterSmall = -1;
510+
for (const auto &k : f->WalkTKeys()) {
511+
if (k.fType == ROOT::Detail::TKeyMapNode::kGap && nConsecutiveGapsAfterSmall >= 0) {
512+
nConsecutiveGapsAfterSmall++;
513+
continue;
514+
}
515+
if (k.fKeyName == "small") {
516+
nConsecutiveGapsAfterSmall = 0;
517+
continue;
518+
}
519+
if (nConsecutiveGapsAfterSmall >= 0)
520+
break;
521+
}
522+
EXPECT_EQ(2, nConsecutiveGapsAfterSmall);
523+
524+
TNamed x("x", "");
525+
f->WriteObject(&x, "x"); //< Update the streamer info so that it doesn't block shrinking the file
526+
f->Write();
527+
f->Close();
528+
529+
f = std::unique_ptr<TFile>(TFile::Open(fileGuard.GetPath().c_str()));
530+
EXPECT_LT(f->GetEND(), TFile::kStartBigFile);
531+
}
532+
533+
TEST(TFile, WriteInLargeGapCornerCase)
534+
{
535+
// The following tests run out of memory on 32bit platforms
536+
if (sizeof(std::size_t) == 4) {
537+
GTEST_SKIP() << "Skipping test on 32bit platform.";
538+
}
539+
540+
// Constructs a case that requires writing 2 free segment headers after a key
541+
542+
ROOT::TestSupport::FileRaii fileGuard("tfile_test_write_in_large_gap_corner_case.root");
543+
auto f = std::unique_ptr<TFile>(TFile::Open(fileGuard.GetPath().c_str(), "RECREATE"));
544+
f->SetCompressionSettings(0);
545+
std::vector<char> v;
546+
v.resize(1000 * 1000 * 1000 - 100, 'x'); // almost 1GB
547+
f->WriteObject(&v, "v0");
548+
f->WriteObject(&v, "v1");
549+
f->WriteObject(&v, "v2");
550+
f->Write();
551+
f->Delete("v0;*");
552+
f->Delete("v1;*");
553+
f->Delete("v2;*");
554+
f->Write();
555+
556+
// We should have two large consecutive gaps
557+
Long64_t seekGap = 0;
558+
Long64_t gapSize = 0; // the combined gap size
559+
for (const auto &k : f->WalkTKeys()) {
560+
if (seekGap > 0) {
561+
// second gap
562+
EXPECT_EQ(ROOT::Detail::TKeyMapNode::kGap, k.fType);
563+
gapSize += k.fLen;
564+
break;
565+
}
566+
if (k.fLen > 1000000) {
567+
EXPECT_EQ(ROOT::Detail::TKeyMapNode::kGap, k.fType);
568+
seekGap = k.fAddr;
569+
gapSize = k.fLen;
570+
}
571+
}
572+
ASSERT_GT(seekGap, 0);
573+
ASSERT_GT(gapSize, TFile::kMaxGapSize);
574+
575+
// Create new key in the large gap; not huge but large enough to only fit in the large gap
576+
TKey testKey("TEST", "TITLE", TObject::Class(), 1024 * 1024, f.get());
577+
EXPECT_EQ(seekGap, testKey.GetSeekKey());
578+
579+
// Manipulate the linked list of free segments in the large gap:
580+
// - 2 empty free gaps
581+
// - 1 max sized gap 1 byte after the key end
582+
// - final segment pointing to the original end
583+
std::array<char, sizeof(Int_t)> buf;
584+
char *pbuf = buf.data();
585+
586+
Long64_t offset = seekGap;
587+
f->Seek(offset);
588+
tobuf(pbuf, -static_cast<Int_t>(sizeof(Int_t)));
589+
EXPECT_FALSE(f->WriteBuffer(buf.data(), sizeof(Int_t)));
590+
591+
offset += sizeof(Int_t);
592+
pbuf = buf.data();
593+
tobuf(pbuf, -static_cast<Int_t>(seekGap + testKey.GetNbytes() + 1 - offset));
594+
EXPECT_FALSE(f->WriteBuffer(buf.data(), sizeof(Int_t)));
595+
596+
offset += testKey.GetNbytes() + 1 - sizeof(Int_t);
597+
f->Seek(offset);
598+
pbuf = buf.data();
599+
tobuf(pbuf, -static_cast<Int_t>(TFile::kMaxGapSize));
600+
EXPECT_FALSE(f->WriteBuffer(buf.data(), sizeof(Int_t)));
601+
602+
offset += TFile::kMaxGapSize;
603+
f->Seek(offset);
604+
EXPECT_GT(seekGap + gapSize, offset);
605+
EXPECT_LT(seekGap + gapSize - offset, TFile::kMaxGapSize);
606+
pbuf = buf.data();
607+
tobuf(pbuf, -static_cast<Int_t>(seekGap + gapSize - offset));
608+
EXPECT_FALSE(f->WriteBuffer(buf.data(), sizeof(Int_t)));
609+
610+
testKey.WriteFile(1, f.get());
611+
612+
int step = 0;
613+
for (const auto &k : f->WalkTKeys()) {
614+
if (step == 3) {
615+
EXPECT_EQ(ROOT::Detail::TKeyMapNode::kGap, k.fType);
616+
EXPECT_EQ(k.fAddr + k.fLen, seekGap + gapSize);
617+
step = 4;
618+
} else if (step == 2) {
619+
EXPECT_EQ(ROOT::Detail::TKeyMapNode::kGap, k.fType);
620+
EXPECT_LT(k.fLen, TFile::kMaxGapSize);
621+
step = 3;
622+
} else if (step == 1) {
623+
EXPECT_EQ(ROOT::Detail::TKeyMapNode::kGap, k.fType);
624+
EXPECT_EQ(sizeof(Int_t), k.fLen);
625+
step = 2;
626+
} else if (k.fAddr == static_cast<ULong64_t>(seekGap)) {
627+
EXPECT_EQ(ROOT::Detail::TKeyMapNode::kKey, k.fType);
628+
EXPECT_EQ(k.fLen, testKey.GetNbytes());
629+
step = 1;
630+
}
631+
}
632+
EXPECT_EQ(4, step);
633+
634+
f->Write();
635+
f->Close();
636+
}
637+
483638
TEST(TFile, KeySizeLimit)
484639
{
485640
// The following tests run out of memory on 32bit platforms

tree/ntuple/src/RMiniFile.cxx

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,8 @@ struct RTFileControlBlock {
618618
/// on some platforms.
619619
class RKeyBlob : public TKey {
620620
public:
621+
using TKey::GapHeaderBuf_t;
622+
621623
RKeyBlob() = default;
622624

623625
explicit RKeyBlob(TFile *file) : TKey(file)
@@ -634,7 +636,7 @@ class RKeyBlob : public TKey {
634636
*seekKey = fSeekKey;
635637
}
636638

637-
bool WasAllocatedInAFreeSlot() const { return fLeft > 0; }
639+
using TKey::FillGapHeaderBuffers;
638640

639641
ClassDefInlineOverride(RKeyBlob, 0)
640642
};
@@ -1173,11 +1175,10 @@ std::uint64_t ROOT::Internal::RNTupleFileWriter::ReserveBlobKey(T &caller, TFile
11731175
caller.Write(localKeyBuffer, kBlobKeyLen, offsetKey);
11741176
}
11751177

1176-
if (keyBlob.WasAllocatedInAFreeSlot()) {
1177-
// If the key was allocated in a free slot, the last 4 bytes of its buffer contain the new size
1178-
// of the remaining free slot and we need to write it to disk before the key gets destroyed at the end of the
1179-
// function.
1180-
caller.Write(keyBlob.GetBuffer() + nbytes, sizeof(Int_t), offsetKey + kBlobKeyLen + nbytes);
1178+
RKeyBlob::GapHeaderBuf_t buffers;
1179+
auto nGaps = keyBlob.FillGapHeaderBuffers(file, buffers);
1180+
for (unsigned int i = 0; i < nGaps; ++i) {
1181+
caller.Write(buffers[i].data(), sizeof(Int_t), offsetKey + kBlobKeyLen + nbytes + i * sizeof(Int_t));
11811182
}
11821183

11831184
auto offsetData = offsetKey + kBlobKeyLen;

0 commit comments

Comments
 (0)