Skip to content

Commit 7a013de

Browse files
committed
MidxPackFilter: Keep max one midx in the stack
We are keeping multiple disconnected midxs in the pack list (if valid). The only reason for this disconnection is a race condition in gc/compact and we don't need all of them in the pack list. Take only the most recent valid midx for the pack list. Remove all other uncovered midxs from the packlist. Change-Id: If6697e9f92d9cdf5fc7bd28be3cc93596a6a6964
1 parent 13cc2cb commit 7a013de

3 files changed

Lines changed: 161 additions & 38 deletions

File tree

org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1243,9 +1243,7 @@ public void midx_oneMidx_deleteMidxs_allObjectsOneGC() throws Exception {
12431243
.add("file.txt", git.blob("a blob")).parent(root).create();
12441244
assertEquals(3, countPacks(INSERT));
12451245

1246-
DfsPackDescription midx = DfsMidxWriter.writeMidx(
1247-
NullProgressMonitor.INSTANCE, odb,
1248-
Arrays.asList(odb.getPacks()), null);
1246+
DfsPackDescription midx = midx(Arrays.asList(odb.getPacks()), null);
12491247
odb.commitPack(List.of(midx), null);
12501248

12511249
gcNoTtl();
@@ -1279,13 +1277,11 @@ public void midx_chainedMidx_deleteMidxs_allObjsInOneGC() throws Exception {
12791277

12801278
List<DfsPackFile> basicPacks = Arrays.stream(odb.getPacks())
12811279
.collect(Collectors.toUnmodifiableList());
1282-
DfsPackDescription midx = DfsMidxWriter.writeMidx(NULL_PM, odb,
1283-
basicPacks.subList(0, 9), null);
1284-
odb.commitPack(List.of(midx), null);
1280+
DfsPackDescription baseMidx = midx(basicPacks.subList(0, 9), null);
1281+
odb.commitPack(List.of(baseMidx), null);
12851282

1286-
DfsPackDescription midx2 = DfsMidxWriter.writeMidx(NULL_PM, odb,
1287-
basicPacks.subList(9, 21), midx);
1288-
odb.commitPack(List.of(midx2), null);
1283+
DfsPackDescription tipMidx = midx(basicPacks.subList(9, 21), baseMidx);
1284+
odb.commitPack(List.of(tipMidx), null);
12891285

12901286
// Verify we got one pack that is an midx
12911287
// This is testing the test code
@@ -1294,7 +1290,7 @@ public void midx_chainedMidx_deleteMidxs_allObjsInOneGC() throws Exception {
12941290
DfsPackDescription theDesc = odb.getPacks()[0].getPackDescription();
12951291
assertTrue(theDesc.hasFileExt(MULTI_PACK_INDEX));
12961292
assertEquals(12, theDesc.getCoveredPacks().size());
1297-
assertEquals(theDesc.getMultiPackIndexBase(), midx);
1293+
assertEquals(theDesc.getMultiPackIndexBase(), baseMidx);
12981294
assertEquals(9,
12991295
theDesc.getMultiPackIndexBase().getCoveredPacks().size());
13001296
gcNoTtl();
@@ -1308,8 +1304,8 @@ public void midx_chainedMidx_deleteMidxs_allObjsInOneGC() throws Exception {
13081304
for (RevCommit c : knownCommits) {
13091305
assertTrue(isObjectInPack(c, pack));
13101306
}
1311-
assertFalse(odb.listPacks().contains(midx));
1312-
assertFalse(odb.listPacks().contains(midx2));
1307+
assertFalse(odb.listPacks().contains(baseMidx));
1308+
assertFalse(odb.listPacks().contains(tipMidx));
13131309
}
13141310

13151311
@Test
@@ -1323,8 +1319,7 @@ public void midx_packAndMidx_deleteMidxs_allObjectsOneGC()
13231319
assertEquals(3, countPacks(INSERT));
13241320

13251321
List<DfsPackFile> packs = Arrays.stream(odb.getPacks()).toList();
1326-
DfsPackDescription midx = DfsMidxWriter.writeMidx(NULL_PM, odb, packs,
1327-
null);
1322+
DfsPackDescription midx = midx(packs, null);
13281323
odb.commitPack(List.of(midx), null);
13291324

13301325
RevBlob blobOutOfMidx = git.blob("some content");
@@ -1500,6 +1495,15 @@ private static DfsPackFile findFirstBySource(DfsPackFile[] packs, PackSource sou
15001495
.findFirst().get();
15011496
}
15021497

1498+
private DfsPackDescription midx(List<DfsPackFile> coveredPacks,
1499+
DfsPackDescription base) throws IOException {
1500+
DfsPackDescription midx = DfsMidxWriter.writeMidx(NULL_PM, odb,
1501+
coveredPacks, base);
1502+
git.tick(1);
1503+
midx.setLastModified(git.getInstant().toEpochMilli());
1504+
return midx;
1505+
}
1506+
15031507
private TestRepository<InMemoryRepository>.CommitBuilder commit() {
15041508
return git.commit();
15051509
}

org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/MidxPackFilterTest.java

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,81 @@ public void useMidx_nestedMidxAndOnePack_topMidxAndPack() {
129129
assertTrue(reorgPacks.contains(uncovered));
130130
}
131131

132+
@Test
133+
public void useMidx_unconnectedValidMidx_onlyTop() {
134+
135+
DfsPackDescription gc = pack("aaaa", GC, PACK);
136+
DfsPackDescription compact = pack("cccc", COMPACT, PACK);
137+
DfsPackDescription firstMidx = pack("midx1", GC, MULTI_PACK_INDEX);
138+
firstMidx.setCoveredPacks(List.of(gc, compact));
139+
140+
DfsPackDescription unconnectedValid = pack("randoMidx", GC,
141+
MULTI_PACK_INDEX);
142+
unconnectedValid.setCoveredPacks(List.of(gc));
143+
144+
DfsPackDescription compact2 = pack("dddd", COMPACT, PACK);
145+
DfsPackDescription compact3 = pack("eeee", COMPACT, PACK);
146+
DfsPackDescription topMidx = pack("midx2", GC, MULTI_PACK_INDEX);
147+
topMidx.setCoveredPacks(List.of(compact2, compact3));
148+
topMidx.setMultiPackIndexBase(firstMidx);
149+
150+
List<DfsPackDescription> reorgPacks = MidxPackFilter
151+
.useMidx(List.of(gc, compact, firstMidx, unconnectedValid,
152+
compact2, compact3, topMidx));
153+
assertEquals(1, reorgPacks.size());
154+
assertTrue(reorgPacks.contains(topMidx));
155+
}
156+
157+
@Test
158+
public void useMidx_unconnectedInvalidMidx_onlyTop() {
159+
DfsPackDescription gc = pack("aaaa", GC, PACK);
160+
DfsPackDescription compact = pack("cccc", COMPACT, PACK);
161+
DfsPackDescription firstMidx = pack("midx1", GC, MULTI_PACK_INDEX);
162+
firstMidx.setCoveredPacks(List.of(gc, compact));
163+
164+
DfsPackDescription notCommitted = pack("not-in-db", GC, PACK);
165+
DfsPackDescription unconnectedInvalid = pack("randoMidx", GC,
166+
MULTI_PACK_INDEX);
167+
unconnectedInvalid.setCoveredPacks(List.of(notCommitted));
168+
169+
DfsPackDescription compact2 = pack("dddd", COMPACT, PACK);
170+
DfsPackDescription compact3 = pack("eeee", COMPACT, PACK);
171+
DfsPackDescription topMidx = pack("midx2", GC, MULTI_PACK_INDEX);
172+
topMidx.setCoveredPacks(List.of(compact2, compact3));
173+
topMidx.setMultiPackIndexBase(firstMidx);
174+
175+
List<DfsPackDescription> reorgPacks = MidxPackFilter
176+
.useMidx(List.of(gc, compact, firstMidx, unconnectedInvalid,
177+
compact2, compact3, topMidx));
178+
assertEquals(1, reorgPacks.size());
179+
assertTrue(reorgPacks.contains(topMidx));
180+
}
181+
182+
@Test
183+
public void useMidx_latestMidxInvalid_takeNext() {
184+
DfsPackDescription gc = pack("aaaa", GC, PACK);
185+
DfsPackDescription compact = pack("cccc", COMPACT, PACK);
186+
DfsPackDescription firstMidx = pack("midx1", GC, MULTI_PACK_INDEX);
187+
firstMidx.setCoveredPacks(List.of(gc, compact));
188+
189+
DfsPackDescription compact2 = pack("dddd", COMPACT, PACK);
190+
DfsPackDescription compact3 = pack("eeee", COMPACT, PACK);
191+
DfsPackDescription topMidx = pack("midx2", GC, MULTI_PACK_INDEX);
192+
topMidx.setCoveredPacks(List.of(compact2, compact3));
193+
topMidx.setMultiPackIndexBase(firstMidx);
194+
195+
DfsPackDescription notCommitted = pack("not-in-db", GC, PACK);
196+
DfsPackDescription unconnectedInvalid = pack("randoMidx", GC,
197+
MULTI_PACK_INDEX);
198+
unconnectedInvalid.setCoveredPacks(List.of(notCommitted));
199+
200+
List<DfsPackDescription> reorgPacks = MidxPackFilter
201+
.useMidx(List.of(gc, compact, firstMidx, unconnectedInvalid,
202+
compact2, compact3, topMidx));
203+
assertEquals(1, reorgPacks.size());
204+
assertTrue(reorgPacks.contains(topMidx));
205+
}
206+
132207
@Test
133208
public void skipMidx_oneMidxCoversAll_allPacks() {
134209
DfsPackDescription gc = pack("aaaa", GC, PACK);

org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/MidxPackFilter.java

Lines changed: 68 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,16 @@
99
*/
1010
package org.eclipse.jgit.internal.storage.dfs;
1111

12-
import org.eclipse.jgit.internal.storage.pack.PackExt;
13-
12+
import java.util.ArrayList;
13+
import java.util.Comparator;
1414
import java.util.HashSet;
1515
import java.util.List;
16+
import java.util.Optional;
1617
import java.util.Set;
1718
import java.util.stream.Collectors;
1819

20+
import org.eclipse.jgit.internal.storage.pack.PackExt;
21+
1922
/**
2023
* Format a flat list of packs and midxs into a valid list of packs.
2124
* <p>
@@ -30,6 +33,23 @@ public class MidxPackFilter {
3033
private MidxPackFilter() {
3134
}
3235

36+
private static Comparator<DfsPackDescription> midxComparator = Comparator
37+
.comparingLong(DfsPackDescription::getLastModified)
38+
// If multiple midx where written at the same time, take the one
39+
// with covering most objects first (it must be the tip)
40+
.thenComparingLong(MidxPackFilter::getTotalCoveredObjects)
41+
.reversed();
42+
43+
private static long getTotalCoveredObjects(DfsPackDescription desc) {
44+
long objCount = 0;
45+
DfsPackDescription current = desc;
46+
while (current != null) {
47+
objCount += current.getObjectCount();
48+
current = current.getMultiPackIndexBase();
49+
}
50+
return objCount;
51+
}
52+
3353
/**
3454
* Reorganize the flat list of packs removing the midxs
3555
*
@@ -59,40 +79,64 @@ public static List<DfsPackDescription> skipMidxs(
5979
*/
6080
public static List<DfsPackDescription> useMidx(
6181
List<DfsPackDescription> packs) {
62-
// Take the packs covered by the midxs out of the list
6382
List<DfsPackDescription> midxs = packs.stream()
6483
.filter(desc -> desc.hasFileExt(PackExt.MULTI_PACK_INDEX))
65-
.toList();
84+
.sorted(midxComparator).toList();
85+
for (DfsPackDescription d : midxs) {
86+
System.out.println(String.format(" %s - %d - %d", d.getPackName(),
87+
d.getLastModified(), getTotalCoveredObjects(d)));
88+
}
6689
if (midxs.isEmpty()) {
6790
return packs;
6891
}
6992

70-
Set<DfsPackDescription> inputPacks = new HashSet<>(packs);
71-
Set<DfsPackDescription> allCoveredPacks = new HashSet<>();
72-
for (DfsPackDescription midx : midxs) {
73-
Set<DfsPackDescription> coveredPacks = new HashSet<>();
74-
findCoveredPacks(midx, coveredPacks);
75-
if (!inputPacks.containsAll(coveredPacks)) {
76-
// This midx references packs not in the pack db.
77-
// It could be part of a chain, so we just ignore all midxs
78-
return skipMidxs(packs);
79-
}
80-
allCoveredPacks.addAll(coveredPacks);
93+
Set<DfsPackDescription> packsSet = new HashSet<>(packs);
94+
Optional<DfsPackDescription> bestMidx = midxs.stream()
95+
.filter(midx -> isValid(midx, packsSet)).findFirst();
96+
if (bestMidx.isEmpty()) {
97+
return skipMidxs(packs);
8198
}
8299

83-
return packs.stream().filter(d -> !allCoveredPacks.contains(d))
84-
.collect(Collectors.toList());
100+
// Take the packs covered by the midxs and other midxs themselves out of
101+
// the list
102+
Set<DfsPackDescription> coveredPacksAndMidxs = getAllCoveredPacks(
103+
bestMidx.get());
104+
return packs.stream().filter(p -> !coveredPacksAndMidxs.contains(p))
105+
// At this point, any midx in the list besides bestMidx is a
106+
// straggler.
107+
.filter(p -> !p.hasFileExt(PackExt.MULTI_PACK_INDEX)
108+
|| p.equals(bestMidx.get()))
109+
.collect(Collectors.toCollection(ArrayList::new));
85110
}
86111

87-
private static void findCoveredPacks(DfsPackDescription midx,
88-
Set<DfsPackDescription> covered) {
89-
if (!midx.getCoveredPacks().isEmpty()) {
90-
covered.addAll(midx.getCoveredPacks());
112+
private static boolean isValid(DfsPackDescription midx,
113+
Set<DfsPackDescription> packs) {
114+
DfsPackDescription tip = midx;
115+
while (tip != null) {
116+
if (!packs.containsAll(tip.getCoveredPacks())) {
117+
return false;
118+
}
119+
120+
tip = tip.getMultiPackIndexBase();
91121
}
122+
return true;
123+
}
124+
125+
private static Set<DfsPackDescription> getAllCoveredPacks(
126+
DfsPackDescription midx) {
127+
Set<DfsPackDescription> covered = new HashSet<>();
128+
DfsPackDescription current = midx;
129+
while (current != null) {
130+
if (!current.getCoveredPacks().isEmpty()) {
131+
covered.addAll(current.getCoveredPacks());
132+
}
92133

93-
if (midx.getMultiPackIndexBase() != null) {
94-
findCoveredPacks(midx.getMultiPackIndexBase(), covered);
95-
covered.add(midx.getMultiPackIndexBase());
134+
DfsPackDescription base = current.getMultiPackIndexBase();
135+
if (base != null) {
136+
covered.add(base);
137+
}
138+
current = base;
96139
}
140+
return covered;
97141
}
98142
}

0 commit comments

Comments
 (0)