Skip to content

Commit 7ee50b6

Browse files
committed
DfsPackCompactor: Accept DfsPackDescription as input
DfsPackFile keeps a reference to its index and keeps it alive until compact is done. With DfsPackDescriptions, we can create the DfsPackFile on demand and discard it as soon as we are done with it. This helps to reduce peak memory consumption when compacting lots of packs. Accept DfsPackDescriptions as input. Callers can use the compactor with DfsPackFile/DfsReftables OR with DfsPackDescriptions, but not both. Change-Id: I40ee21eaf080dab5458242fcaf43e2d16a6a6964
1 parent e629e9e commit 7ee50b6

1 file changed

Lines changed: 112 additions & 65 deletions

File tree

org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java

Lines changed: 112 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@
2020
import java.util.ArrayList;
2121
import java.util.Collection;
2222
import java.util.Collections;
23-
import java.util.Comparator;
2423
import java.util.HashSet;
2524
import java.util.Iterator;
2625
import java.util.List;
2726
import java.util.Set;
27+
import java.util.stream.Collectors;
2828

2929
import org.eclipse.jgit.annotations.Nullable;
3030
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
@@ -64,8 +64,11 @@
6464
*/
6565
public class DfsPackCompactor {
6666
private final DfsRepository repo;
67-
private final List<DfsPackFile> srcPacks;
68-
private final List<DfsReftable> srcReftables;
67+
68+
private final List<DfsPackDescription> inputDescsPacks;
69+
70+
private final List<DfsPackDescription> inputDescsReftables;
71+
6972
private final List<ObjectIdSet> exclude;
7073

7174
private final List<DfsPackDescription> prune;
@@ -143,8 +146,8 @@ public DfsPackCompactor setPreCommitHook(PreCommitHook hook) {
143146
*/
144147
public DfsPackCompactor(DfsRepository repository) {
145148
repo = repository;
146-
srcPacks = new ArrayList<>();
147-
srcReftables = new ArrayList<>();
149+
inputDescsPacks = new ArrayList<>();
150+
inputDescsReftables = new ArrayList<>();
148151
exclude = new ArrayList<>(4);
149152
prune = new ArrayList<>();
150153
}
@@ -175,8 +178,7 @@ public DfsPackCompactor setReftableConfig(ReftableConfig cfg) {
175178
* @return {@code this}
176179
*/
177180
public DfsPackCompactor add(DfsPackFile pack) {
178-
srcPacks.add(pack);
179-
return this;
181+
return addPack(pack.getPackDescription());
180182
}
181183

182184
/**
@@ -187,7 +189,54 @@ public DfsPackCompactor add(DfsPackFile pack) {
187189
* @return {@code this}
188190
*/
189191
public DfsPackCompactor add(DfsReftable table) {
190-
srcReftables.add(table);
192+
return addReftable(table.getPackDescription());
193+
}
194+
195+
/**
196+
* Add a pack to be compacted
197+
* <p>
198+
* This adds the objects in the pack to the compaction. If the description
199+
* has reftables, they are ignored. Caller must explicitely call
200+
* {@link #addReftable(DfsPackDescription)} with the same description to
201+
* include them.
202+
* <p>
203+
* This method creates DfsPackFile instances when needed and can close them
204+
* as soon as is done with them. This reduces the peak memory while
205+
* processing many files.
206+
*
207+
* @param desc
208+
* a pack description to combine.
209+
* @return {@code this}
210+
* @since 7.7
211+
*/
212+
public DfsPackCompactor addPack(DfsPackDescription desc) {
213+
if (!desc.hasFileExt(PACK)) {
214+
throw new IllegalArgumentException(
215+
"Adding for pack compaction a pack without data");
216+
}
217+
inputDescsPacks.add(desc);
218+
return this;
219+
}
220+
221+
/**
222+
* Add a reftable to be compacted.
223+
* <p>
224+
* This adds the reftables in the pack to the compaction. If the description
225+
* has pack data (objects), they are ignored. Caller must explicitely call
226+
* {@link #addPack(DfsPackDescription)} with the same description to include
227+
* them.
228+
*
229+
* @param desc
230+
* a pack description to combine.
231+
* @return {@code this}
232+
* @since 7.7
233+
*/
234+
public DfsPackCompactor addReftable(DfsPackDescription desc) {
235+
if (!desc.hasFileExt(REFTABLE)) {
236+
throw new IllegalArgumentException(
237+
"Adding for reftable compaction a pack without reftables");
238+
}
239+
inputDescsReftables.add(desc);
191240
return this;
192241
}
193242

@@ -248,7 +297,7 @@ public void compact(ProgressMonitor pm) throws IOException {
248297

249298
DfsObjDatabase objdb = repo.getObjectDatabase();
250299
try (DfsReader ctx = objdb.newReader()) {
251-
if (reftableConfig != null && !srcReftables.isEmpty()) {
300+
if (reftableConfig != null && !inputDescsReftables.isEmpty()) {
252301
compactReftables(ctx);
253302
}
254303
compactPacks(ctx, pm);
@@ -313,18 +362,24 @@ private long estimatePackSize() {
313362
// Include the final pack file header and trailer size here and ignore
314363
// the same from individual pack files.
315364
long size = 32;
316-
for (DfsPackFile pack : srcPacks) {
317-
size += pack.getPackDescription().getFileSize(PACK) - 32;
365+
for (DfsPackDescription desc : inputDescsPacks) {
366+
if (desc.hasFileExt(PACK)) {
367+
size += desc.getFileSize(PACK) - 32;
368+
}
318369
}
319370
return size;
320371
}
321372

322373
private void compactReftables(DfsReader ctx) throws IOException {
323374
DfsObjDatabase objdb = repo.getObjectDatabase();
324-
Collections.sort(srcReftables, objdb.reftableComparator());
375+
Collections.sort(inputDescsReftables,
376+
DfsPackDescription.reftableComparator());
377+
List<DfsReftable> tables = inputDescsReftables.stream()
378+
.map(desc -> new DfsReftable(DfsBlockCache.getInstance(), desc))
379+
.collect(Collectors.toList());
325380

326381
initOutDesc(objdb);
327-
try (DfsReftableStack stack = DfsReftableStack.open(ctx, srcReftables);
382+
try (DfsReftableStack stack = DfsReftableStack.open(ctx, tables);
328383
DfsOutputStream out = objdb.writeFile(outDesc, REFTABLE)) {
329384
ReftableCompactor compact = new ReftableCompactor(out);
330385
compact.addAll(stack.readers());
@@ -348,14 +403,11 @@ private void initOutDesc(DfsObjDatabase objdb) throws IOException {
348403
* @return all of the source packs that fed into this compaction.
349404
*/
350405
public Collection<DfsPackDescription> getSourcePacks() {
351-
Set<DfsPackDescription> src = new HashSet<>();
352-
for (DfsPackFile pack : srcPacks) {
353-
src.add(pack.getPackDescription());
354-
}
355-
for (DfsReftable table : srcReftables) {
356-
src.add(table.getPackDescription());
357-
}
358-
return src;
406+
HashSet<DfsPackDescription> all = new HashSet<>(
407+
inputDescsPacks.size() + inputDescsReftables.size());
408+
all.addAll(inputDescsPacks);
409+
all.addAll(inputDescsReftables);
410+
return all;
359411
}
360412

361413
/**
@@ -383,15 +435,8 @@ public List<PackStatistics> getNewPackStatistics() {
383435
}
384436

385437
private Set<DfsPackDescription> toPrune() {
386-
Set<DfsPackDescription> packs = new HashSet<>();
387-
for (DfsPackFile pack : srcPacks) {
388-
packs.add(pack.getPackDescription());
389-
}
390-
391-
Set<DfsPackDescription> reftables = new HashSet<>();
392-
for (DfsReftable table : srcReftables) {
393-
reftables.add(table.getPackDescription());
394-
}
438+
Set<DfsPackDescription> packs = new HashSet<>(inputDescsPacks);
439+
Set<DfsPackDescription> reftables = new HashSet<>(inputDescsReftables);
395440

396441
for (Iterator<DfsPackDescription> i = packs.iterator(); i.hasNext();) {
397442
DfsPackDescription d = i.next();
@@ -418,49 +463,20 @@ private Set<DfsPackDescription> toPrune() {
418463
private void addObjectsToPack(PackWriter pw, DfsReader ctx,
419464
ProgressMonitor pm) throws IOException,
420465
IncorrectObjectTypeException {
421-
// Sort packs by description ordering, this places newer packs before
422-
// older packs, allowing the PackWriter to be handed newer objects
423-
// first and older objects last.
424-
Collections.sort(
425-
srcPacks,
426-
Comparator.comparing(
427-
DfsPackFile::getPackDescription,
428-
DfsPackDescription.objectLookupComparator()));
429-
430466
rw = new RevWalk(ctx);
431467
added = rw.newFlag("ADDED"); //$NON-NLS-1$
432468
isBase = rw.newFlag("IS_BASE"); //$NON-NLS-1$
433469
List<RevObject> baseObjects = new BlockList<>();
434470

435471
pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN);
436-
for (DfsPackFile src : srcPacks) {
437-
List<ObjectIdWithOffset> want = toInclude(src, ctx);
438-
if (want.isEmpty())
439-
continue;
440-
441-
PackReverseIndex rev = src.getReverseIdx(ctx);
442-
DfsObjectRepresentation rep = new DfsObjectRepresentation(src);
443-
for (ObjectIdWithOffset id : want) {
444-
int type = src.getObjectType(ctx, id.offset);
445-
RevObject obj = rw.lookupAny(id, type);
446-
if (obj.has(added))
447-
continue;
448472

449-
pm.update(1);
450-
pw.addObject(obj);
451-
obj.add(added);
452-
453-
src.fillRepresentation(rep, id.offset, ctx, rev);
454-
if (rep.getFormat() != PACK_DELTA)
455-
continue;
456-
457-
RevObject base = rw.lookupAny(rep.getDeltaBase(), type);
458-
if (!base.has(added) && !base.has(isBase)) {
459-
baseObjects.add(base);
460-
base.add(isBase);
461-
}
462-
}
473+
inputDescsPacks.sort(DfsPackDescription.objectLookupComparator());
474+
for (DfsPackDescription desc : inputDescsPacks) {
475+
DfsPackFile src = repo.getObjectDatabase()
476+
.createDfsPackFile(DfsBlockCache.getInstance(), desc);
477+
addObjectFromPack(pw, ctx, pm, src, baseObjects);
463478
}
479+
464480
for (RevObject obj : baseObjects) {
465481
if (!obj.has(added)) {
466482
pm.update(1);
@@ -471,6 +487,37 @@ private void addObjectsToPack(PackWriter pw, DfsReader ctx,
471487
pm.endTask();
472488
}
473489

490+
private void addObjectFromPack(PackWriter pw, DfsReader ctx,
491+
ProgressMonitor pm, DfsPackFile src, List<RevObject> baseObjects)
492+
throws IOException {
493+
List<ObjectIdWithOffset> want = toInclude(src, ctx);
494+
if (want.isEmpty())
495+
return;
496+
497+
PackReverseIndex rev = src.getReverseIdx(ctx);
498+
DfsObjectRepresentation rep = new DfsObjectRepresentation(src);
499+
for (ObjectIdWithOffset id : want) {
500+
int type = src.getObjectType(ctx, id.offset);
501+
RevObject obj = rw.lookupAny(id, type);
502+
if (obj.has(added))
503+
continue;
504+
505+
pm.update(1);
506+
pw.addObject(obj);
507+
obj.add(added);
508+
509+
src.fillRepresentation(rep, id.offset, ctx, rev);
510+
if (rep.getFormat() != PACK_DELTA)
511+
continue;
512+
513+
RevObject base = rw.lookupAny(rep.getDeltaBase(), type);
514+
if (!base.has(added) && !base.has(isBase)) {
515+
baseObjects.add(base);
516+
base.add(isBase);
517+
}
518+
}
519+
}
520+
474521
private List<ObjectIdWithOffset> toInclude(DfsPackFile src, DfsReader ctx)
475522
throws IOException {
476523
PackIndex srcIdx = src.getPackIndex(ctx);

0 commit comments

Comments
 (0)