[DTLTO] Refactor the DTLTO code.#192629
Conversation
|
@llvm/pr-subscribers-lld-coff @llvm/pr-subscribers-lto Author: Konstantin Belochapka (kbelochapka) ChangesDTLTO implementation code has been moved from Patch is 82.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/192629.diff 13 Files Affected:
diff --git a/cross-project-tests/dtlto/dtlto-cache.test b/cross-project-tests/dtlto/dtlto-cache.test
index 5dd67a50ab2c3..82c13eb80dca0 100644
--- a/cross-project-tests/dtlto/dtlto-cache.test
+++ b/cross-project-tests/dtlto/dtlto-cache.test
@@ -31,8 +31,8 @@ RUN: -Wl,--thinlto-remote-compiler=%clang \
RUN: -Wl,--thinlto-cache-dir=cache.dir \
RUN: -Wl,--save-temps
-# Check that there are no backend compilation jobs occurred.
-RUN: grep -wo args populate2.*.dist-file.json | wc -l | grep -qx "\s*1"
+# At this point we expect full cache hits. Distributor will not be invoked and a Json file will not be created.
+RUN: not ls cache.dir/*.dist-file.json
RUN: ls cache.dir | count 3
RUN: %clang -O0 --target=x86_64-linux-gnu -flto=thin -c foo.c -o foo.O0.o
diff --git a/cross-project-tests/dtlto/savetemps-lock.test b/cross-project-tests/dtlto/savetemps-lock.test
index 822744b1dfc4f..17ada42a5ef14 100644
--- a/cross-project-tests/dtlto/savetemps-lock.test
+++ b/cross-project-tests/dtlto/savetemps-lock.test
@@ -30,12 +30,12 @@ RUN: | FileCheck %s --implicit-check-not=warning
CHECK-DAG: Lock any files in the output directory.
CHECK-DAG: warning: could not remove the file 'locked{{/|\\}}t.[[#PID:]].dist-file.json': {{.*}}
# Filename composition: <archive>(<member> at <offset>).<task>.<pid>.<task>.<pid>.native.o.
+CHECK-DAG: warning: could not remove the file 'locked{{/|\\}}t.a(t1.o at [[#T1_OFFSET:]]).1.[[#%X,HEXPID:]].o': {{.*}}
CHECK-DAG: warning: could not remove the file 'locked{{/|\\}}t.a(t1.o at [[#T1_OFFSET:]]).1.[[#%X,HEXPID:]].1.[[#PID]].native.o': {{.*}}
CHECK-DAG: warning: could not remove the file 'locked{{/|\\}}t.a(t1.o at [[#T1_OFFSET]]).1.[[#%X,HEXPID]].1.[[#PID]].native.o.thinlto.bc': {{.*}}
+CHECK-DAG: warning: could not remove the file 'locked{{/|\\}}t.a(t2.o at [[#T2_OFFSET:]]).2.[[#%X,HEXPID]].o': {{.*}}
CHECK-DAG: warning: could not remove the file 'locked{{/|\\}}t.a(t2.o at [[#T2_OFFSET:]]).2.[[#%X,HEXPID]].2.[[#PID]].native.o': {{.*}}
CHECK-DAG: warning: could not remove the file 'locked{{/|\\}}t.a(t2.o at [[#T2_OFFSET]]).2.[[#%X,HEXPID]].2.[[#PID]].native.o.thinlto.bc': {{.*}}
-CHECK-DAG: warning: could not remove temporary DTLTO input file 'locked{{/|\\}}t.a(t1.o at [[#T1_OFFSET]]).1.[[#%X,HEXPID]].o': {{.*}}
-CHECK-DAG: warning: could not remove temporary DTLTO input file 'locked{{/|\\}}t.a(t2.o at [[#T2_OFFSET]]).2.[[#%X,HEXPID]].o': {{.*}}
#--- t1.c
__attribute__((retain)) int t1(int x) { return x; }
diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp
index 445ea52e995da..53a8cd2eb3210 100644
--- a/lld/COFF/LTO.cpp
+++ b/lld/COFF/LTO.cpp
@@ -126,17 +126,7 @@ BitcodeCompiler::BitcodeCompiler(COFFLinkerContext &c) : ctx(c) {
// Initialize ltoObj.
lto::ThinBackend backend;
- if (!ctx.config.dtltoDistributor.empty()) {
- backend = lto::createOutOfProcessThinBackend(
- llvm::hardware_concurrency(ctx.config.thinLTOJobs),
- /*OnWrite=*/nullptr,
- /*ShouldEmitIndexFiles=*/false,
- /*ShouldEmitImportFiles=*/false, ctx.config.outputFile,
- ctx.config.dtltoDistributor, ctx.config.dtltoDistributorArgs,
- ctx.config.dtltoCompiler, ctx.config.dtltoCompilerPrependArgs,
- ctx.config.dtltoCompilerArgs, !ctx.config.saveTempsArgs.empty(),
- createAddBufferFn(files, file_names));
- } else if (ctx.config.thinLTOIndexOnly) {
+ if (ctx.config.thinLTOIndexOnly || !ctx.config.dtltoDistributor.empty()) {
auto OnIndexWrite = [&](StringRef S) { thinIndices.erase(S); };
backend = lto::createWriteIndexesThinBackend(
llvm::hardware_concurrency(ctx.config.thinLTOJobs),
@@ -155,7 +145,12 @@ BitcodeCompiler::BitcodeCompiler(COFFLinkerContext &c) : ctx(c) {
else
ltoObj = std::make_unique<lto::DTLTO>(
createConfig(), backend, ctx.config.ltoPartitions,
- llvm::lto::LTO::LTOKind::LTOK_Default, ctx.config.outputFile,
+ llvm::lto::LTO::LTOKind::LTOK_Default, nullptr,
+ ctx.config.thinLTOEmitImportsFiles, ctx.config.thinLTOIndexOnly,
+ ctx.config.outputFile, ctx.config.dtltoDistributor,
+ ctx.config.dtltoDistributorArgs, ctx.config.dtltoCompiler,
+ ctx.config.dtltoCompilerPrependArgs, ctx.config.dtltoCompilerArgs,
+ createAddBufferFn(files, file_names),
!ctx.config.saveTempsArgs.empty());
}
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index 7b0fe2001439e..d87e83f8a1f38 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -184,21 +184,13 @@ BitcodeCompiler::BitcodeCompiler(Ctx &ctx) : ctx(ctx) {
// Initialize ltoObj.
lto::ThinBackend backend;
auto onIndexWrite = [&](StringRef s) { thinIndices.erase(s); };
- if (ctx.arg.thinLTOIndexOnly) {
+ if (ctx.arg.thinLTOIndexOnly || !ctx.arg.dtltoDistributor.empty()) {
backend = lto::createWriteIndexesThinBackend(
llvm::hardware_concurrency(ctx.arg.thinLTOJobs),
std::string(ctx.arg.thinLTOPrefixReplaceOld),
std::string(ctx.arg.thinLTOPrefixReplaceNew),
std::string(ctx.arg.thinLTOPrefixReplaceNativeObject),
ctx.arg.thinLTOEmitImportsFiles, indexFile.get(), onIndexWrite);
- } else if (!ctx.arg.dtltoDistributor.empty()) {
- backend = lto::createOutOfProcessThinBackend(
- llvm::hardware_concurrency(ctx.arg.thinLTOJobs), onIndexWrite,
- ctx.arg.thinLTOEmitIndexFiles, ctx.arg.thinLTOEmitImportsFiles,
- ctx.arg.outputFile, ctx.arg.dtltoDistributor,
- ctx.arg.dtltoDistributorArgs, ctx.arg.dtltoCompiler,
- ctx.arg.dtltoCompilerPrependArgs, ctx.arg.dtltoCompilerArgs,
- !ctx.arg.saveTempsArgs.empty(), createAddBufferFn(files, filenames));
} else {
backend = lto::createInProcessThinBackend(
llvm::heavyweight_hardware_concurrency(ctx.arg.thinLTOJobs),
@@ -210,6 +202,7 @@ BitcodeCompiler::BitcodeCompiler(Ctx &ctx) : ctx(ctx) {
llvm::lto::LTO::LTOKind::LTOK_UnifiedThin,
llvm::lto::LTO::LTOKind::LTOK_UnifiedRegular,
llvm::lto::LTO::LTOKind::LTOK_Default};
+
if (ctx.arg.dtltoDistributor.empty())
ltoObj = std::make_unique<lto::LTO>(createConfig(ctx), backend,
ctx.arg.ltoPartitions,
@@ -217,7 +210,11 @@ BitcodeCompiler::BitcodeCompiler(Ctx &ctx) : ctx(ctx) {
else
ltoObj = std::make_unique<lto::DTLTO>(
createConfig(ctx), backend, ctx.arg.ltoPartitions,
- ltoModes[ctx.arg.ltoKind], ctx.arg.outputFile,
+ ltoModes[ctx.arg.ltoKind], onIndexWrite, ctx.arg.thinLTOEmitIndexFiles,
+ ctx.arg.thinLTOEmitImportsFiles, ctx.arg.outputFile,
+ ctx.arg.dtltoDistributor, ctx.arg.dtltoDistributorArgs,
+ ctx.arg.dtltoCompiler, ctx.arg.dtltoCompilerPrependArgs,
+ ctx.arg.dtltoCompilerArgs, createAddBufferFn(files, filenames),
!ctx.arg.saveTempsArgs.empty());
// Initialize usedStartStop.
if (ctx.bitcodeFiles.empty())
diff --git a/lld/test/ELF/dtlto/timetrace.test b/lld/test/ELF/dtlto/timetrace.test
index 4567b0a1d4b02..f43f51284b5c4 100644
--- a/lld/test/ELF/dtlto/timetrace.test
+++ b/lld/test/ELF/dtlto/timetrace.test
@@ -32,12 +32,12 @@ RUN: %python filter_order_and_pprint.py %t.json | FileCheck %s
## Check that DTLTO add input file events are recorded.
CHECK: "name": "Add input for DTLTO"
CHECK: "name": "Add input for DTLTO"
-CHECK: "name": "Remove temporary inputs for DTLTO"
+CHECK: "name": "Remove DTLTO temporary files"
CHECK: "name": "Serialize bitcode input for DTLTO"
CHECK-SAME: "detail": "t1.a(t1.bc at [[#ARCHIVE_OFFSET:]]).1.[[PID:[A-F0-9]+]].o"
CHECK: "name": "Total Add input for DTLTO"
CHECK-SAME: "count": 2,
-CHECK: "name": "Total Remove temporary inputs for DTLTO"
+CHECK: "name": "Total Remove DTLTO temporary files"
CHECK-SAME: "count": 1,
CHECK: "name": "Total Serialize bitcode input for DTLTO"
CHECK-SAME: "count": 1,
diff --git a/llvm/include/llvm/DTLTO/DTLTO.h b/llvm/include/llvm/DTLTO/DTLTO.h
index 5a8566b71bc16..248b86674e6bd 100644
--- a/llvm/include/llvm/DTLTO/DTLTO.h
+++ b/llvm/include/llvm/DTLTO/DTLTO.h
@@ -1,83 +1,402 @@
-//===- DTLTO.h - Distributed ThinLTO functions and classes ----*- C++ -*-===//
+//===- DTLTO.h - Distributed ThinLTO implementation -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
-//===---------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// Declarations for Distributed ThinLTO, including the DTLTO class and the
+// distribution driver. The implementation focuses on preparing input files for
+// distribution.
+//
+//===----------------------------------------------------------------------===//
#ifndef LLVM_DTLTO_DTLTO_H
#define LLVM_DTLTO_DTLTO_H
+#include "llvm/ADT/SmallString.h"
#include "llvm/LTO/LTO.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Signals.h"
+
+#include <functional>
+#include <vector>
namespace llvm {
namespace lto {
-// The purpose of this class is to prepare inputs so that distributed ThinLTO
-// backend compilations can succeed.
-//
-// For distributed compilation, each input must exist as an individual bitcode
-// file on disk and be loadable via its ModuleID. This requirement is not met
-// for archive members, as an archive is a collection of files rather than a
-// standalone file. Similarly, for FatLTO objects, the bitcode is stored in a
-// section of the containing ELF object file. To address this, the class ensures
-// that an individual bitcode file exists for each input (by writing it out if
-// necessary) and that the ModuleID is updated to point to it. Module IDs are
-// also normalized on Windows to remove short 8.3 form paths that cannot be
-// loaded on remote machines.
-//
-// The class ensures that lto::InputFile objects are preserved until enough of
-// the LTO pipeline has executed to determine the required per-module
-// information, such as whether a module will participate in ThinLTO.
+class DTLTO;
+
+/// Prepares inputs for Distributed ThinLTO so that backend compilations can use
+/// individual bitcode paths and consistent module IDs.
+///
+/// Each input must exist as an individual bitcode file on disk and be loadable
+/// via its ModuleID. Archive members and FatLTO objects do not satisfy that by
+/// default; this class writes bitcode out when needed and updates ModuleID.
+/// On Windows, module IDs are normalized to remove short 8.3 path components
+/// that are machine-local and break distribution; other normalization is left
+/// to DTLTO distributors.
+///
+/// Input files are kept until the pipeline has determined per-module ThinLTO
+/// participation. addInput() performs: (1) register the input; (2) on Windows,
+/// normalize module ID for standalone bitcode; (3) for thin archive members,
+/// set module ID to the on-disk member path; (4) for other archives and FatLTO,
+/// set module ID to a unique path and serialize content in
+/// handleArchiveInputs().
class DTLTO : public LTO {
using Base = LTO;
public:
LLVM_ABI DTLTO(Config Conf, ThinBackend Backend,
unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode,
- StringRef LinkerOutputFile, bool SaveTemps)
+ IndexWriteCallback OnWrite, bool EmitIndexFiles,
+ bool EmitImportsFiles, StringRef LinkerOutputFile,
+ StringRef Distributor, ArrayRef<StringRef> DistributorArgs,
+ StringRef RemoteCompiler,
+ ArrayRef<StringRef> RemoteCompilerPrependArgs,
+ ArrayRef<StringRef> RemoteCompilerArgs,
+ AddBufferFn AddBufferArg, bool SaveTempsArg)
: Base(std::move(Conf), Backend, ParallelCodeGenParallelismLevel,
LTOMode),
- LinkerOutputFile(LinkerOutputFile), SaveTemps(SaveTemps) {
+ OnWriteCb(OnWrite), ShouldEmitIndexFiles(EmitIndexFiles),
+ ShouldEmitImportFiles(EmitImportsFiles),
+ DistributorParams{Distributor, DistributorArgs,
+ RemoteCompiler, RemoteCompilerPrependArgs,
+ RemoteCompilerArgs, LinkerOutputFile},
+ AddBuffer(AddBufferArg), SaveTemps(SaveTempsArg) {
assert(!LinkerOutputFile.empty() && "expected a valid linker output file");
}
- // Add an input file and prepare it for distribution.
+ /// Add an input file and prepare it for distribution.
+ ///
+ /// This function performs the following tasks:
+ /// 1. Add the input file to the LTO object's list of input files.
+ /// 2. For individual bitcode file inputs on Windows only, overwrite the
+ /// module
+ /// ID with a normalized path to remove short 8.3 form components.
+ /// 3. For thin archive members, overwrite the module ID with the path
+ /// (normalized on Windows) to the member file on disk.
+ /// 4. For archive members and FatLTO objects, overwrite the module ID with a
+ /// unique path (normalized on Windows) naming a file that will contain the
+ /// member content. The file is created and populated later (see
+ /// serializeInputs()).
LLVM_ABI Expected<std::shared_ptr<InputFile>>
addInput(std::unique_ptr<InputFile> InputPtr) override;
-protected:
- // Save the contents of ThinLTO-enabled input files that must be serialized
- // for distribution, such as archive members and FatLTO objects, to individual
- // bitcode files named after the module ID.
- LLVM_ABI llvm::Error serializeInputsForDistribution() override;
+ /// Runs the DTLTO pipeline. This function calls the supplied AddStream
+ /// function to add native object files to the link.
+ ///
+ /// The Cache parameter is optional. If supplied, it will be used to cache
+ /// native object files and add them to the link.
+ ///
+ /// The client will receive at most one callback (via either AddStream or
+ /// Cache) for each task identifier.
+ LLVM_ABI virtual Error run(AddStreamFn AddStream,
+ FileCache Cache = {}) override;
+
+private:
+ /// DTLTO archives support.
+ ///
+ /// Save the contents of ThinLTO-enabled input files that must be serialized
+ /// for distribution, such as archive members and FatLTO objects, to
+ /// individual bitcode files named after the module ID.
+ ///
+ /// Must be called after all input files are added but before optimization
+ /// begins. If a file with that name already exists, it is likely a leftover
+ /// from a previously terminated linker process and can be safely overwritten.
+ LLVM_ABI Error handleArchiveInputs();
+
+ // Remove temporary files created to enable distribution.
+ LLVM_ABI void cleanup();
+
+public:
+ // Mutable and const accessors to the LTO configuration object.
+ Config &getConfig() { return Conf; }
+ const Config &getConfig() const { return Conf; }
- LLVM_ABI void cleanup() override;
+ // Set the LTO kind.
+ void setLTOMode(LTOKind Knd) { LTOMode = Knd; }
+ // Replace the ThinLTO backend (e.g. WriteIndexesThinBackend for the thin
+ // link).
+ void setThinBackend(ThinBackend Backend) { ThinLTO.Backend = Backend; }
private:
- // Bump allocator for a purpose of saving updated module IDs.
+ // Bump allocator for saving updated module IDs.
BumpPtrAllocator PtrAlloc;
+ // String saver backed by PtrAlloc.
StringSaver Saver{PtrAlloc};
- /// The output file to which this LTO invocation will contribute.
- StringRef LinkerOutputFile;
+ using SString = SmallString<128>;
- /// The normalized output directory, derived from LinkerOutputFile.
- StringRef LinkerOutputDir;
+ // Function pointer that defines the callback to add a pre-existing file.
+ AddBufferFn AddBuffer;
+ // Count of jobs that hit the cache.
+ std::atomic<size_t> CachedJobs{0};
+ // Normalized output directory from LinkerOutputFile.
+ SString LinkerOutputDir;
+ // Keep temporary files when true.
+ bool SaveTemps = false;
- /// Controls preservation of any created temporary files.
- bool SaveTemps;
+public:
+ struct Job {
+ // Task index (combines RegularLTO parallel codegen offset with module
+ // index).
+ unsigned Task;
+ // Module identifier (bitcode path) for the ThinLTO module.
+ StringRef ModuleID;
+ // Native object path.
+ StringRef NativeObjectPath;
+ // Per-module summary index path.
+ StringRef SummaryIndexPath;
+ // Per-module imports list path.
+ StringRef ImportsPath;
+ // Bitcode files from which this module imports.
+ ArrayRef<std::string> ImportsFiles;
+ // Cache key from thin link.
+ std::string CacheKey;
+ // On cache miss, stream used to store the compiled object in the cache.
+ AddStreamFn CacheAddStream;
+ // Set when the object was already supplied via the cache callback.
+ bool Cached = false;
+ };
- // Array of input bitcode files for LTO.
- std::vector<std::shared_ptr<lto::InputFile>> InputFiles;
+private:
+ // Backend compilation jobs, one per module.
+ SmallVector<Job> Jobs;
+ // Task index offset for first ThinLTO job.
+ unsigned ThinLTOTaskOffset;
+ // Optional cache for native objects.
+ FileCache Cache;
+ // Keep summary index files when true.
+ bool ShouldEmitIndexFiles = false;
+ // Keep summary import files when true.
+ bool ShouldEmitImportFiles = false;
+ // On index file write callback.
+ IndexWriteCallback OnWriteCb;
- // Cache of whether a path refers to a thin archive.
- StringMap<bool> ArchiveIsThinCache;
+ /// Probes the LTO cache for a compiled native object for the given job.
+ ///
+ /// If no cache is configured (Cache.isValid() is false), returns immediately
+ /// without modifying the job.
+ ///
+ /// Otherwise, looks up the cache using J.CacheKey. On a cache hit, the cached
+ /// object has already been passed to the linker via the Cache callback, so
+ /// J.Cached is set to true, CachedJobs is incremented, and the distributor
+ /// can skip this job. On a cache miss, the cache returns an AddStreamFn; we
+ /// store it in J.CacheAddStream for use when storing the freshly compiled
+ /// object after the distributor runs.
+ ///
+ /// \param J The job to check. Must have Task, CacheKey, and ModuleID set.
+ /// On return, J.Cached and J.CacheAddStream may be updated.
+ ///
+ /// \returns Error::success() on success, or an Error from the cache lookup.
+ Error checkCacheHit(Job &J);
+
+ /// Prepares a single DTLTO backend compilation job for a ThinLTO module.
+ ///
+ /// Called once per module during performCodegen(). This function:
+ ///
+ /// 1. Computes output paths for the native object and summary index files.
+ /// Both are placed in the linker output directory with names of the form
+ /// stem.Task.UID.native.o and stem.Task.UID.thinlto.bc, where stem is
+ /// derived from ModulePath.
+ ///
+ /// 2. Initializes the Job struct with Task, ModuleID (ModulePath), paths,
+ /// ImportsFiles and CacheKey from thin link results, and default values
+ /// for CacheAddStream and Cached.
+ ///
+ /// 3. Calls checkCacheHit() to probe the cache. On a cache hit, J.Cached is
+ /// set and the cached object has already been passed to the linker; the
+ /// distributor will skip this job. On a cache miss, J.CacheAddStream is
+ /// set for later use when storing the compiled object.
+ ///
+ /// 4. Writes the per-module summary index to disk only on cache miss. The
+ /// remote compiler will read this via -fthinlto-index=.
+ ///
+ /// 5. Registers the job's temporary files for removal on abnormal process
+ /// exit when SaveTemps is false (only for files that will be created).
+ ///
+ /// \param ModulePath The module identifier (bitcode path) for the ThinLTO
+ /// module.
+ /// \param Task The task index (combines RegularLTO.ParallelCodeGen
+ /// parallelism offset with the module index).
+ ///
+ /// \returns Error::success() on success, or an Error from saveBuffer() or
+ /// checkCacheHit().
+ Error prepareDtltoJob(StringRef ModulePath, unsigned Task);
+
+ /// Initializes DTLTO state and prepares a job for each ThinLTO module.
+ ///
+ /// Sets task offset, target triple, UID, and Jobs. For each module,...
[truncated]
|
|
@llvm/pr-subscribers-lld-elf Author: Konstantin Belochapka (kbelochapka) ChangesDTLTO implementation code has been moved from Patch is 82.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/192629.diff 13 Files Affected:
diff --git a/cross-project-tests/dtlto/dtlto-cache.test b/cross-project-tests/dtlto/dtlto-cache.test
index 5dd67a50ab2c3..82c13eb80dca0 100644
--- a/cross-project-tests/dtlto/dtlto-cache.test
+++ b/cross-project-tests/dtlto/dtlto-cache.test
@@ -31,8 +31,8 @@ RUN: -Wl,--thinlto-remote-compiler=%clang \
RUN: -Wl,--thinlto-cache-dir=cache.dir \
RUN: -Wl,--save-temps
-# Check that there are no backend compilation jobs occurred.
-RUN: grep -wo args populate2.*.dist-file.json | wc -l | grep -qx "\s*1"
+# At this point we expect full cache hits. Distributor will not be invoked and a Json file will not be created.
+RUN: not ls cache.dir/*.dist-file.json
RUN: ls cache.dir | count 3
RUN: %clang -O0 --target=x86_64-linux-gnu -flto=thin -c foo.c -o foo.O0.o
diff --git a/cross-project-tests/dtlto/savetemps-lock.test b/cross-project-tests/dtlto/savetemps-lock.test
index 822744b1dfc4f..17ada42a5ef14 100644
--- a/cross-project-tests/dtlto/savetemps-lock.test
+++ b/cross-project-tests/dtlto/savetemps-lock.test
@@ -30,12 +30,12 @@ RUN: | FileCheck %s --implicit-check-not=warning
CHECK-DAG: Lock any files in the output directory.
CHECK-DAG: warning: could not remove the file 'locked{{/|\\}}t.[[#PID:]].dist-file.json': {{.*}}
# Filename composition: <archive>(<member> at <offset>).<task>.<pid>.<task>.<pid>.native.o.
+CHECK-DAG: warning: could not remove the file 'locked{{/|\\}}t.a(t1.o at [[#T1_OFFSET:]]).1.[[#%X,HEXPID:]].o': {{.*}}
CHECK-DAG: warning: could not remove the file 'locked{{/|\\}}t.a(t1.o at [[#T1_OFFSET:]]).1.[[#%X,HEXPID:]].1.[[#PID]].native.o': {{.*}}
CHECK-DAG: warning: could not remove the file 'locked{{/|\\}}t.a(t1.o at [[#T1_OFFSET]]).1.[[#%X,HEXPID]].1.[[#PID]].native.o.thinlto.bc': {{.*}}
+CHECK-DAG: warning: could not remove the file 'locked{{/|\\}}t.a(t2.o at [[#T2_OFFSET:]]).2.[[#%X,HEXPID]].o': {{.*}}
CHECK-DAG: warning: could not remove the file 'locked{{/|\\}}t.a(t2.o at [[#T2_OFFSET:]]).2.[[#%X,HEXPID]].2.[[#PID]].native.o': {{.*}}
CHECK-DAG: warning: could not remove the file 'locked{{/|\\}}t.a(t2.o at [[#T2_OFFSET]]).2.[[#%X,HEXPID]].2.[[#PID]].native.o.thinlto.bc': {{.*}}
-CHECK-DAG: warning: could not remove temporary DTLTO input file 'locked{{/|\\}}t.a(t1.o at [[#T1_OFFSET]]).1.[[#%X,HEXPID]].o': {{.*}}
-CHECK-DAG: warning: could not remove temporary DTLTO input file 'locked{{/|\\}}t.a(t2.o at [[#T2_OFFSET]]).2.[[#%X,HEXPID]].o': {{.*}}
#--- t1.c
__attribute__((retain)) int t1(int x) { return x; }
diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp
index 445ea52e995da..53a8cd2eb3210 100644
--- a/lld/COFF/LTO.cpp
+++ b/lld/COFF/LTO.cpp
@@ -126,17 +126,7 @@ BitcodeCompiler::BitcodeCompiler(COFFLinkerContext &c) : ctx(c) {
// Initialize ltoObj.
lto::ThinBackend backend;
- if (!ctx.config.dtltoDistributor.empty()) {
- backend = lto::createOutOfProcessThinBackend(
- llvm::hardware_concurrency(ctx.config.thinLTOJobs),
- /*OnWrite=*/nullptr,
- /*ShouldEmitIndexFiles=*/false,
- /*ShouldEmitImportFiles=*/false, ctx.config.outputFile,
- ctx.config.dtltoDistributor, ctx.config.dtltoDistributorArgs,
- ctx.config.dtltoCompiler, ctx.config.dtltoCompilerPrependArgs,
- ctx.config.dtltoCompilerArgs, !ctx.config.saveTempsArgs.empty(),
- createAddBufferFn(files, file_names));
- } else if (ctx.config.thinLTOIndexOnly) {
+ if (ctx.config.thinLTOIndexOnly || !ctx.config.dtltoDistributor.empty()) {
auto OnIndexWrite = [&](StringRef S) { thinIndices.erase(S); };
backend = lto::createWriteIndexesThinBackend(
llvm::hardware_concurrency(ctx.config.thinLTOJobs),
@@ -155,7 +145,12 @@ BitcodeCompiler::BitcodeCompiler(COFFLinkerContext &c) : ctx(c) {
else
ltoObj = std::make_unique<lto::DTLTO>(
createConfig(), backend, ctx.config.ltoPartitions,
- llvm::lto::LTO::LTOKind::LTOK_Default, ctx.config.outputFile,
+ llvm::lto::LTO::LTOKind::LTOK_Default, nullptr,
+ ctx.config.thinLTOEmitImportsFiles, ctx.config.thinLTOIndexOnly,
+ ctx.config.outputFile, ctx.config.dtltoDistributor,
+ ctx.config.dtltoDistributorArgs, ctx.config.dtltoCompiler,
+ ctx.config.dtltoCompilerPrependArgs, ctx.config.dtltoCompilerArgs,
+ createAddBufferFn(files, file_names),
!ctx.config.saveTempsArgs.empty());
}
diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp
index 7b0fe2001439e..d87e83f8a1f38 100644
--- a/lld/ELF/LTO.cpp
+++ b/lld/ELF/LTO.cpp
@@ -184,21 +184,13 @@ BitcodeCompiler::BitcodeCompiler(Ctx &ctx) : ctx(ctx) {
// Initialize ltoObj.
lto::ThinBackend backend;
auto onIndexWrite = [&](StringRef s) { thinIndices.erase(s); };
- if (ctx.arg.thinLTOIndexOnly) {
+ if (ctx.arg.thinLTOIndexOnly || !ctx.arg.dtltoDistributor.empty()) {
backend = lto::createWriteIndexesThinBackend(
llvm::hardware_concurrency(ctx.arg.thinLTOJobs),
std::string(ctx.arg.thinLTOPrefixReplaceOld),
std::string(ctx.arg.thinLTOPrefixReplaceNew),
std::string(ctx.arg.thinLTOPrefixReplaceNativeObject),
ctx.arg.thinLTOEmitImportsFiles, indexFile.get(), onIndexWrite);
- } else if (!ctx.arg.dtltoDistributor.empty()) {
- backend = lto::createOutOfProcessThinBackend(
- llvm::hardware_concurrency(ctx.arg.thinLTOJobs), onIndexWrite,
- ctx.arg.thinLTOEmitIndexFiles, ctx.arg.thinLTOEmitImportsFiles,
- ctx.arg.outputFile, ctx.arg.dtltoDistributor,
- ctx.arg.dtltoDistributorArgs, ctx.arg.dtltoCompiler,
- ctx.arg.dtltoCompilerPrependArgs, ctx.arg.dtltoCompilerArgs,
- !ctx.arg.saveTempsArgs.empty(), createAddBufferFn(files, filenames));
} else {
backend = lto::createInProcessThinBackend(
llvm::heavyweight_hardware_concurrency(ctx.arg.thinLTOJobs),
@@ -210,6 +202,7 @@ BitcodeCompiler::BitcodeCompiler(Ctx &ctx) : ctx(ctx) {
llvm::lto::LTO::LTOKind::LTOK_UnifiedThin,
llvm::lto::LTO::LTOKind::LTOK_UnifiedRegular,
llvm::lto::LTO::LTOKind::LTOK_Default};
+
if (ctx.arg.dtltoDistributor.empty())
ltoObj = std::make_unique<lto::LTO>(createConfig(ctx), backend,
ctx.arg.ltoPartitions,
@@ -217,7 +210,11 @@ BitcodeCompiler::BitcodeCompiler(Ctx &ctx) : ctx(ctx) {
else
ltoObj = std::make_unique<lto::DTLTO>(
createConfig(ctx), backend, ctx.arg.ltoPartitions,
- ltoModes[ctx.arg.ltoKind], ctx.arg.outputFile,
+ ltoModes[ctx.arg.ltoKind], onIndexWrite, ctx.arg.thinLTOEmitIndexFiles,
+ ctx.arg.thinLTOEmitImportsFiles, ctx.arg.outputFile,
+ ctx.arg.dtltoDistributor, ctx.arg.dtltoDistributorArgs,
+ ctx.arg.dtltoCompiler, ctx.arg.dtltoCompilerPrependArgs,
+ ctx.arg.dtltoCompilerArgs, createAddBufferFn(files, filenames),
!ctx.arg.saveTempsArgs.empty());
// Initialize usedStartStop.
if (ctx.bitcodeFiles.empty())
diff --git a/lld/test/ELF/dtlto/timetrace.test b/lld/test/ELF/dtlto/timetrace.test
index 4567b0a1d4b02..f43f51284b5c4 100644
--- a/lld/test/ELF/dtlto/timetrace.test
+++ b/lld/test/ELF/dtlto/timetrace.test
@@ -32,12 +32,12 @@ RUN: %python filter_order_and_pprint.py %t.json | FileCheck %s
## Check that DTLTO add input file events are recorded.
CHECK: "name": "Add input for DTLTO"
CHECK: "name": "Add input for DTLTO"
-CHECK: "name": "Remove temporary inputs for DTLTO"
+CHECK: "name": "Remove DTLTO temporary files"
CHECK: "name": "Serialize bitcode input for DTLTO"
CHECK-SAME: "detail": "t1.a(t1.bc at [[#ARCHIVE_OFFSET:]]).1.[[PID:[A-F0-9]+]].o"
CHECK: "name": "Total Add input for DTLTO"
CHECK-SAME: "count": 2,
-CHECK: "name": "Total Remove temporary inputs for DTLTO"
+CHECK: "name": "Total Remove DTLTO temporary files"
CHECK-SAME: "count": 1,
CHECK: "name": "Total Serialize bitcode input for DTLTO"
CHECK-SAME: "count": 1,
diff --git a/llvm/include/llvm/DTLTO/DTLTO.h b/llvm/include/llvm/DTLTO/DTLTO.h
index 5a8566b71bc16..248b86674e6bd 100644
--- a/llvm/include/llvm/DTLTO/DTLTO.h
+++ b/llvm/include/llvm/DTLTO/DTLTO.h
@@ -1,83 +1,402 @@
-//===- DTLTO.h - Distributed ThinLTO functions and classes ----*- C++ -*-===//
+//===- DTLTO.h - Distributed ThinLTO implementation -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
-//===---------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// Declarations for Distributed ThinLTO, including the DTLTO class and the
+// distribution driver. The implementation focuses on preparing input files for
+// distribution.
+//
+//===----------------------------------------------------------------------===//
#ifndef LLVM_DTLTO_DTLTO_H
#define LLVM_DTLTO_DTLTO_H
+#include "llvm/ADT/SmallString.h"
#include "llvm/LTO/LTO.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Signals.h"
+
+#include <functional>
+#include <vector>
namespace llvm {
namespace lto {
-// The purpose of this class is to prepare inputs so that distributed ThinLTO
-// backend compilations can succeed.
-//
-// For distributed compilation, each input must exist as an individual bitcode
-// file on disk and be loadable via its ModuleID. This requirement is not met
-// for archive members, as an archive is a collection of files rather than a
-// standalone file. Similarly, for FatLTO objects, the bitcode is stored in a
-// section of the containing ELF object file. To address this, the class ensures
-// that an individual bitcode file exists for each input (by writing it out if
-// necessary) and that the ModuleID is updated to point to it. Module IDs are
-// also normalized on Windows to remove short 8.3 form paths that cannot be
-// loaded on remote machines.
-//
-// The class ensures that lto::InputFile objects are preserved until enough of
-// the LTO pipeline has executed to determine the required per-module
-// information, such as whether a module will participate in ThinLTO.
+class DTLTO;
+
+/// Prepares inputs for Distributed ThinLTO so that backend compilations can use
+/// individual bitcode paths and consistent module IDs.
+///
+/// Each input must exist as an individual bitcode file on disk and be loadable
+/// via its ModuleID. Archive members and FatLTO objects do not satisfy that by
+/// default; this class writes bitcode out when needed and updates ModuleID.
+/// On Windows, module IDs are normalized to remove short 8.3 path components
+/// that are machine-local and break distribution; other normalization is left
+/// to DTLTO distributors.
+///
+/// Input files are kept until the pipeline has determined per-module ThinLTO
+/// participation. addInput() performs: (1) register the input; (2) on Windows,
+/// normalize module ID for standalone bitcode; (3) for thin archive members,
+/// set module ID to the on-disk member path; (4) for other archives and FatLTO,
+/// set module ID to a unique path and serialize content in
+/// handleArchiveInputs().
class DTLTO : public LTO {
using Base = LTO;
public:
LLVM_ABI DTLTO(Config Conf, ThinBackend Backend,
unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode,
- StringRef LinkerOutputFile, bool SaveTemps)
+ IndexWriteCallback OnWrite, bool EmitIndexFiles,
+ bool EmitImportsFiles, StringRef LinkerOutputFile,
+ StringRef Distributor, ArrayRef<StringRef> DistributorArgs,
+ StringRef RemoteCompiler,
+ ArrayRef<StringRef> RemoteCompilerPrependArgs,
+ ArrayRef<StringRef> RemoteCompilerArgs,
+ AddBufferFn AddBufferArg, bool SaveTempsArg)
: Base(std::move(Conf), Backend, ParallelCodeGenParallelismLevel,
LTOMode),
- LinkerOutputFile(LinkerOutputFile), SaveTemps(SaveTemps) {
+ OnWriteCb(OnWrite), ShouldEmitIndexFiles(EmitIndexFiles),
+ ShouldEmitImportFiles(EmitImportsFiles),
+ DistributorParams{Distributor, DistributorArgs,
+ RemoteCompiler, RemoteCompilerPrependArgs,
+ RemoteCompilerArgs, LinkerOutputFile},
+ AddBuffer(AddBufferArg), SaveTemps(SaveTempsArg) {
assert(!LinkerOutputFile.empty() && "expected a valid linker output file");
}
- // Add an input file and prepare it for distribution.
+ /// Add an input file and prepare it for distribution.
+ ///
+ /// This function performs the following tasks:
+ /// 1. Add the input file to the LTO object's list of input files.
+ /// 2. For individual bitcode file inputs on Windows only, overwrite the
+ /// module
+ /// ID with a normalized path to remove short 8.3 form components.
+ /// 3. For thin archive members, overwrite the module ID with the path
+ /// (normalized on Windows) to the member file on disk.
+ /// 4. For archive members and FatLTO objects, overwrite the module ID with a
+ /// unique path (normalized on Windows) naming a file that will contain the
+ /// member content. The file is created and populated later (see
+ /// serializeInputs()).
LLVM_ABI Expected<std::shared_ptr<InputFile>>
addInput(std::unique_ptr<InputFile> InputPtr) override;
-protected:
- // Save the contents of ThinLTO-enabled input files that must be serialized
- // for distribution, such as archive members and FatLTO objects, to individual
- // bitcode files named after the module ID.
- LLVM_ABI llvm::Error serializeInputsForDistribution() override;
+ /// Runs the DTLTO pipeline. This function calls the supplied AddStream
+ /// function to add native object files to the link.
+ ///
+ /// The Cache parameter is optional. If supplied, it will be used to cache
+ /// native object files and add them to the link.
+ ///
+ /// The client will receive at most one callback (via either AddStream or
+ /// Cache) for each task identifier.
+ LLVM_ABI virtual Error run(AddStreamFn AddStream,
+ FileCache Cache = {}) override;
+
+private:
+ /// DTLTO archives support.
+ ///
+ /// Save the contents of ThinLTO-enabled input files that must be serialized
+ /// for distribution, such as archive members and FatLTO objects, to
+ /// individual bitcode files named after the module ID.
+ ///
+ /// Must be called after all input files are added but before optimization
+ /// begins. If a file with that name already exists, it is likely a leftover
+ /// from a previously terminated linker process and can be safely overwritten.
+ LLVM_ABI Error handleArchiveInputs();
+
+ // Remove temporary files created to enable distribution.
+ LLVM_ABI void cleanup();
+
+public:
+ // Mutable and const accessors to the LTO configuration object.
+ Config &getConfig() { return Conf; }
+ const Config &getConfig() const { return Conf; }
- LLVM_ABI void cleanup() override;
+ // Set the LTO kind.
+ void setLTOMode(LTOKind Knd) { LTOMode = Knd; }
+ // Replace the ThinLTO backend (e.g. WriteIndexesThinBackend for the thin
+ // link).
+ void setThinBackend(ThinBackend Backend) { ThinLTO.Backend = Backend; }
private:
- // Bump allocator for a purpose of saving updated module IDs.
+ // Bump allocator for saving updated module IDs.
BumpPtrAllocator PtrAlloc;
+ // String saver backed by PtrAlloc.
StringSaver Saver{PtrAlloc};
- /// The output file to which this LTO invocation will contribute.
- StringRef LinkerOutputFile;
+ using SString = SmallString<128>;
- /// The normalized output directory, derived from LinkerOutputFile.
- StringRef LinkerOutputDir;
+ // Function pointer that defines the callback to add a pre-existing file.
+ AddBufferFn AddBuffer;
+ // Count of jobs that hit the cache.
+ std::atomic<size_t> CachedJobs{0};
+ // Normalized output directory from LinkerOutputFile.
+ SString LinkerOutputDir;
+ // Keep temporary files when true.
+ bool SaveTemps = false;
- /// Controls preservation of any created temporary files.
- bool SaveTemps;
+public:
+ struct Job {
+ // Task index (combines RegularLTO parallel codegen offset with module
+ // index).
+ unsigned Task;
+ // Module identifier (bitcode path) for the ThinLTO module.
+ StringRef ModuleID;
+ // Native object path.
+ StringRef NativeObjectPath;
+ // Per-module summary index path.
+ StringRef SummaryIndexPath;
+ // Per-module imports list path.
+ StringRef ImportsPath;
+ // Bitcode files from which this module imports.
+ ArrayRef<std::string> ImportsFiles;
+ // Cache key from thin link.
+ std::string CacheKey;
+ // On cache miss, stream used to store the compiled object in the cache.
+ AddStreamFn CacheAddStream;
+ // Set when the object was already supplied via the cache callback.
+ bool Cached = false;
+ };
- // Array of input bitcode files for LTO.
- std::vector<std::shared_ptr<lto::InputFile>> InputFiles;
+private:
+ // Backend compilation jobs, one per module.
+ SmallVector<Job> Jobs;
+ // Task index offset for first ThinLTO job.
+ unsigned ThinLTOTaskOffset;
+ // Optional cache for native objects.
+ FileCache Cache;
+ // Keep summary index files when true.
+ bool ShouldEmitIndexFiles = false;
+ // Keep summary import files when true.
+ bool ShouldEmitImportFiles = false;
+ // On index file write callback.
+ IndexWriteCallback OnWriteCb;
- // Cache of whether a path refers to a thin archive.
- StringMap<bool> ArchiveIsThinCache;
+ /// Probes the LTO cache for a compiled native object for the given job.
+ ///
+ /// If no cache is configured (Cache.isValid() is false), returns immediately
+ /// without modifying the job.
+ ///
+ /// Otherwise, looks up the cache using J.CacheKey. On a cache hit, the cached
+ /// object has already been passed to the linker via the Cache callback, so
+ /// J.Cached is set to true, CachedJobs is incremented, and the distributor
+ /// can skip this job. On a cache miss, the cache returns an AddStreamFn; we
+ /// store it in J.CacheAddStream for use when storing the freshly compiled
+ /// object after the distributor runs.
+ ///
+ /// \param J The job to check. Must have Task, CacheKey, and ModuleID set.
+ /// On return, J.Cached and J.CacheAddStream may be updated.
+ ///
+ /// \returns Error::success() on success, or an Error from the cache lookup.
+ Error checkCacheHit(Job &J);
+
+ /// Prepares a single DTLTO backend compilation job for a ThinLTO module.
+ ///
+ /// Called once per module during performCodegen(). This function:
+ ///
+ /// 1. Computes output paths for the native object and summary index files.
+ /// Both are placed in the linker output directory with names of the form
+ /// stem.Task.UID.native.o and stem.Task.UID.thinlto.bc, where stem is
+ /// derived from ModulePath.
+ ///
+ /// 2. Initializes the Job struct with Task, ModuleID (ModulePath), paths,
+ /// ImportsFiles and CacheKey from thin link results, and default values
+ /// for CacheAddStream and Cached.
+ ///
+ /// 3. Calls checkCacheHit() to probe the cache. On a cache hit, J.Cached is
+ /// set and the cached object has already been passed to the linker; the
+ /// distributor will skip this job. On a cache miss, J.CacheAddStream is
+ /// set for later use when storing the compiled object.
+ ///
+ /// 4. Writes the per-module summary index to disk only on cache miss. The
+ /// remote compiler will read this via -fthinlto-index=.
+ ///
+ /// 5. Registers the job's temporary files for removal on abnormal process
+ /// exit when SaveTemps is false (only for files that will be created).
+ ///
+ /// \param ModulePath The module identifier (bitcode path) for the ThinLTO
+ /// module.
+ /// \param Task The task index (combines RegularLTO.ParallelCodeGen
+ /// parallelism offset with the module index).
+ ///
+ /// \returns Error::success() on success, or an Error from saveBuffer() or
+ /// checkCacheHit().
+ Error prepareDtltoJob(StringRef ModulePath, unsigned Task);
+
+ /// Initializes DTLTO state and prepares a job for each ThinLTO module.
+ ///
+ /// Sets task offset, target triple, UID, and Jobs. For each module,...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
c409f70 to
5275bba
Compare
🪟 Windows x64 Test Results
✅ The build succeeded and all tests passed. |
6716605 to
624a047
Compare
teresajohnson
left a comment
There was a problem hiding this comment.
Can you give a high level summary in the PR description of how this was refactored? There seem to be quite a lot of changes to how the backends and DLTO are set up, for example. And also, why there are effects to the tests.
@teresajohnson, At a high level, the idea behind the refactor can be summarized as a change in control flow: Before the refactor: After the refactor: Essentially, DTLTO code was consolidated from multiple places into one source , duplicates were eliminated, and the remainder was refactored into functional pieces. The impact on tests is very minor, no essential functionality had changed—for example, after the refactor DTLTO no longer creates the JSON manifest when every input bitcode file already has a cache entry. |
What is meant by "LTO front end" and "ThinLTO front end" in this context?
Although there is now reuse of the WriteIndexesThinBackend, which causes some other issues / duplication (e.g. the need now for callbacks in emitFiles). Could the goals of this refactor been accomplished while keeping the OutOfProcessThinBackend, and avoiding some of the added complexity other places such as in emitFiles()?
I don't really see much duplication being removed though - can you clarify? I guess there is some in the setup of the BackendThreadPool by reusing WriteIndexesThinBackend, but this seems like fairly minor duplication, and causes some headaches as mentioned above. Could some of the functional pieces have been refactored out independently of this larger refactor? I'm still trying to work through all of the changes in this very large PR. At the least it would be nice if parts that aren't required to be included with the major part of the refactoring could be extracted out and sent as preparation PRs.
I have a few minor comments so far, will send shortly. |
| // The class ensures that lto::InputFile objects are preserved until enough of | ||
| // the LTO pipeline has executed to determine the required per-module | ||
| // information, such as whether a module will participate in ThinLTO. | ||
| class DTLTO; |
There was a problem hiding this comment.
Thanks, removed forward declaration of DTLTO class.
| } | ||
|
|
||
| Error LTO::run(AddStreamFn AddStream, FileCache Cache) { | ||
| llvm::scope_exit CleanUp([this]() { cleanup(); }); |
There was a problem hiding this comment.
Why was this removed? As of PR181269 LTO::cleanup() was being used to finalize optimization remarks, and some additional remarks handling using cleanup() was added in PR182570. Did it somehow move somewhere else? If removed I'm not sure why the tests would not fail.
There was a problem hiding this comment.
Restored, thanks, it was a merge artifact. I am not familiar with PR182570, but it looks the test [llvm/test/ThinLTO/X86/memprof-basic.ll] does not have check for this condition.
| }; | ||
|
|
||
| TimeTraceScope JobScope("Remove DTLTO temporary files"); | ||
| for (const auto &Name : CleanupList) { |
There was a problem hiding this comment.
Nit: no braces for single line loop bodies
There was a problem hiding this comment.
Removes unnecessary braces.
| // Runs the DTLTO thin link phase, producing per-module summary indices, | ||
| // import lists, and cache keys for distribution. | ||
| Error lto::DTLTO::performThinLink() { | ||
| auto ThinIndexBackend = lto::createWriteIndexesThinBackend( |
There was a problem hiding this comment.
createWriteIndexesThinBackend is also called in lld when there is a dtltoDistributor. Why is it done again here and what happened to the one created by the linker?
There was a problem hiding this comment.
Thanks, the instance of WriteIndexesThinBackend created in the linker is not needed for DTLTO and code in the linker will be corrected.
|
|
||
| /// Called by WriteIndexesThinBackend when it needs to write a bitcode | ||
| /// module's summary index. The callback should return a stream to write | ||
| /// the index into. If the callback returns nullptr, the backend falls back |
There was a problem hiding this comment.
Do you mean "If not set, the backend ..." ?
Sorry for the terminological confusion.
|
The OutOfProcessThinBackend duplicates almost all the orchestration logic of WriteIndexesThinBackend (handling paths, iterating over modules, gathering summaries, etc.), so keeping them separate was a
Temporary files cleanup code - before the refactor - was existed in two places - now in one place. Another performance improvement idea [https://github.com//pull/191142] also can be implemented much more simpler on the refactored code. |
|
@teresajohnson , |
Sorry, I went out of town and I have been remiss in getting back to this. Is it ready for re-review? I can't tell from the above whether you planned to split the PR into 2 separate PRs or not. |
|
@teresajohnson ,
|
9d8b48c to
c6a3d68
Compare
I meant break up in terms of different PRs, but I'll review it as is if that isn't possible. However, the PR seems to be in an odd state? I don't see these files and a ton of reviewers got added, and I also see some fixes that seem to have been undone (e.g. the change to CleanUp in LTO::run is back). Also, if you can avoid force pushing that would make reviewing significantly easier - with force pushing I can't compare to prior versions of the PR in github, so I have to restart the review from scratch after every force push which for a PR this large makes the review take even longer. |
@teresajohnson ,
|
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // \file | ||
| // Declarations for Distributed ThinLTO, including the DTLTO class and the |
There was a problem hiding this comment.
While here, can you change this in this and other files and comments to spell out as "Integrated Distributed ThinLTO" when not using "DTLTO" to distinguish from the original distributed ThinLTO? Otherwise I think it gets confusing.
There was a problem hiding this comment.
Meanwhile I'll try to come up with a term for the original distributed ThinLTO and start migrating references to use that...suggestions welcome.
There was a problem hiding this comment.
-
Replaced
Distributed ThinLTOforIntegrated Distributed ThinLTOin the comments. -
Perhaps we should consider
DLTOagain? (we have a plan to distribute regular LTO partitions)
There was a problem hiding this comment.
Perhaps we should consider DLTO again? (we have a plan to distribute regular LTO partitions)
Interesting. Let's keep it as DTLTO for now. Please send an RFC tho when you are ready about this work tho.
| /// This function performs the following tasks: | ||
| /// 1. Add the input file to the LTO object's list of input files. | ||
| /// 2. For individual bitcode file inputs on Windows only, overwrite the | ||
| /// module |
There was a problem hiding this comment.
nit: formatting issue
| /// Must be called after all input files are added but before optimization | ||
| /// begins. If a file with that name already exists, it is likely a leftover | ||
| /// from a previously terminated linker process and can be safely overwritten. | ||
| LLVM_ABI Error handleArchiveInputs(); |
There was a problem hiding this comment.
maybe something that reflects what it is doing and that it isn't just archives, like extractBitcode or serializeEmbeddedBitcode or something like that?
There was a problem hiding this comment.
Renamed to serializeBitcodeArchiveMembers()
There was a problem hiding this comment.
Is FatLTO considered an archive tho? The name is still maybe too narrow. Maybe just remove "Archive"?
|
|
||
| if (Error EC = serializeInputsForDistribution()) | ||
| return EC; | ||
| llvm::scope_exit CleanUp([this]() { LTO::cleanup(); }); |
There was a problem hiding this comment.
Why do we need to call LTO::cleanup() rather than just cleanup() now? Oh I see, DTLTO::run() invokes performThinLink which invokes Base::run, and you want the DTLTO::cleanup to happen later, on exit from DTLTO::run presumably. Can you add a comment here that any derived LTO (specifically DLTO) is expected to invoke its own cleanup itself? And that we don't want to inadvertently do the derived class cleanup too early if it invokes LTO::run?
Thankfully, it looks like LTO::cleanup, which will end up getting called twice, can handle that. But perhaps DTLTO::cleanup should not invoke Base::cleanup?
There was a problem hiding this comment.
Added the comment, removed Base::cleanup() call.
| raw_fd_ostream OS(SummaryPath, EC, sys::fs::OpenFlags::OF_None); | ||
| if (EC) | ||
| return createFileError("cannot open " + Twine(SummaryPath), EC); | ||
| if (!Conf.OnSummaryIndexStoreCb) { |
There was a problem hiding this comment.
I think the handling below in this if/else could be restructured for clarity. Here's a modified version of what gemini came up with:
std::unique_ptr<raw_fd_ostream> FileOS;
raw_ostream *OS = nullptr;
// Resolve the output stream (either file-backed or callback-provided) for the index file.
if (Conf.OnSummaryIndexStoreCb) {
OS = Conf.OnSummaryIndexStoreCb(Task).get();
} else {
FileOS = std::make_unique<raw_fd_ostream>(SummaryPath, EC, sys::fs::OpenFlags::OF_None);
if (EC)
return createFileError("cannot open " + Twine(SummaryPath), EC);
OS = FileOS.get();
}
writeIndexToFile(CombinedIndex, *OS, &ModuleToSummariesForIndex, &DeclarationSummaries);
// Emit imports files if requested, using callback if provided.
if (Conf.OnImportsListStoreCb) {
std::vector<std::string> &ImportsListRef = Conf.OnImportsListStoreCb(Task);
processImportsFiles(ModulePath, ModuleToSummariesForIndex,
[&](StringRef M) { ImportsListRef.push_back(M.str()); });
} else if (ShouldEmitImportsFiles) {
if (Error E = EmitImportsFiles(ModulePath, NewModulePath + ".imports", ModuleToSummariesForIndex))
return E;
}
Also, I think the callbacks could be renamed for clarity. Suggestion on that in the header.
There was a problem hiding this comment.
Replaced the code with some modifications - the Gemini generated code has a problem with dangling pointer.
| if (Error Err = save(SummaryIndexFiles[Task], J.SummaryIndexPath)) | ||
| return Err; | ||
| } | ||
| if (OnWriteCb) |
There was a problem hiding this comment.
Maybe OnIndexWriteCb?
There was a problem hiding this comment.
Renamed into OnIndexWriteCb
| Cfg.OnCacheKeyStoreCb = [&](size_t task) -> std::string & { | ||
| return CacheKeysList[task]; | ||
| }; | ||
| Cfg.OnImportsListStoreCb = [&](size_t task) -> std::vector<std::string> & { |
There was a problem hiding this comment.
What does it mean if ShouldEmitImportFiles is false?
There was a problem hiding this comment.
Added the if statement that prevents the callback initialization if ShouldEmitImportFiles is false.
| BM.setModuleIdentifier(Saver.save(Id.str())); | ||
| return Input; | ||
| if (ShouldEmitImportFiles) | ||
| if (Error Err = save(join(ImportsFilesLists[Task], "\n"), J.ImportsPath)) |
There was a problem hiding this comment.
Maybe use J.ImportsFiles instead of ImportsFilesLists[Task], for consistency with surrounding code.
There was a problem hiding this comment.
Renamed ImportsFilesLists into ImportsFiles
There was a problem hiding this comment.
I meant use J.ImportsFiles instead of ImportsFilesLists[Task], since the surrounding code here references the fields out of J which is already set up (no problem with the rename tho, but accessing it out of J might be clearer here.)
| MemoryBufferRef MB = Input->getFileBuffer(); | ||
| return save(MB.getBuffer(), Path); | ||
| TimeTraceScope JobScope("Remove DTLTO temporary files"); | ||
| for (const auto &Name : CleanupList) |
There was a problem hiding this comment.
Suggest clearing CleanupList after this loop for safety. It doesn't look like this should be called more than once but probably best to leave in a clean state.
There was a problem hiding this comment.
Added CleanupList.clear() after this loop.
| const GVSummaryMapTy &DefinedGlobals = | ||
| ModuleToDefinedGVSummaries.find(ModulePath)->second; | ||
|
|
||
| if (Conf.OnCacheKeyStoreCb) { |
There was a problem hiding this comment.
Can you add a comment about why the cache key is being computed here? And similar to the other callbacks as noted in the header I think this one would be clearer if renamed as to what it is actually doing.
There was a problem hiding this comment.
Added the comment. Renamed the callback to GetCacheKeyOutputString()
teresajohnson
left a comment
There was a problem hiding this comment.
lgtm with some more minor cleanup requests below.
| void setLTOMode(LTOKind Knd) { LTOMode = Knd; } | ||
| // Replace the ThinLTO backend (e.g. WriteIndexesThinBackend for the thin | ||
| // link). | ||
| void setThinBackend(ThinBackend Backend) { ThinLTO.Backend = Backend; } |
There was a problem hiding this comment.
Can this be removed now? No longer called.
There was a problem hiding this comment.
Removed setThinBackend()
Removed setLTOMode(), LTOMode variable initialization moved to DTLTO class constructor.
Replaced ImportsFilesLists[Task] with J.ImportsFiles
Renamed handleArchiveInputs() into serializeLTOInputs()
| auto ThinIndexBackend = lto::createWriteIndexesThinBackend( | ||
| hardware_concurrency(), "", "", "", true, nullptr, nullptr); | ||
| setThinBackend(ThinIndexBackend); | ||
| setLTOMode(lto::LTO::LTOKind::LTOK_UnifiedThin); |
There was a problem hiding this comment.
This one was about setLTOMode - why do we need to override it here? Can it just be set correctly in the constructor, and then remove setLTOMode()?
| BM.setModuleIdentifier(Saver.save(Id.str())); | ||
| return Input; | ||
| if (ShouldEmitImportFiles) | ||
| if (Error Err = save(join(ImportsFilesLists[Task], "\n"), J.ImportsPath)) |
There was a problem hiding this comment.
I meant use J.ImportsFiles instead of ImportsFilesLists[Task], since the surrounding code here references the fields out of J which is already set up (no problem with the rename tho, but accessing it out of J might be clearer here.)
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // \file | ||
| // Declarations for Distributed ThinLTO, including the DTLTO class and the |
There was a problem hiding this comment.
Perhaps we should consider DLTO again? (we have a plan to distribute regular LTO partitions)
Interesting. Let's keep it as DTLTO for now. Please send an RFC tho when you are ready about this work tho.
| /// Must be called after all input files are added but before optimization | ||
| /// begins. If a file with that name already exists, it is likely a leftover | ||
| /// from a previously terminated linker process and can be safely overwritten. | ||
| LLVM_ABI Error handleArchiveInputs(); |
There was a problem hiding this comment.
Is FatLTO considered an archive tho? The name is still maybe too narrow. Maybe just remove "Archive"?
DTLTO implementation code has been moved from `llvm/lib/LTO/` to `llvm/lib/DTLTO/`. This refactor does not change any externally visible behavior, so existing DTLTO tests and documentation remain valid. The move was done to reduce code duplication, improve maintainability, and make it easier to adopt future performance improvements.
…es.cpp, DTLTO.cpp and DTLTODistributionDriver.cpp.
…TLTOInputFiles.cpp.
71b1f36 to
ab18489
Compare
DTLTO implementation code has been moved from
llvm/lib/LTO/tollvm/lib/DTLTO/. This refactor does not change any externally visible behavior, so existing DTLTO tests and documentation remain valid. The move was done to reduce code duplication, improve maintainability, and make it easier to adopt future performance improvements.