Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions src/lake/Lake/Build/Common.lean
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,9 @@ open ResolveOutputs in
/--
Retrieve artifacts from the Lake cache using only the outputs stored in the saved trace file.

If the cache is writable, saves the input-to-output mapping derived from the trace to the cache,
unless a mapping for the `inputHash` already exists.

**For internal use only.**
-/
@[specialize] public def getArtifactsUsingTrace?
Expand All @@ -549,7 +552,7 @@ Retrieve artifacts from the Lake cache using only the outputs stored in the save
try
let arts ← resolveOutputs (.ofData out)
if (← pkg.isArtifactCacheWritable) then
let act := (← getLakeCache).writeOutputs pkg.cacheScope inputHash out
let act := (← getLakeCache).writeOutputs pkg.cacheScope inputHash out (overwrite := false)
if let .error e ← act.toBaseIO then
logWarning s!"could not write outputs to cache: {e}"
return some arts
Expand All @@ -560,17 +563,17 @@ Retrieve artifacts from the Lake cache using only the outputs stored in the save
open ResolveOutputs in
/--
Retrieve artifacts from the Lake cache using the outputs stored in either
the saved trace file or (unless `traceOnly` is `true`) in the cached input-to-content mapping.
the saved trace file or in the cached input-to-content mapping.

**For internal use only.**
-/
@[inline] public nonrec def getArtifacts?
[ResolveOutputs α] (inputHash : Hash) (savedTrace : SavedTrace) (pkg : Package)
: JobM (Option α) := do
if let some a ← getArtifactsUsingCache? inputHash pkg then
return some a
if let some a ← getArtifactsUsingTrace? inputHash savedTrace pkg then
return some a
if let some a ← getArtifactsUsingCache? inputHash pkg then
return some a
return none

/-- **For internal use only.** -/
Expand Down
7 changes: 6 additions & 1 deletion src/lake/Lake/Build/Module.lean
Original file line number Diff line number Diff line change
Expand Up @@ -871,6 +871,7 @@ def Module.recBuildLtar (self : Module) : FetchM (Job FilePath) := do
modify ({· with wantsRebuild := true})
error "archive does not exist and needs to be built"
else
updateAction .build
self.packLtar arts
newTrace s!"{self.name.toString}:ltar"
addTrace art.trace
Expand Down Expand Up @@ -948,7 +949,11 @@ where
-- end up in the build directory and, if writable, the cache
let arts ← mod.restoreAllArtifacts {arts with ltar? := some ltar}
if (← mod.pkg.isArtifactCacheWritable) then
.inl <$> mod.cacheOutputArtifacts setup.isModule restoreAll
let arts ← mod.cacheOutputArtifacts setup.isModule restoreAll
-- Note: Cache service metadata is not preserved on an output update because it would
-- result in downloading module outputs that are not available on the remote.
(← getLakeCache).writeOutputs mod.pkg.cacheScope inputHash arts.descrs (overwrite := true)
return .inl arts
else
return .inl arts
else
Expand Down
31 changes: 19 additions & 12 deletions src/lake/Lake/CLI/Help.lean
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ artifacts. If no mappings are found, Lake will backtrack the Git history up to
will search the repository's entire history (or as far as Git will allow).

By default, Lake will download both the input-to-output mappings and the
output artifacts for a package. By using `--mappings-onlys`, Lake will only
output artifacts for a package. By using `--mappings-only`, Lake will only
download the mappings and delay downloading artifacts until they are needed.

If a download for an artifact fails or the download process for a whole
Expand Down Expand Up @@ -505,12 +505,16 @@ OPTIONS:
--service=<name> cache service to fetch from on demand
--scope=<remote-scope> the prefix of artifacts within the service
--repo=<github-repo> for Reservoir, a GitHub repository scope
--no-overwrite do not overwrite existing mappings

Reads a list of input-to-output mappings from the provided file and adds
them to the local Lake cache. If `--service` is provided, the output artifacts
can then be fetched lazily from that service during a Lake build. The service
must either be `reservoir` or be configured through the Lake system
configuration (see the help page of `lake cache services` for details).
them to the local Lake cache. Mappings already in the cache are overwritten
unless `--no-overwrite` is specified.

If `--service` is provided, the output artifacts can then be fetched lazily
from that service during a Lake build. The service must either be `reservoir`
or be configured through the Lake system configuration (see the help page of
`lake cache services` for details).

Since Lake does not currently use cryptographically secure hashes for
artifacts and outputs, artifacts in a cache service are prefixed with a scope
Expand All @@ -522,25 +526,28 @@ def helpCacheStage :=
"Copy build outputs from the cache to a staging directory

USAGE:
lake cache stage <mappings> <staging-directory>
lake cache stage <mappings> <staging-directory> [--force-overwrite]

Creates the staging directory and copies the mappings file to it. Then, it
copies all artifacts described within the mappings file from the cache to the
staging directory. Errors if any of the artifacts described cannot be found in
the cache."
Creates the staging directory and copies the mappings file to it. Then,
it copies all artifacts described within the mappings file from the cache to
the staging directory. Artifacts in the staging directory are not overwritten
unless `--force-overwrite` is specified. Errors if any of the artifacts
described cannot be found in the cache."

def helpCacheUnstage :=
"Cache build outputs from a staging directory

USAGE:
lake cache unstage <staging-directory>
lake cache unstage <staging-directory> [--force-overwrite]

Copies the mappings and artifacts stored in staging directory (e.g., via
`lake cache stage`) back into the cache.

Reads the mappings file located at `outputs.jsonl` within the staging
directory and writes the mappings to the Lake cache. Then, it copies the
described artifacts from the staging directory into the cache."
described artifacts from the staging directory into the cache. Mappings and
artifacts already in the cache are not overwritten unless `--force-overwrite`
is specified."

def helpCachePutStaged :=
"Upload build outputs from a staging directory to a remote service
Expand Down
28 changes: 23 additions & 5 deletions src/lake/Lake/CLI/Main.lean
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ public structure LakeOptions where
outFormat : OutFormat := .text
offline : Bool := false
outputsFile? : Option FilePath := none
overwrite? : Option Bool := none
forceDownload : Bool := false
mappingsOnly : Bool := false
service? : Option String := none
Expand Down Expand Up @@ -285,6 +286,8 @@ def lakeLongOption : (opt : String) → CliM PUnit
| "--offline" => modifyThe LakeOptions ({· with offline := true})
| "--wfail" => modifyThe LakeOptions ({· with failLv := .warning})
| "--iofail" => modifyThe LakeOptions ({· with failLv := .info})
| "--no-overwrite" => modifyThe LakeOptions ({· with overwrite? := some false})
| "--force-overwrite" => modifyThe LakeOptions ({· with overwrite? := some true})
| "--force-download" => modifyThe LakeOptions ({· with forceDownload := true})
| "--download-arts" => modifyThe LakeOptions ({· with mappingsOnly := false})
| "--mappings-only" => modifyThe LakeOptions ({· with mappingsOnly := true})
Expand Down Expand Up @@ -465,6 +468,10 @@ protected def get : CliM PUnit := do
let cfg ← mkLoadConfig opts
let ws ← loadWorkspace cfg
let cache := ws.lakeCache
let overwrite := opts.overwrite?.getD true
unless overwrite do
-- artifacts of skipped mappings with `--no-overwrite` cannot be cleanly handled
error "`--no-overwrite` is not supported for `cache get`"
if let some file := mappings? then liftM (m := LoggerIO) do
if opts.mappingsOnly then
error "`--mappings-only` is not supported with a mappings file; use `lake cache add` instead"
Expand All @@ -485,7 +492,7 @@ protected def get : CliM PUnit := do
else
return ws.defaultCacheService
let map ← CacheMap.load file
cache.writeMap ws.root.cacheScope map service.name? (some remoteScope)
cache.writeMap ws.root.cacheScope map service.name? (some remoteScope) overwrite
let descrs ← map.collectOutputDescrs
service.downloadArtifacts descrs cache remoteScope opts.forceDownload
else
Expand Down Expand Up @@ -530,7 +537,7 @@ protected def get : CliM PUnit := do
return map
else
findOutputs cache service pkg remoteScope opts platform toolchain
cache.writeMap pkg.cacheScope map service.name? (some remoteScope)
cache.writeMap pkg.cacheScope map service.name? (some remoteScope) overwrite
unless opts.mappingsOnly do
let descrs ← map.collectOutputDescrs
service.downloadArtifacts descrs cache remoteScope opts.forceDownload
Expand All @@ -544,7 +551,7 @@ protected def get : CliM PUnit := do
let toolchain := cacheToolchain pkg toolchain
try
let map ← findOutputs cache service pkg remoteScope opts platform toolchain
cache.writeMap pkg.cacheScope map service.name? (some remoteScope)
cache.writeMap pkg.cacheScope map service.name? (some remoteScope) overwrite
unless opts.mappingsOnly do
let descrs ← map.collectOutputDescrs
service.downloadArtifacts descrs cache remoteScope opts.forceDownload
Expand Down Expand Up @@ -677,7 +684,8 @@ protected def add : CliM PUnit := do
error (serviceNotFound service ws.lakeConfig.config.cache.services)
return some (.ofString service)
let map ← CacheMap.load file
ws.lakeCache.writeMap localScope map service? opts.scope?
let overwrite := opts.overwrite?.getD true
ws.lakeCache.writeMap localScope map service? opts.scope? overwrite

private def stagingOutputsFile := "outputs.jsonl"

Expand All @@ -699,9 +707,12 @@ protected def stage : CliM PUnit := do
let descrs ← map.collectOutputDescrs
IO.FS.createDirAll stagingDir
copyFile mappingsFile (stagingDir / stagingOutputsFile)
let overwrite := opts.overwrite?.getD false
let ok ← descrs.foldlM (init := true) fun ok descr => do
let cachePath := cache.artifactDir / descr.relPath
let stagingPath := stagingDir / descr.relPath
unless overwrite || !(← stagingPath.pathExists) do
return ok
match (← copyFile cachePath stagingPath |>.toBaseIO) with
| .ok _ =>
return ok
Expand Down Expand Up @@ -739,9 +750,16 @@ protected def unstage : CliM PUnit := do
let descrs ← map.collectOutputDescrs
let artDir := ws.lakeCache.artifactDir
IO.FS.createDirAll artDir
let overwrite := opts.overwrite?.getD false
let ok ← descrs.foldlM (init := true) fun ok descr => do
let cachePath := artDir/ descr.relPath
let stagingPath := stagingDir / descr.relPath
if (← cachePath.pathExists) then
if overwrite then
-- Cache artifacts are read-only, so the old artifact must be deleted first.
IO.FS.removeFile cachePath
else
return ok
match (← copyFile stagingPath cachePath |>.toBaseIO) with
| .ok _ =>
return ok
Expand All @@ -754,7 +772,7 @@ protected def unstage : CliM PUnit := do
unless ok do
logError "failed to copy all outputs to the staging directory"
exit 1
ws.lakeCache.writeMap localScope map service? opts.scope?
ws.lakeCache.writeMap localScope map service? opts.scope? overwrite

protected def putStaged : CliM PUnit := do
processOptions lakeOption
Expand Down
14 changes: 11 additions & 3 deletions src/lake/Lake/Config/Cache.lean
Original file line number Diff line number Diff line change
Expand Up @@ -434,22 +434,30 @@ public def getArtifact (cache : Cache) (descr : ArtifactDescr) : EIO String Arti
def writeOutputsCore
(cache : Cache) (scope : String) (inputHash : Hash) (out : Json)
(service? : Option CacheServiceName) (remoteScope? : Option CacheServiceScope)
(overwrite : Bool)
: IO Unit := do
let file := cache.outputsFile scope inputHash
createParentDirs file
let out := {service?, scope? := remoteScope?, data := out : CacheOutput}
IO.FS.writeFile file (toJson out).pretty
let contents := (toJson out).pretty
if overwrite then
IO.FS.writeFile file contents
else
writeFileIfNew file contents

/-- Cache the outputs corresponding to the given input for the package. -/
@[inline] public def writeOutputs
[ToJson α] (cache : Cache) (scope : String) (inputHash : Hash) (outputs : α)
: IO Unit := cache.writeOutputsCore scope inputHash (toJson outputs) none none
(service? : Option CacheServiceName := none) (remoteScope? : Option CacheServiceScope := none)
(overwrite := true)
: IO Unit := cache.writeOutputsCore scope inputHash (toJson outputs) service? remoteScope? overwrite

/-- Cache the input-to-outputs mappings from a `CacheMap`. -/
public def writeMap
(cache : Cache) (scope : String) (map : CacheMap)
(service? : Option CacheServiceName := none) (remoteScope? : Option CacheServiceScope := none)
: IO Unit := map.forM fun i e => cache.writeOutputsCore scope i e.out service? remoteScope?
(overwrite := true)
: IO Unit := map.forM fun i e => cache.writeOutputsCore scope i e.out service? remoteScope? overwrite

/-- Retrieve the cached outputs corresponding to the given input for the package (if any). -/
public def readOutputs? (cache : Cache) (scope : String) (inputHash : Hash) : LogIO (Option CacheOutput) := do
Expand Down
2 changes: 2 additions & 0 deletions tests/lake/tests/cache/online-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ test_err 'service `bogus` not found in system configuration'\
cache put bogus.jsonl --scope='bogus' --service='bogus'

# Test `cache get` command errors for bad configurations
test_err '`--no-overwrite` is not supported for `cache get`' \
cache get --no-overwrite
test_err 'the `--platform` and `--toolchain` options do nothing' \
cache get bogus.jsonl --scope='bogus' --platform='bogus' --wfail
test_err 'the `--platform` and `--toolchain` options do nothing' \
Expand Down
38 changes: 35 additions & 3 deletions tests/lake/tests/cache/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -233,11 +233,41 @@ test_run cache unstage .lake/staging
# Verify that `cache unstage` fails if staging artifacts are missing
test_cmd mkdir -p .lake/staging-empty
test_cmd cp .lake/outputs.jsonl .lake/staging-empty/outputs.jsonl
test_err 'artifact not found in staging directory' cache unstage .lake/staging-empty
test_err 'artifact not found in staging directory' cache unstage .lake/staging-empty \
--force-overwrite # needed since the artifact is in the cache

# Test stage/unstage behavior regarding `--no-overwrite` / `--force-overwrite`
test_cmd rm -rf "$CACHE_DIR"
test_run build Test:static -o .lake/outputs.jsonl
cache_art=$(echo "$CACHE_DIR"/artifacts/*.a)
test_exp -s $cache_art
staging_art=$(echo .lake/staging/*.a)
test_exp -s $staging_art
# Verify stage overwritting
test_cmd rm -rf .lake/staging
test_run cache stage .lake/outputs.jsonl .lake/staging --no-overwrite
test_exp -f $staging_art # verify copy can occur with `--no-overwrite`
test_cmd rm $staging_art
test_cmd touch $staging_art
test_exp ! -s $staging_art
test_run cache stage .lake/outputs.jsonl .lake/staging
test_exp ! -s $staging_art
test_run cache stage .lake/outputs.jsonl .lake/staging --force-overwrite
test_exp -s $staging_art
# Verify unstage overwritting
test_cmd rm -rf "$CACHE_DIR"
test_run cache unstage .lake/staging --no-overwrite
test_exp -f $cache_art # verify copy can occur with `--no-overwrite`
test_cmd rm $staging_art
test_cmd touch $staging_art
test_exp ! -s $staging_art
test_run cache unstage .lake/staging
test_exp -s $cache_art
test_run cache unstage .lake/staging --force-overwrite
test_exp ! -s $cache_art

# Verify that `lake cache clean` deletes the cache directory
test_exp -d "$CACHE_DIR"
test_cmd cp -r "$CACHE_DIR" .lake/cache-backup
test_run cache clean
test_exp ! -d "$CACHE_DIR"

Expand Down Expand Up @@ -280,9 +310,11 @@ if command -v jq > /dev/null; then # skip if no jq found
test_cmd rm -f $libPath
inputHash=$(jq -r '.depHash' $libPath.trace)
echo $inputHash
test_cmd rm -f $libPath.trace
echo bogus > "$CACHE_DIR/outputs/test/$inputHash.json"
test_out 'invalid JSON' build Test:static
test_cmd rm -f $libPath
test_exp -f $libPath
test_cmd rm -f $libPath $libPath.trace
echo '"bogus"' > "$CACHE_DIR/outputs/test/$inputHash.json"
test_out 'some output(s) have issues' build Test:static
test_exp -f $libPath
Expand Down
35 changes: 33 additions & 2 deletions tests/lake/tests/ltar/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ source ../common.sh
export LAKE_CACHE_DIR=

#-------------------------------------------------------------------------------
# The tests covers the (offline) use of `leantar` in Lake
# This test suite covers the (offline) use of `leantar` in Lake
#-------------------------------------------------------------------------------

# Test regular build does not produce an `ltar`
Expand All @@ -19,7 +19,7 @@ test_err "archive does not exist and needs to be built" \
build +Test:ltar --no-build -v

# Test the build of the `ltar` facet
test_run build +Test:ltar -v
test_out "Built Test:ltar" build +Test:ltar -v
test_exp -f .lake/build/ir/Test.ltar

# Test that Lake unpacks the archive if the module's artifacts are missing
Expand Down Expand Up @@ -54,6 +54,37 @@ test_run -f restoreAll.toml build +Test -v --wfail -o .lake/outputs.jsonl
test_cmd ls .lake/cache/artifacts/*.ltar
test_exp -f .lake/build/ir/Test.ltar

# Test that the cache input-to-output mapping is overwritten without
# `--no-overwrite`. Since the build mapping includes module outputs and the
# tracked mapping only includes the `ltar`, the tracking mapping should
# require an unpack, whereas the build mapping should not.
rm -rf .lake/build
no_match_text ltar .lake/cache/outputs/test/*.json
test_run cache add .lake/outputs.jsonl --no-overwrite
no_match_text ltar .lake/cache/outputs/test/*.json
LAKE_ARTIFACT_CACHE=true test_not_out "leantar" build +Test --no-build -v
rm -rf .lake/build
no_match_text ltar .lake/cache/outputs/test/*.json
test_run cache add .lake/outputs.jsonl --service reservoir --repo 'foo/bar'
match_text ltar .lake/cache/outputs/test/*.json
match_text reservoir .lake/cache/outputs/test/*.json
LAKE_ARTIFACT_CACHE=true test_out "leantar" build +Test --no-build -v
# Unpack should have updated the cached mapping: added module outputs,
# removed service metadata, and left the ltar.
rm -rf .lake/build
match_text ltar .lake/cache/outputs/test/*.json
match_text olean .lake/cache/outputs/test/*.json
no_match_text reservoir .lake/cache/outputs/test/*.json
LAKE_ARTIFACT_CACHE=true test_not_out "leantar" build +Test --no-build -v

# Test that Lake prefers the local trace outputs over the cache
rm -f .lake/build/lib/lean/*.[!t]*
test_run cache add .lake/outputs.jsonl
match_text ltar .lake/cache/outputs/test/*.json
LAKE_ARTIFACT_CACHE=true test_not_out "leantar" build +Test --no-build -v
# Test that the cache output is not overwritten without an unpack
match_text ltar .lake/cache/outputs/test/*.json

# Test producing an `ltar` without already restored artifacts
rm -rf .lake/cache .lake/build
LAKE_ARTIFACT_CACHE=true test_run build +Test -v
Expand Down
Loading