|
| 1 | + |
| 2 | +# ============================================================================== |
| 3 | +# Shared CAS folder utilities (used by FolderBlobprovider & LinkBlobprovider) |
| 4 | +# ============================================================================== |
| 5 | + |
"""
    blobfilename(folder, m::Multihash) -> String

Return the on-disk path of the blob identified by `m` inside `folder`.

The path has the form `folder/<hex[1:6]>/<hex[7:8]>/<hex>`, where `hex` is
`bytes2hex(m.bytes)`. The two shard directory levels (Git/IPFS style) keep any
single directory from growing too large. A leading `~` in `folder` is expanded.
"""
function blobfilename(folder::String, m::Multihash)
    digest = bytes2hex(m.bytes)
    # First six hex characters name the outer shard, the next two the inner shard.
    outer = digest[1:6]
    inner = digest[7:8]
    sharded = joinpath(folder, outer, inner, digest)
    return expanduser(sharded)
end
| 18 | + |
"""
    fetchBlob_folder(folder, m::Multihash)

Read the blob for `m` from the sharded layout under `folder`.

Returns the file contents as bytes, or `nothing` when no regular file exists at
the blob's path.
"""
function fetchBlob_folder(folder::String, m::Multihash)
    blobpath = blobfilename(folder, m)
    if isfile(blobpath)
        return read(blobpath)
    end
    return nothing
end
| 24 | + |
"""
    purgeBlob_folder!(folder, m::Multihash) -> Int

Delete the blob file for `m` from the sharded layout under `folder`.

Returns `1` when a file was removed and `0` when no file existed. After the
removal, both shard directory levels that contained the blob are pruned if they
were left empty; `folder` itself is never removed.
"""
function purgeBlob_folder!(folder::String, m::Multihash)
    path = blobfilename(folder, m)
    isfile(path) || return 0
    rm(path)
    # The blob lives at folder/<shard1>/<shard2>/<hex>. Walk up exactly two
    # levels so that an emptied <shard2> and then an emptied <shard1> are both
    # cleaned up, stopping at the first directory that still has entries.
    shard_dir = dirname(path)
    for _ in 1:2
        isempty(readdir(shard_dir)) || break
        rm(shard_dir)
        shard_dir = dirname(shard_dir)
    end
    return 1
end
| 37 | + |
"""
    listBlobs_folder(folder) -> Vector{Multihash}

Collect the hashes of all blobs stored under `folder`.

The directory tree is walked recursively; every file name accepted by
`tryparse(Multihash, name)` contributes one entry, other files are ignored.
Returns an empty vector when `folder` (after `~` expansion) is not a directory.
"""
function listBlobs_folder(folder::String)
    store_root = expanduser(folder)
    isdir(store_root) || return Multihash[]

    found = Multihash[]
    for (_, _, names) in walkdir(store_root), name in names
        parsed = tryparse(Multihash, name)
        parsed === nothing || push!(found, parsed)
    end
    return found
end
| 53 | + |
| 54 | +# ============================================================================== |
| 55 | +# FolderBlobprovider |
| 56 | +# ============================================================================== |
"""
    FolderBlobprovider

Blob provider that keeps each blob as a file inside a sharded folder layout
(see `blobfilename`).
"""
struct FolderBlobprovider <: AbstractBlobprovider
    # Identifier for this provider instance.
    label::Symbol
    # Root folder of the store, kept exactly as passed in (`~` is expanded on use).
    folder::String
end
| 61 | + |
"""
    FolderBlobprovider(foldername; label = :default, createfolder = true)

Construct a `FolderBlobprovider` rooted at `foldername`.

When `createfolder` is true and the (`~`-expanded) folder does not exist yet, it
is created and an info message is logged. The provider stores `foldername` as
given, not the expanded path.
"""
function FolderBlobprovider(
    foldername::String;
    label::Symbol = :default,
    createfolder = true,
)
    storepath = expanduser(foldername)
    must_create = createfolder && !isdir(storepath)
    if must_create
        @info "Folder '$storepath' doesn't exist - creating."
        mkpath(storepath)
    end
    return FolderBlobprovider(label, foldername)
end
| 74 | + |
# Path of the blob `m` inside this provider's folder.
function blobfilename(provider::FolderBlobprovider, m::Multihash)
    return blobfilename(provider.folder, m)
end
| 76 | + |
# Read the blob `m` from the provider's folder; `nothing` when it is absent.
fetchBlob(provider::FolderBlobprovider, m::Multihash) =
    fetchBlob_folder(provider.folder, m)
| 80 | + |
"""
    putBlob!(provider::FolderBlobprovider, m::Multihash, blob::Vector{UInt8}) -> Multihash

Store `blob` under the hash `m` in the provider's sharded folder and return `m`.

Nothing is written when a file for `m` already exists. Otherwise the bytes are
first written to a temporary file in the destination directory and then moved
into place, so an interrupted write never leaves a truncated file at the blob's
final path (which later calls would mistake for a complete blob).
"""
function putBlob!(provider::FolderBlobprovider, m::Multihash, blob::Vector{UInt8})
    filename = blobfilename(provider, m)
    isfile(filename) && return m

    shard_dir = dirname(filename)
    mkpath(shard_dir)
    # Temp file lives next to the target so the final move stays on one filesystem.
    tmp_path = tempname(shard_dir; cleanup = false)
    try
        open(tmp_path, "w") do f
            write(f, blob)
        end
        # `force = true`: a concurrent writer may have created the same
        # content-addressed file in the meantime; replacing it is harmless.
        mv(tmp_path, filename; force = true)
    catch
        # Do not leave a stray partial temp file behind on failure.
        rm(tmp_path; force = true)
        rethrow()
    end
    return m
end
| 91 | + |
# Remove the blob `m` from the provider's folder; returns the number of files removed.
purgeBlob!(provider::FolderBlobprovider, m::Multihash) =
    purgeBlob_folder!(provider.folder, m)
| 95 | + |
# True when a file for `m` exists in the provider's folder.
function hasBlob(provider::FolderBlobprovider, m::Multihash)
    blobpath = blobfilename(provider, m)
    return isfile(blobpath)
end
| 99 | + |
# All hashes found under the provider's folder.
listBlobs(provider::FolderBlobprovider) = listBlobs_folder(provider.folder)
| 103 | + |
| 104 | +# ============================================================================== |
| 105 | +# MemoryBlobprovider |
| 106 | +# ============================================================================== |
"""
    MemoryBlobprovider

Blob provider that keeps all blobs in an in-process dictionary keyed by
`Multihash`.
"""
struct MemoryBlobprovider <: AbstractBlobprovider
    # Identifier for this provider instance.
    label::Symbol
    # Blob bytes indexed by their hash.
    blobs::Dict{Multihash, Vector{UInt8}}
end
| 111 | + |
# Create an empty in-memory provider with the given `label`.
function MemoryBlobprovider(; label::Symbol = :default)
    empty_store = Dict{Multihash, Vector{UInt8}}()
    return MemoryBlobprovider(label, empty_store)
end
| 115 | + |
# Look up the blob stored under `m`; `nothing` when the key is absent.
# The stored vector itself is returned, not a copy.
fetchBlob(store::MemoryBlobprovider, m::Multihash) = get(store.blobs, m, nothing)
| 119 | + |
"""
    putBlob!(store::MemoryBlobprovider, m::Multihash, blob::Vector{UInt8}) -> Multihash

Store a copy of `blob` under `m` unless an entry for `m` already exists, and
return `m`.
"""
function putBlob!(store::MemoryBlobprovider, m::Multihash, blob::Vector{UInt8})
    # The closure (and therefore the copy) only runs when `m` is not yet a key.
    get!(() -> copy(blob), store.blobs, m)
    return m
end
| 126 | + |
# Drop the entry for `m`; returns 1 when an entry was removed, 0 otherwise.
function purgeBlob!(store::MemoryBlobprovider, m::Multihash)
    if haskey(store.blobs, m)
        delete!(store.blobs, m)
        return 1
    end
    return 0
end
| 132 | + |
# True when an entry for `m` is held in memory.
function hasBlob(store::MemoryBlobprovider, m::Multihash)
    return haskey(store.blobs, m)
end
| 134 | + |
# Hashes of all blobs currently held in memory, as a freshly allocated vector.
function listBlobs(store::MemoryBlobprovider)
    stored_hashes = keys(store.blobs)
    return collect(stored_hashes)
end
| 136 | + |
| 137 | +# ============================================================================== |
| 138 | +# CachedBlobprovider — Write-through cache wrapping a local and remote provider |
| 139 | +# ============================================================================== |
| 140 | + |
"""
    CachedBlobprovider(local, remote; label = :default)

Write-through cache built from two `AbstractBlobprovider`s: a `local` cache in
front of an authoritative `remote` store.

| Verb         | Behaviour                                                        |
|:-------------|:-----------------------------------------------------------------|
| `putBlob!`   | writes to `remote` first, then to `local`                        |
| `fetchBlob`  | tries `local`; on a miss reads `remote` and caches the result    |
| `purgeBlob!` | purges from both providers                                       |
| `hasBlob`    | checks `local`, then falls back to `remote`                      |
| `listBlobs`  | delegates to `remote`                                            |
"""
struct CachedBlobprovider <: AbstractBlobprovider
    # Identifier for this provider instance.
    label::Symbol
    # Cache consulted first on reads.
    local_provider::AbstractBlobprovider
    # Authoritative store behind the cache.
    remote_provider::AbstractBlobprovider
end
| 157 | + |
# Convenience constructor: providers are positional, the label is a keyword.
CachedBlobprovider(
    local_provider::AbstractBlobprovider,
    remote_provider::AbstractBlobprovider;
    label::Symbol = :default,
) = CachedBlobprovider(label, local_provider, remote_provider)
| 165 | + |
"""
    fetchBlob(store::CachedBlobprovider, m::Multihash)

Fetch the blob for `m`, preferring the local provider.

On a local miss the remote provider is queried; a blob found there is written
to the local provider under the same hash `m` before being returned. Returns
`nothing` when neither provider has the blob.
"""
function fetchBlob(store::CachedBlobprovider, m::Multihash)
    blob = fetchBlob(store.local_provider, m)
    isnothing(blob) || return blob

    blob = fetchBlob(store.remote_provider, m)
    if !isnothing(blob)
        # Pass `m` explicitly so the cached entry is keyed by the requested hash.
        putBlob!(store.local_provider, m, blob)
    end
    return blob
end
| 175 | + |
# Write-through store: remote provider first, then the local cache. Returns `m`.
function putBlob!(store::CachedBlobprovider, m::Multihash, blob::Vector{UInt8})
    for target in (store.remote_provider, store.local_provider)
        putBlob!(target, m, blob)
    end
    return m
end
| 181 | + |
# Purge `m` from the remote provider, then the local one; returns the summed counts.
function purgeBlob!(store::CachedBlobprovider, m::Multihash)
    purged_remote = purgeBlob!(store.remote_provider, m)
    purged_local = purgeBlob!(store.local_provider, m)
    return purged_remote + purged_local
end
| 187 | + |
# True when either provider has `m`; the remote is only asked on a local miss.
function hasBlob(store::CachedBlobprovider, m::Multihash)
    hasBlob(store.local_provider, m) && return true
    return hasBlob(store.remote_provider, m)
end
| 191 | + |
# Listing is answered by the remote provider only.
function listBlobs(store::CachedBlobprovider)
    return listBlobs(store.remote_provider)
end
| 193 | + |
| 194 | +# ============================================================================== |
| 195 | +# LinkBlobprovider — Hardlink existing files into the CAS folder layout |
| 196 | +# ============================================================================== |
| 197 | + |
"""
    LinkBlobprovider(folder; label = :default)

CAS provider that makes existing files addressable by `Multihash` by
hardlinking them into the same sharded folder layout that `FolderBlobprovider`
uses, instead of copying their contents.

`putBlob!(provider, filepath::String)` opens the file, hands the open stream to
`Multihash(hash_func, io)` to obtain the hash, and then hardlinks the file to
its CAS path. The remaining Layer 1 verbs (`fetchBlob`, `hasBlob`,
`purgeBlob!`, `listBlobs`) use the same shared folder helpers as
`FolderBlobprovider`.
"""
struct LinkBlobprovider <: AbstractBlobprovider
    # Identifier for this provider instance.
    label::Symbol
    # Root folder of the store, kept exactly as passed in (`~` is expanded on use).
    folder::String
end
| 214 | + |
"""
    LinkBlobprovider(foldername; label = :default, createfolder = true)

Construct a `LinkBlobprovider` rooted at `foldername`.

When `createfolder` is true and the (`~`-expanded) folder does not exist yet, it
is created and an info message is logged. The provider stores `foldername` as
given, not the expanded path.
"""
function LinkBlobprovider(foldername::String; label::Symbol = :default, createfolder = true)
    storepath = expanduser(foldername)
    must_create = createfolder && !isdir(storepath)
    if must_create
        @info "Folder '$storepath' doesn't exist - creating."
        mkpath(storepath)
    end
    return LinkBlobprovider(label, foldername)
end
| 223 | + |
| 224 | +# Shared CAS folder layout — identical to FolderBlobprovider |
# Path of the blob `m` inside this provider's folder.
function blobfilename(provider::LinkBlobprovider, m::Multihash)
    return blobfilename(provider.folder, m)
end
| 226 | + |
# Read the blob `m` from the provider's folder; `nothing` when it is absent.
function fetchBlob(provider::LinkBlobprovider, m::Multihash)
    return fetchBlob_folder(provider.folder, m)
end
| 228 | + |
# True when a file for `m` exists in the provider's folder.
function hasBlob(provider::LinkBlobprovider, m::Multihash)
    blobpath = blobfilename(provider, m)
    return isfile(blobpath)
end
| 230 | + |
# All hashes found under the provider's folder.
function listBlobs(provider::LinkBlobprovider)
    return listBlobs_folder(provider.folder)
end
| 232 | + |
# Remove the CAS entry for `m` from the provider's folder; returns the number of
# files removed.
purgeBlob!(provider::LinkBlobprovider, m::Multihash) =
    purgeBlob_folder!(provider.folder, m)
| 236 | + |
"""
    putBlob!(provider::LinkBlobprovider, filepath::String; hash_func = sha2_256) -> Multihash

Hash the file at `filepath` and hardlink it into the provider's CAS folder.

The file is opened for reading and the open stream is passed to
`Multihash(hash_func, io)`; the resulting hash is then used to link the file
into place. Returns that hash.
"""
function putBlob!(
    provider::LinkBlobprovider,
    filepath::String;
    hash_func::Function = sha2_256,
)
    io = open(filepath, "r")
    m = try
        Multihash(hash_func, io)
    finally
        # Always release the handle, even if hashing throws.
        close(io)
    end
    return putBlob!(provider, m, filepath)
end
| 249 | + |
"""
    putBlob!(provider::LinkBlobprovider, m::Multihash, filepath::String) -> Multihash

Hardlink the file at `filepath` to the CAS path for `m` and return `m`.

Nothing is done when a file already exists at the CAS path. Missing shard
directories are created before linking.
"""
function putBlob!(provider::LinkBlobprovider, m::Multihash, filepath::String)
    target_path = blobfilename(provider, m)
    isfile(target_path) && return m
    mkpath(dirname(target_path))
    hardlink(filepath, target_path)
    return m
end
0 commit comments