Skip to content

Commit f69f683

Browse files
committed
Rebuild the Blob system as a CAS model
1 parent 78de65a commit f69f683

34 files changed

Lines changed: 1659 additions & 1117 deletions

NEWS.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,18 @@ Listing news on any major breaking changes in DFG. For regular changes, see int
1010
- `blobidFilter` -> `whereBlobid`
1111
- `variableLabelFilter` -> `whereVariableLabel`
1212

13+
- The Blob system was redesigned and refactored to a Content-Addressable Storage (CAS) model with Multihash keys, and renamed to Blobprovider to better reflect the abstraction. See #1157 for full discussion and design details.
14+
- Renamed Blobstore → Blobprovider
15+
- `AbstractBlobStore` → `AbstractBlobprovider`; concrete types renamed accordingly (`FolderBlobprovider`, `MemoryBlobprovider`, `CachedBlobprovider`).
16+
- All CRUD helpers renamed: `addBlobstore!` → `addBlobprovider!`, `getBlobstore` → `getBlobprovider`, `deleteBlobstore!` → `deleteBlobprovider!`, etc.
17+
- `putBlob!` now returns a `Multihash` and `fetchBlob` / `purgeBlob!` take a `Multihash` key.
18+
- Layer 1 verbs renamed: `getBlob(provider, hash)` → `fetchBlob` (returns `nothing` on miss), `deleteBlob!` → `purgeBlob!`.
19+
- `checkHash` decodes multihash to verify blob integrity.
20+
- `saveBlob_Variable!` → `saveVariableBlob!`, `loadBlob_Variable` → `loadVariableBlob`
21+
- Same pattern for Factor, Graph, Agent variants.
22+
- Old names kept as `const` aliases for backward compatibility.
23+
- `deleteBlob_Variable` and similar wrappers no longer exist. Use `deleteVariableBlobentry!` (metadata) and `purgeBlob!(provider, multihash)` (physical) directly. Warning: be careful when deleting blobs and make sure they are no longer in use anywhere, because the multihash id can be shared by multiple blobs.
24+
1325
# v0.28
1426
- Reading or deserializing of factor graphs created prior to v0.25 is no longer supported with the complete removal of User/Robot/Session
1527
- Deprecated AbstractRelativeMinimize and AbstractManifoldsMinimize

Project.toml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ version = "0.28.0"
44

55
[deps]
66
Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
7-
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
87
CodecZlib = "944b1d66-785c-5afd-91f1-9de20f533193"
98
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
109
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
@@ -44,7 +43,6 @@ DFGPlots = "GraphMakie"
4443
Aqua = "0.8"
4544
Arrow = "2.7"
4645
Base64 = "1.11"
47-
CSV = "0.10"
4846
CodecZlib = "0.7"
4947
Dates = "1.11"
5048
Distributions = "0.23, 0.24, 0.25"

docs/src/blob_ref.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ Modules = [DistributedFactorGraphs]
1313
1414
Pages = [
1515
"entities/Blobentry.jl",
16-
"entities/Blobstore.jl",
16+
"entities/Blobprovider.jl",
1717
]
1818
1919
```
@@ -24,10 +24,10 @@ Modules = [DistributedFactorGraphs]
2424
2525
Pages = [
2626
"services/blobentry_ops.jl",
27-
"services/blobstore_ops.jl",
27+
"services/blobprovider_ops.jl",
2828
"Serialization/BlobPacking.jl",
2929
"services/blob_save_load.jl",
30-
"Blobstores/Blobstores.jl",
30+
"Blobproviders/Blobproviders.jl",
3131
]
3232
3333
```

src/Blobproviders/Blobproviders.jl

Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
2+
# ==============================================================================
3+
# Shared CAS folder utilities (used by FolderBlobprovider & LinkBlobprovider)
4+
# ==============================================================================
5+
6+
"""
7+
blobfilename(folder, m::Multihash) -> String
8+
9+
Generates the physical file path for a blob using a Git/IPFS style sharding
10+
strategy to prevent OS directory-size limits.
11+
"""
12+
function blobfilename(folder::String, m::Multihash)
    # The full hex digest is the file name; its first 6 and next 2 characters
    # name the two nested shard directories above it.
    digest_hex = bytes2hex(m.bytes)
    shard_outer, shard_inner = digest_hex[1:6], digest_hex[7:8]
    blobpath = joinpath(folder, shard_outer, shard_inner, digest_hex)
    return expanduser(blobpath)
end
18+
19+
"""
    fetchBlob_folder(folder, m::Multihash)

Read the blob file for `m` under `folder` and return its bytes, or return
`nothing` when no file exists at the path given by `blobfilename(folder, m)`.
"""
function fetchBlob_folder(folder::String, m::Multihash)
    blobpath = blobfilename(folder, m)
    return isfile(blobpath) ? read(blobpath) : nothing
end
24+
25+
"""
    purgeBlob_folder!(folder, m::Multihash) -> Int

Delete the file stored for `m` under `folder`. Return `1` if a file was removed
and `0` if no file existed at `blobfilename(folder, m)`.

After the delete, shard directories that became empty are pruned: first the
inner shard directory, then the outer one. The root `folder` itself is never
removed.
"""
function purgeBlob_folder!(folder::String, m::Multihash)
    path = blobfilename(folder, m)
    isfile(path) || return 0
    rm(path)

    # A blob sits exactly two shard levels below `folder`, so walk up at most
    # that far and stop at the first directory that still has entries.
    shard_dir = dirname(path)
    for _ in 1:2
        isempty(readdir(shard_dir)) || break
        rm(shard_dir)
        shard_dir = dirname(shard_dir)
    end
    return 1
end
37+
38+
"""
    listBlobs_folder(folder) -> Vector{Multihash}

Walk `folder` (after `~` expansion) recursively and collect every file name that
`tryparse(Multihash, name)` accepts. Returns an empty vector when the folder
does not exist; file names that do not parse are skipped.
"""
function listBlobs_folder(folder::String)
    base_dir = expanduser(folder)
    isdir(base_dir) || return Multihash[]

    found = Multihash[]
    for (_, _, names) in walkdir(base_dir), name in names
        parsed = tryparse(Multihash, name)
        isnothing(parsed) || push!(found, parsed)
    end
    return found
end
53+
54+
# ==============================================================================
55+
# FolderBlobprovider
56+
# ==============================================================================
57+
"""
    FolderBlobprovider(foldername; label = :default, createfolder = true)

Blob provider that keeps blobs as files under `foldername`.

When `createfolder` is true and the `~`-expanded folder does not exist yet, it is
created and an `@info` message is logged. The provider stores `foldername`
exactly as passed in.
"""
struct FolderBlobprovider <: AbstractBlobprovider
    label::Symbol
    folder::String
end

function FolderBlobprovider(
    foldername::String;
    label::Symbol = :default,
    createfolder = true,
)
    storepath = expanduser(joinpath(foldername))
    if createfolder
        if !isdir(storepath)
            @info "Folder '$storepath' doesn't exist - creating."
            mkpath(storepath)
        end
    end
    return FolderBlobprovider(label, foldername)
end
74+
75+
# Path lookup and reads go through the shared folder helpers, keyed on the
# provider's root folder.
function blobfilename(provider::FolderBlobprovider, m::Multihash)
    return blobfilename(provider.folder, m)
end

fetchBlob(provider::FolderBlobprovider, m::Multihash) =
    fetchBlob_folder(provider.folder, m)
80+
81+
"""
    putBlob!(provider::FolderBlobprovider, m::Multihash, blob::Vector{UInt8}) -> Multihash

Store `blob` at `blobfilename(provider, m)` and return `m`.

Nothing is written when a file already exists at that path. Otherwise the bytes
are written to a temporary file in the destination directory and then renamed
into place. An interrupted write therefore does not leave a truncated file under
the blob's final name, which later calls would treat as already stored.
"""
function putBlob!(provider::FolderBlobprovider, m::Multihash, blob::Vector{UInt8})
    filename = blobfilename(provider, m)
    isfile(filename) && return m

    shard_dir = dirname(filename)
    mkpath(shard_dir)

    # Stage next to the destination so the final rename stays on one filesystem.
    # `cleanup = false`: the file is moved away on success and removed below on
    # failure, so no process-exit cleanup is needed.
    staging = tempname(shard_dir; cleanup = false)
    try
        open(staging, "w") do f
            return write(f, blob)
        end
        mv(staging, filename; force = true)
    catch
        # Do not leave a stray staging file in the shard directory.
        rm(staging; force = true)
        rethrow()
    end
    return m
end
91+
92+
# Delete, existence check and listing: thin forwards to the folder helpers.
purgeBlob!(provider::FolderBlobprovider, m::Multihash) =
    purgeBlob_folder!(provider.folder, m)

hasBlob(provider::FolderBlobprovider, m::Multihash) = isfile(blobfilename(provider, m))

listBlobs(provider::FolderBlobprovider) = listBlobs_folder(provider.folder)
103+
104+
# ==============================================================================
105+
# MemoryBlobprovider
106+
# ==============================================================================
107+
"""
    MemoryBlobprovider(; label = :default)

Blob provider that keeps blobs in a `Dict` keyed by `Multihash`. The keyword
constructor starts with an empty dictionary.
"""
struct MemoryBlobprovider <: AbstractBlobprovider
    label::Symbol
    blobs::Dict{Multihash, Vector{UInt8}}
end

function MemoryBlobprovider(; label::Symbol = :default)
    empty_table = Dict{Multihash, Vector{UInt8}}()
    return MemoryBlobprovider(label, empty_table)
end
115+
116+
# Returns the stored byte vector for `m`, or `nothing` when the key is absent.
fetchBlob(store::MemoryBlobprovider, m::Multihash) = get(store.blobs, m, nothing)
119+
120+
"""
    putBlob!(store::MemoryBlobprovider, m::Multihash, blob::Vector{UInt8}) -> Multihash

Insert a copy of `blob` under key `m` unless the key is already present, then
return `m`. An existing entry is left untouched.
"""
function putBlob!(store::MemoryBlobprovider, m::Multihash, blob::Vector{UInt8})
    # `get!` only runs the closure (and therefore only copies) on a miss.
    get!(() -> copy(blob), store.blobs, m)
    return m
end
126+
127+
"""
    purgeBlob!(store::MemoryBlobprovider, m::Multihash) -> Int

Remove the entry for `m`. Return `1` when an entry was removed and `0` when the
key was not present.
"""
function purgeBlob!(store::MemoryBlobprovider, m::Multihash)
    if haskey(store.blobs, m)
        delete!(store.blobs, m)
        return 1
    end
    return 0
end
132+
133+
# Membership test and key listing straight off the backing dictionary.
function hasBlob(store::MemoryBlobprovider, m::Multihash)
    return haskey(store.blobs, m)
end

function listBlobs(store::MemoryBlobprovider)
    return collect(keys(store.blobs))
end
136+
137+
# ==============================================================================
138+
# CachedBlobprovider — Write-through cache wrapping a local and remote provider
139+
# ==============================================================================
140+
141+
"""
142+
CachedBlobprovider(local, remote; label = :default)
143+
144+
A write-through CAS cache that composes two `AbstractBlobprovider`s.
145+
146+
- **`putBlob!`**: Writes to both `remote` and `local` (remote first).
147+
- **`fetchBlob`**: Reads from `local` first; on miss, fetches from `remote` and caches locally.
148+
- **`purgeBlob!`**: Purges from both stores.
149+
- **`hasBlob`**: Checks local, falls back to remote.
150+
- **`listBlobs`**: Delegates to remote (the authoritative provider).
151+
"""
152+
struct CachedBlobprovider <: AbstractBlobprovider
    label::Symbol
    local_provider::AbstractBlobprovider
    remote_provider::AbstractBlobprovider
end

# Convenience constructor: providers are positional (local first, then remote)
# and the label is an optional keyword.
CachedBlobprovider(
    local_provider::AbstractBlobprovider,
    remote_provider::AbstractBlobprovider;
    label::Symbol = :default,
) = CachedBlobprovider(label, local_provider, remote_provider)
165+
166+
"""
    fetchBlob(store::CachedBlobprovider, m::Multihash)

Return the blob for `m`, or `nothing` when neither provider has it.

The local provider is consulted first. On a local miss the remote provider is
queried, and a remote hit is written into the local provider under the same key
`m` before being returned.
"""
function fetchBlob(store::CachedBlobprovider, m::Multihash)
    blob = fetchBlob(store.local_provider, m)
    !isnothing(blob) && return blob

    blob = fetchBlob(store.remote_provider, m)
    if !isnothing(blob)
        # Cache under the requested key, so the next local lookup for `m` hits
        # regardless of which hash function produced `m`.
        putBlob!(store.local_provider, m, blob)
    end
    return blob
end
175+
176+
"""
    putBlob!(store::CachedBlobprovider, m::Multihash, blob::Vector{UInt8}) -> Multihash

Write `blob` under `m` to the remote provider and then to the local provider;
return `m`.
"""
function putBlob!(store::CachedBlobprovider, m::Multihash, blob::Vector{UInt8})
    # Remote goes first, then local.
    for target in (store.remote_provider, store.local_provider)
        putBlob!(target, m, blob)
    end
    return m
end
181+
182+
"""
    purgeBlob!(store::CachedBlobprovider, m::Multihash)

Purge `m` from the remote provider and then from the local provider, returning
the sum of the two purge counts.
"""
function purgeBlob!(store::CachedBlobprovider, m::Multihash)
    remote_count = purgeBlob!(store.remote_provider, m)
    local_count = purgeBlob!(store.local_provider, m)
    return remote_count + local_count
end
187+
188+
# True when either provider has `m`; the remote is only asked on a local miss.
function hasBlob(store::CachedBlobprovider, m::Multihash)
    hasBlob(store.local_provider, m) && return true
    return hasBlob(store.remote_provider, m)
end

# Listing reflects the remote provider only.
function listBlobs(store::CachedBlobprovider)
    return listBlobs(store.remote_provider)
end
193+
194+
# ==============================================================================
195+
# LinkBlobprovider — Hardlink existing files into the CAS folder layout
196+
# ==============================================================================
197+
198+
"""
199+
LinkBlobprovider(folder; label = :default)
200+
201+
A CAS provider that hardlinks existing files into the standard sharded folder
202+
layout (same as `FolderBlobprovider`). This avoids copying large files while
203+
still making them addressable by Multihash.
204+
205+
The specialised `putBlob!(provider, filepath::String)` streams the file to
206+
compute its hash (no full load into RAM), then creates a hardlink at the CAS
207+
path. All other Layer 1 verbs (`fetchBlob`, `hasBlob`, `purgeBlob!`,
208+
`listBlobs`) behave identically to `FolderBlobprovider`.
209+
"""
210+
struct LinkBlobprovider <: AbstractBlobprovider
    label::Symbol
    folder::String
end

# Keyword constructor: optionally creates the (`~`-expanded) folder, but stores
# `foldername` exactly as given.
function LinkBlobprovider(
    foldername::String;
    label::Symbol = :default,
    createfolder = true,
)
    storepath = expanduser(foldername)
    if createfolder
        if !isdir(storepath)
            @info "Folder '$storepath' doesn't exist - creating."
            mkpath(storepath)
        end
    end
    return LinkBlobprovider(label, foldername)
end
223+
224+
# Path, read, existence, listing and purge all reuse the shared CAS folder
# helpers, so the on-disk layout is identical to FolderBlobprovider.
function blobfilename(provider::LinkBlobprovider, m::Multihash)
    return blobfilename(provider.folder, m)
end

function fetchBlob(provider::LinkBlobprovider, m::Multihash)
    return fetchBlob_folder(provider.folder, m)
end

function hasBlob(provider::LinkBlobprovider, m::Multihash)
    return isfile(blobfilename(provider, m))
end

function listBlobs(provider::LinkBlobprovider)
    return listBlobs_folder(provider.folder)
end

purgeBlob!(provider::LinkBlobprovider, m::Multihash) =
    purgeBlob_folder!(provider.folder, m)
236+
237+
# Hash the file at `filepath` from an open read handle (via `hash_func`), then
# delegate to the keyed method, which hardlinks it into the sharded CAS folder.
function putBlob!(
    provider::LinkBlobprovider,
    filepath::String;
    hash_func::Function = sha2_256,
)
    digest = open(io -> Multihash(hash_func, io), filepath, "r")
    return putBlob!(provider, digest, filepath)
end
249+
250+
"""
    putBlob!(provider::LinkBlobprovider, m::Multihash, filepath::String) -> Multihash

Hardlink `filepath` to the CAS path for `m` and return `m`. When a file already
exists at the CAS path, no link is created.
"""
function putBlob!(provider::LinkBlobprovider, m::Multihash, filepath::String)
    cas_path = blobfilename(provider, m)
    isfile(cas_path) && return m

    mkpath(dirname(cas_path))
    hardlink(filepath, cas_path)
    return m
end

0 commit comments

Comments
 (0)