Skip to content

Commit 5f896ed

Browse files
Affie and Copilot authored
Enhance Blob handling and MIME type integration, add solvable to summary level and rm Blobentries (#1213)
* add solvable to summary level, rm blobentries
* Enhance Blob handling and MIME type integration with new functions and tests
* fix docs
* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* fixes and formatting

---------

Co-authored-by: Johannes Terblanche <Affie@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent ddf99a4 commit 5f896ed

21 files changed

+760 -158 lines changed

Project.toml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
1414
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
1515
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
1616
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
17+
MIMEs = "6c6e2e6c-3030-632d-7369-2d6c69616d65"
1718
ManifoldsBase = "3362f125-f0bb-47a3-aa74-596ffd7ef2fb"
1819
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
1920
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
@@ -55,6 +56,7 @@ InteractiveUtils = "1.11"
5556
JSON = "1.0.0"
5657
LieGroups = "0.1"
5758
LinearAlgebra = "1.11"
59+
MIMEs = "1.1"
5860
ManifoldsBase = "1, 2"
5961
OrderedCollections = "1.4"
6062
Pkg = "1.4, 1.5"

ext/BlobArrow.jl

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2,19 +2,20 @@ module BlobArrow
22

33
using Arrow
44
using DistributedFactorGraphs
5-
using DistributedFactorGraphs: _MIMETypes
5+
using DistributedFactorGraphs: _MIMEOverrides, getMimetype
66

77
function __init__()
88
@info "Including Arrow blobs support in DFG."
9-
return push!(_MIMETypes, MIME("application/vnd.apache.arrow.file") => format"Arrow") # see issue #507
9+
push!(_MIMEOverrides, format"Arrow" => MIME("application/vnd.apache.arrow.file"))
10+
return nothing
1011
end
1112

1213
# kwargs: compress = :lz4,
1314
function DFG.packBlob(::Type{format"Arrow"}, data; kwargs...)
1415
io = IOBuffer()
1516
Arrow.write(io, data; kwargs...)
1617
blob = take!(io)
17-
mimetype = findfirst(==(format"Arrow"), _MIMETypes)
18+
mimetype = getMimetype(format"Arrow")
1819
return blob, mimetype
1920
end
2021

src/Common.jl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -214,7 +214,7 @@ function Timestamp(epoch::Val{:unix}, t::Float64, zone = tz"UTC")
214214
end
215215
Timestamp(t::Float64, zone = tz"UTC") = Timestamp(Val(:unix), t, zone)
216216
function Timestamp(epoch::Val{:rata}, t::Float64, zone = tz"UTC")
217-
return TimeDateZone(convert(DateTime, Millisecond(t*10^3)), zone)
217+
return TimeDateZone(convert(DateTime, Millisecond(t * 10^3)), zone)
218218
end
219219

220220
function now_tdz(zone = tz"UTC")

src/DataBlobs/entities/BlobEntry.jl

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -20,23 +20,23 @@ StructUtils.@kwarg struct Blobentry
2020
""" (Optional) crc32c hash value to ensure data consistency which must correspond to the stored hash upon retrieval."""
2121
crchash::Union{UInt32, Nothing} =
2222
nothing & (
23-
json=(
24-
lower = h->isnothing(h) ? nothing : string(h, base = 16),
25-
lift = s->isnothing(s) ? nothing : parse(UInt32, s; base = 16),
23+
json = (
24+
lower = h -> isnothing(h) ? nothing : string(h; base = 16),
25+
lift = s -> isnothing(s) ? nothing : parse(UInt32, s; base = 16),
2626
)
2727
)
2828
""" (Optional) sha256 hash value to ensure data consistency which must correspond to the stored hash upon retrieval."""
2929
shahash::Union{Vector{UInt8}, Nothing} =
3030
nothing & (
31-
json=(
32-
lower = h->isnothing(h) ? nothing : bytes2hex(h),
33-
lift = s->isnothing(s) ? nothing : hex2bytes(s),
31+
json = (
32+
lower = h -> isnothing(h) ? nothing : bytes2hex(h),
33+
lift = s -> isnothing(s) ? nothing : hex2bytes(s),
3434
)
3535
)
3636
""" Source system or application where the blob was created (e.g., webapp, sdk, robot)"""
3737
origin::String = ""
3838
"""Number of bytes in blob serialized as a string"""
39-
size::Int64 = -1 & (json=(lower = string, lift = x->parse(Int64, x)))
39+
size::Int64 = -1 & (json = (lower = string, lift = x -> parse(Int64, x)))
4040
""" Additional information that can help a different user of the Blob. """
4141
description::String = ""
4242
""" MIME description describing the format of binary data in the `Blob`, e.g. 'image/png' or 'application/json'. """
Lines changed: 80 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -1,49 +1,91 @@
1-
# using FileIO
2-
# using ImageIO
3-
# using LasIO
4-
# using BSON
5-
# using OrderedCollections
6-
7-
# 2 types for now with MIME type
8-
# 1. JSON - application/octet-stream/json
9-
# 2. FileIO - application/octet-stream
10-
# - application/bson
11-
# - image/jpeg
12-
# - image/png
13-
# - application/vnd.apache.arrow.file
14-
15-
const _MIMETypes = OrderedDict{MIME, DataType}()
16-
push!(_MIMETypes, MIME("application/octet-stream/json") => format"JSON")
17-
push!(_MIMETypes, MIME("application/bson") => format"BSON")
18-
push!(_MIMETypes, MIME("image/png") => format"PNG")
19-
push!(_MIMETypes, MIME("image/jpeg") => format"JPG")
20-
push!(_MIMETypes, MIME("application/vnd.las") => format"LAS")
21-
push!(_MIMETypes, MIME("application/vnd.apache.parque") => format"Parquet") # Provided by FileIO with ParquetFiles
1+
##==============================================================================
2+
## BlobPacking: format <-> MIME type bridging and blob serialization
3+
##==============================================================================
4+
5+
# Override dictionary for formats not covered by MIMEs.jl + FileIO auto-detection.
6+
# Standard types (PNG, JPEG, CSV, etc.) are auto-detected and don't need entries here.
7+
const _MIMEOverrides = OrderedDict{DataType, MIME}(
8+
format"JSON" => MIME("application/json"),
9+
format"BSON" => MIME("application/bson"),
10+
format"LAS" => MIME("application/vnd.las"),
11+
format"Parquet" => MIME("application/vnd.apache.parquet"),
12+
)
13+
14+
"""
15+
getMimetype(::Type{DataFormat{S}}) -> MIME
16+
17+
Get the MIME type for a FileIO `DataFormat`. Uses FileIO's extension registry
18+
and MIMEs.jl for standard types, falls back to `_MIMEOverrides` for
19+
domain-specific formats.
20+
21+
# Examples
22+
```julia
23+
getMimetype(format"PNG") # MIME("image/png")
24+
getMimetype(format"JSON") # MIME("application/json")
25+
```
26+
"""
27+
function getMimetype(::Type{DataFormat{S}}) where {S}
28+
T = DataFormat{S}
29+
haskey(_MIMEOverrides, T) && return _MIMEOverrides[T]
30+
try
31+
finfo = FileIO.info(T)
32+
ext = finfo[2]
33+
ext = ext isa AbstractVector ? first(ext) : ext
34+
m = mime_from_extension(ext)
35+
!isnothing(m) && return m
36+
catch
37+
end
38+
return MIME("application/octet-stream")
39+
end
40+
41+
"""
42+
getDataFormat(::MIME) -> Union{Type{DataFormat{S}}, Nothing}
43+
44+
Get the FileIO `DataFormat` for a MIME type. Uses MIMEs.jl and FileIO's extension
45+
registry, falls back to `_MIMEOverrides`.
46+
47+
Returns `nothing` if no matching format is found.
48+
49+
# Examples
50+
```julia
51+
getDataFormat(MIME("image/png")) # format"PNG"
52+
getDataFormat(MIME("application/json")) # format"JSON"
53+
```
54+
"""
55+
function getDataFormat(m::MIME)
56+
for (fmt, mime) in _MIMEOverrides
57+
mime == m && return fmt
58+
end
59+
ext = extension_from_mime(m)
60+
sym = get(FileIO.ext2sym, ext, nothing)
61+
!isnothing(sym) && return DataFormat{sym}
62+
return nothing
63+
end
2264

2365
"""
2466
packBlob
25-
Convert a file (JSON, JPG, PNG, BSON, LAS) to Vector{UInt8} for use as a Blob.
26-
Returns the blob and MIME type.
67+
Convert data to `Vector{UInt8}` for use as a Blob. Returns `(blob, mimetype)`.
68+
The MIME type is automatically determined from the DataFormat.
2769
"""
2870
function packBlob end
71+
2972
"""
3073
unpackBlob
31-
Convert a Blob back to the origanal typ using the MIME type or DataFormat type.
74+
Convert a Blob back to the original type using the MIME type or DataFormat type.
3275
"""
3376
function unpackBlob end
3477

3578
unpackBlob(mime::String, blob) = unpackBlob(MIME(mime), blob)
3679

3780
function unpackBlob(T::MIME, blob)
38-
dataformat = get(_MIMETypes, T, nothing)
81+
dataformat = getDataFormat(T)
3982
isnothing(dataformat) && error("Format not found for MIME type $(T)")
4083
return unpackBlob(dataformat, blob)
4184
end
4285

4386
# 1. JSON strings are saved as is
4487
function packBlob(::Type{format"JSON"}, json_str::String)
45-
mimetype = findfirst(==(format"JSON"), _MIMETypes)
46-
# blob = codeunits(json_str)
88+
mimetype = getMimetype(format"JSON")
4789
blob = Vector{UInt8}(json_str)
4890
return blob, mimetype
4991
end
@@ -55,16 +97,12 @@ end
5597
unpackBlob(entry::Blobentry, blob::Vector{UInt8}) = unpackBlob(entry.mimetype, blob)
5698
unpackBlob(eb::Pair{<:Blobentry, Vector{UInt8}}) = unpackBlob(eb[1], eb[2])
5799

58-
# 2/ FileIO
100+
# 2. FileIO formats (PNG, JPEG, BSON, LAS, Parquet, etc.)
59101
function packBlob(::Type{T}, data::Any; kwargs...) where {T <: DataFormat}
60102
io = IOBuffer()
61103
save(Stream{T}(io), data; kwargs...)
62104
blob = take!(io)
63-
mimetype = findfirst(==(T), _MIMETypes)
64-
if isnothing(mimetype)
65-
@warn "No MIME type found for format $T"
66-
mimetype = MIME"application/octet-stream"
67-
end
105+
mimetype = getMimetype(T)
68106
return blob, mimetype
69107
end
70108

@@ -73,18 +111,13 @@ function unpackBlob(::Type{T}, blob::Vector{UInt8}) where {T <: DataFormat}
73111
return load(Stream{T}(io))
74112
end
75113

76-
# if false
77-
# json_str = "{\"name\":\"John\"}"
78-
# blob, mimetype = packBlob(format"JSON", json_str)
79-
# @assert json_str == unpackBlob(format"JSON", blob)
80-
# @assert json_str == unpackBlob(MIME("application/octet-stream/json"), blob)
81-
# @assert json_str == unpackBlob("application/octet-stream/json", blob)
82-
83-
# blob,mime = packBlob(format"PNG", img)
84-
# up_img = unpackBlob(format"PNG", blob)
85-
86-
# #TODO BSON does not work yet, can extend [un]packBlob(::Type{format"BSON"}, ...)
87-
# packBlob(format"BSON", Dict("name"=>"John"))
88-
# unpackBlob(format"BSON", Dict("name"=>"John"))
114+
"""
115+
getMimetype(io::IO) -> MIME
89116
90-
# end
117+
Detect the MIME type of data in an IO stream using FileIO's format detection.
118+
"""
119+
function getMimetype(io::IO)
120+
_getFormat(s::FileIO.Stream{T}) where {T} = T
121+
stream = FileIO.query(io)
122+
return getMimetype(_getFormat(stream))
123+
end

src/DataBlobs/services/BlobWrappers.jl

Lines changed: 64 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -63,6 +63,27 @@ $(METHODLIST)
6363
"""
6464
function deleteBlob_Agent! end
6565

66+
"""
67+
Convenience wrapper to load a Blob for a given factor and Blobentry label.
68+
69+
$(METHODLIST)
70+
"""
71+
function loadBlob_Factor end
72+
73+
"""
74+
Convenience wrapper to save a Blob to a Blobstore and a Blobentry to a factor.
75+
76+
$(METHODLIST)
77+
"""
78+
function saveBlob_Factor! end
79+
80+
"""
81+
Convenience wrapper to delete a Blob from a Blobstore and its Blobentry from a factor.
82+
83+
$(METHODLIST)
84+
"""
85+
function deleteBlob_Factor! end
86+
6687
function loadBlob_Variable(
6788
dfg::AbstractDFG,
6889
variable_label::Symbol,
@@ -166,6 +187,42 @@ function deleteBlob_Agent!(dfg::AbstractDFG, entry_label::Symbol)
166187
return 2
167188
end
168189

190+
function loadBlob_Factor(dfg::AbstractDFG, factor_label::Symbol, entry_label::Symbol)
191+
entry = getFactorBlobentry(dfg, factor_label, entry_label)
192+
blob = getBlob(dfg, entry)
193+
return entry, blob
194+
end
195+
196+
function saveBlob_Factor!(
197+
dfg::AbstractDFG,
198+
factor_label::Symbol,
199+
blob::Vector{UInt8},
200+
entry::Blobentry,
201+
)
202+
addFactorBlobentry!(dfg, factor_label, entry)
203+
addBlob!(dfg, entry, blob)
204+
return entry
205+
end
206+
207+
function saveBlob_Factor!(
208+
dfg::AbstractDFG,
209+
factor_label::Symbol,
210+
blob::Vector{UInt8},
211+
entry_label::Symbol,
212+
blobstore::Symbol = :default;
213+
blobentry_kwargs...,
214+
)
215+
entry = Blobentry(entry_label, blobstore; blobentry_kwargs...)
216+
return saveBlob_Factor!(dfg, factor_label, blob, entry)
217+
end
218+
219+
function deleteBlob_Factor!(dfg::AbstractDFG, factor_label::Symbol, entry_label::Symbol)
220+
entry = getFactorBlobentry(dfg, factor_label, entry_label)
221+
deleteFactorBlobentry!(dfg, factor_label, entry_label)
222+
deleteBlob!(dfg, entry)
223+
return 2
224+
end
225+
169226
function saveImage_Variable!(
170227
dfg::AbstractDFG,
171228
variable_label::Symbol,
@@ -174,19 +231,19 @@ function saveImage_Variable!(
174231
blobstore::Symbol = :default;
175232
entry_kwargs...,
176233
)
177-
mimeType = get(entry_kwargs, :mimeType, MIME("image/png"))
178-
format = _MIMETypes[mimeType]
179-
180-
blob, mimeType = packBlob(format, img)
234+
mimetype = get(entry_kwargs, :mimeType, MIME("image/png"))
235+
format = getDataFormat(mimetype)
236+
isnothing(format) &&
237+
throw(ArgumentError("Unsupported MIME type for image: $(mimetype)"))
238+
blob, mimetype = packBlob(format, img)
181239

182-
size = string(length(blob))
183240
entry = Blobentry(
184241
entry_label,
185242
blobstore;
186243
blobid = uuid4(),
187244
entry_kwargs...,
188-
size,
189-
mimeType = string(mimeType),
245+
size = length(blob),
246+
mimetype,
190247
)
191248

192249
return saveBlob_Variable!(dfg, variable_label, blob, entry)

0 commit comments

Comments
 (0)