Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
ManifoldsBase = "3362f125-f0bb-47a3-aa74-596ffd7ef2fb"
NanoDates = "46f1a544-deae-4307-8689-c12aa3c955c6"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
Expand Down Expand Up @@ -56,6 +57,7 @@ JSON = "1.0.0"
LieGroups = "0.1"
LinearAlgebra = "1.10"
ManifoldsBase = "1, 2"
NanoDates = "1.0.3"
OrderedCollections = "1.4"
Pkg = "1.4, 1.5"
ProgressMeter = "1"
Expand Down
128 changes: 83 additions & 45 deletions src/DataBlobs/entities/BlobEntry.jl
Original file line number Diff line number Diff line change
@@ -1,86 +1,124 @@

##==============================================================================
## Blobentry
##==============================================================================
#TODO think origin and buildSourceString should be deprecated, description can be used instead
#TODO hash - maybe use both crc32c for fast error check and sha256 for strong integrity check
# stored separately as crc and sha or as a tuple `hash::Tuple{Symbol, String}` where Symbol is :crc32c or :sha256
# or an enum with supported hash types
"""
$(TYPEDEF)

A `Blobentry` is a small amount of structured data that holds reference information to find an actual blob. Many `Blobentry`s
can exist on different graph nodes spanning Agents and Factor Graphs which can all reference the same `Blob`.

Notes:
- `blobId`s should be unique within a blobstore and are immutable.
- `blobid`s should be unique within a blobstore and are immutable.
"""
Base.@kwdef struct Blobentry
""" Remotely assigned and globally unique identifier for the `Blobentry` itself (not the `.blobId`). """
id::Union{UUID, Nothing} = nothing
""" Machine friendly and globally unique identifier of the 'Blob', usually assigned from a common point in the system. This can be used to guarantee unique retrieval of the large data blob. """
blobId::UUID = uuid4()
StructUtils.@kwarg struct Blobentry
""" Human friendly label of the `Blob` and also used as unique identifier per node on which a `Blobentry` is added. E.g. do "LEFTCAM_1", "LEFTCAM_2", ... of you need to repeat a label on the same variable. """
label::Symbol
""" A hint about where the `Blob` itself might be stored. Remember that a Blob may be duplicated over multiple blobstores. """
""" The label of the `Blobstore` in which the `Blob` is stored. Default is `:default`."""
blobstore::Symbol = :default
""" A hash value to ensure data consistency which must correspond to the stored hash upon retrieval. Use `bytes2hex(sha256(blob))`. [Legacy: some usage functions allow the check to be skipped if needed.] """
hash::String = ""# Probably https://docs.julialang.org/en/v1/stdlib/SHA
""" Context from which a Blobentry=>Blob was first created. E.g. agent|graph|varlabel. """
""" Machine friendly and unique within a `Blobstore` identifier of the 'Blob'."""
blobid::UUID = uuid4() # was blobId
""" (Optional) crc32c hash value to ensure data consistency which must correspond to the stored hash upon retrieval."""
crchash::Union{UInt32, Nothing} =
nothing & (
json=(
lower = h->isnothing(h) ? nothing : string(h, base = 16),
lift = s->isnothing(s) ? nothing : parse(UInt32, s; base = 16),
)
)
""" (Optional) sha256 hash value to ensure data consistency which must correspond to the stored hash upon retrieval."""
shahash::Union{Vector{UInt8}, Nothing} =
nothing & (
json=(
lower = h->isnothing(h) ? nothing : bytes2hex(h),
lift = s->isnothing(s) ? nothing : hex2bytes(s),
)
)
""" Source system or application where the blob was created (e.g., webapp, sdk, robot)"""
origin::String = ""
""" number of bytes in blob as a string"""
size::String = "-1"
"""Number of bytes in blob serialized as a string"""
size::Int64 = -1 & (json=(lower = string, lift = x->parse(Int64, x)))
""" Additional information that can help a different user of the Blob. """
description::String = ""
""" MIME description describing the format of binary data in the `Blob`, e.g. 'image/png' or 'application/json; _type=CameraModel'. """
mimeType::String = "application/octet-stream"
""" Additional storage for functional metadata used in some scenarios, e.g. to support advanced features such as `parsejson(base64decode(entry.metadata))['time_sync']`. """
metadata::String = "e30="
""" When the Blob itself was first created. """
timestamp::ZonedDateTime = now(localzone())
""" When the Blobentry was created. """
createdTimestamp::Union{ZonedDateTime, Nothing} = nothing
""" Use carefully, but necessary to support advanced usage such as time synchronization over Blob data. """
lastUpdatedTimestamp::Union{ZonedDateTime, Nothing} = nothing
""" MIME description describing the format of binary data in the `Blob`, e.g. 'image/png' or 'application/json'. """
mimetype::String = "application/octet-stream" #FIXME ::MIME = MIME("application/octet-stream")
""" Storage for a couple of bytes directly in the graph. Use with caution and keep it small and simple."""
metadata::JSONText = JSONText("{}")
""" When the Blob itself was first created. Serialized as an ISO 8601 string."""
timestamp::NanoDate = ndnow(UTC) & (json = (lower = timestamp,),)
""" Type version of this Blobentry."""
_version::VersionNumber = _getDFGVersion()
version::VersionNumber = version(Blobentry)
end
# Version of the `Blobentry` type itself; dispatches on the type, not on an instance.
function version(::Type{Blobentry})
    return v"0.1.0"
end
# Generic accessor: return the `version` property stored on `node`.
function version(node)
    return node.version
end

# Positional convenience constructor: `label` is required, `blobstore` defaults to
# `:default`; both are forwarded together with any extra keywords to the keyword
# constructor.
Blobentry(label::Symbol, blobstore = :default; kwargs...) =
    Blobentry(; label = label, blobstore = blobstore, kwargs...)
# construction helper from existing Blobentry for user overriding via kwargs
function Blobentry(
entry::Blobentry;
id::Union{UUID, Nothing} = entry.id,
blobId::UUID = entry.blobId,
blobid::UUID = entry.blobid,
label::Symbol = entry.label,
blobstore::Symbol = entry.blobstore,
hash::String = entry.hash,
size::Union{String, Int, Nothing} = entry.size,
crchash = entry.crchash,
shahash = entry.shahash,
size::Int64 = entry.size,
origin::String = entry.origin,
description::String = entry.description,
mimeType::String = entry.mimeType,
metadata::String = entry.metadata,
mimetype::String = entry.mimetype,
metadata::JSONText = entry.metadata,
timestamp::ZonedDateTime = entry.timestamp,
createdTimestamp = entry.createdTimestamp,
lastUpdatedTimestamp = entry.lastUpdatedTimestamp,
_version = entry._version,
version = entry.version,
)
return Blobentry(;
id,
blobId,
label,
blobstore,
hash,
blobid,
crchash,
shahash,
origin,
size = string(size),
size,
description,
mimeType,
mimetype,
metadata,
timestamp,
createdTimestamp,
lastUpdatedTimestamp,
_version,
version,
)
end

#TODO deprecated in v0.29
function Base.getproperty(x::Blobentry, f::Symbol)
    # Removed fields: reading them is a hard error.
    if f === :id || f === :createdTimestamp || f === :lastUpdatedTimestamp
        error("Blobentry field $f has been deprecated")
    end
    if f === :hash
        error("Blobentry field :hash has been deprecated; use :crchash or :shahash instead")
    end

    # Renamed fields: warn and forward the read to the new field name.
    if f === :blobId
        @warn "Blobentry field :blobId has been renamed to :blobid"
        return getfield(x, :blobid)
    end
    if f === :mimeType
        @warn "Blobentry field :mimeType has been renamed to :mimetype"
        return getfield(x, :mimetype)
    end
    if f === :_version
        @warn "Blobentry field :_version has been renamed to :version"
        return getfield(x, :version)
    end

    # Anything else is an ordinary field access.
    return getfield(x, f)
end

function Base.setproperty!(x::Blobentry, f::Symbol, val)
    # Removed fields cannot be written at all.
    if f === :id || f === :createdTimestamp || f === :lastUpdatedTimestamp || f === :hash
        error("Blobentry field $f has been deprecated")
    end

    # Map a renamed legacy field onto its current name, warning about the rename.
    target = f
    if f === :blobId
        @warn "Blobentry field :blobId has been renamed to :blobid"
        target = :blobid
    elseif f === :mimeType
        @warn "Blobentry field :mimeType has been renamed to :mimetype"
        target = :mimetype
    elseif f === :_version
        @warn "Blobentry field :_version has been renamed to :version"
        target = :version
    end

    return setfield!(x, target, val)
end
14 changes: 7 additions & 7 deletions src/DataBlobs/entities/BlobStores.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,25 +7,25 @@ Abstract supertype for all blobstore implementations.

Subtypes of `AbstractBlobstore{T}` must implement the required interface for blob storage and retrieval, such as:

- `add!(store, blobId, blob)`: Add a new blob to the store.
- `get(store, blobId)`: Retrieve a blob by its ID.
- `add!(store, blobid, blob)`: Add a new blob to the store.
- `get(store, blobid)`: Retrieve a blob by its ID.
- `list(store)`: List all blob IDs in the store.

The parameter `T` represents the type of blobs stored (e.g., `Vector{UInt8}` or a custom `Blob` type).

See concrete implementations for details.

Design Notes
- `blobId` is not considered unique across blobstores with different labels only within a single blobstore.
- We cannot guarantee that `blobId` is unique across different blobstores with the same label and this is up to the end user.
- `blobid` is only considered unique within a single blobstore, not across blobstores with different labels.
- We cannot guarantee that `blobid` is unique across different blobstores with the same label and this is up to the end user.
- Within a single blobstore `addBlob!` will fail if there is a UUID collision.
- TODO: We should consider using uuid7 for `blobId`s (requires jl v1.12).
- TODO: We should consider using uuid7 for `blobid`s (requires jl v1.12).
- `Blobstore`s are identified by a `label::Symbol`, which allows for multiple blobstores to coexist in the same system.

TODO: If we want to make the `blobId`=>Blob pair immutable:
TODO: If we want to make the `blobid`=>Blob pair immutable:
- We can use the tombstone pattern to mark a blob as deleted. See FolderStore in PR#TODO.

Design goal: all `Blobstore`s with the same `label` can contain the same `blobId`=>`Blob` pair and the blobs should be identical since they are immutable.
Design goal: all `Blobstore`s with the same `label` can contain the same `blobid`=>`Blob` pair and the blobs should be identical since they are immutable.

"""
abstract type AbstractBlobstore{T} end
Expand Down
57 changes: 34 additions & 23 deletions src/DataBlobs/services/BlobEntry.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
##==============================================================================
## Blobentry - common
##==============================================================================
#TODO think origin and buildSourceString should be deprecated, description can be used instead
#TODO think buildSourceString should be deprecated.
"""
$(SIGNATURES)
Function to generate source string - agentLabel|graphLabel|varLabel
Expand All @@ -17,32 +17,43 @@ end
# label
# id

getHash(entry::Blobentry) = hex2bytes(entry.hash)
# Accessor for the `timestamp` field of a `Blobentry`.
function getTimestamp(entry::Blobentry)
    return entry.timestamp
end

function assertHash(de::Blobentry, db; hashfunction::Function = sha256)
getHash(de) === nothing && @warn "Missing hash?" && return true
if hashfunction(db) == getHash(de)
return true #or nothing?
else
error("Stored hash and data blob hash do not match")
"""
checkHash(entry::Blobentry, blob) -> Union{Bool,Nothing}

Checks the integrity of a blob against the hashes (crc32c, sha256) stored in the given `Blobentry`.

- Returns `true` if all present hashes (`crchash`, `shahash`) match the computed values from `blob`.
- Returns `false` if any present hash does not match.
- Returns `nothing` if no hashes are stored in the `Blobentry` to check against.
"""
function checkHash(entry::Blobentry, blob)
    crc = entry.crchash
    sha = entry.shahash

    # Both hash fields are `Union{..., Nothing}` and default to `nothing`. Absence must
    # be tested with `isnothing`: comparing `shahash` with "" is never true, which made
    # an entry without a sha256 hash report a mismatch instead of being skipped.
    if isnothing(crc) && isnothing(sha)
        return nothing # nothing stored to check against
    end

    # Only compare the hashes that are actually present on the entry.
    if !isnothing(crc) && crc32c(blob) != crc
        return false
    end
    if !isnothing(sha) && sha256(blob) != sha
        return false
    end
    return true
end

function Base.show(io::IO, ::MIME"text/plain", entry::Blobentry)
println(io, "Blobentry {")
println(io, " id: ", entry.id)
println(io, " blobId: ", entry.blobId)
println(io, " label: ", entry.label)
println(io, " blobstore: ", entry.blobstore)
println(io, " hash: ", entry.hash)
println(io, " origin: ", entry.origin)
println(io, " description: ", entry.description)
println(io, " mimeType: ", entry.mimeType)
println(io, " timestamp ", entry.timestamp)
println(io, " _version: ", entry._version)
return println(io, "}")
end
# function Base.show(io::IO, ::MIME"text/plain", entry::Blobentry)
# println(io, "Blobentry {")
# println(io, " id: ", entry.id)
# println(io, " blobid: ", entry.blobid)
# println(io, " label: ", entry.label)
# println(io, " blobstore: ", entry.blobstore)
# println(io, " origin: ", entry.origin)
# println(io, " description: ", entry.description)
# println(io, " mimetype: ", entry.mimetype)
# println(io, " timestamp ", entry.timestamp)
# println(io, " version: ", entry.version)
# return println(io, "}")
# end

##==============================================================================
## Blobentry - CRUD
Expand Down Expand Up @@ -238,7 +249,7 @@ function getBlobentries(
)
entries = getBlobentries(v)
filterDFG!(entries, labelFilter, getLabel)
filterDFG!(entries, blobIdFilter, x -> string(x.blobId))
filterDFG!(entries, blobIdFilter, x -> string(x.blobid))
return entries
end

Expand Down
2 changes: 1 addition & 1 deletion src/DataBlobs/services/BlobPacking.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ function unpackBlob(::Type{format"JSON"}, blob::Vector{UInt8})
return String(copy(blob))
end

unpackBlob(entry::Blobentry, blob::Vector{UInt8}) = unpackBlob(entry.mimeType, blob)
# Unpack `blob` by forwarding to the `unpackBlob` method selected by the entry's `mimetype`.
function unpackBlob(entry::Blobentry, blob::Vector{UInt8})
    mime = entry.mimetype
    return unpackBlob(mime, blob)
end
# Convenience method for an `entry => blob` pair: split the pair and forward both parts.
function unpackBlob(eb::Pair{<:Blobentry, Vector{UInt8}})
    entry, blob = eb
    return unpackBlob(entry, blob)
end

# 2/ FileIO
Expand Down
Loading
Loading