Skip to content

Commit a080281

Browse files
committed
feat(refs): add branch/tag metadata maps and tag timestamps
1 parent 1ef6310 commit a080281

14 files changed

Lines changed: 567 additions & 29 deletions

File tree

docs/src/format/table/branch_tag.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ Each branch metadata file is a JSON file with the following fields:
4242
| `parent_version` | number | | Version number of the parent branch at the time this branch was created. |
4343
| `create_at` | number | | Unix timestamp (seconds since epoch) when the branch was created. |
4444
| `manifest_size` | number | | Size of the initial manifest file in bytes. |
45+
| `metadata` | object | Yes | String key/value metadata map. If absent, it is treated as an empty object. |
4546

4647
### Branch Dataset Layout
4748

@@ -118,4 +119,7 @@ Each tag file is a JSON file with the following fields:
118119
|-----------------|--------|----------|--------------------------------------------------------------------------|
119120
| `branch` | string | Yes | Branch name being tagged. `null` or absent indicates main branch. |
120121
| `version` | number | | Version number being tagged within that branch. |
122+
| `createdAt` | string | Yes | RFC 3339 timestamp for when the tag was first created. |
123+
| `updatedAt` | string | Yes | RFC 3339 timestamp for the latest tag reference update. |
121124
| `manifest_size` | number | | Size of the manifest file in bytes. Used for efficient manifest loading. |
125+
| `metadata` | object | Yes | String key/value metadata map. If absent, it is treated as an empty object. |

docs/src/guide/tags_and_branches.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,21 +36,21 @@ print(ds.tags.list())
3636
# {}
3737
ds.tags.create("v1-prod", (None, 1))
3838
print(ds.tags.list())
39-
# {'v1-prod': {'version': 1, 'manifest_size': ...}}
39+
# {'v1-prod': {'version': 1, 'created_at': ..., 'updated_at': ..., 'manifest_size': ...}}
4040
ds.tags.update("v1-prod", (None, 2))
4141
print(ds.tags.list())
42-
# {'v1-prod': {'version': 2, 'manifest_size': ...}}
42+
# {'v1-prod': {'version': 2, 'created_at': ..., 'updated_at': ..., 'manifest_size': ...}}
4343
ds.tags.delete("v1-prod")
4444
print(ds.tags.list())
4545
# {}
4646
print(ds.tags.list_ordered())
4747
# []
4848
ds.tags.create("v1-prod", (None, 1))
4949
print(ds.tags.list_ordered())
50-
# [('v1-prod', {'version': 1, 'manifest_size': ...})]
50+
# [('v1-prod', {'version': 1, 'created_at': ..., 'updated_at': ..., 'manifest_size': ...})]
5151
ds.tags.update("v1-prod", (None, 2))
5252
print(ds.tags.list_ordered())
53-
# [('v1-prod', {'version': 2, 'manifest_size': ...})]
53+
# [('v1-prod', {'version': 2, 'created_at': ..., 'updated_at': ..., 'manifest_size': ...})]
5454
ds.tags.delete("v1-prod")
5555
print(ds.tags.list_ordered())
5656
# []
@@ -122,4 +122,4 @@ print(ds.branches.list_ordered(order="desc"))
122122

123123
Branches hold references to data files. Lance ensures that cleanup does not delete files still referenced by any branch.
124124

125-
Delete unused branches to allow their referenced files to be cleaned up by `cleanup_old_versions()`.
125+
Delete unused branches to allow their referenced files to be cleaned up by `cleanup_old_versions()`.

java/lance-jni/src/blocking_dataset.rs

Lines changed: 113 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use crate::storage_options::JavaStorageOptionsProvider;
1111
use crate::traits::{FromJObjectWithEnv, FromJString, export_vec, import_vec, import_vec_to_rust};
1212
use crate::utils::{
1313
build_compaction_options, extract_storage_options, extract_write_params,
14-
get_scalar_index_params, get_vector_index_params, to_rust_map,
14+
get_scalar_index_params, get_vector_index_params, to_java_map, to_rust_map,
1515
};
1616
use crate::{RT, traits::IntoJava};
1717
use arrow::array::RecordBatchReader;
@@ -314,6 +314,24 @@ impl BlockingDataset {
314314
Ok(())
315315
}
316316

317+
pub fn replace_tag_metadata(
318+
&mut self,
319+
tag: &str,
320+
metadata: HashMap<String, String>,
321+
) -> Result<()> {
322+
RT.block_on(self.inner.tags().replace_metadata(tag, metadata))?;
323+
Ok(())
324+
}
325+
326+
pub fn replace_branch_metadata(
327+
&mut self,
328+
branch: &str,
329+
metadata: HashMap<String, String>,
330+
) -> Result<()> {
331+
RT.block_on(self.inner.branches().replace_metadata(branch, metadata))?;
332+
Ok(())
333+
}
334+
317335
pub fn get_version(&self, tag: &str) -> Result<u64> {
318336
let version = RT.block_on(self.inner.tags().get_version(tag))?;
319337
Ok(version)
@@ -2261,6 +2279,26 @@ fn inner_add_columns_by_schema(
22612279
//////////////////////////////
22622280
// Tag operation Methods //
22632281
//////////////////////////////
2282+
fn optional_datetime_to_java_instant<'local>(
2283+
env: &mut JNIEnv<'local>,
2284+
timestamp: Option<&DateTime<Utc>>,
2285+
) -> Result<JObject<'local>> {
2286+
if let Some(timestamp) = timestamp {
2287+
let seconds = timestamp.timestamp();
2288+
let nanos = timestamp.timestamp_subsec_nanos() as i64;
2289+
Ok(env
2290+
.call_static_method(
2291+
"java/time/Instant",
2292+
"ofEpochSecond",
2293+
"(JJ)Ljava/time/Instant;",
2294+
&[JValue::Long(seconds), JValue::Long(nanos)],
2295+
)?
2296+
.l()?)
2297+
} else {
2298+
Ok(JObject::null())
2299+
}
2300+
}
2301+
22642302
#[unsafe(no_mangle)]
22652303
pub extern "system" fn Java_org_lance_Dataset_nativeListTags<'local>(
22662304
mut env: JNIEnv<'local>,
@@ -2273,27 +2311,34 @@ fn inner_list_tags<'local>(
22732311
env: &mut JNIEnv<'local>,
22742312
java_dataset: JObject,
22752313
) -> Result<JObject<'local>> {
2276-
let tag_map = {
2314+
let mut tags: Vec<_> = {
22772315
let dataset_guard =
22782316
unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?;
2279-
dataset_guard.list_tags()?
2317+
dataset_guard.list_tags()?.into_iter().collect()
22802318
};
2319+
tags.sort_unstable_by(|(left_name, _), (right_name, _)| left_name.cmp(right_name));
22812320
let array_list = env.new_object("java/util/ArrayList", "()V", &[])?;
22822321

2283-
for (tag_name, tag_contents) in tag_map {
2322+
for (tag_name, tag_contents) in tags {
22842323
let branch_name: JObject = if let Some(branch_name) = tag_contents.branch.as_ref() {
22852324
env.new_string(branch_name)?.into()
22862325
} else {
22872326
JObject::null()
22882327
};
2328+
let created_at = optional_datetime_to_java_instant(env, tag_contents.created_at.as_ref())?;
2329+
let updated_at = optional_datetime_to_java_instant(env, tag_contents.updated_at.as_ref())?;
2330+
let java_metadata = to_java_map(env, &tag_contents.metadata)?;
22892331
let java_tag = env.new_object(
22902332
"org/lance/Tag",
2291-
"(Ljava/lang/String;Ljava/lang/String;JI)V",
2333+
"(Ljava/lang/String;Ljava/lang/String;JILjava/time/Instant;Ljava/time/Instant;Ljava/util/Map;)V",
22922334
&[
22932335
JValue::Object(&env.new_string(tag_name)?.into()),
22942336
JValue::Object(&branch_name),
22952337
JValue::Long(tag_contents.version as i64),
22962338
JValue::Int(tag_contents.manifest_size as i32),
2339+
JValue::Object(&created_at),
2340+
JValue::Object(&updated_at),
2341+
JValue::Object(&java_metadata),
22972342
],
22982343
)?;
22992344
env.call_method(
@@ -2375,6 +2420,32 @@ fn inner_update_tag(
23752420
dataset_guard.update_tag(tag.as_str(), reference)
23762421
}
23772422

2423+
#[unsafe(no_mangle)]
2424+
pub extern "system" fn Java_org_lance_Dataset_nativeReplaceTagMetadata(
2425+
mut env: JNIEnv,
2426+
java_dataset: JObject,
2427+
jtag_name: JString,
2428+
jmetadata: JObject,
2429+
) {
2430+
ok_or_throw_without_return!(
2431+
env,
2432+
inner_replace_tag_metadata(&mut env, java_dataset, jtag_name, jmetadata)
2433+
)
2434+
}
2435+
2436+
fn inner_replace_tag_metadata(
2437+
env: &mut JNIEnv,
2438+
java_dataset: JObject,
2439+
jtag_name: JString,
2440+
jmetadata: JObject,
2441+
) -> Result<()> {
2442+
let tag = jtag_name.extract(env)?;
2443+
let metadata = extract_metadata_map(env, &jmetadata)?;
2444+
let mut dataset_guard =
2445+
{ unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }? };
2446+
dataset_guard.replace_tag_metadata(tag.as_str(), metadata)
2447+
}
2448+
23782449
#[unsafe(no_mangle)]
23792450
pub extern "system" fn Java_org_lance_Dataset_nativeGetVersionByTag(
23802451
mut env: JNIEnv,
@@ -2414,11 +2485,12 @@ fn inner_list_branches<'local>(
24142485
env: &mut JNIEnv<'local>,
24152486
java_dataset: JObject,
24162487
) -> Result<JObject<'local>> {
2417-
let branches = {
2488+
let mut branches: Vec<_> = {
24182489
let dataset_guard =
24192490
unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?;
2420-
dataset_guard.list_branches()?
2491+
dataset_guard.list_branches()?.into_iter().collect()
24212492
};
2493+
branches.sort_unstable_by(|(left_name, _), (right_name, _)| left_name.cmp(right_name));
24222494
let array_list = env.new_object("java/util/ArrayList", "()V", &[])?;
24232495

24242496
for (name, contents) in branches {
@@ -2447,16 +2519,18 @@ fn inner_list_branches<'local>(
24472519
&[JValue::Object(&jmapping)],
24482520
)?;
24492521
}
2522+
let java_metadata = to_java_map(env, &contents.metadata)?;
24502523
let jbranch = env.new_object(
24512524
"org/lance/Branch",
2452-
"(Ljava/lang/String;Ljava/lang/String;Ljava/util/List;JJI)V",
2525+
"(Ljava/lang/String;Ljava/lang/String;Ljava/util/List;JJILjava/util/Map;)V",
24532526
&[
24542527
JValue::Object(&jname),
24552528
JValue::Object(&jparent),
24562529
JValue::Object(&jbranch_identifier),
24572530
JValue::Long(contents.parent_version as i64),
24582531
JValue::Long(contents.create_at as i64),
24592532
JValue::Int(contents.manifest_size as i32),
2533+
JValue::Object(&java_metadata),
24602534
],
24612535
)?;
24622536
env.call_method(
@@ -2507,6 +2581,37 @@ fn inner_create_branch<'local>(
25072581
new_blocking_dataset.into_java(env)
25082582
}
25092583

2584+
#[unsafe(no_mangle)]
2585+
pub extern "system" fn Java_org_lance_Dataset_nativeReplaceBranchMetadata(
2586+
mut env: JNIEnv,
2587+
java_dataset: JObject,
2588+
jbranch: JString,
2589+
jmetadata: JObject,
2590+
) {
2591+
ok_or_throw_without_return!(
2592+
env,
2593+
inner_replace_branch_metadata(&mut env, java_dataset, jbranch, jmetadata)
2594+
)
2595+
}
2596+
2597+
fn inner_replace_branch_metadata(
2598+
env: &mut JNIEnv,
2599+
java_dataset: JObject,
2600+
jbranch: JString,
2601+
jmetadata: JObject,
2602+
) -> Result<()> {
2603+
let branch: String = jbranch.extract(env)?;
2604+
let metadata = extract_metadata_map(env, &jmetadata)?;
2605+
let mut dataset_guard =
2606+
unsafe { env.get_rust_field::<_, _, BlockingDataset>(java_dataset, NATIVE_DATASET) }?;
2607+
dataset_guard.replace_branch_metadata(&branch, metadata)
2608+
}
2609+
2610+
fn extract_metadata_map(env: &mut JNIEnv, jmetadata: &JObject) -> Result<HashMap<String, String>> {
2611+
let jmap = JMap::from_env(env, jmetadata)?;
2612+
to_rust_map(env, &jmap)
2613+
}
2614+
25102615
fn transform_jref_to_ref(jref: JObject, env: &mut JNIEnv) -> Result<Ref> {
25112616
let source_tag_name = env.get_optional_string_from_method(&jref, "getTagName")?;
25122617
let source_version_number = env.get_optional_u64_from_method(&jref, "getVersionNumber")?;

java/lance-jni/src/traits.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,15 @@ impl IntoJava for JLance<Option<usize>> {
262262
}
263263
}
264264

265+
impl IntoJava for JLance<Option<&str>> {
266+
fn into_java<'a>(self, env: &mut JNIEnv<'a>) -> Result<JObject<'a>> {
267+
Ok(match self.0 {
268+
Some(value) => env.new_string(value)?.into(),
269+
None => JObject::null(),
270+
})
271+
}
272+
}
273+
265274
impl FromJObjectWithEnv<Option<i64>> for JObject<'_> {
266275
fn extract_object(&self, env: &mut JNIEnv<'_>) -> Result<Option<i64>> {
267276
let ret = if self.is_null() {

java/src/main/java/org/lance/Branch.java

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@
1616
import com.google.common.base.MoreObjects;
1717
import com.google.common.collect.ImmutableList;
1818

19+
import java.util.Collections;
20+
import java.util.HashMap;
1921
import java.util.List;
22+
import java.util.Map;
2023
import java.util.Objects;
2124
import java.util.Optional;
2225

@@ -70,10 +73,18 @@ public int hashCode() {
7073
private final long parentVersion;
7174
private final long createAt;
7275
private final int manifestSize;
76+
private final Map<String, String> metadata;
7377

7478
public Branch(
7579
String name, String parentBranch, long parentVersion, long createAt, int manifestSize) {
76-
this(name, parentBranch, ImmutableList.of(), parentVersion, createAt, manifestSize);
80+
this(
81+
name,
82+
parentBranch,
83+
ImmutableList.of(),
84+
parentVersion,
85+
createAt,
86+
manifestSize,
87+
Collections.emptyMap());
7788
}
7889

7990
public Branch(
@@ -82,13 +93,15 @@ public Branch(
8293
List<BranchVersionMapping> branchIdentifier,
8394
long parentVersion,
8495
long createAt,
85-
int manifestSize) {
96+
int manifestSize,
97+
Map<String, String> metadata) {
8698
this.name = name;
8799
this.parentBranch = Optional.ofNullable(parentBranch);
88100
this.branchIdentifier = ImmutableList.copyOf(Objects.requireNonNull(branchIdentifier));
89101
this.parentVersion = parentVersion;
90102
this.createAt = createAt;
91103
this.manifestSize = manifestSize;
104+
this.metadata = Collections.unmodifiableMap(new HashMap<>(metadata));
92105
}
93106

94107
public String getName() {
@@ -115,6 +128,10 @@ public int getManifestSize() {
115128
return manifestSize;
116129
}
117130

131+
public Map<String, String> getMetadata() {
132+
return metadata;
133+
}
134+
118135
@Override
119136
public String toString() {
120137
return MoreObjects.toStringHelper(this)
@@ -124,6 +141,7 @@ public String toString() {
124141
.add("parentVersion", parentVersion)
125142
.add("createAt", createAt)
126143
.add("manifestSize", manifestSize)
144+
.add("metadata", metadata)
127145
.toString();
128146
}
129147

@@ -137,12 +155,13 @@ public boolean equals(Object o) {
137155
&& manifestSize == branch.manifestSize
138156
&& Objects.equals(name, branch.name)
139157
&& Objects.equals(parentBranch, branch.parentBranch)
140-
&& Objects.equals(branchIdentifier, branch.branchIdentifier);
158+
&& Objects.equals(branchIdentifier, branch.branchIdentifier)
159+
&& Objects.equals(metadata, branch.metadata);
141160
}
142161

143162
@Override
144163
public int hashCode() {
145164
return Objects.hash(
146-
name, parentBranch, branchIdentifier, parentVersion, createAt, manifestSize);
165+
name, parentBranch, branchIdentifier, parentVersion, createAt, manifestSize, metadata);
147166
}
148167
}

0 commit comments

Comments
 (0)