Skip to content

Commit 8b6b69c

Browse files
Vova Kolmakovclaude
andcommitted
refactor: trim metadata PK tests and simplify sync helper
- Keep only "set + sync" and "merge insert" tests in dataset::metadata per review; parse-error coverage lives in lance-core field unit tests. - Extract parse_unenforced_primary_key_position helper so TryFrom<&ArrowField> computes the value before the struct literal instead of the None+update pattern. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent b6bce65 commit 8b6b69c

2 files changed

Lines changed: 29 additions & 217 deletions

File tree

rust/lance-core/src/datatypes/field.rs

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1025,23 +1025,30 @@ impl Field {
10251025
/// to keep structured fields like `unenforced_primary_key_position` in sync.
10261026
pub fn sync_embedded_metadata(&mut self) -> Result<()> {
10271027
self.unenforced_primary_key_position =
1028-
if let Some(s) = self.metadata.get(LANCE_UNENFORCED_PRIMARY_KEY_POSITION) {
1029-
Some(s.parse::<u32>().map_err(|e| {
1030-
Error::invalid_input(format!(
1031-
"Invalid value '{}' for {}: {}",
1032-
s, LANCE_UNENFORCED_PRIMARY_KEY_POSITION, e
1033-
))
1034-
})?)
1035-
} else {
1036-
self.metadata
1037-
.get(LANCE_UNENFORCED_PRIMARY_KEY)
1038-
.filter(|s| matches!(s.to_lowercase().as_str(), "true" | "1" | "yes"))
1039-
.map(|_| 0)
1040-
};
1028+
parse_unenforced_primary_key_position(&self.metadata)?;
10411029
Ok(())
10421030
}
10431031
}
10441032

1033+
fn parse_unenforced_primary_key_position(
1034+
metadata: &HashMap<String, String>,
1035+
) -> Result<Option<u32>> {
1036+
if let Some(s) = metadata.get(LANCE_UNENFORCED_PRIMARY_KEY_POSITION) {
1037+
let parsed = s.parse::<u32>().map_err(|e| {
1038+
Error::invalid_input(format!(
1039+
"Invalid value '{}' for {}: {}",
1040+
s, LANCE_UNENFORCED_PRIMARY_KEY_POSITION, e
1041+
))
1042+
})?;
1043+
Ok(Some(parsed))
1044+
} else {
1045+
Ok(metadata
1046+
.get(LANCE_UNENFORCED_PRIMARY_KEY)
1047+
.filter(|s| matches!(s.to_lowercase().as_str(), "true" | "1" | "yes"))
1048+
.map(|_| 0))
1049+
}
1050+
}
1051+
10451052
impl fmt::Display for Field {
10461053
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
10471054
write!(
@@ -1137,7 +1144,9 @@ impl TryFrom<&ArrowField> for Field {
11371144
LogicalType::try_from(field.data_type())?
11381145
};
11391146

1140-
let mut result = Self {
1147+
let unenforced_primary_key_position = parse_unenforced_primary_key_position(&metadata)?;
1148+
1149+
Ok(Self {
11411150
id,
11421151
parent_id: -1,
11431152
name: field.name().clone(),
@@ -1156,10 +1165,8 @@ impl TryFrom<&ArrowField> for Field {
11561165
nullable: field.is_nullable(),
11571166
children,
11581167
dictionary: None,
1159-
unenforced_primary_key_position: None,
1160-
};
1161-
result.sync_embedded_metadata()?;
1162-
Ok(result)
1168+
unenforced_primary_key_position,
1169+
})
11631170
}
11641171
}
11651172

rust/lance/src/dataset/metadata.rs

Lines changed: 4 additions & 199 deletions
Original file line numberDiff line numberDiff line change
@@ -587,7 +587,7 @@ mod tests {
587587
async fn test_update_field_metadata_sets_unenforced_primary_key() {
588588
let mut dataset = test_dataset_for_pk().await;
589589

590-
// Set the boolean primary key metadata on the "id" field
590+
// Legacy boolean flag should map to position 0.
591591
dataset
592592
.update_field_metadata()
593593
.update("id", [("lance-schema:unenforced-primary-key", "true")])
@@ -605,21 +605,13 @@ mod tests {
605605
Some(0),
606606
"Legacy boolean flag should map to position 0"
607607
);
608-
}
609608

610-
#[tokio::test]
611-
async fn test_update_field_metadata_sets_unenforced_primary_key_position() {
612-
let mut dataset = test_dataset_for_pk().await;
613-
614-
// Set both the boolean flag and explicit position
609+
// Explicit position should override the legacy flag.
615610
dataset
616611
.update_field_metadata()
617612
.update(
618613
"id",
619-
[
620-
("lance-schema:unenforced-primary-key", "true"),
621-
("lance-schema:unenforced-primary-key:position", "2"),
622-
],
614+
[("lance-schema:unenforced-primary-key:position", "2")],
623615
)
624616
.unwrap()
625617
.await
@@ -630,137 +622,7 @@ mod tests {
630622
assert_eq!(
631623
field.unenforced_primary_key_position,
632624
Some(2),
633-
"Explicit position should take precedence over boolean flag"
634-
);
635-
}
636-
637-
#[tokio::test]
638-
async fn test_update_field_metadata_removes_unenforced_primary_key() {
639-
let mut dataset = test_dataset_for_pk().await;
640-
641-
// First, set the primary key
642-
dataset
643-
.update_field_metadata()
644-
.update("id", [("lance-schema:unenforced-primary-key", "true")])
645-
.unwrap()
646-
.await
647-
.unwrap();
648-
assert!(
649-
dataset
650-
.schema()
651-
.field("id")
652-
.unwrap()
653-
.is_unenforced_primary_key()
654-
);
655-
656-
// Now remove it by setting value to None (delete the key)
657-
dataset
658-
.update_field_metadata()
659-
.update(
660-
"id",
661-
[("lance-schema:unenforced-primary-key", Option::<&str>::None)],
662-
)
663-
.unwrap()
664-
.await
665-
.unwrap();
666-
667-
let field = dataset.schema().field("id").unwrap();
668-
assert!(
669-
!field.is_unenforced_primary_key(),
670-
"Field should no longer be a primary key after removing the metadata key"
671-
);
672-
assert_eq!(field.unenforced_primary_key_position, None);
673-
}
674-
675-
#[tokio::test]
676-
async fn test_update_field_metadata_replace_clears_unenforced_primary_key() {
677-
let mut dataset = test_dataset_for_pk().await;
678-
679-
// First, set the primary key
680-
dataset
681-
.update_field_metadata()
682-
.update("id", [("lance-schema:unenforced-primary-key", "true")])
683-
.unwrap()
684-
.await
685-
.unwrap();
686-
assert!(
687-
dataset
688-
.schema()
689-
.field("id")
690-
.unwrap()
691-
.is_unenforced_primary_key()
692-
);
693-
694-
// Replace all metadata with unrelated keys — PK metadata should be cleared
695-
dataset
696-
.update_field_metadata()
697-
.replace("id", [("some-other-key", "some-value")])
698-
.unwrap()
699-
.await
700-
.unwrap();
701-
702-
let field = dataset.schema().field("id").unwrap();
703-
assert!(
704-
!field.is_unenforced_primary_key(),
705-
"Primary key status should be cleared after replacing metadata without PK keys"
706-
);
707-
assert_eq!(field.unenforced_primary_key_position, None);
708-
}
709-
710-
#[tokio::test]
711-
async fn test_update_field_metadata_primary_key_roundtrip() {
712-
use lance_core::utils::tempfile::TempDir;
713-
714-
let dir = TempDir::default();
715-
let uri = dir.path_str();
716-
717-
let schema = Arc::new(ArrowSchema::new(vec![
718-
ArrowField::new("id", DataType::Int32, false),
719-
ArrowField::new("value", DataType::Utf8, true),
720-
]));
721-
722-
let batch = RecordBatch::try_new(
723-
schema.clone(),
724-
vec![
725-
Arc::new(Int32Array::from(vec![1, 2, 3])),
726-
Arc::new(arrow_array::StringArray::from(vec!["a", "b", "c"])),
727-
],
728-
)
729-
.unwrap();
730-
731-
let mut dataset = Dataset::write(
732-
RecordBatchIterator::new(vec![Ok(batch)], schema.clone()),
733-
&uri,
734-
None,
735-
)
736-
.await
737-
.unwrap();
738-
739-
// Set PK metadata via update
740-
dataset
741-
.update_field_metadata()
742-
.update(
743-
"id",
744-
[
745-
("lance-schema:unenforced-primary-key", "true"),
746-
("lance-schema:unenforced-primary-key:position", "1"),
747-
],
748-
)
749-
.unwrap()
750-
.await
751-
.unwrap();
752-
753-
// Reload the dataset from storage to verify protobuf round-trip
754-
let reloaded = Dataset::open(&uri).await.unwrap();
755-
let field = reloaded.schema().field("id").unwrap();
756-
assert!(
757-
field.is_unenforced_primary_key(),
758-
"Primary key should survive protobuf round-trip after metadata update"
759-
);
760-
assert_eq!(
761-
field.unenforced_primary_key_position,
762-
Some(1),
763-
"Primary key position should survive protobuf round-trip"
625+
"Explicit position should take precedence over the legacy boolean flag"
764626
);
765627
}
766628

@@ -838,61 +700,4 @@ mod tests {
838700
]
839701
);
840702
}
841-
842-
#[tokio::test]
843-
async fn test_update_field_metadata_invalid_pk_position_returns_error() {
844-
let mut dataset = test_dataset_for_pk().await;
845-
846-
let result = dataset
847-
.update_field_metadata()
848-
.update(
849-
"id",
850-
[("lance-schema:unenforced-primary-key:position", "abc")],
851-
)
852-
.unwrap()
853-
.await;
854-
855-
assert!(result.is_err(), "Non-numeric position should return error");
856-
assert!(matches!(result.unwrap_err(), Error::InvalidInput { .. }));
857-
}
858-
859-
#[tokio::test]
860-
async fn test_update_field_metadata_negative_pk_position_returns_error() {
861-
let mut dataset = test_dataset_for_pk().await;
862-
863-
let result = dataset
864-
.update_field_metadata()
865-
.update(
866-
"id",
867-
[("lance-schema:unenforced-primary-key:position", "-1")],
868-
)
869-
.unwrap()
870-
.await;
871-
872-
assert!(result.is_err(), "Negative position should return error");
873-
assert!(matches!(result.unwrap_err(), Error::InvalidInput { .. }));
874-
}
875-
876-
#[tokio::test]
877-
async fn test_update_field_metadata_overflow_pk_position_returns_error() {
878-
let mut dataset = test_dataset_for_pk().await;
879-
880-
let result = dataset
881-
.update_field_metadata()
882-
.update(
883-
"id",
884-
[(
885-
"lance-schema:unenforced-primary-key:position",
886-
"99999999999",
887-
)],
888-
)
889-
.unwrap()
890-
.await;
891-
892-
assert!(
893-
result.is_err(),
894-
"Overflowing u32 position should return error"
895-
);
896-
assert!(matches!(result.unwrap_err(), Error::InvalidInput { .. }));
897-
}
898703
}

0 commit comments

Comments
 (0)