Skip to content

Commit d4ad092

Browse files
fix: replace unsafe byte indexing with upfront span validation (#45)
Direct byte indexing (source[span.0..span.1] and source[..span.0]) panics if yamlpath returns a span that is out of bounds or misaligned to UTF-8 character boundaries, crashing the Python process. Replace it with upfront bounds and is_char_boundary checks that return a recoverable error, after which direct indexing is safe. Applied consistently to all 5 sites in document.rs, across parse_value, apply_insert_at, and apply_complex_replace. Closes #44
1 parent 9bcf237 commit d4ad092

1 file changed

Lines changed: 11 additions & 8 deletions

File tree

src/document.rs

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -94,14 +94,13 @@ impl PyDocument {
9494
match self.inner.query_exact(&r) {
9595
Ok(Some(feature)) => {
9696
let span = feature.location.byte_span;
97-
// Note: span.0 <= span.1 is guaranteed by tree-sitter node
98-
// ranges, so we only check bounds and UTF-8 alignment.
97+
// Note: span.0 <= span.1 is guaranteed by tree-sitter node construction.
9998
if span.1 > source.len()
10099
|| !source.is_char_boundary(span.0)
101100
|| !source.is_char_boundary(span.1)
102101
{
103102
return Err(PyErr::new::<pyo3::exceptions::PyValueError, _>(
104-
"Feature span is not valid in source",
103+
"Feature span is out of bounds or not aligned to UTF-8 character boundaries",
105104
));
106105
}
107106
let raw = &source[span.0..span.1];
@@ -295,9 +294,11 @@ fn apply_insert_at(
295294
.ok_or_else(|| format!("insert_at: item at index {resolved} not found"))?;
296295

297296
let item_start = item_feature.location.byte_span.0;
298-
// Note: no reversed-span check needed; tree-sitter nodes guarantee start <= end.
299297
if item_start > source.len() || !source.is_char_boundary(item_start) {
300-
return Err("Feature span is not valid in source".to_string());
298+
return Err(
299+
"Feature span is out of bounds or not aligned to UTF-8 character boundaries"
300+
.to_string(),
301+
);
301302
}
302303
let line_start = source[..item_start]
303304
.rfind('\n')
@@ -358,11 +359,13 @@ fn apply_complex_replace(
358359
.map_err(|e| format!("Query failed: {e}"))?;
359360

360361
let span = feature.location.byte_span;
361-
// Note: span.0 <= span.1 is guaranteed by tree-sitter node ranges,
362-
// so we only check bounds and UTF-8 alignment.
362+
// Note: span.0 <= span.1 is guaranteed by tree-sitter node construction.
363363
if span.1 > source.len() || !source.is_char_boundary(span.0) || !source.is_char_boundary(span.1)
364364
{
365-
return Err("Feature span is not valid in source".to_string());
365+
return Err(
366+
"Feature span is out of bounds or not aligned to UTF-8 character boundaries"
367+
.to_string(),
368+
);
366369
}
367370

368371
let content_with_ws = doc.extract_with_leading_whitespace(&feature);

0 commit comments

Comments
 (0)