Skip to content

Commit 1bd4027

Browse files
committed
faidx: better errors
And one less segfault in seq_name
1 parent 6739479 commit 1bd4027

2 files changed

Lines changed: 139 additions & 23 deletions

File tree

src/errors.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,14 @@ pub enum Error {
3737
FaidxBadSeqName,
3838
#[error("failed to build index for fasta file {path:?}")]
3939
FaidxBuildFailed { path: std::path::PathBuf },
40+
#[error("failed to open FASTA/FAI")]
41+
FaidxOpenError,
42+
#[error("failed to fetch sequence {name}:{begin}-{end}")]
43+
FaidxFetchFailed {
44+
name: String,
45+
begin: usize,
46+
end: usize,
47+
},
4048

4149
// Errors for Tbx
4250
#[error("previous iterator generation failed")]

src/faidx/mod.rs

Lines changed: 131 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,11 @@ impl Reader {
7373
///
7474
/// * `path` - the path or URL to open
7575
fn new(path: &[u8]) -> Result<Self, Error> {
76-
let cpath = ffi::CString::new(path).unwrap();
76+
let cpath = ffi::CString::new(path).map_err(|_| Error::FaidxOpenError)?;
7777
let inner = unsafe { htslib::fai_load(cpath.as_ptr()) };
78+
if inner.is_null() {
79+
return Err(Error::FaidxOpenError);
80+
}
7881
Ok(Self { inner })
7982
}
8083

@@ -92,7 +95,13 @@ impl Reader {
9295
if end > i64::MAX as usize {
9396
return Err(Error::FaidxPositionTooLarge);
9497
}
95-
let cname = ffi::CString::new(name.as_ref().as_bytes()).unwrap();
98+
let cname = ffi::CString::new(name.as_ref().as_bytes()).map_err(|_| {
99+
Error::FaidxFetchFailed {
100+
name: name.as_ref().to_owned(),
101+
begin,
102+
end,
103+
}
104+
})?;
96105
let mut len_out: htslib::hts_pos_t = 0;
97106
let ptr = unsafe {
98107
htslib::faidx_fetch_seq64(
@@ -103,8 +112,20 @@ impl Reader {
103112
&mut len_out, //len
104113
)
105114
};
106-
let vec =
107-
unsafe { Vec::from_raw_parts(ptr as *mut u8, len_out as usize, len_out as usize) };
115+
if ptr.is_null() || len_out < 0 {
116+
return Err(Error::FaidxFetchFailed {
117+
name: name.as_ref().to_owned(),
118+
begin,
119+
end,
120+
});
121+
}
122+
// Copy the data out of the C-allocated buffer and free it with libc::free.
123+
// We must not use Vec::from_raw_parts because the pointer was allocated by
124+
// htslib's malloc, not Rust's global allocator. If a custom allocator is
125+
// active (e.g. mimalloc), dropping the Vec would call the wrong deallocator.
126+
let len = len_out as usize;
127+
let vec = unsafe { std::slice::from_raw_parts(ptr as *const u8, len) }.to_vec();
128+
unsafe { libc::free(ptr as *mut libc::c_void) };
108129
Ok(vec)
109130
}
110131

@@ -122,13 +143,15 @@ impl Reader {
122143
end: usize,
123144
) -> Result<String> {
124145
let bytes = self.fetch_seq(name, begin, end)?;
125-
Ok(std::str::from_utf8(&bytes).unwrap().to_owned())
146+
String::from_utf8(bytes).map_err(|_| Error::FaidxBadSeqName)
126147
}
127148

128-
/// Fetches the number of sequences in the fai index
149+
/// Fetches the number of sequences in the fai index.
129150
pub fn n_seqs(&self) -> u64 {
130151
let n = unsafe { htslib::faidx_nseq(self.inner) };
131-
n as u64
152+
// faidx_nseq returns c_int; negative should not occur on a valid
153+
// Reader (constructor validated the index), but clamp defensively.
154+
n.max(0) as u64
132155
}
133156

134157
/// Fetches the i-th sequence name
@@ -137,10 +160,11 @@ impl Reader {
137160
///
138161
/// * `i` - index to query
139162
pub fn seq_name(&self, i: i32) -> Result<String> {
140-
let cname = unsafe {
141-
let ptr = htslib::faidx_iseq(self.inner, i);
142-
ffi::CStr::from_ptr(ptr)
143-
};
163+
let ptr = unsafe { htslib::faidx_iseq(self.inner, i) };
164+
if ptr.is_null() {
165+
return Err(Error::FaidxBadSeqName);
166+
}
167+
let cname = unsafe { ffi::CStr::from_ptr(ptr) };
144168

145169
let out = match cname.to_str() {
146170
Ok(s) => s.to_string(),
@@ -154,29 +178,39 @@ impl Reader {
154178

155179
/// Fetches the length of the given sequence name.
156180
///
181+
/// Returns `None` if the sequence is not found in the index.
182+
///
157183
/// # Arguments
158184
///
159185
/// * `name` - the name of the template sequence (e.g., "chr1")
160-
pub fn fetch_seq_len<N: AsRef<str>>(&self, name: N) -> u64 {
161-
let cname = ffi::CString::new(name.as_ref().as_bytes()).unwrap();
162-
let seq_len = unsafe { htslib::faidx_seq_len(self.inner, cname.as_ptr()) };
163-
seq_len as u64
186+
pub fn fetch_seq_len<N: AsRef<str>>(&self, name: N) -> Option<u64> {
187+
let cname = ffi::CString::new(name.as_ref().as_bytes()).ok()?;
188+
let seq_len = unsafe { htslib::faidx_seq_len64(self.inner, cname.as_ptr()) };
189+
if seq_len < 0 {
190+
None
191+
} else {
192+
Some(seq_len as u64)
193+
}
164194
}
165195

166196
/// Returns a Result<Vector<String>> for all seq names.
197+
///
167198
/// # Errors
168199
///
169200
/// * `errors::Error::FaidxBadSeqName` - missing sequence name for sequence id.
170201
///
171-
/// If thrown, the index is malformed, and the number of sequences in the index does not match the number of sequence names available.
172-
///```
202+
/// If thrown, the index is malformed, and the number of sequences in the
203+
/// index does not match the number of sequence names available.
204+
///
205+
/// # Examples
206+
///
207+
/// ```
173208
/// use rust_htslib::faidx::build;
174209
/// let path = std::path::PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"),"/test/test_cram.fa"));
175210
/// build(&path).expect("Failed to build fasta index");
176211
/// let reader = rust_htslib::faidx::Reader::from_path(path).expect("Failed to open faidx");
177212
/// assert_eq!(reader.seq_names(), Ok(vec!["chr1".to_string(), "chr2".to_string(), "chr3".to_string()]));
178-
///```
179-
///
213+
/// ```
180214
pub fn seq_names(&self) -> Result<Vec<String>> {
181215
let num_seq = self.n_seqs();
182216
let mut ret = Vec::with_capacity(num_seq as usize);
@@ -313,10 +347,8 @@ mod tests {
313347
#[test]
314348
fn faidx_get_seq_len() {
315349
let r = open_reader();
316-
let chr1_len = r.fetch_seq_len("chr1");
317-
let chr2_len = r.fetch_seq_len("chr2");
318-
assert_eq!(chr1_len, 120u64);
319-
assert_eq!(chr2_len, 120u64);
350+
assert_eq!(r.fetch_seq_len("chr1"), Some(120));
351+
assert_eq!(r.fetch_seq_len("chr2"), Some(120));
320352
}
321353

322354
#[test]
@@ -326,4 +358,80 @@ mod tests {
326358
drop(reader);
327359
}
328360
}
361+
362+
#[test]
363+
fn faidx_open_nonexistent_errors() {
364+
let result = Reader::from_path("/does/not/exist.fa");
365+
assert!(result.is_err());
366+
}
367+
368+
#[test]
369+
fn faidx_fetch_nonexistent_seq_errors() {
370+
let r = open_reader();
371+
let result = r.fetch_seq("nonexistent_chromosome", 0, 10);
372+
assert!(result.is_err());
373+
}
374+
375+
#[test]
376+
fn faidx_fetch_nonexistent_seq_string_errors() {
377+
let r = open_reader();
378+
let result = r.fetch_seq_string("nonexistent_chromosome", 0, 10);
379+
assert!(result.is_err());
380+
}
381+
382+
#[test]
383+
fn faidx_seq_name_out_of_bounds_errors() {
384+
let r = open_reader();
385+
// There are only 3 sequences (indices 0, 1, 2)
386+
let result = r.seq_name(3);
387+
assert!(result.is_err());
388+
389+
let result = r.seq_name(999);
390+
assert!(result.is_err());
391+
392+
let result = r.seq_name(-1);
393+
assert!(result.is_err());
394+
}
395+
396+
#[test]
397+
fn faidx_fetch_seq_begin_after_end() {
398+
let r = open_reader();
399+
// htslib docs say behavior is undefined when begin > end,
400+
// but it should not segfault
401+
let result = r.fetch_seq("chr1", 10, 5);
402+
// We don't care if it's Ok or Err, just that it doesn't crash
403+
drop(result);
404+
}
405+
406+
#[test]
407+
fn faidx_fetch_seq_past_chromosome_end() {
408+
let r = open_reader();
409+
// chr1 is 120bp; fetch beyond that
410+
let result = r.fetch_seq("chr1", 0, 999);
411+
// htslib clamps to chromosome length, should not crash
412+
assert!(result.is_ok());
413+
}
414+
415+
#[test]
416+
fn faidx_fetch_empty_name() {
417+
let r = open_reader();
418+
let result = r.fetch_seq("", 0, 10);
419+
assert!(result.is_err());
420+
}
421+
422+
#[test]
423+
fn faidx_seq_len_nonexistent() {
424+
let r = open_reader();
425+
assert_eq!(r.fetch_seq_len("nonexistent"), None);
426+
}
427+
428+
#[test]
429+
fn faidx_fetch_many_times_no_leak() {
430+
let r = open_reader();
431+
// Exercises the copy+free path repeatedly to catch allocator mismatches
432+
for _ in 0..100_000 {
433+
let seq = r.fetch_seq("chr1", 0, 119).unwrap();
434+
assert_eq!(seq.len(), 120);
435+
}
436+
}
329437
}

0 commit comments

Comments
 (0)