From 36c2bc33498b693e720edd585c102fcfe5e01a3b Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Tue, 14 Nov 2023 12:07:29 -0600 Subject: [PATCH] csi/reader/index/header: Add read error --- noodles-csi/src/async/reader.rs | 4 +- noodles-csi/src/reader/index.rs | 2 +- noodles-csi/src/reader/index/header.rs | 215 +++++++++++++++++-------- 3 files changed, 155 insertions(+), 66 deletions(-) diff --git a/noodles-csi/src/async/reader.rs b/noodles-csi/src/async/reader.rs index 86bcf0ca9..f32fa81fe 100644 --- a/noodles-csi/src/async/reader.rs +++ b/noodles-csi/src/async/reader.rs @@ -137,7 +137,9 @@ where reader.read_exact(&mut aux).await?; let mut rdr = &aux[..]; - read_tabix_header(&mut rdr).map(Some) + read_tabix_header(&mut rdr) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) + .map(Some) } else { Ok(None) } diff --git a/noodles-csi/src/reader/index.rs b/noodles-csi/src/reader/index.rs index 1fea3eb40..c7a6edcd2 100644 --- a/noodles-csi/src/reader/index.rs +++ b/noodles-csi/src/reader/index.rs @@ -22,7 +22,7 @@ where .read_i32::<LittleEndian>() .and_then(|n| u8::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)))?; - let header = read_aux(reader)?; + let header = read_aux(reader).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; let reference_sequences = read_reference_sequences(reader, depth) .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; diff --git a/noodles-csi/src/reader/index/header.rs b/noodles-csi/src/reader/index/header.rs index 747391dce..95646cbd4 100644 --- a/noodles-csi/src/reader/index/header.rs +++ b/noodles-csi/src/reader/index/header.rs @@ -1,19 +1,103 @@ use std::{ + error, fmt, io::{self, Read}, - str, + num, str, }; use byteorder::{LittleEndian, ReadBytesExt}; -use crate::index::{header::ReferenceSequenceNames, Header}; +use crate::index::{ + header::{format, ReferenceSequenceNames}, + Header, +}; + +/// An error returned when a CSI header fails to be read. 
+#[derive(Debug)] +pub enum ReadError { + /// An I/O error. + Io(io::Error), + /// The aux length is invalid. + InvalidAuxLength(num::TryFromIntError), + /// The header format is invalid. + InvalidFormat(format::TryFromIntError), + /// The header reference sequence index is invalid. + InvalidReferenceSequenceIndex(num::TryFromIntError), + /// The header reference sequence index value is invalid. + InvalidReferenceSequenceIndexValue, + /// The header start position index is invalid. + InvalidStartPositionIndex(num::TryFromIntError), + /// The header start position index value is invalid. + InvalidStartPositionIndexValue, + /// The header end position index is invalid. + InvalidEndPositionIndex(num::TryFromIntError), + /// The header end position index value is invalid. + InvalidEndPositionIndexValue, + /// The header line comment prefix is invalid. + InvalidLineCommentPrefix(num::TryFromIntError), + /// The header line skip count is invalid. + InvalidLineSkipCount(num::TryFromIntError), + /// The header names length is invalid. + InvalidNamesLength(num::TryFromIntError), + /// A header name is duplicated. + DuplicateName(String), + /// The header names are invalid. 
+ InvalidNames, +} + +impl error::Error for ReadError { + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + match self { + Self::Io(e) => Some(e), + Self::InvalidAuxLength(e) => Some(e), + Self::InvalidFormat(e) => Some(e), + Self::InvalidReferenceSequenceIndex(e) => Some(e), + Self::InvalidStartPositionIndex(e) => Some(e), + Self::InvalidEndPositionIndex(e) => Some(e), + Self::InvalidLineCommentPrefix(e) => Some(e), + Self::InvalidLineSkipCount(e) => Some(e), + Self::InvalidNamesLength(e) => Some(e), + _ => None, + } + } +} + +impl fmt::Display for ReadError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Io(_) => write!(f, "I/O error"), + Self::InvalidAuxLength(_) => write!(f, "invalid aux length"), + Self::InvalidFormat(_) => write!(f, "invalid format"), + Self::InvalidReferenceSequenceIndex(_) => write!(f, "invalid reference sequence index"), + Self::InvalidReferenceSequenceIndexValue => { + write!(f, "invalid reference sequence index value") + } + Self::InvalidStartPositionIndex(_) => write!(f, "invalid start position index"), + Self::InvalidStartPositionIndexValue => write!(f, "invalid start position index value"), + Self::InvalidEndPositionIndex(_) => write!(f, "invalid end position index"), + Self::InvalidEndPositionIndexValue => write!(f, "invalid end position index value"), + Self::InvalidLineCommentPrefix(_) => write!(f, "invalid line comment prefix"), + Self::InvalidLineSkipCount(_) => write!(f, "invalid line skip count"), + Self::InvalidNamesLength(_) => write!(f, "invalid names length"), + Self::DuplicateName(name) => write!(f, "duplicate name: {name}"), + Self::InvalidNames => write!(f, "invalid names"), + } + } +} + +impl From<io::Error> for ReadError { + fn from(e: io::Error) -> Self { + Self::Io(e) + } +} -pub(super) fn read_aux<R>(reader: &mut R) -> io::Result<Option<Header>> +pub(super) fn read_aux<R>(reader: &mut R) -> Result<Option<Header>, ReadError> where R: Read, { - let l_aux = reader.read_i32::<LittleEndian>().and_then(|n| { - 
u64::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) - })?; + let l_aux = reader + .read_i32::<LittleEndian>() + .map_err(ReadError::Io) + .and_then(|n| u64::try_from(n).map_err(ReadError::InvalidAuxLength))?; if l_aux > 0 { let mut aux_reader = reader.take(l_aux); @@ -23,52 +107,64 @@ where } } -pub(crate) fn read_header<R>(reader: &mut R) -> io::Result<Header>
+pub(crate) fn read_header<R>(reader: &mut R) -> Result<Header, ReadError> where R: Read, { use crate::index::header::Format; - let format = reader.read_i32::<LittleEndian>().and_then(|n| { - Format::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) - })?; - - let col_seq = reader.read_i32::<LittleEndian>().and_then(|i| { - usize::try_from(i) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) - .and_then(|n| { - n.checked_sub(1) - .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "invalid col_seq")) - }) - })?; - - let col_beg = reader.read_i32::<LittleEndian>().and_then(|i| { - usize::try_from(i) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) - .and_then(|n| { - n.checked_sub(1) - .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "invalid col_beg")) - }) - })?; - - let col_end = reader.read_i32::<LittleEndian>().and_then(|i| match i { - 0 => Ok(None), - _ => usize::try_from(i) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) - .and_then(|n| { - n.checked_sub(1) - .ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "invalid col_end")) - }) - .map(Some), - })?; + let format = reader + .read_i32::<LittleEndian>() + .map_err(ReadError::Io) + .and_then(|n| Format::try_from(n).map_err(ReadError::InvalidFormat))?; + + let col_seq = reader + .read_i32::<LittleEndian>() + .map_err(ReadError::Io) + .and_then(|i| { + usize::try_from(i) + .map_err(ReadError::InvalidReferenceSequenceIndex) + .and_then(|n| { + n.checked_sub(1) + .ok_or(ReadError::InvalidReferenceSequenceIndexValue) + }) + })?; + + let col_beg = reader + .read_i32::<LittleEndian>() + .map_err(ReadError::Io) + .and_then(|i| { + usize::try_from(i) + .map_err(ReadError::InvalidStartPositionIndex) + .and_then(|n| { + n.checked_sub(1) + .ok_or(ReadError::InvalidStartPositionIndexValue) + }) + })?; + + let col_end = reader + .read_i32::<LittleEndian>() + .map_err(ReadError::Io) + .and_then(|i| match i { + 0 => Ok(None), + _ => usize::try_from(i) + .map_err(ReadError::InvalidEndPositionIndex) + .and_then(|n| { + n.checked_sub(1) + 
.ok_or(ReadError::InvalidEndPositionIndexValue) + }) + .map(Some), + })?; let meta = reader .read_i32::<LittleEndian>() - .and_then(|b| u8::try_from(b).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)))?; + .map_err(ReadError::Io) + .and_then(|b| u8::try_from(b).map_err(ReadError::InvalidLineCommentPrefix))?; - let skip = reader.read_i32::<LittleEndian>().and_then(|n| { - u32::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) - })?; + let skip = reader + .read_i32::<LittleEndian>() + .map_err(ReadError::Io) + .and_then(|n| u32::try_from(n).map_err(ReadError::InvalidLineSkipCount))?; let names = read_names(reader)?; @@ -83,13 +179,14 @@ where .build()) } -fn read_names<R>(reader: &mut R) -> io::Result<ReferenceSequenceNames> +fn read_names<R>(reader: &mut R) -> Result<ReferenceSequenceNames, ReadError> where R: Read, { - let l_nm = reader.read_i32::<LittleEndian>().and_then(|n| { - usize::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) - })?; + let l_nm = reader + .read_i32::<LittleEndian>() + .map_err(ReadError::Io) + .and_then(|n| usize::try_from(n).map_err(ReadError::InvalidNamesLength))?; let mut names = vec![0; l_nm]; reader.read_exact(&mut names)?; @@ -97,7 +194,7 @@ where parse_names(&names) } -pub(crate) fn parse_names(mut src: &[u8]) -> io::Result<ReferenceSequenceNames> { +pub(crate) fn parse_names(mut src: &[u8]) -> Result<ReferenceSequenceNames, ReadError> { const NUL: u8 = 0x00; let mut names = ReferenceSequenceNames::new(); @@ -109,10 +206,7 @@ pub(crate) fn parse_names(mut src: &[u8]) -> io::Result<ReferenceSequenceNames> str::from_utf8(raw_name).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; if !names.insert(name.into()) { - return Err(io::Error::new( - io::ErrorKind::InvalidData, - format!("duplicate reference sequence name: {name}"), - )); + return Err(ReadError::DuplicateName(name.into())); } src = &rest[1..]; @@ -121,10 +215,7 @@ pub(crate) fn parse_names(mut src: &[u8]) -> io::Result<ReferenceSequenceNames> if src.is_empty() { Ok(names) } else { - Err(io::Error::new( - io::ErrorKind::InvalidData, - "invalid reference sequence names", - )) + Err(ReadError::InvalidNames) } } @@ -133,7 +224,7 @@ mod tests { use 
super::*; #[test] - fn test_read_aux() -> io::Result<()> { + fn test_read_aux() -> Result<(), ReadError> { let src = [0x00, 0x00, 0x00, 0x00]; let mut reader = &src[..]; assert!(read_aux(&mut reader)?.is_none()); @@ -167,7 +258,7 @@ mod tests { } #[test] - fn test_parse_names() -> io::Result<()> { + fn test_parse_names() -> Result<(), ReadError> { let data = b"sq0\x00sq1\x00"; let actual = parse_names(&data[..])?; let expected: ReferenceSequenceNames = [String::from("sq0"), String::from("sq1")] @@ -187,17 +278,13 @@ mod tests { assert!(matches!( parse_names(data), - Err(ref e) if e.kind() == io::ErrorKind::InvalidData, + Err(ReadError::DuplicateName(s)) if s == "sq0" )); } #[test] fn test_parse_names_with_trailing_data() { let data = b"sq0\x00sq1\x00sq2"; - - assert!(matches!( - parse_names(data), - Err(ref e) if e.kind() == io::ErrorKind::InvalidData - )); + assert!(matches!(parse_names(data), Err(ReadError::InvalidNames))); } }