Skip to content

Commit

Permalink
csi/reader/index/header: Add read error
Browse files Browse the repository at this point in the history
  • Loading branch information
zaeleus committed Nov 14, 2023
1 parent 345c60d commit 36c2bc3
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 66 deletions.
4 changes: 3 additions & 1 deletion noodles-csi/src/async/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,9 @@ where
reader.read_exact(&mut aux).await?;

let mut rdr = &aux[..];
read_tabix_header(&mut rdr).map(Some)
read_tabix_header(&mut rdr)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
.map(Some)
} else {
Ok(None)
}
Expand Down
2 changes: 1 addition & 1 deletion noodles-csi/src/reader/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ where
.read_i32::<LittleEndian>()
.and_then(|n| u8::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)))?;

let header = read_aux(reader)?;
let header = read_aux(reader).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

let reference_sequences = read_reference_sequences(reader, depth)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
Expand Down
215 changes: 151 additions & 64 deletions noodles-csi/src/reader/index/header.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,103 @@
use std::{
error, fmt,
io::{self, Read},
str,
num, str,
};

use byteorder::{LittleEndian, ReadBytesExt};

use crate::index::{header::ReferenceSequenceNames, Header};
use crate::index::{
header::{format, ReferenceSequenceNames},
Header,
};

/// An error returned when a CSI header fails to be read.
///
/// Each variant corresponds to one field of the serialized header (or the
/// surrounding aux block) that could not be read or validated. Variants that
/// wrap a conversion error expose it through [`std::error::Error::source`].
#[derive(Debug)]
pub enum ReadError {
/// An I/O error.
///
/// Raised when reading from the underlying reader fails. A reference sequence
/// name that is not valid UTF-8 also surfaces here, because `parse_names` wraps
/// that failure in an `io::Error` before it is converted.
Io(io::Error),
/// The aux length is invalid.
///
/// The stored 32-bit signed length (`l_aux`) could not be converted to `u64`.
InvalidAuxLength(num::TryFromIntError),
/// The header format is invalid.
///
/// The stored 32-bit value could not be converted to a `Format`.
InvalidFormat(format::TryFromIntError),
/// The header reference sequence index is invalid.
///
/// The stored 32-bit value (`col_seq`) could not be converted to `usize`.
InvalidReferenceSequenceIndex(num::TryFromIntError),
/// The header reference sequence index value is invalid.
///
/// The stored index is decremented by one when read; this is returned when that
/// subtraction underflows, i.e., the stored value is 0.
InvalidReferenceSequenceIndexValue,
/// The header start position index is invalid.
///
/// The stored 32-bit value (`col_beg`) could not be converted to `usize`.
InvalidStartPositionIndex(num::TryFromIntError),
/// The header start position index value is invalid.
///
/// The stored index is decremented by one when read; this is returned when that
/// subtraction underflows, i.e., the stored value is 0.
InvalidStartPositionIndexValue,
/// The header end position index is invalid.
///
/// The stored 32-bit value (`col_end`) is nonzero and could not be converted to
/// `usize`. A stored value of 0 is accepted and read as "no end position index".
InvalidEndPositionIndex(num::TryFromIntError),
/// The header end position index value is invalid.
///
/// NOTE(review): `read_header` maps a stored 0 to `None` before decrementing, so
/// this variant looks unreachable from that path -- confirm before relying on it.
InvalidEndPositionIndexValue,
/// The header line comment prefix is invalid.
///
/// The stored 32-bit value (`meta`) could not be converted to `u8`.
InvalidLineCommentPrefix(num::TryFromIntError),
/// The header line skip count is invalid.
///
/// The stored 32-bit signed value (`skip`) could not be converted to `u32`.
InvalidLineSkipCount(num::TryFromIntError),
/// The header names length is invalid.
///
/// The stored 32-bit signed length (`l_nm`) could not be converted to `usize`.
InvalidNamesLength(num::TryFromIntError),
/// A header name is duplicated.
///
/// Carries the name that was rejected when inserting it into the set of
/// reference sequence names.
DuplicateName(String),
/// The header names are invalid.
///
/// Returned when bytes remain after all names have been parsed, e.g., a final
/// name that is missing its NUL terminator.
InvalidNames,
}

impl error::Error for ReadError {
    /// Returns the lower-level error wrapped by this variant, if any.
    ///
    /// Variants that carry an I/O or integer-conversion error return it; variants
    /// that describe a bad value without an underlying error return `None`.
    fn source(&self) -> Option<&(dyn error::Error + 'static)> {
        // Every variant is listed explicitly (no `_` arm) so that adding a new
        // variant is a compile error here until its source is decided on.
        match self {
            Self::Io(e) => Some(e),
            Self::InvalidFormat(e) => Some(e),
            Self::InvalidAuxLength(e)
            | Self::InvalidReferenceSequenceIndex(e)
            | Self::InvalidStartPositionIndex(e)
            | Self::InvalidEndPositionIndex(e)
            | Self::InvalidLineCommentPrefix(e)
            | Self::InvalidLineSkipCount(e)
            | Self::InvalidNamesLength(e) => Some(e),
            Self::InvalidReferenceSequenceIndexValue
            | Self::InvalidStartPositionIndexValue
            | Self::InvalidEndPositionIndexValue
            | Self::DuplicateName(_)
            | Self::InvalidNames => None,
        }
    }
}

impl fmt::Display for ReadError {
    /// Writes a short, lowercase description of the failure.
    ///
    /// Wrapped source errors are not included in the message; only
    /// `DuplicateName` interpolates its payload.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // All but one variant map to a fixed message, so pick the text first and
        // emit it with a single write.
        let message = match self {
            Self::DuplicateName(name) => return write!(f, "duplicate name: {name}"),
            Self::Io(_) => "I/O error",
            Self::InvalidAuxLength(_) => "invalid aux length",
            Self::InvalidFormat(_) => "invalid format",
            Self::InvalidReferenceSequenceIndex(_) => "invalid reference sequence index",
            Self::InvalidReferenceSequenceIndexValue => "invalid reference sequence index value",
            Self::InvalidStartPositionIndex(_) => "invalid start position index",
            Self::InvalidStartPositionIndexValue => "invalid start position index value",
            Self::InvalidEndPositionIndex(_) => "invalid end position index",
            Self::InvalidEndPositionIndexValue => "invalid end position index value",
            Self::InvalidLineCommentPrefix(_) => "invalid line comment prefix",
            Self::InvalidLineSkipCount(_) => "invalid line skip count",
            Self::InvalidNamesLength(_) => "invalid names length",
            Self::InvalidNames => "invalid names",
        };

        f.write_str(message)
    }
}

impl From<io::Error> for ReadError {
fn from(e: io::Error) -> Self {
Self::Io(e)
}
}

pub(super) fn read_aux<R>(reader: &mut R) -> io::Result<Option<Header>>
pub(super) fn read_aux<R>(reader: &mut R) -> Result<Option<Header>, ReadError>
where
R: Read,
{
let l_aux = reader.read_i32::<LittleEndian>().and_then(|n| {
u64::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
})?;
let l_aux = reader
.read_i32::<LittleEndian>()
.map_err(ReadError::Io)
.and_then(|n| u64::try_from(n).map_err(ReadError::InvalidAuxLength))?;

if l_aux > 0 {
let mut aux_reader = reader.take(l_aux);
Expand All @@ -23,52 +107,64 @@ where
}
}

pub(crate) fn read_header<R>(reader: &mut R) -> io::Result<Header>
pub(crate) fn read_header<R>(reader: &mut R) -> Result<Header, ReadError>
where
R: Read,
{
use crate::index::header::Format;

let format = reader.read_i32::<LittleEndian>().and_then(|n| {
Format::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
})?;

let col_seq = reader.read_i32::<LittleEndian>().and_then(|i| {
usize::try_from(i)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
.and_then(|n| {
n.checked_sub(1)
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "invalid col_seq"))
})
})?;

let col_beg = reader.read_i32::<LittleEndian>().and_then(|i| {
usize::try_from(i)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
.and_then(|n| {
n.checked_sub(1)
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "invalid col_beg"))
})
})?;

let col_end = reader.read_i32::<LittleEndian>().and_then(|i| match i {
0 => Ok(None),
_ => usize::try_from(i)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
.and_then(|n| {
n.checked_sub(1)
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "invalid col_end"))
})
.map(Some),
})?;
let format = reader
.read_i32::<LittleEndian>()
.map_err(ReadError::Io)
.and_then(|n| Format::try_from(n).map_err(ReadError::InvalidFormat))?;

let col_seq = reader
.read_i32::<LittleEndian>()
.map_err(ReadError::Io)
.and_then(|i| {
usize::try_from(i)
.map_err(ReadError::InvalidReferenceSequenceIndex)
.and_then(|n| {
n.checked_sub(1)
.ok_or(ReadError::InvalidReferenceSequenceIndexValue)
})
})?;

let col_beg = reader
.read_i32::<LittleEndian>()
.map_err(ReadError::Io)
.and_then(|i| {
usize::try_from(i)
.map_err(ReadError::InvalidStartPositionIndex)
.and_then(|n| {
n.checked_sub(1)
.ok_or(ReadError::InvalidStartPositionIndexValue)
})
})?;

let col_end = reader
.read_i32::<LittleEndian>()
.map_err(ReadError::Io)
.and_then(|i| match i {
0 => Ok(None),
_ => usize::try_from(i)
.map_err(ReadError::InvalidEndPositionIndex)
.and_then(|n| {
n.checked_sub(1)
.ok_or(ReadError::InvalidEndPositionIndexValue)
})
.map(Some),
})?;

let meta = reader
.read_i32::<LittleEndian>()
.and_then(|b| u8::try_from(b).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)))?;
.map_err(ReadError::Io)
.and_then(|b| u8::try_from(b).map_err(ReadError::InvalidLineCommentPrefix))?;

let skip = reader.read_i32::<LittleEndian>().and_then(|n| {
u32::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
})?;
let skip = reader
.read_i32::<LittleEndian>()
.map_err(ReadError::Io)
.and_then(|n| u32::try_from(n).map_err(ReadError::InvalidLineSkipCount))?;

let names = read_names(reader)?;

Expand All @@ -83,21 +179,22 @@ where
.build())
}

fn read_names<R>(reader: &mut R) -> io::Result<ReferenceSequenceNames>
fn read_names<R>(reader: &mut R) -> Result<ReferenceSequenceNames, ReadError>
where
R: Read,
{
let l_nm = reader.read_i32::<LittleEndian>().and_then(|n| {
usize::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
})?;
let l_nm = reader
.read_i32::<LittleEndian>()
.map_err(ReadError::Io)
.and_then(|n| usize::try_from(n).map_err(ReadError::InvalidNamesLength))?;

let mut names = vec![0; l_nm];
reader.read_exact(&mut names)?;

parse_names(&names)
}

pub(crate) fn parse_names(mut src: &[u8]) -> io::Result<ReferenceSequenceNames> {
pub(crate) fn parse_names(mut src: &[u8]) -> Result<ReferenceSequenceNames, ReadError> {
const NUL: u8 = 0x00;

let mut names = ReferenceSequenceNames::new();
Expand All @@ -109,10 +206,7 @@ pub(crate) fn parse_names(mut src: &[u8]) -> io::Result<ReferenceSequenceNames>
str::from_utf8(raw_name).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

if !names.insert(name.into()) {
return Err(io::Error::new(
io::ErrorKind::InvalidData,
format!("duplicate reference sequence name: {name}"),
));
return Err(ReadError::DuplicateName(name.into()));
}

src = &rest[1..];
Expand All @@ -121,10 +215,7 @@ pub(crate) fn parse_names(mut src: &[u8]) -> io::Result<ReferenceSequenceNames>
if src.is_empty() {
Ok(names)
} else {
Err(io::Error::new(
io::ErrorKind::InvalidData,
"invalid reference sequence names",
))
Err(ReadError::InvalidNames)
}
}

Expand All @@ -133,7 +224,7 @@ mod tests {
use super::*;

#[test]
fn test_read_aux() -> io::Result<()> {
fn test_read_aux() -> Result<(), ReadError> {
let src = [0x00, 0x00, 0x00, 0x00];
let mut reader = &src[..];
assert!(read_aux(&mut reader)?.is_none());
Expand Down Expand Up @@ -167,7 +258,7 @@ mod tests {
}

#[test]
fn test_parse_names() -> io::Result<()> {
fn test_parse_names() -> Result<(), ReadError> {
let data = b"sq0\x00sq1\x00";
let actual = parse_names(&data[..])?;
let expected: ReferenceSequenceNames = [String::from("sq0"), String::from("sq1")]
Expand All @@ -187,17 +278,13 @@ mod tests {

assert!(matches!(
parse_names(data),
Err(ref e) if e.kind() == io::ErrorKind::InvalidData,
Err(ReadError::DuplicateName(s)) if s == "sq0"
));
}

#[test]
fn test_parse_names_with_trailing_data() {
let data = b"sq0\x00sq1\x00sq2";

assert!(matches!(
parse_names(data),
Err(ref e) if e.kind() == io::ErrorKind::InvalidData
));
assert!(matches!(parse_names(data), Err(ReadError::InvalidNames)));
}
}

0 comments on commit 36c2bc3

Please sign in to comment.