Skip to content

Commit

Permalink
sam/record/data/field/value/integer: Support integer values up to (2^32) - 1
Browse files Browse the repository at this point in the history

Values between 2^31 and (2^32) - 1 are now parsed as unsigned 32-bit
integers (`u32`), which are used as such in BAM and CRAM.

This allows `hts-specs/test/sam/passed/aux.pass-i.sam` to pass, which
requires support for parsing 4294967295.
  • Loading branch information
zaeleus committed Nov 6, 2024
1 parent 8474c64 commit d019a75
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 10 deletions.
8 changes: 8 additions & 0 deletions noodles-sam/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

## Unreleased

### Added

* sam/record/data/field/value/integer: Support integer (`i`) values up to
(2^32) - 1.

Values between 2^31 and (2^32) - 1 are now parsed as unsigned 32-bit
integers (`u32`), which are used as such in BAM and CRAM.

### Changed

* sam/alignment/record/quality_scores: Change `QualityScores::iter` item to
Expand Down
6 changes: 3 additions & 3 deletions noodles-sam/src/record/data/field/ty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::io;
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(super) enum Type {
Character,
Int32,
Integer,
Float,
String,
Hex,
Expand All @@ -14,7 +14,7 @@ pub(super) fn parse_type(src: &mut &[u8]) -> io::Result<Type> {
if let Some((b, rest)) = src.split_first() {
let ty = match b {
b'A' => Type::Character,
b'i' => Type::Int32,
b'i' => Type::Integer,
b'f' => Type::Float,
b'Z' => Type::String,
b'H' => Type::Hex,
Expand Down Expand Up @@ -42,7 +42,7 @@ mod tests {
}

t(b"A", Type::Character)?;
t(b"i", Type::Int32)?;
t(b"i", Type::Integer)?;
t(b"f", Type::Float)?;
t(b"Z", Type::String)?;
t(b"H", Type::Hex)?;
Expand Down
6 changes: 3 additions & 3 deletions noodles-sam/src/record/data/field/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@ use std::io;

use bstr::{BStr, ByteSlice};

use self::{array::parse_array, integer::parse_int32_value};
use self::{array::parse_array, integer::parse_integer_value};
use super::Type;
use crate::alignment::record::data::field::Value;

pub(super) fn parse_value<'a>(src: &mut &'a [u8], ty: Type) -> io::Result<Value<'a>> {
match ty {
Type::Character => parse_character_value(src),
Type::Int32 => parse_int32_value(src),
Type::Integer => parse_integer_value(src),
Type::Float => parse_float_value(src),
Type::String => Ok(parse_string_value(src)),
Type::Hex => Ok(parse_hex_value(src)),
Expand Down Expand Up @@ -75,7 +75,7 @@ mod tests {

let mut src = &b"0"[..];
assert!(matches!(
parse_value(&mut src, Type::Int32)?,
parse_value(&mut src, Type::Integer)?,
Value::Int32(0)
));

Expand Down
62 changes: 58 additions & 4 deletions noodles-sam/src/record/data/field/value/integer.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,66 @@
use std::io;

use lexical_core::FromLexical;

use crate::alignment::record::data::field::Value;

pub(super) fn parse_int32_value<'a>(src: &mut &'a [u8]) -> io::Result<Value<'a>> {
let (n, i) = lexical_core::parse_partial(src)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
pub(super) fn parse_integer_value<'a>(src: &mut &'a [u8]) -> io::Result<Value<'a>> {
parse_int(src)
.map(Value::Int32)
.or_else(|e| match e {
lexical_core::Error::Overflow(_) => parse_int(src).map(Value::UInt32),
_ => Err(e),
})
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
}

/// Parses a leading number of type `N` from `src` using a partial parse.
///
/// On success, `src` is advanced past the bytes that were consumed and the
/// parsed number is returned. On failure, the error is returned and `src` is
/// left unchanged.
fn parse_int<N>(src: &mut &[u8]) -> lexical_core::Result<N>
where
    N: FromLexical,
{
    match lexical_core::parse_partial(src) {
        Ok((value, consumed)) => {
            // Drop the consumed prefix so the caller continues after the number.
            *src = &src[consumed..];
            Ok(value)
        }
        Err(e) => Err(e),
    }
}

// Unit tests for `parse_integer_value`, covering the boundaries of the
// signed and unsigned 32-bit ranges.
#[cfg(test)]
mod tests {
use super::*;

// Walks the input from just below `i32::MIN` to just above `u32::MAX` and
// checks which `Value` variant (or error) each boundary produces.
#[test]
fn test_parse_integer_value() -> io::Result<()> {
// One below `i32::MIN`: expected to be rejected as invalid data.
let mut src = &b"-2147483649"[..]; // -(1 << 31) - 1
assert!(matches!(
parse_integer_value(&mut src),
Err(e) if e.kind() == io::ErrorKind::InvalidData
));

// Values within the `i32` range are expected as `Value::Int32`.
let mut src = &b"-2147483648"[..];
let actual = parse_integer_value(&mut src)?;
assert!(matches!(actual, Value::Int32(i32::MIN)));

let mut src = &b"0"[..];
let actual = parse_integer_value(&mut src)?;
assert!(matches!(actual, Value::Int32(0)));

let mut src = &b"2147483647"[..];
let actual = parse_integer_value(&mut src)?;
assert!(matches!(actual, Value::Int32(i32::MAX)));

// Values above `i32::MAX` up to `u32::MAX` are expected as `Value::UInt32`.
let mut src = &b"2147483648"[..];
let actual = parse_integer_value(&mut src)?;
assert!(matches!(actual, Value::UInt32(n) if n == 1 << 31));

let mut src = &b"4294967295"[..];
let actual = parse_integer_value(&mut src)?;
assert!(matches!(actual, Value::UInt32(u32::MAX)));

// One above `u32::MAX`: expected to be rejected as invalid data.
let mut src = &b"4294967296"[..]; // 1 << 32
assert!(matches!(
parse_integer_value(&mut src),
Err(e) if e.kind() == io::ErrorKind::InvalidData
));

// NOTE(review): the next line looks like the removed tail of the old
// `parse_int32_value`, left in by the diff rendering — confirm against the
// actual file before relying on this listing.
Ok(Value::Int32(n))
Ok(())
}
}

0 comments on commit d019a75

Please sign in to comment.