From 1bba92989422dd3139091ba9f3b66a46edbc8627 Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Sun, 22 Oct 2023 16:12:11 -0500 Subject: [PATCH] cram/record: Change quality scores to be raw scores --- noodles-cram/CHANGELOG.md | 6 ++ noodles-cram/src/data_container/slice.rs | 12 +-- noodles-cram/src/reader/record.rs | 32 ++---- noodles-cram/src/record.rs | 7 +- noodles-cram/src/record/builder.rs | 17 ++- noodles-cram/src/record/convert.rs | 19 +++- noodles-cram/src/record/feature.rs | 22 ++-- noodles-cram/src/record/features.rs | 101 +++++++++--------- .../src/record/features/with_positions.rs | 7 +- noodles-cram/src/record/quality_scores.rs | 58 ++++++++++ noodles-cram/src/record/resolve.rs | 26 ++--- noodles-cram/src/writer/record.rs | 25 ++--- 12 files changed, 181 insertions(+), 151 deletions(-) create mode 100644 noodles-cram/src/record/quality_scores.rs diff --git a/noodles-cram/CHANGELOG.md b/noodles-cram/CHANGELOG.md index 4fbb5e018..2b390acb9 100644 --- a/noodles-cram/CHANGELOG.md +++ b/noodles-cram/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Unreleased + +### Changed + + * cram/record: Change quality scores to be raw scores. + ## 0.45.0 - 2023-10-19 ### Changed diff --git a/noodles-cram/src/data_container/slice.rs b/noodles-cram/src/data_container/slice.rs index 57dd2851d..6a4299df0 100644 --- a/noodles-cram/src/data_container/slice.rs +++ b/noodles-cram/src/data_container/slice.rs @@ -656,9 +656,7 @@ mod tests { #[test] fn test_resolve_quality_scores() -> Result<(), Box> { - use sam::record::{quality_scores::Score, QualityScores}; - - use crate::record::{Feature, Features}; + use crate::record::{Feature, Features, QualityScores}; let mut records = [ Record::builder() @@ -667,7 +665,7 @@ mod tests { .set_read_length(2) .set_features(Features::from(vec![Feature::Scores( Position::try_from(1)?, - vec![Score::try_from(8)?, Score::try_from(13)?], + vec![8, 13], )])) .build(), Record::builder().set_id(2).build(), @@ -675,7 +673,7 @@ mod tests { .set_id(3) .set_flags(Flags::QUALITY_SCORES_STORED_AS_ARRAY) .set_read_length(2) - .set_quality_scores(QualityScores::try_from(vec![21, 34])?) + .set_quality_scores(QualityScores::from(vec![21, 34])) .build(), ]; @@ -684,9 +682,9 @@ mod tests { let actual: Vec<_> = records.into_iter().map(|r| r.quality_scores).collect(); let expected = [ - QualityScores::try_from(vec![8, 13])?, + QualityScores::from(vec![8, 13]), QualityScores::default(), - QualityScores::try_from(vec![21, 34])?, + QualityScores::from(vec![21, 34]), ]; assert_eq!(actual, expected); diff --git a/noodles-cram/src/reader/record.rs b/noodles-cram/src/reader/record.rs index 3595e369f..b19969613 100644 --- a/noodles-cram/src/reader/record.rs +++ b/noodles-cram/src/reader/record.rs @@ -7,10 +7,7 @@ use std::{error, fmt, io}; use bytes::Buf; use noodles_bam as bam; use noodles_core::Position; -use noodles_sam::{ - self as sam, - record::{quality_scores::Score, sequence::Base}, -}; +use noodles_sam::{self as sam, record::sequence::Base}; use crate::{ container::block, @@ -23,7 +20,7 @@ use crate::{ io::BitReader, record::{ feature::{self, substitution}, - Feature, Flags, NextMateFlags, + Feature, Flags, NextMateFlags, QualityScores, }, Record, }; @@ -493,9 +490,7 @@ where } Code::ReadBase => { let base = self.read_base()?; - let quality_score = self.read_quality_score().and_then(|n| { - Score::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) - })?; + let quality_score = self.read_quality_score()?; Ok(Feature::ReadBase(position, base, quality_score)) } Code::Substitution => { @@ -515,9 +510,7 @@ where Ok(Feature::InsertBase(position, base)) } Code::QualityScore => { - let score = self.read_quality_score().and_then(|n| { - Score::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) - })?; + let score = self.read_quality_score()?; Ok(Feature::QualityScore(position, score)) } Code::ReferenceSkip => { @@ -591,9 +584,8 @@ where .collect() } - fn read_stretches_of_quality_scores(&mut self) -> io::Result> { - let scores = self - .compression_header + fn read_stretches_of_quality_scores(&mut self) -> io::Result> { + self.compression_header .data_series_encoding_map() .stretches_of_quality_scores_encoding() .ok_or_else(|| { @@ -604,12 +596,7 @@ where ), ) })? - .decode(&mut self.core_data_reader, &mut self.external_data_readers)?; - - scores - .into_iter() - .map(|n| Score::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))) - .collect() + .decode(&mut self.core_data_reader, &mut self.external_data_readers) } fn read_base(&mut self) -> io::Result { @@ -797,7 +784,7 @@ where fn read_quality_scores_stored_as_array( &mut self, read_length: usize, - ) -> io::Result { + ) -> io::Result { const MISSING: u8 = 0xff; let encoding = self @@ -823,7 +810,6 @@ where buf.clear(); } - sam::record::QualityScores::try_from(buf) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) + Ok(QualityScores::from(buf)) } } diff --git a/noodles-cram/src/record.rs b/noodles-cram/src/record.rs index f32a0ae5c..f752632e5 100644 --- a/noodles-cram/src/record.rs +++ b/noodles-cram/src/record.rs @@ -6,11 +6,12 @@ pub mod feature; mod features; mod flags; mod next_mate_flags; +mod quality_scores; pub mod resolve; pub use self::{ builder::Builder, feature::Feature, features::Features, flags::Flags, - next_mate_flags::NextMateFlags, + next_mate_flags::NextMateFlags, quality_scores::QualityScores, }; use std::io; @@ -44,7 +45,7 @@ pub struct Record { pub(crate) bases: sam::record::Sequence, pub(crate) features: Features, pub(crate) mapping_quality: Option, - pub(crate) quality_scores: sam::record::QualityScores, + pub(crate) quality_scores: QualityScores, } impl Record { @@ -223,7 +224,7 @@ impl Record { } /// Returns the quality scores. - pub fn quality_scores(&self) -> &sam::record::QualityScores { + pub fn quality_scores(&self) -> &QualityScores { &self.quality_scores } } diff --git a/noodles-cram/src/record/builder.rs b/noodles-cram/src/record/builder.rs index 46a8a0c2c..5c5de65f9 100644 --- a/noodles-cram/src/record/builder.rs +++ b/noodles-cram/src/record/builder.rs @@ -1,10 +1,7 @@ use noodles_core::Position; -use noodles_sam::{ - self as sam, - record::{quality_scores::Score, sequence::Base}, -}; +use noodles_sam::{self as sam, record::sequence::Base}; -use super::{Feature, Features, Flags, NextMateFlags, Record}; +use super::{Feature, Features, Flags, NextMateFlags, QualityScores, Record}; /// A CRAM record builder. pub struct Builder { @@ -25,7 +22,7 @@ pub struct Builder { bases: sam::record::Sequence, features: Features, mapping_quality: Option, - quality_scores: sam::record::QualityScores, + quality_scores: QualityScores, } impl Builder { @@ -157,14 +154,14 @@ impl Builder { } /// Sets the per-base quality scores. - pub fn set_quality_scores(mut self, quality_scores: sam::record::QualityScores) -> Self { + pub fn set_quality_scores(mut self, quality_scores: QualityScores) -> Self { self.quality_scores = quality_scores; self } /// Adds a quality score. - pub fn add_quality_score(mut self, quality_score: Score) -> Self { - self.quality_scores.push(quality_score); + pub fn add_quality_score(mut self, score: u8) -> Self { + self.quality_scores.as_mut().push(score); self } @@ -213,7 +210,7 @@ impl Default for Builder { bases: sam::record::Sequence::default(), features: Features::default(), mapping_quality: None, - quality_scores: sam::record::QualityScores::default(), + quality_scores: QualityScores::default(), } } } diff --git a/noodles-cram/src/record/convert.rs b/noodles-cram/src/record/convert.rs index 337b9371a..3bc3c8944 100644 --- a/noodles-cram/src/record/convert.rs +++ b/noodles-cram/src/record/convert.rs @@ -5,7 +5,7 @@ use noodles_sam::{ header::{record::value::map, ReferenceSequences}, }; -use super::{Features, Flags, Record}; +use super::{Features, Flags, QualityScores, Record}; impl Record { /// Converts an alignment record to a CRAM record. @@ -87,7 +87,17 @@ impl Record { flags.insert(Flags::QUALITY_SCORES_STORED_AS_ARRAY); } - builder = builder.set_quality_scores(record.quality_scores().clone()); + let scores: Vec<_> = record + .quality_scores() + .as_ref() + .iter() + .copied() + .map(u8::from) + .collect(); + + let quality_scores = QualityScores::from(scores); + + builder = builder.set_quality_scores(quality_scores); } Ok(builder.set_flags(flags).build()) @@ -142,7 +152,10 @@ impl Record { } if !self.quality_scores.is_empty() { - builder = builder.set_quality_scores(self.quality_scores); + let scores = Vec::<_>::from(self.quality_scores); + let quality_scores = sam::record::QualityScores::try_from(scores) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; + builder = builder.set_quality_scores(quality_scores); } let mut data = self.tags; diff --git a/noodles-cram/src/record/feature.rs b/noodles-cram/src/record/feature.rs index d9166ffa5..fb803af67 100644 --- a/noodles-cram/src/record/feature.rs +++ b/noodles-cram/src/record/feature.rs @@ -6,7 +6,7 @@ pub mod substitution; pub use self::code::Code; use noodles_core::Position; -use noodles_sam::record::{quality_scores::Score, sequence::Base}; +use noodles_sam::record::sequence::Base; /// A CRAM record feature. #[derive(Clone, Debug, Eq, PartialEq)] @@ -14,9 +14,9 @@ pub enum Feature { /// A stretch of bases (position, bases). Bases(Position, Vec), /// A stretch of quality scores (position, quality scores). - Scores(Position, Vec), + Scores(Position, Vec), /// A base-quality score pair (position, base, quality score). - ReadBase(Position, Base, Score), + ReadBase(Position, Base, u8), /// A base substitution (position, code (read) / base (write)). Substitution(Position, substitution::Value), /// Inserted bases (position, bases). @@ -26,7 +26,7 @@ pub enum Feature { /// A single inserted base (position, base). InsertBase(Position, Base), /// A single quality score (position, score). - QualityScore(Position, Score), + QualityScore(Position, u8), /// A number of skipped bases (position, length). ReferenceSkip(Position, usize), /// Soft clipped bases (position, bases). @@ -112,7 +112,7 @@ mod tests { assert_eq!(Feature::Bases(position, Vec::new()).code(), Code::Bases); assert_eq!(Feature::Scores(position, Vec::new()).code(), Code::Scores); assert_eq!( - Feature::ReadBase(position, Base::N, Score::default()).code(), + Feature::ReadBase(position, Base::N, 0).code(), Code::ReadBase ); assert_eq!( @@ -129,7 +129,7 @@ mod tests { Code::InsertBase ); assert_eq!( - Feature::QualityScore(position, Score::default()).code(), + Feature::QualityScore(position, 0).code(), Code::QualityScore ); assert_eq!( @@ -150,10 +150,7 @@ mod tests { assert_eq!(Feature::Bases(position, Vec::new()).position(), position); assert_eq!(Feature::Scores(position, Vec::new()).position(), position); - assert_eq!( - Feature::ReadBase(position, Base::N, Score::default()).position(), - position - ); + assert_eq!(Feature::ReadBase(position, Base::N, 0).position(), position); assert_eq!( Feature::Substitution(position, substitution::Value::Code(0)).position(), position @@ -164,10 +161,7 @@ mod tests { ); assert_eq!(Feature::Deletion(position, 0).position(), position); assert_eq!(Feature::InsertBase(position, Base::N).position(), position); - assert_eq!( - Feature::QualityScore(position, Score::default()).position(), - position - ); + assert_eq!(Feature::QualityScore(position, 0).position(), position); assert_eq!(Feature::ReferenceSkip(position, 0).position(), position); assert_eq!(Feature::SoftClip(position, Vec::new()).position(), position); assert_eq!(Feature::Padding(position, 0).position(), position); diff --git a/noodles-cram/src/record/features.rs b/noodles-cram/src/record/features.rs index 076227a28..a7062f785 100644 --- a/noodles-cram/src/record/features.rs +++ b/noodles-cram/src/record/features.rs @@ -131,7 +131,7 @@ fn cigar_to_features( Kind::Match | Kind::SequenceMatch | Kind::SequenceMismatch => { if op.len() == 1 { let base = sequence[read_position]; - let score = quality_scores[read_position]; + let score = u8::from(quality_scores[read_position]); features.push(Feature::ReadBase(read_position, base, score)); } else { let end = read_position @@ -142,8 +142,13 @@ fn cigar_to_features( features.push(Feature::Bases(read_position, bases.to_vec())); if !flags.are_quality_scores_stored_as_array() { - let scores = &quality_scores[read_position..end]; - features.push(Feature::Scores(read_position, scores.to_vec())); + let scores = quality_scores[read_position..end] + .iter() + .copied() + .map(u8::from) + .collect(); + + features.push(Feature::Scores(read_position, scores)); } } } @@ -153,7 +158,7 @@ fn cigar_to_features( features.push(Feature::InsertBase(read_position, base)); if !flags.are_quality_scores_stored_as_array() { - let score = quality_scores[read_position]; + let score = u8::from(quality_scores[read_position]); features.push(Feature::QualityScore(read_position, score)); } } else { @@ -165,8 +170,13 @@ fn cigar_to_features( features.push(Feature::Insertion(read_position, bases.to_vec())); if !flags.are_quality_scores_stored_as_array() { - let scores = &quality_scores[read_position..end]; - features.push(Feature::Scores(read_position, scores.to_vec())); + let scores = quality_scores[read_position..end] + .iter() + .copied() + .map(u8::from) + .collect(); + + features.push(Feature::Scores(read_position, scores)); } } } @@ -182,11 +192,16 @@ fn cigar_to_features( if !flags.are_quality_scores_stored_as_array() { if bases.len() == 1 { - let score = quality_scores[read_position]; + let score = u8::from(quality_scores[read_position]); features.push(Feature::QualityScore(read_position, score)); } else { - let scores = &quality_scores[read_position..end]; - features.push(Feature::Scores(read_position, scores.to_vec())); + let scores = quality_scores[read_position..end] + .iter() + .copied() + .map(u8::from) + .collect(); + + features.push(Feature::Scores(read_position, scores)); } } } @@ -217,7 +232,7 @@ mod tests { #[test] fn test_cigar_to_features() -> Result<(), Box> { - use sam::record::{quality_scores::Score, sequence::Base}; + use sam::record::sequence::Base; let flags = Flags::default(); @@ -225,11 +240,7 @@ mod tests { let sequence = "A".parse()?; let quality_scores = sam::record::QualityScores::try_from(vec![45])?; let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); - let expected = Features::from(vec![Feature::ReadBase( - Position::try_from(1)?, - Base::A, - Score::try_from(45)?, - )]); + let expected = Features::from(vec![Feature::ReadBase(Position::try_from(1)?, Base::A, 45)]); assert_eq!(actual, expected); let cigar = "2M".parse()?; @@ -238,10 +249,7 @@ mod tests { let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); let expected = Features::from(vec![ Feature::Bases(Position::try_from(1)?, vec![Base::A, Base::C]), - Feature::Scores( - Position::try_from(1)?, - vec![Score::try_from(45)?, Score::try_from(35)?], - ), + Feature::Scores(Position::try_from(1)?, vec![45, 35]), ]); assert_eq!(actual, expected); @@ -251,8 +259,8 @@ mod tests { let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); let expected = Features::from(vec![ Feature::InsertBase(Position::try_from(1)?, Base::A), - Feature::QualityScore(Position::try_from(1)?, Score::try_from(45)?), - Feature::ReadBase(Position::try_from(2)?, Base::C, Score::try_from(35)?), + Feature::QualityScore(Position::try_from(1)?, 45), + Feature::ReadBase(Position::try_from(2)?, Base::C, 35), ]); assert_eq!(actual, expected); @@ -262,11 +270,8 @@ mod tests { let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); let expected = Features::from(vec![ Feature::Insertion(Position::try_from(1)?, vec![Base::A, Base::C]), - Feature::Scores( - Position::try_from(1)?, - vec![Score::try_from(45)?, Score::try_from(35)?], - ), - Feature::ReadBase(Position::try_from(3)?, Base::G, Score::try_from(43)?), + Feature::Scores(Position::try_from(1)?, vec![45, 35]), + Feature::ReadBase(Position::try_from(3)?, Base::G, 43), ]); assert_eq!(actual, expected); @@ -277,10 +282,7 @@ mod tests { let expected = Features::from(vec![ Feature::Deletion(Position::try_from(1)?, 1), Feature::Bases(Position::try_from(1)?, vec![Base::A, Base::C]), - Feature::Scores( - Position::try_from(1)?, - vec![Score::try_from(45)?, Score::try_from(35)?], - ), + Feature::Scores(Position::try_from(1)?, vec![45, 35]), ]); assert_eq!(actual, expected); @@ -290,7 +292,7 @@ mod tests { let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); let expected = Features::from(vec![ Feature::ReferenceSkip(Position::try_from(1)?, 1), - Feature::ReadBase(Position::try_from(1)?, Base::A, Score::try_from(45)?), + Feature::ReadBase(Position::try_from(1)?, Base::A, 45), ]); assert_eq!(actual, expected); @@ -300,8 +302,8 @@ mod tests { let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); let expected = Features::from(vec![ Feature::SoftClip(Position::try_from(1)?, vec![Base::A]), - Feature::QualityScore(Position::try_from(1)?, Score::try_from(45)?), - Feature::ReadBase(Position::try_from(2)?, Base::C, Score::try_from(35)?), + Feature::QualityScore(Position::try_from(1)?, 45), + Feature::ReadBase(Position::try_from(2)?, Base::C, 35), ]); assert_eq!(actual, expected); @@ -311,11 +313,8 @@ mod tests { let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); let expected = Features::from(vec![ Feature::SoftClip(Position::try_from(1)?, vec![Base::A, Base::C]), - Feature::Scores( - Position::try_from(1)?, - vec![Score::try_from(45)?, Score::try_from(35)?], - ), - Feature::ReadBase(Position::try_from(3)?, Base::G, Score::try_from(43)?), + Feature::Scores(Position::try_from(1)?, vec![45, 35]), + Feature::ReadBase(Position::try_from(3)?, Base::G, 43), ]); assert_eq!(actual, expected); @@ -325,7 +324,7 @@ mod tests { let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); let expected = Features::from(vec![ Feature::HardClip(Position::try_from(1)?, 1), - Feature::ReadBase(Position::try_from(1)?, Base::A, Score::try_from(45)?), + Feature::ReadBase(Position::try_from(1)?, Base::A, 45), ]); assert_eq!(actual, expected); @@ -335,7 +334,7 @@ mod tests { let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); let expected = Features::from(vec![ Feature::Padding(Position::try_from(1)?, 1), - Feature::ReadBase(Position::try_from(1)?, Base::A, Score::try_from(45)?), + Feature::ReadBase(Position::try_from(1)?, Base::A, 45), ]); assert_eq!(actual, expected); @@ -345,7 +344,7 @@ mod tests { #[test] fn test_cigar_to_features_with_quality_scores_stored_as_array( ) -> Result<(), Box> { - use sam::{record::quality_scores::Score, record::sequence::Base}; + use sam::record::sequence::Base; let flags = Flags::QUALITY_SCORES_STORED_AS_ARRAY; @@ -353,11 +352,7 @@ mod tests { let sequence = "A".parse()?; let quality_scores = sam::record::QualityScores::try_from(vec![45])?; let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); - let expected = Features::from(vec![Feature::ReadBase( - Position::try_from(1)?, - Base::A, - Score::try_from(45)?, - )]); + let expected = Features::from(vec![Feature::ReadBase(Position::try_from(1)?, Base::A, 45)]); assert_eq!(actual, expected); let cigar = "2M".parse()?; @@ -376,7 +371,7 @@ mod tests { let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); let expected = Features::from(vec![ Feature::InsertBase(Position::try_from(1)?, Base::A), - Feature::ReadBase(Position::try_from(2)?, Base::C, Score::try_from(35)?), + Feature::ReadBase(Position::try_from(2)?, Base::C, 35), ]); assert_eq!(actual, expected); @@ -386,7 +381,7 @@ mod tests { let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); let expected = Features::from(vec![ Feature::Insertion(Position::try_from(1)?, vec![Base::A, Base::C]), - Feature::ReadBase(Position::try_from(3)?, Base::G, Score::try_from(43)?), + Feature::ReadBase(Position::try_from(3)?, Base::G, 43), ]); assert_eq!(actual, expected); @@ -406,7 +401,7 @@ mod tests { let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); let expected = Features::from(vec![ Feature::ReferenceSkip(Position::try_from(1)?, 1), - Feature::ReadBase(Position::try_from(1)?, Base::A, Score::try_from(45)?), + Feature::ReadBase(Position::try_from(1)?, Base::A, 45), ]); assert_eq!(actual, expected); @@ -416,7 +411,7 @@ mod tests { let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); let expected = Features::from(vec![ Feature::SoftClip(Position::try_from(1)?, vec![Base::A]), - Feature::ReadBase(Position::try_from(2)?, Base::C, Score::try_from(35)?), + Feature::ReadBase(Position::try_from(2)?, Base::C, 35), ]); assert_eq!(actual, expected); @@ -426,7 +421,7 @@ mod tests { let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); let expected = Features::from(vec![ Feature::SoftClip(Position::try_from(1)?, vec![Base::A, Base::C]), - Feature::ReadBase(Position::try_from(3)?, Base::G, Score::try_from(43)?), + Feature::ReadBase(Position::try_from(3)?, Base::G, 43), ]); assert_eq!(actual, expected); @@ -436,7 +431,7 @@ mod tests { let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); let expected = Features::from(vec![ Feature::HardClip(Position::try_from(1)?, 1), - Feature::ReadBase(Position::try_from(1)?, Base::A, Score::try_from(45)?), + Feature::ReadBase(Position::try_from(1)?, Base::A, 45), ]); assert_eq!(actual, expected); @@ -446,7 +441,7 @@ mod tests { let actual = cigar_to_features(flags, &cigar, &sequence, &quality_scores); let expected = Features::from(vec![ Feature::Padding(Position::try_from(1)?, 1), - Feature::ReadBase(Position::try_from(1)?, Base::A, Score::try_from(45)?), + Feature::ReadBase(Position::try_from(1)?, Base::A, 45), ]); assert_eq!(actual, expected); diff --git a/noodles-cram/src/record/features/with_positions.rs b/noodles-cram/src/record/features/with_positions.rs index e9f31e2e0..d0e15af3d 100644 --- a/noodles-cram/src/record/features/with_positions.rs +++ b/noodles-cram/src/record/features/with_positions.rs @@ -92,16 +92,13 @@ mod tests { #[test] fn test_next() -> Result<(), Box> { - use noodles_sam::record::{quality_scores::Score, sequence::Base}; + use noodles_sam::record::sequence::Base; use crate::record::Features; let features = Features::from(vec![ Feature::Bases(Position::MIN, vec![Base::A, Base::C]), - Feature::Scores( - Position::MIN, - vec![Score::try_from(0)?, Score::try_from(0)?], - ), + Feature::Scores(Position::MIN, vec![0, 0]), ]); let mut iter = WithPositions::new(features.iter(), Position::MIN); diff --git a/noodles-cram/src/record/quality_scores.rs b/noodles-cram/src/record/quality_scores.rs new file mode 100644 index 000000000..5ef05c639 --- /dev/null +++ b/noodles-cram/src/record/quality_scores.rs @@ -0,0 +1,58 @@ +use std::ops::{Index, IndexMut}; + +use noodles_core::position::SequenceIndex; + +/// Raw CRAM record quality scores. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct QualityScores(Vec); + +impl QualityScores { + /// Returns whether there are any scores. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } +} + +impl AsRef<[u8]> for QualityScores { + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +impl AsMut> for QualityScores { + fn as_mut(&mut self) -> &mut Vec { + &mut self.0 + } +} + +impl From> for QualityScores { + fn from(values: Vec) -> Self { + Self(values) + } +} + +impl Index for QualityScores +where + I: SequenceIndex, +{ + type Output = I::Output; + + fn index(&self, index: I) -> &Self::Output { + index.index(&self.0) + } +} + +impl IndexMut for QualityScores +where + I: SequenceIndex, +{ + fn index_mut(&mut self, index: I) -> &mut Self::Output { + index.index_mut(&mut self.0) + } +} + +impl From for Vec { + fn from(quality_scores: QualityScores) -> Self { + quality_scores.0 + } +} diff --git a/noodles-cram/src/record/resolve.rs b/noodles-cram/src/record/resolve.rs index ea66b5690..7e20423bd 100644 --- a/noodles-cram/src/record/resolve.rs +++ b/noodles-cram/src/record/resolve.rs @@ -10,7 +10,7 @@ use crate::data_container::compression_header::SubstitutionMatrix; use super::{ feature::substitution::{self, Base as SubstitutionBase}, - Feature, Features, + Feature, Features, QualityScores, }; pub(crate) fn resolve_bases( @@ -117,12 +117,10 @@ fn copy_from_raw_bases(dst: &mut [Base], src: &[u8]) -> io::Result<()> { pub fn resolve_quality_scores( features: &[Feature], read_len: usize, - quality_scores: &mut sam::record::QualityScores, + quality_scores: &mut QualityScores, ) { - use sam::record::quality_scores::Score; - quality_scores.as_mut().clear(); - quality_scores.as_mut().resize(read_len, Score::default()); + quality_scores.as_mut().resize(read_len, 0); for feature in features { let read_position = feature.position(); @@ -145,7 +143,6 @@ pub fn resolve_quality_scores( #[cfg(test)] mod tests { use noodles_core::Position; - use sam::record::quality_scores::Score; use super::*; @@ -181,11 +178,7 @@ mod tests { &"TGGT".parse()?, )?; t( - &Features::from(vec![Feature::ReadBase( - Position::try_from(2)?, - Base::Y, - Score::default(), - )]), + &Features::from(vec![Feature::ReadBase(Position::try_from(2)?, Base::Y, 0)]), &"AYGT".parse()?, )?; t( @@ -277,15 +270,10 @@ mod tests { #[test] fn test_resolve_quality_scores() -> Result<(), Box> { - use sam::record::{quality_scores::Score, QualityScores}; - let features = [ - Feature::ReadBase(Position::try_from(1)?, Base::A, Score::try_from(5)?), - Feature::QualityScore(Position::try_from(3)?, Score::try_from(8)?), - Feature::Scores( - Position::try_from(5)?, - vec![Score::try_from(13)?, Score::try_from(21)?], - ), + Feature::ReadBase(Position::try_from(1)?, Base::A, 5), + Feature::QualityScore(Position::try_from(3)?, 8), + Feature::Scores(Position::try_from(5)?, vec![13, 21]), ]; let mut quality_scores = QualityScores::default(); diff --git a/noodles-cram/src/writer/record.rs b/noodles-cram/src/writer/record.rs index e1a38ec6e..e03198498 100644 --- a/noodles-cram/src/writer/record.rs +++ b/noodles-cram/src/writer/record.rs @@ -6,10 +6,7 @@ use std::{ use noodles_bam as bam; use noodles_core::Position; -use noodles_sam::{ - self as sam, - record::{quality_scores::Score, sequence::Base}, -}; +use noodles_sam::{self as sam, record::sequence::Base}; use crate::{ container::block, @@ -614,9 +611,8 @@ where ) } - fn write_stretches_of_quality_scores(&mut self, quality_scores: &[Score]) -> io::Result<()> { - let encoding = self - .compression_header + fn write_stretches_of_quality_scores(&mut self, quality_scores: &[u8]) -> io::Result<()> { + self.compression_header .data_series_encoding_map() .stretches_of_quality_scores_encoding() .ok_or_else(|| { @@ -626,11 +622,12 @@ where DataSeries::StretchesOfQualityScores, ), ) - })?; - - let scores: Vec<_> = quality_scores.iter().copied().map(u8::from).collect(); - - encoding.encode(self.core_data_writer, self.external_data_writers, &scores) + })? + .encode( + self.core_data_writer, + self.external_data_writers, + quality_scores, + ) } fn write_base(&mut self, base: Base) -> io::Result<()> { @@ -650,7 +647,7 @@ where ) } - fn write_quality_score(&mut self, quality_score: Score) -> io::Result<()> { + fn write_quality_score(&mut self, quality_score: u8) -> io::Result<()> { self.compression_header .data_series_encoding_map() .quality_scores_encoding() @@ -663,7 +660,7 @@ where .encode( self.core_data_writer, self.external_data_writers, - u8::from(quality_score), + quality_score, ) }