Skip to content

Commit

Permalink
cram/record: Change quality scores to be raw scores
Browse files Browse the repository at this point in the history
  • Loading branch information
zaeleus committed Oct 23, 2023
1 parent 26cec9e commit 1bba929
Show file tree
Hide file tree
Showing 12 changed files with 181 additions and 151 deletions.
6 changes: 6 additions & 0 deletions noodles-cram/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## Unreleased

### Changed

* cram/record: Change quality scores to be raw scores.

## 0.45.0 - 2023-10-19

### Changed
Expand Down
12 changes: 5 additions & 7 deletions noodles-cram/src/data_container/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -656,9 +656,7 @@ mod tests {

#[test]
fn test_resolve_quality_scores() -> Result<(), Box<dyn std::error::Error>> {
use sam::record::{quality_scores::Score, QualityScores};

use crate::record::{Feature, Features};
use crate::record::{Feature, Features, QualityScores};

let mut records = [
Record::builder()
Expand All @@ -667,15 +665,15 @@ mod tests {
.set_read_length(2)
.set_features(Features::from(vec![Feature::Scores(
Position::try_from(1)?,
vec![Score::try_from(8)?, Score::try_from(13)?],
vec![8, 13],
)]))
.build(),
Record::builder().set_id(2).build(),
Record::builder()
.set_id(3)
.set_flags(Flags::QUALITY_SCORES_STORED_AS_ARRAY)
.set_read_length(2)
.set_quality_scores(QualityScores::try_from(vec![21, 34])?)
.set_quality_scores(QualityScores::from(vec![21, 34]))
.build(),
];

Expand All @@ -684,9 +682,9 @@ mod tests {
let actual: Vec<_> = records.into_iter().map(|r| r.quality_scores).collect();

let expected = [
QualityScores::try_from(vec![8, 13])?,
QualityScores::from(vec![8, 13]),
QualityScores::default(),
QualityScores::try_from(vec![21, 34])?,
QualityScores::from(vec![21, 34]),
];

assert_eq!(actual, expected);
Expand Down
32 changes: 9 additions & 23 deletions noodles-cram/src/reader/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,7 @@ use std::{error, fmt, io};
use bytes::Buf;
use noodles_bam as bam;
use noodles_core::Position;
use noodles_sam::{
self as sam,
record::{quality_scores::Score, sequence::Base},
};
use noodles_sam::{self as sam, record::sequence::Base};

use crate::{
container::block,
Expand All @@ -23,7 +20,7 @@ use crate::{
io::BitReader,
record::{
feature::{self, substitution},
Feature, Flags, NextMateFlags,
Feature, Flags, NextMateFlags, QualityScores,
},
Record,
};
Expand Down Expand Up @@ -493,9 +490,7 @@ where
}
Code::ReadBase => {
let base = self.read_base()?;
let quality_score = self.read_quality_score().and_then(|n| {
Score::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
})?;
let quality_score = self.read_quality_score()?;
Ok(Feature::ReadBase(position, base, quality_score))
}
Code::Substitution => {
Expand All @@ -515,9 +510,7 @@ where
Ok(Feature::InsertBase(position, base))
}
Code::QualityScore => {
let score = self.read_quality_score().and_then(|n| {
Score::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
})?;
let score = self.read_quality_score()?;
Ok(Feature::QualityScore(position, score))
}
Code::ReferenceSkip => {
Expand Down Expand Up @@ -591,9 +584,8 @@ where
.collect()
}

fn read_stretches_of_quality_scores(&mut self) -> io::Result<Vec<Score>> {
let scores = self
.compression_header
fn read_stretches_of_quality_scores(&mut self) -> io::Result<Vec<u8>> {
self.compression_header
.data_series_encoding_map()
.stretches_of_quality_scores_encoding()
.ok_or_else(|| {
Expand All @@ -604,12 +596,7 @@ where
),
)
})?
.decode(&mut self.core_data_reader, &mut self.external_data_readers)?;

scores
.into_iter()
.map(|n| Score::try_from(n).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)))
.collect()
.decode(&mut self.core_data_reader, &mut self.external_data_readers)
}

fn read_base(&mut self) -> io::Result<Base> {
Expand Down Expand Up @@ -797,7 +784,7 @@ where
fn read_quality_scores_stored_as_array(
&mut self,
read_length: usize,
) -> io::Result<sam::record::QualityScores> {
) -> io::Result<QualityScores> {
const MISSING: u8 = 0xff;

let encoding = self
Expand All @@ -823,7 +810,6 @@ where
buf.clear();
}

sam::record::QualityScores::try_from(buf)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))
Ok(QualityScores::from(buf))
}
}
7 changes: 4 additions & 3 deletions noodles-cram/src/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@ pub mod feature;
mod features;
mod flags;
mod next_mate_flags;
mod quality_scores;
pub mod resolve;

pub use self::{
builder::Builder, feature::Feature, features::Features, flags::Flags,
next_mate_flags::NextMateFlags,
next_mate_flags::NextMateFlags, quality_scores::QualityScores,
};

use std::io;
Expand Down Expand Up @@ -44,7 +45,7 @@ pub struct Record {
pub(crate) bases: sam::record::Sequence,
pub(crate) features: Features,
pub(crate) mapping_quality: Option<sam::record::MappingQuality>,
pub(crate) quality_scores: sam::record::QualityScores,
pub(crate) quality_scores: QualityScores,
}

impl Record {
Expand Down Expand Up @@ -223,7 +224,7 @@ impl Record {
}

/// Returns the quality scores.
pub fn quality_scores(&self) -> &sam::record::QualityScores {
pub fn quality_scores(&self) -> &QualityScores {
&self.quality_scores
}
}
Expand Down
17 changes: 7 additions & 10 deletions noodles-cram/src/record/builder.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
use noodles_core::Position;
use noodles_sam::{
self as sam,
record::{quality_scores::Score, sequence::Base},
};
use noodles_sam::{self as sam, record::sequence::Base};

use super::{Feature, Features, Flags, NextMateFlags, Record};
use super::{Feature, Features, Flags, NextMateFlags, QualityScores, Record};

/// A CRAM record builder.
pub struct Builder {
Expand All @@ -25,7 +22,7 @@ pub struct Builder {
bases: sam::record::Sequence,
features: Features,
mapping_quality: Option<sam::record::MappingQuality>,
quality_scores: sam::record::QualityScores,
quality_scores: QualityScores,
}

impl Builder {
Expand Down Expand Up @@ -157,14 +154,14 @@ impl Builder {
}

/// Sets the per-base quality scores.
pub fn set_quality_scores(mut self, quality_scores: sam::record::QualityScores) -> Self {
pub fn set_quality_scores(mut self, quality_scores: QualityScores) -> Self {
self.quality_scores = quality_scores;
self
}

/// Adds a quality score.
pub fn add_quality_score(mut self, quality_score: Score) -> Self {
self.quality_scores.push(quality_score);
pub fn add_quality_score(mut self, score: u8) -> Self {
self.quality_scores.as_mut().push(score);
self
}

Expand Down Expand Up @@ -213,7 +210,7 @@ impl Default for Builder {
bases: sam::record::Sequence::default(),
features: Features::default(),
mapping_quality: None,
quality_scores: sam::record::QualityScores::default(),
quality_scores: QualityScores::default(),
}
}
}
Expand Down
19 changes: 16 additions & 3 deletions noodles-cram/src/record/convert.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use noodles_sam::{
header::{record::value::map, ReferenceSequences},
};

use super::{Features, Flags, Record};
use super::{Features, Flags, QualityScores, Record};

impl Record {
/// Converts an alignment record to a CRAM record.
Expand Down Expand Up @@ -87,7 +87,17 @@ impl Record {
flags.insert(Flags::QUALITY_SCORES_STORED_AS_ARRAY);
}

builder = builder.set_quality_scores(record.quality_scores().clone());
let scores: Vec<_> = record
.quality_scores()
.as_ref()
.iter()
.copied()
.map(u8::from)
.collect();

let quality_scores = QualityScores::from(scores);

builder = builder.set_quality_scores(quality_scores);
}

Ok(builder.set_flags(flags).build())
Expand Down Expand Up @@ -142,7 +152,10 @@ impl Record {
}

if !self.quality_scores.is_empty() {
builder = builder.set_quality_scores(self.quality_scores);
let scores = Vec::<_>::from(self.quality_scores);
let quality_scores = sam::record::QualityScores::try_from(scores)
.map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
builder = builder.set_quality_scores(quality_scores);
}

let mut data = self.tags;
Expand Down
22 changes: 8 additions & 14 deletions noodles-cram/src/record/feature.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,17 @@ pub mod substitution;
pub use self::code::Code;

use noodles_core::Position;
use noodles_sam::record::{quality_scores::Score, sequence::Base};
use noodles_sam::record::sequence::Base;

/// A CRAM record feature.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Feature {
/// A stretch of bases (position, bases).
Bases(Position, Vec<Base>),
/// A stretch of quality scores (position, quality scores).
Scores(Position, Vec<Score>),
Scores(Position, Vec<u8>),
/// A base-quality score pair (position, base, quality score).
ReadBase(Position, Base, Score),
ReadBase(Position, Base, u8),
/// A base substitution (position, code (read) / base (write)).
Substitution(Position, substitution::Value),
/// Inserted bases (position, bases).
Expand All @@ -26,7 +26,7 @@ pub enum Feature {
/// A single inserted base (position, base).
InsertBase(Position, Base),
/// A single quality score (position, score).
QualityScore(Position, Score),
QualityScore(Position, u8),
/// A number of skipped bases (position, length).
ReferenceSkip(Position, usize),
/// Soft clipped bases (position, bases).
Expand Down Expand Up @@ -112,7 +112,7 @@ mod tests {
assert_eq!(Feature::Bases(position, Vec::new()).code(), Code::Bases);
assert_eq!(Feature::Scores(position, Vec::new()).code(), Code::Scores);
assert_eq!(
Feature::ReadBase(position, Base::N, Score::default()).code(),
Feature::ReadBase(position, Base::N, 0).code(),
Code::ReadBase
);
assert_eq!(
Expand All @@ -129,7 +129,7 @@ mod tests {
Code::InsertBase
);
assert_eq!(
Feature::QualityScore(position, Score::default()).code(),
Feature::QualityScore(position, 0).code(),
Code::QualityScore
);
assert_eq!(
Expand All @@ -150,10 +150,7 @@ mod tests {

assert_eq!(Feature::Bases(position, Vec::new()).position(), position);
assert_eq!(Feature::Scores(position, Vec::new()).position(), position);
assert_eq!(
Feature::ReadBase(position, Base::N, Score::default()).position(),
position
);
assert_eq!(Feature::ReadBase(position, Base::N, 0).position(), position);
assert_eq!(
Feature::Substitution(position, substitution::Value::Code(0)).position(),
position
Expand All @@ -164,10 +161,7 @@ mod tests {
);
assert_eq!(Feature::Deletion(position, 0).position(), position);
assert_eq!(Feature::InsertBase(position, Base::N).position(), position);
assert_eq!(
Feature::QualityScore(position, Score::default()).position(),
position
);
assert_eq!(Feature::QualityScore(position, 0).position(), position);
assert_eq!(Feature::ReferenceSkip(position, 0).position(), position);
assert_eq!(Feature::SoftClip(position, Vec::new()).position(), position);
assert_eq!(Feature::Padding(position, 0).position(), position);
Expand Down
Loading

0 comments on commit 1bba929

Please sign in to comment.