Skip to content

Commit

Permalink
Merge pull request #33 from COMBINE-lab/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
rob-p authored Dec 6, 2024
2 parents de6a459 + 5dea2f8 commit 09c3d20
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 10 deletions.
9 changes: 4 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "libradicl"
version = "0.9.0"
version = "0.9.1"
authors = [
"Avi Srivastava <[email protected]>",
"Hirak Sarkar <[email protected]>",
Expand Down Expand Up @@ -32,15 +32,14 @@ num = "0.4.3"
ahash = "0.8.11"
serde = { version = "1.0.204", features = ["derive"] }
dashmap = "^6.0.1"
bio-types = "1.0.1"
bio-types = "1.0.3"
smallvec = "1.13.2"
noodles-bam = "0.64.0"
noodles-sam = "0.61.0"
anyhow = "1.0.86"
itertools = "0.13.0"
bytemuck = { version = "1.16.1", features = ["aarch64_simd"] }
bytemuck = { version = "1.16.3", features = ["aarch64_simd"] }
derivative = "2.2.0"
crossbeam-queue = "0.3.11"
noodles = { version = "0.85.0", features = ["bam", "sam"] }

[dev-dependencies]
needletail="0.5.1"
Expand Down
2 changes: 1 addition & 1 deletion src/header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::{self as libradicl, constants};
use anyhow::{self, Context};
use libradicl::rad_types::{TagSection, TagSectionLabel};
use libradicl::record::RecordContext;
use noodles_sam as sam;
use noodles::sam;
use scroll::Pread;
use std::cmp::{Eq, PartialEq};
use std::io::{Read, Write};
Expand Down
19 changes: 15 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use crate as libradicl;
use self::libradicl::rad_types::RadIntId;
use self::libradicl::record::AlevinFryReadRecord;
use self::libradicl::record::AtacSeqReadRecord;
use self::libradicl::schema::TempCellInfo;
use self::libradicl::schema::{TempCellInfo,CollateKey};
#[allow(unused_imports)]
use ahash::{AHasher, RandomState};
use bio_types::strand::*;
Expand Down Expand Up @@ -821,7 +821,12 @@ pub fn dump_corrected_cb_chunk_to_temp_file_atac<T: Read>(
output_cache: &HashMap<u64, Arc<TempBucket>>,
local_buffers: &mut [Cursor<&mut [u8]>],
flush_limit: usize,
) {
ck: CollateKey
// f: F
)
where
// F: Fn(u32)
{
let mut buf = [0u8; 8];
let mut tbuf = vec![0u8; 4096];
//let mut tcursor = Cursor::new(tbuf);
Expand Down Expand Up @@ -849,10 +854,16 @@ pub fn dump_corrected_cb_chunk_to_temp_file_atac<T: Read>(
// could be replaced with orientation
let rr = AtacSeqReadRecord::from_bytes_with_header(reader, tup.0, tup.1);

if rr.is_empty() {
if rr.is_empty() || tup.1 > 1 {
continue;
}
if let Some(v) = output_cache.get(corrected_id) {
let pos = rr.start_pos[0];
let ref_id = rr.refs[0];
let check_id = match ck {
CollateKey::Barcode => *corrected_id,
CollateKey::Pos(ref f) => f(pos, ref_id as usize) as u64,
};
if let Some(v) = output_cache.get(&check_id) {
// if this is a valid barcode, then
// write the corresponding entry to the
// thread-local buffer for this bucket
Expand Down
5 changes: 5 additions & 0 deletions src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,8 @@ pub struct ProtocolInfo {
// right now. Expand to be generic.
pub expected_ori: Strand,
}

/// Selects the value that keys the bucket lookup when records are collated.
///
/// In the visible ATAC-seq caller the chosen key is looked up in the output
/// cache to decide which temporary bucket receives a record.
pub enum CollateKey<'a> {
    /// Key directly on the corrected barcode identifier of the record.
    Barcode,
    /// Key on a value produced by the boxed callback.
    ///
    /// The visible caller invokes it with the record's first start position
    /// (`u32`) and first reference id (as a `usize`), and widens the returned
    /// `usize` to `u64` before the lookup. The callback may borrow data for
    /// the lifetime `'a`.
    Pos(Box<dyn Fn(u32, usize) -> usize + 'a>),
}
20 changes: 20 additions & 0 deletions src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
//! This module contains some utility constants and functions that are
//! helpful in processing RAD information.
use scroll::Pread;

pub const MASK_TOP_BIT_U32: u32 = 0x7FFFFFFF;
pub const MASK_LOWER_31_U32: u32 = 0x80000000;
pub const SPLICE_MASK_U32: u32 = 0xFFFFFFFE;
Expand All @@ -26,3 +28,21 @@ pub const SPLICE_MASK_U32: u32 = 0xFFFFFFFE;
pub fn has_data_left<T: std::io::BufRead>(reader: &mut T) -> std::io::Result<bool> {
    // Ask the reader for its currently buffered bytes (refilling if needed)
    // without consuming them; an empty slice means nothing is left to read.
    // Any I/O error from the refill is propagated unchanged.
    let pending = reader.fill_buf()?;
    Ok(!pending.is_empty())
}

/// Parses the fixed-size header that precedes a chunk.
///
/// The header consists of two consecutive `u32` values. On success the
/// returned tuple is `(nbytes, nrec)`: the number of bytes in the chunk
/// followed by the number of records in the chunk.
///
/// # Errors
///
/// Returns the underlying [`std::io::Error`] if the eight header bytes cannot
/// be read from `reader` (for example, on a premature end of input).
///
/// This helper is kept in the utilities module rather than on the `Chunk`
/// trait because it does not depend on the record type stored in the chunk.
#[inline]
pub fn read_chunk_header<T: std::io::BufRead>(reader: &mut T) -> std::io::Result<(u32, u32)> {
    // Width in bytes of each of the two header fields.
    const FIELD_WIDTH: usize = std::mem::size_of::<u32>();

    let mut header = [0_u8; 2 * FIELD_WIDTH];
    reader.read_exact(&mut header)?;

    // Both fields lie entirely inside `header`, so these reads are not
    // expected to fail.
    let chunk_bytes = header.pread::<u32>(0).unwrap();
    let chunk_records = header.pread::<u32>(FIELD_WIDTH).unwrap();
    Ok((chunk_bytes, chunk_records))
}

0 comments on commit 09c3d20

Please sign in to comment.