Skip to content

Commit

Permalink
BREAKING CHANGE: v.0.1.9.3 + .gz readers!
Browse files Browse the repository at this point in the history
  • Loading branch information
alejandrogzi committed Nov 20, 2024
1 parent f64ea43 commit eb1f82c
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 18 deletions.
31 changes: 28 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ use std::collections::HashMap;
use std::error::Error;
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::Path;
use std::string::String;
use std::time::Instant;

Expand All @@ -65,7 +66,7 @@ use bed2gtf::*;

const SOURCE: &str = "bed2gtf";

fn main() {
fn main() -> Result<(), Box<dyn Error>> {
let args = Cli::parse();
args.check().unwrap_or_else(|e| {
error!("{}", e);
Expand Down Expand Up @@ -95,7 +96,29 @@ fn main() {
HashMap::new()
};

let bed = bed_reader(&args.bed);
let bed = match args.bed.extension().and_then(|s| s.to_str()) {
Some("gz") => {
let bed = match Path::new(args.bed.file_stem().unwrap())
.extension()
.expect("ERROR: No extension found")
.to_str()
{
Some("bed") => {
let contents = with_gz(&args.bed)?;
parallel_parse(&contents)?
}
_ => panic!("ERROR: Not a .BED/.BED.GZ. Wrong file format!"),
};

bed
}
Some("bed") => {
let contents = raw(&args.bed)?;
parallel_parse(&contents)?
}
_ => panic!("ERROR: Not a .BED/.BED.GZ. Wrong file format!"),
};

let gene_track = custom_par_parse(&bed).unwrap_or_else(|_| {
let message = format!("Error parsing BED file {}", args.bed.display());
panic!("{}", message);
Expand Down Expand Up @@ -143,7 +166,9 @@ fn main() {

let peak_mem = (max_mem_usage_mb() - bmem).max(0.0);
log::info!("Memory usage: {} MB", peak_mem);
log::info!("Elapsed: {:.4?} secs", start.elapsed().as_secs_f32())
log::info!("Elapsed: {:.4?} secs", start.elapsed().as_secs_f32());

Ok(())
}

fn to_gtf(
Expand Down
50 changes: 35 additions & 15 deletions src/utils.rs
Original file line number Diff line number Diff line change
@@ -1,28 +1,22 @@
use crate::bed::BedRecord;

use chrono::Datelike;

use colored::Colorize;

use flate2::read::GzDecoder;
use indoc::indoc;

use rayon::prelude::*;

use std::collections::HashMap;
use std::error::Error;
use std::fmt::Debug;
use std::fs::File;
use std::io::{self, Read, Write};
use std::path::PathBuf;
use std::io::{self, BufReader, Read, Write};
use std::path::{Path, PathBuf};

const SOURCE: &str = "bed2gtf";
const VERSION: &str = env!("CARGO_PKG_VERSION");
const REPOSITORY: &str = env!("CARGO_PKG_REPOSITORY");

/// Loads the BED file at `file` and parses its contents into records.
///
/// # Panics
///
/// Panics if `reader` fails to read the file or if `parallel_parse`
/// returns an error for its contents.
pub fn bed_reader(file: &PathBuf) -> Vec<BedRecord> {
    let contents = reader(file).unwrap();
    parallel_parse(&contents).unwrap()
}

pub fn get_isoforms(file: &String) -> HashMap<String, String> {
let pairs = parallel_hash_rev(file);
// let rev_pairs = parallel_hash(&file);
Expand All @@ -46,6 +40,23 @@ pub fn reader(file: &PathBuf) -> io::Result<String> {
Ok(contents)
}

/// Reads the whole file at `f` into a `String`.
///
/// # Errors
///
/// Returns the boxed I/O error if the file cannot be opened or read,
/// which includes the case where its bytes are not valid UTF-8.
pub fn raw<P: AsRef<Path> + Debug>(f: P) -> Result<String, Box<dyn Error>> {
    let contents = std::fs::read_to_string(f)?;
    Ok(contents)
}

pub fn with_gz<P: AsRef<Path> + Debug>(f: P) -> Result<String, Box<dyn Error>> {
let file = File::open(f)?;
let mut decoder = GzDecoder::new(BufReader::new(file));

let mut contents = String::new();
decoder.read_to_string(&mut contents)?;

Ok(contents)
}

pub fn parallel_hash<'a>(s: &'a str) -> HashMap<String, String> {
s.par_lines()
.filter_map(|line| {
Expand Down Expand Up @@ -75,11 +86,20 @@ pub fn parallel_hash_rev<'a>(s: &'a str) -> HashMap<String, String> {
.collect()
}

pub fn parallel_parse<'a>(s: &'a str) -> Result<Vec<BedRecord>, &'static str> {
let records: Result<Vec<BedRecord>, &'static str> =
s.par_lines().map(|line| BedRecord::parse(line)).collect();
pub fn parallel_parse<'a>(s: &'a str) -> Result<Vec<BedRecord>, String> {
let records = s
.par_lines()
// .map(|line| BedRecord::parse(line))
.filter_map(|line| match std::str::from_utf8(line.as_bytes()) {
Ok(valid_line) => Some(BedRecord::parse(valid_line)),
Err(_) => {
eprintln!("Skipping invalid UTF-8 line: {:?}", line);
None
}
})
.collect::<Result<Vec<BedRecord>, String>>();

records
Ok(records?)
}

pub fn custom_par_parse(
Expand Down

0 comments on commit eb1f82c

Please sign in to comment.