Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Py port of gtfsort + CI, new tree, and additional updates #10

Merged
merged 18 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 71 additions & 34 deletions .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,59 +25,59 @@ jobs:
toolchain: ${{ matrix.rust }}

- name: Cargo Check with Minimum Feature Flags
run: cargo check --no-default-features --features "$GTFSORT_TEST_FEATURES_MINIMUM"
run: cargo check --manifest-path ./gtfsort/Cargo.toml --no-default-features --features "$GTFSORT_TEST_FEATURES_MINIMUM"

- name: Cargo Check with Full Feature Flags
run: cargo check --features "$GTFSORT_TEST_FEATURES_FULL"
run: cargo check --manifest-path ./gtfsort/Cargo.toml --features "$GTFSORT_TEST_FEATURES_FULL"

- name: Cargo Clippy with Minimum Feature Flags
run: cargo clippy --no-default-features --features "$GTFSORT_TEST_FEATURES_MINIMUM" -- -D warnings
run: cargo clippy --manifest-path ./gtfsort/Cargo.toml --no-default-features --features "$GTFSORT_TEST_FEATURES_MINIMUM" -- -D warnings

- name: Cargo Clippy with Full Feature Flags
run: cargo clippy --features "$GTFSORT_TEST_FEATURES_FULL" -- -D warnings
run: cargo clippy --manifest-path ./gtfsort/Cargo.toml --features "$GTFSORT_TEST_FEATURES_FULL" -- -D warnings

- name: Cargo Test with Minimum Feature Flags
# saves time on more expensive machines
if: ${{ startsWith(matrix.os, 'ubuntu') }}
run: RUST_BACKTRACE=full cargo test --no-default-features --features "$GTFSORT_TEST_FEATURES_MINIMUM" -- --nocapture
run: RUST_BACKTRACE=full cargo test --manifest-path ./gtfsort/Cargo.toml --no-default-features --features "$GTFSORT_TEST_FEATURES_MINIMUM" -- --nocapture

- name: Cargo Build Release
run: cargo build --release --features "$GTFSORT_TEST_FEATURES_FULL"
run: cargo build --manifest-path ./gtfsort/Cargo.toml --release --features "$GTFSORT_TEST_FEATURES_FULL"

- name: Cargo Test with Full Feature Flags
run: RUST_BACKTRACE=full cargo test --features "$GTFSORT_TEST_FEATURES_FULL" -- --nocapture
run: RUST_BACKTRACE=full cargo test --manifest-path ./gtfsort/Cargo.toml --features "$GTFSORT_TEST_FEATURES_FULL" -- --nocapture

- name: Check C headers are up-to-date
if: ${{ startsWith(matrix.os, 'ubuntu') }}
run: git diff --exit-code include/gtfsort.h include/gtfsort.hxx
run: git diff --exit-code ./gtfsort/include/gtfsort.h ./gtfsort/include/gtfsort.hxx

- name: Cargo Clean
if: ${{ matrix.rust == 'stable' }}
run: cargo clean
run: cargo clean --manifest-path ./gtfsort/Cargo.toml

- name: Build Release
if: ${{ matrix.rust == 'stable' }}
run: cargo build --release --features "$GTFSORT_TEST_FEATURES_RELEASE"
run: cargo build --manifest-path ./gtfsort/Cargo.toml --release --features "$GTFSORT_TEST_FEATURES_RELEASE"

- name: Upload Build Artifacts
if: ${{ startsWith(matrix.os, 'macos') != true && matrix.rust == 'stable' }}
uses: actions/upload-artifact@v4
with:
name: build-artifacts-${{ matrix.os }}
path: |
target/release/gtfsort
target/release/libgtfsort.so
./gtfsort/target/release/gtfsort
./gtfsort/target/release/libgtfsort.so

- name: Upload Build Artifacts (macOS)
if: ${{ startsWith(matrix.os, 'macos') && matrix.rust == 'stable' }}
uses: actions/upload-artifact@v4
with:
name: build-artifacts-${{ matrix.os }}
path: |
target/release/gtfsort
target/release/libgtfsort.dylib
include/gtfsort.h
include/gtfsort.hxx
./gtfsort/target/release/gtfsort
./gtfsort/target/release/libgtfsort.dylib
./gtfsort/include/gtfsort.h
./gtfsort/include/gtfsort.hxx

check-windows:
runs-on: windows-latest
Expand Down Expand Up @@ -106,45 +106,82 @@ jobs:
target: x86_64-pc-windows-gnu

- name: Cargo Check with Minimum Feature Flags
run: cargo check --target x86_64-pc-windows-gnu --no-default-features --features $env:GTFSORT_TEST_FEATURES_MINIMUM
run: cargo check --manifest-path ./gtfsort/Cargo.toml --target x86_64-pc-windows-gnu --no-default-features --features $env:GTFSORT_TEST_FEATURES_MINIMUM

- name: Cargo Check with Full Feature Flags
run: cargo check --target x86_64-pc-windows-gnu --features $env:GTFSORT_TEST_FEATURES_FULL
run: cargo check --manifest-path ./gtfsort/Cargo.toml --target x86_64-pc-windows-gnu --features $env:GTFSORT_TEST_FEATURES_FULL

- name: Cargo Clippy with Minimum Feature Flags
run: cargo clippy --target x86_64-pc-windows-gnu --no-default-features --features $env:GTFSORT_TEST_FEATURES_MINIMUM -- -D warnings
run: cargo clippy --manifest-path ./gtfsort/Cargo.toml --target x86_64-pc-windows-gnu --no-default-features --features $env:GTFSORT_TEST_FEATURES_MINIMUM -- -D warnings

- name: Cargo Clippy with Full Feature Flags
run: cargo clippy --target x86_64-pc-windows-gnu --features $env:GTFSORT_TEST_FEATURES_FULL -- -D warnings
run: cargo clippy --manifest-path ./gtfsort/Cargo.toml --target x86_64-pc-windows-gnu --features $env:GTFSORT_TEST_FEATURES_FULL -- -D warnings

- name: Cargo Test with Minimum Feature Flags
run: $env:RUST_BACKTRACE="full"; cargo test --target x86_64-pc-windows-gnu --no-default-features --features $env:GTFSORT_TEST_FEATURES_MINIMUM -- --nocapture
run: $env:RUST_BACKTRACE="full"; cargo test --manifest-path ./gtfsort/Cargo.toml --target x86_64-pc-windows-gnu --no-default-features --features $env:GTFSORT_TEST_FEATURES_MINIMUM -- --nocapture

- name: Cargo Build Release
run: cargo build --target x86_64-pc-windows-gnu --release --features $env:GTFSORT_TEST_FEATURES_FULL
run: cargo build --manifest-path ./gtfsort/Cargo.toml --target x86_64-pc-windows-gnu --release --features $env:GTFSORT_TEST_FEATURES_FULL

- name: Cargo Test with Full Feature Flags
run: $env:RUST_BACKTRACE="full"; cargo test --target x86_64-pc-windows-gnu --features $env:GTFSORT_TEST_FEATURES_FULL -- --nocapture
run: $env:RUST_BACKTRACE="full"; cargo test --manifest-path ./gtfsort/Cargo.toml --target x86_64-pc-windows-gnu --features $env:GTFSORT_TEST_FEATURES_FULL -- --nocapture

- name: Cargo Clean
if: ${{ matrix.rust == 'stable' }}
run: cargo clean
run: cargo clean --manifest-path ./gtfsort/Cargo.toml

- name: Build Release
if: ${{ matrix.rust == 'stable' }}
run: cargo build --target x86_64-pc-windows-gnu --release --features $env:GTFSORT_TEST_FEATURES_RELEASE
run: cargo build --manifest-path ./gtfsort/Cargo.toml --target x86_64-pc-windows-gnu --release --features $env:GTFSORT_TEST_FEATURES_RELEASE

- name: Upload Build Artifacts
if: ${{ matrix.rust == 'stable' }}
uses: actions/upload-artifact@v4
with:
name: build-artifacts-windows
path: |
target/x86_64-pc-windows-gnu/release/gtfsort.exe
target/x86_64-pc-windows-gnu/release/gtfsort.dll
include/gtfsort.h
include/gtfsort.hxx
./gtfsort/target/x86_64-pc-windows-gnu/release/gtfsort.exe
./gtfsort/target/x86_64-pc-windows-gnu/release/gtfsort.dll
./gtfsort/include/gtfsort.h
./gtfsort/include/gtfsort.hxx

py_port:
needs: [check-unix]
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.9']
target: [x86_64, aarch64]
steps:
- uses: actions/checkout@v3

- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}

- name: Set up Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable

- name: Install pipx
run: python -m pip install --user pipx && python -m pipx ensurepath

- name: Install dependencies
run: pipx install hatch maturin

- name: Create Hatch environment
shell: bash
run: cd py-gtfsort && hatch env create

- name: Activate Hatch env, build and run py test
shell: bash
run: |
cd py-gtfsort
source $(hatch env find)/bin/activate
maturin develop --release --manifest-path Cargo.toml --features test
hatch run test --verbose

benchmark:
needs: [check-unix, check-windows]
if: ${{ needs.check-unix.result == 'success' && contains(github.event.head_commit.message, '[ci benchmark]') }}
Expand Down Expand Up @@ -173,20 +210,20 @@ jobs:
run: git fetch upstream

- name: Build Benchmark
run: cargo build --release --bin gtfsort-benchmark --features "mmap benchmark"
run: cargo build --manifest-path ./gtfsort/Cargo.toml --release --bin gtfsort-benchmark --features "mmap benchmark"

- name: Run Benchmark
env:
GITHUB_REPO_OWNER: ${{ github.event.repository.owner.login }}
GITHUB_REPO_NAME: ${{ github.event.repository.name }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: target/release/gtfsort-benchmark -r upstream/master -- --show-output
run: ./gtfsort/target/release/gtfsort-benchmark -r upstream/master -- --show-output

- name: Upload Benchmark Results
uses: actions/upload-artifact@v4
with:
name: benchmark-results-${{ matrix.os }}
path: |
tests/benchmark_*.csv
tests/benchmark_*.md
tests/benchmark-output.txt
./gtfsort/tests/benchmark_*.csv
./gtfsort/tests/benchmark_*.md
./gtfsort/tests/benchmark-output.txt
21 changes: 15 additions & 6 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,27 @@
# Visit https://bit.ly/cffinit to generate yours today!

cff-version: 1.2.0
title: gtfsort
title: "gtfsort: A tool for sorting GTF files"
message: >-
If you use this software, please cite it using the
metadata from this file.
type: software
authors:
- given-names: Alejandro
family-names: Gonzales-Irribarren
email: [email protected]
email: [email protected]
affiliation: >-
Laboratorio Internacional de Investigación sobre el
Genoma Humano, Universidad Nacional Autónoma de
México, Santiago de Querétaro, 76230, México
LOEWE Centre for Translational Biodiversity Genomics,
Senckenberganlage 25, 60325 Frankfurt, Germany
orcid: 'https://orcid.org/0000-0001-7010-8146'
- given-names: Anne
family-names: Fu
email: [email protected]
affiliation: >-
School of Natural Sciences and Mathematics, The University of
Texas at Dallas, Richardson, TX 75080, USA
orcid: 'https://orcid.org/0000-0002-9025-6071'

identifiers:
- type: url
value: 'https://github.com/alejandrogzi/gtfsort'
Expand All @@ -34,6 +41,8 @@ abstract: >-
keywords:
- sort-algorithm
- gtf
- gff3
- gene-annotation
license: MIT
version: 0.2.1
version: 0.2.3
doi: 10.1101/2023.10.21.563454
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ While current tools (most of them GFF3-focused) have been recommended for sortin
>
> If you use gtfsort in your work, please cite:
>
> Gonzales-Irribarren A. gtfsort: a tool to efficiently sort GTF files. bioRxiv 2023.10.21.563454; doi: https://doi.org/10.1101/2023.10.21.563454
> Gonzales-Irribarren A and Fu A. gtfsort: a tool to efficiently sort GTF files. bioRxiv 2023.10.21.563454; doi: https://doi.org/10.1101/2023.10.21.563454

## Usage
``` rust
Expand All @@ -36,7 +36,7 @@ Options:
--version: print version
```

> What's new on v.0.2.2
> What's new on v.0.2.3
>
> - **gtfsort now supports GFF sorting!**
> - Now gtfsort is a bit faster (~0.2s); 1.9GB (*Cyprinus carpio carpio*) in 6.7s
Expand Down
2 changes: 1 addition & 1 deletion .gitignore → gtfsort/.gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
/target
target/
tests/

2 changes: 1 addition & 1 deletion Cargo.lock → gtfsort/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions Cargo.toml → gtfsort/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
[package]
name = "gtfsort"
version = "0.2.2"
authors = ["alejandrogzi <[email protected]>"]
version = "0.2.3"
authors = ["alejandrogzi <[email protected]>", "eternal-flame-AD <[email protected]>"]
edition = "2021"
license = "MIT"
description = "An optimized chr/pos/feature GTF2.5-3 sorter using a lexicographic-based index ordering algorithm written in Rust."
description = "An optimized chr/pos/feature GTF/GFF sorter using a lexicographic-based index ordering algorithm written in Rust."
homepage = "https://github.com/alejandrogzi/gtfsort"
repository = "https://github.com/alejandrogzi/gtfsort"
readme = "README.md"
keywords = ["sort", "gtf", "cli", "gtfsort"]
keywords = ["sort", "gtf", "gff", "gtfsort"]
categories = ["command-line-utilities", "science"]
default-run = "gtfsort"

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 3 additions & 1 deletion src/lib.rs → gtfsort/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,17 @@ pub mod ord;
pub use ord::CowNaturalSort;

pub mod utils;
use thiserror::Error;
pub use utils::*;

pub mod interop;

#[cfg(feature = "testing")]
pub mod test_utils;
#[cfg(feature = "testing")]
pub use test_utils::*;

use std::{io, path::PathBuf};
use thiserror::Error;

#[cfg(feature = "mmap")]
use mmap::Madvice;
Expand Down
6 changes: 3 additions & 3 deletions src/main.rs → gtfsort/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
//! A fast and efficient GTF sorter tool.
//!
//! ## Overview
//! `gtfsort` is a rapid chr/pos/feature GTF2.5-3 sorter using a lexicographic-based
//! `gtfsort` is a rapid chr/pos/feature GTF/GFF sorter using a lexicographic-based
//! index ordering algorithm written in Rust. This tool is intended to be used as a
//! standalone command-line tool. The primary goal of this tool is to sort GTF files
//! by chromosome, position and feature in a fast and memory-efficient way.
Expand Down Expand Up @@ -31,8 +31,8 @@ use gtfsort::*;
#[derive(Parser, Debug)]
#[clap(
name = "gtfsort",
version = "0.2.2",
author = "Alejandro Gonzales-Irribarren <[email protected]>",
version = "0.2.3",
author = "alejandrogzi <[email protected]>, eternal-flame-AD <[email protected]>",
about = "An optimized chr/pos/feature GTF2.5-3 sorter using a lexicographic-based index ordering algorithm written in Rust."
)]
struct Args {
Expand Down
File renamed without changes.
File renamed without changes.
14 changes: 12 additions & 2 deletions src/test_utils.rs → gtfsort/src/test_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,17 @@ impl Deref for TempFile {
impl Drop for TempFile {
fn drop(&mut self) {
if self.cleanup {
std::fs::remove_file(&self.path).unwrap();
// std::fs::remove_file(&self.path).unwrap();
if self.path.exists() {
if let Err(e) = std::fs::remove_file(&self.path) {
eprintln!(
"Warning: Failed to remove temporary file {:?}: {:?}",
self.path, e
);
}
} else {
eprintln!("Warning: Temporary file {:?} does not exist.", self.path);
}
}
}
}
Expand Down Expand Up @@ -176,7 +186,7 @@ pub fn crc32_hex<R: Read>(mut r: R) -> String {
}

pub struct TestFile {
name: String,
pub name: String,
expect_output_cksum: Vec<&'static str>,
}

Expand Down
2 changes: 1 addition & 1 deletion src/utils.rs → gtfsort/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ pub fn write_obj_mmaped<'a, P: AsRef<Path> + Debug>(
pub fn parallel_parse<const SEP: u8>(s: &str) -> Result<ChromRecord<'_>, &'static str> {
let x = s
.par_lines()
.filter(|line| !line.starts_with("#"))
.filter(|line| !line.starts_with('#'))
.filter_map(|line| Record::parse::<SEP>(line).ok())
.fold(HashMap::new, |mut acc: ChromRecord, record| {
acc.entry(record.chrom).or_default().push(record);
Expand Down
Loading
Loading