-
Notifications
You must be signed in to change notification settings - Fork 82
Module: Alignments
alignment/detail/... // The core implementation of the alignment code, which is not prevent.
alignment/representation.hpp // Required only if output format is not score only.
alignment/representation/... // Data structures and utility functions to represent and work with alignments.
alignment/pairwise.hpp
alignment/pairwise/... // Defining the public API for calling the standard alignment algorithms.
alignment/msa.hpp
alignment/msa/... // Defining the public API for calling the msa algorithms.
alignment/seed_extend.hpp
alignment/seed_extend/...
alignment/split.hpp
alignment/split/...
alignment/banded_chain.hpp
alignment/banded_chain/...
In SeqAn 2.x we have several possibilities to represent an alignment. The most unpractical on is the Align
object, which represents multiple alignment rows of the same types.
template <typename t>
concept bool aligned_sequence_view_concept(t g)
{
requires view_concept<t>;
typename t::underlying_type;
// Element access
{ g[] const; } -> typename t::value_type;
// Manipulation
{ g.insert_gap(0, 4) } -> bool;
{ g.remove_gap(0, 2) } -> bool;
{ g.find_view_position(0) } -> typename t::size_type;
{ g.find_underlying_position(0) } -> typename t::size_type;
};
Concrete implementation:
Array Gaps Uses simple interleaved blocks structure, where the first value gives the number of gaps and the second the number of sequence chars that follow. Requires linear scan of the block structure for random access.
template <typename underlying_t>
requires container_concept<underlying_t>
class aligned_sequence_view_linear_access
{
public:
protected:
private:
};
Anchor Gaps Using sorted sequence anchors, which allow binary search for random access.
template <typename host_t>
class aligned_sequence_view_logarithmic_access
{
public:
protected:
private:
};
Bitvector Gaps
- using rank-support query data structure to lookup a position in view space.
- sdsl::rank_support_v5? (+0.0625n bits)
- sdsl::rank_support_il? (+128 bits)
template <typename host_t>
class aligned_sequence_view_constant_access
{
public:
protected:
private:
};
template <typename ...gap_ts>
requires (gap_sequence_view_concept<gap_ts> && ...)
class alignment : public std::tuple<gap_ts...>
{
public:
// display
template <typename stream_t>
stream_t & operator<<(stream_t && stream);
protected:
};
What else do we want to do with the alignment view? Utility functions:
- merge two alignment views. How?
template <typename t>
concept bool dp_traits_concept = requires
{
typename t::algorithm_type;
typename t::gap_type;
typename t::trace_type;
typename t::output_format;
}
struct dp_traits
{
// Gocal alignment with linear gap costs.
struct global_linear
{
// The algorithm to choose.
using algorithm_type = detail::global_alignment<>;
// The Gaps to choose
using gap_type = detail::linear_gaps;
// The traceback.
using traceback_type = detail::traceback_on<detail::traceback_config<detail::single_trace, detail::gaps_left>>;
// The output to choose.
using format_type = aligned_sequence_view_constant_access;
};
// Global alignment with affine gap costs.
struct global_affine : public global_linear
{
using gap_type = detail::affine_gaps;
};
// Global alignment with affine gap costs.
struct semi_global_linear : public global_linear
{
using algorithm_type = detail::global_alignment<detail::free_end_gaps<true, false, true, false>>;
};
// Global alignment with affine gap costs.
struct semi_global_affine : public global_affine
{
using algorithm_type = detail::global_alignment<detail::free_end_gaps<true, false, true, false>>;
};
// Local alignment with linear gap costs.
struct local_linear : public global_linear
{
using algorithm_type = detail::local_alignment<>;
};
// Local alignment with affine gap costs.
struct local_affine : public local_linear
{
using gap_type = detail::affine_gaps;
};
};
template <typename exec_policy_t, typename seq1_t, typename seq2_t, typename dp_traits>
requires is_execution_policy<exec_policy_t>::value &&
container_concept<seq1_t> &&
container_concept<seq2_t> &&
dp_traits_concept<dp_traits>
inline auto
align(exec_policy_t const & exec_policy,
seq1_t const & seq1,
seq2_t const & seq2,
align_config<> const & config,
dp_traits const & traits)
{
// delegate to corresponding functions.
// depending on configuration we can return several different things, like the alignment and score or just the score or the trace matrix or what ever we want.
return { ... };
}
template <typename exec_policy_t, typename seq1_t, typename seq2_t, typename configurator_t, typename callback_f>
requires is_execution_policy<exec_policy_t>::value &&
container_concept<seq1_t> &&
container_concept<seq2_t>
inline void
align_and_then(exec_policy_t const & exec_policy,
seq1_t const & seq1,
seq2_t const & seq2,
configurator_t const & config,
callback_f && callback)
{
// delegate to corresponding functions.
// depending on configuration we can callback several different things, like the alignment and score or just the score or the trace matrix or what ever we want.
}
The runtime configuration creates a chain of continuators to create a dp_settings object and delegate to the traits based interfaces. Generic continuators can be designed as a lambda-continuator.
struct align_configurator
{
// ----------------------------------------------------------------------------
// Enum gap_penalty
// ----------------------------------------------------------------------------
enum struct gap_penalty : uint8_t
{
linear,
affine,
dynamic,
automatic
};
// ----------------------------------------------------------------------------
// Class FreeEndGaps
// ----------------------------------------------------------------------------
struct FreeEndGaps
{
static constexpr uint8_t TOP{1};
static constexpr uint8_t BOTTOM{2};
static constexpr uint8_t LEFT{4};
static constexpr uint8_t RIGHT{8};
};
// ----------------------------------------------------------------------------
// Enum Traceback
// ----------------------------------------------------------------------------
enum Traceback : uint8_t
{
SCORE_ONLY,
BEST_ONE,
BEST_ALL
};
// ----------------------------------------------------------------------------
// Enum GapsPlacement
// ----------------------------------------------------------------------------
enum GapsPlacement : uint8_t
{
LEFT,
RIGHT
};
enum OutputFormat : uint8_t
{
ANCHOR_GAPS,
ARRAY_GAPS,
ALIGNMENT_GRAPH,
FRAGMENTS
};
};
template <typename exec_policy_t, typename seq1_t, typename seq2_t, typename configurator_t>
requires is_execution_policy<exec_policy_t>::value &&
container_concept<seq1_t> &&
container_concept<seq2_t>
inline auto
align(exec_policy_t const & exec_policy,
seq1_t const & seq1,
seq2_t const & seq2,
configurator_t const & config)
{
// delegate to corresponding functions.
// depending on configuration we can return several different things, like the alignment and score or just the score or the trace matrix or what ever we want.
return { ... };
}
template <typename exec_policy_t, typename seq1_t, typename seq2_t, typename configurator_t, typename callback_f>
requires is_execution_policy<exec_policy_t>::value &&
container_concept<seq1_t> &&
container_concept<seq2_t>
inline void
align_and_then(exec_policy_t const & exec_policy,
seq1_t const & seq1,
seq2_t const & seq2,
configurator_t const & config,
callback_f && callback)
{
// delegate to corresponding functions.
// depending on configuration we can callback several different things, like the alignment and score or just the score or the trace matrix or what ever we want.
}
#include <iostream>
#include <seqan3/alignment/algorithm.hpp>
#include <seqan3/alignment/representation.hpp>
int main()
{
dp_configurator<scoring_scheme<int>> c;
c.set_scoring_scheme({3, -4, -1, -4});
c.set_band({-19, 20});
c.gaps = dp_configurator::gap_type::dynamic;
c.trace = dp_configurator::trace_type::best_all;
c.output = dp_configurator::output_format::aligned_view_constant_access;
// Auto return type is not possible, since we cannot determine the output type
// by the config type.
align_and_then("ACGTACGACTGAC"_dna4,
"AGGTACGGACTACGAC"_dna4,
c,
[](auto const score, auto && alignment) { std::cout << alignment << std::endl; } );
}
#include <iostream>
#include <seqan3/alignment/algorithm.hpp>
#include <seqan3/alignment/representation.hpp>
int main()
{
global_affine_traits<scoring_scheme<int>> traits;
traits.set_scoring_scheme({3, -4, -1, -4});
traits.set_band({-19, 20});
// Auto return type is not possible, since we cannot determine the output type
// by the config type.
align_and_then("ACGTACGACTGAC"_dna4,
"AGGTACGGACTACGAC"_dna4,
traits,
[](auto const score, auto && alignment) { std::cout << alignment << std::endl; } );
}