diff --git a/src/subcommand/surject_main.cpp b/src/subcommand/surject_main.cpp index 90359f6105..660aa0965e 100644 --- a/src/subcommand/surject_main.cpp +++ b/src/subcommand/surject_main.cpp @@ -76,6 +76,27 @@ static void ensure_alignment_is_for_graph(const Alignment& aln, const HandleGrap } } +/// If the given multipath alignment doesn't make sense against the given graph (i.e. +/// doesn't agree with the nodes in the graph), print a message and stop the +/// program. Is thread-safe. +static void ensure_alignment_is_for_graph(const MultipathAlignment& aln, const HandleGraph& graph) { + // For multipaht alignments we just check node existence. + for (auto& subpath : aln.subpath()) { + for (auto& mapping : subpath.path().mapping()) { + nid_t node_id = mapping.position().node_id(); + if (!graph.has_node(node_id)) { + // Something is wrong with this alignment. + #pragma omp critical (cerr) + { + std::cerr << "error:[vg surject] MultipathAlignment " << aln.name() << " cannot be interpreted against this graph: node " << node_id << " does not exist!" << std::endl; + std::cerr << "Make sure that you are using the same graph that the reads were mapped to!" << std::endl; + } + exit(1); + } + } + } +} + int main_surject(int argc, char** argv) { if (argc == 2) { @@ -554,6 +575,11 @@ int main_surject(int argc, char** argv) { exit(1); } + + if (validate) { + ensure_alignment_is_for_graph(src1, *xgidx); + ensure_alignment_is_for_graph(src2, *xgidx); + } // convert out of protobuf multipath_alignment_t mp_src1, mp_src2; @@ -661,6 +687,10 @@ int main_surject(int argc, char** argv) { watchdog->check_in(thread_num, src.name()); } + if (validate) { + ensure_alignment_is_for_graph(src, *xgidx); + } + multipath_alignment_t mp_src; from_proto_multipath_alignment(src, mp_src); diff --git a/src/surjector.hpp b/src/surjector.hpp index 84bacead64..9c63effa24 100644 --- a/src/surjector.hpp +++ b/src/surjector.hpp @@ -24,7 +24,14 @@ namespace vg { using namespace std; - + + /** + * Widget to surject alignments down to linear paths in the graph. + * + * Assumes the alignments actually go with the graph; the caller is + * repsonsible for ensuring that e.g. all nodes referenced by the + * alignments actually exist. + */ class Surjector : public AlignerClient { public: diff --git a/test/t/15_vg_surject.t b/test/t/15_vg_surject.t index 3704ec823c..28d8992c09 100644 --- a/test/t/15_vg_surject.t +++ b/test/t/15_vg_surject.t @@ -6,7 +6,7 @@ BASH_TAP_ROOT=../deps/bash-tap PATH=../bin:$PATH # for vg -plan tests 48 +plan tests 52 vg construct -r small/x.fa >j.vg vg index -x j.xg j.vg @@ -196,7 +196,15 @@ is "$(vg surject -x x.xg -s -m -t 1 mapped.gamp | grep -v '@' | wc -l)" 40 "GAMP is "$(vg surject -x x.xg -M -m -s -t 1 mapped.gamp | grep -v '@' | wc -l)" 80 "GAMP surject can return multimappings" is "$(vg surject -x x.xg -M -m -s -i -t 1 mapped.gamp | grep -v '@' | wc -l)" 80 "GAMP surject can return multimappings" -rm x.vg x.pathdup.vg x.xg x.gcsa x.gcsa.lcp x.gam mapped.gam mapped.gamp +vg construct -r tiny/tiny.fa > tiny.vg +vg surject -x tiny.vg -s -t 1 mapped.gam >/dev/null 2>err.txt +is "${?}" "1" "Surjection fails when using the wrong graph for GAM" +is "$(cat err.txt | grep 'cannot be interpreted' | wc -l)" "1" "Surjection of GAM to the wrong graph reports the problem" +vg surject -x tiny.vg -s -t 1 -m mapped.gamp >/dev/null 2>err.txt +is "${?}" "1" "Surjection fails when using the wrong graph for GAMP" +is "$(cat err.txt | grep 'cannot be interpreted' | wc -l)" "1" "Surjection of GAMP to the wrong graph reports the problem" + +rm x.vg x.pathdup.vg x.xg x.gcsa x.gcsa.lcp x.gam mapped.gam mapped.gamp tiny.vg err.txt is "$(vg surject -p CHM13#0#chr8 -x surject/opposite_strands.gfa --prune-low-cplx --sam-output --gaf-input surject/opposite_strands.gaf | grep -v "^@" | cut -f3-12 | sort | uniq | wc -l)" "1" "vg surject low compelxity pruning gets the same alignment regardless of read orientation"