diff --git a/Makefile b/Makefile index b74c694..f8b3fae 100644 --- a/Makefile +++ b/Makefile @@ -118,7 +118,7 @@ test.fast: python -m pytest -m "not (integration or slow)" rm -rf output/ mkdir output/ - python -m pytest --basetemp=output -m "integration and not slow" + python -m pytest --basetemp=output -m "integration and not slow" -vv rm test.fa test.occupancy test.partition # used by GitHub actions during CI workflow diff --git a/change_log.txt b/change_log.txt index 27f8b82..a32b44a 100644 --- a/change_log.txt +++ b/change_log.txt @@ -1,5 +1,8 @@ Major changes to PhyKIT are summarized here. +1.20.0 + - Fixed bug for thread_dna function when using a ClipKIT log file. Input protein alignment must be the untrimmed alignment. + 1.19.4 - Saturation function forces y-intercept to be zero when calculating slope diff --git a/docs/change_log/index.rst b/docs/change_log/index.rst index 24f2e15..0e4318d 100644 --- a/docs/change_log/index.rst +++ b/docs/change_log/index.rst @@ -8,6 +8,9 @@ Change log Major changes to PhyKIT are summarized here. +**1.20.0**: +Fixed bug for thread_dna function when using a ClipKIT log file. Input protein alignment must be the untrimmed alignment. + **1.19.9**: Saturation function now also reports the absolute value of 1-saturation. Lower values are indicative of less saturation. diff --git a/docs/usage/index.rst b/docs/usage/index.rst index 1d08d0a..9911316 100644 --- a/docs/usage/index.rst +++ b/docs/usage/index.rst @@ -500,9 +500,20 @@ Thread DNA sequence onto a protein alignment to create a codon-based alignment. This function requires input alignments are in fasta format. -Codon alignments are then printed to stdout. Note, sequences -are assumed to occur in the same order in the protein and -nucleotide alignment. +Codon alignments are then printed to stdout. Note, paired +sequences are assumed to have the same name between the +protein and nucleotide file. The order does not matter. + +To thread nucleotide sequences over a trimmed amino acid +alignment, provide PhyKIT with a log file specifying which +sites have been trimmed and which have been kept. The log +file must be formatted the same as the log files outputted +by the alignment trimming toolkit ClipKIT (see -l in ClipKIT +documentation.) Details about ClipKIT can be seen here: +https://github.com/JLSteenwyk/ClipKIT. + +If using a ClipKIT log file, the untrimmed protein alignment +should be provided in the -p/--protein argument. .. code-block:: shell diff --git a/phykit/phykit.py b/phykit/phykit.py index 924a27d..a7734f2 100644 --- a/phykit/phykit.py +++ b/phykit/phykit.py @@ -2606,9 +2606,9 @@ def thread_dna(argv): codon-based alignment. This function requires input alignments are in fasta format. - Codon alignments are then printed to stdout. Note, sequences - are assumed to occur in the same order in the protein and - nucleotide alignment. + Codon alignments are then printed to stdout. Note, paired + sequences are assumed to have the same name between the + protein and nucleotide file. The order does not matter. To thread nucleotide sequences over a trimmed amino acid alignment, provide PhyKIT with a log file specifying which @@ -2618,6 +2618,9 @@ def thread_dna(argv): documentation.) Details about ClipKIT can be seen here: https://github.com/JLSteenwyk/ClipKIT. + If using a ClipKIT log file, the untrimmed protein alignment + should be provided in the -p/--protein argument. + Aliases: thread_dna, pal2nal, p2n Command line interfaces: diff --git a/phykit/services/alignment/dna_threader.py b/phykit/services/alignment/dna_threader.py index 4ad2589..b225a5d 100644 --- a/phykit/services/alignment/dna_threader.py +++ b/phykit/services/alignment/dna_threader.py @@ -61,16 +61,18 @@ def create_mask(self, length): return keep_mask def normalize_p_seq(self, p_seq, mask): - if self.clipkit_log_data: - untrimmed = [] - offset = 0 - for idx, value in enumerate(mask[::3]): - if value is True: - untrimmed.append(p_seq[idx - offset]) - else: - offset += 1 - untrimmed.append("#") - p_seq = "".join(untrimmed) + #TODO: write MP + #TODO: update tests + # if self.clipkit_log_data: + # untrimmed = [] + # offset = 0 + # for idx, value in enumerate(mask[::3]): + # if value is True: + # untrimmed.append(p_seq[idx - offset]) + # else: + # offset += 1 + # untrimmed.append("#") + # p_seq = "".join(untrimmed) return "".join([c * 3 for c in p_seq]) def normalize_n_seq(self, n_seq, normalized_p_seq): diff --git a/phykit/version.py b/phykit/version.py index 46764ef..0dcddbc 100644 --- a/phykit/version.py +++ b/phykit/version.py @@ -1 +1 @@ -__version__ = "1.19.9" +__version__ = "1.20.0" diff --git a/tests/integration/alignment/test_dna_threader_integration.py b/tests/integration/alignment/test_dna_threader_integration.py index 2a71e7a..361efaf 100644 --- a/tests/integration/alignment/test_dna_threader_integration.py +++ b/tests/integration/alignment/test_dna_threader_integration.py @@ -11,335 +11,29 @@ @pytest.mark.integration class TestDNAThreader(object): - # @patch("builtins.print") - # def test_dna_threader0(self, mocked_print): - # expected_result_0 = dedent( - # """>200_S38""" - # ) - # expected_result_1 = dedent( - # """atggctgacatcctcacgcagctccagacttgcctggatcagcttgcaacacaattctacgcaacacttggttatctcacaacataccacgacaatgcccccacaacaccaccacca------cccgacgcagcaccagccctagcaaagatcaccaagaactcatcatcaccgccagtcccagcagccatcgcaaataaagtggggggtgcagctgctgttgcgggcaatgcatcaccc---caggcgcctcctcaacaa---------gcg---------------------------------------------ccagactcgcccagc---------agccgg---cgggagcttgcgcgcgatctcattatcaaagaacagcagatcgagtaccttatctccgtgcttcccgggattggcgcctctgaggctgaacaagaaaccagaatccaggacctggagaccgagcttagagacgtcgagaaggagcgcgctgcgaaagtgcgggagttgaaaaagttgaggactcggttggaggatgttcttggcgctgtcgctgtgggtatccacggggatggttactctcaaaactga""" - # ) - # expected_result_2 = dedent( - # """>203_S40""" - # ) - # expected_result_3 = dedent( - # """atggctgacatcctcacgcagctccagacttgcctggatcagcttgcaacacaattctacgcaacacttggttatctcacaacataccacgacaatgcccccacaacaccaccacca------cccgacgcagcaccagccctagcaaagatcaccaagaactcatcatcaccaccagtcccagcagccatcgcaaataaagtggggggtgcagctgctgttgcgggcaatgcatcaccc---caggcgcctcctcaacaa---------gcg---------------------------------------------ccagactcgcccagc---------agccgg---cgggagcttgcgcgcgatctcattatcaaagaacagcagatcgagtaccttatctccgtgcttcccgggattggcgcctctgaggctgaacaagaaaccagaatccaggacctggagaccgagcttagagacgtcgagaaggagcgcgctgcgaaagtgcgggagttgaaaaagttgaggactcggttggaggatgttcttggcgctgtcgctgtgggtatccacggggatggttactctcaaaactga""" - # ) - - # testargs = [ - # "phykit", - # "thread_dna", - # "-p", - # f"{here.parent.parent.parent}/sample_files/EOG091N44MS.fa.mafft", - # "-n", - # f"{here.parent.parent.parent}/sample_files/EOG091N44MS.fa", - # ] - - # with patch.object(sys, "argv", testargs): - # Phykit() - # assert mocked_print.mock_calls == [ - # call(expected_result_0), - # call(expected_result_1), - # call(expected_result_2), - # call(expected_result_3), - # ] - @patch("builtins.print") - def test_dna_threader1(self, mocked_print): + def test_dna_threader_OG0002774(self, mocked_print): expected_result_0 = dedent( - """>1""" + """>sample8_000590-T1""" ) expected_result_1 = dedent( - """AAAGGG---""" - ) + """------------------------------------------------------------------ATGGAGCTTACAGAGGAAGACATCCAACCCCAGATGACTCGAAGACGTCCCGGGCAGAGTGCTCTAACGACGCCTCGAAATGAAAAAGACCGAGTAGAGATCCAGTCTGGAACGGAGTTCGGCATCACCCTGGGTACCCCGATTGGAATGATGGTGCGCAACGAGGATCAGAGACCCAAGGACTACGGTGGCAGCACAATGGATCTCTACCCTCGTCCCAGTCACGCTGATTATACTTACCTGGAGAAATACGGTGTCAAGGCGAGCAGCGGTGGTGGCCGGAGTAGTGCCCGCGAGACCATTGGCCGTGTCGCCGCAGGAGCCATTGCGGAGAAGTACCTACGCCTGTCGCATGGTGTCGAAATTGTCGCCTTTGTGTCCTCCGTTGGTAACGAACACCTTTTCCCGCCGACCCCCGAGCACCCTTCTCCATCGACCAACCCTGAGTTCCTGAAGCTCATCGAGACCATCGACCGTAAGACTGTCGATGCCTTCGTCCCCACTCGCTGCCCGAACGAGGAGGCGGCGGCGCGCATGACAAAAGTGATCGAGACTTTCCGGGACAACCAAGATAGCATCGGCGGCACCGTCACCTGCGTGATCCGCAACGTCCCCGTCGGCCTGGGCGAGCCTTGCTTCGACAAGCTCGAGGCCAAGCTGGCGCACGCCATGCTCAGCATCCCCGCCACCAAGGGCTTTGAGATCGGCTCGGGCTTCGGTGGCTGCGAGGTCCCCGGCTCCATCCACAACGACCCCTTCACCGTCTCCGAGGTCCAGACCCGCACCGGCAGCACACAGCGCCTGACCACCAAGACCAACAACTCCGGCGGCATCCAGGGCGGGATCTCCAACGGCGCTCCCATCTATTTCCGCGTTGCCTTCAAGCCCCCCGCCACCATCGGCCAGGCTCAGACCACCGCCTCTTACAGCTTCGAGGAGGGCATCCTCGAGGCCAAGGGCCGCCACGACCCCTGCGTTACCCCTCGTGCTGTCCCCATCGTCGAGGCCATGTCCGCCCTCGTCGTCATGGATGCGCTCATGGCCCAGTATGCCCGCGAAAGCGCAAAGAATTTACTGCCCCCGCTGCCCAGCACCCTCCCTACCAAACCGACTCTCGGCTCCAGCGGTGCTCCCGCCTCTTCA""" + ) expected_result_2 = dedent( - """>2""" + """>sample25_002627-T1""" ) expected_result_3 = dedent( - """AAATTTGGG""" - ) - expected_result_4 = dedent( - """>3""" - ) - expected_result_5 = dedent( - """AAATTTGGG""" - ) - expected_result_6 = dedent( - """>4""" - ) - expected_result_7 = dedent( - """AAATTTGGG""" + """ATGACCACCTATGGTGAATCCCATTGCCGCTCTGTCGGCTGCATCGTCGATGGCTGCCCTCCAGGCATGGAGCTTACAGAGGAAGACATCCAACCCCAGATGACTCGAAGACGTCCCGGGCAGAGTGCTCTAACGACGCCTCGAAATGAAAAAGACCGAGTAGAGATCCAGTCTGGAACGGAGTTCGGCATCACCCTGGGTACCCCGATTGGAATGATGGTGCGCAACGAGGATCAGAGACCCAAGGACTACGGTGGCAGCACAATGGATCTCTACCCTCGTCCCAGTCACGCTGATTATACTTACCTGGAGAAATACGGTGTCAAGGCGAGCAGCGGTGGTGGCCGGAGTAGTGCCCGCGAGACCATTGGCCGTGTCGCCGCAGGAGCCATTGCGGAGAAGTACCTACGCCTGTCGCATGGTGTCGAAATTGTCGCCTTTGTGTCCTCCGTTGGTAACGAACACCTTTTCCCGCCGACCCCCGAGCACCCTTCTCCATCGACCAACCCTGAGTTCCTGAAGCTCATCGAGACCATCGACCGTAAGACTGTCGATGCCTTCGTCCCCACTCGCTGCCCGAACGAGGAGGCGGCGGCACGCATGACAAAAGTGATCGAGACTTTCCGGGACAACCAAGATAGCATCGGCGGCACCGTCACCTGCGTGATCCGCAACGTCCCCGTCGGCCTGGGCGAGCCTTGCTTCGACAAGCTCGAGGCCAAGCTGGCGCACGCCATGCTCAGCATCCCCGCCACCAAGGGCTTTGAGATCGGCTCGGGCTTCGGTGGCTGCGAGGTCCCCGGCTCCATCCACAACGACCCCTTCACCGTCTCCGAGGTCCAGACCCGCACCGGCAGCACACAGCGCCTGACCACCAAGACCAACAACTCCGGCGGCATCCAGGGCGGGATCTCCAACGGCGCTCCCATCTATTTCCGCGTTGCCTTCAAGCCCCCCGCCACCATCGGCCAGGCTCAGACCACCGCCTCTTACAGCTTCGAGGAGGGCATCCTCGAGGCCAAGGGCCGCCACGACCCCTGCGTTACCCCTCGTGCTGTCCCCATCGTCGAGGCCATGTCCGCCCTCGTCGTCATGGATGCGCTCATGGCCCAGTATGCCCGCGAAAGCGCAAAGAATTTACTGCCCCCGCTGCCCAGCACCCTCCCTACCAAACCGACTCTCGGCTCCAGCGGTGCTCCCGCCTCTTCA""" ) testargs = [ "phykit", "thread_dna", "-p", - f"{here.parent.parent.parent}/sample_files/test_alignment.prot.faa", - "-n", - f"{here.parent.parent.parent}/sample_files/test.nucl.fna", - ] - - with patch.object(sys, "argv", testargs): - Phykit() - assert mocked_print.mock_calls == [ - call(expected_result_0), - call(expected_result_1), - call(expected_result_2), - call(expected_result_3), - call(expected_result_4), - call(expected_result_5), - call(expected_result_6), - call(expected_result_7), - ] - - @patch("builtins.print") - def test_dna_threader_alias0(self, mocked_print): - expected_result_0 = dedent( - """>1""" - ) - expected_result_1 = dedent( - """AAAGGG---""" - ) - expected_result_2 = dedent( - """>2""" - ) - expected_result_3 = dedent( - """AAATTTGGG""" - ) - expected_result_4 = dedent( - """>3""" - ) - expected_result_5 = dedent( - """AAATTTGGG""" - ) - expected_result_6 = dedent( - """>4""" - ) - expected_result_7 = dedent( - """AAATTTGGG""" - ) - testargs = [ - "phykit", - "pal2nal", - "-p", - f"{here.parent.parent.parent}/sample_files/test_alignment.prot.faa", - "-n", - f"{here.parent.parent.parent}/sample_files/test.nucl.fna", - ] - - with patch.object(sys, "argv", testargs): - Phykit() - assert mocked_print.mock_calls == [ - call(expected_result_0), - call(expected_result_1), - call(expected_result_2), - call(expected_result_3), - call(expected_result_4), - call(expected_result_5), - call(expected_result_6), - call(expected_result_7), - ] - - @patch("builtins.print") - def test_dna_threader_alias1(self, mocked_print): - expected_result_0 = dedent( - """>1""" - ) - expected_result_1 = dedent( - """AAAGGG---""" - ) - expected_result_2 = dedent( - """>2""" - ) - expected_result_3 = dedent( - """AAATTTGGG""" - ) - expected_result_4 = dedent( - """>3""" - ) - expected_result_5 = dedent( - """AAATTTGGG""" - ) - expected_result_6 = dedent( - """>4""" - ) - expected_result_7 = dedent( - """AAATTTGGG""" - ) - testargs = [ - "phykit", - "p2n", - "-p", - f"{here.parent.parent.parent}/sample_files/test_alignment.prot.faa", - "-n", - f"{here.parent.parent.parent}/sample_files/test.nucl.fna", - ] - - with patch.object(sys, "argv", testargs): - Phykit() - assert mocked_print.mock_calls == [ - call(expected_result_0), - call(expected_result_1), - call(expected_result_2), - call(expected_result_3), - call(expected_result_4), - call(expected_result_5), - call(expected_result_6), - call(expected_result_7), - ] - - @patch("builtins.print") - def test_dna_threader_incorrect_input_file(self, mocked_print): - testargs = [ - "phykit", - "thread_dna", - "-p", - f"{here.parent.parent.parent}/sample_files/test_alignment.prot.fa", - "-n", - f"{here.parent.parent.parent}/sample_files/test.nucl.fna", - ] - - with pytest.raises(SystemExit) as pytest_wrapped_e: - Phykit() - - assert pytest_wrapped_e.type == SystemExit - assert pytest_wrapped_e.value.code == 2 - - @patch("builtins.print") - def test_dna_threader_alias_stop_codon_true1(self, mocked_print): - expected_result_0 = dedent( - """>1""" - ) - expected_result_1 = dedent( - """AAAGGG---""" - ) - expected_result_2 = dedent( - """>2""" - ) - expected_result_3 = dedent( - """AAATTTGGG""" - ) - expected_result_4 = dedent( - """>3""" - ) - expected_result_5 = dedent( - """AAATTTGGG""" - ) - expected_result_6 = dedent( - """>4""" - ) - expected_result_7 = dedent( - """AAATTTGGG""" - ) - testargs = [ - "phykit", - "p2n", - "-p", - f"{here.parent.parent.parent}/sample_files/test_alignment.prot.faa", - "-n", - f"{here.parent.parent.parent}/sample_files/test.nucl.fna", - "-s", - "1" - ] - - with patch.object(sys, "argv", testargs): - Phykit() - assert mocked_print.mock_calls == [ - call(expected_result_0), - call(expected_result_1), - call(expected_result_2), - call(expected_result_3), - call(expected_result_4), - call(expected_result_5), - call(expected_result_6), - call(expected_result_7), - ] - - @patch("builtins.print") - def test_dna_threader_alias_stop_codon_false(self, mocked_print): - expected_result_0 = dedent( - """>1""" - ) - expected_result_1 = dedent( - """AAAGGG---""" - ) - expected_result_2 = dedent( - """>2""" - ) - expected_result_3 = dedent( - """AAATTTGGG""" - ) - expected_result_4 = dedent( - """>3""" - ) - expected_result_5 = dedent( - """AAATTTGGG""" - ) - expected_result_6 = dedent( - """>4""" - ) - expected_result_7 = dedent( - """AAATTT---""" - ) - testargs = [ - "phykit", - "p2n", - "-p", - f"{here.parent.parent.parent}/sample_files/test_alignment.prot.faa", - "-n", - f"{here.parent.parent.parent}/sample_files/test.nucl.fna", - "-s", - "0", - ] - - with patch.object(sys, "argv", testargs): - Phykit() - assert mocked_print.mock_calls == [ - call(expected_result_0), - call(expected_result_1), - call(expected_result_2), - call(expected_result_3), - call(expected_result_4), - call(expected_result_5), - call(expected_result_6), - call(expected_result_7), - ] - - @patch("builtins.print") - def test_dna_threader_alias_stop_codon_true0(self, mocked_print): - expected_result_0 = dedent( - """>1""" - ) - expected_result_1 = dedent( - """AAAGGG---""" - ) - expected_result_2 = dedent( - """>2""" - ) - expected_result_3 = dedent( - """AAATTTGGG""" - ) - expected_result_4 = dedent( - """>3""" - ) - expected_result_5 = dedent( - """AAATTTGGG""" - ) - expected_result_6 = dedent( - """>4""" - ) - expected_result_7 = dedent( - """AAATTTGGG""" - ) - testargs = [ - "phykit", - "p2n", - "-p", - f"{here.parent.parent.parent}/sample_files/test_alignment.prot.faa", + f"{here.parent.parent.parent}/sample_files/OG0002774.aln.afa.txt", + "-c", + f"{here.parent.parent.parent}/sample_files/OG0002774.aln.afa.clipkit.log.txt", "-n", - f"{here.parent.parent.parent}/sample_files/test.nucl.fna", - "-s", - "1", + f"{here.parent.parent.parent}/sample_files/OG0002774.mrna.fa.txt", ] with patch.object(sys, "argv", testargs): @@ -349,47 +43,31 @@ def test_dna_threader_alias_stop_codon_true0(self, mocked_print): call(expected_result_1), call(expected_result_2), call(expected_result_3), - call(expected_result_4), - call(expected_result_5), - call(expected_result_6), - call(expected_result_7), ] @patch("builtins.print") - def test_dna_threader_ambig_char_false(self, mocked_print): + def test_dna_threader_OG0002774_alias0(self, mocked_print): expected_result_0 = dedent( - """>1""" + """>sample8_000590-T1""" ) expected_result_1 = dedent( - """AAAGGG---""" - ) + """------------------------------------------------------------------ATGGAGCTTACAGAGGAAGACATCCAACCCCAGATGACTCGAAGACGTCCCGGGCAGAGTGCTCTAACGACGCCTCGAAATGAAAAAGACCGAGTAGAGATCCAGTCTGGAACGGAGTTCGGCATCACCCTGGGTACCCCGATTGGAATGATGGTGCGCAACGAGGATCAGAGACCCAAGGACTACGGTGGCAGCACAATGGATCTCTACCCTCGTCCCAGTCACGCTGATTATACTTACCTGGAGAAATACGGTGTCAAGGCGAGCAGCGGTGGTGGCCGGAGTAGTGCCCGCGAGACCATTGGCCGTGTCGCCGCAGGAGCCATTGCGGAGAAGTACCTACGCCTGTCGCATGGTGTCGAAATTGTCGCCTTTGTGTCCTCCGTTGGTAACGAACACCTTTTCCCGCCGACCCCCGAGCACCCTTCTCCATCGACCAACCCTGAGTTCCTGAAGCTCATCGAGACCATCGACCGTAAGACTGTCGATGCCTTCGTCCCCACTCGCTGCCCGAACGAGGAGGCGGCGGCGCGCATGACAAAAGTGATCGAGACTTTCCGGGACAACCAAGATAGCATCGGCGGCACCGTCACCTGCGTGATCCGCAACGTCCCCGTCGGCCTGGGCGAGCCTTGCTTCGACAAGCTCGAGGCCAAGCTGGCGCACGCCATGCTCAGCATCCCCGCCACCAAGGGCTTTGAGATCGGCTCGGGCTTCGGTGGCTGCGAGGTCCCCGGCTCCATCCACAACGACCCCTTCACCGTCTCCGAGGTCCAGACCCGCACCGGCAGCACACAGCGCCTGACCACCAAGACCAACAACTCCGGCGGCATCCAGGGCGGGATCTCCAACGGCGCTCCCATCTATTTCCGCGTTGCCTTCAAGCCCCCCGCCACCATCGGCCAGGCTCAGACCACCGCCTCTTACAGCTTCGAGGAGGGCATCCTCGAGGCCAAGGGCCGCCACGACCCCTGCGTTACCCCTCGTGCTGTCCCCATCGTCGAGGCCATGTCCGCCCTCGTCGTCATGGATGCGCTCATGGCCCAGTATGCCCGCGAAAGCGCAAAGAATTTACTGCCCCCGCTGCCCAGCACCCTCCCTACCAAACCGACTCTCGGCTCCAGCGGTGCTCCCGCCTCTTCA""" + ) expected_result_2 = dedent( - """>2""" + """>sample25_002627-T1""" ) expected_result_3 = dedent( - """AAATTTGGG""" - ) - expected_result_4 = dedent( - """>3""" - ) - expected_result_5 = dedent( - """AAATTT---""" - ) - expected_result_6 = dedent( - """>4""" - ) - expected_result_7 = dedent( - """AAATTT---""" + """ATGACCACCTATGGTGAATCCCATTGCCGCTCTGTCGGCTGCATCGTCGATGGCTGCCCTCCAGGCATGGAGCTTACAGAGGAAGACATCCAACCCCAGATGACTCGAAGACGTCCCGGGCAGAGTGCTCTAACGACGCCTCGAAATGAAAAAGACCGAGTAGAGATCCAGTCTGGAACGGAGTTCGGCATCACCCTGGGTACCCCGATTGGAATGATGGTGCGCAACGAGGATCAGAGACCCAAGGACTACGGTGGCAGCACAATGGATCTCTACCCTCGTCCCAGTCACGCTGATTATACTTACCTGGAGAAATACGGTGTCAAGGCGAGCAGCGGTGGTGGCCGGAGTAGTGCCCGCGAGACCATTGGCCGTGTCGCCGCAGGAGCCATTGCGGAGAAGTACCTACGCCTGTCGCATGGTGTCGAAATTGTCGCCTTTGTGTCCTCCGTTGGTAACGAACACCTTTTCCCGCCGACCCCCGAGCACCCTTCTCCATCGACCAACCCTGAGTTCCTGAAGCTCATCGAGACCATCGACCGTAAGACTGTCGATGCCTTCGTCCCCACTCGCTGCCCGAACGAGGAGGCGGCGGCACGCATGACAAAAGTGATCGAGACTTTCCGGGACAACCAAGATAGCATCGGCGGCACCGTCACCTGCGTGATCCGCAACGTCCCCGTCGGCCTGGGCGAGCCTTGCTTCGACAAGCTCGAGGCCAAGCTGGCGCACGCCATGCTCAGCATCCCCGCCACCAAGGGCTTTGAGATCGGCTCGGGCTTCGGTGGCTGCGAGGTCCCCGGCTCCATCCACAACGACCCCTTCACCGTCTCCGAGGTCCAGACCCGCACCGGCAGCACACAGCGCCTGACCACCAAGACCAACAACTCCGGCGGCATCCAGGGCGGGATCTCCAACGGCGCTCCCATCTATTTCCGCGTTGCCTTCAAGCCCCCCGCCACCATCGGCCAGGCTCAGACCACCGCCTCTTACAGCTTCGAGGAGGGCATCCTCGAGGCCAAGGGCCGCCACGACCCCTGCGTTACCCCTCGTGCTGTCCCCATCGTCGAGGCCATGTCCGCCCTCGTCGTCATGGATGCGCTCATGGCCCAGTATGCCCGCGAAAGCGCAAAGAATTTACTGCCCCCGCTGCCCAGCACCCTCCCTACCAAACCGACTCTCGGCTCCAGCGGTGCTCCCGCCTCTTCA""" ) testargs = [ "phykit", "p2n", "-p", - f"{here.parent.parent.parent}/sample_files/test_alignment.prot.ambig.faa", + f"{here.parent.parent.parent}/sample_files/OG0002774.aln.afa.txt", + "-c", + f"{here.parent.parent.parent}/sample_files/OG0002774.aln.afa.clipkit.log.txt", "-n", - f"{here.parent.parent.parent}/sample_files/test.nucl.fna", - "-s", - "F", + f"{here.parent.parent.parent}/sample_files/OG0002774.mrna.fa.txt", ] with patch.object(sys, "argv", testargs): @@ -399,183 +77,31 @@ def test_dna_threader_ambig_char_false(self, mocked_print): call(expected_result_1), call(expected_result_2), call(expected_result_3), - call(expected_result_4), - call(expected_result_5), - call(expected_result_6), - call(expected_result_7), ] @patch("builtins.print") - def test_dna_threader_ambig_char_true(self, mocked_print): + def test_dna_threader_OG0002774_alias1(self, mocked_print): expected_result_0 = dedent( - """>1""" + """>sample8_000590-T1""" ) expected_result_1 = dedent( - """AAAGGG---""" - ) + """------------------------------------------------------------------ATGGAGCTTACAGAGGAAGACATCCAACCCCAGATGACTCGAAGACGTCCCGGGCAGAGTGCTCTAACGACGCCTCGAAATGAAAAAGACCGAGTAGAGATCCAGTCTGGAACGGAGTTCGGCATCACCCTGGGTACCCCGATTGGAATGATGGTGCGCAACGAGGATCAGAGACCCAAGGACTACGGTGGCAGCACAATGGATCTCTACCCTCGTCCCAGTCACGCTGATTATACTTACCTGGAGAAATACGGTGTCAAGGCGAGCAGCGGTGGTGGCCGGAGTAGTGCCCGCGAGACCATTGGCCGTGTCGCCGCAGGAGCCATTGCGGAGAAGTACCTACGCCTGTCGCATGGTGTCGAAATTGTCGCCTTTGTGTCCTCCGTTGGTAACGAACACCTTTTCCCGCCGACCCCCGAGCACCCTTCTCCATCGACCAACCCTGAGTTCCTGAAGCTCATCGAGACCATCGACCGTAAGACTGTCGATGCCTTCGTCCCCACTCGCTGCCCGAACGAGGAGGCGGCGGCGCGCATGACAAAAGTGATCGAGACTTTCCGGGACAACCAAGATAGCATCGGCGGCACCGTCACCTGCGTGATCCGCAACGTCCCCGTCGGCCTGGGCGAGCCTTGCTTCGACAAGCTCGAGGCCAAGCTGGCGCACGCCATGCTCAGCATCCCCGCCACCAAGGGCTTTGAGATCGGCTCGGGCTTCGGTGGCTGCGAGGTCCCCGGCTCCATCCACAACGACCCCTTCACCGTCTCCGAGGTCCAGACCCGCACCGGCAGCACACAGCGCCTGACCACCAAGACCAACAACTCCGGCGGCATCCAGGGCGGGATCTCCAACGGCGCTCCCATCTATTTCCGCGTTGCCTTCAAGCCCCCCGCCACCATCGGCCAGGCTCAGACCACCGCCTCTTACAGCTTCGAGGAGGGCATCCTCGAGGCCAAGGGCCGCCACGACCCCTGCGTTACCCCTCGTGCTGTCCCCATCGTCGAGGCCATGTCCGCCCTCGTCGTCATGGATGCGCTCATGGCCCAGTATGCCCGCGAAAGCGCAAAGAATTTACTGCCCCCGCTGCCCAGCACCCTCCCTACCAAACCGACTCTCGGCTCCAGCGGTGCTCCCGCCTCTTCA""" + ) expected_result_2 = dedent( - """>2""" + """>sample25_002627-T1""" ) expected_result_3 = dedent( - """AAATTTGGG""" - ) - expected_result_4 = dedent( - """>3""" - ) - expected_result_5 = dedent( - """AAATTT---""" + """ATGACCACCTATGGTGAATCCCATTGCCGCTCTGTCGGCTGCATCGTCGATGGCTGCCCTCCAGGCATGGAGCTTACAGAGGAAGACATCCAACCCCAGATGACTCGAAGACGTCCCGGGCAGAGTGCTCTAACGACGCCTCGAAATGAAAAAGACCGAGTAGAGATCCAGTCTGGAACGGAGTTCGGCATCACCCTGGGTACCCCGATTGGAATGATGGTGCGCAACGAGGATCAGAGACCCAAGGACTACGGTGGCAGCACAATGGATCTCTACCCTCGTCCCAGTCACGCTGATTATACTTACCTGGAGAAATACGGTGTCAAGGCGAGCAGCGGTGGTGGCCGGAGTAGTGCCCGCGAGACCATTGGCCGTGTCGCCGCAGGAGCCATTGCGGAGAAGTACCTACGCCTGTCGCATGGTGTCGAAATTGTCGCCTTTGTGTCCTCCGTTGGTAACGAACACCTTTTCCCGCCGACCCCCGAGCACCCTTCTCCATCGACCAACCCTGAGTTCCTGAAGCTCATCGAGACCATCGACCGTAAGACTGTCGATGCCTTCGTCCCCACTCGCTGCCCGAACGAGGAGGCGGCGGCACGCATGACAAAAGTGATCGAGACTTTCCGGGACAACCAAGATAGCATCGGCGGCACCGTCACCTGCGTGATCCGCAACGTCCCCGTCGGCCTGGGCGAGCCTTGCTTCGACAAGCTCGAGGCCAAGCTGGCGCACGCCATGCTCAGCATCCCCGCCACCAAGGGCTTTGAGATCGGCTCGGGCTTCGGTGGCTGCGAGGTCCCCGGCTCCATCCACAACGACCCCTTCACCGTCTCCGAGGTCCAGACCCGCACCGGCAGCACACAGCGCCTGACCACCAAGACCAACAACTCCGGCGGCATCCAGGGCGGGATCTCCAACGGCGCTCCCATCTATTTCCGCGTTGCCTTCAAGCCCCCCGCCACCATCGGCCAGGCTCAGACCACCGCCTCTTACAGCTTCGAGGAGGGCATCCTCGAGGCCAAGGGCCGCCACGACCCCTGCGTTACCCCTCGTGCTGTCCCCATCGTCGAGGCCATGTCCGCCCTCGTCGTCATGGATGCGCTCATGGCCCAGTATGCCCGCGAAAGCGCAAAGAATTTACTGCCCCCGCTGCCCAGCACCCTCCCTACCAAACCGACTCTCGGCTCCAGCGGTGCTCCCGCCTCTTCA""" ) - expected_result_6 = dedent( - """>4""" - ) - expected_result_7 = dedent( - """AAATTTGGG""" - ) - testargs = [ - "phykit", - "p2n", - "-p", - f"{here.parent.parent.parent}/sample_files/test_alignment.prot.ambig.faa", - "-n", - f"{here.parent.parent.parent}/sample_files/test.nucl.fna", - "-s", - "T", - ] - - with patch.object(sys, "argv", testargs): - Phykit() - assert mocked_print.mock_calls == [ - call(expected_result_0), - call(expected_result_1), - call(expected_result_2), - call(expected_result_3), - call(expected_result_4), - call(expected_result_5), - call(expected_result_6), - call(expected_result_7), - ] - - @patch("builtins.print") - def test_dna_threader_aa_file_not_found(self, mocked_print): - testargs = [ - "phykit", - "p2n", - "-p", - "not_real", - "-n", - f"{here.parent.parent.parent}/sample_files/test.nucl.fna", - "-s", - "T", - ] - with pytest.raises(SystemExit) as pytest_wrapped_e: - Phykit() - - assert pytest_wrapped_e.type == SystemExit - assert pytest_wrapped_e.value.code == 2 - - @patch("builtins.print") - def test_dna_threader_nucl_file_not_found(self, mocked_print): testargs = [ "phykit", - "p2n", - "-p", - f"{here.parent.parent.parent}/sample_files/test_alignment.prot.ambig.faa", - "-n", - "not_real", - "-s", - "T", - ] - with pytest.raises(SystemExit) as pytest_wrapped_e: - Phykit() - - assert pytest_wrapped_e.type == SystemExit - assert pytest_wrapped_e.value.code == 2 - - @patch("builtins.print") - def test_dna_threader_trimmed_alignment_short_log(self, mocked_print): - expected_result_0 = dedent( - """>1""" - ) - expected_result_1 = dedent( - """AAATTT""" - ) - expected_result_2 = dedent( - """>2""" - ) - expected_result_3 = dedent( - """AAAGGG""" - ) - expected_result_4 = dedent( - """>3""" - ) - expected_result_5 = dedent( - """AAA---""" - ) - expected_result_6 = dedent( - """>4""" - ) - expected_result_7 = dedent( - """AAAGGG""" - ) - testargs = [ - "phykit", - "p2n", + "pal2nal", "-p", - f"{here.parent.parent.parent}/sample_files/test_alignment.prot.faa.clipkit", - "-n", - f"{here.parent.parent.parent}/sample_files/test.nucl.fna", + f"{here.parent.parent.parent}/sample_files/OG0002774.aln.afa.txt", "-c", - f"{here.parent.parent.parent}/sample_files/test_alignment.prot.faa.clipkit.log", - ] - - with patch.object(sys, "argv", testargs): - Phykit() - assert mocked_print.mock_calls == [ - call(expected_result_0), - call(expected_result_1), - call(expected_result_2), - call(expected_result_3), - call(expected_result_4), - call(expected_result_5), - call(expected_result_6), - call(expected_result_7), - ] - - @patch("builtins.print") - def test_dna_threader_trimmed_alignment_long_log(self, mocked_print): - expected_result_0 = dedent( - """>1""" - ) - expected_result_1 = dedent( - """AAATTT""" - ) - expected_result_2 = dedent( - """>2""" - ) - expected_result_3 = dedent( - """AAAGGG""" - ) - expected_result_4 = dedent( - """>3""" - ) - expected_result_5 = dedent( - """AAA---""" - ) - expected_result_6 = dedent( - """>4""" - ) - expected_result_7 = dedent( - """AAAGGG""" - ) - testargs = [ - "phykit", - "p2n", - "-p", - f"{here.parent.parent.parent}/sample_files/test_alignment.prot.faa.clipkit", + f"{here.parent.parent.parent}/sample_files/OG0002774.aln.afa.clipkit.log.txt", "-n", - f"{here.parent.parent.parent}/sample_files/test.nucl.fna", - "--clipkit_log_file", - f"{here.parent.parent.parent}/sample_files/test_alignment.prot.faa.clipkit.log", + f"{here.parent.parent.parent}/sample_files/OG0002774.mrna.fa.txt", ] with patch.object(sys, "argv", testargs): @@ -585,76 +111,29 @@ def test_dna_threader_trimmed_alignment_long_log(self, mocked_print): call(expected_result_1), call(expected_result_2), call(expected_result_3), - call(expected_result_4), - call(expected_result_5), - call(expected_result_6), - call(expected_result_7), ] @patch("builtins.print") - def test_dna_threader_trimmed_longer_alignment_log(self, mocked_print): + def test_dna_threader_no_log_file(self, mocked_print): expected_result_0 = dedent( - """>1""" + """>sample8_000590-T1""" ) expected_result_1 = dedent( - """AAATTTCCCAAA------GGG---""" - ) - - ## With ClipKIT log - # AAAGGGTTTGGGAAACCCAAAGGG---TTT---GGG---AAACCCAAATTTGGGAAAGGGTTTGGGAAAGGG - # 11122233344455566677788899910 11 12 13 14 - # AAA===TTT======CCCAAA===---===---GGGAAACCCAAATTTGGGAAAGGGTTTGGGAAAGGG - # MMM###LLL######GGGGGG###---###---GGGXXX - - # - # AAAGGGTTTGGGAAACCCAAAGGGTTTGGGAAACCCAAATTTGGGAAAGGGTTTGGGAAAGGG - # - # MMM###LLL######GGGGGG###---###---GGGXXX - # AAA===TTT======CCCAAA===---===---GGG--- - - - - # 1 keep Const 0.0 - # 2 trim nConst,nPI 0.0 - # 3 keep nConst,nPI 0.25 - # 4 trim Const 0.0 - # 5 trim Const 0.0 - # 6 keep Const 0.0 - # 7 keep Const 0.0 - # 8 trim Const 0.0 - # 9 keep Const 0.0 - # 10 trim Const 0.0 - # 11 keep Const 0.0 - # 12 keep Const 0.0 - # 13 keep Const 0.0 - + """---------------------------------------------------------------------------------------------ATGGAGCTTACAGAGGAAGACATCCAACCCCAGATGACTCGAAGACGTCCCGGGCAGAGTGCTCTAACGACGCCTCGAAATGAAAAAGACCGAGTAGAGATCCAGTCTGGAACGGAGTTCGGCATCACCCTGGGTACCCCGATTGGAATGATGGTGCGCAACGAGGATCAGAGACCCAAGGACTACGGTGGCAGCACAATGGATCTCTACCCTCGTCCCAGTCACGCTGATTATACTTACCTGGAGAAATACGGTGTCAAGGCGAGCAGCGGTGGTGGCCGGAGTAGTGCCCGCGAGACCATTGGCCGTGTCGCCGCAGGAGCCATTGCGGAGAAGTACCTACGCCTGTCGCATGGTGTCGAAATTGTCGCCTTTGTGTCCTCCGTTGGTAACGAACACCTTTTCCCGCCGACCCCCGAGCACCCTTCTCCATCGACCAACCCTGAGTTCCTGAAGCTCATCGAGACCATCGACCGTAAGACTGTCGATGCCTTCGTCCCCACTCGCTGCCCGAACGAGGAGGCGGCGGCGCGCATGACAAAAGTGATCGAGACTTTCCGGGACAACCAAGATAGCATCGGCGGCACCGTCACCTGCGTGATCCGCAACGTCCCCGTCGGCCTGGGCGAGCCTTGCTTCGACAAGCTCGAGGCCAAGCTGGCGCACGCCATGCTCAGCATCCCCGCCACCAAGGGCTTTGAGATCGGCTCGGGCTTCGGTGGCTGCGAGGTCCCCGGCTCCATCCACAACGACCCCTTCACCGTCTCCGAGGTCCAGACCCGCACCGGCAGCACACAGCGCCTGACCACCAAGACCAACAACTCCGGCGGCATCCAGGGCGGGATCTCCAACGGCGCTCCCATCTATTTCCGCGTTGCCTTCAAGCCCCCCGCCACCATCGGCCAGGCTCAGACCACCGCCTCTTACAGCTTCGAGGAGGGCATCCTCGAGGCCAAGGGCCGCCACGACCCCTGCGTTACCCCTCGTGCTGTCCCCATCGTCGAGGCCATGTCCGCCCTCGTCGTCATGGATGCGCTCATGGCCCAGTATGCCCGCGAAAGCGCAAAGAATTTACTGCCCCCGCTGCCCAGCACCCTCCCTACCAAACCGACTCTCGGCTCCAGCGGTGCTCCCGCCTCTTCA""" + ) expected_result_2 = dedent( - """>2""" + """>sample25_002627-T1""" ) expected_result_3 = dedent( - """AAAGGGTTTGGGAAAGGGAAAGGG""" - ) - expected_result_4 = dedent( - """>3""" - ) - expected_result_5 = dedent( - """AAAGGGTTTGGGAAAGGGAAAGGG""" - ) - expected_result_6 = dedent( - """>4""" - ) - expected_result_7 = dedent( - """AAAGGGTTTGGGAAAGGGAAAGGG""" + """ATGTCGACGTGGGGAGAATACTTTCGGGTCACCACCTATGGTGAATCCCATTGCCGCTCTGTCGGCTGCATCGTCGATGGCTGCCCTCCAGGCATGGAGCTTACAGAGGAAGACATCCAACCCCAGATGACTCGAAGACGTCCCGGGCAGAGTGCTCTAACGACGCCTCGAAATGAAAAAGACCGAGTAGAGATCCAGTCTGGAACGGAGTTCGGCATCACCCTGGGTACCCCGATTGGAATGATGGTGCGCAACGAGGATCAGAGACCCAAGGACTACGGTGGCAGCACAATGGATCTCTACCCTCGTCCCAGTCACGCTGATTATACTTACCTGGAGAAATACGGTGTCAAGGCGAGCAGCGGTGGTGGCCGGAGTAGTGCCCGCGAGACCATTGGCCGTGTCGCCGCAGGAGCCATTGCGGAGAAGTACCTACGCCTGTCGCATGGTGTCGAAATTGTCGCCTTTGTGTCCTCCGTTGGTAACGAACACCTTTTCCCGCCGACCCCCGAGCACCCTTCTCCATCGACCAACCCTGAGTTCCTGAAGCTCATCGAGACCATCGACCGTAAGACTGTCGATGCCTTCGTCCCCACTCGCTGCCCGAACGAGGAGGCGGCGGCACGCATGACAAAAGTGATCGAGACTTTCCGGGACAACCAAGATAGCATCGGCGGCACCGTCACCTGCGTGATCCGCAACGTCCCCGTCGGCCTGGGCGAGCCTTGCTTCGACAAGCTCGAGGCCAAGCTGGCGCACGCCATGCTCAGCATCCCCGCCACCAAGGGCTTTGAGATCGGCTCGGGCTTCGGTGGCTGCGAGGTCCCCGGCTCCATCCACAACGACCCCTTCACCGTCTCCGAGGTCCAGACCCGCACCGGCAGCACACAGCGCCTGACCACCAAGACCAACAACTCCGGCGGCATCCAGGGCGGGATCTCCAACGGCGCTCCCATCTATTTCCGCGTTGCCTTCAAGCCCCCCGCCACCATCGGCCAGGCTCAGACCACCGCCTCTTACAGCTTCGAGGAGGGCATCCTCGAGGCCAAGGGCCGCCACGACCCCTGCGTTACCCCTCGTGCTGTCCCCATCGTCGAGGCCATGTCCGCCCTCGTCGTCATGGATGCGCTCATGGCCCAGTATGCCCGCGAAAGCGCAAAGAATTTACTGCCCCCGCTGCCCAGCACCCTCCCTACCAAACCGACTCTCGGCTCCAGCGGTGCTCCCGCCTCTTCA""" ) testargs = [ "phykit", - "p2n", + "pal2nal", "-p", - f"{here.parent.parent.parent}/sample_files/test_alignment_longer.prot.faa.clipkit", + f"{here.parent.parent.parent}/sample_files/OG0002774.aln.afa.txt", "-n", - f"{here.parent.parent.parent}/sample_files/test_longer.nucl.fna", - "--clipkit_log_file", - f"{here.parent.parent.parent}/sample_files/test_alignment_longer.prot.faa.clipkit.log", + f"{here.parent.parent.parent}/sample_files/OG0002774.mrna.fa.txt", ] with patch.object(sys, "argv", testargs): @@ -664,8 +143,5 @@ def test_dna_threader_trimmed_longer_alignment_log(self, mocked_print): call(expected_result_1), call(expected_result_2), call(expected_result_3), - call(expected_result_4), - call(expected_result_5), - call(expected_result_6), - call(expected_result_7), ] + diff --git a/tests/sample_files/OG0002774.aln.afa.clipkit.log.txt b/tests/sample_files/OG0002774.aln.afa.clipkit.log.txt new file mode 100644 index 0000000..1ec7313 --- /dev/null +++ b/tests/sample_files/OG0002774.aln.afa.clipkit.log.txt @@ -0,0 +1,412 @@ +1 keep constant 0.1481 +2 trim constant 0.2222 +3 trim constant 0.2222 +4 trim constant 0.2222 +5 trim constant 0.2222 +6 trim constant 0.2222 +7 trim constant 0.2222 +8 trim constant 0.2222 +9 trim constant 0.2222 +10 trim constant 0.2222 +11 keep parsimony-informative 0.1481 +12 keep constant 0.1481 +13 keep constant 0.1481 +14 keep constant 0.1481 +15 keep constant 0.1481 +16 keep constant 0.1481 +17 keep constant 0.1481 +18 keep constant 0.1481 +19 keep constant 0.1481 +20 keep constant 0.1481 +21 keep constant 0.1481 +22 keep constant 0.1481 +23 keep constant 0.1481 +24 keep constant 0.1481 +25 keep constant 0.1481 +26 keep constant 0.1481 +27 keep constant 0.1481 +28 keep constant 0.1481 +29 keep constant 0.1481 +30 keep constant 0.1481 +31 keep constant 0.1481 +32 keep constant 0.0 +33 keep constant 0.0 +34 keep constant 0.0 +35 keep constant 0.0 +36 keep constant 0.0 +37 keep constant 0.0 +38 keep constant 0.0 +39 keep constant 0.0 +40 keep constant 0.0 +41 keep constant 0.0 +42 keep constant 0.0 +43 keep constant 0.0 +44 keep constant 0.0 +45 keep constant 0.0 +46 keep constant 0.0 +47 keep constant 0.0 +48 keep constant 0.0 +49 keep constant 0.0 +50 keep constant 0.0 +51 keep constant 0.0 +52 keep constant 0.0 +53 keep constant 0.0 +54 keep constant 0.0 +55 keep constant 0.0 +56 keep constant 0.0 +57 keep constant 0.0 +58 keep constant 0.0 +59 keep constant 0.0 +60 keep constant 0.0 +61 keep constant 0.0 +62 keep constant 0.0 +63 keep constant 0.0 +64 keep constant 0.0 +65 keep constant 0.0 +66 keep constant 0.0 +67 keep constant 0.0 +68 keep constant 0.0 +69 keep constant 0.0 +70 keep constant 0.0 +71 keep constant 0.0 +72 keep constant 0.0 +73 keep constant 0.0 +74 keep constant 0.0 +75 keep constant 0.0 +76 keep constant 0.0 +77 keep constant 0.0 +78 keep constant 0.0 +79 keep constant 0.0 +80 keep constant 0.0 +81 keep constant 0.0 +82 keep constant 0.0 +83 keep constant 0.0 +84 keep constant 0.0 +85 keep constant 0.0 +86 keep constant 0.0 +87 keep constant 0.0 +88 keep constant 0.0 +89 keep constant 0.0 +90 keep constant 0.0 +91 keep constant 0.0 +92 keep constant 0.0 +93 keep constant 0.0 +94 keep constant 0.0 +95 keep constant 0.0 +96 keep constant 0.0 +97 keep constant 0.0 +98 keep constant 0.0 +99 keep constant 0.0 +100 keep constant 0.0 +101 keep constant 0.0 +102 keep constant 0.0 +103 keep constant 0.0 +104 keep constant 0.0 +105 keep constant 0.0 +106 keep constant 0.0 +107 keep constant 0.0 +108 keep constant 0.0 +109 keep constant 0.0 +110 keep constant 0.0 +111 keep constant 0.0 +112 keep constant 0.0 +113 keep constant 0.0 +114 keep constant 0.0 +115 keep constant 0.0 +116 keep constant 0.0 +117 keep constant 0.0 +118 keep constant 0.0 +119 keep constant 0.0 +120 keep constant 0.0 +121 keep constant 0.0 +122 keep constant 0.0 +123 keep constant 0.0 +124 keep constant 0.0 +125 keep constant 0.0 +126 keep constant 0.0 +127 keep constant 0.0 +128 keep constant 0.0 +129 keep constant 0.0 +130 keep constant 0.0 +131 keep constant 0.0 +132 keep constant 0.0 +133 keep constant 0.0 +134 keep constant 0.0 +135 keep constant 0.0 +136 keep constant 0.0 +137 keep constant 0.0 +138 keep constant 0.0 +139 keep constant 0.0 +140 keep constant 0.0 +141 keep constant 0.0 +142 keep constant 0.0 +143 keep constant 0.0 +144 keep constant 0.0 +145 keep constant 0.0 +146 keep constant 0.0 +147 keep constant 0.0 +148 keep constant 0.0 +149 keep constant 0.0 +150 keep constant 0.0 +151 keep constant 0.0 +152 keep constant 0.0 +153 keep constant 0.0 +154 keep constant 0.0 +155 keep constant 0.0 +156 keep constant 0.0 +157 keep constant 0.0 +158 keep constant 0.0 +159 keep constant 0.0 +160 keep constant 0.0 +161 keep constant 0.0 +162 keep constant 0.0 +163 keep constant 0.0 +164 keep constant 0.0 +165 keep constant 0.0 +166 keep constant 0.0 +167 keep constant 0.0 +168 keep constant 0.0 +169 keep constant 0.0 +170 keep constant 0.0 +171 keep constant 0.0 +172 keep constant 0.0 +173 keep constant 0.0 +174 keep constant 0.0 +175 keep constant 0.0 +176 keep constant 0.0 +177 keep constant 0.0 +178 keep constant 0.0 +179 keep constant 0.0 +180 keep constant 0.0 +181 keep constant 0.0 +182 keep constant 0.0 +183 keep constant 0.0 +184 keep constant 0.0 +185 keep constant 0.0 +186 keep constant 0.0 +187 keep constant 0.0 +188 keep constant 0.0 +189 keep constant 0.0 +190 keep constant 0.0 +191 keep constant 0.0 +192 keep constant 0.0 +193 keep constant 0.0 +194 keep constant 0.0 +195 keep singleton 0.0 +196 keep constant 0.0 +197 keep constant 0.0 +198 keep constant 0.0 +199 keep constant 0.0 +200 keep constant 0.0 +201 keep constant 0.0 +202 keep constant 0.0 +203 keep constant 0.0 +204 keep constant 0.0 +205 keep constant 0.0 +206 keep constant 0.0 +207 keep constant 0.0 +208 keep constant 0.0 +209 keep constant 0.0 +210 keep constant 0.0 +211 keep constant 0.0 +212 keep constant 0.0 +213 keep constant 0.0 +214 keep constant 0.0 +215 keep constant 0.0 +216 keep constant 0.0 +217 keep constant 0.0 +218 keep constant 0.0 +219 keep constant 0.0 +220 keep constant 0.0 +221 keep constant 0.0 +222 keep constant 0.0 +223 keep constant 0.0 +224 keep constant 0.0 +225 keep constant 0.0 +226 keep constant 0.0 +227 keep constant 0.0 +228 keep constant 0.0 +229 keep constant 0.0 +230 keep constant 0.0 +231 keep constant 0.0 +232 keep constant 0.0 +233 keep constant 0.0 +234 keep constant 0.0 +235 keep constant 0.0 +236 keep constant 0.0 +237 keep constant 0.0 +238 keep constant 0.0 +239 keep constant 0.0 +240 keep constant 0.0 +241 keep constant 0.0 +242 keep constant 0.0 +243 keep constant 0.0 +244 keep constant 0.0 +245 keep constant 0.0 +246 keep constant 0.0 +247 keep constant 0.0 +248 keep constant 0.0 +249 keep constant 0.0 +250 keep constant 0.0 +251 keep constant 0.0 +252 keep constant 0.0 +253 keep constant 0.0 +254 keep constant 0.0 +255 keep constant 0.0 +256 keep constant 0.0 +257 keep constant 0.0 +258 keep constant 0.0 +259 keep constant 0.0 +260 keep constant 0.0 +261 keep constant 0.0 +262 keep constant 0.0 +263 keep constant 0.0 +264 keep constant 0.0 +265 keep constant 0.0 +266 keep constant 0.0 +267 keep constant 0.0 +268 keep constant 0.0 +269 keep constant 0.0 +270 keep constant 0.0 +271 keep constant 0.0 +272 keep constant 0.0 +273 keep constant 0.0 +274 keep constant 0.0 +275 keep constant 0.0 +276 keep constant 0.0 +277 keep constant 0.0 +278 keep constant 0.0 +279 keep constant 0.0 +280 keep constant 0.0 +281 keep constant 0.0 +282 keep constant 0.0 +283 keep constant 0.0 +284 keep constant 0.0 +285 keep constant 0.0 +286 keep constant 0.0 +287 keep constant 0.0 +288 keep constant 0.0 +289 keep constant 0.0 +290 keep constant 0.0 +291 keep constant 0.0 +292 keep constant 0.0 +293 keep constant 0.0 +294 keep constant 0.0 +295 keep constant 0.0 +296 keep constant 0.0 +297 keep constant 0.0 +298 keep constant 0.0 +299 keep constant 0.0 +300 keep constant 0.0 +301 keep constant 0.0 +302 keep constant 0.0 +303 keep constant 0.0 +304 keep constant 0.0 +305 keep constant 0.0 +306 keep constant 0.0 +307 keep constant 0.0 +308 keep constant 0.0 +309 keep constant 0.0 +310 keep constant 0.0 +311 keep constant 0.0 +312 keep constant 0.0 +313 keep constant 0.0 +314 keep constant 0.0 +315 keep constant 0.0 +316 keep constant 0.0 +317 keep constant 0.0 +318 keep constant 0.0 +319 keep constant 0.0 +320 keep constant 0.0 +321 keep constant 0.0 +322 keep constant 0.0 +323 keep constant 0.0 +324 keep constant 0.0 +325 keep constant 0.0 +326 keep constant 0.0 +327 keep constant 0.0 +328 keep constant 0.0 +329 keep constant 0.0 +330 keep constant 0.0 +331 keep constant 0.0 +332 keep constant 0.0 +333 keep constant 0.0 +334 keep constant 0.0 +335 keep constant 0.0 +336 keep constant 0.0 +337 keep constant 0.0 +338 keep constant 0.0 +339 keep constant 0.0 +340 keep constant 0.0 +341 keep singleton 0.0 +342 keep constant 0.0 +343 keep constant 0.0 +344 keep constant 0.0 +345 keep constant 0.0 +346 keep constant 0.0 +347 keep constant 0.0 +348 keep constant 0.0 +349 keep constant 0.0 +350 keep constant 0.0 +351 keep constant 0.0 +352 keep constant 0.0 +353 keep constant 0.0 +354 keep constant 0.0 +355 keep constant 0.0 +356 keep constant 0.0 +357 keep constant 0.0 +358 keep constant 0.0 +359 keep constant 0.0 +360 keep constant 0.0 +361 keep constant 0.0 +362 keep constant 0.0 +363 keep constant 0.0 +364 keep constant 0.0 +365 keep constant 0.0 +366 keep constant 0.0 +367 keep constant 0.0 +368 keep constant 0.0 +369 keep constant 0.0 +370 keep constant 0.0 +371 keep constant 0.0 +372 keep constant 0.0 +373 keep constant 0.0 +374 keep constant 0.0 +375 keep constant 0.0 +376 keep constant 0.0 +377 keep constant 0.0 +378 keep constant 0.0 +379 keep constant 0.0 +380 keep constant 0.0 +381 keep constant 0.0 +382 keep constant 0.0 +383 keep constant 0.0 +384 keep constant 0.0 +385 keep constant 0.0 +386 keep constant 0.0 +387 keep constant 0.0 +388 keep constant 0.0 +389 keep constant 0.0 +390 keep constant 0.0 +391 keep constant 0.0 +392 keep constant 0.0 +393 keep constant 0.0 +394 keep singleton 0.0 +395 keep constant 0.0 +396 keep constant 0.0 +397 keep constant 0.0 +398 keep constant 0.0 +399 keep constant 0.0 +400 keep constant 0.0 +401 keep constant 0.0 +402 keep constant 0.0 +403 keep constant 0.0 +404 keep constant 0.0 +405 keep constant 0.0 +406 keep constant 0.0 +407 keep constant 0.0 +408 keep constant 0.0 +409 keep constant 0.0 +410 keep constant 0.0 +411 keep constant 0.0 +412 keep constant 0.0 diff --git a/tests/sample_files/OG0002774.aln.afa.txt b/tests/sample_files/OG0002774.aln.afa.txt new file mode 100644 index 0000000..d01d70f --- /dev/null +++ b/tests/sample_files/OG0002774.aln.afa.txt @@ -0,0 +1,14 @@ +>sample8_000590-T1 +-------------------------------MELTEEDIQPQMTRRRPGQSALTTPRNEKDRVEIQSGTEFGITLGTPIG +MMVRNEDQRPKDYGGSTMDLYPRPSHADYTYLEKYGVKASSGGGRSSARETIGRVAAGAIAEKYLRLSHGVEIVAFVSSV +GNEHLFPPTPEHPSPSTNPEFLKLIETIDRKTVDAFVPTRCPNEEAAARMTKVIETFRDNQDSIGGTVTCVIRNVPVGLG +EPCFDKLEAKLAHAMLSIPATKGFEIGSGFGGCEVPGSIHNDPFTVSEVQTRTGSTQRLTTKTNNSGGIQGGISNGAPIY +FRVAFKPPATIGQAQTTASYSFEEGILEAKGRHDPCVTPRAVPIVEAMSALVVMDALMAQYARESAKNLLPPLPSTLPTK +PTLGSSGAPASS +>sample25_002627-T1 +MSTWGEYFRVTTYGESHCRSVGCIVDGCPPGMELTEEDIQPQMTRRRPGQSALTTPRNEKDRVEIQSGTEFGITLGTPIG +MMVRNEDQRPKDYGGSTMDLYPRPSHADYTYLEKYGVKASSGGGRSSARETIGRVAAGAIAEKYLRLSHGVEIVAFVSSV +GNEHLFPPTPEHPSPSTNPEFLKLIETIDRKTVDAFVPTRCPNEEAAARMTKVIETFRDNQDSIGGTVTCVIRNVPVGLG +EPCFDKLEAKLAHAMLSIPATKGFEIGSGFGGCEVPGSIHNDPFTVSEVQTRTGSTQRLTTKTNNSGGIQGGISNGAPIY +FRVAFKPPATIGQAQTTASYSFEEGILEAKGRHDPCVTPRAVPIVEAMSALVVMDALMAQYARESAKNLLPPLPSTLPTK +PTLGSSGAPASS diff --git a/tests/sample_files/OG0002774.mrna.fa.txt b/tests/sample_files/OG0002774.mrna.fa.txt new file mode 100644 index 0000000..4e5df53 --- /dev/null +++ b/tests/sample_files/OG0002774.mrna.fa.txt @@ -0,0 +1,4 @@ +>sample8_000590-T1 +ATGGAGCTTACAGAGGAAGACATCCAACCCCAGATGACTCGAAGACGTCCCGGGCAGAGTGCTCTAACGACGCCTCGAAATGAAAAAGACCGAGTAGAGATCCAGTCTGGAACGGAGTTCGGCATCACCCTGGGTACCCCGATTGGAATGATGGTGCGCAACGAGGATCAGAGACCCAAGGACTACGGTGGCAGCACAATGGATCTCTACCCTCGTCCCAGTCACGCTGATTATACTTACCTGGAGAAATACGGTGTCAAGGCGAGCAGCGGTGGTGGCCGGAGTAGTGCCCGCGAGACCATTGGCCGTGTCGCCGCAGGAGCCATTGCGGAGAAGTACCTACGCCTGTCGCATGGTGTCGAAATTGTCGCCTTTGTGTCCTCCGTTGGTAACGAACACCTTTTCCCGCCGACCCCCGAGCACCCTTCTCCATCGACCAACCCTGAGTTCCTGAAGCTCATCGAGACCATCGACCGTAAGACTGTCGATGCCTTCGTCCCCACTCGCTGCCCGAACGAGGAGGCGGCGGCGCGCATGACAAAAGTGATCGAGACTTTCCGGGACAACCAAGATAGCATCGGCGGCACCGTCACCTGCGTGATCCGCAACGTCCCCGTCGGCCTGGGCGAGCCTTGCTTCGACAAGCTCGAGGCCAAGCTGGCGCACGCCATGCTCAGCATCCCCGCCACCAAGGGCTTTGAGATCGGCTCGGGCTTCGGTGGCTGCGAGGTCCCCGGCTCCATCCACAACGACCCCTTCACCGTCTCCGAGGTCCAGACCCGCACCGGCAGCACACAGCGCCTGACCACCAAGACCAACAACTCCGGCGGCATCCAGGGCGGGATCTCCAACGGCGCTCCCATCTATTTCCGCGTTGCCTTCAAGCCCCCCGCCACCATCGGCCAGGCTCAGACCACCGCCTCTTACAGCTTCGAGGAGGGCATCCTCGAGGCCAAGGGCCGCCACGACCCCTGCGTTACCCCTCGTGCTGTCCCCATCGTCGAGGCCATGTCCGCCCTCGTCGTCATGGATGCGCTCATGGCCCAGTATGCCCGCGAAAGCGCAAAGAATTTACTGCCCCCGCTGCCCAGCACCCTCCCTACCAAACCGACTCTCGGCTCCAGCGGTGCTCCCGCCTCTTCATAG +>sample25_002627-T1 +ATGTCGACGTGGGGAGAATACTTTCGGGTCACCACCTATGGTGAATCCCATTGCCGCTCTGTCGGCTGCATCGTCGATGGCTGCCCTCCAGGCATGGAGCTTACAGAGGAAGACATCCAACCCCAGATGACTCGAAGACGTCCCGGGCAGAGTGCTCTAACGACGCCTCGAAATGAAAAAGACCGAGTAGAGATCCAGTCTGGAACGGAGTTCGGCATCACCCTGGGTACCCCGATTGGAATGATGGTGCGCAACGAGGATCAGAGACCCAAGGACTACGGTGGCAGCACAATGGATCTCTACCCTCGTCCCAGTCACGCTGATTATACTTACCTGGAGAAATACGGTGTCAAGGCGAGCAGCGGTGGTGGCCGGAGTAGTGCCCGCGAGACCATTGGCCGTGTCGCCGCAGGAGCCATTGCGGAGAAGTACCTACGCCTGTCGCATGGTGTCGAAATTGTCGCCTTTGTGTCCTCCGTTGGTAACGAACACCTTTTCCCGCCGACCCCCGAGCACCCTTCTCCATCGACCAACCCTGAGTTCCTGAAGCTCATCGAGACCATCGACCGTAAGACTGTCGATGCCTTCGTCCCCACTCGCTGCCCGAACGAGGAGGCGGCGGCACGCATGACAAAAGTGATCGAGACTTTCCGGGACAACCAAGATAGCATCGGCGGCACCGTCACCTGCGTGATCCGCAACGTCCCCGTCGGCCTGGGCGAGCCTTGCTTCGACAAGCTCGAGGCCAAGCTGGCGCACGCCATGCTCAGCATCCCCGCCACCAAGGGCTTTGAGATCGGCTCGGGCTTCGGTGGCTGCGAGGTCCCCGGCTCCATCCACAACGACCCCTTCACCGTCTCCGAGGTCCAGACCCGCACCGGCAGCACACAGCGCCTGACCACCAAGACCAACAACTCCGGCGGCATCCAGGGCGGGATCTCCAACGGCGCTCCCATCTATTTCCGCGTTGCCTTCAAGCCCCCCGCCACCATCGGCCAGGCTCAGACCACCGCCTCTTACAGCTTCGAGGAGGGCATCCTCGAGGCCAAGGGCCGCCACGACCCCTGCGTTACCCCTCGTGCTGTCCCCATCGTCGAGGCCATGTCCGCCCTCGTCGTCATGGATGCGCTCATGGCCCAGTATGCCCGCGAAAGCGCAAAGAATTTACTGCCCCCGCTGCCCAGCACCCTCCCTACCAAACCGACTCTCGGCTCCAGCGGTGCTCCCGCCTCTTCATAG