From df2d064b42e3b0347e35dddc3be5332d7b56d98a Mon Sep 17 00:00:00 2001
From: iquasere <maildosequeira@gmail.com>
Date: Fri, 20 Sep 2024 23:25:38 +0100
Subject: [PATCH] Removed testing artifacts

---
 recognizer.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/recognizer.py b/recognizer.py
index c039400..cf89eb9 100644
--- a/recognizer.py
+++ b/recognizer.py
@@ -610,7 +610,6 @@ def check_tax_databases(smp_directory, db_directory, db_prefix, taxids, hmm_pgap
             taxids_lacking_db.append(taxid)
         else:
             taxids_with_db.append(taxid)
-    print(taxids_with_db, taxids_lacking_db)
     create_tax_db(smp_directory, db_directory, db_prefix, taxids_lacking_db, hmm_pgap)
     return taxids_with_db + taxids_lacking_db
 
@@ -761,7 +760,7 @@ def split_fasta_by_threads(file, output_basename, threads):
 def taxids_of_interest(tax_file, protein_id_col, tax_col, tax_df):
     tax_file = pd.read_csv(tax_file, sep='\t', index_col=protein_id_col, low_memory=False)
     tax_file[tax_col] = tax_file[tax_col].fillna(0.0).astype(int).astype(str)
-    lineages, all_taxids = {'293256': ['293256', '2950009', '194924', '213115', '3031449', '200940', '2', '131567'], '119484': ['119484', '29526', '213465', '213462', '3024408', '200940', '2', '131567'], '35554': ['35554', '28231', '213422', '3031668', '3031651', '200940', '2', '131567'], '28232': ['28232', '28231', '213422', '3031668', '3031651', '200940', '2', '131567'], '2203': ['2203', '2202', '196137', '2191', '224756', '2290931', '28890', '2157', '131567'], '863': ['863', '862', '68298', '186802', '186801', '1239', '1783272', '2', '131567'], '58180': ['58180', '2909705', '194924', '213115', '3031449', '200940', '2', '131567'], '2162': ['2162', '2160', '2159', '2158', '183925', '2283794', '28890', '2157', '131567'], '0': [], '29543': ['29543', '18', '213421', '69541', '3031651', '200940', '2', '131567'], '313985': ['313985', '115782', '213422', '3031668', '3031651', '200940', '2', '131567'], '2223': ['2223', '2222', '143067', '2905377', '224756', '2290931', '28890', '2157', '131567']}, ['293256', '2950009', '194924', '213115', '3031449', '200940', '2', '131567', '119484', '29526', '213465', '213462', '3024408', '200940', '2', '131567', '35554', '28231', '213422', '3031668', '3031651', '200940', '2', '131567', '28232', '28231', '213422', '3031668', '3031651', '200940', '2', '131567', '2203', '2202', '196137', '2191', '224756', '2290931', '28890', '2157', '131567', '863', '862', '68298', '186802', '186801', '1239', '1783272', '2', '131567', '58180', '2909705', '194924', '213115', '3031449', '200940', '2', '131567', '2162', '2160', '2159', '2158', '183925', '2283794', '28890', '2157', '131567', '29543', '18', '213421', '69541', '3031651', '200940', '2', '131567', '313985', '115782', '213422', '3031668', '3031651', '200940', '2', '131567', '2223', '2222', '143067', '2905377', '224756', '2290931', '28890', '2157', '131567'] #get_lineages_multiprocessing(set(tax_file[tax_col].tolist()), tax_df)
+    lineages, all_taxids = get_lineages_multiprocessing(set(tax_file[tax_col].tolist()), tax_df)
     return tax_file, lineages, all_taxids
 
 
@@ -873,7 +872,6 @@ def custom_database_workflow(output, databases, threads=15, max_target_seqs=1, e
 def taxonomic_workflow(
         output, resources_directory, threads, lineages, all_taxids, db_prefixes, base, hmm_pgap,
         max_target_seqs=1, evalue=1e-5):
-    print('got here')
     all_taxids += ['131567', '0']  # cellular organisms and no taxonomy
     hmm_pgap_taxids = get_hmm_pgap_taxids(all_taxids, db_prefixes[base][1], hmm_pgap)
     taxids_with_db = check_tax_databases(
@@ -887,7 +885,6 @@ def taxonomic_workflow(
         lineages[taxid] + ['0'] if parent_taxid in taxids_with_db] for taxid in lineages.keys()}
     dbs = {**dbs,
            **{'0': [f'{resources_directory}/dbs/{db_prefixes[base][0]}']}}  # no taxonomy is annotated with all
-    print(dbs)
     db_report = pd.DataFrame(columns=['qseqid', 'sseqid', 'Superfamilies', 'Sites', 'Motifs'])
     for taxid in list(lineages.keys()) + ['0']:
         if os.path.isfile(f'{output}/tmp/{taxid}.fasta'):