From df2d064b42e3b0347e35dddc3be5332d7b56d98a Mon Sep 17 00:00:00 2001 From: iquasere Date: Fri, 20 Sep 2024 23:25:38 +0100 Subject: [PATCH] Removed testing artifacts --- recognizer.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/recognizer.py b/recognizer.py index c039400..cf89eb9 100644 --- a/recognizer.py +++ b/recognizer.py @@ -610,7 +610,6 @@ def check_tax_databases(smp_directory, db_directory, db_prefix, taxids, hmm_pgap taxids_lacking_db.append(taxid) else: taxids_with_db.append(taxid) - print(taxids_with_db, taxids_lacking_db) create_tax_db(smp_directory, db_directory, db_prefix, taxids_lacking_db, hmm_pgap) return taxids_with_db + taxids_lacking_db @@ -761,7 +760,7 @@ def split_fasta_by_threads(file, output_basename, threads): def taxids_of_interest(tax_file, protein_id_col, tax_col, tax_df): tax_file = pd.read_csv(tax_file, sep='\t', index_col=protein_id_col, low_memory=False) tax_file[tax_col] = tax_file[tax_col].fillna(0.0).astype(int).astype(str) - lineages, all_taxids = {'293256': ['293256', '2950009', '194924', '213115', '3031449', '200940', '2', '131567'], '119484': ['119484', '29526', '213465', '213462', '3024408', '200940', '2', '131567'], '35554': ['35554', '28231', '213422', '3031668', '3031651', '200940', '2', '131567'], '28232': ['28232', '28231', '213422', '3031668', '3031651', '200940', '2', '131567'], '2203': ['2203', '2202', '196137', '2191', '224756', '2290931', '28890', '2157', '131567'], '863': ['863', '862', '68298', '186802', '186801', '1239', '1783272', '2', '131567'], '58180': ['58180', '2909705', '194924', '213115', '3031449', '200940', '2', '131567'], '2162': ['2162', '2160', '2159', '2158', '183925', '2283794', '28890', '2157', '131567'], '0': [], '29543': ['29543', '18', '213421', '69541', '3031651', '200940', '2', '131567'], '313985': ['313985', '115782', '213422', '3031668', '3031651', '200940', '2', '131567'], '2223': ['2223', '2222', '143067', '2905377', '224756', '2290931', '28890', '2157', '131567']}, ['293256', '2950009', '194924', '213115', '3031449', '200940', '2', '131567', '119484', '29526', '213465', '213462', '3024408', '200940', '2', '131567', '35554', '28231', '213422', '3031668', '3031651', '200940', '2', '131567', '28232', '28231', '213422', '3031668', '3031651', '200940', '2', '131567', '2203', '2202', '196137', '2191', '224756', '2290931', '28890', '2157', '131567', '863', '862', '68298', '186802', '186801', '1239', '1783272', '2', '131567', '58180', '2909705', '194924', '213115', '3031449', '200940', '2', '131567', '2162', '2160', '2159', '2158', '183925', '2283794', '28890', '2157', '131567', '29543', '18', '213421', '69541', '3031651', '200940', '2', '131567', '313985', '115782', '213422', '3031668', '3031651', '200940', '2', '131567', '2223', '2222', '143067', '2905377', '224756', '2290931', '28890', '2157', '131567'] #get_lineages_multiprocessing(set(tax_file[tax_col].tolist()), tax_df) + lineages, all_taxids = get_lineages_multiprocessing(set(tax_file[tax_col].tolist()), tax_df) return tax_file, lineages, all_taxids @@ -873,7 +872,6 @@ def custom_database_workflow(output, databases, threads=15, max_target_seqs=1, e def taxonomic_workflow( output, resources_directory, threads, lineages, all_taxids, db_prefixes, base, hmm_pgap, max_target_seqs=1, evalue=1e-5): - print('got here') all_taxids += ['131567', '0'] # cellular organisms and no taxonomy hmm_pgap_taxids = get_hmm_pgap_taxids(all_taxids, db_prefixes[base][1], hmm_pgap) taxids_with_db = check_tax_databases( @@ -887,7 +885,6 @@ def taxonomic_workflow( lineages[taxid] + ['0'] if parent_taxid in taxids_with_db] for taxid in lineages.keys()} dbs = {**dbs, **{'0': [f'{resources_directory}/dbs/{db_prefixes[base][0]}']}} # no taxonomy is annotated with all - print(dbs) db_report = pd.DataFrame(columns=['qseqid', 'sseqid', 'Superfamilies', 'Sites', 'Motifs']) for taxid in list(lineages.keys()) + ['0']: if os.path.isfile(f'{output}/tmp/{taxid}.fasta'):