Skip to content

Commit

Permalink
added rules for generating all-genes from ppi network
Browse files Browse the repository at this point in the history
  • Loading branch information
paolomanlapaz committed Dec 20, 2024
1 parent ddb5e6a commit 80716bc
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 0 deletions.
1 change: 1 addition & 0 deletions prepare_data/workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ include: "rules/last_whole_genome_alignment.smk"
include: "rules/prepare_annotation.smk"
include: "rules/transcription_factor_binding_sites.smk"
include: "rules/module_detection.smk"
include: "rules/prepare_enrichment_analysis.smk"

#rule download_ref_genomes:
#wget and then make symbolic links
Expand Down
6 changes: 6 additions & 0 deletions prepare_data/workflow/configfile.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ network_dir: "../../static/app_data/networks"
mod_detect_dir: "../../static/raw_data/network_modules"
network_mod_dir: "../../static/app_data/network_modules"

ppi_dir: "../../static/raw_data/ppi"
raw_enrich_dir: "../../static/raw_data/enrichment_analysis"

gene_id_mapping_dir: "../../static/app_data/gene_id_mapping"
gene_desc_dir: "../../static/app_data/gene_descriptions"

networks:
"STRING-Physical": "{network_dir}/STRING-Physical.txt"

Expand Down
55 changes: 55 additions & 0 deletions prepare_data/workflow/rules/prepare_enrichment_analysis.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Resolve Network Dependency
# Each entry of config["networks"] is a path template containing a
# "{network_dir}" placeholder; replace every template in place with the
# concrete path built from config["network_dir"].
for network_name in list(config["networks"]):
    template = config["networks"][network_name]
    config["networks"][network_name] = template.format(
        network_dir=config["network_dir"]
    )

# Aggregate target rule: it has no output of its own and only lists the
# files that must exist once data preparation for enrichment analysis is
# done. For every network key in config["networks"] it requests
#   * <ppi_dir>/all_proteins/<network>/uniprot/all-proteins.txt
#   * <raw_enrich_dir>/all_genes/<network>/MSU/all-genes.txt
# plus the single mapping file
#   * <gene_id_mapping_dir>/msu_mapping/uniprot_to_msu.pickle
rule data_prep_for_enrichment_analysis:
    input:
        expand(
            "{ppi_dir}/all_proteins/{network}/uniprot/all-proteins.txt",
            ppi_dir=config["ppi_dir"],
            network=list(config["networks"])
        ),
        expand(
            "{gene_id_mapping_dir}/msu_mapping/uniprot_to_msu.pickle",
            gene_id_mapping_dir=config["gene_id_mapping_dir"]
        ),
        expand(
            "{raw_enrich_dir}/all_genes/{network}/MSU/all-genes.txt",
            raw_enrich_dir=config["raw_enrich_dir"],
            network=list(config["networks"])
        )

# Run get-nodes-from-network.py on one configured network file.
#
# Wildcards (all taken from the requested output path):
#   ppi_dir      leading directory of the output
#   network      key into config["networks"]; selects the input network file
#   file_format  sub-directory name under <network>/
#   name         base name of the output file, forwarded via --name
#
# The script receives: the network file, the output directory, and --name.
# NOTE(review): presumably the script writes <name>.txt into that directory,
# which is what the declared output expects -- confirm against the script.
#
# Every interpolated value uses the ":q" format spec so that paths containing
# spaces or shell metacharacters reach the script as single, literal
# arguments. For values made only of shell-safe characters the resulting
# command line is unchanged.
rule get_proteins_from_network:
    input:
        lambda wildcards: config["networks"][wildcards.network]
    output:
        "{ppi_dir}/all_proteins/{network}/{file_format}/{name}.txt"
    shell:
        "python scripts/network_util/get-nodes-from-network.py "
        "{input:q} "
        "{wildcards.ppi_dir:q}/all_proteins/{wildcards.network:q}/{wildcards.file_format:q} "
        "--name {wildcards.name:q}"

# Run convert_all_prot_to_gene.py for one network.
#
# Inputs:
#   proteins_file            <ppi_dir>/all_proteins/<network>/uniprot/all-proteins.txt
#   protein_to_gene_mapping  <gene_id_mapping_dir>/msu_mapping/uniprot_to_msu.pickle
# Output:
#   <raw_enrich_dir>/all_genes/<network>/<file_format>/all-genes.txt
#
# The script receives: the protein list, the mapping pickle, and the output
# directory.
# NOTE(review): the {file_format} wildcard only names the output directory;
# the inputs are always the UniProt list and the uniprot_to_msu mapping, so
# any requested file_format is built from the same inputs. The visible
# target rule only requests "MSU" -- confirm whether other values are meant
# to be supported.
#
# Every interpolated value uses the ":q" format spec so that paths containing
# spaces or shell metacharacters reach the script as single, literal
# arguments. For values made only of shell-safe characters the resulting
# command line is unchanged.
rule convert_all_proteins_to_genes:
    input:
        proteins_file="{0}/all_proteins/{{network}}/uniprot/all-proteins.txt".format(config["ppi_dir"]),
        protein_to_gene_mapping="{0}/msu_mapping/uniprot_to_msu.pickle".format(config["gene_id_mapping_dir"])
    output:
        "{raw_enrich_dir}/all_genes/{network}/{file_format}/all-genes.txt"
    shell:
        "python scripts/ppi_util/convert_all_prot_to_gene.py "
        "{input.proteins_file:q} {input.protein_to_gene_mapping:q} "
        "{wildcards.raw_enrich_dir:q}/all_genes/{wildcards.network:q}/{wildcards.file_format:q}"

# Run prepare_uniprot_to_gene.py to build a mapping pickle.
#
# Input:
#   <gene_desc_dir>/Nb/Nb_gene_descriptions.csv
# Output:
#   <gene_id_mapping_dir>/msu_mapping/<file_name>.pickle
#
# The script receives: the gene description CSV, the output directory, and
# the output base name (without extension).
# NOTE(review): presumably the script appends ".pickle" to the base name
# itself, which is what the declared output expects -- confirm against the
# script.
#
# Every interpolated value uses the ":q" format spec so that paths containing
# spaces or shell metacharacters reach the script as single, literal
# arguments. For values made only of shell-safe characters the resulting
# command line is unchanged.
rule prepare_uniprot_to_gene:
    input:
        "{0}/Nb/Nb_gene_descriptions.csv".format(config["gene_desc_dir"])
    output:
        "{gene_id_mapping_dir}/msu_mapping/{file_name}.pickle"
    shell:
        "python scripts/ppi_util/prepare_uniprot_to_gene.py "
        "{input:q} {wildcards.gene_id_mapping_dir:q}/msu_mapping "
        "{wildcards.file_name:q}"

0 comments on commit 80716bc

Please sign in to comment.