Skip to content

Commit

Permalink
Merge pull request #224 from monarch-initiative/223-refactor-docstrings-in-update_phenopacketpy
Browse files Browse the repository at this point in the history

223 refactor docstrings in update phenopacketpy
  • Loading branch information
yaseminbridges authored Dec 8, 2023
2 parents a7aeca8 + 37dc8e3 commit 9d91aa5
Showing 1 changed file with 65 additions and 8 deletions.
73 changes: 65 additions & 8 deletions src/pheval/prepare/update_phenopacket.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
from collections import defaultdict
from pathlib import Path
from typing import Union

from phenopackets import Family, Phenopacket

from pheval.utils.file_utils import all_files
from pheval.utils.phenopacket_utils import (
Expand All @@ -14,8 +17,23 @@

def update_outdated_gene_context(
phenopacket_path: Path, gene_identifier: str, hgnc_data: defaultdict
):
"""Updates the gene context of the phenopacket."""
) -> Union[Phenopacket, Family]:
"""
Update the gene context of the Phenopacket.
Args:
phenopacket_path (Path): The path to the Phenopacket file.
gene_identifier (str): Identifier to update the gene context.
hgnc_data (defaultdict): The HGNC data used for updating.
Returns:
Union[Phenopacket, Family]: The updated Phenopacket or Family.
Notes:
This function updates the gene context within the Phenopacket or Family instance.
The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id
to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
to describe the gene identifiers.
"""
phenopacket = phenopacket_reader(phenopacket_path)
interpretations = PhenopacketUtil(phenopacket).interpretations()
updated_interpretations = GeneIdentifierUpdater(
Expand All @@ -25,15 +43,42 @@ def update_outdated_gene_context(
return PhenopacketRebuilder(phenopacket).update_interpretations(updated_interpretations)


def create_updated_phenopacket(gene_identifier: str, phenopacket_path: Path, output_dir: Path):
"""Updates the gene context within the interpretations for a phenopacket."""
def create_updated_phenopacket(
    gene_identifier: str, phenopacket_path: Path, output_dir: Path
) -> None:
    """
    Update the gene context within the interpretations of one Phenopacket and write the result.

    The updated Phenopacket is written into ``output_dir`` under the same file name
    as the input file.

    Args:
        gene_identifier (str): Identifier used to update the gene context.
        phenopacket_path (Path): The path to the input Phenopacket file.
        output_dir (Path): The directory where the updated Phenopacket will be written.

    Notes:
        The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id
        to update to the current gene identifier in the Phenopacket. We recommend using the
        ENSEMBL namespace to describe the gene identifiers.
    """
    # Build the HGNC lookup first; it is passed to the gene-context update below.
    hgnc_lookup = create_hgnc_dict()
    refreshed = update_outdated_gene_context(phenopacket_path, gene_identifier, hgnc_lookup)
    # Keep the input file's name so the output mirrors the input.
    destination = output_dir.joinpath(phenopacket_path.name)
    write_phenopacket(refreshed, destination)


def create_updated_phenopackets(gene_identifier: str, phenopacket_dir: Path, output_dir: Path):
"""Updates the gene context within the interpretations for phenopackets."""
def create_updated_phenopackets(
gene_identifier: str, phenopacket_dir: Path, output_dir: Path
) -> None:
"""
Update the gene context within the interpretations for a directory of Phenopackets
and writes the updated Phenopackets.
Args:
gene_identifier (str): Identifier used to update the gene context.
phenopacket_dir (Path): The path to the input Phenopacket directory.
output_dir (Path): The directory where the updated Phenopackets will be written.
Notes:
The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id
to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
to describe the gene identifiers.
"""
hgnc_data = create_hgnc_dict()
for phenopacket_path in all_files(phenopacket_dir):
updated_phenopacket = update_outdated_gene_context(
Expand All @@ -44,8 +89,20 @@ def create_updated_phenopackets(gene_identifier: str, phenopacket_dir: Path, out

def update_phenopackets(
gene_identifier: str, phenopacket_path: Path, phenopacket_dir: Path, output_dir: Path
):
"""Update the gene identifiers in either a single phenopacket or a directory of phenopackets."""
) -> None:
"""
Update the gene identifiers in either a single phenopacket or a directory of phenopackets.
Args:
gene_identifier (str): The gene identifier to be updated.
phenopacket_path (Path): The path to a single Phenopacket file.
phenopacket_dir (Path): The directory containing multiple Phenopacket files.
output_dir (Path): The output directory to save the updated Phenopacket files.
Notes:
The gene_identifier parameter should be chosen from ensembl_id, hgnc_id, or entrez_id
to update to the current gene identifier in the Phenopacket. We recommend using the ENSEMBL namespace
to describe the gene identifiers.
"""
output_dir.mkdir(exist_ok=True)
if phenopacket_path is not None:
create_updated_phenopacket(gene_identifier, phenopacket_path, output_dir)
Expand Down

0 comments on commit 9d91aa5

Please sign in to comment.