Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

186 generalise rankcomparisongenerator class #192

Merged
merged 3 commits into from
Sep 14, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 23 additions & 36 deletions src/pheval/analyse/generate_summary_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,29 +34,11 @@ def _calculate_rank_difference(self) -> pd.DataFrame:
comparison_df["rank_decrease"] = comparison_df.iloc[:, 3] - comparison_df.iloc[:, 2]
return comparison_df

def generate_gene_output(self, prefix: str) -> None:
"""Generate the output for gene prioritisation ranks."""
self._generate_dataframe().to_csv(prefix + "-gene_rank_comparison.tsv", sep="\t")
def generate_output(self, prefix: str, suffix: str) -> None:
self._generate_dataframe().to_csv(prefix + suffix, sep="\t")

def generate_variant_output(self, prefix: str) -> None:
"""Generate the output for variant prioritisation ranks."""
self._generate_dataframe().to_csv(prefix + "-variant_rank_comparison.tsv", sep="\t")

def generate_disease_output(self, prefix: str) -> None:
"""Generate the output for disease prioritisation ranks."""
self._generate_dataframe().to_csv(prefix + "-disease_rank_comparison.tsv", sep="\t")

def generate_gene_comparison_output(self, prefix: str) -> None:
"""Generate the output for gene prioritisation rank comparison."""
self._calculate_rank_difference().to_csv(prefix + "-gene_rank_comparison.tsv", sep="\t")

def generate_variant_comparison_output(self, prefix: str) -> None:
"""Generate the output for variant prioritisation rank comparison."""
self._calculate_rank_difference().to_csv(prefix + "-variant_rank_comparison.tsv", sep="\t")

def generate_disease_comparison_output(self, prefix: str) -> None:
"""Generate the output for disease prioritisation rank comparison."""
self._calculate_rank_difference().to_csv(prefix + "-disease_rank_comparison.tsv", sep="\t")
def generate_comparison_output(self, prefix: str, suffix: str) -> None:
self._calculate_rank_difference().to_csv(prefix + suffix, sep="\t")


class RankStatsWriter:
Expand Down Expand Up @@ -118,8 +100,8 @@ def generate_benchmark_gene_output(
prioritisation_data: TrackRunPrioritisation, plot_type: str
) -> None:
"""Generate gene prioritisation outputs for benchmarking single run."""
RankComparisonGenerator(prioritisation_data.gene_prioritisation.ranks).generate_gene_output(
f"{prioritisation_data.gene_prioritisation.results_dir.name}"
RankComparisonGenerator(prioritisation_data.gene_prioritisation.ranks).generate_output(
f"{prioritisation_data.gene_prioritisation.results_dir.name}", "-gene_rank_comparison.tsv"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically this filename seems to violate SRP (because it exists more than once in the code).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The construction of the full filename? Or the "-gene_rank_comparison.tsv" part?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The string "-gene_rank_comparison.tsv" appears twice in that file.

Copy link
Contributor Author

@yaseminbridges yaseminbridges Sep 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes! I plan on addressing this in #193. I have the code roughly written out with how I plan it to be structured and it avoids this (used a constant for _rank_comparison.tsv)

)
generate_plots(
[prioritisation_data],
Expand All @@ -134,9 +116,10 @@ def generate_benchmark_variant_output(
prioritisation_data: TrackRunPrioritisation, plot_type: str
) -> None:
"""Generate variant prioritisation outputs for benchmarking single run."""
RankComparisonGenerator(
prioritisation_data.variant_prioritisation.ranks
).generate_variant_output(f"{prioritisation_data.variant_prioritisation.results_dir.name}")
RankComparisonGenerator(prioritisation_data.variant_prioritisation.ranks).generate_output(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@julesjacobsen I am sure this is one of the things where you believe Python has gone wrong over a beautiful Java OO design..

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nope. The previous implementation was all useless repetition. No one wants that.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant prioritisation_data.variant_prioritisation.ranks without encapsulation!

f"{prioritisation_data.variant_prioritisation.results_dir.name}",
"-variant_rank_comparison.tsv",
)
generate_plots(
[prioritisation_data],
TrackRunPrioritisation.return_variant,
Expand All @@ -150,9 +133,10 @@ def generate_benchmark_disease_output(
prioritisation_data: TrackRunPrioritisation, plot_type: str
) -> None:
"""Generate disease prioritisation outputs for benchmarking single run."""
RankComparisonGenerator(
prioritisation_data.disease_prioritisation.ranks
).generate_disease_output(f"{prioritisation_data.disease_prioritisation.results_dir.name}")
RankComparisonGenerator(prioritisation_data.disease_prioritisation.ranks).generate_output(
f"{prioritisation_data.disease_prioritisation.results_dir.name}",
"-disease_rank_comparison.tsv",
)
generate_plots(
[prioritisation_data],
TrackRunPrioritisation.return_disease,
Expand Down Expand Up @@ -184,11 +168,12 @@ def generate_gene_rank_comparisons(comparison_ranks: [tuple]) -> None:
merged_results = merge_results(
deepcopy(pair[0].gene_prioritisation.ranks), deepcopy(pair[1].gene_prioritisation.ranks)
)
RankComparisonGenerator(merged_results).generate_gene_comparison_output(
RankComparisonGenerator(merged_results).generate_comparison_output(
f"{pair[0].gene_prioritisation.results_dir.parents[0].name}_"
f"{pair[0].gene_prioritisation.results_dir.name}"
f"_vs_{pair[1].gene_prioritisation.results_dir.parents[0].name}_"
f"{pair[1].gene_prioritisation.results_dir.name}"
f"{pair[1].gene_prioritisation.results_dir.name}",
"-gene_rank_comparison.tsv",
)


Expand All @@ -199,11 +184,12 @@ def generate_variant_rank_comparisons(comparison_ranks: [tuple]) -> None:
deepcopy(pair[0].variant_prioritisation.ranks),
deepcopy(pair[1].variant_prioritisation.ranks),
)
RankComparisonGenerator(merged_results).generate_variant_comparison_output(
RankComparisonGenerator(merged_results).generate_comparison_output(
f"{pair[0].variant_prioritisation.results_dir.parents[0].name}_"
f"{pair[0].variant_prioritisation.results_dir.name}"
f"_vs_{pair[1].variant_prioritisation.results_dir.parents[0].name}_"
f"{pair[1].variant_prioritisation.results_dir.name}"
f"{pair[1].variant_prioritisation.results_dir.name}",
"-variant_rank_comparison.tsv",
)


Expand All @@ -214,11 +200,12 @@ def generate_disease_rank_comparisons(comparison_ranks: [tuple]) -> None:
deepcopy(pair[0].disease_prioritisation.ranks),
deepcopy(pair[1].disease_prioritisation.ranks),
)
RankComparisonGenerator(merged_results).generate_disease_comparison_output(
RankComparisonGenerator(merged_results).generate_comparison_output(
f"{pair[0].disease_prioritisation.results_dir.parents[0].name}_"
f"{pair[0].disease_prioritisation.results_dir.name}"
f"_vs_{pair[1].disease_prioritisation.results_dir.parents[0].name}_"
f"{pair[1].disease_prioritisation.results_dir.name}"
f"{pair[1].disease_prioritisation.results_dir.name}",
"-disease_rank_comparison.tsv",
)


Expand Down