-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
186 generalise rankcomparisongenerator class #192
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,29 +34,11 @@ def _calculate_rank_difference(self) -> pd.DataFrame: | |
comparison_df["rank_decrease"] = comparison_df.iloc[:, 3] - comparison_df.iloc[:, 2] | ||
return comparison_df | ||
|
||
def generate_gene_output(self, prefix: str) -> None: | ||
"""Generate the output for gene prioritisation ranks.""" | ||
self._generate_dataframe().to_csv(prefix + "-gene_rank_comparison.tsv", sep="\t") | ||
def generate_output(self, prefix: str, suffix: str) -> None: | ||
self._generate_dataframe().to_csv(prefix + suffix, sep="\t") | ||
|
||
def generate_variant_output(self, prefix: str) -> None: | ||
"""Generate the output for variant prioritisation ranks.""" | ||
self._generate_dataframe().to_csv(prefix + "-variant_rank_comparison.tsv", sep="\t") | ||
|
||
def generate_disease_output(self, prefix: str) -> None: | ||
"""Generate the output for disease prioritisation ranks.""" | ||
self._generate_dataframe().to_csv(prefix + "-disease_rank_comparison.tsv", sep="\t") | ||
|
||
def generate_gene_comparison_output(self, prefix: str) -> None: | ||
"""Generate the output for gene prioritisation rank comparison.""" | ||
self._calculate_rank_difference().to_csv(prefix + "-gene_rank_comparison.tsv", sep="\t") | ||
|
||
def generate_variant_comparison_output(self, prefix: str) -> None: | ||
"""Generate the output for variant prioritisation rank comparison.""" | ||
self._calculate_rank_difference().to_csv(prefix + "-variant_rank_comparison.tsv", sep="\t") | ||
|
||
def generate_disease_comparison_output(self, prefix: str) -> None: | ||
"""Generate the output for disease prioritisation rank comparison.""" | ||
self._calculate_rank_difference().to_csv(prefix + "-disease_rank_comparison.tsv", sep="\t") | ||
def generate_comparison_output(self, prefix: str, suffix: str) -> None: | ||
self._calculate_rank_difference().to_csv(prefix + suffix, sep="\t") | ||
|
||
|
||
class RankStatsWriter: | ||
|
@@ -118,8 +100,8 @@ def generate_benchmark_gene_output( | |
prioritisation_data: TrackRunPrioritisation, plot_type: str | ||
) -> None: | ||
"""Generate gene prioritisation outputs for benchmarking single run.""" | ||
RankComparisonGenerator(prioritisation_data.gene_prioritisation.ranks).generate_gene_output( | ||
f"{prioritisation_data.gene_prioritisation.results_dir.name}" | ||
RankComparisonGenerator(prioritisation_data.gene_prioritisation.ranks).generate_output( | ||
f"{prioritisation_data.gene_prioritisation.results_dir.name}", "-gene_rank_comparison.tsv" | ||
) | ||
generate_plots( | ||
[prioritisation_data], | ||
|
@@ -134,9 +116,10 @@ def generate_benchmark_variant_output( | |
prioritisation_data: TrackRunPrioritisation, plot_type: str | ||
) -> None: | ||
"""Generate variant prioritisation outputs for benchmarking single run.""" | ||
RankComparisonGenerator( | ||
prioritisation_data.variant_prioritisation.ranks | ||
).generate_variant_output(f"{prioritisation_data.variant_prioritisation.results_dir.name}") | ||
RankComparisonGenerator(prioritisation_data.variant_prioritisation.ranks).generate_output( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @julesjacobsen I am sure this is one of the things where you believe Python has gone wrong over a beautiful Java OO design.. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nope. The previous implementation was all useless repetition. No one wants that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I meant prioritisation_data.variant_prioritisation.ranks without encapsulation! |
||
f"{prioritisation_data.variant_prioritisation.results_dir.name}", | ||
"-variant_rank_comparison.tsv", | ||
) | ||
generate_plots( | ||
[prioritisation_data], | ||
TrackRunPrioritisation.return_variant, | ||
|
@@ -150,9 +133,10 @@ def generate_benchmark_disease_output( | |
prioritisation_data: TrackRunPrioritisation, plot_type: str | ||
) -> None: | ||
"""Generate disease prioritisation outputs for benchmarking single run.""" | ||
RankComparisonGenerator( | ||
prioritisation_data.disease_prioritisation.ranks | ||
).generate_disease_output(f"{prioritisation_data.disease_prioritisation.results_dir.name}") | ||
RankComparisonGenerator(prioritisation_data.disease_prioritisation.ranks).generate_output( | ||
f"{prioritisation_data.disease_prioritisation.results_dir.name}", | ||
"-disease_rank_comparison.tsv", | ||
) | ||
generate_plots( | ||
[prioritisation_data], | ||
TrackRunPrioritisation.return_disease, | ||
|
@@ -184,11 +168,12 @@ def generate_gene_rank_comparisons(comparison_ranks: [tuple]) -> None: | |
merged_results = merge_results( | ||
deepcopy(pair[0].gene_prioritisation.ranks), deepcopy(pair[1].gene_prioritisation.ranks) | ||
) | ||
RankComparisonGenerator(merged_results).generate_gene_comparison_output( | ||
RankComparisonGenerator(merged_results).generate_comparison_output( | ||
f"{pair[0].gene_prioritisation.results_dir.parents[0].name}_" | ||
f"{pair[0].gene_prioritisation.results_dir.name}" | ||
f"_vs_{pair[1].gene_prioritisation.results_dir.parents[0].name}_" | ||
f"{pair[1].gene_prioritisation.results_dir.name}" | ||
f"{pair[1].gene_prioritisation.results_dir.name}", | ||
"-gene_rank_comparison.tsv", | ||
) | ||
|
||
|
||
|
@@ -199,11 +184,12 @@ def generate_variant_rank_comparisons(comparison_ranks: [tuple]) -> None: | |
deepcopy(pair[0].variant_prioritisation.ranks), | ||
deepcopy(pair[1].variant_prioritisation.ranks), | ||
) | ||
RankComparisonGenerator(merged_results).generate_variant_comparison_output( | ||
RankComparisonGenerator(merged_results).generate_comparison_output( | ||
f"{pair[0].variant_prioritisation.results_dir.parents[0].name}_" | ||
f"{pair[0].variant_prioritisation.results_dir.name}" | ||
f"_vs_{pair[1].variant_prioritisation.results_dir.parents[0].name}_" | ||
f"{pair[1].variant_prioritisation.results_dir.name}" | ||
f"{pair[1].variant_prioritisation.results_dir.name}", | ||
"-variant_rank_comparison.tsv", | ||
) | ||
|
||
|
||
|
@@ -214,11 +200,12 @@ def generate_disease_rank_comparisons(comparison_ranks: [tuple]) -> None: | |
deepcopy(pair[0].disease_prioritisation.ranks), | ||
deepcopy(pair[1].disease_prioritisation.ranks), | ||
) | ||
RankComparisonGenerator(merged_results).generate_disease_comparison_output( | ||
RankComparisonGenerator(merged_results).generate_comparison_output( | ||
f"{pair[0].disease_prioritisation.results_dir.parents[0].name}_" | ||
f"{pair[0].disease_prioritisation.results_dir.name}" | ||
f"_vs_{pair[1].disease_prioritisation.results_dir.parents[0].name}_" | ||
f"{pair[1].disease_prioritisation.results_dir.name}" | ||
f"{pair[1].disease_prioritisation.results_dir.name}", | ||
"-disease_rank_comparison.tsv", | ||
) | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Technically this filename seems to violate SRP (because it exists more than once in the code).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The construction of the full filename? Or the
"-gene_rank_comparison.tsv"
part?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The string "-gene_rank_comparison.tsv" appears twice in that file.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah yes! I plan on addressing this in #193. I have the code roughly written out with how I plan it to be structured and it avoids this (used a constant for
_rank_comparison.tsv
)