Skip to content

Commit

Permalink
Merge pull request #349 from lenatr99/cluster_analysis_bugfix
Browse files Browse the repository at this point in the history
Cluster analysis: Fix bugs and use of Gene Selection Component
  • Loading branch information
lenatr99 authored Sep 30, 2024
2 parents d78ea79 + 8d6aad8 commit 150ee19
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 96 deletions.
189 changes: 94 additions & 95 deletions orangecontrib/bioinformatics/widgets/OWClusterAnalysis.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" OWClusterAnalysis """

import sys
import itertools

Expand Down Expand Up @@ -36,7 +37,7 @@
)
from Orange.widgets.utils.signals import Input, Output

from orangecontrib.bioinformatics.geneset.utils import GeneSetException
from orangecontrib.bioinformatics.geneset import GeneSets
from orangecontrib.bioinformatics.cluster_analysis import (
DISPLAY_GENE_SETS_COUNT,
Cluster,
Expand All @@ -47,8 +48,8 @@
from orangecontrib.bioinformatics.widgets.utils.gui import (
HTMLDelegate,
GeneScoringWidget,
GeneSetsSelection,
)
from orangecontrib.bioinformatics.widgets.components import GeneSetSelection
from orangecontrib.bioinformatics.widgets.utils.data import (
TAX_ID,
GENE_ID_COLUMN,
Expand All @@ -58,16 +59,16 @@


class ClusterAnalysisContextHandler(PerfectDomainContextHandler):
def encode_setting(self, context, setting, value):
def encode_setting(self, context, setting, value, *args):
if setting.name == 'cluster_indicators':
value = [(var.name, 100 + vartype(var)) for var in value]
return super().encode_setting(context, setting, value)
return super().encode_setting(context, setting, value, *args)

def decode_setting(self, setting, value, domain=None):
def decode_setting(self, setting, value, domain=None, *args):
return (
[domain[var[0]] for var in value]
if setting.name == 'cluster_indicators'
else super().decode_setting(setting, value, domain)
else super().decode_setting(setting, value, domain, *args)
)


Expand Down Expand Up @@ -107,6 +108,9 @@ class Error(OWWidget.Error):
'Organism in input data and custom gene sets does not match'
)
cluster_batch_conflict = Msg('Cluster and batch must not be the same variable')
custom_gene_sets_table_format = Msg(
'Custom gene sets data must have genes represented as rows.'
)

settingsHandler = ClusterAnalysisContextHandler()
cluster_indicators = ContextSetting([])
Expand Down Expand Up @@ -215,31 +219,14 @@ def __init__(self):
self.gene_scoring.set_method_design_area('scoring_method_design')
self.gene_scoring.set_test_type('scoring_test_type')

# Gene Sets widget
gene_sets_box = widgetBox(self.controlArea, "Gene Sets")
self.gs_widget = GeneSetsSelection(
gene_sets_box, self, 'stored_gene_sets_selection'
box = vBox(self.controlArea, True, margin=0)
self.gs_selection_component: GeneSetSelection = GeneSetSelection(self, box)
self.gs_selection_component.selection_changed.connect(
self._on_selection_changed
)
self.gs_widget.hierarchy_tree_widget.itemClicked.connect(
self.__gene_sets_enrichment
)

# custom gene sets area
box = vBox(self.controlArea, "Custom Gene Sets")

if self.custom_gene_set_indicator not in self.feature_model:
self.custom_gene_set_indicator = None

self.gs_label_combobox = comboBox(
box,
self,
"custom_gene_set_indicator",
sendSelectedValue=True,
model=self.feature_model,
callback=self.handle_custom_gene_sets,
)
self.gs_label_combobox.setDisabled(True)

self.gs_selection_component.sample_column_combo.hide()
self.gs_selection_component.sample_column_combo.parent().hide()
self.gs_selection_component._update_tree_widget()
# main area
splitter = QSplitter(Qt.Horizontal, self.mainArea)
self.mainArea.layout().addWidget(splitter)
Expand Down Expand Up @@ -386,7 +373,7 @@ def __create_temp_class_var(self):
row_profile = None
new_cluster_values = []
var_index_lookup = {
val: idx
(var.name, str(val)): idx
for var in self.cluster_indicators
for idx, val in enumerate(var.values)
}
Expand All @@ -396,7 +383,12 @@ def __create_temp_class_var(self):
)
for comb in cart_prod:
new_cluster_values.append(', '.join(list(comb)))
self.new_cluster_profile.append([var_index_lookup[val] for val in comb])
self.new_cluster_profile.append(
[
var_index_lookup[(var.name, str(val))]
for var, val in zip(self.cluster_indicators, comb)
]
)

row_profile_lookup = {
tuple(profile): indx
Expand All @@ -418,13 +410,15 @@ def __create_temp_class_var(self):
ca_ind = DiscreteVariable.make(
cluster_indicator_name,
values=list(new_cluster_values),
ordered=True,
)

domain = Domain(
self.input_data.domain.attributes,
self.input_data.domain.class_vars,
self.input_data.domain.metas + (ca_ind,),
(
self.input_data.domain.metas[:-1] + (ca_ind,)
if ca_ind in self.input_data.domain
else self.input_data.domain.metas + (ca_ind,)
),
)

table = self.input_data.transform(domain)
Expand Down Expand Up @@ -512,6 +506,8 @@ def filter_gene_sets(self):
# call sizeHint function
self.cluster_info_view.resizeRowsToContents()

self.commit()

def __gene_enrichment(self):
design = bool(
self.gene_scoring.get_selected_desig()
Expand All @@ -538,8 +534,8 @@ def __gene_enrichment(self):
def __gene_sets_enrichment(self):
if self.input_data:
self.Warning.no_selected_gene_sets.clear()
all_sets = self.gs_widget.get_hierarchies()
selected_sets = self.gs_widget.get_hierarchies(only_selected=True)
selected_sets = self.gs_selection_component.selection
all_sets = self.gs_selection_component.gene_sets.hierarchies()

if len(selected_sets) == 0 and len(all_sets) > 0:
self.Warning.no_selected_gene_sets()
Expand All @@ -550,7 +546,7 @@ def __gene_sets_enrichment(self):

try:
self.cluster_info_model.gene_sets_enrichment(
self.gs_widget.gs_object, selected_sets, ref_genes
self.gs_selection_component.gene_sets, selected_sets, ref_genes
)
except Exception as e:
# TODO: possible exceptions?
Expand Down Expand Up @@ -580,6 +576,34 @@ def invalidate(self, cluster_init=True):
def batch_indicator_changed(self):
self.invalidate(cluster_init=False)

def _on_selection_changed(self):
if not self.input_data or not self.gs_selection_component:
return
self.num_of_custom_sets = self.gs_selection_component.num_of_custom_sets

selected_sets = self.gs_selection_component.selection
all_sets = self.gs_selection_component.gene_sets.hierarchies()

self.Warning.no_selected_gene_sets.clear()
if not selected_sets and all_sets:
self.Warning.no_selected_gene_sets()

self.stored_gene_sets_selection = tuple(selected_sets)
ref_genes = set(self.input_genes_ids)

if not self.cluster_info_model or len(ref_genes) == 0:
return
try:
self.cluster_info_model.gene_sets_enrichment(
self.gs_selection_component.gene_sets, selected_sets, ref_genes
)
except Exception as e:
raise e

self.filter_gene_sets()
self.__update_info_box()
self.cluster_info_view.resizeRowsToContents()

@Inputs.data_table
def handle_input(self, data):
self.closeContext()
Expand All @@ -596,8 +620,6 @@ def handle_input(self, data):
self.gene_id_attribute = None
self.clusters = None

self.gs_widget.clear()
self.gs_widget.clear_gene_sets()
self.cluster_info_view.setModel(None)

self.cluster_indicators = []
Expand Down Expand Up @@ -638,6 +660,9 @@ def handle_input(self, data):
GENE_ID_ATTRIBUTE, None
)

self.gs_selection_component.set_selected_organism_by_tax_id(self.tax_id)
self.gs_selection_component._load_gene_sets()

if not self.cluster_indicator_model:
self.Error.no_cluster_indicator()
return
Expand All @@ -647,21 +672,17 @@ def handle_input(self, data):

self.openContext(self.input_data.domain)

self.gs_widget.load_gene_sets(self.tax_id)
if self.cluster_indicator_model and len(self.cluster_indicators) < 1:
self.cluster_indicators = [self.cluster_indicator_model[0]]
if self.batch_indicator_model and self.batch_indicator is None:
self.batch_indicator = self.batch_indicator_model[0]

self.invalidate()

if self.custom_data:
self.refresh_custom_gene_sets()
self._handle_future_model()
self.handle_custom_gene_sets()
else:
self.gs_selection_component._gene_sets = GeneSets()
self.handle_custom_gene_sets()

@Inputs.custom_sets
def handle_custom_input(self, data):
def handle_custom_input(self, custom_data):
self.Error.clear()
self.Warning.clear()
self.closeContext()
Expand All @@ -672,9 +693,8 @@ def handle_custom_input(self, data):
self.custom_gene_id_column = None
self.num_of_custom_sets = None
self.feature_model.set_domain(None)

if data:
self.custom_data = data
if custom_data:
self.custom_data = custom_data
self.feature_model.set_domain(self.custom_data.domain)
self.custom_tax_id = str(self.custom_data.attributes.get(TAX_ID, None))
self.custom_use_attr_names = self.custom_data.attributes.get(
Expand All @@ -686,15 +706,14 @@ def handle_custom_input(self, data):
self.custom_gene_id_column = self.custom_data.attributes.get(
GENE_ID_COLUMN, None
)

self._handle_future_model()

if self.input_data:
self.openContext(self.input_data.domain)

self.gs_label_combobox.setDisabled(True)
self.refresh_custom_gene_sets()
self.gs_selection_component.initialize_custom_gene_sets(custom_data)
self.num_of_custom_sets = self.gs_selection_component.num_of_custom_sets
else:
self.stored_gene_sets_selection = ()
self.gs_selection_component.selection = []
self.gs_selection_component.initialize_custom_gene_sets(None)
self.handle_custom_gene_sets(select_customs_flag=True)
self.__update_info_box()

def __check_organism_mismatch(self):
"""Check if organisms from different inputs match.
Expand All @@ -719,49 +738,21 @@ def handle_custom_gene_sets(self, select_customs_flag=False):
if self.custom_gene_set_indicator:
if self.custom_data is not None and self.custom_gene_id_column is not None:
if self.__check_organism_mismatch():
self.gs_label_combobox.setDisabled(True)
self.gs_selection_component.initialize_custom_gene_sets(None)
self.Error.organism_mismatch()
self.gs_widget.update_gs_hierarchy()
self.__gene_sets_enrichment()
return

if isinstance(self.custom_gene_set_indicator, DiscreteVariable):
labels = self.custom_gene_set_indicator.values
gene_sets_names = [
labels[int(idx)]
for idx in self.custom_data.get_column(
self.custom_gene_set_indicator
)
]
else:
gene_sets_names = self.custom_data.get_column(
self.custom_gene_set_indicator
)

self.num_of_custom_sets = len(set(gene_sets_names))
gene_names = self.custom_data.get_column(self.custom_gene_id_column)
hierarchy_title = (
self.custom_data.name if self.custom_data.name else 'Custom sets',
)
try:
self.gs_widget.add_custom_sets(
gene_sets_names,
gene_names,
hierarchy_title=hierarchy_title,
select_customs_flag=select_customs_flag,
)
except GeneSetException:
pass
self.gs_label_combobox.setDisabled(False)
else:
self.gs_widget.update_gs_hierarchy()

self.gs_selection_component._update_tree_widget()
self.__gene_sets_enrichment()
self.__update_info_box()

def refresh_custom_gene_sets(self):
self.gs_widget.clear_custom_sets()
# self.gs_widget.update_gs_hierarchy()
self.gs_selection_component._gene_sets.delete_sets_by_hierarchy(
self.gs_selection_component.custom_gene_set_hierarchy
)
self.gs_selection_component.custom_gene_set_hierarchy = None
self.gs_selection_component._update_tree_widget()

def gene_scores_output(self, selected_clusters):
metas = [
Expand Down Expand Up @@ -856,7 +847,15 @@ def commit(self):
selected_cluster_indexes = set()
selected_cluster_genes = set()

if not self.input_data or not selected_rows:
if self.input_data and not selected_rows:
selected_rows = [
self.cluster_info_model.index(row, 0)
for row in range(self.cluster_info_model.rowCount())
]

if not self.input_data:
self.Outputs.gene_scores.send(None)
self.Outputs.gene_set_scores.send(None)
self.Outputs.selected_data.send(None)
return

Expand Down
3 changes: 2 additions & 1 deletion orangecontrib/bioinformatics/widgets/utils/gui/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
""" GUI utils for widgets """

from numbers import Real, Integral
from collections import namedtuple

Expand Down Expand Up @@ -135,7 +136,7 @@ def sizeHint(self, option, index):
doc.setHtml(gene_obj.to_html())
doc.setTextWidth(options.rect.width() - 10)

return QSize(doc.idealWidth(), doc.size().height())
return QSize(int(doc.idealWidth()), int(doc.size().height()))

def paint(self, painter, option, index):
options = QStyleOptionViewItem(option)
Expand Down

0 comments on commit 150ee19

Please sign in to comment.