diff --git a/orangecontrib/text/widgets/owdocumentscorer.py b/orangecontrib/text/widgets/owdocumentscorer.py index 281bfd3b9..76cc226f0 100644 --- a/orangecontrib/text/widgets/owdocumentscorer.py +++ b/orangecontrib/text/widgets/owdocumentscorer.py @@ -3,7 +3,7 @@ from typing import List import numpy as np -from Orange.data import Domain, StringVariable, Table +from Orange.data import Domain, StringVariable, Table, ContinuousVariable from Orange.util import wrap_callback from Orange.widgets.utils.concurrent import ConcurrentWidgetMixin, TaskState from Orange.widgets.widget import Input, Msg, Output, OWWidget @@ -42,7 +42,7 @@ def _word_ratio(corpus, words, callback): def _embedding_similarity(corpus, words, callback, embedding_language): ticks = iter(np.linspace(0, 0.8, len(corpus) + len(words))) - # TODO: current embedding only report success unify them to report progress float + # TODO: embedders currently report only success (bool); unify them to report progress as a float def emb_cb(sucess: bool): if sucess: callback(next(ticks)) @@ -58,9 +58,10 @@ def emb_cb(sucess: bool): SCORING_METHODS = { - "word_frequency": ("Word frequency", _word_frequency), - "word_ratio": ("Word ratio", _word_ratio), - "embedding_similarity": ("Embedding similarity", _embedding_similarity), + # key: (Method's name, Method's function, Tooltip) + "word_frequency": ("Word frequency", _word_frequency, "Relative frequency of the word in the document."), + "word_ratio": ("Word ratio", _word_ratio, "Percentage of words in the document (Jaccard index)."), + "embedding_similarity": ("Embedding similarity", _embedding_similarity, "Cosine distance between the document and the word."), } ADDITIONAL_OPTIONS = { @@ -130,6 +131,7 @@ def callback(i: float): state.set_partial_result((sm, aggregation, scs)) +# todo: get rid of it def set_cls_attributes(cls): """ Class decorator that set widget settings dynamically for each scoring method @@ -149,7 +151,7 @@ def set_cls_attributes(cls): @set_cls_attributes class 
OWDocumentScorer(OWWidget, ConcurrentWidgetMixin): - name = "Score documents" + name = "Score Documents" description = "" icon = "icons/CorpusViewer.svg" priority = 500 @@ -168,7 +170,7 @@ class Outputs: class Warning(OWWidget.Warning): missing_words = Msg("Provide words on the input") missing_corpus = Msg("Provide corpus on the input") - corpus_not_normalized = Msg("Use preprocesses to normalize corpus.") + corpus_not_normalized = Msg("Use Preprocess Text to normalize corpus.") class Error(OWWidget.Error): unknown_err = Msg("An error occurred.\n{}") @@ -185,11 +187,11 @@ def __init__(self): def _setup_control_area(self): box = gui.widgetBox(self.controlArea, "Scoring method") - for value, (n, _) in SCORING_METHODS.items(): + for value, (n, _, tt) in SCORING_METHODS.items(): # TODO: Gui - fix controlos layout b = gui.hBox(box, margin=0) gui.checkBox( - b, self, value, label=n, callback=self._setting_changed + b, self, value, label=n, callback=self._setting_changed, tooltip=tt ) if value in ADDITIONAL_OPTIONS: value, options = ADDITIONAL_OPTIONS[value] @@ -252,6 +254,7 @@ def set_data(self, corpus: Corpus): @Inputs.words def set_words(self, words: Table): + # todo: handle case when no type=words if words is not None: self.Warning.missing_words.clear() words_attr = next( @@ -272,7 +275,7 @@ def _send_output(self, scores, labels): orig = self.corpus domain = orig.domain corpus = Corpus( - Domain(domain.X, domain.Y, domain.metas + tuple(labels)), + Domain(domain.attributes, domain.class_var, metas=domain.metas + tuple(ContinuousVariable(l) for l in labels)), orig.X, orig.Y, np.hstack([orig.metas, scores]), @@ -293,11 +296,13 @@ def _fill_table(self, scores, labels): def _fill_and_output(self): scores, labels = self._prepare_scores() self._fill_table(scores, labels) + self._send_output(scores, labels) def _clear(self): self.scores = {} self.cancel() - self._fill_and_output() + if self.corpus is not None: + self._fill_and_output() self.commit() def 
_setting_changed(self): @@ -350,7 +355,6 @@ def _get_active_scorers(self): return [attr for attr in SCORING_METHODS if getattr(self, attr)] def _get_active_aggregation(self): - # todo: self.aggregations value instead of int return list(AGGREGATIONS.keys())[self.aggregation] @staticmethod