Skip to content

Commit

Permalink
Score documents
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Apr 13, 2021
1 parent c8f807b commit 13cf312
Showing 1 changed file with 16 additions and 12 deletions.
28 changes: 16 additions & 12 deletions orangecontrib/text/widgets/owdocumentscorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from typing import List

import numpy as np
from Orange.data import Domain, StringVariable, Table
from Orange.data import Domain, StringVariable, Table, ContinuousVariable
from Orange.util import wrap_callback
from Orange.widgets.utils.concurrent import ConcurrentWidgetMixin, TaskState
from Orange.widgets.widget import Input, Msg, Output, OWWidget
Expand Down Expand Up @@ -42,7 +42,7 @@ def _word_ratio(corpus, words, callback):
def _embedding_similarity(corpus, words, callback, embedding_language):
ticks = iter(np.linspace(0, 0.8, len(corpus) + len(words)))

# TODO: current embedding only report success unify them to report progress float
# TODO: embedders currently report only success (bool); unify them to report progress as a float
def emb_cb(sucess: bool):
if sucess:
callback(next(ticks))
Expand All @@ -58,9 +58,10 @@ def emb_cb(sucess: bool):


SCORING_METHODS = {
"word_frequency": ("Word frequency", _word_frequency),
"word_ratio": ("Word ratio", _word_ratio),
"embedding_similarity": ("Embedding similarity", _embedding_similarity),
# key: (Method's name, Method's function, Tooltip)
"word_frequency": ("Word frequency", _word_frequency, "Relative frequency of the word in the document."),
"word_ratio": ("Word ratio", _word_ratio, "Percentage of words in the document (Jaccard index)."),
"embedding_similarity": ("Embedding similarity", _embedding_similarity, "Cosine distance between the document and the word."),
}

ADDITIONAL_OPTIONS = {
Expand Down Expand Up @@ -130,6 +131,7 @@ def callback(i: float):
state.set_partial_result((sm, aggregation, scs))


# TODO: get rid of this decorator (set_cls_attributes)
def set_cls_attributes(cls):
"""
Class decorator that sets widget settings dynamically for each scoring method
Expand All @@ -149,7 +151,7 @@ def set_cls_attributes(cls):

@set_cls_attributes
class OWDocumentScorer(OWWidget, ConcurrentWidgetMixin):
name = "Score documents"
name = "Score Documents"
description = ""
icon = "icons/CorpusViewer.svg"
priority = 500
Expand All @@ -168,7 +170,7 @@ class Outputs:
class Warning(OWWidget.Warning):
missing_words = Msg("Provide words on the input")
missing_corpus = Msg("Provide corpus on the input")
corpus_not_normalized = Msg("Use preprocesses to normalize corpus.")
corpus_not_normalized = Msg("Use Preprocess Text to normalize corpus.")

class Error(OWWidget.Error):
unknown_err = Msg("An error occurred.\n{}")
Expand All @@ -185,11 +187,11 @@ def __init__(self):

def _setup_control_area(self):
box = gui.widgetBox(self.controlArea, "Scoring method")
for value, (n, _) in SCORING_METHODS.items():
for value, (n, _, tt) in SCORING_METHODS.items():
# TODO: GUI - fix controls layout
b = gui.hBox(box, margin=0)
gui.checkBox(
b, self, value, label=n, callback=self._setting_changed
b, self, value, label=n, callback=self._setting_changed, tooltip=tt
)
if value in ADDITIONAL_OPTIONS:
value, options = ADDITIONAL_OPTIONS[value]
Expand Down Expand Up @@ -252,6 +254,7 @@ def set_data(self, corpus: Corpus):

@Inputs.words
def set_words(self, words: Table):
# todo: handle case when no type=words
if words is not None:
self.Warning.missing_words.clear()
words_attr = next(
Expand All @@ -272,7 +275,7 @@ def _send_output(self, scores, labels):
orig = self.corpus
domain = orig.domain
corpus = Corpus(
Domain(domain.X, domain.Y, domain.metas + tuple(labels)),
Domain(domain.attributes, domain.class_var, metas=domain.metas + tuple(ContinuousVariable(l) for l in labels)),
orig.X,
orig.Y,
np.hstack([orig.metas, scores]),
Expand All @@ -293,11 +296,13 @@ def _fill_table(self, scores, labels):
def _fill_and_output(self):
scores, labels = self._prepare_scores()
self._fill_table(scores, labels)
self._send_output(scores, labels)

def _clear(self):
self.scores = {}
self.cancel()
self._fill_and_output()
if self.corpus is not None:
self._fill_and_output()
self.commit()

def _setting_changed(self):
Expand Down Expand Up @@ -350,7 +355,6 @@ def _get_active_scorers(self):
return [attr for attr in SCORING_METHODS if getattr(self, attr)]

def _get_active_aggregation(self):
# todo: self.aggregations value instead of int
return list(AGGREGATIONS.keys())[self.aggregation]

@staticmethod
Expand Down

0 comments on commit 13cf312

Please sign in to comment.