Word Cloud: Run counting in new thread

biolab · Feb 26, 2020 · 2c928c8 · 2c928c8
1 parent ecb7162
commit 2c928c8
Show file tree

Hide file tree

Showing 2 changed files with 82 additions and 30 deletions.
diff --git a/orangecontrib/text/widgets/owwordcloud.py b/orangecontrib/text/widgets/owwordcloud.py
@@ -2,7 +2,7 @@
 from collections import Counter
 from itertools import cycle
 from math import pi as PI
-from typing import Dict, List, Optional
+from typing import Dict, List, Optional, Tuple
 
 import numpy as np
 from AnyQt import QtCore
@@ -12,8 +12,9 @@
 from Orange.data import ContinuousVariable, Domain, StringVariable, Table
 from Orange.data.util import scale
 from Orange.widgets import gui, settings, widget
+from Orange.widgets.utils.concurrent import ConcurrentWidgetMixin, TaskState
 from Orange.widgets.utils.itemmodels import PyTableModel
-from Orange.widgets.widget import Input, Output
+from Orange.widgets.widget import Input, Output, OWWidget
 from orangecontrib.text.corpus import Corpus
 from orangecontrib.text.topics import Topic
 
@@ -26,6 +27,51 @@
 N_BEST_PLOTTED = 200
 
 
+def _bow_words(corpus):
+    """
+    Map the name of every bag of words feature to the mean of its column in
+    `corpus.X`, keeping only the features whose mean is positive.
+    """
+    column_means = {}
+    for column, feature in enumerate(corpus.domain.attributes):
+        # only attributes flagged with "bow-feature" are considered
+        if feature.attributes.get("bow-feature", False):
+            column_means[feature.name] = corpus.X[:, column].mean()
+    # non-positive means are dropped (a mean of 0 is a non-existing word)
+    return {
+        word: mean for word, mean in column_means.items() if mean > 0
+    }
+
+
+def count_words(data: Corpus, state: TaskState) -> Tuple[Counter, bool]:
+    """
+    Count the words of a corpus for the word cloud widget. The function is
+    called in a separate thread by the concurrent mixin.
+
+    Parameters
+    ----------
+    data
+        Corpus whose words are counted.
+    state
+        Task state used to report the status message and the progress.
+
+    Returns
+    -------
+    A tuple of the word counter and a boolean that is True when the counts
+    are average bag of words weights and False when they are ngram counts.
+    """
+    state.set_status("Calculating...")
+    # Progress is reported in percent (0-100): the value is forwarded to the
+    # widget's progress bar, so 0.5 and 1 would leave the bar at about 1 %.
+    state.set_progress_value(0)
+    bow_counts = _bow_words(data)
+    state.set_progress_value(50)
+    if bow_counts:
+        # bag of words features are present: use their average weights
+        corpus_counter = Counter(bow_counts)
+    else:
+        # no bag of words features: count every ngram of every document
+        corpus_counter = Counter(
+            w for doc in data.ngrams for w in doc
+        )
+    state.set_progress_value(100)
+    return corpus_counter, bool(bow_counts)
+
+
 class TableModel(PyTableModel):
     def __init__(self, precision, **kwargs):
         super().__init__(**kwargs)
@@ -55,7 +101,7 @@ def set_precision(self, precision: int):
         self.precision = precision
 
 
-class OWWordCloud(widget.OWWidget):
+class OWWordCloud(OWWidget, ConcurrentWidgetMixin):
     name = "Word Cloud"
     priority = 510
     icon = "icons/WordCloud.svg"
@@ -85,7 +131,8 @@ class Info(widget.OWWidget.Information):
         bow_weights = widget.Msg("Showing bag of words weights.")
 
     def __init__(self):
-        super().__init__()
+        OWWidget.__init__(self)
+        ConcurrentWidgetMixin.__init__(self)
         self.n_topic_words = 0
         self.documents_info_str = ""
         self.webview = None
@@ -225,7 +272,7 @@ def define_colors(
             # positive and negative numbers
             palette = TOPIC_COLORS if self.words_color else GRAY_TOPIC_COLORS
             colors = {
-                word: palette[weight >= 0]
+                word: palette[int(weight >= 0)]
                 for word, weight in zip(words, weights)
             }
         else:
@@ -293,7 +340,6 @@ def is_whole(d):
 
         words, weights = words[:N_BEST_PLOTTED], weights[:N_BEST_PLOTTED]
         self.shown_words, self.shown_weights = words, weights
-
         # Repopulate table
         self.tablemodel.set_precision(
             0 if all(is_whole(w) for w in weights) else 2
@@ -316,12 +362,12 @@ def is_whole(d):
             len(word) * float(weight) for word, weight in
             self.wordlist
         ])
-
         self.on_cloud_pref_change()
 
     @Inputs.topic
     def on_topic_change(self, data):
         self.topic = data
+        self.handle_input()
 
     def _apply_topic(self):
         data = self.topic
@@ -372,30 +418,19 @@ def on_corpus_change(self, data):
 
         self.corpus_counter = Counter()
         if data is not None:
-            bow_counts = self._bow_words()
-            if bow_counts:
-                self.Info.bow_weights()
-                self.corpus_counter = Counter(bow_counts)
-            else:
-                self.corpus_counter = Counter(
-                    w for doc in data.ngrams for w in doc
-                )
+            self.start(count_words, data)
+        else:
+            self.handle_input()
         self.create_weight_list()
 
-    def _bow_words(self):
-        """
-        This function extract words from bag of words features and assign them
-        the frequency which is average bow count.
-        """
-        average_bows = {
-            f.name: self.corpus.X[:, i].mean()
-            for i, f in enumerate(self.corpus.domain.attributes)
-            if f.attributes.get("bow-feature", False)
-        }
-        # return only positive bow weights (those == 0 are non-existing words)
-        return {f: w for f, w in average_bows.items() if w > 0}
-
-    def handleNewSignals(self):
+    def on_done(self, result: Tuple[Counter, bool]) -> None:
+        """
+        Store the word counts from `result`, rebuild the weight list and
+        run `handle_input`. The bag of words info message is shown when the
+        second item of `result` is true.
+        """
+        counter, from_bow = result
+        self.corpus_counter = counter
+        self.create_weight_list()
+        if from_bow:
+            self.Info.bow_weights()
+        self.handle_input()
+
+    def handle_input(self):
         if self.topic is not None and len(self.topic):
             self._apply_topic()
         elif self.corpus is not None and len(self.corpus):
@@ -408,7 +443,6 @@ def handleNewSignals(self):
         self.Warning.topic_precedence(
             shown=self.corpus is not None and self.topic is not None
         )
-
         if self.topic is not None or self.corpus is not None:
             if self.selected_words:
                 self.update_selection(self.selected_words)

diff --git a/orangecontrib/text/widgets/tests/test_owworldcloud.py b/orangecontrib/text/widgets/tests/test_owworldcloud.py
@@ -40,6 +40,7 @@ def test_data(self):
         """
         self.send_signal(self.widget.Inputs.corpus, self.corpus)
         self.send_signal(self.widget.Inputs.corpus, None)
+        self.wait_until_finished()
 
     def test_empty_data(self):
         """
@@ -48,6 +49,7 @@ def test_empty_data(self):
         """
         self.send_signal(self.widget.Inputs.corpus, self.corpus)
         self.send_signal(self.widget.Inputs.corpus, self.corpus[:0])
+        self.wait_until_finished()
 
     def test_bow_features(self):
         """
@@ -62,6 +64,7 @@ def test_bow_features(self):
             v.attributes["bow-feature"] = True
 
         self.send_signal(self.widget.Inputs.corpus, data)
+        self.wait_until_finished()
         weights = list(zip(*sorted(self.widget.corpus_counter.items())))[1]
         # due to computation error in computing mean use array_almost_equal
         np.testing.assert_array_almost_equal(weights, [1, 2, 2])
@@ -86,6 +89,7 @@ def test_bow_features(self):
             v.attributes["bow-feature"] = True
 
         self.send_signal(self.widget.Inputs.corpus, data)
+        self.wait_until_finished()
         weights = list(zip(*sorted(self.widget.corpus_counter.items())))[1]
         np.testing.assert_array_almost_equal(weights, [1, 2])
 
@@ -109,8 +113,10 @@ def test_bow_info(self):
         # no data no info
         self.assertFalse(self.widget.Info.bow_weights.is_shown())
         self.send_signal(self.widget.Inputs.corpus, data)
+        self.wait_until_finished()
         self.assertFalse(self.widget.Info.bow_weights.is_shown())
         self.send_signal(self.widget.Inputs.corpus, None)
+        self.wait_until_finished()
         self.assertFalse(self.widget.Info.bow_weights.is_shown())
 
         # send bow data
@@ -120,8 +126,10 @@ def test_bow_info(self):
         for v in data.domain.attributes:
             v.attributes["bow-feature"] = True
         self.send_signal(self.widget.Inputs.corpus, data)
+        self.wait_until_finished()
         self.assertTrue(self.widget.Info.bow_weights.is_shown())
         self.send_signal(self.widget.Inputs.corpus, None)
+        self.wait_until_finished()
         self.assertFalse(self.widget.Info.bow_weights.is_shown())
 
     def test_topic(self):
@@ -141,43 +149,53 @@ def test_input_summary(self):
         insum = self.widget.info.set_input_summary = Mock()
 
         self.send_signal(self.widget.Inputs.corpus, self.corpus)
+        self.wait_until_finished()
         insum.assert_called_with("42", "9 documents with 42 words\n")
 
         self.send_signal(self.widget.Inputs.topic, self.topic)
+        self.wait_until_finished()
         insum.assert_called_with(
             "42 | 10", "9 documents with 42 words\n10 words in a topic.")
 
         self.send_signal(self.widget.Inputs.corpus, None)
+        self.wait_until_finished()
         insum.assert_called_with(f"10", "10 words in a topic.")
 
         self.send_signal(self.widget.Inputs.topic, None)
+        self.wait_until_finished()
         insum.assert_called_with(self.widget.info.NoInput)
 
         self.send_signal(self.widget.Inputs.topic, self.topic)
+        self.wait_until_finished()
         insum.assert_called_with(f"10", "10 words in a topic.")
 
     def test_output_summary(self):
+        """Check the output summary after each corpus or topic change."""
+        # the summary setter is replaced by a Mock so its calls can be checked
         outsum = self.widget.info.set_output_summary = Mock()

+        # corpus only
         self.send_signal(self.widget.Inputs.corpus, self.corpus)
+        self.wait_until_finished()
         outsum.assert_called_with(
             "0 | 0 | 42", "0 documents\n0 selected words\n42 words with counts"
         )

+        # adding a topic is expected to leave the summary unchanged
         self.send_signal(self.widget.Inputs.topic, self.topic)
+        self.wait_until_finished()
         outsum.assert_called_with(
             "0 | 0 | 42", "0 documents\n0 selected words\n42 words with counts"
         )

+        # without a corpus no output is expected, even with a topic present
         self.send_signal(self.widget.Inputs.corpus, None)
+        self.wait_until_finished()
         outsum.assert_called_with(self.widget.info.NoOutput)

+        # no inputs at all
         self.send_signal(self.widget.Inputs.topic, None)
+        self.wait_until_finished()
         outsum.assert_called_with(self.widget.info.NoOutput)
 
     def test_send_report(self):
         self.widget.send_report()
         self.send_signal(self.widget.Inputs.corpus, self.corpus)
+        self.wait_until_finished()
         self.widget.send_report()