biolab · ajdapretnar · Mar 11, 2021 · Mar 8, 2021 · Mar 8, 2021
diff --git a/orangecontrib/text/corpus.py b/orangecontrib/text/corpus.py
@@ -554,14 +554,20 @@ def from_table(cls, domain, source, row_indices=...):
 
     @classmethod
     def from_numpy(cls, *args, **kwargs):
-        c = super().from_numpy(*args, **kwargs)
-        c._set_unique_titles()
+        t = super().from_numpy(*args, **kwargs)
+        # t is a Corpus, but its constructor was not called since from_numpy
+        # calls just class method __new__; call it here to set default values
+        # for attributes such as _titles, _tokens, preprocessors, text_features
+        c = Corpus(t.domain, t.X, t.Y, t.metas, t.W, ids=t.ids)
         return c
 
     @classmethod
     def from_list(cls, domain, rows, weights=None):
-        c = super().from_list(domain, rows, weights)
-        c._set_unique_titles()
+        t = super().from_list(domain, rows, weights)
+        # t is a Corpus, but its constructor was not called since from_list
+        # calls just class method __new__; call it here to set default values
+        # for attributes such as _titles, _tokens, preprocessors, text_features
+        c = Corpus(t.domain, t.X, t.Y, t.metas, t.W, ids=t.ids)
         return c
 
     @classmethod

diff --git a/orangecontrib/text/tests/test_corpus.py b/orangecontrib/text/tests/test_corpus.py
@@ -53,6 +53,32 @@ def test_corpus_from_file_with_tab(self):
         c2 = Corpus.from_file('book-excerpts.tab')
         self.assertEqual(c, c2)
 
+    def test_corpus_from_numpy(self):
+        domain = Domain(
+            [], metas=[StringVariable("title"), StringVariable("a")]
+        )
+        corpus = Corpus.from_numpy(
+            domain,
+            np.empty((2, 0)),
+            metas=np.array([["title1", "a"], ["title2", "b"]])
+        )
+        self.assertEqual(2, len(corpus))
+        assert_array_equal(["Document 1", "Document 2"], corpus.titles)
+        self.assertListEqual([StringVariable("title")], corpus.text_features)
+        self.assertIsNone(corpus._tokens)
+        self.assertListEqual([], corpus.used_preprocessor.preprocessors)
+
+    def test_corpus_from_list(self):
+        domain = Domain(
+            [], metas=[StringVariable("title"), StringVariable("a")]
+        )
+        corpus = Corpus.from_list(domain, [["title1", "a"], ["title2", "b"]])
+        self.assertEqual(2, len(corpus))
+        assert_array_equal(["Document 1", "Document 2"], corpus.titles)
+        self.assertListEqual([StringVariable("title")], corpus.text_features)
+        self.assertIsNone(corpus._tokens)
+        self.assertListEqual([], corpus.used_preprocessor.preprocessors)
+
     def test_corpus_from_file_missing(self):
         with self.assertRaises(FileNotFoundError):
             Corpus.from_file('missing_file')

diff --git a/orangecontrib/text/widgets/owcorpus.py b/orangecontrib/text/widgets/owcorpus.py
@@ -82,7 +82,7 @@ def __init__(self):
 
         # Used Text Features
         fbox = gui.widgetBox(self.controlArea, orientation=0)
-        ubox = gui.widgetBox(fbox, "Used text features", addSpace=False)
+        ubox = gui.widgetBox(fbox, "Used text features")
         self.used_attrs_model = VariableListModel(enable_dnd=True)
         self.used_attrs_view = VariablesListItemView()
         self.used_attrs_view.setModel(self.used_attrs_model)
@@ -94,7 +94,7 @@ def __init__(self):
         aa.rowsRemoved.connect(self.update_feature_selection)
 
         # Ignored Text Features
-        ibox = gui.widgetBox(fbox, "Ignored text features", addSpace=False)
+        ibox = gui.widgetBox(fbox, "Ignored text features")
         self.unused_attrs_model = VariableListModel(enable_dnd=True)
         self.unused_attrs_view = VariablesListItemView()
         self.unused_attrs_view.setModel(self.unused_attrs_model)
@@ -146,6 +146,7 @@ def _load_corpus(path: str, data: Table, state: TaskState) -> Corpus:
     def open_file(self, path=None, data=None):
         self.closeContext()
         self.Error.clear()
+        self.cancel()
         self.unused_attrs_model[:] = []
         self.used_attrs_model[:] = []
         self.start(self._load_corpus, path, data)
@@ -158,7 +159,8 @@ def on_done(self, corpus: Corpus) -> None:
         self.update_output_info()
         self._setup_title_dropdown()
         self.used_attrs = list(self.corpus.text_features)
-        if not self.corpus.text_features:
+        all_str_features = [f for f in self.corpus.domain.metas if f.is_string]
+        if not all_str_features:
             self.Error.corpus_without_text_features()
             self.Outputs.corpus.send(None)
             return

diff --git a/orangecontrib/text/widgets/tests/test_owcorpus.py b/orangecontrib/text/widgets/tests/test_owcorpus.py
@@ -11,7 +11,7 @@
 
 class TestOWCorpus(WidgetTest):
     def setUp(self):
-        self.widget = self.create_widget(OWCorpus)
+        self.widget: OWCorpus = self.create_widget(OWCorpus)
 
     def check_output(self, sel_title):
         """
@@ -286,6 +286,40 @@ def test_keep_selected_variables(self):
         self.wait_until_finished()
         self.assertListEqual(list(prew_selected), self.widget.used_attrs)
 
+    def test_no_text_feature(self):
+        """
+        Test with data that has empty text_features. The widget should not
+        show the error, but should have all features unused.
+        """
+        # The widget already loads book-excerpts from file and stores context
+        # settings; this call restores them to defaults, otherwise the Text
+        # variable is moved to used_attributes by the context
+        self.widget.settingsHandler.reset_to_original(self.widget)
+        data = Corpus.from_file("book-excerpts")
+        data.text_features = []
+        self.send_signal(self.widget.Inputs.data, data)
+        self.wait_until_finished()
+        self.assertFalse(
+            self.widget.Error.corpus_without_text_features.is_shown()
+        )
+        self.assertEqual(0, len(list(self.widget.used_attrs_model)))
+        self.assertListEqual(
+            [data.domain["Text"]],
+            list(self.widget.unused_attrs_model)
+        )
+
+    def test_corpus_without_text_features(self):
+        """
+        Test if corpus_without_text_features is correctly raised for data
+        without text features
+        """
+        data = Table("iris")
+        self.send_signal(self.widget.Inputs.data, data)
+        self.wait_until_finished()
+        self.assertTrue(
+            self.widget.Error.corpus_without_text_features.is_shown()
+        )
+
 
 if __name__ == "__main__":
     unittest.main()