From 0ddfc9a51fe1554afd648de03c87ce94a20f837e Mon Sep 17 00:00:00 2001
From: Primoz Godec
Date: Tue, 31 May 2022 13:11:03 +0200
Subject: [PATCH] Create corpus - widget, icon and tests
---
.../text/widgets/icons/CreateCorpus.svg | 2 +
orangecontrib/text/widgets/owcreatecorpus.py | 103 +++++----
.../text/widgets/tests/test_owcreatecorpus.py | 202 ++++++++++++++++++
3 files changed, 267 insertions(+), 40 deletions(-)
create mode 100644 orangecontrib/text/widgets/icons/CreateCorpus.svg
create mode 100644 orangecontrib/text/widgets/tests/test_owcreatecorpus.py
diff --git a/orangecontrib/text/widgets/icons/CreateCorpus.svg b/orangecontrib/text/widgets/icons/CreateCorpus.svg
new file mode 100644
index 000000000..9debc8fe5
--- /dev/null
+++ b/orangecontrib/text/widgets/icons/CreateCorpus.svg
@@ -0,0 +1,2 @@
+
+
diff --git a/orangecontrib/text/widgets/owcreatecorpus.py b/orangecontrib/text/widgets/owcreatecorpus.py
index c9fe2f78d..50426ae13 100644
--- a/orangecontrib/text/widgets/owcreatecorpus.py
+++ b/orangecontrib/text/widgets/owcreatecorpus.py
@@ -1,4 +1,3 @@
-from functools import partial
from typing import List, Tuple
import numpy as np
@@ -7,9 +6,14 @@
from Orange.widgets import gui
from Orange.widgets.widget import OWWidget, Output
from AnyQt.QtWidgets import QGroupBox
-from PyQt5.QtCore import QSize
-from PyQt5.QtWidgets import QVBoxLayout, QLineEdit, QPlainTextEdit, QSizePolicy, \
- QSpacerItem
+from PyQt5.QtCore import QSize, Qt
+from PyQt5.QtWidgets import (
+ QVBoxLayout,
+ QLineEdit,
+ QPlainTextEdit,
+ QSizePolicy,
+ QPushButton,
+)
from orangewidget.settings import Setting
from orangecontrib.text import Corpus
@@ -20,21 +24,19 @@ def __init__(self):
super().__init__(parent=None)
self.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding)
- def sizeHint(self):
- sh = super().sizeHint()
- sh.setHeight(350)
- return sh
-
class CustomQPlainTextEdit(QPlainTextEdit):
editingFinished = Signal()
- def focusOutEvent(self, _):
+ def focusOutEvent(self, event):
+ # QPlainTextEdit has no editingFinished signal: run Qt's own focus-out handling, then emit ours
+ super().focusOutEvent(event)
self.editingFinished.emit()
class DocumentEditor(QGroupBox):
text_changed = Signal(str, str)
+ remove_clicked = Signal()
def __init__(self, title, text, parent=None):
super().__init__(parent)
@@ -48,87 +49,109 @@ def __init__(self, title, text, parent=None):
self.text_area.setPlaceholderText("Document text")
self.text_area.setPlainText(text)
self.text_area.editingFinished.connect(self._on_text_changed)
- self.layout().addWidget(self.title_le)
+
+ remove_button = QPushButton("x")
+ remove_button.setFixedWidth(35)
+ remove_button.setFocusPolicy(Qt.NoFocus)
+ remove_button.clicked.connect(self._on_remove_clicked)
+ box = gui.hBox(self)
+ box.layout().addWidget(self.title_le)
+ box.layout().addWidget(remove_button)
self.layout().addWidget(self.text_area)
def _on_text_changed(self):
self.text_changed.emit(self.title_le.text(), self.text_area.toPlainText())
- # todo: add x button
+ def _on_remove_clicked(self):
+ self.remove_clicked.emit()
class OWCreateCorpus(OWWidget):
name = "Create Corpus"
description = "Write/paste documents to create a corpus"
- icon = "icons/TextFile.svg"
- priority = 100 # todo
+ icon = "icons/CreateCorpus.svg"
+ priority = 120
class Outputs:
- corpus = Output('Corpus', Corpus)
+ corpus = Output("Corpus", Corpus)
want_main_area = False
- texts: List[Tuple[str, str]] = Setting([("", ""), ("", ""), ("", "")])
+ texts: List[Tuple[str, str]] = Setting([("", "")] * 3)
auto_commit: bool = Setting(True)
def __init__(self):
super().__init__()
-
self.editors = []
scroll_area = EditorsVerticalScrollArea()
self.editor_vbox = gui.vBox(self.controlArea, spacing=0)
+ self.editor_vbox.layout().setSpacing(10)
scroll_area.setWidget(self.editor_vbox)
self.controlArea.layout().addWidget(scroll_area)
for t in self.texts:
- self.add_document_editor(*t)
+ self._add_document_editor(*t)
- gui.button(self.buttonsArea, self, "Add document", self.add_new_editor)
- gui.auto_send(self.buttonsArea, self, "auto_commit")
+ add_btn = gui.button(
+ self.buttonsArea, self, "Add document", self._add_new_editor
+ )
+ add_btn.setFocusPolicy(Qt.NoFocus)
+ gui.auto_apply(self.buttonsArea, self, "auto_commit")
self.commit.now()
- def add_document_editor(self, title, text):
+ def _add_document_editor(self, title, text):
+ """Create an editor for the given title and text and add it to the layout"""
editor = DocumentEditor(title, text)
- editor.text_changed.connect(partial(self._text_changed, len(self.editors)))
+ editor.text_changed.connect(self._text_changed)
+ editor.remove_clicked.connect(self._remove_document_editor)
self.editors.append(editor)
- if len(self.editors) > 1:
- # add spacer before each item that boxes do not stick together
- # (except before the first one)
- self.editor_vbox.layout().addSpacerItem(QSpacerItem(1, 10))
self.editor_vbox.layout().addWidget(editor)
self.editor_vbox.updateGeometry()
- def remove_document_editor(self):
- if len(self.texts) > 1:
- del self.texts[-1]
- self.editor_vbox.layout().remove(self.editors[-1])
- del self.editors[-1]
-
- def add_new_editor(self):
- self.texts = [("", "")]
- self.add_document_editor(*self.texts[-1])
+ def _remove_document_editor(self):
+ """Remove the editor whose x button was clicked; the last editor is never removed"""
+ if len(self.editors) > 1:
+ editor = self.sender()
+ i = self.editors.index(editor)
+ del self.texts[i]
+ self.editor_vbox.layout().removeWidget(editor)
+ self.editors.remove(editor)
+ editor.deleteLater()
+ self.commit.deferred()
+
+ def _add_new_editor(self):
+ """Append an empty document and its editor when Add document is clicked"""
+ self.texts.append(("", ""))
+ self._add_document_editor(*self.texts[-1])
+ self.commit.deferred()
- def _text_changed(self, i, title, text):
- self.texts[i] = (title, text)
+ def _text_changed(self, title, text):
+ """Store the edited title and text of the sending editor in the texts setting"""
+ editor = self.sender()
+ self.texts[self.editors.index(editor)] = (title, text)
self.commit.deferred()
@gui.deferred
def commit(self):
+ """Create a new corpus and output it"""
doc_var = StringVariable("Document")
title_var = StringVariable("Title")
domain = Domain([], metas=[title_var, doc_var])
corpus = Corpus.from_numpy(
- domain, np.empty((len(self.texts), 0)), metas=np.array(self.texts),
- text_features=[doc_var]
+ domain,
+ np.empty((len(self.texts), 0)),
+ metas=np.array(self.texts),
+ text_features=[doc_var],
)
corpus.set_title_variable(title_var)
self.Outputs.corpus.send(corpus)
- def sizeHint(self):
- return QSize(600, 400)
+ def sizeHint(self) -> QSize:
+ return QSize(600, 600)
if __name__ == "__main__":
from orangewidget.utils.widgetpreview import WidgetPreview
+
WidgetPreview(OWCreateCorpus).run()
diff --git a/orangecontrib/text/widgets/tests/test_owcreatecorpus.py b/orangecontrib/text/widgets/tests/test_owcreatecorpus.py
new file mode 100644
index 000000000..2e8d738f0
--- /dev/null
+++ b/orangecontrib/text/widgets/tests/test_owcreatecorpus.py
@@ -0,0 +1,202 @@
+import unittest
+
+import numpy as np
+from Orange.data import StringVariable
+from Orange.widgets.tests.base import WidgetTest
+from PyQt5.QtWidgets import QPushButton
+
+from orangecontrib.text.widgets.owcreatecorpus import OWCreateCorpus
+
+
+class TestOWCreateCorpus(WidgetTest):
+ def setUp(self):
+ self.widget = self.create_widget(OWCreateCorpus)
+ self.add_document_btn = self.widget.buttonsArea.findChild(QPushButton)
+
+ def test_add_remove_editors(self):
+ self.assertEqual(3, len(self.widget.editors))
+ self.assertEqual(3, len(self.widget.texts))
+ self.assertListEqual([("", "")] * 3, self.widget.texts)
+
+ self.add_document_btn.click()
+ self.assertEqual(4, len(self.widget.editors))
+ self.assertEqual(4, len(self.widget.texts))
+ self.assertListEqual([("", "")] * 4, self.widget.texts)
+
+ self.add_document_btn.click()
+ self.assertEqual(5, len(self.widget.editors))
+ self.assertEqual(5, len(self.widget.texts))
+ self.assertListEqual([("", "")] * 5, self.widget.texts)
+
+ # click any button in editor_vbox - there are only buttons for removing
+ self.widget.editor_vbox.findChild(QPushButton).click()
+ self.assertEqual(4, len(self.widget.editors))
+ self.assertEqual(4, len(self.widget.texts))
+ self.assertListEqual([("", "")] * 4, self.widget.texts)
+
+ self.widget.editor_vbox.findChild(QPushButton).click()
+ self.assertEqual(3, len(self.widget.editors))
+ self.assertEqual(3, len(self.widget.texts))
+ self.assertListEqual([("", "")] * 3, self.widget.texts)
+
+ self.widget.editor_vbox.findChild(QPushButton).click()
+ self.assertEqual(2, len(self.widget.editors))
+ self.assertEqual(2, len(self.widget.texts))
+ self.assertListEqual([("", "")] * 2, self.widget.texts)
+
+ self.widget.editor_vbox.findChild(QPushButton).click()
+ self.assertEqual(1, len(self.widget.editors))
+ self.assertEqual(1, len(self.widget.texts))
+ self.assertListEqual([("", "")], self.widget.texts)
+
+ # last editor cannot be removed
+ self.widget.editor_vbox.findChild(QPushButton).click()
+ self.assertEqual(1, len(self.widget.editors))
+ self.assertEqual(1, len(self.widget.texts))
+ self.assertListEqual([("", "")], self.widget.texts)
+
+ def test_add_text(self):
+ # start with 1 editor
+ self.widget.editors[-1].findChild(QPushButton).click()
+ self.widget.editors[-1].findChild(QPushButton).click()
+
+ editor = self.widget.editors[0]
+ self.assertListEqual([("", "")], self.widget.texts)
+ editor.title_le.setText("Beautiful document")
+ editor.title_le.editingFinished.emit()
+ self.assertListEqual([("Beautiful document", "")], self.widget.texts)
+ editor.text_area.setPlainText("I am a beautiful document")
+ editor.text_area.editingFinished.emit()
+ self.assertListEqual(
+ [("Beautiful document", "I am a beautiful document")], self.widget.texts
+ )
+
+ self.add_document_btn.click()
+ editor = self.widget.editors[1]
+ self.assertListEqual(
+ [("Beautiful document", "I am a beautiful document"), ("", "")],
+ self.widget.texts,
+ )
+ editor.title_le.setText("Another another document")
+ editor.title_le.editingFinished.emit()
+ self.assertListEqual(
+ [
+ ("Beautiful document", "I am a beautiful document"),
+ ("Another another document", ""),
+ ],
+ self.widget.texts,
+ )
+ editor.text_area.setPlainText("I am another beautiful document")
+ editor.text_area.editingFinished.emit()
+ self.assertListEqual(
+ [
+ ("Beautiful document", "I am a beautiful document"),
+ ("Another another document", "I am another beautiful document"),
+ ],
+ self.widget.texts,
+ )
+
+ # remove first document
+ self.widget.editor_vbox.findChild(QPushButton).click()
+ self.assertListEqual(
+ [("Another another document", "I am another beautiful document")],
+ self.widget.texts,
+ )
+
+ # change the only document
+ editor = self.widget.editors[0]
+ editor.title_le.setText("Modified document")
+ editor.title_le.editingFinished.emit()
+ self.assertListEqual(
+ [("Modified document", "I am another beautiful document")],
+ self.widget.texts,
+ )
+ editor.text_area.setPlainText("Test")
+ editor.text_area.editingFinished.emit()
+ self.assertListEqual([("Modified document", "Test")], self.widget.texts)
+
+ self.add_document_btn.click()
+ self.assertListEqual(
+ [("Modified document", "Test"), ("", "")], self.widget.texts
+ )
+
+ def test_output(self):
+ # start with 1 editor
+ self.widget.editors[-1].findChild(QPushButton).click()
+ self.widget.editors[-1].findChild(QPushButton).click()
+
+ corpus = self.get_output(self.widget.Outputs.corpus)
+ self.assertEqual(0, len(corpus.domain.attributes))
+ self.assertTupleEqual(
+ (StringVariable("Title"), StringVariable("Document")), corpus.domain.metas
+ )
+ np.testing.assert_array_equal(["?"], corpus.titles)
+ self.assertListEqual(["?"], corpus.documents)
+ np.testing.assert_array_equal([["", ""]], corpus.metas)
+
+ self.add_document_btn.click()
+ self.add_document_btn.click()
+ editor1, editor2, editor3 = self.widget.editors
+ editor1.title_le.setText("Document 1")
+ editor2.title_le.setText("Document 2")
+ editor3.title_le.setText("Document 3")
+ editor1.text_area.setPlainText("Test 1")
+ editor2.text_area.setPlainText("Test 2")
+ editor3.text_area.setPlainText("Test 3")
+ editor1.text_area.editingFinished.emit()
+ editor2.text_area.editingFinished.emit()
+ editor3.text_area.editingFinished.emit()
+
+ corpus = self.get_output(self.widget.Outputs.corpus)
+ np.testing.assert_array_equal(
+ ["Document 1", "Document 2", "Document 3"], corpus.titles
+ )
+ self.assertListEqual(["Test 1", "Test 2", "Test 3"], corpus.documents)
+ np.testing.assert_array_equal(
+ [
+ ["Document 1", "Test 1"],
+ ["Document 2", "Test 2"],
+ ["Document 3", "Test 3"],
+ ],
+ corpus.metas,
+ )
+
+ editor2.findChild(QPushButton).click()
+ corpus = self.get_output(self.widget.Outputs.corpus)
+ np.testing.assert_array_equal(["Document 1", "Document 3"], corpus.titles)
+ self.assertListEqual(["Test 1", "Test 3"], corpus.documents)
+ np.testing.assert_array_equal(
+ [
+ ["Document 1", "Test 1"],
+ ["Document 3", "Test 3"],
+ ],
+ corpus.metas,
+ )
+
+ self.add_document_btn.click()
+ corpus = self.get_output(self.widget.Outputs.corpus)
+ np.testing.assert_array_equal(["Document 1", "Document 3", "?"], corpus.titles)
+ self.assertListEqual(["Test 1", "Test 3", "?"], corpus.documents)
+ np.testing.assert_array_equal(
+ [["Document 1", "Test 1"], ["Document 3", "Test 3"], ["", ""]],
+ corpus.metas,
+ )
+
+ self.widget.editors[0].findChild(QPushButton).click()
+ corpus = self.get_output(self.widget.Outputs.corpus)
+ np.testing.assert_array_equal(["Document 3", "?"], corpus.titles)
+ self.assertListEqual(["Test 3", "?"], corpus.documents)
+ np.testing.assert_array_equal(
+ [["Document 3", "Test 3"], ["", ""]],
+ corpus.metas,
+ )
+
+ self.widget.editors[-1].findChild(QPushButton).click()
+ corpus = self.get_output(self.widget.Outputs.corpus)
+ np.testing.assert_array_equal(["Document 3"], corpus.titles)
+ self.assertListEqual(["Test 3"], corpus.documents)
+ np.testing.assert_array_equal([["Document 3", "Test 3"]], corpus.metas)
+
+
+if __name__ == "__main__":
+ unittest.main()