From 0ddfc9a51fe1554afd648de03c87ce94a20f837e Mon Sep 17 00:00:00 2001 From: Primoz Godec Date: Tue, 31 May 2022 13:11:03 +0200 Subject: [PATCH] Create corpus - widget, icon and tests --- .../text/widgets/icons/CreateCorpus.svg | 2 + orangecontrib/text/widgets/owcreatecorpus.py | 103 +++++---- .../text/widgets/tests/test_owcreatecorpus.py | 202 ++++++++++++++++++ 3 files changed, 267 insertions(+), 40 deletions(-) create mode 100644 orangecontrib/text/widgets/icons/CreateCorpus.svg create mode 100644 orangecontrib/text/widgets/tests/test_owcreatecorpus.py diff --git a/orangecontrib/text/widgets/icons/CreateCorpus.svg b/orangecontrib/text/widgets/icons/CreateCorpus.svg new file mode 100644 index 000000000..9debc8fe5 --- /dev/null +++ b/orangecontrib/text/widgets/icons/CreateCorpus.svg @@ -0,0 +1,2 @@ + + diff --git a/orangecontrib/text/widgets/owcreatecorpus.py b/orangecontrib/text/widgets/owcreatecorpus.py index c9fe2f78d..50426ae13 100644 --- a/orangecontrib/text/widgets/owcreatecorpus.py +++ b/orangecontrib/text/widgets/owcreatecorpus.py @@ -1,4 +1,3 @@ -from functools import partial from typing import List, Tuple import numpy as np @@ -7,9 +6,14 @@ from Orange.widgets import gui from Orange.widgets.widget import OWWidget, Output from AnyQt.QtWidgets import QGroupBox -from PyQt5.QtCore import QSize -from PyQt5.QtWidgets import QVBoxLayout, QLineEdit, QPlainTextEdit, QSizePolicy, \ - QSpacerItem +from PyQt5.QtCore import QSize, Qt +from PyQt5.QtWidgets import ( + QVBoxLayout, + QLineEdit, + QPlainTextEdit, + QSizePolicy, + QPushButton, +) from orangewidget.settings import Setting from orangecontrib.text import Corpus @@ -20,21 +24,18 @@ def __init__(self): super().__init__(parent=None) self.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding) - def sizeHint(self): - sh = super().sizeHint() - sh.setHeight(350) - return sh - class CustomQPlainTextEdit(QPlainTextEdit): editingFinished = Signal() def focusOutEvent(self, _): + # TextEdit does not have editingFinished self.editingFinished.emit() class DocumentEditor(QGroupBox): text_changed = Signal(str, str) + remove_clicked = Signal() def __init__(self, title, text, parent=None): super().__init__(parent) @@ -48,87 +49,109 @@ def __init__(self, title, text, parent=None): self.text_area.setPlaceholderText("Document text") self.text_area.setPlainText(text) self.text_area.editingFinished.connect(self._on_text_changed) - self.layout().addWidget(self.title_le) + + remove_button = QPushButton("x") + remove_button.setFixedWidth(35) + remove_button.setFocusPolicy(Qt.NoFocus) + remove_button.clicked.connect(self._on_remove_clicked) + box = gui.hBox(self) + box.layout().addWidget(self.title_le) + box.layout().addWidget(remove_button) self.layout().addWidget(self.text_area) def _on_text_changed(self): self.text_changed.emit(self.title_le.text(), self.text_area.toPlainText()) - # todo: add x button + def _on_remove_clicked(self): + self.remove_clicked.emit() class OWCreateCorpus(OWWidget): name = "Create Corpus" description = "Write/paste documents to create a corpus" - icon = "icons/TextFile.svg" - priority = 100 # todo + icon = "icons/CreateCorpus.svg" + priority = 120 class Outputs: - corpus = Output('Corpus', Corpus) + corpus = Output("Corpus", Corpus) want_main_area = False - texts: List[Tuple[str, str]] = Setting([("", ""), ("", ""), ("", "")]) + texts: List[Tuple[str, str]] = Setting([("", "")] * 3) auto_commit: bool = Setting(True) def __init__(self): super().__init__() - self.editors = [] scroll_area = EditorsVerticalScrollArea() self.editor_vbox = gui.vBox(self.controlArea, spacing=0) + self.editor_vbox.layout().setSpacing(10) scroll_area.setWidget(self.editor_vbox) self.controlArea.layout().addWidget(scroll_area) for t in self.texts: - self.add_document_editor(*t) + self._add_document_editor(*t) - gui.button(self.buttonsArea, self, "Add document", self.add_new_editor) - gui.auto_send(self.buttonsArea, self, "auto_commit") + add_btn = gui.button( + self.buttonsArea, self, "Add document", self._add_new_editor + ) + add_btn.setFocusPolicy(Qt.NoFocus) + gui.auto_apply(self.buttonsArea, self, "auto_commit") self.commit.now() - def add_document_editor(self, title, text): + def _add_document_editor(self, title, text): + """Function that handles adding new editor with texts provided""" editor = DocumentEditor(title, text) - editor.text_changed.connect(partial(self._text_changed, len(self.editors))) + editor.text_changed.connect(self._text_changed) + editor.remove_clicked.connect(self._remove_document_editor) self.editors.append(editor) - if len(self.editors) > 1: - # add spacer before each item that boxes do not stick together - # (except before the first one) - self.editor_vbox.layout().addSpacerItem(QSpacerItem(1, 10)) self.editor_vbox.layout().addWidget(editor) self.editor_vbox.updateGeometry() - def remove_document_editor(self): - if len(self.texts) > 1: - del self.texts[-1] - self.editor_vbox.layout().remove(self.editors[-1]) - del self.editors[-1] - - def add_new_editor(self): - self.texts = [("", "")] - self.add_document_editor(*self.texts[-1]) + def _remove_document_editor(self): + """Remove the editor on the click of x button on the editor""" + if len(self.editors) > 1: + editor = self.sender() + i = self.editors.index(editor) + del self.texts[i] + self.editor_vbox.layout().removeWidget(editor) + self.editors.remove(editor) + editor.deleteLater() + self.commit.deferred() + + def _add_new_editor(self): + """Add editor on the click of Add document button""" + self.texts.append(("", "")) + self._add_document_editor(*self.texts[-1]) + self.commit.deferred() - def _text_changed(self, i, title, text): - self.texts[i] = (title, text) + def _text_changed(self, title, text): + """Called when any text change, corrects texts in settings""" + editor = self.sender() + self.texts[self.editors.index(editor)] = (title, text) self.commit.deferred() @gui.deferred def commit(self): + """Create a new corpus and output it""" doc_var = StringVariable("Document") title_var = StringVariable("Title") domain = Domain([], metas=[title_var, doc_var]) corpus = Corpus.from_numpy( - domain, np.empty((len(self.texts), 0)), metas=np.array(self.texts), - text_features=[doc_var] + domain, + np.empty((len(self.texts), 0)), + metas=np.array(self.texts), + text_features=[doc_var], ) corpus.set_title_variable(title_var) self.Outputs.corpus.send(corpus) - def sizeHint(self): - return QSize(600, 400) + def sizeHint(self) -> QSize: + return QSize(600, 600) if __name__ == "__main__": from orangewidget.utils.widgetpreview import WidgetPreview + WidgetPreview(OWCreateCorpus).run() diff --git a/orangecontrib/text/widgets/tests/test_owcreatecorpus.py b/orangecontrib/text/widgets/tests/test_owcreatecorpus.py new file mode 100644 index 000000000..2e8d738f0 --- /dev/null +++ b/orangecontrib/text/widgets/tests/test_owcreatecorpus.py @@ -0,0 +1,202 @@ +import unittest + +import numpy as np +from Orange.data import StringVariable +from Orange.widgets.tests.base import WidgetTest +from PyQt5.QtWidgets import QPushButton + +from orangecontrib.text.widgets.owcreatecorpus import OWCreateCorpus + + +class TestOWCreateCorpus(WidgetTest): + def setUp(self): + self.widget = self.create_widget(OWCreateCorpus) + self.add_document_btn = self.widget.buttonsArea.findChild(QPushButton) + + def test_add_remove_editors(self): + self.assertEqual(3, len(self.widget.editors)) + self.assertEqual(3, len(self.widget.texts)) + self.assertListEqual([("", "")] * 3, self.widget.texts) + + self.add_document_btn.click() + self.assertEqual(4, len(self.widget.editors)) + self.assertEqual(4, len(self.widget.texts)) + self.assertListEqual([("", "")] * 4, self.widget.texts) + + self.add_document_btn.click() + self.assertEqual(5, len(self.widget.editors)) + self.assertEqual(5, len(self.widget.texts)) + self.assertListEqual([("", "")] * 5, self.widget.texts) + + # click any button in editor_vbox - there are only buttons for removing + self.widget.editor_vbox.findChild(QPushButton).click() + self.assertEqual(4, len(self.widget.editors)) + self.assertEqual(4, len(self.widget.texts)) + self.assertListEqual([("", "")] * 4, self.widget.texts) + + self.widget.editor_vbox.findChild(QPushButton).click() + self.assertEqual(3, len(self.widget.editors)) + self.assertEqual(3, len(self.widget.texts)) + self.assertListEqual([("", "")] * 3, self.widget.texts) + + self.widget.editor_vbox.findChild(QPushButton).click() + self.assertEqual(2, len(self.widget.editors)) + self.assertEqual(2, len(self.widget.texts)) + self.assertListEqual([("", "")] * 2, self.widget.texts) + + self.widget.editor_vbox.findChild(QPushButton).click() + self.assertEqual(1, len(self.widget.editors)) + self.assertEqual(1, len(self.widget.texts)) + self.assertListEqual([("", "")], self.widget.texts) + + # last editor cannot be removed + self.widget.editor_vbox.findChild(QPushButton).click() + self.assertEqual(1, len(self.widget.editors)) + self.assertEqual(1, len(self.widget.texts)) + self.assertListEqual([("", "")], self.widget.texts) + + def test_add_text(self): + # start with 1 editor + self.widget.editors[-1].findChild(QPushButton).click() + self.widget.editors[-1].findChild(QPushButton).click() + + editor = self.widget.editors[0] + self.assertListEqual([("", "")], self.widget.texts) + editor.title_le.setText("Beautiful document") + editor.title_le.editingFinished.emit() + self.assertListEqual([("Beautiful document", "")], self.widget.texts) + editor.text_area.setPlainText("I am a beautiful document") + editor.text_area.editingFinished.emit() + self.assertListEqual( + [("Beautiful document", "I am a beautiful document")], self.widget.texts + ) + + self.add_document_btn.click() + editor = self.widget.editors[1] + self.assertListEqual( + [("Beautiful document", "I am a beautiful document"), ("", "")], + self.widget.texts, + ) + editor.title_le.setText("Another another document") + editor.title_le.editingFinished.emit() + self.assertListEqual( + [ + ("Beautiful document", "I am a beautiful document"), + ("Another another document", ""), + ], + self.widget.texts, + ) + editor.text_area.setPlainText("I am another beautiful document") + editor.text_area.editingFinished.emit() + self.assertListEqual( + [ + ("Beautiful document", "I am a beautiful document"), + ("Another another document", "I am another beautiful document"), + ], + self.widget.texts, + ) + + # remove first document + self.widget.editor_vbox.findChild(QPushButton).click() + self.assertListEqual( + [("Another another document", "I am another beautiful document")], + self.widget.texts, + ) + + # change the only document + editor = self.widget.editors[0] + editor.title_le.setText("Modified document") + editor.title_le.editingFinished.emit() + self.assertListEqual( + [("Modified document", "I am another beautiful document")], + self.widget.texts, + ) + editor.text_area.setPlainText("Test") + editor.text_area.editingFinished.emit() + self.assertListEqual([("Modified document", "Test")], self.widget.texts) + + self.add_document_btn.click() + self.assertListEqual( + [("Modified document", "Test"), ("", "")], self.widget.texts + ) + + def test_output(self): + # start with 1 editor + self.widget.editors[-1].findChild(QPushButton).click() + self.widget.editors[-1].findChild(QPushButton).click() + + corpus = self.get_output(self.widget.Outputs.corpus) + self.assertEqual(0, len(corpus.domain.attributes)) + self.assertTupleEqual( + (StringVariable("Title"), StringVariable("Document")), corpus.domain.metas + ) + np.testing.assert_array_equal(["?"], corpus.titles) + self.assertListEqual(["?"], corpus.documents) + np.testing.assert_array_equal([["", ""]], corpus.metas) + + self.add_document_btn.click() + self.add_document_btn.click() + editor1, editor2, editor3 = self.widget.editors + editor1.title_le.setText("Document 1") + editor2.title_le.setText("Document 2") + editor3.title_le.setText("Document 3") + editor1.text_area.setPlainText("Test 1") + editor2.text_area.setPlainText("Test 2") + editor3.text_area.setPlainText("Test 3") + editor1.text_area.editingFinished.emit() + editor2.text_area.editingFinished.emit() + editor3.text_area.editingFinished.emit() + + corpus = self.get_output(self.widget.Outputs.corpus) + np.testing.assert_array_equal( + ["Document 1", "Document 2", "Document 3"], corpus.titles + ) + self.assertListEqual(["Test 1", "Test 2", "Test 3"], corpus.documents) + np.testing.assert_array_equal( + [ + ["Document 1", "Test 1"], + ["Document 2", "Test 2"], + ["Document 3", "Test 3"], + ], + corpus.metas, + ) + + editor2.findChild(QPushButton).click() + corpus = self.get_output(self.widget.Outputs.corpus) + np.testing.assert_array_equal(["Document 1", "Document 3"], corpus.titles) + self.assertListEqual(["Test 1", "Test 3"], corpus.documents) + np.testing.assert_array_equal( + [ + ["Document 1", "Test 1"], + ["Document 3", "Test 3"], + ], + corpus.metas, + ) + + self.add_document_btn.click() + corpus = self.get_output(self.widget.Outputs.corpus) + np.testing.assert_array_equal(["Document 1", "Document 3", "?"], corpus.titles) + self.assertListEqual(["Test 1", "Test 3", "?"], corpus.documents) + np.testing.assert_array_equal( + [["Document 1", "Test 1"], ["Document 3", "Test 3"], ["", ""]], + corpus.metas, + ) + + self.widget.editors[0].findChild(QPushButton).click() + corpus = self.get_output(self.widget.Outputs.corpus) + np.testing.assert_array_equal(["Document 3", "?"], corpus.titles) + self.assertListEqual(["Test 3", "?"], corpus.documents) + np.testing.assert_array_equal( + [["Document 3", "Test 3"], ["", ""]], + corpus.metas, + ) + + self.widget.editors[-1].findChild(QPushButton).click() + corpus = self.get_output(self.widget.Outputs.corpus) + np.testing.assert_array_equal(["Document 3"], corpus.titles) + self.assertListEqual(["Test 3"], corpus.documents) + np.testing.assert_array_equal([["Document 3", "Test 3"]], corpus.metas) + + +if __name__ == "__main__": + unittest.main()