Skip to content

Commit

Permalink
Create corpus - widget, icon and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Jun 2, 2022
1 parent 2a21b3f commit bb6bd21
Show file tree
Hide file tree
Showing 3 changed files with 248 additions and 39 deletions.
2 changes: 2 additions & 0 deletions orangecontrib/text/widgets/icons/CreateCorpus.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
101 changes: 62 additions & 39 deletions orangecontrib/text/widgets/owcreatecorpus.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from functools import partial
from typing import List, Tuple

import numpy as np
Expand All @@ -7,9 +6,14 @@
from Orange.widgets import gui
from Orange.widgets.widget import OWWidget, Output
from AnyQt.QtWidgets import QGroupBox
from PyQt5.QtCore import QSize
from PyQt5.QtWidgets import QVBoxLayout, QLineEdit, QPlainTextEdit, QSizePolicy, \
QSpacerItem
from PyQt5.QtCore import QSize, Qt
from PyQt5.QtWidgets import (
QVBoxLayout,
QLineEdit,
QPlainTextEdit,
QSizePolicy,
QPushButton,
)
from orangewidget.settings import Setting

from orangecontrib.text import Corpus
Expand All @@ -20,21 +24,18 @@ def __init__(self):
super().__init__(parent=None)
self.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding)

def sizeHint(self):
sh = super().sizeHint()
sh.setHeight(350)
return sh


class CustomQPlainTextEdit(QPlainTextEdit):
    """Plain-text editor that announces when the user has finished editing.

    ``QPlainTextEdit`` has no ``editingFinished`` signal of its own (unlike
    ``QLineEdit``), so this subclass emits one whenever the widget loses
    keyboard focus.
    """

    editingFinished = Signal()

    def focusOutEvent(self, event):
        """Run the default focus-out handling, then emit ``editingFinished``.

        The base implementation is called first so the editor's own
        focus-out processing still happens; the original override
        dropped the event without forwarding it.
        """
        super().focusOutEvent(event)
        self.editingFinished.emit()


class DocumentEditor(QGroupBox):
text_changed = Signal(str, str)
remove_clicked = Signal()

def __init__(self, title, text, parent=None):
super().__init__(parent)
Expand All @@ -48,87 +49,109 @@ def __init__(self, title, text, parent=None):
self.text_area.setPlaceholderText("Document text")
self.text_area.setPlainText(text)
self.text_area.editingFinished.connect(self._on_text_changed)
self.layout().addWidget(self.title_le)

remove_button = QPushButton("x")
remove_button.setFixedWidth(35)
remove_button.setFocusPolicy(Qt.NoFocus)
remove_button.clicked.connect(self._on_remove_clicked)
box = gui.hBox(self)
box.layout().addWidget(self.title_le)
box.layout().addWidget(remove_button)
self.layout().addWidget(self.text_area)

def _on_text_changed(self):
self.text_changed.emit(self.title_le.text(), self.text_area.toPlainText())

# todo: add x button
def _on_remove_clicked(self):
self.remove_clicked.emit()


class OWCreateCorpus(OWWidget):
    """Widget that builds a corpus from documents typed or pasted by the user.

    Each document is edited in its own ``DocumentEditor``; ``self.texts``
    mirrors the editors one-to-one (same order, same length) and is what is
    stored in the settings and turned into the output corpus.
    """

    name = "Create Corpus"
    description = "Write/paste documents to create a corpus"
    icon = "icons/CreateCorpus.svg"
    priority = 120

    class Outputs:
        corpus = Output("Corpus", Corpus)

    want_main_area = False

    # (title, text) pairs, one per editor; starts with a single empty document
    texts: List[Tuple[str, str]] = Setting([("", "")])
    auto_commit: bool = Setting(True)

    def __init__(self):
        super().__init__()

        # editors[i] is the editor that owns texts[i]
        self.editors = []

        scroll_area = EditorsVerticalScrollArea()
        self.editor_vbox = gui.vBox(self.controlArea, spacing=0)
        # layout spacing keeps the boxes apart, so no explicit spacer items
        # have to be tracked when editors are removed
        self.editor_vbox.layout().setSpacing(10)
        scroll_area.setWidget(self.editor_vbox)
        self.controlArea.layout().addWidget(scroll_area)

        # rebuild one editor per stored document
        for t in self.texts:
            self._add_document_editor(*t)

        add_btn = gui.button(
            self.buttonsArea, self, "Add document", self._add_new_editor
        )
        # do not let the button take focus away from the text fields
        add_btn.setFocusPolicy(Qt.NoFocus)
        gui.auto_apply(self.buttonsArea, self, "auto_commit")
        self.commit.now()

    def _add_document_editor(self, title, text):
        """Function that handles adding new editor with texts provided"""
        editor = DocumentEditor(title, text)
        # handlers look the editor up through sender(), so no index is bound
        # here; a bound index would go stale once an earlier editor is removed
        editor.text_changed.connect(self._text_changed)
        editor.remove_clicked.connect(self._remove_document_editor)
        self.editors.append(editor)
        self.editor_vbox.layout().addWidget(editor)
        self.editor_vbox.updateGeometry()

    def _remove_document_editor(self):
        """Remove the editor on the click of x button on the editor"""
        # the last remaining editor is never removed
        if len(self.editors) > 1:
            editor = self.sender()
            i = self.editors.index(editor)
            del self.texts[i]
            self.editor_vbox.layout().removeWidget(editor)
            self.editors.remove(editor)
            editor.deleteLater()
            self.commit.deferred()

    def _add_new_editor(self):
        """Add editor on the click of Add document button"""
        self.texts.append(("", ""))
        self._add_document_editor(*self.texts[-1])
        self.commit.deferred()

    def _text_changed(self, title, text):
        """Called when any text change, corrects texts in settings"""
        editor = self.sender()
        self.texts[self.editors.index(editor)] = (title, text)
        self.commit.deferred()

    @gui.deferred
    def commit(self):
        """Create a new corpus and output it"""
        doc_var = StringVariable("Document")
        title_var = StringVariable("Title")
        # no regular attributes; title and text are stored as metas
        domain = Domain([], metas=[title_var, doc_var])
        corpus = Corpus.from_numpy(
            domain,
            np.empty((len(self.texts), 0)),
            metas=np.array(self.texts),
            text_features=[doc_var],
        )
        corpus.set_title_variable(title_var)
        self.Outputs.corpus.send(corpus)

    def sizeHint(self) -> QSize:
        """Return the preferred initial size of the widget window."""
        return QSize(600, 400)


if __name__ == "__main__":
    # Launch the widget on its own for manual inspection.
    from orangewidget.utils.widgetpreview import WidgetPreview

    preview = WidgetPreview(OWCreateCorpus)
    preview.run()
184 changes: 184 additions & 0 deletions orangecontrib/text/widgets/tests/test_owcreatecorpus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
import unittest

import numpy as np
from Orange.data import StringVariable
from Orange.widgets.tests.base import WidgetTest
from PyQt5.QtWidgets import QPushButton

from orangecontrib.text.widgets.owcreatecorpus import OWCreateCorpus


class TestOWCreateCorpus(WidgetTest):
    """GUI tests for adding, editing and removing documents in OWCreateCorpus."""

    def setUp(self):
        self.widget = self.create_widget(OWCreateCorpus)
        self.add_document_btn = self.widget.buttonsArea.findChild(QPushButton)

    # ----------------------------------------------------------- helpers
    def _click_first_remove_button(self):
        """Click the first button found in editor_vbox.

        The only buttons inside editor_vbox are the editors' remove buttons.
        """
        self.widget.editor_vbox.findChild(QPushButton).click()

    def _assert_blank_editors(self, expected):
        """Check that exactly `expected` editors exist and all are empty."""
        self.assertEqual(expected, len(self.widget.editors))
        self.assertEqual(expected, len(self.widget.texts))
        self.assertListEqual([("", "")] * expected, self.widget.texts)

    @staticmethod
    def _edit_title(editor, title):
        """Set the editor's title and signal that editing has finished."""
        editor.title_le.setText(title)
        editor.title_le.editingFinished.emit()

    @staticmethod
    def _edit_text(editor, text):
        """Set the editor's document text and signal that editing has finished."""
        editor.text_area.setPlainText(text)
        editor.text_area.editingFinished.emit()

    def _current_corpus(self):
        """Return the corpus currently on the widget's output."""
        return self.get_output(self.widget.Outputs.corpus)

    def _assert_corpus(self, corpus, titles, documents, metas):
        """Compare titles, documents and metas of `corpus` with expectations."""
        np.testing.assert_array_equal(titles, corpus.titles)
        self.assertListEqual(documents, corpus.documents)
        np.testing.assert_array_equal(metas, corpus.metas)

    # ------------------------------------------------------------- tests
    def test_add_remove_editors(self):
        self._assert_blank_editors(1)

        # every click on "Add document" appends one empty editor
        for expected in (2, 3):
            self.add_document_btn.click()
            self._assert_blank_editors(expected)

        # two removals succeed; the third click is ignored because
        # the last editor cannot be removed
        for expected in (2, 1, 1):
            self._click_first_remove_button()
            self._assert_blank_editors(expected)

    def test_add_text(self):
        first_doc = ("Beautiful document", "I am a beautiful document")
        second_doc = ("Another another document", "I am another beautiful document")
        texts = self.widget.texts

        editor = self.widget.editors[0]
        self.assertListEqual([("", "")], texts)
        self._edit_title(editor, first_doc[0])
        self.assertListEqual([("Beautiful document", "")], self.widget.texts)
        self._edit_text(editor, first_doc[1])
        self.assertListEqual([first_doc], self.widget.texts)

        self.add_document_btn.click()
        editor = self.widget.editors[1]
        self.assertListEqual([first_doc, ("", "")], self.widget.texts)
        self._edit_title(editor, second_doc[0])
        self.assertListEqual(
            [first_doc, ("Another another document", "")], self.widget.texts
        )
        self._edit_text(editor, second_doc[1])
        self.assertListEqual([first_doc, second_doc], self.widget.texts)

        # remove first document
        self._click_first_remove_button()
        self.assertListEqual([second_doc], self.widget.texts)

        # change the only document
        editor = self.widget.editors[0]
        self._edit_title(editor, "Modified document")
        self.assertListEqual(
            [("Modified document", "I am another beautiful document")],
            self.widget.texts,
        )
        self._edit_text(editor, "Test")
        self.assertListEqual([("Modified document", "Test")], self.widget.texts)

        self.add_document_btn.click()
        self.assertListEqual(
            [("Modified document", "Test"), ("", "")], self.widget.texts
        )

    def test_output(self):
        # a fresh widget outputs a single empty document
        corpus = self._current_corpus()
        self.assertEqual(0, len(corpus.domain.attributes))
        self.assertTupleEqual(
            (StringVariable("Title"), StringVariable("Document")), corpus.domain.metas
        )
        self._assert_corpus(corpus, ["?"], ["?"], [["", ""]])

        self.add_document_btn.click()
        self.add_document_btn.click()
        editor1, editor2, editor3 = self.widget.editors
        all_editors = (editor1, editor2, editor3)
        titles = ["Document 1", "Document 2", "Document 3"]
        bodies = ["Test 1", "Test 2", "Test 3"]
        # fill every field first; emitting from the text areas afterwards
        # is what pushes the values into the widget
        for ed, title in zip(all_editors, titles):
            ed.title_le.setText(title)
        for ed, body in zip(all_editors, bodies):
            ed.text_area.setPlainText(body)
        for ed in all_editors:
            ed.text_area.editingFinished.emit()

        self._assert_corpus(
            self._current_corpus(),
            titles,
            bodies,
            [
                ["Document 1", "Test 1"],
                ["Document 2", "Test 2"],
                ["Document 3", "Test 3"],
            ],
        )

        # remove the middle document
        editor2.findChild(QPushButton).click()
        self._assert_corpus(
            self._current_corpus(),
            ["Document 1", "Document 3"],
            ["Test 1", "Test 3"],
            [
                ["Document 1", "Test 1"],
                ["Document 3", "Test 3"],
            ],
        )

        # a newly added empty document shows up as missing ("?")
        self.add_document_btn.click()
        self._assert_corpus(
            self._current_corpus(),
            ["Document 1", "Document 3", "?"],
            ["Test 1", "Test 3", "?"],
            [["Document 1", "Test 1"], ["Document 3", "Test 3"], ["", ""]],
        )

        # remove the first document
        self.widget.editors[0].findChild(QPushButton).click()
        self._assert_corpus(
            self._current_corpus(),
            ["Document 3", "?"],
            ["Test 3", "?"],
            [["Document 3", "Test 3"], ["", ""]],
        )

        # remove the last document
        self.widget.editors[-1].findChild(QPushButton).click()
        self._assert_corpus(
            self._current_corpus(),
            ["Document 3"],
            ["Test 3"],
            [["Document 3", "Test 3"]],
        )


if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()

0 comments on commit bb6bd21

Please sign in to comment.