Skip to content

Commit

Permalink
Concordance - search in the separate thread
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Jun 11, 2021
1 parent f49fe2d commit 8b4e885
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 39 deletions.
51 changes: 29 additions & 22 deletions orangecontrib/text/widgets/owconcordance.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
from typing import Optional
from typing import Optional, Callable

from itertools import chain
import numpy as np

from AnyQt.QtCore import Qt, QAbstractTableModel, QSize, QItemSelectionModel, \
QItemSelection, QModelIndex
from AnyQt.QtWidgets import QSizePolicy, QApplication, QTableView, \
QStyledItemDelegate
from AnyQt.QtWidgets import QSizePolicy, QTableView, QStyledItemDelegate
from AnyQt.QtGui import QColor
from Orange.data import Domain, StringVariable, Table

from Orange.widgets import gui
from Orange.widgets.settings import Setting, ContextSetting, PerfectDomainContextHandler
from Orange.widgets.widget import OWWidget, Msg, Input, Output
from Orange.widgets.utils.concurrent import TaskState, ConcurrentWidgetMixin
from Orange.util import dummy_callback
from nltk import ConcordanceIndex

from orangecontrib.text.corpus import Corpus
from orangecontrib.text.topics import Topic
from orangecontrib.text.preprocess import WordPunctTokenizer
Expand Down Expand Up @@ -73,10 +75,15 @@ def __init__(self):
self.width = 8
self.colored_rows = None

def set_word(self, word, state: "Optional[TaskState]" = None):
    """Set the searched word and rebuild the word index.

    The model is reset around the update: ``modelAboutToBeReset`` is
    emitted first and ``modelReset`` is always emitted afterwards, even
    when the computation is interrupted, so attached views never stay in
    a half-reset state.

    Parameters
    ----------
    word
        The word to look up; stored in ``self.word``.
    state
        Optional task state used to report progress and to check for
        interruption. When omitted, the index is computed without
        progress reporting.

    Raises
    ------
    Exception
        If ``state`` reports that interruption was requested.
    """
    def callback(i: float):
        # ``i`` is the fraction of processed documents; the task state
        # is given a percentage.
        state.set_progress_value(i * 100)
        if state.is_interruption_requested():
            raise Exception("Concordance search interrupted")

    # NOTE(review): when started through ConcurrentWidgetMixin.start this
    # presumably runs off the GUI thread while emitting model signals --
    # confirm this is safe for the attached view.
    self.modelAboutToBeReset.emit()
    try:
        self.word = word
        if state is None:
            self._compute_word_index()
        else:
            self._compute_word_index(callback)
    finally:
        # Pair every modelAboutToBeReset with a modelReset.
        self.modelReset.emit()

def set_corpus(self, corpus):
Expand Down Expand Up @@ -138,13 +145,15 @@ def _compute_indices(self): # type: () -> Optional[None, list]
self.indices = [ConcordanceIndex(doc, key=lambda x: x.lower())
for doc in self.tokens]

def _compute_word_index(self, callback: Callable = dummy_callback) -> None:
    """Rebuild ``word_index`` and ``colored_rows`` for the current word.

    ``word_index`` becomes a list of ``(document index, offset)`` pairs,
    one for every offset returned by the per-document index for
    ``self.word``. ``colored_rows`` becomes the set of every other
    document index (in sorted order) among the documents with a match.
    Both are set to ``None`` when there are no indices or no word.

    Parameters
    ----------
    callback
        Called after each document with the fraction of documents
        processed so far (ending at 1.0). It may raise to abort the
        computation; in that case the previous ``word_index`` and
        ``colored_rows`` are left untouched.
    """
    if self.indices is None or self.word is None:
        self.word_index = self.colored_rows = None
        return

    n_docs = len(self.indices)
    # Build into a local list so an abort never publishes a half-built
    # index paired with stale colored rows.
    word_index = []
    for doc_idx, doc in enumerate(self.indices):
        word_index.extend(
            (doc_idx, offset) for offset in doc.offsets(self.word))
        callback((doc_idx + 1) / n_docs)

    self.word_index = word_index
    matched_docs = sorted({doc_idx for doc_idx, _ in word_index})
    self.colored_rows = set(matched_docs[::2])

def matching_docs(self):
Expand All @@ -169,7 +178,7 @@ def get_data(self):
return Corpus(domain, metas=conc, text_features=[domain.metas[0]])


class OWConcordance(OWWidget):
class OWConcordance(OWWidget, ConcurrentWidgetMixin):
name = "Concordance"
description = "Display the context of the word."
icon = "icons/Concordance.svg"
Expand Down Expand Up @@ -197,6 +206,7 @@ class Warning(OWWidget.Warning):

def __init__(self):
super().__init__()
ConcurrentWidgetMixin.__init__(self)

self.corpus = None # Corpus
self.n_matching = '' # Info on docs matching the word
Expand All @@ -217,7 +227,7 @@ def __init__(self):
gui.rubber(self.controlArea)

# Search
c_box = gui.widgetBox(self.mainArea, orientation="vertical")
c_box = gui.widgetBox(self.mainArea, orientation=Qt.Horizontal)
self.input = gui.lineEdit(
c_box, self, 'word', orientation=Qt.Horizontal,
sizePolicy=QSizePolicy(QSizePolicy.MinimumExpanding,
Expand Down Expand Up @@ -296,16 +306,17 @@ def set_word_from_input(self, topic):

def set_word(self):
    """Clear the stored selection and start the search for ``self.word``.

    The search is handed to ``self.start`` together with the model's
    ``set_word`` and the current word; it is not run directly here.
    """
    self.selected_rows = []
    # NOTE(review): presumably ``start`` (ConcurrentWidgetMixin) supplies
    # the task state as the extra argument of ``model.set_word`` -- confirm.
    self.start(self.model.set_word, self.word)

def on_done(self, _):
    """Refresh the widget and commit the outputs.

    The single positional argument is ignored.
    """
    # NOTE(review): presumably invoked by ConcurrentWidgetMixin once the
    # task started in ``set_word`` has finished -- confirm.
    self.update_widget()
    self.commit()

def handleNewSignals(self):
    """Re-apply the stored row selection (``self.selected_rows``)."""
    # NOTE(review): presumably called by the widget framework after the
    # pending input signals have been delivered -- confirm.
    self.set_selection(self.selected_rows)

def resize_columns(self):
    """Give columns 0 and 2 the same width around column 1.

    Each takes half of the view width left after column 1, minus a fixed
    12 px. Integer division keeps the value an ``int`` for
    ``setColumnWidth``.
    """
    view = self.conc_view
    col_width = (view.width() - view.columnWidth(1)) // 2 - 12
    view.setColumnWidth(0, col_width)
    view.setColumnWidth(2, col_width)

Expand Down Expand Up @@ -352,12 +363,8 @@ def send_report(self):
self.report_table(view)


if __name__ == "__main__":  # pragma: no cover
    # Manual preview: show the widget with the first three documents of
    # the bundled "book-excerpts" corpus.
    from orangewidget.utils.widgetpreview import WidgetPreview

    corpus = Corpus.from_file("book-excerpts")[:3]
    WidgetPreview(OWConcordance).run(corpus)
52 changes: 35 additions & 17 deletions orangecontrib/text/widgets/tests/test_owconcordances.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
import unittest
from unittest.mock import Mock
from unittest.mock import Mock, ANY

from AnyQt.QtCore import QModelIndex, QItemSelection, Qt
from AnyQt.QtGui import QBrush, QColor

from Orange.widgets.tests.base import WidgetTest
from Orange.util import dummy_callback

from orangecontrib.text.corpus import Corpus
from orangecontrib.text.widgets.owconcordance import ConcordanceModel, \
OWConcordance


class DummyState:
    """Minimal stand-in for a task state in model tests.

    Progress updates are discarded and interruption is never requested.
    Both members are static methods, so the class itself or an instance
    can be passed wherever a state object is expected.
    """

    @staticmethod
    def set_progress_value(*_args, **_kwargs):
        """Accept and ignore a progress value."""
        return None

    @staticmethod
    def is_interruption_requested():
        """Report that no interruption was requested."""
        return False


class TestConcordanceModel(unittest.TestCase):
def setUp(self):
self.corpus = Corpus.from_file('deerwester')
Expand All @@ -21,7 +27,7 @@ def test_data(self):
self.assertEqual(model.rowCount(QModelIndex()), 0)

model.set_corpus(self.corpus)
model.set_word("of")
model.set_word("of", DummyState)

# The same document in two rows
self.assertEqual(model.rowCount(QModelIndex()), 7)
Expand All @@ -47,7 +53,7 @@ def test_data_non_displayroles(self):
"""Other possibly implemented roles return correct types"""
model = ConcordanceModel()
model.set_corpus(self.corpus)
model.set_word("of")
model.set_word("of", DummyState)
ind00 = model.index(0, 0)
self.assertIsInstance(model.data(ind00, Qt.ForegroundRole),
(QBrush, type(None)))
Expand All @@ -61,7 +67,7 @@ def test_color_proper_rows(self):
model = ConcordanceModel()
model.set_width(2)
model.set_corpus(self.corpus)
model.set_word("of")
model.set_word("of", DummyState)

color1 = model.data(model.index(0, 0), Qt.BackgroundRole)
self.assertEqual(model.data(model.index(1, 0), Qt.BackgroundRole),
Expand All @@ -76,15 +82,15 @@ def test_order_doesnt_matter(self):
self.assertEqual(model.rowCount(QModelIndex()), 0)
model.set_corpus(self.corpus)
self.assertEqual(model.rowCount(QModelIndex()), 0)
model.set_word("of")
model.set_word("of", DummyState)
self.assertEqual(model.rowCount(QModelIndex()), 7)
model.set_word("")
model.set_word("", DummyState)
self.assertEqual(model.rowCount(QModelIndex()), 0)
model.set_word(None)
model.set_word(None, DummyState)
self.assertEqual(model.rowCount(QModelIndex()), 0)
model.set_corpus(None)
self.assertEqual(model.rowCount(QModelIndex()), 0)
model.set_word("of")
model.set_word("of", DummyState)
self.assertEqual(model.rowCount(QModelIndex()), 0)
model.set_corpus(self.corpus)
self.assertEqual(model.rowCount(QModelIndex()), 7)
Expand All @@ -100,15 +106,15 @@ def test_set_word(self):
model.set_corpus(self.corpus)
model.set_width(2)

model.set_word("of")
model.set_word("of", DummyState)
self.assertEqual(model.rowCount(QModelIndex()), 7)
self.assertEqual(model.data(model.index(0, 0)), "A survey")

model.set_word("lab")
model.set_word("lab", DummyState)
self.assertEqual(model.rowCount(QModelIndex()), 1)
self.assertEqual(model.data(model.index(0, 0)), "interface for")

model.set_word(None)
model.set_word(None, DummyState)
self.assertEqual(model.rowCount(QModelIndex()), 0)

def test_signals(self):
Expand All @@ -127,19 +133,19 @@ def test_signals(self):

toBeReset.reset_mock()
hasBeenReset.reset_mock()
model.set_word(None)
model.set_word(None, DummyState)
self.assertEqual(toBeReset.call_count, 1)
self.assertEqual(hasBeenReset.call_count, 1)

def test_matching_docs(self):
    """The word may be set before the corpus; matches are still counted."""
    model = ConcordanceModel()
    model.set_word("of", DummyState)
    model.set_corpus(self.corpus)
    self.assertEqual(model.matching_docs(), 6)

def test_concordance_output(self):
model = ConcordanceModel()
model.set_word("of")
model.set_word("of", DummyState)
model.set_corpus(self.corpus)
output = model.get_data()
self.assertEqual(len(output), 7)
Expand All @@ -163,9 +169,12 @@ def test_set_corpus(self):
def test_set_word(self):
    """Editing the word forwards it to the model's ``set_word``."""
    self.widget.model.set_word = set_word = Mock()
    self.widget.controls.word.setText("foo")
    # The widget starts the search as a task; wait before asserting.
    self.wait_until_finished()
    # The second argument is the task state, whatever object it is.
    set_word.assert_called_with("foo", ANY)

    self.widget.controls.word.setText("")
    self.wait_until_finished()
    set_word.assert_called_with("", ANY)

def test_set_width(self):
self.widget.model.set_width = set_width = Mock()
Expand All @@ -176,6 +185,8 @@ def test_selection(self):
self.send_signal("Corpus", self.corpus)
widget = self.widget
widget.controls.word.setText("of")
self.wait_until_finished()

view = self.widget.conc_view

# Select one row, two are selected, one document on the output
Expand Down Expand Up @@ -212,22 +223,29 @@ def test_selection(self):
view.selectRow(3)
self.assertTrue(view.selectedIndexes())
widget.controls.word.setText("o")
self.wait_until_finished()
self.assertFalse(view.selectedIndexes())

def test_signal_to_none(self):
    """Removing the corpus empties the widget; re-sending it restores rows."""
    self.send_signal("Corpus", self.corpus)
    widget = self.widget
    widget.controls.word.setText("of")
    # The search is started as a task; wait for it before reading the model.
    self.wait_until_finished()

    view = self.widget.conc_view
    nrows = widget.model.rowCount()
    view.selectRow(1)

    self.send_signal("Corpus", None)
    self.wait_until_finished()

    # With no corpus: no output, no rows, and an empty word field.
    self.assertIsNone(self.get_output("Selected Documents"))
    self.assertEqual(widget.model.rowCount(), 0)
    self.assertEqual(widget.controls.word.text(), "")

    self.send_signal("Corpus", self.corpus)
    self.wait_until_finished()

    # The same corpus yields the same number of rows as before.
    self.assertEqual(widget.model.rowCount(), nrows)


Expand Down

0 comments on commit 8b4e885

Please sign in to comment.