Skip to content

Commit

Permalink
Semantic Viewer - show documents when there are no words on the input
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Jan 30, 2023
1 parent a9c89a6 commit 1a80763
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 14 deletions.
39 changes: 27 additions & 12 deletions orangecontrib/text/widgets/owsemanticviewer.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import re
from types import SimpleNamespace
from typing import Optional, Any, List, Tuple
from typing import Optional, Any, List, Tuple, Union

import numpy as np

from AnyQt.QtCore import Qt, QUrl, QItemSelection, QItemSelectionModel, \
QModelIndex
from AnyQt.QtWidgets import QTableView, QSplitter, QApplication

from Orange.data import Table, Domain, StringVariable
from Orange.data import Table
from Orange.widgets import gui
from Orange.widgets.settings import Setting
from Orange.widgets.utils.annotated_data import create_annotated_table
Expand Down Expand Up @@ -303,6 +303,8 @@ def set_words(self, words: Optional[Table]):
def handleNewSignals(self):
    """Refresh the widget after its inputs have changed.

    Calls ``_clear`` and ``update_scores`` unconditionally, then fills the
    document list right away when a corpus is set, so documents are listed
    even if no words are connected.
    """
    self._clear()
    self.update_scores()
    if self.corpus is None:
        # Nothing to list without a corpus.
        return
    self._list_documents()

def update_scores(self):
self.start(run, self.corpus, self.words)
Expand All @@ -321,20 +323,31 @@ def on_done(self, results: Results):
if not self._results or not self.corpus or not self.words:
self.commit()
return
self._list_documents()

def _list_documents(self):
model = self._list_view.model()
model.setHorizontalHeaderLabels(["Match", "Score", "Document"])

def get_avg_score(result: List) -> float:
return "NA" if result is None else np.mean([r[1] for r in result])
def get_avg_score(i: int) -> Union[float, str]:
if self._results is not None:
result = self._results[i]
return "NA" if result is None else np.mean([r[1] for r in result])
else:
return ""

def get_n_matches(ngram):
return sum(ngram.count(word) for word in self.words)
if self.words is not None:
return sum(ngram.count(word) for word in self.words)
else:
return ""

data = [[get_n_matches(ngram), get_avg_score(res), title]
for res, title, ngram in zip(self._results,
self.corpus.titles.tolist(),
self.corpus.ngrams)]
data = [
[get_n_matches(ngram), get_avg_score(i), title]
for i, (title, ngram) in enumerate(
zip(self.corpus.titles.tolist(), self.corpus.ngrams)
)
]
model.wrap(data)
for i in range(len(data)):
model.setData(model.index(i, 0), i, role=IndexRole)
Expand Down Expand Up @@ -370,16 +383,18 @@ def _set_selected_rows(self, selected_rows: List[int]):
)

def _show_documents(self):
if self.corpus is None or self._results is None:
if self.corpus is None:
return

documents = self.corpus.documents
parser = DisplayDocument(self.display_index)
htmls = []
for doc_index in self.selection:
text = documents[doc_index]
matches = [ind for ind, score in self._results[doc_index] or []
if score >= self.threshold]
matches = []
if self._results:
matches = [ind for ind, score in self._results[doc_index] or []
if score >= self.threshold]
text = parser(text, matches)
text = text.replace("\n", "<br/>")
html = f"<p>{text}</p>"
Expand Down
31 changes: 29 additions & 2 deletions orangecontrib/text/widgets/tests/test_owsemanticviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,25 @@ def test_table(self):
for j in range(model.columnCount()):
self.assertEqual(model.data(model.index(i, j)), table[i][j])

def test_table_no_words(self):
"""When no words on the input still show documents but no scores"""
self.send_signal(self.widget.Inputs.corpus, self.corpus)
self.wait_until_finished()

model = self.widget._list_view.model()
table = [["", "", "Document 1"],
["", "", "Document 2"],
["", "", "Document 3"],
["", "", "Document 4"],
["", "", "Document 5"],
["", "", "Document 6"],
["", "", "Document 7"],
["", "", "Document 8"],
["", "", "Document 9"]]
for i in range(len(self.corpus)):
for j in range(model.columnCount()):
self.assertEqual(model.data(model.index(i, j)), table[i][j])

def test_webview(self):
self.send_signal(self.widget.Inputs.corpus, self.corpus)
self.send_signal(self.widget.Inputs.words, self.words)
Expand Down Expand Up @@ -398,11 +417,19 @@ def test_clear(self):
self.send_signal(self.widget.Inputs.words, None)
self.wait_until_finished()

self.assertEqual(self.widget.selection, [])
self.assertIsNone(self.get_output(self.widget.Outputs.matching_docs))
self.assertEqual(self.widget.selection, [0])
self.assertIsNotNone(self.get_output(self.widget.Outputs.matching_docs))
self.assertIsNotNone(self.get_output(self.widget.Outputs.other_docs))
self.assertIsNotNone(self.get_output(self.widget.Outputs.corpus))

self.send_signal(self.widget.Inputs.corpus, None)
self.wait_until_finished()

self.assertEqual(self.widget.selection, [])
self.assertIsNone(self.get_output(self.widget.Outputs.matching_docs))
self.assertIsNone(self.get_output(self.widget.Outputs.other_docs))
self.assertIsNone(self.get_output(self.widget.Outputs.corpus))

def test_sorted_table_selection(self):
self.widget.controls.threshold.setValue(1)

Expand Down

0 comments on commit 1a80763

Please sign in to comment.