Skip to content

Commit

Permalink
Corpus Viewer: Output selected data and memorize selection
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Aug 24, 2020
1 parent acd3a9c commit e207135
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 43 deletions.
123 changes: 80 additions & 43 deletions orangecontrib/text/widgets/owcorpusviewer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import re
import sre_constants
from itertools import chain
from typing import Set

from AnyQt.QtCore import (
Qt, QUrl, QItemSelection, QItemSelectionModel, QItemSelectionRange
Expand Down Expand Up @@ -38,10 +39,9 @@ class Outputs:
search_indices = ContextSetting([], exclude_metas=False) # features included in search
display_indices = ContextSetting([], exclude_metas=False) # features for display
display_features = ContextSetting([], exclude_metas=False)
selected_documents = ContextSetting([])
regexp_filter = ContextSetting("")

selection = [0] # TODO: DataHashContextHandler

show_tokens = Setting(False)
autocommit = Setting(True)

Expand All @@ -54,7 +54,6 @@ def __init__(self):

self.corpus = None # Corpus
self.corpus_docs = None # Documents generated from Corpus
self.output_mask = [] # Output corpus indices
self.doc_webview = None # WebView for showing content
self.search_features = [] # two copies are needed since Display allows drag & drop
self.display_list_indices = [0]
Expand Down Expand Up @@ -101,7 +100,6 @@ def __init__(self):
orientation=Qt.Horizontal,
childrenCollapsible=False,
)

# Document list
self.doc_list = QTableView()
self.doc_list.setSelectionBehavior(QTableView.SelectRows)
Expand All @@ -113,8 +111,9 @@ def __init__(self):

self.doc_list_model = QStandardItemModel(self)
self.doc_list.setModel(self.doc_list_model)
self.doc_list.selectionModel().selectionChanged.connect(self.show_docs)

self.doc_list.selectionModel().selectionChanged.connect(
self.selection_changed
)
# Document contents
self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)

Expand All @@ -141,21 +140,31 @@ def set_data(self, corpus=None):
self.display_features = list(filter_visible(chain(domain.variables, domain.metas)))
self.search_indices = list(range(len(self.search_features)))
self.display_indices = list(range(len(self.display_features)))
self.selection = [0]
self.selected_documents = [corpus.titles[0]]
self.openContext(self.corpus)
self.display_list_indices = self.display_indices
self.regenerate_docs()
self.list_docs()
self.update_info()
self.set_selection()
self.show_docs()
self.update_splitter()
self.commit()

def update_splitter(self):
    """
    Keep the first splitter pane from being wider than the second one.

    The two pane sizes are read from ``self.splitter``; when the first
    (the document list) is larger than the second, equal sizes are
    requested for both. This runs only when new data arrives, so a size
    chosen by the user afterwards is left untouched.
    """
    list_width, content_width = self.splitter.sizes()
    if list_width > content_width:
        # Asking for the same size twice splits the space evenly.
        self.splitter.setSizes([list_width] * 2)

def reset_widget(self):
# Corpus
self.corpus = None
self.corpus_docs = None
self.output_mask = []
self.display_features = []
# Widgets
self.search_listbox.clear()
Expand Down Expand Up @@ -185,7 +194,6 @@ def list_docs(self):
def is_match(x):
return not bool(search_keyword) or reg.search(x)

self.output_mask.clear()
self.doc_list_model.clear()

for i, (doc, title, content) in enumerate(zip(self.corpus, self.corpus.titles,
Expand All @@ -195,28 +203,53 @@ def is_match(x):
item.setData(str(title), Qt.DisplayRole)
item.setData(doc, Qt.UserRole)
self.doc_list_model.appendRow(item)
self.output_mask.append(i)

def reset_selection(self):
    """
    Select the first row of the document list, or blank the content view
    when the list model has no rows.
    """
    if self.doc_list_model.rowCount() <= 0:
        # Nothing to select: show an empty page instead.
        self.doc_webview.setHtml('')
        return
    self.doc_list.selectRow(0)

def set_selection(self):
def get_selected_documents_from_view(self) -> Set[str]:
    """
    Collect the display text of every row selected in the document list.

    Returns
    -------
    Set with the ``Qt.DisplayRole`` data (the document names) of the rows
    currently selected in the QTableView
    """
    selected_rows = self.doc_list.selectionModel().selectedRows()
    return {index.data(Qt.DisplayRole) for index in selected_rows}

def set_selection(self) -> None:
"""
Select documents in selected_documents attribute in the view
"""
view = self.doc_list
if len(self.selection):
selection = QItemSelection()

for row in self.selection:
selection.append(
QItemSelectionRange(
view.model().index(row, 0),
view.model().index(row, 0)
)
)
view.selectionModel().select(
selection, QItemSelectionModel.ClearAndSelect)
model = view.model()

previously_selected = self.selected_documents.copy()
selection = QItemSelection()
for row in range(model.rowCount()):
document = model.data(model.index(row, 0), Qt.DisplayRole)
if document in self.selected_documents:
selection.append(QItemSelectionRange(
view.model().index(row, 0),
view.model().index(row, 0)
))
view.selectionModel().select(
selection, QItemSelectionModel.ClearAndSelect
)
# select() emits the selection-changed signal, which calls
# selection_changed. When filtering, this means that documents which
# are currently filtered out get removed from self.selected_documents.
# We still want to keep them so that they are selected again after the
# user removes the filter.
self.selected_documents = previously_selected

def selection_changed(self) -> None:
    """
    React to a change of the selection in the document list.

    Stores the names of the documents selected in the view in
    ``self.selected_documents``, then refreshes the displayed documents
    and sends the outputs again.
    """
    # The leftover debug print was removed: this runs on every selection
    # change and must not write to stdout.
    self.selected_documents = self.get_selected_documents_from_view()
    self.show_docs()
    self.commit()

def show_docs(self):
""" Show the selected documents in the right area """
Expand Down Expand Up @@ -308,9 +341,6 @@ def show_docs(self):
if i in self.search_indices]

html = '<table>'
selection = [i.row() for i in self.doc_list.selectionModel().selectedRows()]
if selection != []:
self.selection = selection
for doc_count, index in enumerate(self.doc_list.selectionModel().selectedRows()):
if doc_count > 0: # add split
html += '<tr class="line separator"><td/><td/></tr>' \
Expand Down Expand Up @@ -376,7 +406,7 @@ def regenerate_docs(self):
def refresh_search(self):
if self.corpus is not None:
self.list_docs()
self.reset_selection()
self.set_selection()
self.update_info()
self.commit()

Expand All @@ -399,16 +429,23 @@ def update_info(self):
self.ngram_range = ''

def commit(self):
if self.corpus is not None:
matched = self.corpus[self.output_mask]
output_mask = set(self.output_mask)
unmatched_mask = [i for i in range(len(self.corpus)) if i not in output_mask]
unmatched = self.corpus[unmatched_mask]
self.Outputs.matching_docs.send(matched)
self.Outputs.other_docs.send(unmatched)
else:
self.Outputs.matching_docs.send(None)
self.Outputs.other_docs.send(None)
matched = unmatched = None
corpus = self.corpus
if corpus is not None:
# it returns a set of selected documents which are in view
selected_docs = self.get_selected_documents_from_view()
titles = corpus.titles
matched_mask = [
i for i, t in enumerate(titles) if t in selected_docs
]
unmatched_mask = [
i for i, t in enumerate(titles) if t not in selected_docs
]

matched = corpus[matched_mask] if len(matched_mask) else None
unmatched = corpus[unmatched_mask] if len(unmatched_mask) else None
self.Outputs.matching_docs.send(matched)
self.Outputs.other_docs.send(unmatched)

def send_report(self):
self.report_items((
Expand Down
2 changes: 2 additions & 0 deletions orangecontrib/text/widgets/tests/test_owcorpusviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ def test_data(self):
self.assertEqual(out_corpus, self.corpus)

def test_search(self):
    """
    Send the corpus, set the regular-expression filter to "graph" and
    check that four documents arrive on the matching-documents output.
    """
    # Leftover debug prints were removed so the test run stays quiet.
    self.send_signal(self.widget.Inputs.corpus, self.corpus)
    self.widget.regexp_filter = "graph"
    self.process_events()
    out_corpus = self.get_output(self.widget.Outputs.matching_docs)
    self.assertEqual(len(out_corpus), 4)

def test_highlighting(self):
Expand Down

0 comments on commit e207135

Please sign in to comment.