Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FIX] OWCorpusViewer: mark filtered text with Python #408

Merged
merged 2 commits into from
Mar 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 28 additions & 23 deletions orangecontrib/text/widgets/owcorpusviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
from itertools import chain

from AnyQt.QtCore import (
Qt, QUrl, QItemSelection, QItemSelectionModel, QItemSelectionRange,
pyqtSlot as Slot
Qt, QUrl, QItemSelection, QItemSelectionModel, QItemSelectionRange
)

from AnyQt.QtGui import QStandardItemModel, QStandardItem
Expand Down Expand Up @@ -118,7 +117,6 @@ def __init__(self):

# Document contents
self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)
self.doc_webview.loadFinished.connect(self.highlight_docs)

self.mainArea.layout().addWidget(self.splitter)

Expand Down Expand Up @@ -321,14 +319,16 @@ def show_docs(self):
row_ind = index.data(Qt.UserRole).row_index
for ind in self.display_indices:
feature = self.display_features[ind]
mark = ' mark-area' if feature in marked_search_features else ''
value = str(index.data(Qt.UserRole)[feature.name]).replace('\n', '<br/>')
value = str(index.data(Qt.UserRole)[feature.name])
if feature in marked_search_features:
value = self.__mark_text(value)
value = value.replace('\n', '<br/>')
is_image = feature.attributes.get('type', '') == 'image'
if is_image and value != '?':
value = '<img src="{}"></img>'.format(value)
html += '<tr><td class="variables"><strong>{}:</strong></td>' \
'<td class="content{}">{}</td></tr>'.format(
feature.name, mark, value)
'<td class="content">{}</td></tr>'.format(
feature.name, value)

if self.show_tokens:
html += '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>' \
Expand All @@ -339,6 +339,27 @@ def show_docs(self):
base = QUrl.fromLocalFile(__file__)
self.doc_webview.setHtml(HTML.format(html), base)

def __mark_text(self, text):
    """
    Wrap every match of the current search filter in ``<mark>`` tags.

    The pattern is ``self.regexp_filter`` with leading and trailing ``|``
    characters stripped; it is compiled case-insensitively and in
    multi-line mode.

    Parameters
    ----------
    text : str
        The text to search.

    Returns
    -------
    str
        ``text`` with each non-empty match replaced by
        ``<mark data-markjs="true">match</mark>``. ``text`` is returned
        unchanged when the stripped filter is empty, when it is not a
        valid regular expression, or when nothing matches.
    """
    search_keyword = self.regexp_filter.strip('|')
    if not search_keyword:
        return text

    try:
        reg = re.compile(search_keyword, re.IGNORECASE | re.MULTILINE)
    except re.error:
        # An invalid (e.g. half-typed) pattern simply means "no marking".
        return text

    def wrap(match):
        found = match.group(0)
        # Patterns such as ``x*`` also match the empty string; wrapping
        # those would scatter empty <mark></mark> pairs through the text.
        if not found:
            return found
        return '<mark data-markjs="true">{}</mark>'.format(found)

    # A callable replacement is inserted literally (no backslash or group
    # reference processing), so the matched text is preserved verbatim.
    return reg.sub(wrap, text)

def search_features_changed(self):
self.regenerate_docs()
self.refresh_search()
Expand All @@ -359,22 +380,6 @@ def refresh_search(self):
self.update_info()
self.commit()

@Slot()
def highlight_docs(self):
    """
    Ask the document web view to highlight the current search filter.

    The filter is ``self.regexp_filter`` with leading and trailing ``|``
    characters stripped. When it is non-empty, a JavaScript snippet that
    calls ``mark(<filter>)`` is run in ``self.doc_webview``; otherwise
    nothing happens.
    """
    # Local import so this block does not depend on the file's import list.
    import json

    search_keyword = self.regexp_filter.strip('|')
    if not search_keyword:
        return

    # json.dumps produces a quoted string literal with backslashes, double
    # quotes, control characters and non-ASCII characters escaped. Escaping
    # only backslashes by hand let a `"` or a newline in the filter
    # terminate the literal and break (or alter) the script.
    js_literal = json.dumps(search_keyword)

    # mark is undefined when clearing the view (`setHtml('')`). Maybe
    # set and template html with all the scripts, ... but no contents?
    self.doc_webview.runJavaScript(
        '''
        if (typeof mark !== "undefined") {{
            mark({});
        }}
        '''.format(js_literal)
    )

def update_info(self):
if self.corpus is not None:
self.n_documents = len(self.corpus)
Expand Down
17 changes: 0 additions & 17 deletions orangecontrib/text/widgets/resources/highlighter.js

This file was deleted.

22 changes: 22 additions & 0 deletions orangecontrib/text/widgets/tests/test_owcorpusviewer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest
from AnyQt.QtTest import QSignalSpy
from Orange.widgets.tests.base import WidgetTest
from Orange.data import StringVariable

from orangecontrib.text.corpus import Corpus
from orangecontrib.text.widgets.owcorpusviewer import OWCorpusViewer
Expand Down Expand Up @@ -34,6 +35,27 @@ def test_highlighting(self):
html = self.widget.doc_webview.html()
self.assertIn('<mark data-markjs="true">', html)

def test_highlighting_non_latin(self):
    """
    A word-boundary filter on Cyrillic text must yield ``<mark>`` tags
    in the HTML shown by the document web view.
    """
    # One-document corpus whose only meta column is the text itself.
    corpus = Corpus.from_documents(
        [{'content': """царстве есть сад с молодильными яблоками"""}],
        'RussianDocument',
        metas=[(StringVariable('content'), lambda doc: doc.get('content'))],
    )

    widget = self.widget
    self.send_signal(widget.Inputs.corpus, corpus)
    widget.regexp_filter = "\\bсад\\b"
    self.process_events()

    webview = widget.doc_webview
    # NOTE(review): the result of this call is discarded; presumably it is
    # kept for a side effect on the web view -- confirm before removing.
    webview.html()
    load_spy = QSignalSpy(webview.loadFinished)
    load_spy.wait()

    self.assertIn('<mark data-markjs="true">', webview.html())


if __name__ == "__main__":
unittest.main()