Skip to content

Commit

Permalink
Wikipedia - add language to corpus
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Feb 28, 2023
1 parent ef1dd29 commit d0103c9
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 5 deletions.
7 changes: 7 additions & 0 deletions orangecontrib/text/tests/test_wikipedia.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,20 @@ def test_search(self, _, search_mock):
self.assertListEqual(
[["Article 1"], ["Article 2"]], result[:, "Title"].metas.tolist()
)
self.assertEqual("en", result.language)

self.assertEqual(on_progress.call_count, 2)
progress = 0
for arg in on_progress.call_args_list:
self.assertGreater(arg[0][0], progress)
progress = arg[0][0]

# if searched in the "it" language, the Corpus's language should be "it"
result = api.search(
"it", ["Clinton"], articles_per_query=2, on_progress=on_progress
)
self.assertEqual("it", result.language)

@patch(
"orangecontrib.text.wikipedia_api.wikipedia.search",
side_effect=[ARTICLES[:3], [ARTICLES[4]]],
Expand Down
2 changes: 1 addition & 1 deletion orangecontrib/text/widgets/owwikipedia.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def __init__(self, *args, **kwargs):

# Language
row += 1
languages = tuple(sorted(set(LANG2ISO.items()) - {(None, None)}))
languages = sorted(set(LANG2ISO.items()) - {(None, None)})
language_edit = ComboBox(self, 'language', languages)
layout.addWidget(QLabel('Language:'), row, 0, 1, self.label_width)
layout.addWidget(language_edit, row, self.label_width, 1, self.widgets_width)
Expand Down
2 changes: 0 additions & 2 deletions orangecontrib/text/widgets/utils/widgets.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

class ListEdit(QTextEdit):
PLACEHOLDER_COLOR = QColor(128, 128, 128)
USER_TEXT_COLOR = QColor(0, 0, 0)

def __init__(self, master=None, attr=None, placeholder_text=None,
fixed_height=None, *args):
Expand Down Expand Up @@ -55,7 +54,6 @@ def focusInEvent(self, event):
if self.toPlainText() == '':
self.clear()
self.setFontItalic(False)
self.setTextColor(self.USER_TEXT_COLOR)

def focusOutEvent(self, event):
self.set_placeholder()
Expand Down
11 changes: 9 additions & 2 deletions orangecontrib/text/wikipedia_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,15 @@ def search(self, lang, queries, articles_per_query=10, should_break=None, on_pro
if callable(should_break) and should_break():
break

return Corpus.from_documents(results, 'Wikipedia', self.attributes,
self.class_vars, self.metas, title_indices=[-1])
return Corpus.from_documents(
results,
"Wikipedia",
self.attributes,
self.class_vars,
self.metas,
title_indices=[-1],
language=lang,
)

def _get(self, article, query, should_break, recursive=True):
try:
Expand Down

0 comments on commit d0103c9

Please sign in to comment.