From e754876ca48c852f16bffa99a8f6711539b0a96a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Primo=C5=BE=20Godec?=
Date: Mon, 7 Sep 2020 10:52:35 +0200
Subject: [PATCH] Document embedders: additional languages
---
.../text/vectorization/document_embedder.py | 34 ++++++++++++++++++-
.../text/widgets/owdocumentembedding.py | 20 ++++++-----
2 files changed, 45 insertions(+), 9 deletions(-)
diff --git a/orangecontrib/text/vectorization/document_embedder.py b/orangecontrib/text/vectorization/document_embedder.py
index 48da1a6cc..133c3273d 100644
--- a/orangecontrib/text/vectorization/document_embedder.py
+++ b/orangecontrib/text/vectorization/document_embedder.py
@@ -15,7 +15,39 @@
AGGREGATORS = ['Mean', 'Sum', 'Max', 'Min']
AGGREGATORS_L = ['mean', 'sum', 'max', 'min']
-LANGS_TO_ISO = {'English': 'en', 'Slovenian': 'sl', 'German': 'de'}
+LANGS_TO_ISO = {  # language name (offered in the widget's combo box) -> ISO language code
+ 'English': 'en',
+ 'Slovenian': 'sl',
+ 'German': 'de',
+ 'Arabic': 'ar',
+ 'Azerbaijani': 'az',
+ 'Bengali': 'bn',
+ 'Chinese': 'zh',
+ 'Danish': 'da',
+ 'Dutch': 'nl',
+ 'Finnish': 'fi',
+ 'French': 'fr',
+ 'Greek': 'el',
+ 'Hebrew': 'he',
+ 'Hindi': 'hi',
+ 'Hungarian': 'hu',
+ 'Indonesian': 'id',
+ 'Italian': 'it',
+ 'Japanese': 'ja',
+ 'Kazakh': 'kk',
+ 'Korean': 'ko',
+ 'Nepali': 'ne',
+ 'Norwegian (Bokm\u00e5l)': 'no',
+ 'Norwegian (Nynorsk)': 'nn',
+ 'Polish': 'pl',
+ 'Portuguese': 'pt',
+ 'Romanian': 'ro',
+ 'Russian': 'ru',
+ 'Spanish': 'es',
+ 'Swedish': 'sv',
+ 'Tajik': 'tg',
+ 'Turkish': 'tr'
+}
LANGUAGES = list(LANGS_TO_ISO.values())
diff --git a/orangecontrib/text/widgets/owdocumentembedding.py b/orangecontrib/text/widgets/owdocumentembedding.py
index 8f536d35d..1b9a7da4d 100644
--- a/orangecontrib/text/widgets/owdocumentembedding.py
+++ b/orangecontrib/text/widgets/owdocumentembedding.py
@@ -88,7 +88,7 @@ def __init__(self):
OWWidget.__init__(self)
ConcurrentWidgetMixin.__init__(self)
- self.languages = list(LANGS_TO_ISO.keys())
+ self.languages = sorted(LANGS_TO_ISO)  # dict iteration yields keys; alphabetical order for the combo box
self.aggregators = AGGREGATORS
self.corpus = None
self.new_corpus = None
@@ -104,13 +104,17 @@ def _setup_layout(self):
widget_box = widgetBox(self.controlArea, 'Settings')
- self.language_cb = comboBox(widget=widget_box,
- master=self,
- value='language',
- label='Language: ',
- orientation=Qt.Horizontal,
- items=self.languages,
- callback=self._option_changed)
+ self.language_cb = comboBox(
+ widget=widget_box,
+ master=self,
+ value='language',
+ label='Language: ',
+ orientation=Qt.Horizontal,
+ items=self.languages,
+ callback=self._option_changed,
+ searchable=True
+ )
+ self.language_cb.setCurrentText("English")  # NOTE(review): forces "English" after construction; items are now sorted, so confirm this stays consistent with the bound 'language' value
self.aggregator_cb = comboBox(widget=widget_box,
master=self,