Skip to content

Commit

Permalink
tests: add udpipe lemmatizer tests
Browse files Browse the repository at this point in the history
  • Loading branch information
robertcv committed Sep 3, 2018
1 parent 9d28c03 commit 86b99d3
Showing 1 changed file with 39 additions and 0 deletions.
39 changes: 39 additions & 0 deletions orangecontrib/text/tests/test_preprocess.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import tempfile
import unittest
import os.path

import itertools
import nltk
Expand All @@ -9,6 +10,8 @@
from orangecontrib.text import preprocess
from orangecontrib.text.corpus import Corpus
from orangecontrib.text.preprocess import Preprocessor
from orangecontrib.text.preprocess.normalize import file_to_language, \
file_to_name, language_to_name, UDPipeModels


def counted(f):
Expand Down Expand Up @@ -168,6 +171,20 @@ def test_snowball(self):
token = 'voudrais'
self.assertEqual(stemmer(token), nltk.SnowballStemmer(language='french').stem(token))

def test_udpipe(self):
    """Check UDPipe lemmatization of a single token.

    With the language set to Slovenian, the token 'sem' is expected
    to come back as the lemma 'biti'.
    """
    lemmatizer = preprocess.UDPipeLemmatizer()
    lemmatizer.language = 'Slovenian'
    lemma = lemmatizer('sem')
    self.assertEqual(lemma, 'biti')

def test_udpipe_doc(self):
    """Check UDPipe lemmatization when it also does the tokenization.

    With ``use_tokenizer`` switched on, a whole sentence is passed in
    and a list of lemmas (one per token) is expected back.
    """
    lemmatizer = preprocess.UDPipeLemmatizer()
    lemmatizer.language = 'Slovenian'
    lemmatizer.use_tokenizer = True
    lemmas = lemmatizer('Gori na gori hiša gori')
    self.assertListEqual(lemmas,
                         ['gora', 'na', 'gora', 'hiša', 'goreti'])

def test_porter_with_bad_input(self):
    """Calling the Porter stemmer with a non-string must raise TypeError."""
    porter = preprocess.PorterStemmer()
    with self.assertRaises(TypeError):
        porter(10)
Expand All @@ -177,6 +194,28 @@ def test_lookup_normalize(self):
self.assertEqual(dln.normalize('aka'), 'also known as')


class UDPipeModelsTests(unittest.TestCase):
    """Tests for the UDPipe label helpers and the ``UDPipeModels`` lookup."""

    def test_label_transform(self):
        """Check the helpers that convert between file names and labels."""
        model_file = 'slovenian-sst-ud-2.0-170801.udpipe'
        self.assertEqual(file_to_language(model_file), 'Slovenian sst')
        self.assertEqual(file_to_name(model_file),
                         'sloveniansstud2.0170801.udpipe')
        self.assertEqual(language_to_name('Slovenian sst'), 'sloveniansstud')

    def test_udpipe_model(self):
        """Check the language listing and the lookup of a single model.

        Looking up 'Slovenian' must yield the expected path inside
        ``local_data``, and that file must exist afterwards.
        NOTE(review): the original docstring says models are loaded from
        a server, so this test presumably needs network access - confirm.
        """
        models = UDPipeModels()
        self.assertIn('Slovenian', models.supported_languages)
        self.assertEqual(68, len(models.supported_languages))

        expected_path = os.path.join(models.local_data,
                                     'slovenian-ud-2.0-170801.udpipe')
        # Indexing by language returns the path of the model file.
        self.assertEqual(models['Slovenian'], expected_path)
        self.assertTrue(os.path.isfile(expected_path))


class FilteringTests(unittest.TestCase):

def setUp(self):
Expand Down

0 comments on commit 86b99d3

Please sign in to comment.