Skip to content

Commit

Permalink
preprocess: add udpipe tokenizer to report
Browse files: browse the repository at this point in the history
  • Loading branch information
robertcv committed Sep 3, 2018
1 parent 86b99d3 commit ab07afe
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions orangecontrib/text/preprocess/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,10 @@ def __str__(self):
return '\n'.join(['{}: {}'.format(name, value) for name, value in self.report()])

def report(self):
return (
if getattr(self.normalizer, 'use_tokenizer', False):
self.tokenizer = \
'UDPipe Tokenizer ({})'.format(self.normalizer.language)
rep = (
('Transformers', ', '.join(str(tr) for tr in self.transformers)
if self.transformers else None),
('Tokenizer', str(self.tokenizer) if self.tokenizer else None),
Expand All @@ -144,7 +147,8 @@ def report(self):
else None),
('Pos tagger', str(self.pos_tagger) if self.pos_tagger else None),
)

del self.tokenizer
return rep

# Module-level Preprocessor instance configured with the BASE_TRANSFORMERS and
# BASE_TOKENIZER constants (both defined outside this excerpt).
base_preprocessor = Preprocessor(transformers=BASE_TRANSFORMERS,
tokenizer=BASE_TOKENIZER)

0 comments on commit ab07afe

Please sign in to comment.