From 3159b6728ac4b025b8ff8e39df973e5f80f93a1d Mon Sep 17 00:00:00 2001 From: Primoz Godec Date: Tue, 21 Sep 2021 08:57:13 +0200 Subject: [PATCH] UDPipe Lemmatizer: remove self.model from pickle --- orangecontrib/text/preprocess/normalize.py | 25 +++++++++++++++------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/orangecontrib/text/preprocess/normalize.py b/orangecontrib/text/preprocess/normalize.py index f4b6c16d1..7a5df3047 100644 --- a/orangecontrib/text/preprocess/normalize.py +++ b/orangecontrib/text/preprocess/normalize.py @@ -136,7 +136,6 @@ def __init__(self, language='English', use_tokenizer=False): self.__use_tokenizer = use_tokenizer self.models = UDPipeModels() self.__model = None - self.__output_format = None @property def use_tokenizer(self): @@ -153,7 +152,6 @@ def __call__(self, corpus: Corpus, callback: Callable = None) -> Corpus: except StopIteration: raise UDPipeStopIteration - self.__output_format = udpipe.OutputFormat.newOutputFormat('epe') if self.__use_tokenizer: corpus = Preprocessor.__call__(self, corpus) if callback is None: @@ -184,17 +182,28 @@ def __normalize_document(self, document: str) -> List[str]: def __getstate__(self): """ - This function remove udpipe.Model that cannot be pickled - - Note: __setstate__ is not required since we do not make any harm if - model is not restored. It will be loaded on __call__ + This function removes udpipe.Model that cannot be pickled and models that + include absolute paths on computer -- so it is not transferable between + computers. """ state = super().__getstate__() - # Remove the unpicklable Model and output format. + # Remove the nonpicklable Model. 
state['_UDPipeLemmatizer__model'] = None - state['_UDPipeLemmatizer__output_format'] = None + # models object together with serverfiles store absolute paths to models + # on computers -- we will init it when unpickling -- see __setstate__ + state.pop('models') return state + def __setstate__(self, state): + """ + Called on unpickling the object. It initializes a new models object which was + deleted from the dictionary in __getstate__. + + Note: __model will be loaded on __call__ + """ + self.__dict__.update(state) + self.models = UDPipeModels() + class LemmagenLemmatizer(BaseNormalizer): name = 'Lemmagen Lemmatizer'