Skip to content

Commit

Permalink
Merge pull request #722 from PrimozGodec/updpip-pickle
Browse files Browse the repository at this point in the history
UDPipe Lemmatizer: remove self.model from pickle
  • Loading branch information
ajdapretnar authored Oct 8, 2021
2 parents 6ebcf6f + 3159b67 commit aad3d51
Showing 1 changed file with 17 additions and 8 deletions.
25 changes: 17 additions & 8 deletions orangecontrib/text/preprocess/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ def __init__(self, language='English', use_tokenizer=False):
self.__use_tokenizer = use_tokenizer
self.models = UDPipeModels()
self.__model = None
self.__output_format = None

@property
def use_tokenizer(self):
Expand All @@ -153,7 +152,6 @@ def __call__(self, corpus: Corpus, callback: Callable = None) -> Corpus:
except StopIteration:
raise UDPipeStopIteration

self.__output_format = udpipe.OutputFormat.newOutputFormat('epe')
if self.__use_tokenizer:
corpus = Preprocessor.__call__(self, corpus)
if callback is None:
Expand Down Expand Up @@ -184,17 +182,28 @@ def __normalize_document(self, document: str) -> List[str]:

def __getstate__(self):
    """
    Return the state to pickle, without the members that must not be stored.

    The udpipe.Model and the output format cannot be pickled, so both are
    reset to None. The ``models`` object is dropped because, together with
    serverfiles, it stores absolute paths to models on this computer, which
    would make the pickle non-transferable between computers; a new one is
    initialised in __setstate__.
    Note: the model itself is not restored on unpickling; it is expected to
    be loaded again on __call__.
    """
    state = super().__getstate__()
    # Reset the non-picklable Model and output format.
    state['_UDPipeLemmatizer__model'] = None
    state['_UDPipeLemmatizer__output_format'] = None
    # The models object, together with serverfiles, stores absolute paths to
    # models on this computer -- drop it here; __setstate__ initialises a new
    # one when unpickling.
    state.pop('models')
    return state

def __setstate__(self, state):
    """
    Restore the instance from a pickled state.

    The ``models`` entry is removed from the state in __getstate__, so a
    fresh UDPipeModels object is created here instead of being restored.
    Note: ``__model`` is not rebuilt at this point; it is expected to be
    loaded on __call__.
    """
    vars(self).update(state)
    # Re-create the models object that was left out of the pickle.
    self.models = UDPipeModels()


class LemmagenLemmatizer(BaseNormalizer):
name = 'Lemmagen Lemmatizer'
Expand Down

0 comments on commit aad3d51

Please sign in to comment.