Skip to content

Commit

Permalink
Add Spacy utils
Browse files Browse the repository at this point in the history
  • Loading branch information
ajdapretnar committed Jul 19, 2024
1 parent a0ad22a commit 3e92396
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 0 deletions.
4 changes: 4 additions & 0 deletions orangecontrib/text/corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,10 @@ def has_tokens(self):
""" Return whether corpus is preprocessed or not. """
return self._tokens is not None

def has_tags(self):
""" Return True when POS tags are present, i.e. `self._pos_tags` is not None. """
return self._pos_tags is not None

def _base_tokens(self):
from orangecontrib.text.preprocess import BASE_TRANSFORMER, \
BASE_TOKENIZER, PreprocessorList
Expand Down
2 changes: 2 additions & 0 deletions orangecontrib/text/language.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@
"uk": "Ukrainian",
"ur": "Urdu",
"vi": "Vietnamese",
# spaCy's language code for its multi-language model
"xx": "Multi-language",
"zh": "Chinese",
"zh_char": "Chinese - Chinese Characters",
None: None,
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ serverfiles
simhash >=1.11
shapely >=2.0
six
spacy
tweepy >=4.0.0
ufal.udpipe >=1.2.0.3
trimesh >=3.9.8 # required by alphashape
Expand Down

0 comments on commit 3e92396

Please sign in to comment.