Skip to content

Commit

Permalink
Merge pull request #966 from PrimozGodec/corpus-settings
Browse files Browse the repository at this point in the history
[FIX] Corpus - Fix contexts to be compatible between sessions
  • Loading branch information
lanzagar authored Apr 24, 2023
2 parents 9557862 + a2e3ea5 commit 46493fa
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions orangecontrib/text/widgets/owcorpus.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import hashlib
import os
from typing import List

import numpy as np
from AnyQt.QtCore import Qt
Expand Down Expand Up @@ -51,7 +53,7 @@ def open_context(self, widget, corpus):
def new_context(self, corpus, attributes, metas):
    """Create a new context and record the corpus fingerprint on it.

    Delegates to the parent handler's ``new_context`` and then stores two
    extra attributes on the returned context: ``documents_hash`` (computed
    from ``corpus.documents``) and ``language`` (taken from
    ``corpus.language``).
    """
    context = super().new_context(corpus, attributes, metas)
    # Only the digest-based hash is stored. The builtin ``hash()`` of str
    # objects is salted per interpreter process, so a value saved in one
    # session would not compare equal in the next; assigning it first and
    # then overwriting it was a dead store.
    context.documents_hash = self.__compute_hash(corpus.documents)
    context.language = corpus.language
    return context

Expand All @@ -62,7 +64,7 @@ def match(self, context, corpus, attrs, metas):
"""
if (
hasattr(context, "documents_hash")
and context.documents_hash != hash(tuple(corpus.documents))
and context.documents_hash != self.__compute_hash(corpus.documents)
or hasattr(context, "language")
and context.language != corpus.language
):
Expand All @@ -73,6 +75,11 @@ def decode_setting(self, setting, value, corpus=None, *args):
"""Modifying decode setting to work with Corpus instead of domain"""
return super().decode_setting(setting, value, corpus.domain, *args)

@staticmethod
def __compute_hash(texts: List[str]) -> int:
    """Return the MD5 digest of the space-joined texts as an integer.

    The texts are joined with single spaces, encoded as UTF-8 and hashed
    with MD5; the 128-bit digest is returned as a non-negative int.
    """
    joined = " ".join(texts)
    digest = hashlib.md5(joined.encode("utf-8")).digest()
    # Reading the raw digest as a big-endian integer yields the same value
    # as parsing its hexadecimal representation in base 16.
    return int.from_bytes(digest, "big")


class OWCorpus(OWWidget, ConcurrentWidgetMixin):
name = "Corpus"
Expand Down

0 comments on commit 46493fa

Please sign in to comment.