diff --git a/orangecontrib/text/widgets/owstatistics.py b/orangecontrib/text/widgets/owstatistics.py
index dbe931dc5..ebde42410 100644
--- a/orangecontrib/text/widgets/owstatistics.py
+++ b/orangecontrib/text/widgets/owstatistics.py
@@ -3,7 +3,7 @@
 from copy import copy
 from itertools import groupby
 from string import punctuation
-from typing import Callable, List, Optional, Tuple, Union, Generator, Iterator
+from typing import Callable, List, Optional, Tuple, Union, Generator, Iterator, Dict
 
 import numpy as np
 from AnyQt.QtWidgets import QComboBox, QGridLayout, QLabel, QLineEdit, QSizePolicy
@@ -14,6 +14,7 @@ from Orange.widgets.utils.widgetpreview import WidgetPreview
 from Orange.widgets.widget import Input, Output, OWWidget
 from nltk import tokenize
+from orangecanvas.gui.utils import disconnected
 from orangewidget.widget import Msg
 
 from orangecontrib.text import Corpus
@@ -73,28 +74,6 @@ def count_appearances(
     return sum(d.lower().count(c) for c in characters for d in document)
 
 
-def preprocess_only_words(corpus: Corpus) -> Corpus:
-    """
-    Apply the preprocessor that splits words, transforms them to lower case
-    (and removes punctuations).
-
-    Parameters
-    ----------
-    corpus
-        Corpus on which the preprocessor will be applied.
-
-    Returns
-    -------
-    Preprocessed corpus. Result of pre-processing is saved in tokens/ngrams.
-    """
-    p = PreprocessorList(
-        [LowercaseTransformer(),
-         # by default regexp keeps only words (no punctuations, no spaces)
-         RegexpTokenizer()]
-    )
-    return p(corpus)
-
-
 def get_source(corpus: Corpus, source: str) -> Union[List[str], Iterator[List[str]]]:
     """
     Extract source from corpus according to source variable:
@@ -252,7 +231,6 @@ def per_cent_unique_words(
     Ratio between unique words count and all words count
     """
     assert source == Sources.TOKENS
-    corpus = preprocess_only_words(corpus)
 
     def perc_unique(tokens: str):
         callback()
@@ -270,7 +248,6 @@ def starts_with(
     Number of words that starts with the string in `prefix`.
     """
     assert source == Sources.TOKENS
-    corpus = preprocess_only_words(corpus)
 
     def number_starts_with(tokens: List[str]):
         callback()
@@ -289,7 +266,6 @@ def ends_with(
     Number of words that ends with the string in `postfix`.
     """
     assert source == Sources.TOKENS
-    corpus = preprocess_only_words(corpus)
 
     def number_ends_with(tokens: List[str]):
         callback()
@@ -393,7 +369,6 @@ def lix(
     https://en.wikipedia.org/wiki/Lix_(readability_test)
     """
     assert source == Sources.TOKENS
-    corpus = preprocess_only_words(corpus)
     tokenizer = tokenize.PunktSentenceTokenizer()
 
     def lix_index(document, tokens):
@@ -426,18 +401,21 @@ class ComputeValue:
     pattern
         Some statistics need additional parameter with the pattern
        (e.g. starts with), for others it is set to empty string.
+    source
+        Part of the corpus used for computation: either tokens/ngrams or whole documents
     """
 
-    def __init__(self, function: Callable, pattern: str) -> None:
+    def __init__(self, function: Callable, pattern: str, source: str) -> None:
         self.function = function
         self.pattern = pattern
+        self.source = source
 
     def __call__(self, data: Corpus) -> np.ndarray:
         """
        This function compute values on new table.
         """
         # lambda is added as a placeholder for a callback.
-        return self.function(data, self.pattern, lambda: True)[0]
+        return self.function(data, self.pattern, self.source, lambda: True)[0]
 
     def __eq__(self, other):
         return self.function == other.function and self.pattern == other.pattern
@@ -455,7 +433,7 @@ def __hash__(self):
     ("Word count", words_count, None, (Sources.DOCUMENTS,)),
     ("Character count", characters_count, None, (Sources.DOCUMENTS, Sources.TOKENS)),
     ("N-gram count", n_gram_count, None, (Sources.TOKENS,)),
-    ("Average word length", average_word_len, None, (Sources.DOCUMENTS,)),  # todo: discuss
+    ("Average word length", average_word_len, None, (Sources.DOCUMENTS,)),
     ("Punctuation count", punctuation_count, None, (Sources.DOCUMENTS,)),
     ("Capital letter count", capital_count, None, (Sources.DOCUMENTS,)),
     ("Vowel count", vowel_count, "a,e,i,o,u", (Sources.DOCUMENTS,)),
@@ -505,7 +483,7 @@ def advance():
         fun = STATISTICS_FUNCTIONS[s]
         result = fun(corpus, patern, source, advance)
         if result is not None:
-            result = result + (ComputeValue(fun, patern),)
+            result = result + (ComputeValue(fun, patern, source),)
         state.set_partial_result((s, patern, source, result))
 
 
@@ -530,6 +508,7 @@ class Warning(OWWidget.Warning):
     want_main_area = False
     mainArea_width_height_ratio = None
 
+    settings_version = 2
     # rules used to reset the active rules
     default_rules = [(0, "", STATISTICS[0][-1][0]), (1, "", STATISTICS[0][-1][0])]
     active_rules: List[Tuple[int, str, str]] = Setting(default_rules[:])
@@ -633,10 +612,10 @@ def _add_line():
        def _remove_line():
            self.statistics_combos.pop().deleteLater()
            self.line_edits.pop().deleteLater()
+            self.source_combos.pop().deleteLater()
            self.remove_buttons.pop().deleteLater()
 
        def _fix_tab_order():
-            # TODO: write it differently - check create class
            for i, (r, c, l, s) in enumerate(
                zip(self.active_rules, self.statistics_combos, self.line_edits, self.source_combos)
            ):
@@ -646,9 +625,10 @@ def _fix_tab_order():
                l.setVisible(True)
            else:
                l.setVisible(False)
-            s.clear()
-            s.addItems(STATISTICS_DEFAULT_SOURCES[r[0]])
-            s.setCurrentText(r[2])
+            with disconnected(s.currentIndexChanged, self._sync_edit_source_combo):
+                s.clear()
+                s.addItems(STATISTICS_DEFAULT_SOURCES[r[0]])
+                s.setCurrentText(r[2])
 
        n = len(self.active_rules)
        while n > len(self.statistics_combos):
@@ -673,7 +653,7 @@ def _sync_edit_combo(self) -> None:
        combo = self.sender()
        edit_index = self.statistics_combos.index(combo)
        selected_i = combo.currentIndex()
-        default_value = STATISTICS_DEFAULT_VALUE[selected_i]
+        default_value = STATISTICS_DEFAULT_VALUE[selected_i] or ""
        default_source = STATISTICS_DEFAULT_SOURCES[selected_i][0]
        self.active_rules[edit_index] = (selected_i, default_value, default_source)
        self.adjust_n_rule_rows()
@@ -682,18 +662,14 @@ def _sync_edit_line(self) -> None:
        """ Update rules when line edit value changed """
        line_edit = self.sender()
        edit_index = self.line_edits.index(line_edit)
-        self.active_rules[edit_index] = (
-            self.active_rules[edit_index][0],
-            line_edit.text(),
-            STATISTICS_DEFAULT_SOURCES[edit_index][0]
-        )
+        arules = self.active_rules[edit_index]
+        self.active_rules[edit_index] = (arules[0], line_edit.text(), arules[2])
 
    def _sync_edit_source_combo(self) -> None:
-        """ Update rules when line edit value changed """
+        """ Update rules when the source value changes """
        combo = self.sender()
        edit_index = self.source_combos.index(combo)
        value = combo.currentText()
-        print(value)
        arules = self.active_rules[edit_index]
        self.active_rules[edit_index] = (arules[0], arules[1], value)
 
@@ -766,6 +742,21 @@ def output_results(self) -> None:
         )
         self.Outputs.corpus.send(new_corpus)
 
+    @classmethod
+    def migrate_settings(cls, settings: Dict, version: int):
+        def def_source(idx):
+            """Return the source whose behaviour is most similar to the previous version"""
+            if STATISTICS_NAMES[idx] == "Regex":
+                # regex was working on tokens in the previous version
+                return Sources.TOKENS
+            # all other statistics that allow both sources were working on documents
+            return STATISTICS_DEFAULT_SOURCES[idx][0]
+
+        if version < 2:
+            if "active_rules" in settings:
+                new_rules = [(r, v, def_source(r)) for r, v in settings["active_rules"]]
+                settings["active_rules"] = new_rules
+
 
 if __name__ == "__main__":
     WidgetPreview(OWStatistics).run(Corpus.from_file("book-excerpts"))
diff --git a/orangecontrib/text/widgets/tests/test_owstatistics.py b/orangecontrib/text/widgets/tests/test_owstatistics.py
index e802d9622..e3082e406 100644
--- a/orangecontrib/text/widgets/tests/test_owstatistics.py
+++ b/orangecontrib/text/widgets/tests/test_owstatistics.py
@@ -7,6 +7,12 @@
 from Orange.widgets.tests.base import WidgetTest
 from Orange.widgets.tests.utils import simulate
 from orangecontrib.text import Corpus
+from orangecontrib.text.preprocess import (
+    PreprocessorList,
+    LowercaseTransformer,
+    RegexpTokenizer,
+    StopwordsFilter,
+)
 from orangecontrib.text.tag import AveragedPerceptronTagger
 from orangecontrib.text.widgets.owstatistics import (
     STATISTICS_NAMES,
@@ -57,14 +63,12 @@ def _set_feature(
         """
         simulate.combobox_activate_item(self.widget.statistics_combos[0], feature_name)
         self.widget.line_edits[0].setText(value)
-        print(self.widget.active_rules, feature_name, value, source)
         simulate.combobox_activate_item(self.widget.source_combos[0], source)
-        print(self.widget.active_rules)
         for button in self.widget.remove_buttons[1:]:
             button.click()
 
     def _compute_features(
-            self, feature_name: str, value: str = "", source: str = Sources.DOCUMENTS
+        self, feature_name: str, value: str = "", source: str = Sources.DOCUMENTS
     ) -> Corpus:
         """
         Send `self.corpus` to widget, set statistic which need bo be computed,
@@ -118,7 +122,6 @@ def test_characters_count(self):
         self.send_signal(self.widget.Inputs.corpus, None)
         self.assertIsNone(self.get_output(self.widget.Outputs.corpus))
 
-    # todo: make different preprocessing and the test all tokens statistics again
     def test_n_gram_count(self):
         """ Test n-grams count statistic """
@@ -176,14 +179,14 @@ def test_per_cent_unique_words(self):
         """ Test per-cent unique words statistic """
         data = self._compute_features("Per cent unique terms", source=Sources.TOKENS)
         np.testing.assert_array_almost_equal(
-            data.X.flatten(), [1, 1, 0.909091, 1]
+            data.X.flatten(), [1, 1, 0.84615, 1], decimal=5
         )
 
         with self.corpus.unlocked():
-            self.corpus[1][-1] = ""
+            self.corpus[1][-1] = " "
         data = self._compute_features("Per cent unique terms", source=Sources.TOKENS)
         np.testing.assert_array_almost_equal(
-            data.X.flatten(), [1, np.nan, 0.909091, 1]
+            data.X.flatten(), [1, np.nan, 0.84615, 1], decimal=5
         )
 
         self.send_signal(self.widget.Inputs.corpus, None)
@@ -202,10 +205,10 @@ def test_starts_with(self):
     def test_ends_with(self):
         """ Test ends with count statistic """
-        data = self._compute_features("Ends with", "t")
+        data = self._compute_features("Ends with", "t", Sources.TOKENS)
         np.testing.assert_array_almost_equal(data.X.flatten(), [3, 3, 1, 2])
 
-        data = self._compute_features("Ends with", "et")
+        data = self._compute_features("Ends with", "et", Sources.TOKENS)
         np.testing.assert_array_almost_equal(data.X.flatten(), [1, 1, 0, 0])
 
         self.send_signal(self.widget.Inputs.corpus, None)
@@ -213,32 +216,50 @@ def test_ends_with(self):
     def test_contains(self):
         """ Test contains count statistic """
-        data = self._compute_features("Contains", "t")
+        data = self._compute_features("Contains", "t", Sources.DOCUMENTS)
         np.testing.assert_array_almost_equal(data.X.flatten(), [5, 4, 4, 9])
 
-        data = self._compute_features("Contains", "et")
+        data = self._compute_features("Contains", "et", Sources.DOCUMENTS)
         np.testing.assert_array_almost_equal(data.X.flatten(), [2, 1, 0, 0])
 
-        data = self._compute_features("Contains", "is")
+        data = self._compute_features("Contains", "is", Sources.DOCUMENTS)
         np.testing.assert_array_almost_equal(data.X.flatten(), [1, 2, 2, 0])
 
+        data = self._compute_features("Contains", "t", Sources.TOKENS)
+        np.testing.assert_array_almost_equal(data.X.flatten(), [5, 4, 4, 9])
+
+        data = self._compute_features("Contains", " ", Sources.TOKENS)
+        np.testing.assert_array_almost_equal(data.X.flatten(), [0, 0, 0, 0])
+
         self.send_signal(self.widget.Inputs.corpus, None)
         self.assertIsNone(self.get_output(self.widget.Outputs.corpus))
 
     def test_regex(self):
         """ Test regex statistic """
         # words that contain digit
-        data = self._compute_features("Regex", r"\w*\d\w*")
+        data = self._compute_features("Regex", r"\w*\d\w*", Sources.DOCUMENTS)
         np.testing.assert_array_almost_equal(data.X.flatten(), [0, 0, 0, 1])
 
-        # words that contain digit
-        data = self._compute_features("Regex", r"\w*is\w*")
+        # words that contain "is"
+        data = self._compute_features("Regex", r"\w*is\w*", Sources.DOCUMENTS)
         np.testing.assert_array_almost_equal(data.X.flatten(), [1, 2, 2, 0])
 
         # count specific n-gram
-        data = self._compute_features("Regex", r"ipsum\ dolor")
+        data = self._compute_features("Regex", r"ipsum\ dolor", Sources.DOCUMENTS)
         np.testing.assert_array_almost_equal(data.X.flatten(), [1, 0, 0, 0])
 
+        # words that contain digit
+        data = self._compute_features("Regex", r"\w*\d\w*", Sources.TOKENS)
+        np.testing.assert_array_almost_equal(data.X.flatten(), [0, 0, 0, 1])
+
+        # words that contain "is"
+        data = self._compute_features("Regex", r"\w*is\w*", Sources.TOKENS)
+        np.testing.assert_array_almost_equal(data.X.flatten(), [1, 2, 2, 0])
+
+        # count specific n-gram
+        data = self._compute_features("Regex", r"ipsum\ dolor", Sources.TOKENS)
+        np.testing.assert_array_almost_equal(data.X.flatten(), [0, 0, 0, 0])
+
         self.send_signal(self.widget.Inputs.corpus, None)
         self.assertIsNone(self.get_output(self.widget.Outputs.corpus))
 
@@ -249,7 +270,7 @@ def test_pos(self):
         - test with corpus that has pos tags
         """
         self.send_signal(self.widget.Inputs.corpus, self.corpus)
-        self._set_feature("POS tag", "NN")
+        self._set_feature("POS tag", "NN", Sources.TOKENS)
         self.widget.apply()
         self.wait_until_finished()
         res = self.get_output(self.widget.Outputs.corpus)
@@ -260,7 +281,7 @@ def test_pos(self):
         result = tagger(self.corpus)
         self.send_signal(self.widget.Inputs.corpus, result)
-        self._set_feature("POS tag", "NN")
+        self._set_feature("POS tag", "NN", Sources.TOKENS)
         self.widget.apply()
         self.wait_until_finished()
         res = self.get_output(self.widget.Outputs.corpus)
@@ -275,7 +296,7 @@ def test_yule(self):
         - test with corpus that has pos tags
         """
         self.send_signal(self.widget.Inputs.corpus, self.corpus)
-        self._set_feature("Yule's I")
+        self._set_feature("Yule's I", source=Sources.TOKENS)
         self.widget.apply()
         self.wait_until_finished()
         res = self.get_output(self.widget.Outputs.corpus)
@@ -288,7 +309,7 @@ def test_yule(self):
         result = tagger(self.corpus)
         self.send_signal(self.widget.Inputs.corpus, result)
-        self._set_feature("Yule's I")
self._set_feature("Yule's I", source=Sources.TOKENS) self.widget.apply() self.wait_until_finished() res = self.get_output(self.widget.Outputs.corpus) @@ -304,7 +325,7 @@ def test_lix(self): with self.corpus.unlocked(): self.corpus[1][-1] = "simple. simple." self.send_signal(self.widget.Inputs.corpus, self.corpus) - self._set_feature("LIX index") + self._set_feature("LIX index", source=Sources.TOKENS) self.widget.apply() self.wait_until_finished() res = self.get_output(self.widget.Outputs.corpus) @@ -312,6 +333,40 @@ def test_lix(self): # the second document will have lower complexity than the first one self.assertLess(res[1][0], res[0][0]) + def test_stats_different_preprocessing(self): + pp = [LowercaseTransformer(), RegexpTokenizer(), StopwordsFilter(language="en")] + pp = PreprocessorList(pp) + self.corpus = pp(self.corpus) + + data = self._compute_features("Character count", "", Sources.TOKENS) + np.testing.assert_array_almost_equal(data.X.flatten(), [47, 44, 46, 51]) + + data = self._compute_features("N-gram count", "", Sources.TOKENS) + np.testing.assert_array_almost_equal(data.X.flatten(), [8, 9, 9, 9]) + + data = self._compute_features("Per cent unique terms", "", Sources.TOKENS) + np.testing.assert_array_almost_equal(data.X.flatten(), [1, 1, 1, 1]) + + # none start with the capital because of Lowercase preprocessor + data = self._compute_features("Starts with", "L", Sources.TOKENS) + np.testing.assert_array_almost_equal(data.X.flatten(), [0, 0, 0, 0]) + + data = self._compute_features("Starts with", "a", Sources.TOKENS) + np.testing.assert_array_almost_equal(data.X.flatten(), [2, 0, 0, 2]) + + data = self._compute_features("Ends with", "a", Sources.TOKENS) + np.testing.assert_array_almost_equal(data.X.flatten(), [0, 1, 2, 1]) + + # non contain comma since we use RegexP preprocessor + data = self._compute_features("Contains", ",", Sources.TOKENS) + np.testing.assert_array_almost_equal(data.X.flatten(), [0, 0, 0, 0]) + + data = self._compute_features("Contains", "a", Sources.TOKENS) + np.testing.assert_array_almost_equal(data.X.flatten(), [2, 2, 6, 5]) + + data = self._compute_features("Regex", "{e", Sources.TOKENS) + np.testing.assert_array_almost_equal(data.X.flatten(), [0, 0, 0, 0]) + def test_statistics_combination(self): """ Testing three statistics at same time and see if column concatenated @@ -323,9 +378,9 @@ def test_statistics_combination(self): starts_with_index = STATISTICS_NAMES.index("Starts with") capital_counts_index = STATISTICS_NAMES.index("Capital letter count") self.widget.active_rules = [ - (wc_index, ""), - (starts_with_index, "a"), - (capital_counts_index, ""), + (wc_index, "", Sources.DOCUMENTS), + (starts_with_index, "a", Sources.TOKENS), + (capital_counts_index, "", Sources.DOCUMENTS), ] self.widget.adjust_n_rule_rows() @@ -350,43 +405,44 @@ def test_dictionary_statistics(self): """ self.send_signal(self.widget.Inputs.corpus, self.corpus) - self.widget.active_rules = [ - (1, ""), - ] + self.widget.active_rules = [(1, "", Sources.DOCUMENTS)] self.widget.adjust_n_rule_rows() self.widget.apply() self.wait_until_finished() - self.assertListEqual([(1, None)], list(self.widget.result_dict.keys())) + expected = [(1, "", Sources.DOCUMENTS)] + self.assertListEqual(expected, list(self.widget.result_dict.keys())) - self.widget.active_rules = [(1, ""), (2, "")] + self.widget.active_rules = [(1, "", Sources.DOCUMENTS), (2, "", Sources.TOKENS)] self.widget.adjust_n_rule_rows() self.widget.apply() self.wait_until_finished() - self.assertListEqual( - [(1, ""), (2, None)], 
-            list(self.widget.result_dict.keys())
-        )
+        expected = [(1, "", Sources.DOCUMENTS), (2, "", Sources.TOKENS)]
+        self.assertListEqual(expected, list(self.widget.result_dict.keys()))
 
-        self.widget.active_rules = [(2, "")]
+        self.widget.active_rules = [(2, "", Sources.TOKENS)]
         self.widget.adjust_n_rule_rows()
         self.widget.apply()
         self.wait_until_finished()
-        self.assertListEqual([(2, None)], list(self.widget.result_dict.keys()))
+        expected = [(2, "", Sources.TOKENS)]
+        self.assertListEqual(expected, list(self.widget.result_dict.keys()))
 
         # dict should empty on new data
         self.send_signal(self.widget.Inputs.corpus, self.corpus)
         self.assertListEqual([], list(self.widget.result_dict.keys()))
 
     def test_settings(self):
-        """ Test whether context correctly restore rules """
-        rules = [(0, ""), (1, ""), (2, None)]
+        """Test whether the context correctly restores rules"""
+        doc, tk = Sources.DOCUMENTS, Sources.TOKENS
+        rules = [(0, "", doc), (1, "", doc), (2, "", tk)]
         self.send_signal(self.widget.Inputs.corpus, self.corpus)
         self.widget.active_rules = rules[:]
 
         self.send_signal(self.widget.Inputs.corpus, self.book_data)
-        self.assertListEqual([(0, ""), (1, ""), (2, None)], self.widget.active_rules)
+        expected = [(0, "", doc), (1, "", doc), (2, "", tk)]
+        self.assertListEqual(expected, self.widget.active_rules)
 
     def test_compute_values(self):
         """ Test compute values on new data """
@@ -418,13 +474,13 @@ def test_add_row(self):
             if x.text() == "+"
         ][0]
         add_button.click()
-        self.assertListEqual([(0, "")], self.widget.active_rules)
+        self.assertListEqual([(0, "", Sources.DOCUMENTS)], self.widget.active_rules)
 
     def test_remove_row(self):
         self.send_signal(self.widget.Inputs.corpus, self.corpus)
-        self.widget.active_rules = [(0, "")]
+        self.widget.active_rules = [(0, "", Sources.DOCUMENTS)]
         self.widget.adjust_n_rule_rows()
-        self.assertListEqual([(0, "")], self.widget.active_rules)
+        self.assertListEqual([(0, "", Sources.DOCUMENTS)], self.widget.active_rules)
 
         remove_button = [
             x
@@ -434,6 +490,32 @@ def test_remove_row(self):
         remove_button.click()
         self.assertListEqual([], self.widget.active_rules)
 
+    def test_migrate_settings(self):
+        vals = [""] * 6 + ["a,e", "b,c", "", "a", "b", "c", r"\w*is", "NN,VV", "", ""]
+        settings = {"__version__": 1, "active_rules": list(zip(range(17), vals))}
+        widget = self.create_widget(OWStatistics, stored_settings=settings)
+        self.send_signal(self.widget.Inputs.corpus, self.corpus, widget=widget)
+
+        expected = [
+            (0, "", Sources.DOCUMENTS),
+            (1, "", Sources.DOCUMENTS),
+            (2, "", Sources.TOKENS),
+            (3, "", Sources.DOCUMENTS),
+            (4, "", Sources.DOCUMENTS),
+            (5, "", Sources.DOCUMENTS),
+            (6, "a,e", Sources.DOCUMENTS),
+            (7, "b,c", Sources.DOCUMENTS),
+            (8, "", Sources.TOKENS),
+            (9, "a", Sources.TOKENS),
+            (10, "b", Sources.TOKENS),
+            (11, "c", Sources.DOCUMENTS),
+            (12, r"\w*is", Sources.TOKENS),
+            (13, "NN,VV", Sources.TOKENS),
+            (14, "", Sources.TOKENS),
+            (15, "", Sources.TOKENS),
+        ]
+        self.assertListEqual(expected, widget.active_rules)
+
 
 if __name__ == "__main__":
     unittest.main()
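--
A quick illustration of the new settings migration: `migrate_settings` extends each
stored version-1 rule (statistic_index, pattern) into the version-2 triple
(statistic_index, pattern, source), choosing the source that preserves the old
behaviour. The sketch below is self-contained and illustrative only; the Sources,
STATISTICS_NAMES, and STATISTICS_DEFAULT_SOURCES stand-ins carry toy values, not
the widget's real tables.

    from typing import Dict

    class Sources:  # stand-in for the widget's Sources constants
        DOCUMENTS = "Documents"
        TOKENS = "Tokens"

    # Toy subset of the widget's module-level tables (illustrative only).
    STATISTICS_NAMES = ["Word count", "Regex"]
    STATISTICS_DEFAULT_SOURCES = [
        [Sources.DOCUMENTS],                  # Word count: documents only
        [Sources.DOCUMENTS, Sources.TOKENS],  # Regex: both sources allowed
    ]

    def migrate_settings(settings: Dict, version: int) -> None:
        def def_source(idx: int) -> str:
            # Regex used to run on tokens; other statistics that allow both
            # sources used to run on documents, so keep that behaviour.
            if STATISTICS_NAMES[idx] == "Regex":
                return Sources.TOKENS
            return STATISTICS_DEFAULT_SOURCES[idx][0]

        if version < 2 and "active_rules" in settings:
            settings["active_rules"] = [
                (rule, value, def_source(rule))
                for rule, value in settings["active_rules"]
            ]

    # Old two-element rules gain a behaviour-preserving source.
    old = {"__version__": 1, "active_rules": [(0, ""), (1, r"\w*is")]}
    migrate_settings(old, old["__version__"])
    assert old["active_rules"] == [
        (0, "", Sources.DOCUMENTS),
        (1, r"\w*is", Sources.TOKENS),
    ]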