Skip to content

Commit

Permalink
Datasets - add languages
Browse files (browse the repository at this point in the history)
  • Loading branch information
PrimozGodec committed Nov 4, 2022
1 parent e69636c commit 9914fa7
Show file tree
Hide file tree
Showing 16 changed files with 33 additions and 1 deletion.
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-recursive-include orangecontrib/text/datasets *.tab *.txt
+recursive-include orangecontrib/text/datasets *.tab *.txt *.metadata
recursive-include orangecontrib/text/models *.ftz
recursive-include orangecontrib/text/sentiment *.txt
recursive-include orangecontrib/text/tests *.txt *.json *.pkl
Expand Down
1 change: 1 addition & 0 deletions orangecontrib/text/datasets/20newsgroups-test.tab.metadata
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
language: en
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
language: en
1 change: 1 addition & 0 deletions orangecontrib/text/datasets/andersen.tab.metadata
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
language: en
1 change: 1 addition & 0 deletions orangecontrib/text/datasets/book-excerpts.tab.metadata
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
language: en
1 change: 1 addition & 0 deletions orangecontrib/text/datasets/deerwester.tab.metadata
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
language: en
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
language: en
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
language: en
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
language: en
1 change: 1 addition & 0 deletions orangecontrib/text/datasets/grimm-tales.tab.metadata
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
language: en
1 change: 1 addition & 0 deletions orangecontrib/text/datasets/reuters-r52-test.tab.metadata
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
language: en
1 change: 1 addition & 0 deletions orangecontrib/text/datasets/reuters-r52-train.tab.metadata
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
language: en
1 change: 1 addition & 0 deletions orangecontrib/text/datasets/reuters-r8-test.tab.metadata
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
language: en
1 change: 1 addition & 0 deletions orangecontrib/text/datasets/reuters-r8-train.tab.metadata
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
language: en
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
language: sl
18 changes: 18 additions & 0 deletions orangecontrib/text/tests/test_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import os
import unittest

from orangecontrib.text import Corpus


class TestDatasets(unittest.TestCase):
    """Sanity checks for the corpus files in the sibling ``datasets`` directory."""

    def test_languages(self):
        """Every corpus file in ``datasets`` must load with a language set.

        Walks the ``datasets`` directory located next to this tests
        directory, loads each file with a supported corpus extension and
        asserts that the resulting corpus has a non-``None`` ``language``.
        """
        tests_dir = os.path.dirname(os.path.abspath(__file__))
        datasets_dir = os.path.join(tests_dir, "..", "datasets")

        # Sorted so failures are reported in a stable order across platforms.
        for file in sorted(os.listdir(datasets_dir)):
            if not file.endswith((".tab", ".xlsx", ".pkl", ".csv")):
                continue
            # One sub-test per file: a corpus without a language is reported
            # by name and does not stop the remaining files from being checked.
            with self.subTest(file=file):
                # Load by full path. A bare name from os.listdir would be
                # resolved against the current working directory (or whatever
                # fallback lookup Corpus.from_file applies), not against the
                # directory that was just listed.
                c = Corpus.from_file(os.path.join(datasets_dir, file))
                self.assertIsNotNone(c.language)


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 9914fa7

Please sign in to comment.