From 5b2ee48fb394cc1a75a97f96f2c22b43de121820 Mon Sep 17 00:00:00 2001
From: PrimozGodec
Date: Tue, 16 Aug 2022 15:35:55 +0200
Subject: [PATCH] Datasets - add languages
---
MANIFEST.in | 2 +-
.../datasets/20newsgroups-test.tab.metadata | 1 +
.../datasets/20newsgroups-train.tab.metadata | 1 +
.../text/datasets/andersen.tab.metadata | 1 +
.../text/datasets/book-excerpts.tab.metadata | 1 +
.../text/datasets/deerwester.tab.metadata | 1 +
.../datasets/election-tweets-2016.tab.metadata | 1 +
.../datasets/friends-transcripts.tab.metadata | 1 +
.../datasets/grimm-tales-selected.tab.metadata | 1 +
.../text/datasets/grimm-tales.tab.metadata | 1 +
.../datasets/reuters-r52-test.tab.metadata | 1 +
.../datasets/reuters-r52-train.tab.metadata | 1 +
.../text/datasets/reuters-r8-test.tab.metadata | 1 +
.../datasets/reuters-r8-train.tab.metadata | 1 +
.../datasets/slo-opinion-corpus.tab.metadata | 1 +
orangecontrib/text/tests/test_datasets.py | 18 ++++++++++++++++++
16 files changed, 33 insertions(+), 1 deletion(-)
create mode 100644 orangecontrib/text/datasets/20newsgroups-test.tab.metadata
create mode 100644 orangecontrib/text/datasets/20newsgroups-train.tab.metadata
create mode 100644 orangecontrib/text/datasets/andersen.tab.metadata
create mode 100644 orangecontrib/text/datasets/book-excerpts.tab.metadata
create mode 100644 orangecontrib/text/datasets/deerwester.tab.metadata
create mode 100644 orangecontrib/text/datasets/election-tweets-2016.tab.metadata
create mode 100644 orangecontrib/text/datasets/friends-transcripts.tab.metadata
create mode 100644 orangecontrib/text/datasets/grimm-tales-selected.tab.metadata
create mode 100644 orangecontrib/text/datasets/grimm-tales.tab.metadata
create mode 100644 orangecontrib/text/datasets/reuters-r52-test.tab.metadata
create mode 100644 orangecontrib/text/datasets/reuters-r52-train.tab.metadata
create mode 100644 orangecontrib/text/datasets/reuters-r8-test.tab.metadata
create mode 100644 orangecontrib/text/datasets/reuters-r8-train.tab.metadata
create mode 100644 orangecontrib/text/datasets/slo-opinion-corpus.tab.metadata
create mode 100644 orangecontrib/text/tests/test_datasets.py
diff --git a/MANIFEST.in b/MANIFEST.in
index 44485ac82..cb442d94d 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,4 +1,4 @@
-recursive-include orangecontrib/text/datasets *.tab *.txt
+recursive-include orangecontrib/text/datasets *.tab *.txt *.metadata
recursive-include orangecontrib/text/models *.ftz
recursive-include orangecontrib/text/sentiment *.txt
recursive-include orangecontrib/text/tests *.txt *.json *.pkl
diff --git a/orangecontrib/text/datasets/20newsgroups-test.tab.metadata b/orangecontrib/text/datasets/20newsgroups-test.tab.metadata
new file mode 100644
index 000000000..bfb1a3048
--- /dev/null
+++ b/orangecontrib/text/datasets/20newsgroups-test.tab.metadata
@@ -0,0 +1 @@
+language: en
\ No newline at end of file
diff --git a/orangecontrib/text/datasets/20newsgroups-train.tab.metadata b/orangecontrib/text/datasets/20newsgroups-train.tab.metadata
new file mode 100644
index 000000000..bfb1a3048
--- /dev/null
+++ b/orangecontrib/text/datasets/20newsgroups-train.tab.metadata
@@ -0,0 +1 @@
+language: en
\ No newline at end of file
diff --git a/orangecontrib/text/datasets/andersen.tab.metadata b/orangecontrib/text/datasets/andersen.tab.metadata
new file mode 100644
index 000000000..bfb1a3048
--- /dev/null
+++ b/orangecontrib/text/datasets/andersen.tab.metadata
@@ -0,0 +1 @@
+language: en
\ No newline at end of file
diff --git a/orangecontrib/text/datasets/book-excerpts.tab.metadata b/orangecontrib/text/datasets/book-excerpts.tab.metadata
new file mode 100644
index 000000000..bfb1a3048
--- /dev/null
+++ b/orangecontrib/text/datasets/book-excerpts.tab.metadata
@@ -0,0 +1 @@
+language: en
\ No newline at end of file
diff --git a/orangecontrib/text/datasets/deerwester.tab.metadata b/orangecontrib/text/datasets/deerwester.tab.metadata
new file mode 100644
index 000000000..bfb1a3048
--- /dev/null
+++ b/orangecontrib/text/datasets/deerwester.tab.metadata
@@ -0,0 +1 @@
+language: en
\ No newline at end of file
diff --git a/orangecontrib/text/datasets/election-tweets-2016.tab.metadata b/orangecontrib/text/datasets/election-tweets-2016.tab.metadata
new file mode 100644
index 000000000..bfb1a3048
--- /dev/null
+++ b/orangecontrib/text/datasets/election-tweets-2016.tab.metadata
@@ -0,0 +1 @@
+language: en
\ No newline at end of file
diff --git a/orangecontrib/text/datasets/friends-transcripts.tab.metadata b/orangecontrib/text/datasets/friends-transcripts.tab.metadata
new file mode 100644
index 000000000..bfb1a3048
--- /dev/null
+++ b/orangecontrib/text/datasets/friends-transcripts.tab.metadata
@@ -0,0 +1 @@
+language: en
\ No newline at end of file
diff --git a/orangecontrib/text/datasets/grimm-tales-selected.tab.metadata b/orangecontrib/text/datasets/grimm-tales-selected.tab.metadata
new file mode 100644
index 000000000..bfb1a3048
--- /dev/null
+++ b/orangecontrib/text/datasets/grimm-tales-selected.tab.metadata
@@ -0,0 +1 @@
+language: en
\ No newline at end of file
diff --git a/orangecontrib/text/datasets/grimm-tales.tab.metadata b/orangecontrib/text/datasets/grimm-tales.tab.metadata
new file mode 100644
index 000000000..bfb1a3048
--- /dev/null
+++ b/orangecontrib/text/datasets/grimm-tales.tab.metadata
@@ -0,0 +1 @@
+language: en
\ No newline at end of file
diff --git a/orangecontrib/text/datasets/reuters-r52-test.tab.metadata b/orangecontrib/text/datasets/reuters-r52-test.tab.metadata
new file mode 100644
index 000000000..bfb1a3048
--- /dev/null
+++ b/orangecontrib/text/datasets/reuters-r52-test.tab.metadata
@@ -0,0 +1 @@
+language: en
\ No newline at end of file
diff --git a/orangecontrib/text/datasets/reuters-r52-train.tab.metadata b/orangecontrib/text/datasets/reuters-r52-train.tab.metadata
new file mode 100644
index 000000000..bfb1a3048
--- /dev/null
+++ b/orangecontrib/text/datasets/reuters-r52-train.tab.metadata
@@ -0,0 +1 @@
+language: en
\ No newline at end of file
diff --git a/orangecontrib/text/datasets/reuters-r8-test.tab.metadata b/orangecontrib/text/datasets/reuters-r8-test.tab.metadata
new file mode 100644
index 000000000..bfb1a3048
--- /dev/null
+++ b/orangecontrib/text/datasets/reuters-r8-test.tab.metadata
@@ -0,0 +1 @@
+language: en
\ No newline at end of file
diff --git a/orangecontrib/text/datasets/reuters-r8-train.tab.metadata b/orangecontrib/text/datasets/reuters-r8-train.tab.metadata
new file mode 100644
index 000000000..bfb1a3048
--- /dev/null
+++ b/orangecontrib/text/datasets/reuters-r8-train.tab.metadata
@@ -0,0 +1 @@
+language: en
\ No newline at end of file
diff --git a/orangecontrib/text/datasets/slo-opinion-corpus.tab.metadata b/orangecontrib/text/datasets/slo-opinion-corpus.tab.metadata
new file mode 100644
index 000000000..2e18e2049
--- /dev/null
+++ b/orangecontrib/text/datasets/slo-opinion-corpus.tab.metadata
@@ -0,0 +1 @@
+language: sl
\ No newline at end of file
diff --git a/orangecontrib/text/tests/test_datasets.py b/orangecontrib/text/tests/test_datasets.py
new file mode 100644
index 000000000..04836354a
--- /dev/null
+++ b/orangecontrib/text/tests/test_datasets.py
@@ -0,0 +1,29 @@
+import os
+import unittest
+
+from orangecontrib.text import Corpus
+
+
+class TestDatasets(unittest.TestCase):
+    """Checks on the dataset files in the sibling ``datasets`` directory."""
+
+    def test_languages(self):
+        """Every corpus-like dataset file must load with a language set."""
+        tests_dir = os.path.dirname(os.path.abspath(__file__))
+        datasets_dir = os.path.normpath(os.path.join(tests_dir, "..", "datasets"))
+
+        for name in sorted(os.listdir(datasets_dir)):
+            if not name.endswith((".tab", ".xlsx", ".pkl", ".csv")):
+                continue
+            # os.listdir returns bare names; pass the full path so loading
+            # does not depend on the current working directory.
+            path = os.path.join(datasets_dir, name)
+            # subTest names the offending file and lets the remaining
+            # files be checked after a failure.
+            with self.subTest(dataset=name):
+                corpus = Corpus.from_file(path)
+                self.assertIsNotNone(corpus.language)
+
+
+if __name__ == "__main__":
+    unittest.main()