diff --git a/doc/widgets/concordance.rst b/doc/widgets/concordance.rst index 49a0cc1f8..4b4d457b9 100644 --- a/doc/widgets/concordance.rst +++ b/doc/widgets/concordance.rst @@ -19,12 +19,16 @@ Signals - **Selected Documents** - A :ref:`Corpus` instance. + Documents containing the queried word. + +- **Concordances** + + A table of concordances. Description ----------- -**Concordance** finds the queried word in a text and displays the context in which this word is used. It can output selected documents for further analysis. +**Concordance** finds the queried word in a text and displays the context in which this word is used. Results in a single color come from the same document. The widget can output selected documents for further analysis or a table of concordances for the queried word. Note that the widget finds only exact matches of a word, which means that if you query the word 'do', the word 'doctor' won't appear in the results. .. figure:: images/Concordance-stamped.png @@ -37,11 +41,17 @@ Description 3. Queried word. 4. If *Auto commit is on*, selected documents are communicated automatically. Alternatively press *Commit*. -Example -------- +Examples +-------- -*Concordance* can be used for displaying word contexts in a corpus. First, we load *book-excerpts.tab* in :doc:`Corpus `. Then we connect **Corpus** to **Concordances** and search for concordances of a word "doctor". The widget displays all documents containing the word "doctor" together with their surrounding (contextual) words. Note that the widget finds only exact matches of a word. +*Concordance* can be used for displaying word contexts in a corpus. First, we load *book-excerpts.tab* in :doc:`Corpus `. Then we connect **Corpus** to **Concordance** and search for concordances of a word "doctor". The widget displays all documents containing the word "doctor" together with their surrounding (contextual) words. 
Now we can select those documents that contain interesting contexts and output them to :doc:`Corpus Viewer ` to inspect them further. -.. figure:: images/Concordance-Example.png +.. figure:: images/Concordance-Example1.png + +In the second example, we will output concordances instead. We will keep the *book-excerpts.tab* in :doc:`Corpus ` and the connection to **Concordance**. Our queried word remains "doctor". + +This time, we will connect **Data Table** to **Concordance** and select Concordances output instead. In the **Data Table**, we get a list of concordances for the queried word and the corresponding documents. Now, we will save this table with the **Save Data** widget, so we can use it in other projects or for further analysis. + +.. figure:: images/Concordance-Example2.png diff --git a/doc/widgets/images/Concordance-Example.png b/doc/widgets/images/Concordance-Example1.png similarity index 100% rename from doc/widgets/images/Concordance-Example.png rename to doc/widgets/images/Concordance-Example1.png diff --git a/doc/widgets/images/Concordance-Example2.png b/doc/widgets/images/Concordance-Example2.png new file mode 100644 index 000000000..9558d7166 Binary files /dev/null and b/doc/widgets/images/Concordance-Example2.png differ diff --git a/orangecontrib/text/widgets/owconcordance.py b/orangecontrib/text/widgets/owconcordance.py index 7c933afa6..5fc382ccc 100644 --- a/orangecontrib/text/widgets/owconcordance.py +++ b/orangecontrib/text/widgets/owconcordance.py @@ -1,11 +1,14 @@ from typing import Optional from itertools import chain +import numpy as np + from AnyQt.QtCore import Qt, QAbstractTableModel, QSize, QItemSelectionModel, \ QItemSelection, QModelIndex from AnyQt.QtWidgets import QSizePolicy, QApplication, QTableView, \ QStyledItemDelegate from AnyQt.QtGui import QColor +from Orange.data import Domain, StringVariable, Table from Orange.widgets import gui from Orange.widgets.settings import Setting, ContextSetting, PerfectDomainContextHandler @@ -151,6 
def get_data(self):
    """Return the model's concordances as a two-column ``Corpus``.

    Every model row becomes one instance with two string metas:

    * ``"Conc. <word>"`` -- the row's column values, each converted with
      ``str`` and joined by single spaces;
    * ``"Document"`` -- the title of the document the row comes from,
      looked up through ``self.word_index[row][0]``.

    Returns:
        Corpus: one instance per model row. When the model has no rows
        the metas array is empty but still has shape ``(0, 2)``.
    """
    domain = Domain([], metas=[
        StringVariable("Conc. {}".format(self.word)),
        StringVariable("Document"),
    ])

    column_count = self.columnCount()
    rows = []
    for row in range(self.rowCount()):
        text = " ".join(
            str(self.data(self.index(row, column)))
            for column in range(column_count)
        )
        # str() keeps the title a plain string, as the original
        # string-array round-trip through np.hstack did.
        title = str(self.corpus.titles[self.word_index[row][0]])
        rows.append([text, title])

    # np.hstack(([], [])) yields a 1-D float array of shape (0,); reshaping
    # to (-1, 2) keeps the metas two-dimensional even when there are no
    # concordances, so the table has the same layout in every case.
    conc = np.array(rows, dtype=object).reshape(-1, 2)
    return Corpus(domain, metas=conc, text_features=[domain.metas[1]])