diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..02310e2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,175 @@
+
+.idea/misc.xml
+.idea/dictionaries
+
+# Created by https://www.gitignore.io/api/intellij,jetbrains,java,maven,eclipse
+
+### Eclipse ###
+
+.metadata
+bin/
+tmp/
+*.tmp
+*.bak
+*.swp
+*~.nib
+local.properties
+.settings/
+.loadpath
+.recommenders
+
+# Eclipse Core
+.project
+
+# External tool builders
+.externalToolBuilders/
+
+# Locally stored "Eclipse launch configurations"
+*.launch
+
+# PyDev specific (Python IDE for Eclipse)
+*.pydevproject
+
+# CDT-specific (C/C++ Development Tooling)
+.cproject
+
+# JDT-specific (Eclipse Java Development Tools)
+.classpath
+
+# Java annotation processor (APT)
+.factorypath
+
+# PDT-specific (PHP Development Tools)
+.buildpath
+
+# sbteclipse plugin
+.target
+
+# Tern plugin
+.tern-project
+
+# TeXlipse plugin
+.texlipse
+
+# STS (Spring Tool Suite)
+.springBeans
+
+# Code Recommenders
+.recommenders/
+
+### Intellij ###
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff:
+.idea/workspace.xml
+.idea/tasks.xml
+
+# Sensitive or high-churn files:
+.idea/dataSources/
+.idea/dataSources.ids
+.idea/dataSources.xml
+.idea/dataSources.local.xml
+.idea/sqlDataSources.xml
+.idea/dynamic.xml
+.idea/uiDesigner.xml
+
+# Gradle:
+.idea/gradle.xml
+.idea/libraries
+
+# Mongo Explorer plugin:
+.idea/mongoSettings.xml
+
+## File-based project format:
+*.iws
+
+## Plugin-specific files:
+
+# IntelliJ
+/out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+### Intellij Patch ###
+# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
+
+# *.iml
+# modules.xml
+# .idea/misc.xml
+# *.ipr
+
+### Java ###
+*.class
+
+# BlueJ files
+*.ctxt
+
+# Mobile Tools for Java (J2ME)
+.mtj.tmp/
+
+# Package Files #
+*.jar
+*.war
+*.ear
+
+# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
+hs_err_pid*
+
+### JetBrains ###
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff:
+
+# Sensitive or high-churn files:
+
+# Gradle:
+
+# Mongo Explorer plugin:
+
+## File-based project format:
+
+## Plugin-specific files:
+
+# IntelliJ
+
+# mpeltonen/sbt-idea plugin
+
+# JIRA plugin
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+
+### JetBrains Patch ###
+# Comment Reason: https://github.com/joeblau/gitignore.io/issues/186#issuecomment-215987721
+
+# *.iml
+# modules.xml
+# .idea/misc.xml
+# *.ipr
+
+### Maven ###
+target/
+pom.xml.tag
+pom.xml.releaseBackup
+pom.xml.versionsBackup
+pom.xml.next
+release.properties
+dependency-reduced-pom.xml
+buildNumber.properties
+.mvn/timing.properties
+
+# Exclude maven wrapper
+!/.mvn/wrapper/maven-wrapper.jar
+
+# End of https://www.gitignore.io/api/intellij,jetbrains,java,maven,eclipse
diff --git a/.idea/compiler.xml b/.idea/compiler.xml
new file mode 100644
index 0000000..4e42293
--- /dev/null
+++ b/.idea/compiler.xml
@@ -0,0 +1,16 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/copyright/Lambda___GPL_v3_.xml b/.idea/copyright/Lambda___GPL_v3_.xml
new file mode 100644
index 0000000..d201bc0
--- /dev/null
+++ b/.idea/copyright/Lambda___GPL_v3_.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/copyright/Lambda___MIT_.xml b/.idea/copyright/Lambda___MIT_.xml
new file mode 100644
index 0000000..c987fe2
--- /dev/null
+++ b/.idea/copyright/Lambda___MIT_.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/copyright/profiles_settings.xml b/.idea/copyright/profiles_settings.xml
new file mode 100644
index 0000000..ca4cabf
--- /dev/null
+++ b/.idea/copyright/profiles_settings.xml
@@ -0,0 +1,3 @@
+
+
+
\ No newline at end of file
diff --git a/.idea/encodings.xml b/.idea/encodings.xml
new file mode 100644
index 0000000..b26911b
--- /dev/null
+++ b/.idea/encodings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..6612519
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..25ec0e9
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..6c792a2
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,10 @@
+language: java
+script: mvn test -B
+jdk: oraclejdk8
+
+cache:
+ directories:
+ - $HOME/.m2
+
+before_install:
+ - ./install-SentenceSimplification.sh
diff --git a/DiscourseSimplification.iml b/DiscourseSimplification.iml
new file mode 100644
index 0000000..6df30cf
--- /dev/null
+++ b/DiscourseSimplification.iml
@@ -0,0 +1,43 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..9cecc1d
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ {one line to give the program's name and a brief idea of what it does.}
+ Copyright (C) {year} {name of author}
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ {project} Copyright (C) {year} {fullname}
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8d4c9a7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,30 @@
+[![Build Status](https://travis-ci.org/Lambda-3/DiscourseSimplification.svg?branch=master)](https://travis-ci.org/Lambda-3/DiscourseSimplification)
+
+# Discourse Simplification
+
+A project for simplifying sentences with respect to their discourse/rhetorical structures.
+This works as a wrapper for the [SentenceSimplification](https://github.com/Lambda-3/SentenceSimplification) project.
+
+## Dependencies
+
+### SentenceSimplification
+
+Clone and install locally
+
+    git clone --branch v5.0.0 https://github.com/Lambda-3/SentenceSimplification.git
+    cd SentenceSimplification
+    mvn install
+
+## Building and Running
+
+    mvn package
+
+### Run the program
+
+    mvn clean compile exec:java
+
+## Use as library
+Check `App.java`.
+Or its usage in the [Graphene](https://github.com/Lambda-3/Graphene) project.
+
+
diff --git a/install-SentenceSimplification.sh b/install-SentenceSimplification.sh
new file mode 100755
index 0000000..7de0be1
--- /dev/null
+++ b/install-SentenceSimplification.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+VERSION=5.0.0  # SentenceSimplification release tag to download and install into the local Maven repository
+# -e: abort on the first failing command; -x: echo every command before running it
+set -ex
+wget "https://github.com/Lambda-3/SentenceSimplification/archive/v$VERSION.tar.gz"
+tar xfa "v$VERSION.tar.gz"
+cd "SentenceSimplification-$VERSION" && mvn install -B -DskipTests  # was -DskipTest, which Maven ignores
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..bf2088d
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,174 @@
+
+
+
+ 4.0.0
+
+ org.lambda3.text.simplification
+ discourse-simplification
+ 5.0.0
+ jar
+
+ Discourse Simplification
+
+ Discourse Simplification
+
+
+ scm:git:https://github.com/Lambda-3/DiscourseSimplification
+ scm:git:https://github.com/Lambda-3/DiscourseSimplification
+ https://github.com/Lambda-3/DiscourseSimplification
+
+
+
+
+ 5.0.0
+
+ 3.7.0
+ 1.1.8
+
+ 1.0.0-M3
+ 5.0.0-M3
+
+ 1.8
+ UTF-8
+ UTF-8
+
+
+
+
+
+
+ org.lambda3.text.simplification
+ sentence-simplification
+ ${simplification.version}
+
+
+
+
+ edu.stanford.nlp
+ stanford-corenlp
+ ${corenlp.version}
+
+
+ slf4j-api
+ org.slf4j
+
+
+
+
+ edu.stanford.nlp
+ stanford-corenlp
+ ${corenlp.version}
+ models
+
+
+ slf4j-api
+ org.slf4j
+
+
+
+
+
+
+ ch.qos.logback
+ logback-classic
+ ${logback.version}
+
+
+
+
+ org.junit.jupiter
+ junit-jupiter-api
+ ${junit.jupiter.version}
+ test
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 3.6.1
+ true
+
+
+ ${jdk.version}
+
+
+
+ org.apache.maven.plugins
+ maven-jar-plugin
+ 3.0.2
+
+
+ org.codehaus.mojo
+ exec-maven-plugin
+ 1.5.0
+
+ org.lambda3.text.simplification.discourse.App
+
+
+
+ org.apache.maven.plugins
+ maven-assembly-plugin
+ 3.0.0
+
+
+ jar-with-dependencies
+
+
+
+
+ org.lambda3.text.simplification.sentence.segmentation.SentenceSeparator
+
+
+
+
+
+
+ package
+
+ single
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-source-plugin
+ 3.0.1
+
+
+ attach-sources
+ package
+
+ jar-no-fork
+
+
+
+
+
+
+
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/App.java b/src/main/java/org/lambda3/text/simplification/discourse/App.java
new file mode 100644
index 0000000..e4f9f44
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/App.java
@@ -0,0 +1,47 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : App
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse;
+
+import org.lambda3.text.simplification.discourse.processing.Processor;
+import org.lambda3.text.simplification.discourse.sentence_simplification.element.DCore;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Demo entry point: runs the simplification pipeline on {@code input.txt} (a path relative to the working directory).
+ */
+public class App {
+    private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(App.class);
+    private static final Processor PROCESSOR = new Processor();
+
+    public static void main(String[] args) throws IOException {
+        // WHOLE builds one discourse tree over all sentences; process(String, ...) accepts raw text instead of a file.
+        List<DCore> cores = PROCESSOR.process(new File("input.txt"), Processor.ProcessingType.WHOLE);
+        // Report the outcome so the result is not silently discarded.
+        LOGGER.info("Extracted {} discourse core(s) from input.txt", cores.size());
+    }
+
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/Test.java b/src/main/java/org/lambda3/text/simplification/discourse/Test.java
new file mode 100644
index 0000000..e05ad6b
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/Test.java
@@ -0,0 +1,66 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : Test
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse;
+
+import edu.stanford.nlp.trees.Tree;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeParser;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeVisualizer;
+import org.lambda3.text.simplification.discourse.utils.sentences.SentencesUtils;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.List;
+
+/**
+ *
+ */
+public class Test {
+ private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(App.class);
+
+ public static void printParseTree(File file) throws FileNotFoundException {
+ List sentences = SentencesUtils.splitIntoSentencesFromFile(file);
+
+ printParseTree(sentences);
+ }
+
+ public static void printParseTree(String text) {
+ List sentences = SentencesUtils.splitIntoSentences(text);
+
+ printParseTree(sentences);
+ }
+
+ public static void printParseTree(List sentences) {
+ for (String sentence : sentences) {
+ LOGGER.info("Generate parse tree for sentence:\n'{}'", sentence);
+ try {
+ Tree parseTree = ParseTreeParser.parse(sentence);
+ LOGGER.info(ParseTreeVisualizer.prettyPrint(parseTree));
+ } catch (ParseTreeException e) {
+ LOGGER.error("Failed to generate parse tree");
+ }
+ }
+ }
+
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/processing/ExtendedProcessor.java b/src/main/java/org/lambda3/text/simplification/discourse/processing/ExtendedProcessor.java
new file mode 100644
index 0000000..4b47374
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/processing/ExtendedProcessor.java
@@ -0,0 +1,80 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : ExtendedProcessor
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.processing;
+
+import org.lambda3.text.simplification.discourse.sentence_simplification.element.DCore;
+import org.lambda3.text.simplification.discourse.utils.sentences.SentencesUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * {@link Processor} variant that can shuffle, length-filter and truncate the sentences before processing them.
+ */
+public class ExtendedProcessor extends Processor {
+    private final Logger logger = LoggerFactory.getLogger(getClass());
+
+    /**
+     * Returns a filtered copy of {@code sentences}; the input list is not modified and a {@code null} limit disables that filter.
+     */
+    public static List<String> filterSentences(List<String> sentences, boolean shuffleSentences, Integer maxSentenceLength, Integer maxSentences) {
+        List<String> res = new ArrayList<>(sentences);
+        // shuffle first, so that a maxSentences limit keeps a random selection rather than the leading sentences
+        if (shuffleSentences) {
+            Collections.shuffle(res);
+        }
+
+        // remove too long sentences (measured by String.length())
+        if (maxSentenceLength != null) {
+            res = res.stream().filter(s -> s.length() <= maxSentenceLength).collect(Collectors.toList());
+        }
+
+        // limit number of sentences
+        if (maxSentences != null && res.size() > maxSentences) {
+            res = res.subList(0, maxSentences);
+        }
+
+        return res;
+    }
+
+    /** Splits the content of {@code file} into sentences, filters them and processes the remainder. */
+    public List<DCore> process(File file, ProcessingType type, boolean shuffleSentences, Integer maxSentenceLength, Integer maxSentences) throws FileNotFoundException {
+        return process(SentencesUtils.splitIntoSentencesFromFile(file), type, shuffleSentences, maxSentenceLength, maxSentences);
+    }
+
+    /** Splits {@code text} into sentences, filters them and processes the remainder. */
+    public List<DCore> process(String text, ProcessingType type, boolean shuffleSentences, Integer maxSentenceLength, Integer maxSentences) {
+        return process(SentencesUtils.splitIntoSentences(text), type, shuffleSentences, maxSentenceLength, maxSentences);
+    }
+
+    /** Filters {@code sentences} via {@link #filterSentences} and hands the remainder to the base processor. */
+    public List<DCore> process(List<String> sentences, ProcessingType type, boolean shuffleSentences, Integer maxSentenceLength, Integer maxSentences) {
+        return process(filterSentences(sentences, shuffleSentences, maxSentenceLength, maxSentences), type);
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/processing/Processor.java b/src/main/java/org/lambda3/text/simplification/discourse/processing/Processor.java
new file mode 100644
index 0000000..44c582c
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/processing/Processor.java
@@ -0,0 +1,163 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : Processor
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.processing;
+
+import org.lambda3.text.simplification.discourse.relation_extraction.DiscourseExtractor;
+import org.lambda3.text.simplification.discourse.relation_extraction.element.DiscourseCore;
+import org.lambda3.text.simplification.discourse.sentence_simplification.Simplifier;
+import org.lambda3.text.simplification.discourse.sentence_simplification.element.DCore;
+import org.lambda3.text.simplification.discourse.tree.DiscourseTreeCreator;
+import org.lambda3.text.simplification.discourse.utils.sentences.SentencesUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ *
+ */
+public class Processor {
+ private final static DiscourseTreeCreator DISCOURSE_TREE_CREATOR = new DiscourseTreeCreator();
+ private final static DiscourseExtractor DISCOURSE_EXTRACTOR = new DiscourseExtractor();
+ private final static Simplifier SIMPLIFIER = new Simplifier();
+ private final Logger logger = LoggerFactory.getLogger(getClass());
+
+ public List process(File file, ProcessingType type) throws FileNotFoundException {
+ List sentences = SentencesUtils.splitIntoSentencesFromFile(file);
+ return process(sentences, type);
+ }
+
+ public List process(String text, ProcessingType type) {
+ List sentences = SentencesUtils.splitIntoSentences(text);
+ return process(sentences, type);
+ }
+
+ public List process(List sentences, ProcessingType type) {
+ if (type.equals(ProcessingType.SEPARATE)) {
+ return processSeparate(sentences);
+ } else if (type.equals(ProcessingType.WHOLE)) {
+ return processWhole(sentences);
+ } else {
+ throw new IllegalArgumentException("Unknown ProcessingType.");
+ }
+ }
+
+ // creates one discourse tree over all sentences (investigates intra-sentential and inter-sentential relations)
+ private List processWhole(List sentences) {
+ List res = new ArrayList<>();
+
+ // Step 1) create document discourse tree
+ logger.info("Step 1) Create document discourse tree");
+ DISCOURSE_TREE_CREATOR.reset();
+
+ int idx = 0;
+ for (String sentence : sentences) {
+ logger.info("### Processing sentence ###");
+ logger.info(sentence);
+
+ // extend discourse tree
+ DISCOURSE_TREE_CREATOR.addSentence(sentence, idx);
+ DISCOURSE_TREE_CREATOR.update();
+ if (logger.isDebugEnabled()) {
+
+ Optional.ofNullable(DISCOURSE_TREE_CREATOR.getLastSentenceTree())
+ .ifPresent(t -> logger.debug(t.toString()));
+
+// logger.debug(DISCOURSE_TREE_CREATOR.getDiscourseTree().toString()); // to show the current document discourse tree
+ }
+
+ ++idx;
+ }
+
+ // Step 2) extract discourse cores
+ logger.info("Step 2) extract discourse cores");
+
+ List discourseCores = DISCOURSE_EXTRACTOR.extract(DISCOURSE_TREE_CREATOR.getDiscourseTree());
+ if (logger.isDebugEnabled()) {
+ discourseCores.forEach(x -> logger.debug(x.toString()));
+ }
+
+ // Step 3) generate output format
+ logger.info("Step 3) Generate output format");
+
+ List dCores = SIMPLIFIER.simplify(discourseCores);
+ res.addAll(dCores);
+
+ if (logger.isInfoEnabled()) {
+ dCores.forEach(core -> logger.info(core.toString()));
+ }
+
+ return res;
+ }
+
+ // creates discourse trees for each individual sentence (investigates intra-sentential relations only)
+ private List processSeparate(List sentences) {
+ List res = new ArrayList<>();
+
+ int idx = 0;
+ for (String sentence : sentences) {
+ logger.info("### Processing sentence ###");
+ logger.info("'" + sentence + "'");
+
+ // Step 1) create sentence discourse tree
+ logger.debug("Step 1) Create sentence discourse tree");
+ DISCOURSE_TREE_CREATOR.reset();
+ DISCOURSE_TREE_CREATOR.addSentence(sentence, idx);
+ DISCOURSE_TREE_CREATOR.update();
+ if (logger.isDebugEnabled()) {
+ logger.debug(DISCOURSE_TREE_CREATOR.getDiscourseTree().toString());
+ }
+
+ // Step 2) extract discourse cores
+ logger.debug("Step 2) extract discourse cores");
+
+ List discourseCores = DISCOURSE_EXTRACTOR.extract(DISCOURSE_TREE_CREATOR.getDiscourseTree());
+ if (logger.isDebugEnabled()) {
+ discourseCores.forEach(x -> logger.debug(x.toString()));
+ }
+
+ // Step 3) generate output format
+ logger.debug("Step 3) generate output format");
+
+ List dCores = SIMPLIFIER.simplify(discourseCores);
+ res.addAll(dCores);
+
+ if (logger.isInfoEnabled()) {
+ dCores.forEach(core -> logger.info(core.toString()));
+ }
+
+ ++idx;
+ }
+
+ return res;
+ }
+
+ public enum ProcessingType {
+ SEPARATE,
+ WHOLE
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/relation_extraction/DiscourseExtractor.java b/src/main/java/org/lambda3/text/simplification/discourse/relation_extraction/DiscourseExtractor.java
new file mode 100644
index 0000000..0178af4
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/relation_extraction/DiscourseExtractor.java
@@ -0,0 +1,175 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : DiscourseExtractor
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.relation_extraction;
+
+import org.lambda3.text.simplification.discourse.relation_extraction.element.DiscourseContext;
+import org.lambda3.text.simplification.discourse.relation_extraction.element.DiscourseCore;
+import org.lambda3.text.simplification.discourse.relation_extraction.relation.DiscourseCoreContextRelation;
+import org.lambda3.text.simplification.discourse.relation_extraction.relation.DiscourseCoreCoreRelation;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.tree.model.Coordination;
+import org.lambda3.text.simplification.discourse.tree.model.DiscourseTree;
+import org.lambda3.text.simplification.discourse.tree.model.Leaf;
+import org.lambda3.text.simplification.discourse.tree.model.Subordination;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Walks a {@link DiscourseTree} and links its discourse cores to related cores and contexts. Created by Matthias on 08.12.16.
+ */
+public class DiscourseExtractor {
+    private static final List<Relation> IGNORED_RELATIONS = Arrays.asList(
+            Relation.UNKNOWN_COORDINATION
+    );
+    private final Logger logger = LoggerFactory.getLogger(getClass());
+    private LinkedHashMap<Leaf, DiscourseCore> processedCores;
+    private LinkedHashMap<Leaf, DiscourseContext> processedContexts;
+
+    public DiscourseExtractor() {
+        this.processedCores = new LinkedHashMap<>();
+        this.processedContexts = new LinkedHashMap<>();
+    }
+
+    public List<DiscourseCore> extract(DiscourseTree discourseTree) {
+        this.processedCores = new LinkedHashMap<>(); // fresh caches, so every call starts from an empty state
+        this.processedContexts = new LinkedHashMap<>();
+
+        extractRec(discourseTree);
+        // cores are returned in the order in which their leaves were first registered
+        return new ArrayList<>(processedCores.values());
+    }
+
+    // should be called on a superordinate node
+    private List<DiscourseCore> getCores(DiscourseTree node) {
+        List<DiscourseCore> res = new ArrayList<>();
+
+        for (Leaf leaf : node.getNucleusPathLeaves()) {
+            DiscourseCore core;
+            if (processedCores.containsKey(leaf)) {
+                core = processedCores.get(leaf);
+            } else {
+                core = new DiscourseCore(leaf.getText(), leaf.getSentenceIdx());
+                processedCores.put(leaf, core);
+            }
+            res.add(core);
+        }
+
+        return res;
+    }
+
+    // should be called on a subordinate node
+    private List<DiscourseContext> getContexts(DiscourseTree node) {
+        List<DiscourseContext> res = new ArrayList<>();
+
+        for (Leaf leaf : node.getNucleusPathLeaves()) {
+            DiscourseContext context;
+            if (processedContexts.containsKey(leaf)) {
+                context = processedContexts.get(leaf);
+            } else {
+                context = new DiscourseContext(leaf.getText(), leaf.getSentenceIdx());
+                if (leaf.getType().equals(Leaf.Type.SENT_SIM_CONTEXT)) {
+                    context.setSentSimContext();
+                }
+                processedContexts.put(leaf, context);
+
+            }
+            res.add(context);
+        }
+
+        return res;
+    }
+
+    // only visit nucleus nodes, do not handle References
+    private void extractRec(DiscourseTree node) {
+        // the result is ignored here: getCores is called only to register the leaf's core in processedCores
+        if (node instanceof Leaf) {
+            getCores(node);
+        }
+
+        if (node instanceof Coordination) {
+            Coordination coordination = (Coordination) node;
+
+            // recursion
+            for (DiscourseTree child : coordination.getCoordinations()) {
+                extractRec(child);
+            }
+
+            // add core relations
+            if (!IGNORED_RELATIONS.contains(coordination.getRelation())) {
+                for (DiscourseTree child : coordination.getCoordinations()) {
+                    List<DiscourseCore> childCores = getCores(child);
+
+                    // forward direction
+                    for (DiscourseTree sibling : coordination.getOtherFollowingCoordinations(child)) {
+                        List<DiscourseCore> siblingCores = getCores(sibling);
+
+                        for (DiscourseCore childCore : childCores) {
+                            for (DiscourseCore siblingCore : siblingCores) {
+                                childCore.addCoreRelation(new DiscourseCoreCoreRelation(coordination.getRelation(), siblingCore));
+                            }
+                        }
+                    }
+
+                    // reverse direction
+                    if (coordination.getRelation().getReverseRelation().isPresent()) {
+                        for (DiscourseTree sibling : coordination.getOtherPrecedingCoordinations(child)) {
+                            List<DiscourseCore> siblingCores = getCores(sibling);
+
+                            for (DiscourseCore childCore : childCores) {
+                                for (DiscourseCore siblingCore : siblingCores) {
+                                    childCore.addCoreRelation(new DiscourseCoreCoreRelation(coordination.getRelation().getReverseRelation().get(), siblingCore));
+                                }
+                            }
+                        }
+                    }
+
+                }
+            }
+        }
+
+        if (node instanceof Subordination) {
+            Subordination subordination = (Subordination) node;
+
+            // recursion (only into the superordinate side; the subordinate side contributes contexts below)
+            extractRec(subordination.getSuperordination());
+
+            // add context relations
+            if (!IGNORED_RELATIONS.contains(subordination.getRelation())) {
+                List<DiscourseCore> cores = getCores(subordination.getSuperordination());
+                List<DiscourseContext> contexts = getContexts(subordination.getSubordination());
+
+                for (DiscourseCore core : cores) {
+                    for (DiscourseContext context : contexts) {
+                        core.addContextRelation(new DiscourseCoreContextRelation(subordination.getRelation(), context));
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/relation_extraction/element/DiscourseContext.java b/src/main/java/org/lambda3/text/simplification/discourse/relation_extraction/element/DiscourseContext.java
new file mode 100644
index 0000000..8ce971c
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/relation_extraction/element/DiscourseContext.java
@@ -0,0 +1,71 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : DiscourseContext
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.relation_extraction.element;
+
+import org.lambda3.text.simplification.discourse.utils.PrettyTreePrinter;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * A context element of the discourse relation extraction step: a text, the sentence index
+ * associated with it, and a flag telling whether it is a sentence-simplification context.
+ *
+ * <p>Text and sentence index never change after construction. The flag starts as {@code false}
+ * and can only be switched on, through {@link #setSentSimContext()}. {@code equals} and
+ * {@code hashCode} are not overridden, so instances are compared by identity.
+ */
+public class DiscourseContext implements PrettyTreePrinter.Node {
+    private final String text;
+    private final int sentenceIdx;
+    private boolean sentSimContext;
+
+    /**
+     * Creates a context that is not (yet) marked as a sentence-simplification context.
+     *
+     * @param text        the text of this context
+     * @param sentenceIdx the sentence index associated with the text
+     */
+    public DiscourseContext(String text, int sentenceIdx) {
+        this.text = text;
+        this.sentenceIdx = sentenceIdx;
+        this.sentSimContext = false;
+    }
+
+    /** Marks this context as a sentence-simplification context; there is no way to unset it. */
+    public void setSentSimContext() {
+        this.sentSimContext = true;
+    }
+
+    /** @return the text passed to the constructor */
+    public String getText() {
+        return text;
+    }
+
+    /** @return the sentence index passed to the constructor */
+    public int getSentenceIdx() {
+        return sentenceIdx;
+    }
+
+    /** @return {@code true} once {@link #setSentSimContext()} has been called */
+    public boolean isSentSimContext() {
+        return sentSimContext;
+    }
+
+    /**
+     * @return a one-element list holding the text in single quotes, followed by
+     *         {@code " [s-context]"} when this is a sentence-simplification context
+     */
+    @Override
+    public List<String> getPTPCaption() {
+        String caption = "'" + text + "'";
+        if (sentSimContext) {
+            caption += " [s-context]";
+        }
+        return Collections.singletonList(caption);
+    }
+
+    /**
+     * @return a fresh, empty, mutable list: a context has no outgoing edges
+     */
+    // NOTE(review): the element type argument of this return type is not visible in this copy of
+    // the patch; restore the edge type declared by PrettyTreePrinter.Node before merging.
+    @Override
+    public List getPTPEdges() {
+        return new ArrayList<>();
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/relation_extraction/element/DiscourseCore.java b/src/main/java/org/lambda3/text/simplification/discourse/relation_extraction/element/DiscourseCore.java
new file mode 100644
index 0000000..0c09de1
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/relation_extraction/element/DiscourseCore.java
@@ -0,0 +1,102 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : DiscourseCore
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.relation_extraction.element;
+
+import org.lambda3.text.simplification.discourse.relation_extraction.relation.DiscourseCoreContextRelation;
+import org.lambda3.text.simplification.discourse.relation_extraction.relation.DiscourseCoreCoreRelation;
+import org.lambda3.text.simplification.discourse.utils.PrettyTreePrinter;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * A core element of the discourse relation extraction step: a text, the sentence index associated
+ * with it, and the relations that link it to other cores and to contexts.
+ *
+ * <p>{@code equals} and {@code hashCode} are not overridden, so cores are compared by identity.
+ */
+public class DiscourseCore implements PrettyTreePrinter.Node {
+    private final String text;
+    private final int sentenceIdx;
+    private final List<DiscourseCoreCoreRelation> coreRelations = new ArrayList<>();
+    private final List<DiscourseCoreContextRelation> contextRelations = new ArrayList<>();
+
+    /**
+     * Creates a core without any relations.
+     *
+     * @param text        the text of this core
+     * @param sentenceIdx the sentence index associated with the text
+     */
+    public DiscourseCore(String text, int sentenceIdx) {
+        this.text = text;
+        this.sentenceIdx = sentenceIdx;
+    }
+
+    /** @return the text passed to the constructor */
+    public String getText() {
+        return text;
+    }
+
+    /** @return the sentence index passed to the constructor */
+    public int getSentenceIdx() {
+        return sentenceIdx;
+    }
+
+    /**
+     * Appends a core-to-core relation unless an equal one (per
+     * {@link DiscourseCoreCoreRelation#equals(Object)}) is already stored.
+     *
+     * @param coreRelation the relation to add
+     */
+    public void addCoreRelation(DiscourseCoreCoreRelation coreRelation) {
+        if (coreRelations.contains(coreRelation)) {
+            return;
+        }
+        coreRelations.add(coreRelation);
+    }
+
+    /** @return the internal list of core relations (live, not a copy), in insertion order */
+    public List<DiscourseCoreCoreRelation> getCoreRelations() {
+        return coreRelations;
+    }
+
+    /**
+     * Appends a core-to-context relation unless an equal one (per
+     * {@link DiscourseCoreContextRelation#equals(Object)}) is already stored.
+     *
+     * @param contextRelation the relation to add
+     */
+    public void addContextRelation(DiscourseCoreContextRelation contextRelation) {
+        if (contextRelations.contains(contextRelation)) {
+            return;
+        }
+        contextRelations.add(contextRelation);
+    }
+
+    /** @return the internal list of context relations (live, not a copy), in insertion order */
+    public List<DiscourseCoreContextRelation> getContextRelations() {
+        return contextRelations;
+    }
+
+    /** @return a one-element list holding the text in single quotes */
+    @Override
+    public List<String> getPTPCaption() {
+        return Collections.singletonList("'" + text + "'");
+    }
+
+    /**
+     * @return a new list with one unlabeled edge per core relation (created with flag
+     *         {@code false}) followed by one per context relation (created with flag {@code true})
+     */
+    // NOTE(review): the element type argument of this return type is not visible in this copy of
+    // the patch; restore the edge type declared by PrettyTreePrinter.Node before merging.
+    @Override
+    public List getPTPEdges() {
+        List<PrettyTreePrinter.DefaultEdge> edges = new ArrayList<>();
+
+        for (DiscourseCoreCoreRelation coreRelation : coreRelations) {
+            edges.add(new PrettyTreePrinter.DefaultEdge("", coreRelation.getCore(), false));
+        }
+        for (DiscourseCoreContextRelation contextRelation : contextRelations) {
+            edges.add(new PrettyTreePrinter.DefaultEdge("", contextRelation.getContext(), true));
+        }
+
+        return edges;
+    }
+
+    /** @return the rendering produced by {@code PrettyTreePrinter.prettyPrint(this, false, 40)} */
+    @Override
+    public String toString() {
+        return PrettyTreePrinter.prettyPrint(this, false, 40);
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/relation_extraction/relation/DiscourseCoreContextRelation.java b/src/main/java/org/lambda3/text/simplification/discourse/relation_extraction/relation/DiscourseCoreContextRelation.java
new file mode 100644
index 0000000..1a8d601
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/relation_extraction/relation/DiscourseCoreContextRelation.java
@@ -0,0 +1,54 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : DiscourseCoreContextRelation
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.relation_extraction.relation;
+
+import org.lambda3.text.simplification.discourse.relation_extraction.element.DiscourseContext;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+
+/**
+ *
+ */
+public class DiscourseCoreContextRelation {
+ private final Relation relation;
+ private final DiscourseContext context;
+
+ public DiscourseCoreContextRelation(Relation relation, DiscourseContext context) {
+ this.relation = relation;
+ this.context = context;
+ }
+
+ public Relation getRelation() {
+ return relation;
+ }
+
+ public DiscourseContext getContext() {
+ return context;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ return ((o instanceof DiscourseCoreContextRelation)
+ && (((DiscourseCoreContextRelation) o).relation.equals(relation))
+ && (((DiscourseCoreContextRelation) o).context.equals(context)));
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/relation_extraction/relation/DiscourseCoreCoreRelation.java b/src/main/java/org/lambda3/text/simplification/discourse/relation_extraction/relation/DiscourseCoreCoreRelation.java
new file mode 100644
index 0000000..6ec9f79
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/relation_extraction/relation/DiscourseCoreCoreRelation.java
@@ -0,0 +1,54 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : DiscourseCoreCoreRelation
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.relation_extraction.relation;
+
+import org.lambda3.text.simplification.discourse.relation_extraction.element.DiscourseCore;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+
+/**
+ *
+ */
+public class DiscourseCoreCoreRelation {
+ private final Relation relation;
+ private final DiscourseCore core;
+
+ public DiscourseCoreCoreRelation(Relation relation, DiscourseCore core) {
+ this.relation = relation;
+ this.core = core;
+ }
+
+ public Relation getRelation() {
+ return relation;
+ }
+
+ public DiscourseCore getCore() {
+ return core;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ return ((o instanceof DiscourseCoreCoreRelation)
+ && (((DiscourseCoreCoreRelation) o).relation.equals(relation))
+ && (((DiscourseCoreCoreRelation) o).core.equals(core)));
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/Simplifier.java b/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/Simplifier.java
new file mode 100644
index 0000000..b0902bc
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/Simplifier.java
@@ -0,0 +1,194 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : Simplifier
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.sentence_simplification;
+
+import edu.stanford.nlp.trees.Tree;
+import org.lambda3.text.simplification.sentence.transformation.CoreContextSentence;
+import org.lambda3.text.simplification.sentence.transformation.SentenceSimplifyingException;
+import org.lambda3.text.simplification.sentence.transformation.Transformer;
+import org.lambda3.text.simplification.discourse.relation_extraction.element.DiscourseContext;
+import org.lambda3.text.simplification.discourse.relation_extraction.element.DiscourseCore;
+import org.lambda3.text.simplification.discourse.relation_extraction.relation.DiscourseCoreContextRelation;
+import org.lambda3.text.simplification.discourse.relation_extraction.relation.DiscourseCoreCoreRelation;
+import org.lambda3.text.simplification.discourse.sentence_simplification.classification.SContextClassifier;
+import org.lambda3.text.simplification.discourse.sentence_simplification.element.DContext;
+import org.lambda3.text.simplification.discourse.sentence_simplification.element.DCore;
+import org.lambda3.text.simplification.discourse.sentence_simplification.element.SContext;
+import org.lambda3.text.simplification.discourse.sentence_simplification.relation.DContextRelation;
+import org.lambda3.text.simplification.discourse.sentence_simplification.relation.DCoreRelation;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Converts {@link DiscourseCore}s (and the {@link DiscourseContext}s attached to them) into
+ * {@link DCore}/{@link DContext} elements, running sentence simplification ({@link Transformer})
+ * on each text along the way.
+ *
+ * <p>Converted elements are cached for the duration of one {@link #simplify(List)} call, so a core
+ * or context referenced from several relations is simplified once and the same result object is
+ * shared. Neither {@link DiscourseCore} nor {@link DiscourseContext} overrides
+ * {@code equals}/{@code hashCode}, so the caches are effectively keyed by object identity. The
+ * caches are plain instance state without any synchronization.
+ */
+public class Simplifier {
+    private final Logger logger = LoggerFactory.getLogger(getClass());
+
+    private final LinkedHashMap<DiscourseCore, DCore> processedDiscourseCores = new LinkedHashMap<>();
+    private final LinkedHashMap<DiscourseContext, DContext> processedDiscourseContexts = new LinkedHashMap<>();
+
+    /** Creates a simplifier with empty caches. */
+    public Simplifier() {
+    }
+
+    /**
+     * Wraps a sentence-simplification context text into an {@link SContext}, using the relation
+     * found by {@link SContextClassifier#classify(String)} or {@link Relation#UNKNOWN_SENT_SIM}
+     * when the classifier returns nothing.
+     */
+    private static SContext createSContext(String text, int sentenceIdx) {
+        Optional<Relation> classified = SContextClassifier.classify(text);
+        return new SContext(text, sentenceIdx, classified.orElse(Relation.UNKNOWN_SENT_SIM));
+    }
+
+    /**
+     * Runs sentence simplification on {@code originalText}.
+     *
+     * <p>Best effort: when the transformer throws {@link SentenceSimplifyingException}, the
+     * failure is logged and whatever was produced up to that point is kept (in practice the
+     * original text and no contexts).
+     *
+     * @param originalText    the text to simplify
+     * @param sentenceIdx     sentence index to stamp on the created {@link SContext}s
+     * @param sentSimContexts output parameter; receives one {@link SContext} per non-null context
+     *                        returned by the transformer
+     * @return the text of the first non-null core returned by the transformer, or
+     *         {@code originalText} when there is none
+     */
+    private String simplifySentence(String originalText, int sentenceIdx, List<SContext> sentSimContexts) {
+        String coreText = originalText;
+
+        Transformer transformer = new Transformer();
+        try {
+            logger.debug("Simplifying: '{}'", originalText);
+            CoreContextSentence simplified = transformer.simplify(originalText);
+
+            // set coreText (assume that there is usually only one core)
+            if ((simplified.getCore() != null) && (simplified.getCore().size() > 0)) {
+                Tree firstCore = simplified.getCore().get(0);
+                if (firstCore != null) {
+                    coreText = WordsUtils.wordsToString(firstCore.yieldWords());
+                }
+            }
+
+            // add (sentence simplification) contexts
+            if (simplified.getContext() != null) {
+                for (Tree contextTree : simplified.getContext()) {
+                    if (contextTree != null) {
+                        sentSimContexts.add(createSContext(WordsUtils.wordsToString(contextTree.yieldWords()), sentenceIdx));
+                    }
+                }
+            }
+        } catch (SentenceSimplifyingException e) {
+            // Deliberately non-fatal, but no longer silent: leave a trace of what went wrong.
+            logger.warn("Sentence simplification failed for '{}'; keeping the text unsimplified", originalText, e);
+        }
+
+        return coreText;
+    }
+
+    /**
+     * Returns the {@link DContext} for {@code discourseContext}, creating and caching it on first
+     * request.
+     */
+    private DContext getDContext(DiscourseContext discourseContext) {
+        DContext cached = processedDiscourseContexts.get(discourseContext);
+        if (cached != null) {
+            return cached;
+        }
+
+        List<SContext> sentSimContexts = new ArrayList<>();
+        String text = simplifySentence(discourseContext.getText(), discourseContext.getSentenceIdx(), sentSimContexts);
+
+        DContext res = new DContext(text, discourseContext.getSentenceIdx(), discourseContext.getText());
+
+        // add (sentence simplification) context relations
+        for (SContext sentSimContext : sentSimContexts) {
+            res.addSContext(sentSimContext);
+        }
+
+        processedDiscourseContexts.put(discourseContext, res);
+        return res;
+    }
+
+    /**
+     * Returns the {@link DCore} for {@code discourseCore}, creating and caching it on first
+     * request. Discourse-level relations are not attached here; {@link #simplify(List)} does that.
+     */
+    private DCore getDCore(DiscourseCore discourseCore) {
+        DCore cached = processedDiscourseCores.get(discourseCore);
+        if (cached != null) {
+            return cached;
+        }
+
+        List<SContext> sentSimContexts = new ArrayList<>();
+        String text = simplifySentence(discourseCore.getText(), discourseCore.getSentenceIdx(), sentSimContexts);
+
+        DCore res = new DCore(text, discourseCore.getSentenceIdx(), discourseCore.getText());
+
+        // add (sentence simplification) context relations
+        for (SContext sentSimContext : sentSimContexts) {
+            res.addSContext(sentSimContext);
+        }
+
+        processedDiscourseCores.put(discourseCore, res);
+        return res;
+    }
+
+    /**
+     * Converts every given discourse core into a {@link DCore} and attaches its relations.
+     *
+     * <p>Core relations become {@link DCoreRelation}s to the converted target core. A context
+     * relation becomes a plain {@link SContext} on the core when its context is flagged as a
+     * sentence-simplification context, and a {@link DContextRelation} to the converted context
+     * otherwise. Both caches are emptied at the start of each call.
+     *
+     * @param discourseCores the cores to convert
+     * @return one {@link DCore} per input core, in input order
+     */
+    public List<DCore> simplify(List<DiscourseCore> discourseCores) {
+        processedDiscourseCores.clear();
+        processedDiscourseContexts.clear();
+
+        List<DCore> res = new ArrayList<>();
+
+        for (DiscourseCore discourseCore : discourseCores) {
+            DCore dCore = getDCore(discourseCore);
+
+            // add (discourse) core relations
+            for (DiscourseCoreCoreRelation coreRelation : discourseCore.getCoreRelations()) {
+                dCore.addDCoreRelation(new DCoreRelation(coreRelation.getRelation(), getDCore(coreRelation.getCore())));
+            }
+
+            // add (discourse) context relations
+            for (DiscourseCoreContextRelation contextRelation : discourseCore.getContextRelations()) {
+                DiscourseContext context = contextRelation.getContext();
+
+                // convert into a DContext or a SContext
+                if (context.isSentSimContext()) {
+                    dCore.addSContext(new SContext(context.getText(), context.getSentenceIdx(), contextRelation.getRelation()));
+                } else {
+                    dCore.addDContextRelation(new DContextRelation(contextRelation.getRelation(), getDContext(context)));
+                }
+            }
+
+            res.add(dCore);
+        }
+
+        return res;
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/classification/SContextClassifier.java b/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/classification/SContextClassifier.java
new file mode 100644
index 0000000..2c05e09
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/classification/SContextClassifier.java
@@ -0,0 +1,134 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : SContextClassifier
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.sentence_simplification.classification;
+
+import edu.stanford.nlp.trees.Tree;
+import edu.stanford.nlp.trees.tregex.TregexMatcher;
+import edu.stanford.nlp.trees.tregex.TregexPattern;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.utils.ner.NERStringParseException;
+import org.lambda3.text.simplification.discourse.utils.ner.NERStringParser;
+import org.lambda3.text.simplification.discourse.utils.ner.tner.TNERString;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeParser;
+import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
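+/**
+ * Classifies the text of a sentence-simplification context as a {@link Relation#TIME} or
+ * {@link Relation#LOCATION} relation by matching "This is/was ..." patterns on its parse tree.
+ */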
+public class SContextClassifier {
+ private static final Logger LOGGER = LoggerFactory.getLogger(SContextClassifier.class);
+
+ // NOTE(review): the next line is truncated in this copy of the patch. The PATTERN_PREFIX literal
+ // runs straight into the monthPatterns declaration, so the rest of the prefix, the definition of
+ // PATTERN_SUFFIX and the header of the enclosing method (presumably isTimeNP(Tree np), which is
+ // what classify() calls) are missing. Restore them from the original commit before applying.
+ private static final String PATTERN_PREFIX = "^.*(? monthPatterns = Stream.of(
+ // month names and abbreviations; each entry is wrapped in PATTERN_PREFIX / PATTERN_SUFFIX below
+ "january", "jan.",
+ "february", "feb.",
+ "march", "mar.",
+ "april", "apr.",
+ "may",
+ "june",
+ "july",
+ "august", "aug.",
+ "september", "sept.",
+ "october", "oct.",
+ "november", "nov.",
+ "december", "dec."
+ ).map(p -> PATTERN_PREFIX + p + PATTERN_SUFFIX).collect(Collectors.toList());
+
+ // weekday names and abbreviations, wrapped the same way
+ // NOTE(review): the '.' in the abbreviations (here and in the month list) is not escaped, so as
+ // a regex it matches any character, not only a literal dot — confirm this is intended.
+ final List days = Stream.of(
+ "monday", "mon.",
+ "tuesday", "tues.",
+ "wednesday", "wed.",
+ "thursday", "thurs.",
+ "friday", "fri.",
+ "saturday", "sat.",
+ "sunday", "sun."
+ ).map(p -> PATTERN_PREFIX + p + PATTERN_SUFFIX).collect(Collectors.toList());
+
+ // four digits starting with 1 or 2
+ final String yearPattern = PATTERN_PREFIX + "[1-2]\\d\\d\\d" + PATTERN_SUFFIX;
+ // "<number> bc", "<number> ad" or "ad <number>" (lower case because the text is lower-cased below)
+ final String bcadPattern = PATTERN_PREFIX + "(\\d+\\s+(bc|ad)|ad\\s+\\d+)" + PATTERN_SUFFIX;
+ // "1st century", "2nd century", "3rd century" or "<number>th century"
+ final String centuryPattern = PATTERN_PREFIX + "(1st|2nd|3rd|\\d+th)\\s+century" + PATTERN_SUFFIX;
+ // clock time "h:mm" / "hh:mm" with hours 0-24 and minutes 00-59, optional whitespace around ':'
+ final String timePattern = PATTERN_PREFIX + "([0-1]?\\d|2[0-4])\\s*:\\s*[0-5]\\d" + PATTERN_SUFFIX;
+
+ // The words of the NP are joined and lower-cased; String.matches requires the whole text to
+ // match, so a hit anywhere inside the text depends on the prefix/suffix parts of each pattern.
+ String text = WordsUtils.wordsToString(np.yieldWords()).toLowerCase();
+ // true as soon as any one of the patterns matches
+ return ((monthPatterns.stream().anyMatch(text::matches))
+ || (days.stream().anyMatch(text::matches))
+ || (text.matches(yearPattern))
+ || (text.matches(bcadPattern))
+ || (text.matches(centuryPattern))
+ || (text.matches(timePattern)));
+ }
+
+    /**
+     * Tells whether a noun phrase mentions a location.
+     *
+     * @param np the noun-phrase subtree to inspect
+     * @return {@code true} if NER parsing of {@code np} yields at least one token whose category
+     *         is {@code "LOCATION"}; {@code false} if there is none or if NER parsing fails
+     */
+    private static boolean isLocationNP(Tree np) {
+        try {
+            TNERString ner = NERStringParser.parse(np);
+
+            // constant-first comparison: a token without a category counts as "not a location"
+            return ner.getTokens().stream().anyMatch(t -> "LOCATION".equals(t.getCategory()));
+        } catch (NERStringParseException e) {
+            // Best effort: an NP that cannot be NER-parsed is treated as a non-location.
+            LOGGER.debug("NER parsing failed; treating the NP as a non-location", e);
+            return false;
+        }
+    }
+
+    /**
+     * Tries to assign a relation to the text of a sentence-simplification context.
+     *
+     * <p>The text is parsed and matched against two Tregex patterns, both anchored at the leftmost
+     * descendants of the root: "This is/was in|at|around NP" yields {@link Relation#TIME} when the
+     * NP passes {@code isTimeNP}; otherwise "This is/was &lt;any node&gt; NP" yields
+     * {@link Relation#LOCATION} when the NP passes {@code isLocationNP}. TIME is checked first.
+     *
+     * @param sContext the context text to classify
+     * @return the detected relation, or an empty {@code Optional} when neither pattern applies or
+     *         the text cannot be parsed (the parse failure is logged, not thrown)
+     */
+    public static Optional<Relation> classify(String sContext) {
+
+        try {
+            Tree parseTree = ParseTreeParser.parse(sContext);
+
+            // find TIME-relation
+            TregexMatcher timeMatcher = TregexPattern
+                    .compile("ROOT <<, (/This/ . (/(is|was)/ . (/(in|at|around)/ . NP=np)))")
+                    .matcher(parseTree);
+            if (timeMatcher.findAt(parseTree) && isTimeNP(timeMatcher.getNode("np"))) {
+                return Optional.of(Relation.TIME);
+            }
+
+            // find LOCATION-relation
+            TregexMatcher locationMatcher = TregexPattern
+                    .compile("ROOT <<, (/This/ . (/(is|was)/ . (__ . NP=np)))")
+                    .matcher(parseTree);
+            if (locationMatcher.findAt(parseTree) && isLocationNP(locationMatcher.getNode("np"))) {
+                return Optional.of(Relation.LOCATION);
+            }
+        } catch (ParseTreeException e) {
+            // Same message text as before, now parameterized and with the cause attached.
+            LOGGER.error("Could not generate parse tree for sContext: '{}'", sContext, e);
+        }
+
+        return Optional.empty();
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/element/DContext.java b/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/element/DContext.java
new file mode 100644
index 0000000..e6d45bb
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/element/DContext.java
@@ -0,0 +1,100 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : DContext
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.sentence_simplification.element;
+
+import org.lambda3.text.simplification.discourse.utils.PrettyTreePrinter;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+/**
+ * A context after sentence simplification: the simplified text, its sentence index, the text
+ * before simplification, and the sentence-simplification contexts ({@link SContext}) attached
+ * to it.
+ */
+public class DContext implements PrettyTreePrinter.Node {
+    private final String text;
+    private final int sentenceIndex;
+    private final String notSimplifiedText;
+    private final List<SContext> sContexts = new ArrayList<>();
+
+    /**
+     * Creates a context without any attached {@link SContext}s.
+     *
+     * @param text              the (simplified) text
+     * @param sentenceIndex     the sentence index associated with the text
+     * @param notSimplifiedText the text before simplification
+     */
+    public DContext(String text, int sentenceIndex, String notSimplifiedText) {
+        this.text = text;
+        this.sentenceIndex = sentenceIndex;
+        this.notSimplifiedText = notSimplifiedText;
+    }
+
+    /**
+     * Appends a sentence-simplification context; duplicates are not filtered.
+     *
+     * @param sContext the context to attach
+     */
+    public void addSContext(SContext sContext) {
+        this.sContexts.add(sContext);
+    }
+
+    /** @return the (simplified) text */
+    public String getText() {
+        return text;
+    }
+
+    /** @return the sentence index passed to the constructor */
+    public int getSentenceIndex() {
+        return sentenceIndex;
+    }
+
+    /** @return the text before simplification */
+    public String getNotSimplifiedText() {
+        return notSimplifiedText;
+    }
+
+    /** @return the internal list of attached contexts (live, not a copy), in insertion order */
+    public List<SContext> getSContexts() {
+        return sContexts;
+    }
+
+    /** @return a one-element list holding the text in single quotes */
+    @Override
+    public List<String> getPTPCaption() {
+        return Collections.singletonList("'" + text + "'");
+    }
+
+    /**
+     * @return a new list with one unlabeled edge (created with flag {@code true}) per attached
+     *         {@link SContext}
+     */
+    // NOTE(review): the element type argument of this return type is not visible in this copy of
+    // the patch; restore the edge type declared by PrettyTreePrinter.Node before merging.
+    @Override
+    public List getPTPEdges() {
+        List<PrettyTreePrinter.DefaultEdge> edges = new ArrayList<>();
+
+        for (SContext sContext : sContexts) {
+            edges.add(new PrettyTreePrinter.DefaultEdge("", sContext, true));
+        }
+
+        return edges;
+    }
+
+    /** @return the rendering produced by {@code PrettyTreePrinter.prettyPrint(this, false, 40)} */
+    @Override
+    public String toString() {
+        return PrettyTreePrinter.prettyPrint(this, false, 40);
+    }
+
+    /**
+     * @param o the object to compare with
+     * @return {@code true} if {@code o} is a {@code DContext} with the same sentence index, text,
+     *         unsimplified text and an equal list of attached contexts
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (!(o instanceof DContext)) {
+            return false;
+        }
+        DContext other = (DContext) o;
+        return getSentenceIndex() == other.getSentenceIndex()
+                && Objects.equals(getText(), other.getText())
+                && Objects.equals(getNotSimplifiedText(), other.getNotSimplifiedText())
+                && Objects.equals(sContexts, other.sContexts);
+    }
+
+    /**
+     * Hash over the three immutable fields compared by {@link #equals(Object)}. The attached
+     * context list is left out on purpose: it can still grow through
+     * {@link #addSContext(SContext)}, and omitting it keeps the hash stable while remaining
+     * consistent with {@code equals} (equal objects agree on all three hashed fields).
+     */
+    @Override
+    public int hashCode() {
+        return Objects.hash(getSentenceIndex(), getText(), getNotSimplifiedText());
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/element/DCore.java b/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/element/DCore.java
new file mode 100644
index 0000000..e0cdd58
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/element/DCore.java
@@ -0,0 +1,136 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : DCore
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.sentence_simplification.element;
+
+import org.lambda3.text.simplification.discourse.sentence_simplification.relation.DContextRelation;
+import org.lambda3.text.simplification.discourse.sentence_simplification.relation.DCoreRelation;
+import org.lambda3.text.simplification.discourse.utils.PrettyTreePrinter;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+/**
+ * A core after sentence simplification: the simplified text, its sentence index, the text before
+ * simplification, and three kinds of attachments: relations to other cores
+ * ({@link DCoreRelation}), relations to contexts ({@link DContextRelation}) and
+ * sentence-simplification contexts ({@link SContext}).
+ */
+public class DCore implements PrettyTreePrinter.Node {
+    private final String text;
+    private final int sentenceIndex;
+    private final String notSimplifiedText;
+    private final List<DCoreRelation> dCoreRelations = new ArrayList<>();
+    private final List<DContextRelation> dContextRelations = new ArrayList<>();
+    private final List<SContext> sContexts = new ArrayList<>();
+
+    /**
+     * Creates a core without any attachments.
+     *
+     * @param text              the (simplified) text
+     * @param sentenceIndex     the sentence index associated with the text
+     * @param notSimplifiedText the text before simplification
+     */
+    public DCore(String text, int sentenceIndex, String notSimplifiedText) {
+        this.text = text;
+        this.sentenceIndex = sentenceIndex;
+        this.notSimplifiedText = notSimplifiedText;
+    }
+
+    /**
+     * Appends a core relation unless an equal one is already stored.
+     *
+     * @param dCoreRelation the relation to add
+     */
+    public void addDCoreRelation(DCoreRelation dCoreRelation) {
+        if (dCoreRelations.contains(dCoreRelation)) {
+            return;
+        }
+        dCoreRelations.add(dCoreRelation);
+    }
+
+    /**
+     * Appends a context relation unless an equal one is already stored.
+     *
+     * @param dContextRelation the relation to add
+     */
+    public void addDContextRelation(DContextRelation dContextRelation) {
+        if (dContextRelations.contains(dContextRelation)) {
+            return;
+        }
+        dContextRelations.add(dContextRelation);
+    }
+
+    /**
+     * Appends a sentence-simplification context; unlike the relation lists, duplicates are not
+     * filtered here.
+     *
+     * @param sContext the context to attach
+     */
+    public void addSContext(SContext sContext) {
+        this.sContexts.add(sContext);
+    }
+
+    /** @return the (simplified) text */
+    public String getText() {
+        return text;
+    }
+
+    /** @return the sentence index passed to the constructor */
+    public int getSentenceIndex() {
+        return sentenceIndex;
+    }
+
+    /** @return the text before simplification */
+    public String getNotSimplifiedText() {
+        return notSimplifiedText;
+    }
+
+    /** @return the internal list of core relations (live, not a copy), in insertion order */
+    public List<DCoreRelation> getDCoreRelations() {
+        return dCoreRelations;
+    }
+
+    /** @return the internal list of context relations (live, not a copy), in insertion order */
+    public List<DContextRelation> getDContextRelations() {
+        return dContextRelations;
+    }
+
+    /** @return the internal list of attached contexts (live, not a copy), in insertion order */
+    public List<SContext> getSContexts() {
+        return sContexts;
+    }
+
+    /** @return a one-element list holding the text in single quotes */
+    @Override
+    public List<String> getPTPCaption() {
+        return Collections.singletonList("'" + text + "'");
+    }
+
+    /**
+     * @return a new list of unlabeled edges: first one per core relation (created with flag
+     *         {@code false}), then one per context relation and one per attached {@link SContext}
+     *         (both created with flag {@code true})
+     */
+    // NOTE(review): the element type argument of this return type is not visible in this copy of
+    // the patch; restore the edge type declared by PrettyTreePrinter.Node before merging.
+    @Override
+    public List getPTPEdges() {
+        List<PrettyTreePrinter.DefaultEdge> edges = new ArrayList<>();
+
+        for (DCoreRelation coreRelation : dCoreRelations) {
+            edges.add(new PrettyTreePrinter.DefaultEdge("", coreRelation.getDCore(), false));
+        }
+        for (DContextRelation contextRelation : dContextRelations) {
+            edges.add(new PrettyTreePrinter.DefaultEdge("", contextRelation.getDContext(), true));
+        }
+        for (SContext sContext : sContexts) {
+            edges.add(new PrettyTreePrinter.DefaultEdge("", sContext, true));
+        }
+
+        return edges;
+    }
+
+    /** @return the rendering produced by {@code PrettyTreePrinter.prettyPrint(this, false, 40)} */
+    @Override
+    public String toString() {
+        return PrettyTreePrinter.prettyPrint(this, false, 40);
+    }
+
+    /**
+     * @param o the object to compare with
+     * @return {@code true} if {@code o} is a {@code DCore} with the same sentence index, text and
+     *         unsimplified text whose {@link #toString()} rendering is equal to this one's; the
+     *         attachments are compared only through that rendering
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (!(o instanceof DCore)) {
+            return false;
+        }
+        DCore other = (DCore) o;
+        return getSentenceIndex() == other.getSentenceIndex()
+                && Objects.equals(getText(), other.getText())
+                && Objects.equals(getNotSimplifiedText(), other.getNotSimplifiedText())
+                && Objects.equals(toString(), other.toString());
+    }
+
+    /**
+     * Hash over the three immutable fields compared by {@link #equals(Object)}. The rendered
+     * attachments are left out on purpose: they can change through the {@code add...} methods,
+     * and omitting them keeps the hash stable while remaining consistent with {@code equals}
+     * (equal objects agree on all three hashed fields).
+     */
+    @Override
+    public int hashCode() {
+        return Objects.hash(getSentenceIndex(), getText(), getNotSimplifiedText());
+    }
+
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/element/SContext.java b/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/element/SContext.java
new file mode 100644
index 0000000..f26d964
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/element/SContext.java
@@ -0,0 +1,78 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : SContext
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.sentence_simplification.element;
+
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.utils.PrettyTreePrinter;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Immutable context element made of a text, a sentence index and a {@link Relation}.
+ *
+ * <p>For {@link PrettyTreePrinter} it is a leaf: it shows its quoted text as caption and
+ * reports no outgoing edges.
+ */
+public class SContext implements PrettyTreePrinter.Node {
+    private final String text;
+    private final int sentenceIndex;
+    private final Relation relation;
+
+    /**
+     * @param text          the text of this context
+     * @param sentenceIndex the sentence index to store with this context
+     * @param relation      the relation to store with this context
+     */
+    public SContext(String text, int sentenceIndex, Relation relation) {
+        this.text = text;
+        this.sentenceIndex = sentenceIndex;
+        this.relation = relation;
+    }
+
+    /**
+     * @return the text of this context
+     */
+    public String getText() {
+        return text;
+    }
+
+    /**
+     * @return the sentence index stored with this context
+     */
+    public int getSentenceIndex() {
+        return sentenceIndex;
+    }
+
+    /**
+     * @return the relation stored with this context
+     */
+    public Relation getRelation() {
+        return relation;
+    }
+
+    /**
+     * @return an immutable single-element list holding the text wrapped in single quotes
+     */
+    @Override
+    public List getPTPCaption() {
+        return Collections.singletonList("'" + text + "'");
+    }
+
+    /**
+     * @return a new empty list, because a context has no children in the printed tree
+     */
+    @Override
+    public List getPTPEdges() {
+        return new ArrayList<>();
+    }
+
+    /**
+     * Two contexts are equal when sentence index, text and relation are all equal.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof SContext)) return false;
+        SContext sContext = (SContext) o;
+        return getSentenceIndex() == sContext.getSentenceIndex() &&
+            Objects.equals(getText(), sContext.getText()) &&
+            getRelation() == sContext.getRelation();
+    }
+
+    /**
+     * Hashes exactly the fields compared by {@link #equals(Object)}, so that equal contexts
+     * always end up in the same bucket of hash-based collections.
+     */
+    @Override
+    public int hashCode() {
+        return Objects.hash(sentenceIndex, text, relation);
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/relation/DContextRelation.java b/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/relation/DContextRelation.java
new file mode 100644
index 0000000..b418fde
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/relation/DContextRelation.java
@@ -0,0 +1,58 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : DContextRelation
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.sentence_simplification.relation;
+
+import org.lambda3.text.simplification.discourse.sentence_simplification.element.DContext;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+
+import java.util.Objects;
+
+/**
+ * Immutable pair of a {@link Relation} and the {@link DContext} it points to.
+ */
+public class DContextRelation {
+    private final Relation relation;
+    private final DContext dContext;
+
+    /**
+     * @param relation the relation of this link
+     * @param dContext the context this link points to
+     */
+    public DContextRelation(Relation relation, DContext dContext) {
+        this.relation = relation;
+        this.dContext = dContext;
+    }
+
+    /**
+     * @return the relation of this link
+     */
+    public Relation getRelation() {
+        return relation;
+    }
+
+    /**
+     * @return the context this link points to
+     */
+    public DContext getDContext() {
+        return dContext;
+    }
+
+    /**
+     * Two links are equal when they carry the same relation and equal contexts.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof DContextRelation)) return false;
+        DContextRelation that = (DContextRelation) o;
+        return getRelation() == that.getRelation() &&
+            Objects.equals(dContext, that.dContext);
+    }
+
+    /**
+     * Hash code consistent with {@link #equals(Object)}.
+     *
+     * <p>Only the relation is hashed. Whether {@code DContext} provides a hash code that agrees
+     * with its own {@code equals} is not known here, so including it could make two equal links
+     * hash differently.
+     */
+    @Override
+    public int hashCode() {
+        return Objects.hashCode(relation);
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/relation/DCoreRelation.java b/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/relation/DCoreRelation.java
new file mode 100644
index 0000000..3911bc1
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/sentence_simplification/relation/DCoreRelation.java
@@ -0,0 +1,59 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : DCoreRelation
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.sentence_simplification.relation;
+
+import org.lambda3.text.simplification.discourse.sentence_simplification.element.DCore;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+
+import java.util.Objects;
+
+/**
+ * Immutable pair of a {@link Relation} and the {@link DCore} it points to.
+ */
+public class DCoreRelation {
+    private final Relation relation;
+    private final DCore dCore;
+
+    /**
+     * @param relation the relation of this link
+     * @param dCore    the core this link points to
+     */
+    public DCoreRelation(Relation relation, DCore dCore) {
+        this.relation = relation;
+        this.dCore = dCore;
+    }
+
+    /**
+     * @return the relation of this link
+     */
+    public Relation getRelation() {
+        return relation;
+    }
+
+    /**
+     * @return the core this link points to
+     */
+    public DCore getDCore() {
+        return dCore;
+    }
+
+    /**
+     * Two links are equal when they carry the same relation and equal cores.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) return true;
+        if (!(o instanceof DCoreRelation)) return false;
+        DCoreRelation that = (DCoreRelation) o;
+        return getRelation() == that.getRelation() &&
+            Objects.equals(dCore, that.dCore);
+    }
+
+    /**
+     * Hash code consistent with {@link #equals(Object)}.
+     *
+     * <p>Only the relation is hashed. Whether {@code DCore} provides a hash code that agrees
+     * with its own {@code equals} is not known here, so including it could make two equal links
+     * hash differently.
+     */
+    @Override
+    public int hashCode() {
+        return Objects.hashCode(relation);
+    }
+
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/DiscourseTreeCreator.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/DiscourseTreeCreator.java
new file mode 100644
index 0000000..db65e94
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/DiscourseTreeCreator.java
@@ -0,0 +1,233 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : DiscourseTreeCreator
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree;
+
+import edu.stanford.nlp.trees.Tree;
+import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
+import org.lambda3.text.simplification.discourse.tree.extraction.ExtractionRule;
+import org.lambda3.text.simplification.discourse.tree.extraction.model.CoordinationExtraction;
+import org.lambda3.text.simplification.discourse.tree.extraction.model.RefCoordinationExtraction;
+import org.lambda3.text.simplification.discourse.tree.extraction.model.RefSubordinationExtraction;
+import org.lambda3.text.simplification.discourse.tree.extraction.model.SubordinationExtraction;
+import org.lambda3.text.simplification.discourse.tree.extraction.rules.*;
+import org.lambda3.text.simplification.discourse.tree.model.*;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeParser;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeVisualizer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Builds a discourse tree incrementally.
+ *
+ * <p>Sentences are appended as leaves below a synthetic {@code "ROOT"} coordination with
+ * {@link #addSentence(String, int)}. {@link #update()} then walks the tree and tries to replace
+ * every not yet processed leaf by the result of the first extraction rule that yields a usable
+ * extraction, recursing into whatever the replacement produced.
+ */
+public class DiscourseTreeCreator {
+    /** Extraction rules in the order in which they are tried on a leaf. */
+    private static final List<ExtractionRule> rules;
+
+    static {
+        rules = new ArrayList<>();
+
+        rules.add(new ReferenceExtractorForContainingWords());
+        rules.add(new ReferenceExtractorForPrecedingWords());
+        rules.add(new CoordinationExtractor());
+        rules.add(new SharedNPCoordinationExtractor());
+        rules.add(new SubordinationExtractor());
+        rules.add(new IntraSententialSubordinationExtraction());
+        rules.add(new RightSubordinateEnablementExtractor());
+        rules.add(new LeftSubordinateEnablementExtractor());
+
+        rules.add(new ListNPExtractor("ROOT <<: (S < (NP=np < (NP $.. NP) $.. VP))"));
+        rules.add(new ListNPExtractor("ROOT <<: (S < (NP $.. (VP << (NP=np < (NP $.. NP)))))"));
+    }
+
+    private final Logger logger = LoggerFactory.getLogger(getClass());
+    private Coordination discourseTree;
+
+    /**
+     * Creates a creator that holds an empty discourse tree.
+     */
+    public DiscourseTreeCreator() {
+        reset();
+    }
+
+    /**
+     * Discards the current tree and starts again with an empty {@code "ROOT"} coordination.
+     */
+    public void reset() {
+        this.discourseTree = new Coordination(
+            "ROOT",
+            Relation.UNKNOWN_COORDINATION,
+            null,
+            new ArrayList<>()
+        );
+    }
+
+    /**
+     * Appends a sentence as a new leaf directly below the root coordination.
+     *
+     * @param sentence    the sentence text
+     * @param sentenceIdx the index passed on to the created {@link SentenceLeaf}
+     */
+    public void addSentence(String sentence, int sentenceIdx) {
+        discourseTree.addCoordination(new SentenceLeaf(sentence, sentenceIdx));
+    }
+
+    /**
+     * @return the last child of the root coordination, or {@code null} if the root has no children
+     */
+    public DiscourseTree getLastSentenceTree() {
+        DiscourseTree res = null;
+        if (discourseTree.getCoordinations().size() > 0) {
+            res = discourseTree.getCoordinations().get(discourseTree.getCoordinations().size() - 1);
+        }
+
+        return res;
+    }
+
+    /**
+     * @return the root coordination of the tree built so far
+     */
+    public Coordination getDiscourseTree() {
+        return discourseTree;
+    }
+
+    /**
+     * Processes all parts of the tree that have not been processed yet and afterwards calls
+     * {@code cleanup()} on the root.
+     */
+    public void update() {
+        processDiscourseTreeRec(discourseTree);
+        discourseTree.cleanup();
+    }
+
+    /**
+     * Recursively applies the extraction rules to all unprocessed children of {@code node}.
+     *
+     * @param node the subtree to process; only coordinations and subordinations have children
+     *             that are visited, any other node type is left untouched
+     */
+    private void processDiscourseTreeRec(DiscourseTree node) {
+
+        if (node instanceof Coordination) {
+            Coordination coordination = (Coordination) node;
+
+            // NOTE(review): children may be replaced while this list is being iterated; this
+            // relies on replaceCoordination not changing the list structurally - confirm.
+            for (DiscourseTree child : coordination.getCoordinations()) {
+
+                // process coordination-leaf if not processed yet
+                if (child.isNotProcessed()) {
+                    DiscourseTree c = child;
+
+                    if (child instanceof Leaf) {
+                        Optional<DiscourseTree> newChild = applyRules((Leaf) child);
+                        if (newChild.isPresent()) {
+                            coordination.replaceCoordination(child, newChild.get());
+                            c = newChild.get();
+                        }
+                    }
+
+                    // NOTE(review): this marks the original child, not its replacement, whereas
+                    // the subordination branch below marks the node fetched after replacement.
+                    child.setProcessed();
+
+                    // recursion
+                    processDiscourseTreeRec(c);
+                }
+            }
+        }
+
+        if (node instanceof Subordination) {
+            Subordination subordination = (Subordination) node;
+
+            // process superordination-leaf if not processed yet
+            if (subordination.getSuperordination().isNotProcessed()) {
+
+                if (subordination.getSuperordination() instanceof Leaf) {
+                    Optional<DiscourseTree> newChild = applyRules((Leaf) subordination.getSuperordination());
+                    newChild.ifPresent(subordination::replaceSuperordination);
+                }
+
+                subordination.getSuperordination().setProcessed();
+
+                // recursion
+                processDiscourseTreeRec(subordination.getSuperordination());
+            }
+
+            // process subordination-leaf if not processed yet
+            if (subordination.getSubordination().isNotProcessed()) {
+
+                if (subordination.getSubordination() instanceof Leaf) {
+                    Optional<DiscourseTree> newChild = applyRules((Leaf) subordination.getSubordination());
+                    newChild.ifPresent(subordination::replaceSubordination);
+                }
+
+                subordination.getSubordination().setProcessed();
+
+                // recursion
+                processDiscourseTreeRec(subordination.getSubordination());
+            }
+        }
+    }
+
+    /**
+     * Tries the extraction rules on a leaf, in list order, and converts the first usable
+     * extraction into a discourse tree.
+     *
+     * @param leaf the leaf to split
+     * @return the subtree that should replace the leaf, or an empty Optional if the leaf is of
+     *         type {@code TERMINAL} or {@code SENT_SIM_CONTEXT}, its text could not be parsed,
+     *         or no rule produced a usable extraction
+     */
+    private Optional<DiscourseTree> applyRules(Leaf leaf) {
+        if (logger.isDebugEnabled()) {
+            logger.debug("Processing leaf:");
+            logger.debug(leaf.toString());
+        }
+
+        if ((leaf.getType().equals(Leaf.Type.TERMINAL)) || (leaf.getType().equals(Leaf.Type.SENT_SIM_CONTEXT))) {
+            logger.debug("Leaf will not be split.");
+            return Optional.empty();
+        }
+
+        // try to generate parseTree
+        Tree parseTree;
+        try {
+            parseTree = ParseTreeParser.parse(leaf.getText());
+        } catch (ParseTreeException e) {
+            // keep the cause in the log so that parser failures can be diagnosed
+            logger.error("Failed to generate parse tree", e);
+
+            return Optional.empty();
+        }
+        if (logger.isDebugEnabled()) {
+            logger.debug("Parse tree:");
+            logger.debug(ParseTreeVisualizer.prettyPrint(parseTree));
+        }
+
+        // check rules
+        for (ExtractionRule rule : rules) {
+
+            Optional<Extraction> extraction = rule.extract(parseTree);
+            if (!extraction.isPresent()) {
+                continue;
+            }
+            Extraction found = extraction.get();
+            logger.debug("Extraction rule {} matched.", rule.getClass().getSimpleName());
+
+            // handle CoordinationExtraction
+            if (found instanceof CoordinationExtraction) {
+                return Optional.of(((CoordinationExtraction) found).convert());
+            }
+
+            // handle SubordinationExtraction
+            if (found instanceof SubordinationExtraction) {
+                return Optional.of(((SubordinationExtraction) found).convert());
+            }
+
+            // handle RefCoordinationExtraction; an unusable reference falls through to the next rule
+            if (found instanceof RefCoordinationExtraction) {
+                Optional<DiscourseTree> r = ((RefCoordinationExtraction) found).convert(leaf);
+                if (r.isPresent()) {
+                    return r;
+                } else {
+                    logger.debug("Reference could not be used, checking other model rules.");
+                }
+            }
+
+            // handle RefSubordinationExtraction; an unusable reference falls through to the next rule
+            if (found instanceof RefSubordinationExtraction) {
+                Optional<DiscourseTree> r = ((RefSubordinationExtraction) found).convert(leaf);
+                if (r.isPresent()) {
+                    return r;
+                } else {
+                    logger.debug("Reference could not be used, checking other model rules.");
+                }
+            }
+        }
+        logger.debug("No model rule applied.");
+
+        return Optional.empty();
+    }
+
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/Relation.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/Relation.java
new file mode 100644
index 0000000..4676e40
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/Relation.java
@@ -0,0 +1,75 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : Relation
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree;
+
+import java.util.Optional;
+
+/**
+ * Relations that can hold between elements of a discourse tree.
+ *
+ * <p>Every relation knows its reverse relation. By default a relation is its own reverse; only
+ * {@link #TEMPORAL_AFTER} and {@link #TEMPORAL_BEFORE} are wired to each other.
+ */
+public enum Relation {
+
+    // default relations
+    UNKNOWN_COORDINATION, // the default for coordination
+    UNKNOWN_SUBORDINATION, // the default for subordination
+    UNKNOWN_SENT_SIM, // the default for sentence simplification
+
+    BACKGROUND,
+    CAUSE,
+    CONDITION,
+    CONTRAST,
+    ELABORATION,
+    ENABLEMENT,
+    EXPLANATION,
+    JOINT_LIST,
+    JOINT_DISJUNCTION,
+    TEMPORAL_BEFORE,
+    TEMPORAL_AFTER,
+    TEMPORAL_SEQUENCE,
+
+    // special relations
+    INTRA_SENTENTIAL_ATTRIBUTION,
+    JOINT_NP_LIST,
+    JOINT_NP_DISJUNCTION,
+
+    // sentence simplification
+    TIME,
+    LOCATION;
+
+    // Runs after all constants have been constructed, so it overrides the default
+    // "reverse of itself" that the constructor assigned.
+    static {
+        TEMPORAL_AFTER.reverseRelation = TEMPORAL_BEFORE;
+        TEMPORAL_BEFORE.reverseRelation = TEMPORAL_AFTER;
+    }
+
+    private Relation reverseRelation;
+
+    Relation() {
+        /*
+         * by default, each relation is bidirectional with an equal reverse relation.
+         * To make a relation unidirectional, set reverseRelation to null.
+         */
+        this.reverseRelation = this;
+    }
+
+    /**
+     * Returns the relation that holds when both arguments are swapped.
+     *
+     * @return the reverse relation, or an empty Optional if the reverse relation has been set
+     *         to {@code null} (which none of the current constants does)
+     */
+    public Optional<Relation> getReverseRelation() {
+        return Optional.ofNullable(reverseRelation);
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/classification/SignalPhraseClassifier.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/classification/SignalPhraseClassifier.java
new file mode 100644
index 0000000..41afd82
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/classification/SignalPhraseClassifier.java
@@ -0,0 +1,167 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : SignalPhraseClassifier
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.classification;
+
+import edu.stanford.nlp.ling.Word;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.Optional;
+import java.util.regex.Pattern;
+
+/**
+ * Maps a signal phrase (a sequence of words) to a {@link Relation} by matching it against
+ * signal phrase patterns.
+ *
+ * <p>If several patterns match, the one whose pattern text is longest wins; among patterns of
+ * equal length the one that comes later in the mapping list wins.
+ */
+public class SignalPhraseClassifier {
+
+    private static final List<Mapping> GENERAL_MAPPINGS = Arrays.asList(
+
+        // BACKGROUND
+        new Mapping(Relation.BACKGROUND, "...as..."),
+        new Mapping(Relation.BACKGROUND, "...now..."),
+        new Mapping(Relation.BACKGROUND, "...once..."),
+// new Mapping(Relation.BACKGROUND, "previously"),
+        new Mapping(Relation.BACKGROUND, "...when..."),
+        new Mapping(Relation.BACKGROUND, "...with..."),
+        new Mapping(Relation.BACKGROUND, "...without..."),
+
+        // CAUSE
+        new Mapping(Relation.CAUSE, "...largely because..."),
+        new Mapping(Relation.CAUSE, "...as a result..."),
+        new Mapping(Relation.CAUSE, "...as a result of..."),
+        new Mapping(Relation.CAUSE, "...because..."),
+        new Mapping(Relation.CAUSE, "...since..."),
+
+        // CONDITION
+        new Mapping(Relation.CONDITION, "...if..."),
+        new Mapping(Relation.CONDITION, "...in case..."),
+        new Mapping(Relation.CONDITION, "...unless..."),
+        new Mapping(Relation.CONDITION, "...until..."),
+
+        // CONTRAST
+        new Mapping(Relation.CONTRAST, "...although..."),
+        new Mapping(Relation.CONTRAST, "...but..."),
+        new Mapping(Relation.CONTRAST, "...but now..."),
+        new Mapping(Relation.CONTRAST, "...despite..."),
+        new Mapping(Relation.CONTRAST, "...even though..."),
+        new Mapping(Relation.CONTRAST, "...even when..."),
+        new Mapping(Relation.CONTRAST, "...however..."),
+        new Mapping(Relation.CONTRAST, "...instead..."),
+        new Mapping(Relation.CONTRAST, "...rather..."),
+        new Mapping(Relation.CONTRAST, "...still..."),
+        new Mapping(Relation.CONTRAST, "...though..."),
+        new Mapping(Relation.CONTRAST, "...thus..."),
+        new Mapping(Relation.CONTRAST, "...until recently..."),
+        new Mapping(Relation.CONTRAST, "...while..."),
+        new Mapping(Relation.CONTRAST, "...yet..."),
+
+        // ELABORATION
+        new Mapping(Relation.ELABORATION, "...more provocatively..."),
+        new Mapping(Relation.ELABORATION, "...even before..."),
+        new Mapping(Relation.ELABORATION, "...for example..."),
+        new Mapping(Relation.ELABORATION, "...further..."),
+        new Mapping(Relation.ELABORATION, "...recently..."),
+        new Mapping(Relation.ELABORATION, "...since...now..."),
+        new Mapping(Relation.ELABORATION, "...so..."),
+        new Mapping(Relation.ELABORATION, "...so far..."),
+        new Mapping(Relation.ELABORATION, "...where..."),
+        new Mapping(Relation.ELABORATION, "...whereby..."),
+        new Mapping(Relation.ELABORATION, "...whether..."),
+
+        // EXPLANATION
+        new Mapping(Relation.EXPLANATION, "...simply because..."),
+        new Mapping(Relation.EXPLANATION, "...because of..."),
+        new Mapping(Relation.EXPLANATION, "...indeed..."),
+        new Mapping(Relation.EXPLANATION, "...so...that..."),
+
+        // JOINT_LIST
+        new Mapping(Relation.JOINT_LIST, "...and..."),
+
+        // JOINT_DISJUNCTION
+        new Mapping(Relation.JOINT_DISJUNCTION, "...or..."),
+
+        // TEMPORAL_BEFORE
+        new Mapping(Relation.TEMPORAL_BEFORE, "...before..."),
+        new Mapping(Relation.TEMPORAL_BEFORE, "...previously..."), // changed from BACKGROUND TO TEMPORAL_BEFORE
+
+        // TEMPORAL_AFTER
+        new Mapping(Relation.TEMPORAL_AFTER, "...after..."),
+        new Mapping(Relation.TEMPORAL_AFTER, "...and after..."),
+        new Mapping(Relation.TEMPORAL_AFTER, "...next..."),
+        new Mapping(Relation.TEMPORAL_AFTER, "...then..."),
+
+        // TEMPORAL_SEQUENCE
+        new Mapping(Relation.TEMPORAL_SEQUENCE, "...thereafter...")
+    );
+
+    /**
+     * Finds the best matching mapping for a signal phrase.
+     *
+     * @param mappings          the candidate mappings, checked in list order
+     * @param signalPhraseWords the words of the signal phrase
+     * @return the relation of the best matching mapping, or an empty Optional if nothing matches
+     */
+    private static Optional<Relation> classify(List<Mapping> mappings, List<Word> signalPhraseWords) {
+        String signalPhrase = WordsUtils.wordsToString(signalPhraseWords);
+
+        Mapping best = null;
+        for (Mapping mapping : mappings) {
+            if (!mapping.check(signalPhrase)) {
+                continue;
+            }
+            // ">=" on purpose: on equal pattern size the later mapping replaces the earlier one
+            if (best == null || mapping.getSignalPhrasePatternSize() >= best.getSignalPhrasePatternSize()) {
+                best = mapping;
+            }
+        }
+
+        return Optional.ofNullable(best).map(Mapping::getRelation);
+    }
+
+    /**
+     * Classifies a signal phrase with caller-supplied mappings.
+     *
+     * @param mappings          the mappings to match against
+     * @param signalPhraseWords the words of the signal phrase
+     * @return the relation of the best matching mapping, or an empty Optional if nothing matches
+     */
+    public static Optional<Relation> classifyCustom(List<Mapping> mappings, List<Word> signalPhraseWords) {
+        return classify(mappings, signalPhraseWords);
+    }
+
+    /**
+     * Classifies a signal phrase with the built-in general mappings.
+     *
+     * @param signalPhraseWords the words of the signal phrase
+     * @return the relation of the best matching mapping, or an empty Optional if nothing matches
+     */
+    public static Optional<Relation> classifyGeneral(List<Word> signalPhraseWords) {
+        return classify(GENERAL_MAPPINGS, signalPhraseWords);
+    }
+
+    /**
+     * Associates a signal phrase pattern with a relation.
+     *
+     * <p>In a pattern, {@code ...} is a placeholder for arbitrary text; everything else is
+     * inserted into a regular expression as-is and matched against the lower-cased phrase.
+     */
+    public static class Mapping {
+        /**
+         * Replacement (in {@code String.replaceAll} replacement syntax) for one {@code ...}
+         * placeholder. The resulting regex accepts, as alternatives: optional text ending in a
+         * non-word character at the start of the phrase, a single non-word character, text
+         * enclosed by non-word characters, or optional text starting with a non-word character
+         * at the end of the phrase.
+         */
+        private static final String PLACEHOLDER_REPLACEMENT =
+            "((?<=^)(.*\\\\W)?|\\\\W|\\\\W.*\\\\W|(\\\\W.*)?(?=\\$))";
+
+        private final Relation relation;
+
+        // compiled once here instead of on every check() call
+        private final Pattern signalPhrasePattern;
+        private final int signalPhrasePatternSize;
+
+        /**
+         * @param relation            the relation this mapping stands for
+         * @param signalPhrasePattern the pattern text, with {@code ...} as placeholder; its
+         *                            length is used to rank competing matches
+         */
+        public Mapping(Relation relation, String signalPhrasePattern) {
+            this.relation = relation;
+            this.signalPhrasePattern = Pattern.compile(
+                "^" + signalPhrasePattern.replaceAll("\\.\\.\\.", PLACEHOLDER_REPLACEMENT) + "$"
+            );
+            this.signalPhrasePatternSize = signalPhrasePattern.length();
+        }
+
+        /**
+         * @param signalPhrase the phrase to test
+         * @return {@code true} if the whole lower-cased phrase matches this mapping's pattern
+         */
+        boolean check(String signalPhrase) {
+            // Locale.ROOT keeps lower-casing independent of the JVM default locale
+            return signalPhrasePattern.matcher(signalPhrase.toLowerCase(Locale.ROOT)).matches();
+        }
+
+        /**
+         * @return the relation this mapping stands for
+         */
+        public Relation getRelation() {
+            return relation;
+        }
+
+        /**
+         * @return the length of the pattern text that was passed to the constructor
+         */
+        int getSignalPhrasePatternSize() {
+            return signalPhrasePatternSize;
+        }
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/Extraction.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/Extraction.java
new file mode 100644
index 0000000..201dfb5
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/Extraction.java
@@ -0,0 +1,30 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : Extraction
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction;
+
+/**
+ * Common base type of all extraction results.
+ *
+ * <p>It declares no members of its own; code that receives an extraction inspects its concrete
+ * subclass (for example with {@code instanceof}) to decide how to convert it.
+ */
+public abstract class Extraction {
+
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/ExtractionRule.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/ExtractionRule.java
new file mode 100644
index 0000000..51653f7
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/ExtractionRule.java
@@ -0,0 +1,153 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : ExtractionRule
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction;
+
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.trees.Tree;
+import edu.stanford.nlp.trees.tregex.TregexMatcher;
+import edu.stanford.nlp.trees.tregex.TregexPattern;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeException;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeParser;
+import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+public abstract class ExtractionRule {
+ protected final Logger logger = LoggerFactory.getLogger(getClass());
+ protected final TregexPattern pattern;
+
+ public ExtractionRule(String pattern) {
+ this.pattern = TregexPattern.compile(pattern);
+ }
+
+ protected static boolean isInfinitival(Tree clauseParseTree) {
+ TregexPattern p = TregexPattern.compile(clauseParseTree.value() + " <<, VP <<, /(T|t)o/");
+ TregexMatcher matcher = p.matcher(clauseParseTree);
+
+ return (matcher.findAt(clauseParseTree));
+ }
+
+ protected static List getSiblings(Tree parseTree, List tags) {
+ return parseTree.getChildrenAsList().stream().filter(c -> tags.contains(c.value())).collect(Collectors.toList());
+ }
+
+ private static Tense getTense(Tree vp) {
+ Tense res = Tense.PRESENT;
+
+ // find past tense
+ TregexPattern p = TregexPattern.compile("VBD|VBN");
+ TregexMatcher matcher = p.matcher(vp);
+
+ if (matcher.find()) {
+ res = Tense.PAST;
+ }
+
+ return res;
+ }
+
+ private static List appendWordsFromTree(List words, Tree tree) {
+ List res = new ArrayList<>();
+ res.addAll(words);
+
+ TregexPattern p = TregexPattern.compile(tree.value() + " <<, NNP|NNPS");
+ TregexMatcher matcher = p.matcher(tree);
+
+ boolean isFirst = true;
+ for (Word word : tree.yieldWords()) {
+ if ((isFirst) && (!matcher.findAt(tree))) {
+ res.add(WordsUtils.lowercaseWord(word));
+ } else {
+ res.add(word);
+ }
+ isFirst = false;
+ }
+
+ return res;
+ }
+
+ // pp is optional
+ protected static List rephraseIntraSententialAttribution(List words) {
+ try {
+ List res = new ArrayList<>();
+
+ Tree parseTree = ParseTreeParser.parse(WordsUtils.wordsToProperSentenceString(words));
+
+ TregexPattern p = TregexPattern.compile("ROOT <: (S < (NP=np ?$,, PP=pp $.. VP=vp))");
+ TregexMatcher matcher = p.matcher(parseTree);
+ if (matcher.findAt(parseTree)) {
+ Tree pp = matcher.getNode("pp"); // optional
+ Tree np = matcher.getNode("np");
+ Tree vp = matcher.getNode("vp");
+
+ Tense tense = getTense(vp);
+ if (tense.equals(Tense.PRESENT)) {
+ res.add(new Word("This"));
+ res.add(new Word("is"));
+ res.add(new Word("what"));
+ } else {
+ res.add(new Word("This"));
+ res.add(new Word("was"));
+ res.add(new Word("what"));
+ }
+ res = appendWordsFromTree(res, np);
+ res = appendWordsFromTree(res, vp);
+ if (pp != null) {
+ res = appendWordsFromTree(res, pp);
+ }
+ }
+
+ return res;
+ } catch (ParseTreeException e) {
+ return words;
+ }
+ }
+
+ protected static List rephraseEnablement(Tree s, Tree vp) {
+ List res = new ArrayList<>();
+
+ Tense tense = getTense(vp);
+ if (tense.equals(Tense.PRESENT)) {
+ res.add(new Word("This"));
+ res.add(new Word("is"));
+ } else {
+ res.add(new Word("This"));
+ res.add(new Word("was"));
+ }
+ res = appendWordsFromTree(res, s);
+
+ return res;
+ }
+
+ public abstract Optional extract(Tree parseTree);
+
+ protected enum Tense {
+ PRESENT,
+ PAST
+ }
+
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/model/CoordinationExtraction.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/model/CoordinationExtraction.java
new file mode 100644
index 0000000..1d52a60
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/model/CoordinationExtraction.java
@@ -0,0 +1,75 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : CoordinationExtraction
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.model;
+
+import edu.stanford.nlp.ling.Word;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
+import org.lambda3.text.simplification.discourse.tree.model.Coordination;
+import org.lambda3.text.simplification.discourse.tree.model.DiscourseTree;
+import org.lambda3.text.simplification.discourse.tree.model.Leaf;
+import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
+
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * Extraction result that describes a coordination: a relation, an optional signal phrase and
+ * the texts of the coordinated parts. {@link #convert()} turns it into a {@link Coordination}
+ * node whose children are leaves.
+ */
+public class CoordinationExtraction extends Extraction {
+    private final String extractionRule;
+    private final Relation relation;
+    private final String signalPhrase; // optional
+    private final List<String> coordinations;
+    private final Leaf.Type coordinationsType;
+
+    /**
+     * Creates a coordination of any number of parts without a signal phrase.
+     *
+     * @param extractionRule     the name of the rule that produced this extraction
+     * @param relation           the relation between the parts
+     * @param coordinationsWords the words of each part, one list per part
+     * @param coordinationsType  the leaf type given to every part on conversion
+     */
+    public CoordinationExtraction(String extractionRule, Relation relation, List<List<Word>> coordinationsWords, Leaf.Type coordinationsType) {
+        this.extractionRule = extractionRule;
+        this.relation = relation;
+        this.signalPhrase = null;
+        this.coordinations = coordinationsWords.stream()
+            .map(WordsUtils::wordsToProperSentenceString)
+            .collect(Collectors.toList());
+        this.coordinationsType = coordinationsType;
+    }
+
+    /**
+     * Creates a binary coordination.
+     *
+     * @param extractionRule         the name of the rule that produced this extraction
+     * @param relation               the relation between the two parts
+     * @param signalPhraseWords      the words of the signal phrase, or {@code null} if there is none
+     * @param leftCoordinationWords  the words of the left part
+     * @param rightCoordinationWords the words of the right part
+     * @param coordinationsType      the leaf type given to both parts on conversion
+     */
+    // binary
+    public CoordinationExtraction(String extractionRule, Relation relation, List<Word> signalPhraseWords, List<Word> leftCoordinationWords, List<Word> rightCoordinationWords, Leaf.Type coordinationsType) {
+        this.extractionRule = extractionRule;
+        this.relation = relation;
+        this.signalPhrase = (signalPhraseWords != null) ? WordsUtils.wordsToString(signalPhraseWords) : null;
+        this.coordinations = Stream.of(leftCoordinationWords, rightCoordinationWords)
+            .map(WordsUtils::wordsToProperSentenceString)
+            .collect(Collectors.toList());
+        this.coordinationsType = coordinationsType;
+    }
+
+    /**
+     * Builds the discourse tree node for this extraction.
+     *
+     * @return a new {@link Coordination} with one new {@link Leaf} per coordinated part, in the
+     *         order in which the parts were passed to the constructor
+     */
+    public DiscourseTree convert() {
+        return new Coordination(
+            extractionRule,
+            relation,
+            signalPhrase,
+            coordinations.stream().map(
+                s -> new Leaf(coordinationsType, extractionRule, s)
+            ).collect(Collectors.toList())
+        );
+    }
+
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/model/RefCoordinationExtraction.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/model/RefCoordinationExtraction.java
new file mode 100644
index 0000000..a228934
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/model/RefCoordinationExtraction.java
@@ -0,0 +1,81 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : RefCoordinationExtraction
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.model;
+
+import edu.stanford.nlp.ling.Word;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
+import org.lambda3.text.simplification.discourse.tree.model.Coordination;
+import org.lambda3.text.simplification.discourse.tree.model.DiscourseTree;
+import org.lambda3.text.simplification.discourse.tree.model.Leaf;
+import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
+
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Extraction result for a coordination whose first member is a reference to the node preceding the current leaf; only the right coordination is stored here as a sentence string.
+ */
+public class RefCoordinationExtraction extends Extraction {
+ private final String extractionRule;
+ private final Relation relation;
+ private final String signalPhrase; // optional; null when no signal phrase words were supplied
+ private final String rightCoordination;
+ private final Leaf.Type rightCoordinationType;
+
+ // binary: the left member is resolved later in convert(...); signalPhraseWords may be null
+ public RefCoordinationExtraction(String extractionRule, Relation relation, List signalPhraseWords, List rightCoordinationWords, Leaf.Type rightCoordinationType) {
+ this.extractionRule = extractionRule;
+ this.relation = relation;
+ this.signalPhrase = (signalPhraseWords != null) ? WordsUtils.wordsToString(signalPhraseWords) : null;
+ this.rightCoordination = WordsUtils.wordsToProperSentenceString(rightCoordinationWords);
+ this.rightCoordinationType = rightCoordinationType;
+ }
+
+ public Optional convert(Leaf currChild) { // returns the new Coordination, or Optional.empty() when no usable previous node exists
+
+ // find previous node to use as a reference (the node returned by currChild.getPreviousNode())
+ Optional prevNode = currChild.getPreviousNode();
+ if ((prevNode.isPresent()) && (prevNode.get().usableAsReference())) {
+
+ // use prev node as a reference (side effect on the previous node; happens before the result is built)
+ prevNode.get().useAsReference();
+
+ Coordination res = new Coordination(
+ extractionRule,
+ relation,
+ signalPhrase,
+ Collections.emptyList() // NOTE(review): immutable list - assumes Coordination copies it before addCoordination(...) is called; confirm
+ );
+ res.addCoordination(prevNode.get()); // set prev node as a reference
+ res.addCoordination(new Leaf(rightCoordinationType, extractionRule, rightCoordination)); // then the newly extracted right coordination
+
+ return Optional.of(res);
+
+ }
+
+ return Optional.empty();
+ }
+
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/model/RefSubordinationExtraction.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/model/RefSubordinationExtraction.java
new file mode 100644
index 0000000..9509bf6
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/model/RefSubordinationExtraction.java
@@ -0,0 +1,81 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : RefSubordinationExtraction
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.model;
+
+import edu.stanford.nlp.ling.Word;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
+import org.lambda3.text.simplification.discourse.tree.model.DiscourseTree;
+import org.lambda3.text.simplification.discourse.tree.model.Leaf;
+import org.lambda3.text.simplification.discourse.tree.model.Subordination;
+import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
+
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Extraction result for a subordination whose left constituent is a reference to the node preceding the current leaf; only the right constituent is stored here as a sentence string.
+ */
+public class RefSubordinationExtraction extends Extraction {
+ private final String extractionRule;
+ private final Relation relation;
+ private final String signalPhrase; // optional; null when no signal phrase words were supplied
+ private final String rightConstituent;
+ private final boolean superordinationIsLeft;
+ private final Leaf.Type rightConstituentType;
+
+ // binary: the left constituent is resolved later in convert(...); signalPhraseWords may be null
+ public RefSubordinationExtraction(String extractionRule, Relation relation, List signalPhraseWords, List rightConstituentWords, boolean superordinationIsLeft, Leaf.Type rightConstituentType) {
+ this.extractionRule = extractionRule;
+ this.relation = relation;
+ this.signalPhrase = (signalPhraseWords != null) ? WordsUtils.wordsToString(signalPhraseWords) : null;
+ this.rightConstituent = WordsUtils.wordsToProperSentenceString(rightConstituentWords);
+ this.superordinationIsLeft = superordinationIsLeft;
+ this.rightConstituentType = rightConstituentType;
+ }
+
+ public Optional convert(Leaf currChild) { // returns the new Subordination, or Optional.empty() when no usable previous node exists
+
+ // find previous node to use as a reference (the node returned by currChild.getPreviousNode())
+ Optional prevNode = currChild.getPreviousNode();
+ if ((prevNode.isPresent()) && (prevNode.get().usableAsReference())) {
+
+ // use prev node as a reference (side effect on the previous node; happens before the result is built)
+ prevNode.get().useAsReference();
+
+ Subordination res = new Subordination(
+ extractionRule,
+ relation,
+ signalPhrase,
+ new Leaf(Leaf.Type.DEFAULT, extractionRule, "tmp"), // placeholder left constituent, swapped out right after construction
+ new Leaf(rightConstituentType, extractionRule, rightConstituent),
+ superordinationIsLeft
+ );
+ res.replaceLeftConstituent(prevNode.get()); // set prev node as a reference
+
+ return Optional.of(res);
+ }
+
+ return Optional.empty();
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/model/SubordinationExtraction.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/model/SubordinationExtraction.java
new file mode 100644
index 0000000..72cbe8d
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/model/SubordinationExtraction.java
@@ -0,0 +1,70 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : SubordinationExtraction
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.model;
+
+import edu.stanford.nlp.ling.Word;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
+import org.lambda3.text.simplification.discourse.tree.model.DiscourseTree;
+import org.lambda3.text.simplification.discourse.tree.model.Leaf;
+import org.lambda3.text.simplification.discourse.tree.model.Subordination;
+import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
+
+import java.util.List;
+
+/**
+ * Extraction result for a subordination: keeps the rule name, the relation, an optional signal phrase, both constituents as sentence strings with their leaf types, and which side is superordinate; {@link #convert()} builds the {@link Subordination} node.
+ */
+public class SubordinationExtraction extends Extraction {
+ private final String extractionRule;
+ private final Relation relation;
+ private final String signalPhrase; // optional; null when no signal phrase words were supplied
+ private final String leftConstituent;
+ private final String rightConstituent;
+ private final boolean superordinationIsLeft; // handed unchanged to the Subordination constructor
+ private final Leaf.Type leftConstituentType;
+ private final Leaf.Type rightConstituentType;
+
+ // binary: one left and one right constituent; signalPhraseWords may be null
+ public SubordinationExtraction(String extractionRule, Relation relation, List signalPhraseWords, List leftConstituentWords, List rightConstituentWords, boolean superordinationIsLeft, Leaf.Type leftConstituentType, Leaf.Type rightConstituentType) {
+ this.extractionRule = extractionRule;
+ this.relation = relation;
+ this.signalPhrase = (signalPhraseWords != null) ? WordsUtils.wordsToString(signalPhraseWords) : null;
+ this.leftConstituent = WordsUtils.wordsToProperSentenceString(leftConstituentWords);
+ this.rightConstituent = WordsUtils.wordsToProperSentenceString(rightConstituentWords);
+ this.superordinationIsLeft = superordinationIsLeft;
+ this.leftConstituentType = leftConstituentType;
+ this.rightConstituentType = rightConstituentType;
+ }
+
+ public DiscourseTree convert() { // builds the Subordination node with one leaf per constituent
+ return new Subordination(
+ extractionRule,
+ relation,
+ signalPhrase,
+ new Leaf(leftConstituentType, extractionRule, leftConstituent),
+ new Leaf(rightConstituentType, extractionRule, rightConstituent),
+ superordinationIsLeft
+ );
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/CoordinationExtractor.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/CoordinationExtractor.java
new file mode 100644
index 0000000..c3ba166
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/CoordinationExtractor.java
@@ -0,0 +1,110 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : CoordinationExtractor
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.rules;
+
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.trees.Tree;
+import edu.stanford.nlp.trees.tregex.TregexMatcher;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.tree.classification.SignalPhraseClassifier;
+import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
+import org.lambda3.text.simplification.discourse.tree.extraction.ExtractionRule;
+import org.lambda3.text.simplification.discourse.tree.extraction.model.CoordinationExtraction;
+import org.lambda3.text.simplification.discourse.tree.model.Leaf;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+/** Extraction rule that turns sibling clauses matched by a Tregex pattern (default: S children of the top-level S) into a {@link CoordinationExtraction}. */
+public class CoordinationExtractor extends ExtractionRule {
+
+ private final String node1; // name of the matched Tregex node that is handed to getSiblings(...) as its first argument
+ private final String node2; // passed to getSiblings(...) as a singleton list; presumably the label the siblings must carry - TODO confirm
+
+ public CoordinationExtractor() {
+ this("ROOT <<: (S=s < (S $.. S))", "s", "S");
+ }
+
+ CoordinationExtractor(String pattern, String node1, String node2) {
+ super(pattern);
+ this.node1 = node1;
+ this.node2 = node2;
+ }
+
+ private List> combineSiblings(List precedingWords, List followingWords, List siblings) { // one word list per sibling: preceding + sibling words + following
+ List> constituentsWords = new ArrayList<>();
+ for (Tree sibling : siblings) {
+ List constituentWords = new ArrayList<>();
+
+ constituentWords.addAll(precedingWords);
+ constituentWords.addAll(ParseTreeExtractionUtils.getContainingWords(sibling));
+ constituentWords.addAll(followingWords);
+
+ constituentsWords.add(constituentWords);
+ }
+
+ return constituentsWords;
+ }
+
+ @Override
+ public Optional extract(Tree parseTree) {
+
+ TregexMatcher matcher = pattern.matcher(parseTree);
+
+ if (matcher.findAt(parseTree)) {
+ List siblings = getSiblings(matcher.getNode(node1), Collections.singletonList(node2)); // NOTE(review): get(0) below assumes this list is non-empty whenever the pattern matched - confirm
+
+ // constituents: words before the first sibling and after the last sibling are shared by every constituent
+ List precedingWords = ParseTreeExtractionUtils.getPrecedingWords(parseTree, siblings.get(0), false);
+ List followingWords = ParseTreeExtractionUtils.getFollowingWords(parseTree, siblings.get(siblings.size() - 1), false);
+ List> constituentsWords = combineSiblings(precedingWords, followingWords, siblings);
+
+ // result: exactly two constituents -> binary form with signal phrase and classified relation; any other count -> list form with UNKNOWN_COORDINATION
+ if (constituentsWords.size() == 2) {
+ List signalPhraseWords = ParseTreeExtractionUtils.getWordsInBetween(parseTree, siblings.get(0), siblings.get(siblings.size() - 1), false, false);
+ Optional relation = SignalPhraseClassifier.classifyGeneral(signalPhraseWords);
+
+ return Optional.of(new CoordinationExtraction(
+ getClass().getSimpleName(),
+ relation.orElse(Relation.UNKNOWN_COORDINATION),
+ signalPhraseWords,
+ constituentsWords.get(0),
+ constituentsWords.get(constituentsWords.size() - 1),
+ Leaf.Type.DEFAULT)
+ );
+ } else {
+ return Optional.of(new CoordinationExtraction(
+ getClass().getSimpleName(),
+ Relation.UNKNOWN_COORDINATION,
+ constituentsWords,
+ Leaf.Type.DEFAULT)
+ );
+ }
+ }
+
+ return Optional.empty();
+ }
+
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/IntraSententialSubordinationExtraction.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/IntraSententialSubordinationExtraction.java
new file mode 100644
index 0000000..4c9f468
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/IntraSententialSubordinationExtraction.java
@@ -0,0 +1,114 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : IntraSententialSubordinationExtraction
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.rules;
+
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.trees.Tree;
+import edu.stanford.nlp.trees.tregex.TregexMatcher;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.tree.classification.SignalPhraseClassifier;
+import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
+import org.lambda3.text.simplification.discourse.tree.extraction.model.SubordinationExtraction;
+import org.lambda3.text.simplification.discourse.tree.model.Leaf;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Optional;
+/** Subordination rule for an SBAR clause below the main VP; the relation is chosen by trying attribution mappings, then an infinitival (enablement) check, then general signal-phrase classification. */
+public class IntraSententialSubordinationExtraction extends SubordinationExtractor {
+ // custom mappings, tried in extract(...) before the enablement check and the general classification
+ private static final List INTRA_SENTENTIAL_ATTRIBUTION_MAPPINGS = Arrays.asList(
+
+ // INTRA_SENTENTIAL_ATTRIBUTION: an empty pattern and "...that..." (pattern syntax is defined by SignalPhraseClassifier.Mapping, not shown here)
+ new SignalPhraseClassifier.Mapping(Relation.INTRA_SENTENTIAL_ATTRIBUTION, ""),
+ new SignalPhraseClassifier.Mapping(Relation.INTRA_SENTENTIAL_ATTRIBUTION, "...that...")
+ );
+
+
+ public IntraSententialSubordinationExtraction() {
+ super("ROOT <<: (S < (NP $.. (VP=vp <+(VP) (SBAR=sbar < (S=s)))))");
+ }
+
+ @Override
+ public Optional extract(Tree parseTree) {
+
+ TregexMatcher matcher = pattern.matcher(parseTree);
+
+ if (matcher.findAt(parseTree)) {
+ List signalPhraseWords = ParseTreeExtractionUtils.getPrecedingWords(matcher.getNode("sbar"), matcher.getNode("s"), false); // words inside sbar that come before s
+
+ // the left, (usually) superordinate constituent: the words before sbar followed by the words after sbar
+ List leftConstituentWords = new ArrayList<>();
+ leftConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(parseTree, matcher.getNode("sbar"), false));
+ leftConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(parseTree, matcher.getNode("sbar"), false));
+
+ // the right, (usually) subordinate constituent: the words of the embedded clause s
+ List rightConstituentWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("s"));
+
+ // result: defaults below apply unless one of the special cases overrides them
+ Optional relation;
+ boolean superordinationIsLeft = true;
+ Leaf.Type leftConstituentType = Leaf.Type.DEFAULT;
+ Leaf.Type rightConstituentType = Leaf.Type.DEFAULT;
+
+ // intra sentential attribution: on a match the left constituent is rephrased and retyped, and the roles are swapped
+ relation = SignalPhraseClassifier.classifyCustom(INTRA_SENTENTIAL_ATTRIBUTION_MAPPINGS, signalPhraseWords);
+ if (relation.isPresent()) {
+ leftConstituentWords = rephraseIntraSententialAttribution(leftConstituentWords);
+ leftConstituentType = Leaf.Type.SENT_SIM_CONTEXT;
+
+ // swap superordinate with subordinate assignment
+ superordinationIsLeft = false;
+ }
+
+ // enablement: only checked when no attribution matched; an infinitival s gets a rephrased, retyped right constituent
+ if (!relation.isPresent()) {
+ if (isInfinitival(matcher.getNode("s"))) {
+ relation = Optional.of(Relation.ENABLEMENT);
+ rightConstituentWords = rephraseEnablement(matcher.getNode("s"), matcher.getNode("vp"));
+ rightConstituentType = Leaf.Type.SENT_SIM_CONTEXT;
+ }
+ }
+
+ // general: last resort; may still leave the relation empty (UNKNOWN_SUBORDINATION is substituted below)
+ if (!relation.isPresent()) {
+ relation = SignalPhraseClassifier.classifyGeneral(signalPhraseWords);
+ }
+
+ return Optional.of(new SubordinationExtraction(
+ getClass().getSimpleName(),
+ relation.orElse(Relation.UNKNOWN_SUBORDINATION),
+ signalPhraseWords,
+ leftConstituentWords, // the left constituent (superordinate unless superordinationIsLeft was set to false above)
+ rightConstituentWords, // the right constituent (subordinate unless superordinationIsLeft was set to false above)
+ superordinationIsLeft,
+ leftConstituentType,
+ rightConstituentType));
+ }
+
+ return Optional.empty();
+ }
+
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/LeftSubordinateEnablementExtractor.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/LeftSubordinateEnablementExtractor.java
new file mode 100644
index 0000000..7d03be8
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/LeftSubordinateEnablementExtractor.java
@@ -0,0 +1,33 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : LeftSubordinateEnablementExtractor
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.rules;
+
+/**
+ * Supplies {@link SubordinationEnablementExtractor} with a Tregex pattern in which the clause S=s is a child of the top-level S and precedes a sibling NP that is followed by VP=vp; this class adds no logic of its own.
+ */
+public class LeftSubordinateEnablementExtractor extends SubordinationEnablementExtractor {
+
+ public LeftSubordinateEnablementExtractor() {
+ super("ROOT <<: (S < (S=s $.. (NP $.. VP=vp)))");
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/ListNPExtractor.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/ListNPExtractor.java
new file mode 100644
index 0000000..ea90d34
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/ListNPExtractor.java
@@ -0,0 +1,90 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : ListNPExtractor
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.rules;
+
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.trees.Tree;
+import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
+import org.lambda3.text.simplification.discourse.tree.extraction.ExtractionRule;
+import org.lambda3.text.simplification.discourse.tree.extraction.model.CoordinationExtraction;
+import org.lambda3.text.simplification.discourse.tree.extraction.utils.ListNPSplitter;
+import org.lambda3.text.simplification.discourse.tree.extraction.utils.TregexUtils;
+import org.lambda3.text.simplification.discourse.tree.model.Leaf;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Extraction rule that splits a list-like noun phrase (Tregex node named "np") into its elements and emits one coordination constituent per element, each wrapped in the words before and after the noun phrase.
+ */
+public class ListNPExtractor extends ExtractionRule {
+
+ public ListNPExtractor(String pattern) { // the pattern must name a node "np"; extract(...) looks it up by that name
+ super(pattern);
+ }
+
+ @Override
+ public Optional extract(Tree parseTree) {
+
+ List matches = TregexUtils.sortedFindAt(parseTree, pattern, Collections.singletonList("np"));
+ if (matches.size() > 0) {
+ TregexUtils.MyMatch match = matches.get(0); // only the first match of the sorted list is used
+
+ Optional r = ListNPSplitter.split(match.getNode("np"));
+ if (r.isPresent()) {
+
+ // constituents: the words around the noun phrase are repeated for every list element
+ List precedingWords = ParseTreeExtractionUtils.getPrecedingWords(parseTree, match.getNode("np"), false);
+ List followingWords = ParseTreeExtractionUtils.getFollowingWords(parseTree, match.getNode("np"), false);
+ List> constituentsWords = new ArrayList<>();
+
+
+ for (List element : r.get().getElementsWords()) {
+ List constituentWords = new ArrayList<>();
+
+ constituentWords.addAll(precedingWords);
+ constituentWords.addAll(element);
+ constituentWords.addAll(followingWords);
+
+ constituentsWords.add(constituentWords);
+ }
+
+ // result: relation comes from the splitter result; leaves are created as Leaf.Type.TERMINAL
+ Extraction res = new CoordinationExtraction(
+ getClass().getSimpleName(),
+ r.get().getRelation(),
+ constituentsWords,
+ Leaf.Type.TERMINAL
+ );
+
+ return Optional.of(res);
+ }
+ }
+
+
+ return Optional.empty();
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/ReferenceExtractorForContainingWords.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/ReferenceExtractorForContainingWords.java
new file mode 100644
index 0000000..35e1864
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/ReferenceExtractorForContainingWords.java
@@ -0,0 +1,77 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : ReferenceExtractorForContainingWords
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.rules;
+
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.trees.Tree;
+import edu.stanford.nlp.trees.tregex.TregexMatcher;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.tree.classification.SignalPhraseClassifier;
+import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
+import org.lambda3.text.simplification.discourse.tree.extraction.ExtractionRule;
+import org.lambda3.text.simplification.discourse.tree.extraction.model.RefCoordinationExtraction;
+import org.lambda3.text.simplification.discourse.tree.model.Leaf;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
+
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Extraction rule for a sentence whose first constituent ("node") covers a single word ("leaf"): that word is the signal phrase, and if it classifies to a relation the rest of the sentence becomes the right side of a {@link RefCoordinationExtraction}.
+ */
+public class ReferenceExtractorForContainingWords extends ExtractionRule {
+
+ public ReferenceExtractorForContainingWords() {
+ super("ROOT <<: S <<, (__=node >1 S <<: (__=leaf !< __))");
+ }
+
+
+ @Override
+ public Optional extract(Tree parseTree) {
+
+ TregexMatcher matcher = pattern.matcher(parseTree);
+
+ if (matcher.findAt(parseTree)) {
+ List signalPhraseWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("leaf")); // the word(s) of the matched leaf
+
+ // the right constituent: everything in the sentence that follows "node"
+ List rightConstituentWords = ParseTreeExtractionUtils.getFollowingWords(parseTree, matcher.getNode("node"), false);
+
+ // result: only produced when the signal phrase maps to a relation; otherwise this rule yields Optional.empty()
+ Optional relation = SignalPhraseClassifier.classifyGeneral(signalPhraseWords);
+ if (relation.isPresent()) {
+ Extraction res = new RefCoordinationExtraction(
+ getClass().getSimpleName(),
+ relation.get(),
+ signalPhraseWords,
+ rightConstituentWords,
+ Leaf.Type.DEFAULT
+ );
+
+ return Optional.of(res);
+ }
+ }
+
+ return Optional.empty();
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/ReferenceExtractorForPrecedingWords.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/ReferenceExtractorForPrecedingWords.java
new file mode 100644
index 0000000..76817a8
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/ReferenceExtractorForPrecedingWords.java
@@ -0,0 +1,76 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : ReferenceExtractorForPrecedingWords
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.rules;
+
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.trees.Tree;
+import edu.stanford.nlp.trees.tregex.TregexMatcher;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.tree.classification.SignalPhraseClassifier;
+import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
+import org.lambda3.text.simplification.discourse.tree.extraction.ExtractionRule;
+import org.lambda3.text.simplification.discourse.tree.extraction.model.RefCoordinationExtraction;
+import org.lambda3.text.simplification.discourse.tree.model.Leaf;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
+
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Extraction rule for a sentence whose first constituent ("node") contains a node matching /this|that/ ("det"): the words of "node" up to "det" form the signal phrase, and if it classifies to a relation the rest of the sentence becomes the right side of a {@link RefCoordinationExtraction}.
+ */
+public class ReferenceExtractorForPrecedingWords extends ExtractionRule {
+
+ public ReferenceExtractorForPrecedingWords() {
+ super("ROOT <<: S <<, (__=node >1 S << /this|that/=det)");
+ }
+
+ @Override
+ public Optional extract(Tree parseTree) {
+
+ TregexMatcher matcher = pattern.matcher(parseTree);
+
+ if (matcher.findAt(parseTree)) {
+ List signalPhraseWords = ParseTreeExtractionUtils.getPrecedingWords(matcher.getNode("node"), matcher.getNode("det"), true); // last argument is true here only; presumably it includes "det" itself - TODO confirm
+
+ // the right constituent: everything in the sentence that follows "node"
+ List rightConstituentWords = ParseTreeExtractionUtils.getFollowingWords(parseTree, matcher.getNode("node"), false);
+
+ // result: only produced when the signal phrase maps to a relation; otherwise this rule yields Optional.empty()
+ Optional relation = SignalPhraseClassifier.classifyGeneral(signalPhraseWords);
+ if (relation.isPresent()) {
+ Extraction res = new RefCoordinationExtraction(
+ getClass().getSimpleName(),
+ relation.get(),
+ signalPhraseWords,
+ rightConstituentWords,
+ Leaf.Type.DEFAULT
+ );
+
+ return Optional.of(res);
+ }
+ }
+
+ return Optional.empty();
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/RightSubordinateEnablementExtractor.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/RightSubordinateEnablementExtractor.java
new file mode 100644
index 0000000..092a50f
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/RightSubordinateEnablementExtractor.java
@@ -0,0 +1,33 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : RightSubordinateEnablementExtractor
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.rules;
+
+/**
+ * {@link SubordinationEnablementExtractor} bound to a pattern in which the candidate clause ("s") follows an NP or PP inside the verb phrase ("vp").
+ */
+public class RightSubordinateEnablementExtractor extends SubordinationEnablementExtractor {
+ // Tregex pattern: the sentence S has an NP child followed by a sister VP ("vp"); through a chain of VP nodes "vp" dominates an NP or PP that has a following sister S ("s").
+ public RightSubordinateEnablementExtractor() {
+ super("ROOT <<: (S < (NP $.. (VP=vp <+(VP) (NP|PP $.. (S=s)))))");
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/SharedNPCoordinationExtractor.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/SharedNPCoordinationExtractor.java
new file mode 100644
index 0000000..132cb19
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/SharedNPCoordinationExtractor.java
@@ -0,0 +1,30 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : SharedNPCoordinationExtractor
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.rules;
+
+public class SharedNPCoordinationExtractor extends CoordinationExtractor {
+ // Tregex pattern: the sentence S has an NP child followed by a sister VP ("vp"), and "vp" has a VP child with a later VP sister. The extra arguments "vp" and "VP" are interpreted by CoordinationExtractor - presumably the parent node name and the label of the coordinated children; TODO confirm.
+ public SharedNPCoordinationExtractor() {
+ super("ROOT <<: (S < (NP $.. (VP=vp < (VP $..VP))))", "vp", "VP");
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/SubordinationEnablementExtractor.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/SubordinationEnablementExtractor.java
new file mode 100644
index 0000000..f6c961d
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/SubordinationEnablementExtractor.java
@@ -0,0 +1,101 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : SubordinationEnablementExtractor
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.rules;
+
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.trees.Tree;
+import edu.stanford.nlp.trees.tregex.TregexMatcher;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
+import org.lambda3.text.simplification.discourse.tree.extraction.ExtractionRule;
+import org.lambda3.text.simplification.discourse.tree.extraction.model.SubordinationExtraction;
+import org.lambda3.text.simplification.discourse.tree.model.Leaf;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+
+/**
+ * Base class for rules that extract an ENABLEMENT subordination from a clause captured as "s"; subclasses supply the Tregex pattern, which must name the nodes "s" and "vp" because extract() looks both up.
+ */
+abstract class SubordinationEnablementExtractor extends ExtractionRule {
+ // The pattern is handed to ExtractionRule unchanged.
+ SubordinationEnablementExtractor(String pattern) {
+ super(pattern);
+ }
+ // Concatenates the results of getPrecedingWords and getFollowingWords for the node within the parse tree - presumably every word except those of the node itself; TODO confirm against ParseTreeExtractionUtils.
+ private List getSuperordinateConstituentWords(Tree parseTree, Tree node) {
+ List constituentWords = new ArrayList<>();
+ constituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(parseTree, node, false));
+ constituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(parseTree, node, false));
+ return constituentWords;
+ }
+ // Delegates to getContainingWords for the node.
+ private List getSubordinateConstituentWords(Tree node) {
+ return ParseTreeExtractionUtils.getContainingWords(node);
+ }
+ // Produces a SubordinationExtraction only when the pattern matches at the root and isInfinitival("s") holds; Optional.empty() otherwise.
+ @Override
+ public Optional extract(Tree parseTree) {
+
+ TregexMatcher matcher = pattern.matcher(parseTree);
+
+ if (matcher.findAt(parseTree)) {
+
+ List superordinateConstituentWords;
+ List subordinateConstituentWords;
+
+ superordinateConstituentWords = getSuperordinateConstituentWords(parseTree, matcher.getNode("s"));
+ subordinateConstituentWords = getSubordinateConstituentWords(matcher.getNode("s"));
+ // NOTE: a result is only returned from the infinitival branch below, which reassigns subordinateConstituentWords, so the value computed above never reaches the extraction.
+ // result
+ Optional relation = Optional.empty();
+ Leaf.Type leftConstituentType = Leaf.Type.DEFAULT;
+ Leaf.Type rightConstituentType = Leaf.Type.DEFAULT;
+
+ // enablement (isInfinitival and rephraseEnablement are not defined in this class - presumably inherited from ExtractionRule)
+ if (isInfinitival(matcher.getNode("s"))) {
+ relation = Optional.of(Relation.ENABLEMENT);
+ subordinateConstituentWords = rephraseEnablement(matcher.getNode("s"), matcher.getNode("vp"));
+ rightConstituentType = Leaf.Type.SENT_SIM_CONTEXT;
+ }
+
+ if (relation.isPresent()) {
+ return Optional.of(
+ new SubordinationExtraction(
+ getClass().getSimpleName(),
+ relation.get(),
+ null, // the argument position where SubordinationExtractor passes its signalPhraseWords; none is supplied here
+ superordinateConstituentWords, // the superordinate constituent
+ subordinateConstituentWords, // the subordinate constituent
+ true, // SubordinationExtractor passes false with the constituents in the opposite order - presumably flags that the superordinate constituent comes first; TODO confirm
+ leftConstituentType,
+ rightConstituentType)
+ );
+ }
+ }
+
+ return Optional.empty();
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/SubordinationExtractor.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/SubordinationExtractor.java
new file mode 100644
index 0000000..2585678
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/rules/SubordinationExtractor.java
@@ -0,0 +1,97 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : SubordinationExtractor
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.rules;
+
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.trees.Tree;
+import edu.stanford.nlp.trees.tregex.TregexMatcher;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.tree.classification.SignalPhraseClassifier;
+import org.lambda3.text.simplification.discourse.tree.extraction.Extraction;
+import org.lambda3.text.simplification.discourse.tree.extraction.ExtractionRule;
+import org.lambda3.text.simplification.discourse.tree.extraction.model.SubordinationExtraction;
+import org.lambda3.text.simplification.discourse.tree.model.Leaf;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+
+public class SubordinationExtractor extends ExtractionRule {
+ // Rule for a sentence-initial subordinate clause: always yields a SubordinationExtraction when its pattern matches at the root. This constructor lets code in the same package supply a different pattern.
+ SubordinationExtractor(String pattern) {
+ super(pattern);
+ }
+ // Default Tregex pattern: the sentence S has an SBAR child ("sbar") that has an S child ("s"); "sbar" is followed by a sister NP, which in turn is followed by a sister VP ("vp").
+ public SubordinationExtractor() {
+ this("ROOT <<: (S < (SBAR=sbar < (S=s) $.. (NP $.. VP=vp)))");
+ }
+
+ @Override
+ public Optional extract(Tree parseTree) {
+ // Returns Optional.empty() only when the pattern does not match at the root; an unclassified relation falls back to UNKNOWN_SUBORDINATION.
+ TregexMatcher matcher = pattern.matcher(parseTree);
+
+ if (matcher.findAt(parseTree)) {
+ List signalPhraseWords = ParseTreeExtractionUtils.getPrecedingWords(matcher.getNode("sbar"), matcher.getNode("s"), false); // presumably the words of "sbar" that precede "s"; TODO confirm against the helper
+
+ // the left, subordinate constituent
+ List leftConstituentWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("s"));
+
+ // the right, superordinate constituent
+ List rightConstituentWords = new ArrayList<>();
+ rightConstituentWords.addAll(ParseTreeExtractionUtils.getPrecedingWords(parseTree, matcher.getNode("sbar"), false));
+ rightConstituentWords.addAll(ParseTreeExtractionUtils.getFollowingWords(parseTree, matcher.getNode("sbar"), false));
+
+ // result
+ Optional relation = Optional.empty();
+ Leaf.Type leftConstituentType = Leaf.Type.DEFAULT;
+ Leaf.Type rightConstituentType = Leaf.Type.DEFAULT;
+
+ // enablement: takes priority over signal-phrase classification and replaces the left constituent's words and type
+ if (isInfinitival(matcher.getNode("s"))) {
+ relation = Optional.of(Relation.ENABLEMENT);
+ leftConstituentWords = rephraseEnablement(matcher.getNode("s"), matcher.getNode("vp"));
+ leftConstituentType = Leaf.Type.SENT_SIM_CONTEXT;
+ }
+
+ // general: only consulted when the clause was not infinitival
+ if (!relation.isPresent()) {
+ relation = SignalPhraseClassifier.classifyGeneral(signalPhraseWords);
+ }
+
+ return Optional.of(new SubordinationExtraction(
+ getClass().getSimpleName(),
+ relation.orElse(Relation.UNKNOWN_SUBORDINATION),
+ signalPhraseWords,
+ leftConstituentWords, // the subordinate constituent
+ rightConstituentWords, // the superordinate constituent
+ false,
+ leftConstituentType,
+ rightConstituentType));
+ }
+
+ return Optional.empty();
+ }
+
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/utils/ListNPSplitter.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/utils/ListNPSplitter.java
new file mode 100644
index 0000000..aafb73e
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/utils/ListNPSplitter.java
@@ -0,0 +1,99 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : ListNPSplitter
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.utils;
+
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.trees.Tree;
+import org.lambda3.text.simplification.discourse.tree.Relation;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.stream.Collectors;
+
+/**
+ * Splits an NP whose direct children form an "and"/"or" list of NPs into the word lists of the individual list elements.
+ */
+public class ListNPSplitter {
+ // Returns the elements and relation when the children of np match one of the two list patterns below, otherwise Optional.empty(). A list mixing "and" and "or" matches neither pattern.
+ public static Optional split(Tree np) {
+
+ // representation: the labels of the direct children concatenated without separator, each CC child replaced by the value of its first word (e.g. "NP,NPandNP")
+ String representation = np.getChildrenAsList().stream().map(c -> (c.value().equals("CC")) ? c.yieldWords().get(0).value() : c.value()).collect(Collectors.joining(""));
+ // The patterns are case-sensitive, so only lower-case "and" / "or" conjunction words match.
+ final String LIST_CONJUNCTION_PATTERN = "^(NP|,)*NP(NP|,)*(and(NP|,)*NP(NP|,)*)+$";
+ final String LIST_DISJUNCTION_PATTERN = "^(NP|,)*NP(NP|,)*(or(NP|,)*NP(NP|,)*)+$";
+ if (representation.matches(LIST_CONJUNCTION_PATTERN) || representation.matches(LIST_DISJUNCTION_PATTERN)) {
+ Relation relation = representation.matches(LIST_CONJUNCTION_PATTERN) ? Relation.JOINT_NP_LIST : Relation.JOINT_NP_DISJUNCTION;
+
+ // get last CC index (scans from the right; stays 0 if no child is labelled CC)
+ int lastCCIdx = 0;
+ for (int i = np.getChildrenAsList().size() - 1; i >= 0; i--) {
+ Tree child = np.getChildrenAsList().get(i);
+ if (child.value().equals("CC")) {
+ lastCCIdx = i;
+ break;
+ }
+ }
+
+ // extract: every NP child starts a new element; once the first NP after the last CC has been added, the words of all remaining children are appended to that final element. Non-NP children before that point are skipped.
+ List> elementsWords = new ArrayList<>();
+ boolean foundFirstNPAfterCC = false;
+ for (int i = 0; i < np.getChildrenAsList().size(); i++) {
+ Tree child = np.getChildrenAsList().get(i);
+
+ if (foundFirstNPAfterCC) {
+ elementsWords.get(elementsWords.size() - 1).addAll(child.yieldWords());
+ } else if (child.value().equals("NP")) {
+ elementsWords.add(child.yieldWords());
+ if (i > lastCCIdx) {
+ foundFirstNPAfterCC = true;
+ }
+ }
+ }
+
+ return Optional.of(new Result(elementsWords, relation));
+ }
+
+ return Optional.empty();
+ }
+ // Value holder returned by split(): one word list per list element plus the relation (JOINT_NP_LIST or JOINT_NP_DISJUNCTION). The list is stored and returned without copying.
+ public static class Result {
+ private final List> elementsWords;
+ private final Relation relation;
+
+ public Result(List> elementsWords, Relation relation) {
+ this.elementsWords = elementsWords;
+ this.relation = relation;
+ }
+
+ public List> getElementsWords() {
+ return elementsWords;
+ }
+
+ public Relation getRelation() {
+ return relation;
+ }
+ }
+
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/utils/TregexUtils.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/utils/TregexUtils.java
new file mode 100644
index 0000000..7132b5b
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/extraction/utils/TregexUtils.java
@@ -0,0 +1,111 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : TregexUtils
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.extraction.utils;
+
+import edu.stanford.nlp.trees.Tree;
+import edu.stanford.nlp.trees.tregex.TregexMatcher;
+import edu.stanford.nlp.trees.tregex.TregexPattern;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * Helpers that collect every match of a Tregex pattern as a {@link MyMatch} (a snapshot of the named nodes) and return the matches sorted by the position of selected named nodes.
+ */
+public class TregexUtils {
+ // Collects the matches rooted at parseTree itself (repeated findAt calls) and sorts them by the nodes named in groupsToOrder.
+ public static List sortedFindAt(Tree parseTree, TregexPattern p, List groupsToOrder) {
+ List res = new ArrayList<>();
+
+ TregexMatcher matcher = p.matcher(parseTree);
+ while (matcher.findAt(parseTree)) {
+ HashMap groups = new HashMap<>();
+ for (String name : matcher.getNodeNames()) {
+ groups.put(name, matcher.getNode(name));
+ }
+ res.add(new MyMatch(groups));
+ }
+
+ // sort groups
+ res.sort(new MyMatch.Comparator(parseTree, groupsToOrder));
+
+ return res;
+ }
+ // Same as sortedFindAt, but iterates with find(), i.e. over matches anywhere in parseTree rather than only at its root.
+ public static List sortedFind(Tree parseTree, TregexPattern p, List groupsToOrder) {
+ List res = new ArrayList<>();
+
+ TregexMatcher matcher = p.matcher(parseTree);
+ while (matcher.find()) {
+ HashMap groups = new HashMap<>();
+ for (String name : matcher.getNodeNames()) {
+ groups.put(name, matcher.getNode(name));
+ }
+ res.add(new MyMatch(groups));
+ }
+
+ // sort groups
+ res.sort(new MyMatch.Comparator(parseTree, groupsToOrder));
+
+ return res;
+ }
+ // Snapshot of one match: maps each node name of the pattern to the tree node it was bound to when the match was recorded.
+ public static class MyMatch {
+ private final HashMap groups;
+
+ public MyMatch(HashMap groups) {
+ this.groups = groups;
+ }
+ // Returns the node recorded under the given name; throws IllegalArgumentException when the name was not recorded.
+ public Tree getNode(String name) {
+ if (groups.containsKey(name)) {
+ return groups.get(name);
+ } else {
+ throw new IllegalArgumentException("No tree for name: '" + name + "'");
+ }
+ }
+ // Orders matches by the sum of nodeNumber(anchorTree) over the given node names - presumably each node's position in a depth-first traversal of anchorTree; TODO confirm against the Stanford Tree API.
+ public static class Comparator implements java.util.Comparator {
+ private final Tree anchorTree;
+ private final List names;
+
+ public Comparator(Tree anchorTree, List names) {
+ this.anchorTree = anchorTree;
+ this.names = names;
+ }
+
+ @Override
+ public int compare(MyMatch myMatch, MyMatch otherMatch) {
+ int myMatchValue = 0;
+ int otherMatchValue = 0;
+ for (String name : names) {
+ myMatchValue += myMatch.getNode(name).nodeNumber(anchorTree);
+ otherMatchValue += otherMatch.getNode(name).nodeNumber(anchorTree);
+ }
+
+ return myMatchValue - otherMatchValue; // NOTE(review): subtraction-based comparison; Integer.compare would rule out any overflow concern
+ }
+ }
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/model/Coordination.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/model/Coordination.java
new file mode 100644
index 0000000..e91e176
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/model/Coordination.java
@@ -0,0 +1,131 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : Coordination
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.model;
+
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.utils.PrettyTreePrinter;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Discourse-tree node that holds an ordered list of child trees under one {@link Relation}, optionally together with a signal phrase.
+ */
+public class Coordination extends DiscourseTree {
+ final Relation relation;
+ private final String signalPhrase; // optional
+ private final List coordinations;
+ // Copies the given children into a fresh list via addCoordination, which also makes this node their parent.
+ public Coordination(String extractionRule, Relation relation, String signalPhrase, List coordinations) {
+ super(extractionRule);
+ this.relation = relation;
+ this.signalPhrase = signalPhrase;
+ this.coordinations = new ArrayList<>();
+ coordinations.forEach(this::addCoordination);
+ }
+ // Appends a child and sets its parent to this node.
+ public void addCoordination(DiscourseTree coordination) {
+ this.coordinations.add(coordination);
+ coordination.parent = this;
+ }
+ // Swaps the given child for a fresh Invalidation placeholder (removed later by removeInvalidations).
+ public void invalidateCoordination(DiscourseTree coordination) {
+ replaceCoordination(coordination, new Invalidation());
+ }
+ // Replaces the first child equal to oldCoordination; the replacement gets this node as parent and inherits the old sentence index where its own is still unset. Does nothing if no child matches; the old child's parent field is left untouched.
+ public void replaceCoordination(DiscourseTree oldCoordination, DiscourseTree newCoordination) {
+ for (int i = 0; i < coordinations.size(); i++) {
+ if (coordinations.get(i).equals(oldCoordination)) {
+ coordinations.set(i, newCoordination);
+ newCoordination.parent = this;
+ newCoordination.setRecursiveUnsetSentenceIdx(oldCoordination.getSentenceIdx());
+ break;
+ }
+ }
+ }
+ // Drops every direct child that is an Invalidation; iterates backwards so removals do not shift the indices still to be visited.
+ public void removeInvalidations() {
+ for (int i = coordinations.size() - 1; i >= 0; i--) {
+ if (coordinations.get(i) instanceof Invalidation) {
+ coordinations.remove(i);
+ }
+ }
+ }
+
+ public Relation getRelation() {
+ return relation;
+ }
+ // Returns the internal child list itself, not a copy.
+ public List getCoordinations() {
+ return coordinations;
+ }
+ // All children that are not equal to the given one, in order.
+ public List getOtherCoordinations(DiscourseTree coordination) {
+ return coordinations.stream().filter(c -> !c.equals(coordination)).collect(Collectors.toList());
+ }
+ // The children located before the first child equal to the given one (all children if none is equal).
+ public List getOtherPrecedingCoordinations(DiscourseTree coordination) {
+ List res = new ArrayList<>();
+
+ for (DiscourseTree child : coordinations) {
+ if (child.equals(coordination)) {
+ break;
+ } else {
+ res.add(child);
+ }
+ }
+
+ return res;
+ }
+ // The children located after the first child equal to the given one, skipping any further equal children (empty if none is equal).
+ public List getOtherFollowingCoordinations(DiscourseTree coordination) {
+ List res = new ArrayList<>();
+
+ boolean found = false;
+ for (DiscourseTree child : coordinations) {
+ if (child.equals(coordination)) {
+ found = true;
+ } else {
+ if (found) {
+ res.add(child);
+ }
+ }
+ }
+
+ return res;
+ }
+ // Single-line caption of the form: CO/<relation> ('<signalPhrase>' or NULL, <extractionRule>).
+ @Override
+ public List getPTPCaption() {
+ String signalPhraseStr = (signalPhrase != null) ? "'" + signalPhrase + "'" : "NULL";
+ return Collections.singletonList("CO/" + relation + " (" + signalPhraseStr + ", " + extractionRule + ")");
+ }
+ // One DefaultEdge labelled "n" per child; the meaning of the boolean argument is defined by PrettyTreePrinter.
+ @Override
+ public List getPTPEdges() {
+ return coordinations.stream().map(c -> new PrettyTreePrinter.DefaultEdge("n", c, true)).collect(Collectors.toList());
+ }
+
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/model/DiscourseTree.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/model/DiscourseTree.java
new file mode 100644
index 0000000..f0f3d9c
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/model/DiscourseTree.java
@@ -0,0 +1,158 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : DiscourseTree
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.model;
+
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.utils.PrettyTreePrinter;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+
+public abstract class DiscourseTree implements PrettyTreePrinter.Node { // base node type of the discourse tree; the subclasses this class dispatches on are Coordination, Subordination and Leaf
+ final String extractionRule;
+ DiscourseTree parent; //optional
+ private boolean processed;
+ private int sentenceIdx;
+ // Starts unprocessed, without parent and with sentenceIdx -1 (meaning "unset").
+ DiscourseTree(String extractionRule) {
+ this.extractionRule = extractionRule;
+ this.processed = false;
+ this.parent = null; // should be set by inherited classes
+ this.sentenceIdx = -1; // should be set by inherited classes
+ }
+ // Assigns the sentence index to this node if it is still unset and then recurses into the children; a node whose index is already set is left alone and its subtree is NOT visited.
+ void setRecursiveUnsetSentenceIdx(int sentenceIdx) {
+ if (this.sentenceIdx < 0) {
+ this.sentenceIdx = sentenceIdx;
+
+ // recursive
+ if (this instanceof Coordination) {
+ ((Coordination) this).getCoordinations().forEach(c -> c.setRecursiveUnsetSentenceIdx(sentenceIdx));
+ }
+ if (this instanceof Subordination) {
+ ((Subordination) this).getLeftConstituent().setRecursiveUnsetSentenceIdx(sentenceIdx);
+ ((Subordination) this).getRightConstituent().setRecursiveUnsetSentenceIdx(sentenceIdx);
+ }
+ }
+ }
+ // Removes Invalidation placeholders from every Coordination in this subtree.
+ public void cleanup() {
+ if (this instanceof Coordination) {
+
+ // remove invalidations
+ ((Coordination) this).removeInvalidations();
+
+ // recursion
+ ((Coordination) this).getCoordinations().forEach(DiscourseTree::cleanup);
+ }
+
+ if (this instanceof Subordination) {
+
+ // recursion
+ ((Subordination) this).getLeftConstituent().cleanup();
+ ((Subordination) this).getRightConstituent().cleanup();
+ }
+ }
+ // True only when the parent is a Coordination whose relation is UNKNOWN_COORDINATION.
+ public boolean usableAsReference() {
+ return ((parent != null) && (parent instanceof Coordination) && (((Coordination) parent).relation.equals(Relation.UNKNOWN_COORDINATION)));
+ }
+ // Replaces this node in its parent Coordination with an Invalidation; throws AssertionError when usableAsReference() is false.
+ public void useAsReference() {
+ if (usableAsReference()) {
+ ((Coordination) parent).invalidateCoordination(this);
+ } else {
+ throw new AssertionError("Not useable as reference");
+ }
+ }
+ // Leaves reached by following all children of coordinations but only the superordination of subordinations; a Leaf returns itself, any other node type contributes nothing.
+ public List getNucleusPathLeaves() {
+ List res = new ArrayList<>();
+
+ if (this instanceof Leaf) {
+ res.add((Leaf) this);
+ } else {
+ // recursion on coordinations
+ if (this instanceof Coordination) {
+ for (DiscourseTree child : ((Coordination) this).getCoordinations()) {
+ res.addAll(child.getNucleusPathLeaves());
+ }
+ }
+
+ // recursion on superordinations
+ if (this instanceof Subordination) {
+ res.addAll(((Subordination) this).getSuperordination().getNucleusPathLeaves());
+ }
+ }
+
+ return res;
+ }
+ // Nearest preceding sibling subtree: the previous child of a parent Coordination, or the left constituent when this is the right constituent of a parent Subordination; otherwise the search continues from the parent. Empty when the root is reached.
+ public Optional getPreviousNode() {
+ if (parent != null) {
+ if (parent instanceof Coordination) {
+ Coordination p = (Coordination) parent;
+ DiscourseTree prev = null;
+ for (DiscourseTree child : p.getCoordinations()) {
+ if ((child.equals(this)) && (prev != null)) {
+ return Optional.of(prev);
+ }
+ prev = child;
+ }
+ }
+ if (parent instanceof Subordination) {
+ Subordination p = (Subordination) parent;
+ if (p.getRightConstituent().equals(this)) {
+ return Optional.of(p.getLeftConstituent());
+ }
+ }
+
+ // recursion
+ return parent.getPreviousNode();
+ }
+
+ return Optional.empty();
+ }
+ // Marks this node as processed; there is no way to reset the flag.
+ public void setProcessed() {
+ this.processed = true;
+ }
+
+ public boolean isNotProcessed() {
+ return !processed;
+ }
+
+ public String getExtractionRule() {
+ return extractionRule;
+ }
+ // -1 until a sentence index has been assigned via setRecursiveUnsetSentenceIdx.
+ public int getSentenceIdx() {
+ return sentenceIdx;
+ }
+
+ @Override
+ public String toString() {
+ return PrettyTreePrinter.prettyPrint(this, false);
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/model/Invalidation.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/model/Invalidation.java
new file mode 100644
index 0000000..07d30bb
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/model/Invalidation.java
@@ -0,0 +1,49 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : Invalidation
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.model;
+
+import org.lambda3.text.simplification.discourse.utils.PrettyTreePrinter;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Placeholder node with an empty extraction-rule name; Coordination substitutes it for an invalidated child and removes it again in removeInvalidations().
+ */
+public class Invalidation extends DiscourseTree {
+
+ public Invalidation() {
+ super("");
+ }
+ // Fixed caption; the node has no outgoing edges.
+ @Override
+ public List getPTPCaption() {
+ return Collections.singletonList("INVALIDATED");
+ }
+
+ @Override
+ public List getPTPEdges() {
+ return new ArrayList<>();
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/model/Leaf.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/model/Leaf.java
new file mode 100644
index 0000000..2049e85
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/model/Leaf.java
@@ -0,0 +1,75 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : Leaf
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.model;
+
+import org.lambda3.text.simplification.discourse.utils.PrettyTreePrinter;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Childless node of the discourse tree: a piece of text together with a {@link Type} and the name of the extraction rule passed to the constructor.
+ */
+public class Leaf extends DiscourseTree {
+ private final Type type;
+ private final String text;
+
+ public Leaf(Type type, String extractionRule, String text) {
+ super(extractionRule);
+ this.type = type;
+ this.text = text;
+ }
+
+ public Type getType() {
+ return type;
+ }
+
+ public String getText() {
+ return text;
+ }
+ // Caption is the quoted text, followed by " [terminal]" or " [s-context]" for the two non-default types.
+ @Override
+ public List getPTPCaption() {
+ String typeStr = "";
+ if (type.equals(Type.TERMINAL)) {
+ typeStr = " [terminal]";
+ } else if (type.equals(Type.SENT_SIM_CONTEXT)) {
+ typeStr = " [s-context]";
+ }
+
+ return Collections.singletonList("'" + text + "'" + typeStr);
+ }
+ // A leaf has no outgoing edges; a new empty list is returned on every call.
+ @Override
+ public List getPTPEdges() {
+ return new ArrayList<>();
+ }
+
+ public enum Type {
+ DEFAULT, // can be split further
+ TERMINAL, // will not be split
+ SENT_SIM_CONTEXT // will not be split and will act like a sentence-simplification content in Step 3
+ }
+
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/model/SentenceLeaf.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/model/SentenceLeaf.java
new file mode 100644
index 0000000..777e6bf
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/model/SentenceLeaf.java
@@ -0,0 +1,34 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : SentenceLeaf
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.model;
+
+/**
+ * Leaf that wraps one whole sentence: a {@link Type#DEFAULT} leaf created with the extraction rule "SENTENCE".
+ */
+public class SentenceLeaf extends Leaf {
+
+ public SentenceLeaf(String sentence, int sentenceIdx) {
+ super(Type.DEFAULT, "SENTENCE", sentence);
+ this.setRecursiveUnsetSentenceIdx(sentenceIdx); // inherited, defined outside this file; presumably stores the sentence index on this node — confirm
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/tree/model/Subordination.java b/src/main/java/org/lambda3/text/simplification/discourse/tree/model/Subordination.java
new file mode 100644
index 0000000..e7dc77c
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/tree/model/Subordination.java
@@ -0,0 +1,119 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : Subordination
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.tree.model;
+
+import org.lambda3.text.simplification.discourse.tree.Relation;
+import org.lambda3.text.simplification.discourse.utils.PrettyTreePrinter;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ *
+ */
+public class Subordination extends DiscourseTree {
+ private final Relation relation;
+ private final String signalPhrase; // optional
+ private final boolean superordinationIsLeft;
+ private DiscourseTree leftConstituent;
+ private DiscourseTree rightConstituent;
+
+ public Subordination(String extractionRule, Relation relation, String signalPhrase, DiscourseTree leftConstituent, DiscourseTree rightConstituent, boolean superordinationIsLeft) {
+ super(extractionRule);
+ this.relation = relation;
+ this.signalPhrase = signalPhrase;
+ this.superordinationIsLeft = superordinationIsLeft;
+
+ this.leftConstituent = new Leaf(Leaf.Type.DEFAULT, "tmp", "tmp");
+ this.rightConstituent = new Leaf(Leaf.Type.DEFAULT, "tmp", "tmp");
+ replaceLeftConstituent(leftConstituent);
+ replaceRightConstituent(rightConstituent);
+ }
+
+ public void replaceLeftConstituent(DiscourseTree newLeftConstituent) {
+ DiscourseTree oldLeftConstituent = this.leftConstituent;
+ this.leftConstituent = newLeftConstituent;
+ newLeftConstituent.parent = this;
+ newLeftConstituent.setRecursiveUnsetSentenceIdx(oldLeftConstituent.getSentenceIdx());
+ }
+
+ private void replaceRightConstituent(DiscourseTree newRightConstituent) {
+ DiscourseTree oldRightConstituent = this.rightConstituent;
+ this.rightConstituent = newRightConstituent;
+ newRightConstituent.parent = this;
+ newRightConstituent.setRecursiveUnsetSentenceIdx(oldRightConstituent.getSentenceIdx());
+ }
+
+ public void replaceSuperordination(DiscourseTree newSuperordination) {
+ if (superordinationIsLeft) {
+ replaceLeftConstituent(newSuperordination);
+ } else {
+ replaceRightConstituent(newSuperordination);
+ }
+ }
+
+ public void replaceSubordination(DiscourseTree newSubordination) {
+ if (superordinationIsLeft) {
+ replaceRightConstituent(newSubordination);
+ } else {
+ replaceLeftConstituent(newSubordination);
+ }
+ }
+
+ public Relation getRelation() {
+ return relation;
+ }
+
+ public DiscourseTree getLeftConstituent() {
+ return leftConstituent;
+ }
+
+ public DiscourseTree getRightConstituent() {
+ return rightConstituent;
+ }
+
+ public DiscourseTree getSuperordination() {
+ return (superordinationIsLeft) ? leftConstituent : rightConstituent;
+ }
+
+ public DiscourseTree getSubordination() {
+ return (superordinationIsLeft) ? rightConstituent : leftConstituent;
+ }
+
+ @Override
+ public List getPTPCaption() {
+ String signalPhraseStr = (signalPhrase != null) ? "'" + signalPhrase + "'" : "NULL";
+ return Collections.singletonList("SUB/" + relation + " (" + signalPhraseStr + ", " + extractionRule + ")");
+ }
+
+ @Override
+ public List getPTPEdges() {
+ List res = new ArrayList<>();
+ res.add(new PrettyTreePrinter.DefaultEdge((superordinationIsLeft) ? "n" : "s", leftConstituent, true));
+ res.add(new PrettyTreePrinter.DefaultEdge((superordinationIsLeft) ? "s" : "n", rightConstituent, true));
+
+ return res;
+ }
+
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/utils/IndexRange.java b/src/main/java/org/lambda3/text/simplification/discourse/utils/IndexRange.java
new file mode 100644
index 0000000..b13eafe
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/utils/IndexRange.java
@@ -0,0 +1,49 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : IndexRange
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.utils;
+
+/**
+ * Immutable pair of integer indices, printed as {@code (from | to)}; the class itself does not define whether {@code toIdx} is inclusive.
+ */
+public class IndexRange {
+    private final int fromIdx;
+    private final int toIdx;
+
+    public IndexRange(int fromIdx, int toIdx) {
+        this.toIdx = toIdx;
+        this.fromIdx = fromIdx;
+    }
+
+    public int getFromIdx() {
+        return this.fromIdx;
+    }
+
+    public int getToIdx() {
+        return this.toIdx;
+    }
+
+    @Override
+    public String toString() {
+        return new StringBuilder("(").append(fromIdx).append(" | ").append(toIdx).append(")").toString();
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/utils/PrettyTreePrinter.java b/src/main/java/org/lambda3/text/simplification/discourse/utils/PrettyTreePrinter.java
new file mode 100644
index 0000000..dc1535a
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/utils/PrettyTreePrinter.java
@@ -0,0 +1,369 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : PrettyTreePrinter
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.utils;
+
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.*;
+import java.util.stream.Collectors;
+
+public class PrettyTreePrinter {
+
+ private static final String DOT_INDENT = " "; // prefix of every statement line written inside the digraph body
+ // Defaults used by addDotLineRec for nodes that do not implement GNode.
+ private static final NodeShape DEFAULT_NODE_SHAPE = NodeShape.box;
+ private static final List DEFAULT_NODE_STYLES = Collections.singletonList(NodeStyle.solid);
+ private static final String DEFAULT_NODE_COLOR = "black";
+ private static final String DEFAULT_NODE_FILLCOLOR = "white";
+ // Defaults used by addDotLineRec for edges that do not implement GEdge.
+ private static final EdgeShape DEFAULT_EDGE_SHAPE = EdgeShape.box;
+ private static final List DEFAULT_EDGE_STYLES = Collections.singletonList(EdgeStyle.solid);
+ private static final String DEFAULT_EDGE_COLOR = "black";
+
+ // TEXTUAL REPRESENTATION: PRIVATE HELPERS /////////////////////////////////////////////////////////////////////////
+
+ /** Returns {@code text} unchanged if {@code maxTextLen} is null or not exceeded; otherwise cuts it to {@code maxTextLen} characters ending in "...". */
+ private static String trimText(String text, Integer maxTextLen) {
+     final String ellipsis = "...";
+     boolean fits = (maxTextLen == null) || (text.length() <= maxTextLen);
+     if (fits) {
+         return text;
+     }
+     if (maxTextLen < ellipsis.length()) {
+         throw new IllegalArgumentException("maxTextLen should have at least the length: " + ellipsis.length());
+     }
+     // leave room for the ellipsis so that the result is exactly maxTextLen characters long
+     return text.substring(0, maxTextLen - ellipsis.length()) + ellipsis;
+ }
+
+ /** Height of the printed subtree below {@code node}: 0 when it is not followed or has no edges, otherwise 1 + the largest child height. */
+ private static int getBottomDepth(Node node, boolean follow) {
+     if (follow && (node.getPTPEdges().size() > 0)) {
+         OptionalInt deepestChild = node.getPTPEdges().stream().mapToInt(e -> getBottomDepth(e.getPTPChild(), e.followPTPChild())).max();
+         return deepestChild.orElse(-1) + 1;
+     }
+     return 0;
+ }
+
+ private static String getEdgeIndent(int size, String edgeCaption, boolean lastChild) {
+ String front = (lastChild) ? "└─" : "├─";
+ String back = "─> ";
+
+ String middle = trimText(edgeCaption, size - front.length() - back.length());
+
+ boolean right = true;
+ while (front.length() + middle.length() + back.length() < size) {
+ middle = (right) ? middle + "─" : "─" + middle;
+ right = !right;
+ }
+
+ return front + middle + back;
+ }
+
+ /** Builds a continuation prefix: " " below the last child, otherwise "|", padded with blanks up to {@code size} characters. */
+ private static String getIndent(int size, boolean lastChild) {
+     StringBuilder indent = new StringBuilder(lastChild ? " " : "|");
+     for (int width = indent.length(); width < size; width++) {
+         indent.append(" ");
+     }
+     return indent.toString();
+ }
+
+ /** Returns the text lines for {@code node}; when {@code follow} is set, the lines of every child subtree follow, each prefixed with an edge arrow or an indent. */
+ private static List prettyPrintRec(Node node, boolean follow, boolean reversed, int size) {
+ List res = new ArrayList<>();
+ int bottomDepth = getBottomDepth(node, follow);
+
+ // this node
+ res.addAll(node.getPTPCaption());
+
+ // edges
+ if (follow) {
+ // iterate the edges front-to-back, or back-to-front when reversed
+ ListIterator iter = (reversed) ? node.getPTPEdges().listIterator(node.getPTPEdges().size()) : node.getPTPEdges().listIterator();
+ while ((reversed) ? iter.hasPrevious() : iter.hasNext()) {
+ Edge edge = (reversed) ? iter.previous() : iter.next();
+ boolean endChild = ((reversed) ? !iter.hasPrevious() : !iter.hasNext()); // true once no further edge remains in iteration direction
+ int indentSize = (bottomDepth - getBottomDepth(edge.getPTPChild(), edge.followPTPChild())) * size; // edges to shallower subtrees get a wider indent
+
+ boolean firstChildLine = true;
+ for (String childLine : prettyPrintRec(edge.getPTPChild(), edge.followPTPChild(), reversed, size)) {
+ if (firstChildLine) {
+ res.add(getEdgeIndent(indentSize, edge.getPTPCaption(), endChild) + childLine);
+ firstChildLine = false;
+ } else {
+ res.add(getIndent(indentSize, endChild) + childLine);
+ }
+ }
+ }
+ }
+
+ return res;
+ }
+
+ // PRETTY-PRINT ENTRY POINTS & DOT RECURSION ///////////////////////////////////////////////////////////////////////
+
+ public static String prettyPrint(Node node, boolean reversed, int size) { // size: width of one indent step in characters
+     return String.join("\n", prettyPrintRec(node, true, reversed, size));
+ }
+
+ public static String prettyPrint(Node node, boolean reversed) { // default indent width 10
+     return prettyPrint(node, reversed, 10);
+ }
+
+ public static String prettyPrint(Node node, int size) { // children in non-reversed order
+     return prettyPrint(node, false, size);
+ }
+
+ public static String prettyPrint(Node node) { // non-reversed order, default indent width 10
+     return prettyPrint(node, false, 10);
+ }
+
+ /** Appends the DOT statements for {@code node} and, if {@code follow} is set, for its edges and subtrees to {@code strb}; returns the id assigned to {@code node} in {@code idMap}. */
+ private static long addDotLineRec(Node node, boolean follow, StringBuilder strb, HashMap idMap) {
+     long id;
+     if (idMap.containsKey(node)) {
+         id = idMap.get(node);
+     } else {
+         id = idMap.size();
+         idMap.put(node, id);
+     }
+     // graphical attributes come from the node itself if it is a GNode, otherwise from the class defaults
+     NodeShape nodeShape = (node instanceof GNode) ? ((GNode) node).getPTPNodeShape() : DEFAULT_NODE_SHAPE;
+     List nodeStyles = (node instanceof GNode) ? ((GNode) node).getPTPNodeStyles() : DEFAULT_NODE_STYLES;
+     // color (getPTPColor) and fill color (getPTPFillColor) are separate attributes and must not share a getter
+     String nodeColor = (node instanceof GNode) ? ((GNode) node).getPTPColor() : DEFAULT_NODE_COLOR;
+     String nodeFillColor = (node instanceof GNode) ? ((GNode) node).getPTPFillColor() : DEFAULT_NODE_FILLCOLOR;
+     String nodeLabel = node.getPTPCaption().stream().collect(Collectors.joining("\n"));
+     // the label sits inside a quoted DOT string: escape double quotes, write line breaks as the two characters \n
+     nodeLabel = nodeLabel.replace("\"", "\\\"").replaceAll("\\n", "\\\\n");
+
+     // this node
+     String nodeLine = String.format("\"%d\" [shape=\"%s\", style=\"%s\", color=\"%s\", fillcolor=\"%s\", label=\"%s\"];",
+             id,
+             nodeShape,
+             nodeStyles.stream().map(Enum::name).collect(Collectors.joining(",")),
+             nodeColor,
+             nodeFillColor,
+             nodeLabel
+     );
+     strb.append(DOT_INDENT).append(nodeLine).append("\n");
+
+     // edges
+     if (follow) {
+         for (Edge edge : node.getPTPEdges()) {
+             // child (recursion): emits the child's statements first and yields its id for the edge line
+             long childId = addDotLineRec(edge.getPTPChild(), edge.followPTPChild(), strb, idMap);
+
+             EdgeShape edgeShape = (edge instanceof GEdge) ? ((GEdge) edge).getPTPEdgeShape() : DEFAULT_EDGE_SHAPE;
+             List edgeStyles = (edge instanceof GEdge) ? ((GEdge) edge).getPTPEdgeStyles() : DEFAULT_EDGE_STYLES;
+             String edgeColor = (edge instanceof GEdge) ? ((GEdge) edge).getPTPColor() : DEFAULT_EDGE_COLOR;
+             String edgeLabel = edge.getPTPCaption();
+             edgeLabel = edgeLabel.replace("\"", "\\\"").replaceAll("\\n", "\\\\n");
+
+             String edgeLine = String.format("\"%s\" -> \"%s\" [shape=\"%s\", style=\"%s\", color=\"%s\", label=\"%s\"];",
+                     id,
+                     childId,
+                     edgeShape,
+                     edgeStyles.stream().map(Enum::name).collect(Collectors.joining(",")),
+                     edgeColor,
+                     edgeLabel
+             );
+             strb.append(DOT_INDENT).append(edgeLine).append("\n");
+         }
+     }
+
+     return id;
+ }
+
+ // GRAPHICAL REPRESENTATION (DOT) //////////////////////////////////////////////////////////////////////////////////
+
+ /** Builds the text of a DOT digraph named {@code graphName} for the tree below {@code node}; a non-null {@code title} becomes the graph label. */
+ public static String visualize(Node node, String graphName, String title) {
+ StringBuilder strb = new StringBuilder();
+ strb.append(String.format("digraph %s {", graphName)).append("\n"); // NOTE(review): graphName and title are inserted without any escaping
+ if (title != null) {
+ strb.append(DOT_INDENT + "labelloc=\"t\";" + "\n");
+ strb.append(DOT_INDENT).append(String.format("label=\"%s\";", title)).append("\n");
+ }
+ // emit all node and edge statements; idMap ends up holding every visited node with its id
+ HashMap idMap = new HashMap<>();
+ addDotLineRec(node, true, strb, idMap);
+ // list all nodes without outgoing edges in one "rank = same" statement
+ List leafIDs = idMap.keySet().stream().filter(n -> n.getPTPEdges().size() <= 0).map(idMap::get).collect(Collectors.toList());
+ String sameRankLine = String.format("{rank = same; %s};", leafIDs.stream().map(i -> "\"" + i + "\"").collect(Collectors.joining("; ")));
+ strb.append(DOT_INDENT).append(sameRankLine).append("\n");
+
+ strb.append("}");
+ return strb.toString();
+ }
+
+ // FILE OUTPUT, GRAPHICAL ENUMS & BASE INTERFACES //////////////////////////////////////////////////////////////////
+
+ /** Writes the DOT text produced by {@link #visualize(Node, String, String)} to the file at {@code filepath}. */
+ public static void visualizeToFile(Node node, String graphName, String title, String filepath) throws IOException {
+     String str = visualize(node, graphName, title);
+     // try-with-resources: the writer is closed, and the file handle released, even when write() throws
+     try (BufferedWriter writer = new BufferedWriter(new FileWriter(filepath))) {
+         writer.write(str);
+     }
+ }
+
+ public enum NodeShape { // node shapes; addDotLineRec writes the constant's string form into the node's shape="..." attribute
+ box,
+ polygon,
+ ellipse,
+ circle,
+ point,
+ egg,
+ triangle,
+ plaintext,
+ diamond,
+ trapezium,
+ parallelogram,
+ house,
+ pentagon,
+ hexagon,
+ septagon,
+ octagon,
+ doublecircle,
+ doubleoctagon,
+ tripleoctagon,
+ invtriangle,
+ invtrapezium,
+ invhouse,
+ Mdiamond,
+ Msquare,
+ Mcircle,
+ rect,
+ rectangle,
+ square,
+ none,
+ note,
+ tab,
+ folder,
+ box3d,
+ component
+ }
+
+ public enum NodeStyle { // node styles; addDotLineRec joins the constant names with "," into the node's style="..." attribute
+ dashed,
+ dotted,
+ solid,
+ invis,
+ bold,
+ filled,
+ diagonals,
+ rounded
+ }
+
+ public enum EdgeShape { // edge shapes; addDotLineRec writes the constant's string form into the edge's shape="..." attribute
+ box,
+ crow,
+ diamond,
+ dot,
+ inv,
+ none,
+ normal,
+ tee,
+ vee
+ }
+
+ public enum EdgeStyle { // edge styles; addDotLineRec joins the constant names with "," into the edge's style="..." attribute
+ dashed,
+ dotted,
+ solid,
+ invis,
+ bold
+ }
+
+ public enum EdgeDir { // edge directions; not referenced by any other member of PrettyTreePrinter
+ forward,
+ back,
+ both,
+ none
+ }
+
+ public interface Node { // minimal contract for a printable tree node
+ List getPTPCaption(); // caption lines of the node; the DOT output joins them with line breaks
+
+ List getPTPEdges(); // outgoing edges; a node with no edges is treated as a leaf
+ }
+
+ public interface Edge { // minimal contract for a printable edge
+ String getPTPCaption(); // label shown on the edge
+
+ Node getPTPChild(); // node the edge leads to
+
+ boolean followPTPChild(); // false: the child itself is still printed, but its own edges are not expanded
+ }
+
+ // GRAPHICAL REPRESENTATION ////////////////////////////////////////////////////////////////////////////////////////
+
+ public interface GNode extends Node { // node that supplies its own graphical attributes instead of the class defaults
+ NodeShape getPTPNodeShape();
+
+ List getPTPNodeStyles();
+
+ String getPTPColor(); // node color, as opposed to the fill color below
+
+ String getPTPFillColor();
+ }
+
+ public interface GEdge extends Edge { // edge that supplies its own graphical attributes instead of the class defaults
+ EdgeShape getPTPEdgeShape();
+
+ List getPTPEdgeStyles();
+
+ String getPTPColor(); // written into the edge's color="..." attribute by addDotLineRec
+ }
+
+ /** Plain immutable {@link Edge} whose caption, child and follow flag are fixed at construction. */
+ public static class DefaultEdge implements Edge {
+     private final String label;
+     private final Node target;
+     private final boolean follow;
+
+     public DefaultEdge(String caption, Node child, boolean followPTPChild) {
+         this.follow = followPTPChild;
+         this.target = child;
+         this.label = caption;
+     }
+
+     @Override
+     public String getPTPCaption() {
+         return this.label;
+     }
+
+     @Override
+     public Node getPTPChild() {
+         return this.target;
+     }
+
+     @Override
+     public boolean followPTPChild() {
+         return this.follow;
+     }
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERExtractionUtils.java b/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERExtractionUtils.java
new file mode 100644
index 0000000..b14c417
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERExtractionUtils.java
@@ -0,0 +1,44 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : NERExtractionUtils
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.utils.ner;
+
+import org.lambda3.text.simplification.discourse.utils.IndexRange;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ *
+ */
+public class NERExtractionUtils {
+
+ public static List getNERIndexRanges(NERString nerString) {
+ List res = new ArrayList<>();
+
+ for (NERTokenGroup group : nerString.getGroups()) {
+ res.add(new IndexRange(group.getFromTokenIndex(), group.getToTokenIndex()));
+ }
+
+ return res;
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERString.java b/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERString.java
new file mode 100644
index 0000000..c4d0f51
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERString.java
@@ -0,0 +1,86 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : NERString
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.utils.ner;
+
+import edu.stanford.nlp.ling.Word;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Sequence of NER-tagged tokens plus its partition into groups of consecutive tokens that share one category.
+ */
+public class NERString {
+    public static final String NO_CATEGORY = "O";
+
+    protected final List tokens;
+    private List groups;
+
+    public NERString(List tokens) {
+        this.tokens = tokens;
+        this.createGroups();
+    }
+
+    private void createGroups() {
+        this.groups = new ArrayList<>();
+        String lastCategory = null;
+        List currGroupTokens = new ArrayList<>();
+        for (NERToken nerToken : this.tokens) {
+            // a category change closes the current group and opens a new one
+            if ((lastCategory != null) && (!nerToken.getCategory().equals(lastCategory))) {
+                this.groups.add(new NERTokenGroup(currGroupTokens));
+                currGroupTokens = new ArrayList<>();
+            }
+
+            currGroupTokens.add(nerToken);
+            lastCategory = nerToken.getCategory();
+        }
+
+        // close the final group; an empty token list yields no group at all instead of one empty group
+        if (!currGroupTokens.isEmpty()) {
+            this.groups.add(new NERTokenGroup(currGroupTokens));
+        }
+    }
+
+    public List getTokens() {
+        return tokens;
+    }
+
+    public List getGroups() {
+        return groups;
+    }
+
+    private List getWords(int fromIndex, int toIndex) {
+        return tokens.subList(fromIndex, toIndex).stream().map(t -> new Word(t.getText())).collect(Collectors.toList());
+    }
+
+    public List getWords() {
+        return getWords(0, tokens.size());
+    }
+
+    @Override
+    public String toString() {
+        return tokens.stream().map(NERToken::toString).collect(Collectors.joining("\n"));
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERStringParseException.java b/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERStringParseException.java
new file mode 100644
index 0000000..a9783ef
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERStringParseException.java
@@ -0,0 +1,33 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : NERStringParseException
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.utils.ner;
+
+/**
+ * Checked exception for NER output that cannot be processed; NERStringParser.parse(Tree) throws it when the NER items do not match the parse-tree leaves.
+ */
+public class NERStringParseException extends Exception {
+
+ public NERStringParseException(String msg) {
+ super(msg);
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERStringParser.java b/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERStringParser.java
new file mode 100644
index 0000000..b63ae32
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERStringParser.java
@@ -0,0 +1,91 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : NERStringParser
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.utils.ner;
+
+import edu.stanford.nlp.ie.AbstractSequenceClassifier;
+import edu.stanford.nlp.ie.crf.CRFClassifier;
+import edu.stanford.nlp.trees.Tree;
+import org.lambda3.text.simplification.discourse.utils.ner.tner.TNERString;
+import org.lambda3.text.simplification.discourse.utils.ner.tner.TNERToken;
+import org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils;
+import org.lambda3.text.simplification.discourse.utils.words.WordsUtils;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Runs a Stanford CRF sequence classifier over plain text or over the words of a parse tree and wraps the tagged items in a {@link NERString} / {@link TNERString}.
+ */
+public class NERStringParser {
+ // shared classifier instance, created once when this class is initialized
+ private static final AbstractSequenceClassifier NER_CLASSIFIER = CRFClassifier.getClassifierNoExceptions("edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz");
+
+ public static NERString parse(String text) {
+ List tokens = new ArrayList<>();
+ // NOTE(review): assumes classifyToString() yields single-space-separated "text/CATEGORY" items — confirm against the classifier's output format
+ String nerString = NER_CLASSIFIER.classifyToString(text);
+ String[] nerTokens = nerString.split(" ");
+
+ int idx = 0;
+ for (String nerToken : nerTokens) {
+ int sep_idx = nerToken.lastIndexOf("/");
+ // NOTE(review): an item without "/" gives sep_idx == -1, and substring(0, -1) then throws StringIndexOutOfBoundsException
+ // split at the last "/": the text is before it, the category after it
+ String txt = nerToken.substring(0, sep_idx);
+ String category = nerToken.substring(sep_idx + 1);
+ NERToken token = new NERToken(idx, txt, category);
+ tokens.add(token);
+
+ ++idx;
+ }
+
+ return new NERString(tokens);
+ }
+
+ public static TNERString parse(Tree parseTree) throws NERStringParseException {
+ List tokens = new ArrayList<>();
+ // classify the words of the tree; the same item format as in parse(String) is assumed
+ List parseTreeLeafNumbers = ParseTreeExtractionUtils.getLeafNumbers(parseTree, parseTree);
+ String nerString = NER_CLASSIFIER.classifyToString(WordsUtils.wordsToString(parseTree.yieldWords()));
+ String[] nerTokens = nerString.split(" ");
+ // the i-th NER item is paired with the i-th leaf number below, so both counts must match
+ if (parseTreeLeafNumbers.size() != nerTokens.length) {
+ throw new NERStringParseException("Could not map NER string to parseTree");
+ }
+
+ int idx = 0;
+ for (String nerToken : nerTokens) {
+ int sep_idx = nerToken.lastIndexOf("/");
+
+ // create token: text before the last "/", category after it, plus the tree node looked up via the idx-th leaf number
+ String text = nerToken.substring(0, sep_idx);
+ String category = nerToken.substring(sep_idx + 1);
+ TNERToken token = new TNERToken(idx, text, category, parseTree.getNodeNumber(parseTreeLeafNumbers.get(idx)));
+ tokens.add(token);
+
+ ++idx;
+ }
+
+ return new TNERString(tokens, parseTree);
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERToken.java b/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERToken.java
new file mode 100644
index 0000000..2d02714
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERToken.java
@@ -0,0 +1,61 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : NERToken
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.utils.ner;
+
+import edu.stanford.nlp.ling.Word;
+
+/**
+ * One token of an NER-tagged text: its position in the token sequence, its text and its NER category.
+ */
+public class NERToken {
+    protected final int index;
+    protected final String text;
+    protected final String category;
+
+    public NERToken(int index, String text, String category) {
+        this.category = category;
+        this.text = text;
+        this.index = index;
+    }
+
+    public int getIndex() {
+        return this.index;
+    }
+
+    public String getText() {
+        return this.text;
+    }
+
+    public Word getWord() {
+        return new Word(this.text);
+    }
+
+    public String getCategory() {
+        return this.category;
+    }
+
+    @Override
+    public String toString() {
+        return new StringBuilder("(").append(index).append(": ").append(category).append(", '").append(text).append("')").toString();
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERTokenGroup.java b/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERTokenGroup.java
new file mode 100644
index 0000000..6e41098
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/NERTokenGroup.java
@@ -0,0 +1,68 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : NERTokenGroup
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.utils.ner;
+
+import edu.stanford.nlp.ling.Word;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Group of tokens handled as one unit; index range and category are read from the first and last token, so the accessors require a non-empty group.
+ */
+class NERTokenGroup {
+ private final List tokens;
+
+ public NERTokenGroup(List tokens) {
+ this.tokens = tokens;
+ }
+
+ public int getFromTokenIndex() {
+ return tokens.get(0).index; // index of the first token; throws IndexOutOfBoundsException for an empty group
+ }
+
+ public int getToTokenIndex() {
+ return tokens.get(tokens.size() - 1).index; // index of the last token itself, i.e. inclusive
+ }
+
+ public List getTokens() {
+ return tokens;
+ }
+
+ private String getCategory() {
+ return tokens.get(0).getCategory(); // the first token's category stands for the whole group
+ }
+
+ public boolean isNamedEntity() {
+ return !getCategory().equals(NERString.NO_CATEGORY); // every category other than NO_CATEGORY ("O") counts as a named entity
+ }
+
+ public List getWords() {
+ return tokens.stream().map(t -> new Word(t.getText())).collect(Collectors.toList());
+ }
+
+ @Override
+ public String toString() {
+ return "[\n" + tokens.stream().map(t -> "\t" + t.toString()).collect(Collectors.joining("\n")) + "\n]";
+ }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/tner/TNERString.java b/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/tner/TNERString.java
new file mode 100644
index 0000000..56df00f
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/tner/TNERString.java
@@ -0,0 +1,46 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : TNERString
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.utils.ner.tner;
+
+import edu.stanford.nlp.trees.Tree;
+import org.lambda3.text.simplification.discourse.utils.ner.NERString;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** An {@link NERString} that also keeps a parse tree and registers itself with each of its tokens. */
+public class TNERString extends NERString {
+    private final Tree parseTree;
+    /** Copies {@code tokens} into a new list for the superclass; every element is cast to {@link TNERToken}. */
+    public TNERString(List tokens, Tree parseTree) {
+        super(new ArrayList<>(tokens));
+        this.parseTree = parseTree;
+        for (Object token : this.tokens) {
+            ((TNERToken) token).setNerString(this);
+        }
+    }
+    /** Returns the parse tree passed to the constructor. */
+    public Tree getParseTree() {
+        return this.parseTree;
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/tner/TNERToken.java b/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/tner/TNERToken.java
new file mode 100644
index 0000000..be56769
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/utils/ner/tner/TNERToken.java
@@ -0,0 +1,69 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : TNERToken
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.utils.ner.tner;
+
+import edu.stanford.nlp.trees.Tree;
+import org.lambda3.text.simplification.discourse.utils.ner.NERToken;
+
+/**
+ * An {@link NERToken} tied to a parse-tree leaf; the leaf's parent node is looked up once the owning string is set.
+ */
+public class TNERToken extends NERToken {
+    // leafNode is fixed at construction; nerString and posNode stay null until setNerString() has run.
+    private final Tree leafNode;
+    private TNERString nerString;
+    private Tree posNode;
+    /** Creates a token that is not yet attached to any {@link TNERString}. */
+    public TNERToken(int index, String token, String category, Tree leafNode) {
+        super(index, token, category);
+        this.nerString = null;
+        this.leafNode = leafNode;
+        this.posNode = null; // wait until nerString is set
+    }
+    /** Attaches this token to {@code nerString} and resolves the leaf's parent within that string's parse tree. */
+    public void setNerString(TNERString nerString) {
+        this.nerString = nerString;
+        this.posNode = leafNode.parent(getParseTree());
+    }
+    /** Returns the owning string's parse tree; only usable after {@link #setNerString} has been called. */
+    private Tree getParseTree() {
+        return nerString.getParseTree();
+    }
+    /** Returns the leaf node passed to the constructor. */
+    public Tree getLeafNode() {
+        return leafNode;
+    }
+    /** Returns the leaf's parent node, or {@code null} while it has not been resolved. */
+    public Tree getPosNode() {
+        return posNode;
+    }
+    /** Returns the parent node's value, or {@code null} while no parent is known, so toString() cannot throw. */
+    private String getPOSTag() {
+        return posNode == null ? null : posNode.value();
+    }
+    /** Formats the token as {@code (index: category, 'text', posTag)}; an unresolved tag prints as null. */
+    @Override
+    public String toString() {
+        return "(" + index + ": " + category + ", '" + text + "', " + getPOSTag() + ")";
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/utils/parseTree/ParseTreeException.java b/src/main/java/org/lambda3/text/simplification/discourse/utils/parseTree/ParseTreeException.java
new file mode 100644
index 0000000..4c40e23
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/utils/parseTree/ParseTreeException.java
@@ -0,0 +1,33 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : ParseTreeException
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.utils.parseTree;
+
+/**
+ * Checked exception whose message quotes the offending text: {@code Failed to parse text: "<text>"}.
+ */
+public class ParseTreeException extends Exception {
+    /** @param text the text to quote in the message; {@code null} is rendered as the word null */
+    public ParseTreeException(String text) {
+        super(new StringBuilder("Failed to parse text: \"").append(text).append("\"").toString());
+    }
+}
diff --git a/src/main/java/org/lambda3/text/simplification/discourse/utils/parseTree/ParseTreeExtractionUtils.java b/src/main/java/org/lambda3/text/simplification/discourse/utils/parseTree/ParseTreeExtractionUtils.java
new file mode 100644
index 0000000..5151158
--- /dev/null
+++ b/src/main/java/org/lambda3/text/simplification/discourse/utils/parseTree/ParseTreeExtractionUtils.java
@@ -0,0 +1,133 @@
+/*
+ * ==========================License-Start=============================
+ * DiscourseSimplification : ParseTreeExtractionUtils
+ *
+ * Copyright © 2017 Lambda³
+ *
+ * GNU General Public License 3
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see http://www.gnu.org/licenses/.
+ * ==========================License-End==============================
+ */
+
+package org.lambda3.text.simplification.discourse.utils.parseTree;
+
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.trees.Tree;
+import org.lambda3.text.simplification.discourse.utils.IndexRange;
+import org.lambda3.text.simplification.discourse.utils.ner.NERExtractionUtils;
+import org.lambda3.text.simplification.discourse.utils.ner.NERString;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class ParseTreeExtractionUtils {
+
+ public static List getLeafNumbers(Tree anchorTree, Tree node) {
+ List