From 5d8882e9585c563e13217edaecce2a21947ef51d Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 12:20:30 -0400 Subject: [PATCH 01/32] renaming "topology master" to "topology manager" in integration_test --- .../src/python/local_test_runner/main.py | 4 ++-- .../python/local_test_runner/test_kill_bolt.py | 4 ++-- ...est_kill_tmaster.py => test_kill_tmanager.py} | 16 ++++++++-------- .../python/local_test_runner/test_template.py | 6 +++--- 4 files changed, 15 insertions(+), 15 deletions(-) rename integration_test/src/python/local_test_runner/{test_kill_tmaster.py => test_kill_tmanager.py} (79%) diff --git a/integration_test/src/python/local_test_runner/main.py b/integration_test/src/python/local_test_runner/main.py index 5d51937bdc2..dc68f8be8b3 100644 --- a/integration_test/src/python/local_test_runner/main.py +++ b/integration_test/src/python/local_test_runner/main.py @@ -37,14 +37,14 @@ from . import test_kill_metricsmgr from . import test_kill_stmgr from . import test_kill_stmgr_metricsmgr -from . import test_kill_tmaster +from . import test_kill_tmanager from . import test_scale_up from . import test_template from . import test_explorer TEST_CLASSES = [ test_template.TestTemplate, - test_kill_tmaster.TestKillTMaster, + test_kill_tmanager.TestKillTManager, test_kill_stmgr.TestKillStmgr, test_kill_metricsmgr.TestKillMetricsMgr, test_kill_stmgr_metricsmgr.TestKillStmgrMetricsMgr, diff --git a/integration_test/src/python/local_test_runner/test_kill_bolt.py b/integration_test/src/python/local_test_runner/test_kill_bolt.py index 7a4eb16d610..5efe2ad3310 100644 --- a/integration_test/src/python/local_test_runner/test_kill_bolt.py +++ b/integration_test/src/python/local_test_runner/test_kill_bolt.py @@ -23,7 +23,7 @@ import logging from . import test_template -NON_TMASTER_SHARD = 1 +NON_TMANAGER_SHARD = 1 HERON_BOLT = 'identity-bolt_3' class TestKillBolt(test_template.TestTemplate): @@ -31,5 +31,5 @@ class TestKillBolt(test_template.TestTemplate): def execute_test_case(self): logging.info("Executing kill bolt") bolt_pid = self.get_pid( - 'container_%d_%s' % (NON_TMASTER_SHARD, HERON_BOLT), self.params['workingDirectory']) + 'container_%d_%s' % (NON_TMANAGER_SHARD, HERON_BOLT), self.params['workingDirectory']) self.kill_process(bolt_pid) diff --git a/integration_test/src/python/local_test_runner/test_kill_tmaster.py b/integration_test/src/python/local_test_runner/test_kill_tmanager.py similarity index 79% rename from integration_test/src/python/local_test_runner/test_kill_tmaster.py rename to integration_test/src/python/local_test_runner/test_kill_tmanager.py index 0e0b49ae3d3..bee4b01f352 100644 --- a/integration_test/src/python/local_test_runner/test_kill_tmaster.py +++ b/integration_test/src/python/local_test_runner/test_kill_tmanager.py @@ -19,24 +19,24 @@ # under the License. -"""test_kill_tmaster.py""" +"""test_kill_tmanager.py""" import logging import subprocess from . 
import test_template -TMASTER_SHARD = 0 +TMANAGER_SHARD = 0 -class TestKillTMaster(test_template.TestTemplate): +class TestKillTManager(test_template.TestTemplate): def execute_test_case(self): restart_shard(self.params['cliPath'], self.params['cluster'], - self.params['topologyName'], TMASTER_SHARD) + self.params['topologyName'], TMANAGER_SHARD) def restart_shard(heron_cli_path, test_cluster, topology_name, shard_num): - """ restart tmaster """ + """ restart tmanager """ splitcmd = [heron_cli_path, 'restart', '--verbose', test_cluster, topology_name, str(shard_num)] - logging.info("Killing TMaster: %s", splitcmd) + logging.info("Killing TManager: %s", splitcmd) if subprocess.call(splitcmd) != 0: - raise RuntimeError("Unable to kill TMaster") - logging.info("Killed TMaster") + raise RuntimeError("Unable to kill TManager") + logging.info("Killed TManager") diff --git a/integration_test/src/python/local_test_runner/test_template.py b/integration_test/src/python/local_test_runner/test_template.py index fd6ccf8a816..ebb3a98cc0b 100644 --- a/integration_test/src/python/local_test_runner/test_template.py +++ b/integration_test/src/python/local_test_runner/test_template.py @@ -40,7 +40,7 @@ RETRY_COUNT = 5 RETRY_INTERVAL = 10 # Topology shard definitions -NON_TMASTER_SHARD = 1 +NON_TMANAGER_SHARD = 1 # Topology process name definitions STMGR = 'stmgr' HERON_BIN = "bin" @@ -240,13 +240,13 @@ def kill_process(self, process_number): def kill_strmgr(self): logging.info("Executing kill stream manager") - stmgr_pid = self.get_pid('%s-%d' % (STMGR, NON_TMASTER_SHARD), self.params['workingDirectory']) + stmgr_pid = self.get_pid('%s-%d' % (STMGR, NON_TMANAGER_SHARD), self.params['workingDirectory']) self.kill_process(stmgr_pid) def kill_metricsmgr(self): logging.info("Executing kill metrics manager") metricsmgr_pid = self.get_pid( - '%s-%d' % (HERON_METRICSMGR, NON_TMASTER_SHARD), self.params['workingDirectory']) + '%s-%d' % (HERON_METRICSMGR, NON_TMANAGER_SHARD), self.params['workingDirectory']) self.kill_process(metricsmgr_pid) def _get_tracker_pplan(self): From 2e7b0d4140cda50600241fefb7a74e21f95e0312 Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 12:23:48 -0400 Subject: [PATCH 02/32] renaming "topology master" to "topology manager" in tools --- tools/rules/heron_deps.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/rules/heron_deps.bzl b/tools/rules/heron_deps.bzl index 0488cc0855d..c51942c1a08 100644 --- a/tools/rules/heron_deps.bzl +++ b/tools/rules/heron_deps.bzl @@ -27,7 +27,7 @@ def heron_java_proto_files(): "//heron/proto:proto_physical_plan_java", "//heron/proto:proto_scheduler_java", "//heron/proto:proto_ckptmgr_java", - "//heron/proto:proto_tmaster_java", + "//heron/proto:proto_tmanager_java", "//heron/proto:proto_topology_java", "//heron/proto:proto_tuple_java", "//heron/proto:proto_stmgr_java", From 0e7ad0fa7fd27457c914970e5e65f2873b945f6f Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 12:56:51 -0400 Subject: [PATCH 03/32] renaming "topology master" to "topology manager" in vagrant --- vagrant/README.md | 2 +- vagrant/Vagrantfile | 30 ++++++++++++++++-------------- vagrant/init.sh | 3 ++- vagrant/local-ci.sh | 8 ++++---- 4 files changed, 23 insertions(+), 20 deletions(-) diff --git a/vagrant/README.md b/vagrant/README.md index 662362e3d6d..6a4f0cf144e 100644 --- a/vagrant/README.md +++ b/vagrant/README.md @@ -18,6 +18,6 @@ --> Vagrant VM for CI and debugging ================================= -Running `vagrant up master` will bring up an 
environment similar to the one used by Travis for CI. If the build fails, it can be inspected by entering the machine with `vagrant ssh master`. When you're down with the VM, you can clean up with `vagrant destroy -f`.
+Running `vagrant up primary` will bring up an environment similar to the one used by Travis for CI. If the build fails, it can be inspected by entering the machine with `vagrant ssh primary`. When you're done with the VM, you can clean up with `vagrant destroy -f`.
 
 The advantage of this is you don't need to worry about the potential environment pollution, and others can reproduce the results from other platforms.
diff --git a/vagrant/Vagrantfile b/vagrant/Vagrantfile
index ad39ee48deb..c45a80a7a48 100644
--- a/vagrant/Vagrantfile
+++ b/vagrant/Vagrantfile
@@ -15,11 +15,11 @@
 # -*- mode: ruby -*-
 # vi: set ft=ruby :
 
-SLAVES=0
+SECONDARIES=0
 NET_PREFIX="192.168.25."
 
-NODES={"master" => NET_PREFIX + "5"}
-(0..SLAVES-1).each do |i| NODES["slave#{i}"] = NET_PREFIX + (6 + i).to_s end
+NODES={"primary" => NET_PREFIX + "5"}
+(0..SECONDARIES-1).each do |i| NODES["secondary#{i}"] = NET_PREFIX + (6 + i).to_s end
 
 # create hosts
 File.open('.vagrant/hosts', 'w') do |file|
@@ -34,8 +34,8 @@ Vagrant.configure(2) do |config|
   config.vm.synced_folder "../", "/vagrant"
   config.vm.boot_timeout = 600
 
-  config.vm.define "master" do |master|
-    master.vm.provider "virtualbox" do |v|
+  config.vm.define "primary" do |primary|
+    primary.vm.provider "virtualbox" do |v|
       host = RbConfig::CONFIG['host_os']
       mem_ratio = 1.0/2
       cpu_exec_cap = 75
@@ -58,23 +58,25 @@
       v.cpus = cpus
     end
 
-    master.vm.hostname = "master"
-    master.vm.network :private_network, ip: NODES["master"]
+    primary.vm.hostname = "primary"
+    primary.vm.network :private_network, ip: NODES["primary"]
 
-    master.vm.provision "shell", path: "init.sh", args: "master"
+    # NB: Apache Mesos requires the use of "master"/"slave"
+    primary.vm.provision "shell", path: "init.sh", args: "master"
   end
 
-  (0..SLAVES-1).each do |i|
-    config.vm.define "slave#{i}" do |slave|
-      slave.vm.provider "virtualbox" do |v|
+  (0..SECONDARIES-1).each do |i|
+    config.vm.define "secondary#{i}" do |secondary|
+      secondary.vm.provider "virtualbox" do |v|
         v.memory = 2048
         v.cpus = 2
       end
 
-      slave.vm.hostname = "slave#{i}"
-      slave.vm.network :private_network, ip: NODES[slave.vm.hostname]
+      secondary.vm.hostname = "secondary#{i}"
+      secondary.vm.network :private_network, ip: NODES[secondary.vm.hostname]
 
-      slave.vm.provision "shell", path: "init.sh", args: "slave"
+      # NB: Apache Mesos requires the use of "master"/"slave"
+      secondary.vm.provision "shell", path: "init.sh", args: "slave"
     end
   end
 end
diff --git a/vagrant/init.sh b/vagrant/init.sh
index 5b793bb6ddc..3a3bfdad729 100644
--- a/vagrant/init.sh
+++ b/vagrant/init.sh
@@ -16,11 +16,12 @@ set -o errexit -o nounset -o pipefail
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# NB: Apache Mesos requires the use of "master"/"slave" install_mesos() { mode=$1 # master | slave apt-get -qy install mesos=0.25.0* - echo "zk://master:2181/mesos" > /etc/mesos/zk + echo "zk://primary:2181/mesos" > /etc/mesos/zk echo '5mins' > /etc/mesos-slave/executor_registration_timeout ip=$(cat /etc/hosts | grep `hostname` | grep -E -o "([0-9]{1,3}[\.]){3}[0-9]{1,3}") diff --git a/vagrant/local-ci.sh b/vagrant/local-ci.sh index 20be8c3e6b2..087894c6664 100755 --- a/vagrant/local-ci.sh +++ b/vagrant/local-ci.sh @@ -33,9 +33,9 @@ HERE="$(cd "$(dirname "$0")" && pwd -P)" cd "$HERE" -state="$(vagrant status master --machine-readable | grep master,state, | cut -d, -f4)" +state="$(vagrant status primary --machine-readable | grep primary,state, | cut -d, -f4)" if [ "$state" != "running" ]; then - vagrant up master + vagrant up primary fi @@ -43,5 +43,5 @@ fi script="${1-ci}" env="PLATFORM=Ubuntu JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64/" # run the CI, if it fails drop into a shell -vagrant ssh master --command "cd /vagrant && $env ./scripts/travis/$script.sh" \ - || vagrant ssh master --command "cd /vagrant && $env exec bash" +vagrant ssh primary --command "cd /vagrant && $env ./scripts/travis/$script.sh" \ + || vagrant ssh primary --command "cd /vagrant && $env exec bash" From 106bc90eb0d898f3b9dbde457de0065965bbc17d Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 12:59:10 -0400 Subject: [PATCH 04/32] renaming "topology master" to "topology manager" in deploy --- deploy/docker/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deploy/docker/README.md b/deploy/docker/README.md index 42e899cbf95..832f59534b5 100644 --- a/deploy/docker/README.md +++ b/deploy/docker/README.md @@ -57,11 +57,11 @@ root@16092325a696:/heron# heron submit sandbox /heron/examples/heron-api-example [2018-02-01 20:24:21 +0000] [INFO]: Successfully launched topology 'exclamation' root@16092325a696:/heron# heron deactivate sandbox exclamation [2018-02-01 20:24:46 +0000] [INFO]: Using cluster definition in /heron/heron-tools/conf/sandbox -[2018-02-01 20:24:47 +0000] [INFO] org.apache.heron.spi.utils.TMasterUtils: Topology command DEACTIVATE completed successfully. +[2018-02-01 20:24:47 +0000] [INFO] org.apache.heron.spi.utils.TManagerUtils: Topology command DEACTIVATE completed successfully. [2018-02-01 20:24:47 +0000] [INFO]: Successfully deactivate topology: exclamation root@16092325a696:/heron# heron activate sandbox exclamation [2018-02-01 20:24:55 +0000] [INFO]: Using cluster definition in /heron/heron-tools/conf/sandbox -[2018-02-01 20:24:56 +0000] [INFO] org.apache.heron.spi.utils.TMasterUtils: Topology command ACTIVATE completed successfully. +[2018-02-01 20:24:56 +0000] [INFO] org.apache.heron.spi.utils.TManagerUtils: Topology command ACTIVATE completed successfully. 
[2018-02-01 20:24:56 +0000] [INFO]: Successfully activate topology: exclamation root@16092325a696:/heron# heron kill sandbox exclamation [2018-02-01 20:25:08 +0000] [INFO]: Using cluster definition in /heron/heron-tools/conf/sandbox From cbfb14c8cf7392c04fb78856358621d517c71b64 Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 13:00:03 -0400 Subject: [PATCH 05/32] renaming "topology master" to "topology manager" in heronpy --- heronpy/proto/BUILD | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/heronpy/proto/BUILD b/heronpy/proto/BUILD index 5eb56958e10..ef9e679f09a 100644 --- a/heronpy/proto/BUILD +++ b/heronpy/proto/BUILD @@ -12,7 +12,7 @@ genrule( "networktests.proto", "physical_plan.proto", "stats.proto", - "tmaster.proto", + "tmanager.proto", "tuple.proto", "metrics.proto", "packing_plan.proto", @@ -40,7 +40,7 @@ pex_library( ":proto_scheduler_py", ":proto_stats_py", ":proto_stmgr_py", - ":proto_tmaster_py", + ":proto_tmanager_py", ":proto_topology_py", ":proto_tuple_py", ], @@ -63,7 +63,7 @@ pex_binary( ":proto_scheduler_py", ":proto_stats_py", ":proto_stmgr_py", - ":proto_tmaster_py", + ":proto_tmanager_py", ":proto_topology_py", ":proto_tuple_py", ], @@ -139,8 +139,8 @@ proto_library( ) proto_library( - name = "proto_tmaster", - src = "tmaster.proto", + name = "proto_tmanager", + src = "tmanager.proto", gen_py = 1, includes = ["$(GENDIR)/heronpy/proto"], deps = [ @@ -159,7 +159,7 @@ proto_library( includes = ["$(GENDIR)/heronpy/proto"], deps = [ ":proto_common", - ":proto_tmaster", + ":proto_tmanager", ], ) From e44291bafeba425af9a8fe52fefdac7ea241a375 Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 15:59:33 -0400 Subject: [PATCH 06/32] renaming "topology master" to "topology manager" in website2 --- website2/README.md | 4 +- .../docs/assets/{tmaster.png => tmanager.png} | Bin website2/docs/cluster-config-overview.md | 2 +- website2/docs/cluster-config-stream.md | 4 +- website2/docs/cluster-config-system-level.md | 4 +- website2/docs/cluster-config-tmanager.md | 38 +++++++++++++++++ website2/docs/cluster-config-tmaster.md | 38 ----------------- website2/docs/compiling-code-organization.md | 8 ++-- website2/docs/compiling-running-tests.md | 2 +- website2/docs/deployment-configuration.md | 2 +- website2/docs/extending-heron-metric-sink.md | 2 +- .../getting-started-troubleshooting-guide.md | 6 +-- website2/docs/guides-troubeshooting-guide.md | 12 +++--- website2/docs/guides-ui-guide.md | 2 +- website2/docs/heron-architecture.md | 28 ++++++------- website2/docs/schedulers-k8s-by-hand.md | 34 +++++++-------- website2/docs/schedulers-standalone.md | 22 +++++----- website2/docs/schedulers-yarn.md | 10 ++--- website2/docs/state-managers-local-fs.md | 4 +- website2/docs/state-managers-zookeeper.md | 4 +- website2/docs/user-manuals-tracker-rest.md | 14 +++---- website2/website/README.md | 4 +- website2/website/sidebars.json | 4 +- .../cluster-config-overview.md | 2 +- .../cluster-config-stream.md | 4 +- .../cluster-config-system-level.md | 4 +- .../cluster-config-tmanager.md | 39 ++++++++++++++++++ .../cluster-config-tmaster.md | 39 ------------------ .../compiling-code-organization.md | 8 ++-- .../compiling-running-tests.md | 2 +- .../deployment-configuration.md | 2 +- .../extending-heron-metric-sink.md | 2 +- .../getting-started-troubleshooting-guide.md | 6 +-- .../guides-troubeshooting-guide.md | 12 +++--- .../guides-ui-guide.md | 2 +- .../heron-architecture.md | 28 ++++++------- .../schedulers-k8s-by-hand.md | 34 +++++++-------- 
.../schedulers-standalone.md | 22 +++++----- .../schedulers-yarn.md | 10 ++--- .../state-managers-local-fs.md | 4 +- .../state-managers-zookeeper.md | 4 +- .../user-manuals-tracker-rest.md | 14 +++---- .../version-0.20.0-incubating-sidebars.json | 2 +- 43 files changed, 244 insertions(+), 244 deletions(-) rename website2/docs/assets/{tmaster.png => tmanager.png} (100%) create mode 100644 website2/docs/cluster-config-tmanager.md delete mode 100644 website2/docs/cluster-config-tmaster.md create mode 100644 website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-tmanager.md delete mode 100644 website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-tmaster.md diff --git a/website2/README.md b/website2/README.md index 56b8c3bb19f..9429dc8de0b 100644 --- a/website2/README.md +++ b/website2/README.md @@ -66,7 +66,7 @@ All the markdown files are placed in the `docs` directory. It is a flat structur │   ├── cluster-config-overview.md │   ├── cluster-config-stream.md │   ├── cluster-config-system-level.md -│   ├── cluster-config-tmaster.md +│   ├── cluster-config-tmanager.md │   ├── compiling-code-organization.md │   ├── compiling-docker.md │   ├── compiling-linux.md @@ -199,4 +199,4 @@ For more details about versioning, refer to [Versioning](https://docusaurus.io/d ### Check issues, fix and verify After download the translated documents, you can open the target markdown file, check issues and fix them. -To verify if you have fixed the issues correctly, [run the site locally](#running-the-site-locally). \ No newline at end of file +To verify if you have fixed the issues correctly, [run the site locally](#running-the-site-locally). diff --git a/website2/docs/assets/tmaster.png b/website2/docs/assets/tmanager.png similarity index 100% rename from website2/docs/assets/tmaster.png rename to website2/docs/assets/tmanager.png diff --git a/website2/docs/cluster-config-overview.md b/website2/docs/cluster-config-overview.md index ed23d872c04..26e0be212c2 100644 --- a/website2/docs/cluster-config-overview.md +++ b/website2/docs/cluster-config-overview.md @@ -51,7 +51,7 @@ specific components in a topology and are detailed in the docs below: * [Heron Instance](cluster-config-instance) * [Heron Metrics Manager](cluster-config-metrics) * [Heron Stream Manager](cluster-config-stream) -* [Heron Topology Master](cluster-config-tmaster) +* [Heron Topology Manager](cluster-config-tmanager) ### Overriding Heron Cluster Configuration diff --git a/website2/docs/cluster-config-stream.md b/website2/docs/cluster-config-stream.md index 229347ba694..5678ed5e517 100644 --- a/website2/docs/cluster-config-stream.md +++ b/website2/docs/cluster-config-stream.md @@ -48,7 +48,7 @@ Parameter | Meaning | Default `heron.streammgr.cache.drain.frequency.ms` | The frequency (in milliseconds) at which the SM's tuple cache is drained | `10` `heron.streammgr.cache.drain.size.mb` | The size threshold (in megabytes) at which the SM's tuple cache is drained | `100` `heron.streammgr.client.reconnect.interval.sec` | The reconnect interval to other SMs for the SM client (in seconds) | `1` -`heron.streammgr.client.reconnect.tmaster.interval.sec` | The reconnect interval to the Topology Master for the SM client (in seconds) | `10` -`heron.streammgr.tmaster.heartbeat.interval.sec` | The interval (in seconds) at which a heartbeat is sent to the Topology Master | `10` +`heron.streammgr.client.reconnect.tmanager.interval.sec` | The reconnect interval to the Topology Manager for the SM client (in seconds) | `10` 
+`heron.streammgr.tmanager.heartbeat.interval.sec` | The interval (in seconds) at which a heartbeat is sent to the Topology Manager | `10` `heron.streammgr.connection.read.batch.size.mb` | The maximum batch size (in megabytes) at which the SM reads from the socket | `1` `heron.streammgr.connection.write.batch.size.mb` | The maximum batch size (in megabytes) to write by the stream manager to the socket | `1` diff --git a/website2/docs/cluster-config-system-level.md b/website2/docs/cluster-config-system-level.md index 302a185ac40..899167f89e9 100644 --- a/website2/docs/cluster-config-system-level.md +++ b/website2/docs/cluster-config-system-level.md @@ -27,7 +27,7 @@ apply to any specific component. Config | Meaning | Default :----- |:------- |:------- -`heron.check.tmaster.location.interval.sec` | The interval, in seconds, after which to check if the topology master location has been fetched or not | 120 +`heron.check.tmanager.location.interval.sec` | The interval, in seconds, after which to check if the topology manager location has been fetched or not | 120 `heron.metrics.export.interval` | The interval, in seconds, at which components export metrics to the topology's Metrics Manager ## Logging @@ -39,4 +39,4 @@ Config | Meaning | Default `heron.logging.maximum.files` | The maximum number of log files | 5 `heron.logging.prune.interval.sec` | The time interval, in seconds, at which Heron prunes log files | 300 `heron.logging.flush.interval.sec` | The time interval, in seconds, at which Heron flushes log files | 10 -`heron.logging.err.threshold` | The threshold level to log error | 3 \ No newline at end of file +`heron.logging.err.threshold` | The threshold level to log error | 3 diff --git a/website2/docs/cluster-config-tmanager.md b/website2/docs/cluster-config-tmanager.md new file mode 100644 index 00000000000..1906b64c7fa --- /dev/null +++ b/website2/docs/cluster-config-tmanager.md @@ -0,0 +1,38 @@ +--- +id: cluster-config-tmanager +title: Topology Manager +sidebar_label: Topology Manager +--- + + +You can configure the [Topology +Manager](heron-architecture#topology-manager) (TM) for a topology +using the parameters below. 
+
+Parameter | Meaning | Default
+:-------- |:------- |:-------
+`heron.tmanager.metrics.collector.maximum.interval.min` | The maximum interval, in minutes, for metrics to be kept in the Topology Manager | 180
+`heron.tmanager.establish.retry.times` | The maximum number of times to retry establishing the Topology Manager | 30
+`heron.tmanager.establish.retry.interval.sec` | The interval, in seconds, at which to retry establishing the Topology Manager | 1
+`heron.tmanager.network.server.options.maximum.packet.mb` | The maximum packet size, in megabytes, of the Topology Manager's network options for Stream Managers to connect to | 16
+`heron.tmanager.network.controller.options.maximum.packet.mb` | The maximum packet size, in megabytes, of the Topology Manager's network options for scheduler to connect to | 1
+`heron.tmanager.network.stats.options.maximum.packet.mb` | The maximum packet size, in megabytes, of the Topology Manager's network options for stat queries | 1
+`heron.tmanager.metrics.collector.purge.interval.sec` | The interval, in seconds, at which the Topology Manager purges metrics from the socket | 60
+`heron.tmanager.metrics.collector.maximum.exception` | The maximum number of exceptions to be stored in the topology's metrics collector, to prevent potential out-of-memory issues | 256
+`heron.tmanager.metrics.network.bindallinterfaces` | Whether the metrics reporter binds on all interfaces | `False`
+`heron.tmanager.stmgr.state.timeout.sec` | The timeout, in seconds, for the Stream Manager, compared with (current time - last heartbeat time) | 60
diff --git a/website2/docs/cluster-config-tmaster.md b/website2/docs/cluster-config-tmaster.md
deleted file mode 100644
index 7e76753b0bb..00000000000
--- a/website2/docs/cluster-config-tmaster.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-id: cluster-config-tmaster
-title: Topology Master
-sidebar_label: Topology Master
----
-
-You can configure the [Topology
-Master](heron-architecture#topology-master) (TM) for a topology
-using the parameters below.
-
-Parameter | Meaning | Default
-:-------- |:------- |:-------
-`heron.tmaster.metrics.collector.maximum.interval.min` | The maximum interval, in minutes, for metrics to be kept in the Topology Master | 180
-`heron.tmaster.establish.retry.times` | The maximum time to retry to establish the Topology Master | 30
-`heron.tmaster.establish.retry.interval.sec` | The interval to retry to establish the Topology Master | 1
-`heron.tmaster.network.master.options.maximum.packet.mb` | The maximum packet size, in megabytes, of the Topology Master's network options for Stream Managers to connect to | 16
-`heron.tmaster.network.controller.options.maximum.packet.mb` | The maximum packet size, in megabytes, of the Topology Master's network options for scheduler to connect to | 1
-`heron.tmaster.network.stats.options.maximum.packet.mb` | The maximum packet size, in megabytes, of the Topology Master's network options for stat queries | 1
-`heron.tmaster.metrics.collector.purge.interval.sec` | The interval, in seconds, at which the Topology Master purges metrics from the socket | 60
-`heron.tmaster.metrics.collector.maximum.exception` | The maximum number of exceptions to be stored in the topology's metrics collector, to prevent potential out-of-memory issues | 256
-`heron.tmaster.metrics.network.bindallinterfaces` | Whether the metrics reporter binds on all interfaces | `False`
-`heron.tmaster.stmgr.state.timeout.sec` | The timeout, in seconds, for the Stream Manager, compared with (current time - last heartbeat time) | 60
\ No newline at end of file
diff --git a/website2/docs/compiling-code-organization.md b/website2/docs/compiling-code-organization.md
index 053bc480587..2b84d2ee13d 100644
--- a/website2/docs/compiling-code-organization.md
+++ b/website2/docs/compiling-code-organization.md
@@ -32,7 +32,7 @@ cluster, see [Building Topologies](topology-development-topology-api-java) inste
 The primary programming languages for Heron are C++, Java, and Python.
 
 * **C++ 11** is used for most of Heron's core components, including the
-[Topology Master](heron-architecture#topology-master), and
+[Topology Manager](heron-architecture#topology-manager), and
 [Stream Manager](heron-architecture#stream-manager).
 
 * **Java 11** is used primarily for Heron's [topology
@@ -91,11 +91,11 @@ Heron components.
 
 ## Topology Components
 
-### Topology Master
+### Topology Manager
 
 The C++ code for Heron's [Topology
-Master](heron-architecture#topology-master) is written in C++ can be
-found in [`heron/tmaster`]({{% githubMaster %}}/heron/tmaster).
+Manager](heron-architecture#topology-manager) is written in C++ and can be
+found in [`heron/tmanager`]({{% githubMaster %}}/heron/tmanager).
 
 ### Stream Manager
 
diff --git a/website2/docs/compiling-running-tests.md b/website2/docs/compiling-running-tests.md
index 11b157cbc73..8f18b3d23a6 100644
--- a/website2/docs/compiling-running-tests.md
+++ b/website2/docs/compiling-running-tests.md
@@ -82,7 +82,7 @@ Integration tests are divided into two categories:
 * Failure integration tests
 
   These integration tests are designed for testing recovery from failure/restart
-  in certain processes, such as Topology Master and Metrics Manager.
+  in certain processes, such as Topology Manager and Metrics Manager.
To run the failure integration tests on a Mac OS X, do the following: ```bash diff --git a/website2/docs/deployment-configuration.md b/website2/docs/deployment-configuration.md index d5c786e19da..c9cb07c587d 100644 --- a/website2/docs/deployment-configuration.md +++ b/website2/docs/deployment-configuration.md @@ -42,7 +42,7 @@ configuration. Once you are familiar with the system you can tune these paramete high throughput or low latency topologies. * **metrics_sinks.yaml** --- This file specifies where the run-time system and topology metrics -will be routed. By default, the `file sink` and `tmaster sink` need to be present. In addition, +will be routed. By default, the `file sink` and `tmanager sink` need to be present. In addition, `scribe sink` and `graphite sink` are also supported. * **packing.yaml** --- This file specifies the classes for `packing algorithm`, which defaults diff --git a/website2/docs/extending-heron-metric-sink.md b/website2/docs/extending-heron-metric-sink.md index 96e75a3ed42..4f5285ab2ad 100644 --- a/website2/docs/extending-heron-metric-sink.md +++ b/website2/docs/extending-heron-metric-sink.md @@ -138,7 +138,7 @@ sink by name. You should add the sink you want to use to that list. Here's an ex sinks: - file-sink - scribe-sink - - tmaster-sink + - tmanager-sink - print-sink - prometheus-sink ``` diff --git a/website2/docs/getting-started-troubleshooting-guide.md b/website2/docs/getting-started-troubleshooting-guide.md index 37127943d6b..ad79c632c3d 100644 --- a/website2/docs/getting-started-troubleshooting-guide.md +++ b/website2/docs/getting-started-troubleshooting-guide.md @@ -33,7 +33,7 @@ heron submit ... ExclamationTopology --verbose ### 2. Why does the topology launch successfully but fail to start? Even if the topology is submitted successfully, it could still fail to -start some component. For example, TMaster may fail to start due to unfulfilled +start some component. For example, TManager may fail to start due to unfulfilled dependencies. For example, the following message can appear: @@ -50,7 +50,7 @@ java.nio.file.NoSuchFileException: \ ... -[2016-05-27 12:02:38 -0600] org.apache.heron.spi.utils.TMasterUtils SEVERE: \ +[2016-05-27 12:02:38 -0600] org.apache.heron.spi.utils.TManagerUtils SEVERE: \ Failed to get physical plan for topology ExclamationTopology ... @@ -121,7 +121,7 @@ To check, run the following command in a shell. ### 3. Why does the process fail during runtime? -If a component (e.g., TMaster or Stream Manager) has failed during runtime, visit the component's logs in +If a component (e.g., TManager or Stream Manager) has failed during runtime, visit the component's logs in ```bash ~/.herondata/topologies/{cluster}/{role}/{TopologyName}/log-files/ diff --git a/website2/docs/guides-troubeshooting-guide.md b/website2/docs/guides-troubeshooting-guide.md index 2be222c7450..024b61c3e30 100644 --- a/website2/docs/guides-troubeshooting-guide.md +++ b/website2/docs/guides-troubeshooting-guide.md @@ -113,14 +113,14 @@ We assume here that heron client has successfully launched the topology. *Symptom* - Physical plan or logical plan does not show up on UI *Possible Cause* - One of more of stream managers have not yet connected to -Tmaster. +Tmanager. *What to do* - -1. Go to the Tmaster logs for the topology. The zeroth container is reserved for - Tmaster. Go to the container and browse to +1. Go to the Tmanager logs for the topology. The zeroth container is reserved for + Tmanager. 
Go to the container and browse to - log-files/heron-tmaster-.INFO + log-files/heron-tmanager-.INFO and see which stream managers have not yet connected. The `stmgr` ID corresponds to the container number. For example, `stmgr-10` corresponds to @@ -128,7 +128,7 @@ Tmaster. 2. Visit that container to see what is wrong in stream manager's logs, which can be found in `log-files` - directory similar to Tmaster. + directory similar to Tmanager. #### 3. Instances are not starting up @@ -234,4 +234,4 @@ Follow these steps to enable remote debugging: To setup remote debugging with intelij use [remote debugging instructions](https://www.jetbrains.com/help/idea/2016.2/run-debug-configuration-remote.html) . 3. Once the topology is activated start the debugger at ```localhost:{port}``` if in standalone - local deployment or ``` {IP}/{hostname}:{port}``` for multi container remote deployment. And you will be able to debug the code step by step. \ No newline at end of file + local deployment or ``` {IP}/{hostname}:{port}``` for multi container remote deployment. And you will be able to debug the code step by step. diff --git a/website2/docs/guides-ui-guide.md b/website2/docs/guides-ui-guide.md index 52ee5fef440..8f6dd20a4e7 100644 --- a/website2/docs/guides-ui-guide.md +++ b/website2/docs/guides-ui-guide.md @@ -148,7 +148,7 @@ amongst other things. 1. The jar or tar file associated with this topology 2. Logs for heron-executor 3. `log-files` folder which has instance logs, as well as `stream manager` or - `tmaster` logs. + `tmanager` logs. ![Jobpage](assets/jobpage1.png) diff --git a/website2/docs/heron-architecture.md b/website2/docs/heron-architecture.md index efd1dee07e5..7adbd10ef9d 100644 --- a/website2/docs/heron-architecture.md +++ b/website2/docs/heron-architecture.md @@ -100,16 +100,16 @@ components. The following core components of Heron topologies are discussed in depth in the sections below: -* [Topology Master](#topology-master) +* [Topology Manager](#topology-manager) * [Containers](#containers) * [Stream Manager](#stream-manager) * [Heron Instance](#heron-instance) * [Metrics Manager](#metrics-manager) * [Heron Tracker](#heron-tracker) -### Topology Master +### Topology Manager -The **Topology Master** \(TM) manages a topology throughout its entire lifecycle, +The **Topology Manager** \(TM) manages a topology throughout its entire lifecycle, from the time it's submitted until it's ultimately killed. When `heron` deploys a topology it starts a single TM and multiple [containers](heron-architecture#container). The **TM** creates an ephemeral [ZooKeeper](http://zookeeper.apache.org) node to @@ -118,12 +118,12 @@ discoverable by any process in the topology. The **TM** also constructs the [phy plan](heron-topology-concepts#physical-plan) for a topology which it relays to different components. -![Topology Master](assets/tmaster.png) +![Topology Manager](assets/tmanager.png) -#### Topology Master Configuration +#### Topology Manager Configuration TMs have a variety of [configurable -parameters](cluster-config-tmaster) that you can adjust at each +parameters](cluster-config-tmanager) that you can adjust at each phase of a topology's [lifecycle](heron-topology-concepts#topology-lifecycle). ### Containers @@ -134,7 +134,7 @@ Manager](#stream-manager), and a [Metrics Manager](#metrics-manager). Containers communicate with the topology's **TM** to ensure that the topology forms a fully connected graph. 
-For an illustration, see the figure in the [Topology Master](#topology-master) +For an illustration, see the figure in the [Topology Manager](#topology-manager) section above. > In Heron, all topology containerization is handled by the scheduler, be it [Mesos](schedulers-meso-local-mac), [Kubernetes](schedulers-k8s-with-helm), [YARN](schedulers-k8s-by-hand), or something else. Heron schedulers typically use [cgroups](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/ch01) to manage Heron topology processes. @@ -197,7 +197,7 @@ each phase of a topology's [lifecycle](heron-topology-concepts##topology-lifecyc Each topology runs a **Metrics Manager** (MM) that collects and exports metrics from all components in a [container]({{< ref "#container" >}}). It then routes those metrics to -both the [Topology Master]({{< ref "#topology-master" >}}) and to external collectors, such as +both the [Topology Manager]({{< ref "#topology-manager" >}}) and to external collectors, such as [Scribe](https://github.com/facebookarchive/scribe), [Graphite](http://graphite.wikidot.com/), or analogous systems. @@ -288,25 +288,25 @@ Storage | When the topology is deployed to containers by the scheduler, the code * Topologies `heron-executor` process is started for each container and is responsible for - executing the **Topology Master** or **Heron Instances** (Bolt/Spout) that are - assigned to the container. Note that the **Topology Master** is always executed + executing the **Topology Manager** or **Heron Instances** (Bolt/Spout) that are + assigned to the container. Note that the **Topology Manager** is always executed on container 0. When `heron-executor` executes normal **Heron Instances** (i.e. except for container 0), it first prepares the **Stream Manager** and the **Metrics Manager** before starting `org.apache.heron.instance.HeronInstance` for each instance that is assigned to the container. - **Heron Instance** has two threads: the gateway thread and the slave thread. + **Heron Instance** has two threads: the gateway thread and the executor thread. The gateway thread is mainly responsible for communicating with the **Stream Manager** and the **Metrics Manager** using `StreamManagerClient` and `MetricsManagerClient` - respectively, as well as sending/receiving tuples to/from the slave - thread. On the other hand, the slave thread runs either Spout or Bolt + respectively, as well as sending/receiving tuples to/from the executor + thread. On the other hand, the executor thread runs either Spout or Bolt of the topology based on the physical plan. When a new **Heron Instance** is started, its `StreamManagerClient` establishes a connection and registers itself with the **Stream Manager**. After the successful registration, the gateway thread sends its physical plan to - the slave thread, which then executes the assigned instance accordingly. + the executor thread, which then executes the assigned instance accordingly. ## Codebase diff --git a/website2/docs/schedulers-k8s-by-hand.md b/website2/docs/schedulers-k8s-by-hand.md index 149353df837..a540d5b589c 100644 --- a/website2/docs/schedulers-k8s-by-hand.md +++ b/website2/docs/schedulers-k8s-by-hand.md @@ -461,14 +461,14 @@ You can configure Heron on Kubernetes using a variety of YAML config files, list ### heron_internals.yaml -#### Configuration for a wide variety of Heron components, including logging, each topology's stream manager and topology master, and more. 
+#### Configuration for a wide variety of Heron components, including logging, each topology's stream manager and topology manager, and more. | name | description | default | |--------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------|-----------| | heron.logging.directory | The relative path to the logging directory | log-files | | heron.logging.maximum.size.mb | The maximum log file size (in MB) | 100 | | heron.logging.maximum.files | The maximum number of log files | 5 | -| heron.check.tmaster.location.interval.sec | The interval, in seconds, after which to check if the topology master location has been fetched or not | 120 | +| heron.check.tmanager.location.interval.sec | The interval, in seconds, after which to check if the topology manager location has been fetched or not | 120 | | heron.logging.prune.interval.sec | The interval, in seconds, at which to prune C++ log files | 300 | | heron.logging.flush.interval.sec | The interval, in seconds, at which to flush C++ log files | 10 | | heron.logging.err.threshold | The threshold level at which to log errors | 3 | @@ -480,25 +480,25 @@ You can configure Heron on Kubernetes using a variety of YAML config files, list | heron.streammgr.xormgr.rotatingmap.nbuckets | For efficient acknowledgements | 3 | | heron.streammgr.mempool.max.message.number | The max number of messages in the memory pool for each message type | 512 | | heron.streammgr.client.reconnect.interval.sec | The reconnect interval to other stream managers (in seconds) for the stream manager client | 1 | -| heron.streammgr.client.reconnect.tmaster.interval.sec | The reconnect interval to the topology master (in seconds) for the stream manager client | 10 | -| heron.streammgr.client.reconnect.tmaster.max.attempts | The max reconnect attempts to tmaster for stream manager client | 30 | +| heron.streammgr.client.reconnect.tmanager.interval.sec | The reconnect interval to the topology manager (in seconds) for the stream manager client | 10 | +| heron.streammgr.client.reconnect.tmanager.max.attempts | The max reconnect attempts to tmanager for stream manager client | 30 | | heron.streammgr.network.options.maximum.packet.mb | The maximum packet size (in MB) of the stream manager's network options | 10 | -| heron.streammgr.tmaster.heartbeat.interval.sec | The interval (in seconds) at which to send heartbeats | 10 | +| heron.streammgr.tmanager.heartbeat.interval.sec | The interval (in seconds) at which to send heartbeats | 10 | | heron.streammgr.connection.read.batch.size.mb | The maximum batch size (in MB) for the stream manager to read from socket | 1 | | heron.streammgr.connection.write.batch.size.mb | Maximum batch size (in MB) for the stream manager to write to socket | 1 | | heron.streammgr.network.backpressure.threshold | The number of times Heron should wait to see a buffer full while enqueueing data before declaring the start of backpressure | 3 | | heron.streammgr.network.backpressure.highwatermark.mb | The high-water mark on the number (in MB) that can be left outstanding on a connection | 100 | | heron.streammgr.network.backpressure.lowwatermark.mb | The low-water mark on the number (in MB) that can be left outstanding on a connection | | -| heron.tmaster.metrics.collector.maximum.interval.min | The maximum interval (in minutes) for metrics to be kept in the topology master | 180 | -| 
heron.tmaster.establish.retry.times | The maximum number of times to retry establishing connection with the topology master | 30 | -| heron.tmaster.establish.retry.interval.sec | The interval at which to retry establishing connection with the topology master | 1 | -| heron.tmaster.network.master.options.maximum.packet.mb | Maximum packet size (in MB) of topology master's network options to connect to stream managers | 16 | -| heron.tmaster.network.controller.options.maximum.packet.mb | Maximum packet size (in MB) of the topology master's network options to connect to scheduler | 1 | -| heron.tmaster.network.stats.options.maximum.packet.mb | Maximum packet size (in MB) of the topology master's network options for stat queries | 1 | -| heron.tmaster.metrics.collector.purge.interval.sec | The interval (in seconds) at which the topology master purges metrics from socket | 60 | -| heron.tmaster.metrics.collector.maximum.exception | The maximum number of exceptions to be stored in the topology metrics collector, to prevent out-of-memory errors | 256 | -| heron.tmaster.metrics.network.bindallinterfaces | Whether the metrics reporter should bind on all interfaces | False | -| heron.tmaster.stmgr.state.timeout.sec | The timeout (in seconds) for the stream manager, compared with (current time - last heartbeat time) | 60 | +| heron.tmanager.metrics.collector.maximum.interval.min | The maximum interval (in minutes) for metrics to be kept in the topology manager | 180 | +| heron.tmanager.establish.retry.times | The maximum number of times to retry establishing connection with the topology manager | 30 | +| heron.tmanager.establish.retry.interval.sec | The interval at which to retry establishing connection with the topology manager | 1 | +| heron.tmanager.network.server.options.maximum.packet.mb | Maximum packet size (in MB) of topology manager's network options to connect to stream managers | 16 | +| heron.tmanager.network.controller.options.maximum.packet.mb | Maximum packet size (in MB) of the topology manager's network options to connect to scheduler | 1 | +| heron.tmanager.network.stats.options.maximum.packet.mb | Maximum packet size (in MB) of the topology manager's network options for stat queries | 1 | +| heron.tmanager.metrics.collector.purge.interval.sec | The interval (in seconds) at which the topology manager purges metrics from socket | 60 | +| heron.tmanager.metrics.collector.maximum.exception | The maximum number of exceptions to be stored in the topology metrics collector, to prevent out-of-memory errors | 256 | +| heron.tmanager.metrics.network.bindallinterfaces | Whether the metrics reporter should bind on all interfaces | False | +| heron.tmanager.stmgr.state.timeout.sec | The timeout (in seconds) for the stream manager, compared with (current time - last heartbeat time) | 60 | | heron.metricsmgr.network.read.batch.time.ms | The maximum batch time (in milliseconds) for the metrics manager to read from socket | 16 | | heron.metricsmgr.network.read.batch.size.bytes | The maximum batch size (in bytes) to read from socket | 32768 | | heron.metricsmgr.network.write.batch.time.ms | The maximum batch time (in milliseconds) for the metrics manager to write to socket | 32768 | @@ -532,7 +532,7 @@ You can configure Heron on Kubernetes using a variety of YAML config files, list | heron.instance.reconnect.metricsmgr.interval.sec | Interval in seconds to reconnect to the metrics manager, including the request timeout in connecting | 5 | | heron.instance.reconnect.metricsmgr.times | Interval in 
seconds to reconnect to the metrics manager, including the request timeout in connecting | 60 | | heron.instance.metrics.system.sample.interval.sec | The interval in second for an instance to sample its system metrics, for instance, CPU load. | 10 | -| heron.instance.slave.fetch.pplan.interval.sec | The time interval (in seconds) at which Heron instances fetch the physical plan from slaves | 1 | +| heron.instance.executor.fetch.pplan.interval.sec | The time interval (in seconds) at which Heron instances fetch the physical plan from executors | 1 | | heron.instance.acknowledgement.nbuckets | For efficient acknowledgement | 10 | | heron.instance.tuning.expected.bolt.read.queue.size | The expected size on read queue in bolt | 8 | | heron.instance.tuning.expected.bolt.write.queue.size | The expected size on write queue in bolt | 8 | @@ -585,4 +585,4 @@ You can configure Heron on Kubernetes using a variety of YAML config files, list | heron.class.uploader | uploader class for transferring the topology files (jars, tars, PEXes, etc.) to storage | org.apache.heron.uploader.s3.S3Uploader | | heron.uploader.s3.bucket | S3 bucket in which topology assets will be stored (if AWS S3 is being used) | | | heron.uploader.s3.access_key | AWS access key (if AWS S3 is being used) | | -| heron.uploader.s3.secret_key | AWS secret access key (if AWS S3 is being used) | | \ No newline at end of file +| heron.uploader.s3.secret_key | AWS secret access key (if AWS S3 is being used) | | diff --git a/website2/docs/schedulers-standalone.md b/website2/docs/schedulers-standalone.md index a118702c067..8953da53739 100644 --- a/website2/docs/schedulers-standalone.md +++ b/website2/docs/schedulers-standalone.md @@ -65,14 +65,14 @@ You should see output like this: ```bash [2018-01-22 10:37:06 -0800] [INFO]: Roles: -[2018-01-22 10:37:06 -0800] [INFO]: - Master Servers: ['127.0.0.1'] -[2018-01-22 10:37:06 -0800] [INFO]: - Slave Servers: ['127.0.0.1'] +[2018-01-22 10:37:06 -0800] [INFO]: - Primary Servers: ['127.0.0.1'] +[2018-01-22 10:37:06 -0800] [INFO]: - Secondary Servers: ['127.0.0.1'] [2018-01-22 10:37:06 -0800] [INFO]: - Zookeeper Servers: ['127.0.0.1'] [2018-01-22 10:37:06 -0800] [INFO]: Updating config files... -[2018-01-22 10:37:06 -0800] [INFO]: Starting master on 127.0.0.1 -[2018-01-22 10:37:06 -0800] [INFO]: Done starting masters -[2018-01-22 10:37:06 -0800] [INFO]: Starting slave on 127.0.0.1 -[2018-01-22 10:37:06 -0800] [INFO]: Done starting slaves +[2018-01-22 10:37:06 -0800] [INFO]: Starting primary on 127.0.0.1 +[2018-01-22 10:37:06 -0800] [INFO]: Done starting primaries +[2018-01-22 10:37:06 -0800] [INFO]: Starting secondary on 127.0.0.1 +[2018-01-22 10:37:06 -0800] [INFO]: Done starting secondaries [2018-01-22 10:37:06 -0800] [INFO]: Waiting for cluster to come up... 0 [2018-01-22 10:37:08 -0800] [INFO]: Starting Heron API Server on 127.0.0.1 [2018-01-22 10:37:08 -0800] [INFO]: Waiting for API server to come up... 0 @@ -111,10 +111,10 @@ This will return a JSON string containing a list of hosts for Heron and ZooKeepe "127.0.0.1" ], "roles": { - "masters": [ + "primaries": [ "127.0.0.1" ], - "slaves": [ + "secondaries": [ "127.0.0.1" ], "zookeepers": [ @@ -206,11 +206,11 @@ Once the topology has been submitted, it can be deactivated, killed, updated, an Heron standalone uses [Nomad](https://www.nomadproject.io/) as a scheduler. For the most part, you shouldn't need to interact with Nomad when managing your Heron standalone cluster. 
If you do need to manage Nomad directly, however, you can do so using the `heron-nomad` executable, which is installed at `~/.heron/bin/heron-nomad`. That executable is essentially an alias for the `nomad` CLI tool. You can find documentation in the [official Nomad docs](https://www.nomadproject.io/docs/commands/index.html).
 
-You can also access the [Nomad Web UI](https://www.nomadproject.io/guides/ui.html) on port 4646 of any master node in the Heron cluster. You can see a list of master nodes by running `heron-admin standalone info`. If you're running a standalone cluster locally on your machine, you can access the Nomad UI at `localhost:4646`.
+You can also access the [Nomad Web UI](https://www.nomadproject.io/guides/ui.html) on port 4646 of any primary node in the Heron cluster. You can see a list of primary nodes by running `heron-admin standalone info`. If you're running a standalone cluster locally on your machine, you can access the Nomad UI at `localhost:4646`.
 
 ## Debugging Help
 
-The locations of the logs for the Nomad Server (master node) and Nomad Clients (slave nodes) are located at '/tmp/nomad_server_log' and '/tmp/nomad_client.log' respectively. Please look through these logs to see if there was a error setting up the Nomad cluster
+The locations of the logs for the Nomad Server (primary node) and Nomad Clients (secondary nodes) are located at '/tmp/nomad_server_log' and '/tmp/nomad_client.log' respectively. Please look through these logs to see if there was an error setting up the Nomad cluster.
 
 ### Common Problems
 
 ```
 Error starting agent: Failed to start Consul server: Failed to start lan serf: F
 ```
 
 The Nomad server cannot determine the network address to advertise itself on. You will need to manually set that address. You can do that by modifying the configuration file:
 
-~/.heron/conf/standalone/resources/master.hcl
+~/.heron/conf/standalone/resources/primary.hcl
 
 You will need to add a stanza like:
diff --git a/website2/docs/schedulers-yarn.md b/website2/docs/schedulers-yarn.md
index 10bf73f1985..d0f72682b62 100644
--- a/website2/docs/schedulers-yarn.md
+++ b/website2/docs/schedulers-yarn.md
@@ -43,7 +43,7 @@ Following steps are executed when a Heron topology is submitted:
 1. The REEF client copies the `Heron Core package` and the `topology package` on the distributed file system.
 1. It then starts the YARN Application Master (AM) for the topology.
 1. The AM subsequently invokes the `Heron Scheduler` in the same process.
-1. This is followed by container allocation for the topology's master and workers. As a result `N+2`
+1. This is followed by container allocation for the topology's manager and workers. As a result `N+2`
 containers are allocated for each topology.
 
 ### Configuring the Heron client classpath
@@ -149,10 +149,10 @@ Assuming HDFS as the file system, Heron logs and REEF logs can be found in the f
 1. Ths scheduler's logs are created on the first/AM container:
 `/usercache/heron/appcache/application_1466548964728_0004/container_1466548964728_0004_01_000001/log-files`
 
-1. Logs generated when the TMaster starts in its container:
+1. Logs generated when the TManager starts in its container:
 `/userlogs/application_1466548964728_0004/container_1466548964728_0004_01_000002/evaluator.stderr`
 
-1. The TMaster's logs are created on the second container owned by the topology app:
+1. The TManager's logs are created on the second container owned by the topology app:
 `/usercache/heron/appcache/application_1466548964728_0004/container_1466548964728_0004_01_000002/log-files`
 
 1. Worker logs are created on the remaining containers in the YARN NodeManager's local directory.
 
 ## Work in Progress
 
-1. The YARN Scheduler will restart any failed workers and TMaster containers. However [AM HA](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html) is not
+1. The YARN Scheduler will restart any failed workers and TManager containers. However [AM HA](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html) is not
 supported yet. As a result AM failure will result in topology failure. Issue: [#949](https://github.com/apache/incubator-heron/issues/949)
-1. TMaster and Scheduler are started in separate containers. Increased network latency can result
+1. TManager and Scheduler are started in separate containers. Increased network latency can result
 in warnings or failures. Issue: [#951](https://github.com/apache/incubator-heron/issues/951)
diff --git a/website2/docs/state-managers-local-fs.md b/website2/docs/state-managers-local-fs.md
index 44b3c9ac68c..228167fd11e 100644
--- a/website2/docs/state-managers-local-fs.md
+++ b/website2/docs/state-managers-local-fs.md
@@ -39,11 +39,11 @@ state manager. You should set this to `org.apache.heron.statemgr.localfs.LocalFi
 
 * `heron.statemgr.root.path` --- The root path in the local file system where state information is
 stored. We recommend providing Heron with an exclusive directory; if you do not, make sure that
-the following sub-directories are unused: `/tmasters`, `/topologies`, `/pplans`, `/executionstate`,
+the following sub-directories are unused: `/tmanagers`, `/topologies`, `/pplans`, `/executionstate`,
 `/schedulers`.
 
 * `heron.statemgr.localfs.is.initialize.file.tree` --- Indicates whether the nodes under root
-`/tmasters`, `/topologies`, `/pplans`, `/executionstate`, and `/schedulers` need to created, if they
+`/tmanagers`, `/topologies`, `/pplans`, `/executionstate`, and `/schedulers` need to be created, if they
 are not found. Set it to `True`, if you could like Heron to create those directories. If those
 directories are already there, set it to `False`. The absence of this configuration implies `True`.
 
diff --git a/website2/docs/state-managers-zookeeper.md b/website2/docs/state-managers-zookeeper.md
index b0891912001..b57ff1f3a01 100644
--- a/website2/docs/state-managers-zookeeper.md
+++ b/website2/docs/state-managers-zookeeper.md
@@ -50,10 +50,10 @@ cluster (e.g) "127.0.0.1:2181".
 
 * `heron.statemgr.root.path` --- The root ZooKeeper node to be used by Heron. We recommend
 providing Heron with an exclusive root node; if you do not, make sure that the following child
-nodes are unused: `/tmasters`, `/topologies`, `/pplans`, `/executionstate`, `/schedulers`.
+nodes are unused: `/tmanagers`, `/topologies`, `/pplans`, `/executionstate`, `/schedulers`.
 
 * `heron.statemgr.zookeeper.is.initialize.tree` --- Indicates whether the nodes under ZooKeeper
-root `/tmasters`, `/topologies`, `/pplans`, `/executionstate`, and `/schedulers` need to created,
+root `/tmanagers`, `/topologies`, `/pplans`, `/executionstate`, and `/schedulers` need to be created,
 if they are not found. Set it to `True` if you could like Heron to create those nodes.
If those nodes are already there, set it to `False`. The absence of this configuration implies `True`. diff --git a/website2/docs/user-manuals-tracker-rest.md b/website2/docs/user-manuals-tracker-rest.md index bfa76baf070..6055592862a 100644 --- a/website2/docs/user-manuals-tracker-rest.md +++ b/website2/docs/user-manuals-tracker-rest.md @@ -198,7 +198,7 @@ Each execution state object lists the following: topology * `release_version` --- Release version * `has_physical_plan` --- Whether the topology has a physical plan -* `has_tmaster_location` --- Whether the topology has a Topology Master Location +* `has_tmanager_location` --- Whether the topology has a Topology Manager Location * `has_scheduler_location` --- Whether the topology has a Scheduler Location * `viz` --- Metric visualization UI URL for the topology if it was [configured](user-manuals-heron-tracker-runbook) @@ -224,8 +224,8 @@ $ curl "http://heron-tracker-url/topologies/states?cluster=cluster1&environ=deve ### /topologies/info Returns a JSON representation of a dictionary containing logical plan, physical plan, -execution state, scheduler location and TMaster location for a topology, as described above. -`TMasterLocation` is the location of the TMaster, including its host, +execution state, scheduler location and TManager location for a topology, as described above. +`TManagerLocation` is the location of the TManager, including its host, port, and the heron-shell port that it exposes. #### Parameters @@ -279,7 +279,7 @@ Returns a JSON map of instances of the topology to their respective metrics. To filter instances returned use the `instance` parameter discussed below. -Note that these metrics come from TMaster, which only holds metrics +Note that these metrics come from TManager, which only holds metrics for last 3 hours minutely data, as well as cumulative values. If the `interval` is greater than `10800` seconds, the values will be for all-time metrics. @@ -304,7 +304,7 @@ The difference between this and `/metrics` endpoint above, is that `/metrics` wi cumulative value over the period of `interval` provided. On the other hand, `/metricstimeline` endpoint will report minutely values for each metricname for each instance. -Note that these metrics come from TMaster, which only holds metrics +Note that these metrics come from TManager, which only holds metrics for last 3 hours minutely data, as well as cumulative all-time values. If the starttime is older than 3 hours ago, those minutes would not be part of the response. @@ -325,7 +325,7 @@ is older than 3 hours ago, those minutes would not be part of the response. Executes the metrics query for the topology and returns the result in form of minutely timeseries. A detailed description of query language is given [below](#metricsquery). -Note that these metrics come from TMaster, which only holds metrics +Note that these metrics come from TManager, which only holds metrics for last 3 hours minutely data, as well as cumulative all-time values. If the starttime is older than 3 hours ago, those minutes would not be part of the response. @@ -471,7 +471,7 @@ Example: TS(component1, *, __emit-count/stream1) ``` -Time Series Operator. This is the basic operator that is responsible for getting metrics from TMaster. +Time Series Operator. This is the basic operator that is responsible for getting metrics from TManager. Accepts a list of 3 elements: 1. 
componentName diff --git a/website2/website/README.md b/website2/website/README.md index f44f6b889dd..64fe0d0af05 100755 --- a/website2/website/README.md +++ b/website2/website/README.md @@ -69,7 +69,7 @@ docs ├── cluster-config-overview.md ├── cluster-config-stream.md ├── cluster-config-system-level.md -├── cluster-config-tmaster.md +├── cluster-config-tmanager.md ├── compiling-code-organization.md ├── compiling-docker.md ├── compiling-linux.md @@ -172,4 +172,4 @@ Versioned sidebars are also copied into `website/versioned_sidebars` and are nam If you want to change the documentation for a previous version, you can access files for that respective version. -For more details about versioning, refer to [Versioning](https://docusaurus.io/docs/en/versioning). \ No newline at end of file +For more details about versioning, refer to [Versioning](https://docusaurus.io/docs/en/versioning). diff --git a/website2/website/sidebars.json b/website2/website/sidebars.json index b4672166916..ef7df39f71a 100755 --- a/website2/website/sidebars.json +++ b/website2/website/sidebars.json @@ -68,7 +68,7 @@ "cluster-config-instance", "cluster-config-metrics", "cluster-config-stream", - "cluster-config-tmaster" + "cluster-config-tmanager" ], "Observability": [ "observability-prometheus", @@ -99,4 +99,4 @@ "heron-resources-resources" ] } -} \ No newline at end of file +} diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-overview.md b/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-overview.md index 8182ae2e54d..37bf61ca3ad 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-overview.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-overview.md @@ -52,7 +52,7 @@ specific components in a topology and are detailed in the docs below: * [Heron Instance](cluster-config-instance) * [Heron Metrics Manager](cluster-config-metrics) * [Heron Stream Manager](cluster-config-stream) -* [Heron Topology Master](cluster-config-tmaster) +* [Heron Topology Manager](cluster-config-tmanager) ### Overriding Heron Cluster Configuration diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-stream.md b/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-stream.md index 7ae2d0ccfd3..b82bffbbb24 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-stream.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-stream.md @@ -49,7 +49,7 @@ Parameter | Meaning | Default `heron.streammgr.cache.drain.frequency.ms` | The frequency (in milliseconds) at which the SM's tuple cache is drained | `10` `heron.streammgr.cache.drain.size.mb` | The size threshold (in megabytes) at which the SM's tuple cache is drained | `100` `heron.streammgr.client.reconnect.interval.sec` | The reconnect interval to other SMs for the SM client (in seconds) | `1` -`heron.streammgr.client.reconnect.tmaster.interval.sec` | The reconnect interval to the Topology Master for the SM client (in seconds) | `10` -`heron.streammgr.tmaster.heartbeat.interval.sec` | The interval (in seconds) at which a heartbeat is sent to the Topology Master | `10` +`heron.streammgr.client.reconnect.tmanager.interval.sec` | The reconnect interval to the Topology Manager for the SM client (in seconds) | `10` +`heron.streammgr.tmanager.heartbeat.interval.sec` | The interval (in seconds) at which a heartbeat is sent to the Topology Manager | `10` 
`heron.streammgr.connection.read.batch.size.mb` | The maximum batch size (in megabytes) at which the SM reads from the socket | `1` `heron.streammgr.connection.write.batch.size.mb` | The maximum batch size (in megabytes) to write by the stream manager to the socket | `1` diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-system-level.md b/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-system-level.md index 3d7447eb39f..e15cff068e4 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-system-level.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-system-level.md @@ -28,7 +28,7 @@ apply to any specific component. Config | Meaning | Default :----- |:------- |:------- -`heron.check.tmaster.location.interval.sec` | The interval, in seconds, after which to check if the topology master location has been fetched or not | 120 +`heron.check.tmanager.location.interval.sec` | The interval, in seconds, after which to check if the topology manager location has been fetched or not | 120 `heron.metrics.export.interval` | The interval, in seconds, at which components export metrics to the topology's Metrics Manager ## Logging @@ -40,4 +40,4 @@ Config | Meaning | Default `heron.logging.maximum.files` | The maximum number of log files | 5 `heron.logging.prune.interval.sec` | The time interval, in seconds, at which Heron prunes log files | 300 `heron.logging.flush.interval.sec` | The time interval, in seconds, at which Heron flushes log files | 10 -`heron.logging.err.threshold` | The threshold level to log error | 3 \ No newline at end of file +`heron.logging.err.threshold` | The threshold level to log error | 3 diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-tmanager.md b/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-tmanager.md new file mode 100644 index 00000000000..31cc4dc3b68 --- /dev/null +++ b/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-tmanager.md @@ -0,0 +1,39 @@ +--- +id: version-0.20.0-incubating-cluster-config-tmanager +title: Topology Manager +sidebar_label: Topology Manager +original_id: cluster-config-tmanager +--- + + +You can configure the [Topology +Manager](heron-architecture#topology-manager) (TM) for a topology +using the parameters below. 
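Any parameter in the table below can also be overridden for a single topology at submit time. As a minimal sketch, assuming the `heron` CLI's `--config-property` override flag and the example topology jar bundled with a default installation (both may differ in your setup):

```bash
# Hypothetical override: keep Topology Manager metrics for 6 hours
# instead of the default 180 minutes. The jar path, class name, and the
# --config-property flag are illustrative assumptions.
heron submit local \
  ~/.heron/examples/heron-api-examples.jar \
  org.apache.heron.examples.api.ExclamationTopology ExclamationTopology \
  --config-property heron.tmanager.metrics.collector.maximum.interval.min=360
```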
+ +Parameter | Meaning | Default +:-------- |:------- |:------- +`heron.tmanager.metrics.collector.maximum.interval.min` | The maximum interval, in minutes, for metrics to be kept in the Topology Manager | 180 +`heron.tmanager.establish.retry.times` | The maximum number of times to retry establishing connection with the Topology Manager | 30 +`heron.tmanager.establish.retry.interval.sec` | The interval, in seconds, at which to retry establishing connection with the Topology Manager | 1 +`heron.tmanager.network.server.options.maximum.packet.mb` | The maximum packet size, in megabytes, of the Topology Manager's network options for Stream Managers to connect to | 16 +`heron.tmanager.network.controller.options.maximum.packet.mb` | The maximum packet size, in megabytes, of the Topology Manager's network options for scheduler to connect to | 1 +`heron.tmanager.network.stats.options.maximum.packet.mb` | The maximum packet size, in megabytes, of the Topology Manager's network options for stat queries | 1 +`heron.tmanager.metrics.collector.purge.interval.sec` | The interval, in seconds, at which the Topology Manager purges metrics from the socket | 60 +`heron.tmanager.metrics.collector.maximum.exception` | The maximum number of exceptions to be stored in the topology's metrics collector, to prevent potential out-of-memory issues | 256 +`heron.tmanager.metrics.network.bindallinterfaces` | Whether the metrics reporter binds on all interfaces | `False` +`heron.tmanager.stmgr.state.timeout.sec` | The timeout, in seconds, for the Stream Manager, compared with (current time - last heartbeat time) | 60 diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-tmaster.md b/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-tmaster.md deleted file mode 100644 index 9566bf7022e..00000000000 --- a/website2/website/versioned_docs/version-0.20.0-incubating/cluster-config-tmaster.md +++ /dev/null @@ -1,39 +0,0 @@ ---- -id: version-0.20.0-incubating-cluster-config-tmaster -title: Topology Master -sidebar_label: Topology Master -original_id: cluster-config-tmaster ---- - -You can configure the [Topology -Master](heron-architecture#topology-master) (TM) for a topology -using the parameters below.
- -Parameter | Meaning | Default -:-------- |:------- |:------- -`heron.tmaster.metrics.collector.maximum.interval.min` | The maximum interval, in minutes, for metrics to be kept in the Topology Master | 180 -`heron.tmaster.establish.retry.times` | The maximum time to retry to establish the Topology Master | 30 -`heron.tmaster.establish.retry.interval.sec` | The interval to retry to establish the Topology Master | 1 -`heron.tmaster.network.master.options.maximum.packet.mb` | The maximum packet size, in megabytes, of the Topology Master's network options for Stream Managers to connect to | 16 -`heron.tmaster.network.controller.options.maximum.packet.mb` | The maximum packet size, in megabytes, of the Topology Master's network options for scheduler to connect to | 1 -`heron.tmaster.network.stats.options.maximum.packet.mb` | The maximum packet size, in megabytes, of the Topology Master's network options for stat queries | 1 -`heron.tmaster.metrics.collector.purge.interval.sec` | The interval, in seconds, at which the Topology Master purges metrics from the socket | 60 -`heron.tmaster.metrics.collector.maximum.exception` | The maximum number of exceptions to be stored in the topology's metrics collector, to prevent potential out-of-memory issues | 256 -`heron.tmaster.metrics.network.bindallinterfaces` | Whether the metrics reporter binds on all interfaces | `False` -`heron.tmaster.stmgr.state.timeout.sec` | The timeout, in seconds, for the Stream Manager, compared with (current time - last heartbeat time) | 60 \ No newline at end of file diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/compiling-code-organization.md b/website2/website/versioned_docs/version-0.20.0-incubating/compiling-code-organization.md index aa423f979a9..4191ce3bc69 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/compiling-code-organization.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/compiling-code-organization.md @@ -33,7 +33,7 @@ cluster, see [Building Topologies](topology-development-topology-api-java) inste The primary programming languages for Heron are C++, Java, and Python. * **C++ 11** is used for most of Heron's core components, including the -[Topology Master](heron-architecture#topology-master), and +[Topology Manager](heron-architecture#topology-manager), and [Stream Manager](heron-architecture#stream-manager). * **Java 8** is used primarily for Heron's [topology @@ -93,11 +93,11 @@ Heron components. ## Topology Components -### Topology Master +### Topology Manager The C++ code for Heron's [Topology -Master](heron-architecture#topology-master) is written in C++ can be -found in [`heron/tmaster`]({{% githubMaster %}}/heron/tmaster). +Manager](heron-architecture#topology-manager) is written in C++ and can be +found in [`heron/tmanager`]({{% githubMaster %}}/heron/tmanager). ### Stream Manager diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/compiling-running-tests.md b/website2/website/versioned_docs/version-0.20.0-incubating/compiling-running-tests.md index 96ea39010aa..5641151788d 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/compiling-running-tests.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/compiling-running-tests.md @@ -83,7 +83,7 @@ Integration tests are divided into two categories: * Failure integration tests These integration tests are designed for testing recovery from failure/restart - in certain processes, such as Topology Master and Metrics Manager.
+ in certain processes, such as Topology Manager and Metrics Manager. To run the failure integration tests on a Mac OS X, do the following: ```bash diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/deployment-configuration.md b/website2/website/versioned_docs/version-0.20.0-incubating/deployment-configuration.md index 6dd8f0d72d2..26947b07ecd 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/deployment-configuration.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/deployment-configuration.md @@ -43,7 +43,7 @@ configuration. Once you are familiar with the system you can tune these paramete high throughput or low latency topologies. * **metrics_sinks.yaml** --- This file specifies where the run-time system and topology metrics -will be routed. By default, the `file sink` and `tmaster sink` need to be present. In addition, +will be routed. By default, the `file sink` and `tmanager sink` need to be present. In addition, `scribe sink` and `graphite sink` are also supported. * **packing.yaml** --- This file specifies the classes for `packing algorithm`, which defaults diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/extending-heron-metric-sink.md b/website2/website/versioned_docs/version-0.20.0-incubating/extending-heron-metric-sink.md index 1de2564fbe1..2f900d1ba81 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/extending-heron-metric-sink.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/extending-heron-metric-sink.md @@ -139,7 +139,7 @@ sink by name. You should add the sink you want to use to that list. Here's an ex sinks: - file-sink - scribe-sink - - tmaster-sink + - tmanager-sink - print-sink - prometheus-sink ``` diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/getting-started-troubleshooting-guide.md b/website2/website/versioned_docs/version-0.20.0-incubating/getting-started-troubleshooting-guide.md index 7fa23ae88cb..c986dd9776f 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/getting-started-troubleshooting-guide.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/getting-started-troubleshooting-guide.md @@ -34,7 +34,7 @@ heron submit ... ExclamationTopology --verbose ### 2. Why does the topology launch successfully but fail to start? Even if the topology is submitted successfully, it could still fail to -start some component. For example, TMaster may fail to start due to unfulfilled +start some component. For example, TManager may fail to start due to unfulfilled dependencies. For example, the following message can appear: @@ -51,7 +51,7 @@ java.nio.file.NoSuchFileException: \ ... -[2016-05-27 12:02:38 -0600] org.apache.heron.spi.utils.TMasterUtils SEVERE: \ +[2016-05-27 12:02:38 -0600] org.apache.heron.spi.utils.TManagerUtils SEVERE: \ Failed to get physical plan for topology ExclamationTopology ... @@ -122,7 +122,7 @@ To check, run the following command in a shell. ### 3. Why does the process fail during runtime? 
-If a component (e.g., TMaster or Stream Manager) has failed during runtime, visit the component's logs in +If a component (e.g., TManager or Stream Manager) has failed during runtime, visit the component's logs in ```bash ~/.herondata/topologies/{cluster}/{role}/{TopologyName}/log-files/ diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/guides-troubeshooting-guide.md b/website2/website/versioned_docs/version-0.20.0-incubating/guides-troubeshooting-guide.md index 363748f7ba6..48bbdf05886 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/guides-troubeshooting-guide.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/guides-troubeshooting-guide.md @@ -114,14 +114,14 @@ We assume here that heron client has successfully launched the topology. *Symptom* - Physical plan or logical plan does not show up on UI *Possible Cause* - One or more of the stream managers have not yet connected to -Tmaster. +Tmanager. *What to do* - -1. Go to the Tmaster logs for the topology. The zeroth container is reserved for - Tmaster. Go to the container and browse to +1. Go to the Tmanager logs for the topology. The zeroth container is reserved for + Tmanager. Go to the container and browse to - log-files/heron-tmaster-.INFO + log-files/heron-tmanager-.INFO and see which stream managers have not yet connected. The `stmgr` ID corresponds to the container number. For example, `stmgr-10` corresponds to @@ -129,7 +129,7 @@ Tmaster. 2. Visit that container to see what is wrong in stream manager's logs, which can be found in `log-files` - directory similar to Tmaster. + directory similar to Tmanager. #### 3. Instances are not starting up @@ -235,4 +235,4 @@ Follow these steps to enable remote debugging: To set up remote debugging with IntelliJ, use the [remote debugging instructions](https://www.jetbrains.com/help/idea/2016.2/run-debug-configuration-remote.html). 3. Once the topology is activated start the debugger at ```localhost:{port}``` if in standalone - local deployment or ``` {IP}/{hostname}:{port}``` for multi container remote deployment. And you will be able to debug the code step by step. \ No newline at end of file + local deployment or ``` {IP}/{hostname}:{port}``` for multi container remote deployment. And you will be able to debug the code step by step. diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/guides-ui-guide.md b/website2/website/versioned_docs/version-0.20.0-incubating/guides-ui-guide.md index 3a02204bf2d..cc36e46506c 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/guides-ui-guide.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/guides-ui-guide.md @@ -149,7 +149,7 @@ amongst other things. 1. The jar or tar file associated with this topology 2. Logs for heron-executor 3. `log-files` folder which has instance logs, as well as `stream manager` or - `tmaster` logs. + `tmanager` logs. ![Jobpage](assets/jobpage1.png) diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/heron-architecture.md b/website2/website/versioned_docs/version-0.20.0-incubating/heron-architecture.md index a4847f9c5b6..74c6d052529 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/heron-architecture.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/heron-architecture.md @@ -101,16 +101,16 @@ components.
The following core components of Heron topologies are discussed in depth in the sections below: -* [Topology Master](#topology-master) +* [Topology Manager](#topology-manager) * [Containers](#containers) * [Stream Manager](#stream-manager) * [Heron Instance](#heron-instance) * [Metrics Manager](#metrics-manager) * [Heron Tracker](#heron-tracker) -### Topology Master +### Topology Manager -The **Topology Master** \(TM) manages a topology throughout its entire lifecycle, +The **Topology Manager** \(TM) manages a topology throughout its entire lifecycle, from the time it's submitted until it's ultimately killed. When `heron` deploys a topology it starts a single TM and multiple [containers](heron-architecture#container). The **TM** creates an ephemeral [ZooKeeper](http://zookeeper.apache.org) node to @@ -119,12 +119,12 @@ discoverable by any process in the topology. The **TM** also constructs the [phy plan](heron-topology-concepts#physical-plan) for a topology which it relays to different components. -![Topology Master](assets/tmaster.png) +![Topology Manager](assets/tmanager.png) -#### Topology Master Configuration +#### Topology Manager Configuration TMs have a variety of [configurable -parameters](cluster-config-tmaster) that you can adjust at each +parameters](cluster-config-tmanager) that you can adjust at each phase of a topology's [lifecycle](heron-topology-concepts#topology-lifecycle). ### Containers @@ -135,7 +135,7 @@ Manager](#stream-manager), and a [Metrics Manager](#metrics-manager). Containers communicate with the topology's **TM** to ensure that the topology forms a fully connected graph. -For an illustration, see the figure in the [Topology Master](#topology-master) +For an illustration, see the figure in the [Topology Manager](#topology-manager) section above. > In Heron, all topology containerization is handled by the scheduler, be it [Mesos](schedulers-meso-local-mac), [Kubernetes](schedulers-k8s-with-helm), [YARN](schedulers-k8s-by-hand), or something else. Heron schedulers typically use [cgroups](https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/ch01) to manage Heron topology processes. @@ -198,7 +198,7 @@ each phase of a topology's [lifecycle](heron-topology-concepts##topology-lifecyc Each topology runs a **Metrics Manager** (MM) that collects and exports metrics from all components in a [container]({{< ref "#container" >}}). It then routes those metrics to -both the [Topology Master]({{< ref "#topology-master" >}}) and to external collectors, such as +both the [Topology Manager]({{< ref "#topology-manager" >}}) and to external collectors, such as [Scribe](https://github.com/facebookarchive/scribe), [Graphite](http://graphite.wikidot.com/), or analogous systems. @@ -289,25 +289,25 @@ Storage | When the topology is deployed to containers by the scheduler, the code * Topologies `heron-executor` process is started for each container and is responsible for - executing the **Topology Master** or **Heron Instances** (Bolt/Spout) that are - assigned to the container. Note that the **Topology Master** is always executed + executing the **Topology Manager** or **Heron Instances** (Bolt/Spout) that are + assigned to the container. Note that the **Topology Manager** is always executed on container 0. When `heron-executor` executes normal **Heron Instances** (i.e. 
except for container 0), it first prepares the **Stream Manager** and the **Metrics Manager** before starting `org.apache.heron.instance.HeronInstance` for each instance that is assigned to the container. - **Heron Instance** has two threads: the gateway thread and the slave thread. + **Heron Instance** has two threads: the gateway thread and the executor thread. The gateway thread is mainly responsible for communicating with the **Stream Manager** and the **Metrics Manager** using `StreamManagerClient` and `MetricsManagerClient` - respectively, as well as sending/receiving tuples to/from the slave - thread. On the other hand, the slave thread runs either Spout or Bolt + respectively, as well as sending/receiving tuples to/from the executor + thread. On the other hand, the executor thread runs either Spout or Bolt of the topology based on the physical plan. When a new **Heron Instance** is started, its `StreamManagerClient` establishes a connection and registers itself with the **Stream Manager**. After the successful registration, the gateway thread sends its physical plan to - the slave thread, which then executes the assigned instance accordingly. + the executor thread, which then executes the assigned instance accordingly. ## Codebase diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/schedulers-k8s-by-hand.md b/website2/website/versioned_docs/version-0.20.0-incubating/schedulers-k8s-by-hand.md index 6a6ed160a8b..cda77d09dcc 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/schedulers-k8s-by-hand.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/schedulers-k8s-by-hand.md @@ -462,14 +462,14 @@ You can configure Heron on Kubernetes using a variety of YAML config files, list ### heron_internals.yaml -#### Configuration for a wide variety of Heron components, including logging, each topology's stream manager and topology master, and more. +#### Configuration for a wide variety of Heron components, including logging, each topology's stream manager and topology manager, and more. 
| name | description | default | |--------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------|-----------| | heron.logging.directory | The relative path to the logging directory | log-files | | heron.logging.maximum.size.mb | The maximum log file size (in MB) | 100 | | heron.logging.maximum.files | The maximum number of log files | 5 | -| heron.check.tmaster.location.interval.sec | The interval, in seconds, after which to check if the topology master location has been fetched or not | 120 | +| heron.check.tmanager.location.interval.sec | The interval, in seconds, after which to check if the topology manager location has been fetched or not | 120 | | heron.logging.prune.interval.sec | The interval, in seconds, at which to prune C++ log files | 300 | | heron.logging.flush.interval.sec | The interval, in seconds, at which to flush C++ log files | 10 | | heron.logging.err.threshold | The threshold level at which to log errors | 3 | @@ -481,25 +481,25 @@ You can configure Heron on Kubernetes using a variety of YAML config files, list | heron.streammgr.xormgr.rotatingmap.nbuckets | For efficient acknowledgements | 3 | | heron.streammgr.mempool.max.message.number | The max number of messages in the memory pool for each message type | 512 | | heron.streammgr.client.reconnect.interval.sec | The reconnect interval to other stream managers (in seconds) for the stream manager client | 1 | -| heron.streammgr.client.reconnect.tmaster.interval.sec | The reconnect interval to the topology master (in seconds) for the stream manager client | 10 | -| heron.streammgr.client.reconnect.tmaster.max.attempts | The max reconnect attempts to tmaster for stream manager client | 30 | +| heron.streammgr.client.reconnect.tmanager.interval.sec | The reconnect interval to the topology manager (in seconds) for the stream manager client | 10 | +| heron.streammgr.client.reconnect.tmanager.max.attempts | The max reconnect attempts to tmanager for stream manager client | 30 | | heron.streammgr.network.options.maximum.packet.mb | The maximum packet size (in MB) of the stream manager's network options | 10 | -| heron.streammgr.tmaster.heartbeat.interval.sec | The interval (in seconds) at which to send heartbeats | 10 | +| heron.streammgr.tmanager.heartbeat.interval.sec | The interval (in seconds) at which to send heartbeats | 10 | | heron.streammgr.connection.read.batch.size.mb | The maximum batch size (in MB) for the stream manager to read from socket | 1 | | heron.streammgr.connection.write.batch.size.mb | Maximum batch size (in MB) for the stream manager to write to socket | 1 | | heron.streammgr.network.backpressure.threshold | The number of times Heron should wait to see a buffer full while enqueueing data before declaring the start of backpressure | 3 | | heron.streammgr.network.backpressure.highwatermark.mb | The high-water mark on the number (in MB) that can be left outstanding on a connection | 100 | | heron.streammgr.network.backpressure.lowwatermark.mb | The low-water mark on the number (in MB) that can be left outstanding on a connection | | -| heron.tmaster.metrics.collector.maximum.interval.min | The maximum interval (in minutes) for metrics to be kept in the topology master | 180 | -| heron.tmaster.establish.retry.times | The maximum number of times to retry establishing connection with the topology master | 30 | -| 
heron.tmaster.establish.retry.interval.sec | The interval at which to retry establishing connection with the topology master | 1 | -| heron.tmaster.network.master.options.maximum.packet.mb | Maximum packet size (in MB) of topology master's network options to connect to stream managers | 16 | -| heron.tmaster.network.controller.options.maximum.packet.mb | Maximum packet size (in MB) of the topology master's network options to connect to scheduler | 1 | -| heron.tmaster.network.stats.options.maximum.packet.mb | Maximum packet size (in MB) of the topology master's network options for stat queries | 1 | -| heron.tmaster.metrics.collector.purge.interval.sec | The interval (in seconds) at which the topology master purges metrics from socket | 60 | -| heron.tmaster.metrics.collector.maximum.exception | The maximum number of exceptions to be stored in the topology metrics collector, to prevent out-of-memory errors | 256 | -| heron.tmaster.metrics.network.bindallinterfaces | Whether the metrics reporter should bind on all interfaces | False | -| heron.tmaster.stmgr.state.timeout.sec | The timeout (in seconds) for the stream manager, compared with (current time - last heartbeat time) | 60 | +| heron.tmanager.metrics.collector.maximum.interval.min | The maximum interval (in minutes) for metrics to be kept in the topology manager | 180 | +| heron.tmanager.establish.retry.times | The maximum number of times to retry establishing connection with the topology manager | 30 | +| heron.tmanager.establish.retry.interval.sec | The interval at which to retry establishing connection with the topology manager | 1 | +| heron.tmanager.network.server.options.maximum.packet.mb | Maximum packet size (in MB) of topology manager's network options to connect to stream managers | 16 | +| heron.tmanager.network.controller.options.maximum.packet.mb | Maximum packet size (in MB) of the topology manager's network options to connect to scheduler | 1 | +| heron.tmanager.network.stats.options.maximum.packet.mb | Maximum packet size (in MB) of the topology manager's network options for stat queries | 1 | +| heron.tmanager.metrics.collector.purge.interval.sec | The interval (in seconds) at which the topology manager purges metrics from socket | 60 | +| heron.tmanager.metrics.collector.maximum.exception | The maximum number of exceptions to be stored in the topology metrics collector, to prevent out-of-memory errors | 256 | +| heron.tmanager.metrics.network.bindallinterfaces | Whether the metrics reporter should bind on all interfaces | False | +| heron.tmanager.stmgr.state.timeout.sec | The timeout (in seconds) for the stream manager, compared with (current time - last heartbeat time) | 60 | | heron.metricsmgr.network.read.batch.time.ms | The maximum batch time (in milliseconds) for the metrics manager to read from socket | 16 | | heron.metricsmgr.network.read.batch.size.bytes | The maximum batch size (in bytes) to read from socket | 32768 | | heron.metricsmgr.network.write.batch.time.ms | The maximum batch time (in milliseconds) for the metrics manager to write to socket | 32768 | @@ -533,7 +533,7 @@ You can configure Heron on Kubernetes using a variety of YAML config files, list | heron.instance.reconnect.metricsmgr.interval.sec | Interval in seconds to reconnect to the metrics manager, including the request timeout in connecting | 5 | | heron.instance.reconnect.metricsmgr.times | Interval in seconds to reconnect to the metrics manager, including the request timeout in connecting | 60 | | 
heron.instance.metrics.system.sample.interval.sec | The interval in second for an instance to sample its system metrics, for instance, CPU load. | 10 | -| heron.instance.slave.fetch.pplan.interval.sec | The time interval (in seconds) at which Heron instances fetch the physical plan from slaves | 1 | +| heron.instance.executor.fetch.pplan.interval.sec | The time interval (in seconds) at which Heron instances fetch the physical plan from executors | 1 | | heron.instance.acknowledgement.nbuckets | For efficient acknowledgement | 10 | | heron.instance.tuning.expected.bolt.read.queue.size | The expected size on read queue in bolt | 8 | | heron.instance.tuning.expected.bolt.write.queue.size | The expected size on write queue in bolt | 8 | @@ -586,4 +586,4 @@ You can configure Heron on Kubernetes using a variety of YAML config files, list | heron.class.uploader | uploader class for transferring the topology files (jars, tars, PEXes, etc.) to storage | org.apache.heron.uploader.s3.S3Uploader | | heron.uploader.s3.bucket | S3 bucket in which topology assets will be stored (if AWS S3 is being used) | | | heron.uploader.s3.access_key | AWS access key (if AWS S3 is being used) | | -| heron.uploader.s3.secret_key | AWS secret access key (if AWS S3 is being used) | | \ No newline at end of file +| heron.uploader.s3.secret_key | AWS secret access key (if AWS S3 is being used) | | diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/schedulers-standalone.md b/website2/website/versioned_docs/version-0.20.0-incubating/schedulers-standalone.md index 4a2d0581a22..342a9fb1074 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/schedulers-standalone.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/schedulers-standalone.md @@ -66,14 +66,14 @@ You should see output like this: ```bash [2018-01-22 10:37:06 -0800] [INFO]: Roles: -[2018-01-22 10:37:06 -0800] [INFO]: - Master Servers: ['127.0.0.1'] -[2018-01-22 10:37:06 -0800] [INFO]: - Slave Servers: ['127.0.0.1'] +[2018-01-22 10:37:06 -0800] [INFO]: - Primary Servers: ['127.0.0.1'] +[2018-01-22 10:37:06 -0800] [INFO]: - Secondary Servers: ['127.0.0.1'] [2018-01-22 10:37:06 -0800] [INFO]: - Zookeeper Servers: ['127.0.0.1'] [2018-01-22 10:37:06 -0800] [INFO]: Updating config files... -[2018-01-22 10:37:06 -0800] [INFO]: Starting master on 127.0.0.1 -[2018-01-22 10:37:06 -0800] [INFO]: Done starting masters -[2018-01-22 10:37:06 -0800] [INFO]: Starting slave on 127.0.0.1 -[2018-01-22 10:37:06 -0800] [INFO]: Done starting slaves +[2018-01-22 10:37:06 -0800] [INFO]: Starting primary on 127.0.0.1 +[2018-01-22 10:37:06 -0800] [INFO]: Done starting primaries +[2018-01-22 10:37:06 -0800] [INFO]: Starting secondary on 127.0.0.1 +[2018-01-22 10:37:06 -0800] [INFO]: Done starting secondaries [2018-01-22 10:37:06 -0800] [INFO]: Waiting for cluster to come up... 0 [2018-01-22 10:37:08 -0800] [INFO]: Starting Heron API Server on 127.0.0.1 [2018-01-22 10:37:08 -0800] [INFO]: Waiting for API server to come up... 0 @@ -112,10 +112,10 @@ This will return a JSON string containing a list of hosts for Heron and ZooKeepe "127.0.0.1" ], "roles": { - "masters": [ + "primaries": [ "127.0.0.1" ], - "slaves": [ + "secondaries": [ "127.0.0.1" ], "zookeepers": [ @@ -207,11 +207,11 @@ Once the topology has been submitted, it can be deactivated, killed, updated, an Heron standalone uses [Nomad](https://www.nomadproject.io/) as a scheduler. For the most part, you shouldn't need to interact with Nomad when managing your Heron standalone cluster. 
If you do need to manage Nomad directly, however, you can do so using the `heron-nomad` executable, which is installed at `~/.heron/bin/heron-nomad`. That executable is essentially an alias for the `nomad` CLI tool. You can find documentation in the [official Nomad docs](https://www.nomadproject.io/docs/commands/index.html). -You can also access the [Nomad Web UI](https://www.nomadproject.io/guides/ui.html) on port 4646 of any master node in the Heron cluster. You can see a list of master nodes by running `heron-admin standalone info`. If you're running a standalone cluster locally on your machine, you can access the Nomad UI at `localhost:4646`. +You can also access the [Nomad Web UI](https://www.nomadproject.io/guides/ui.html) on port 4646 of any primary node in the Heron cluster. You can see a list of primary nodes by running `heron-admin standalone info`. If you're running a standalone cluster locally on your machine, you can access the Nomad UI at `localhost:4646`. ## Debugging Help -The locations of the logs for the Nomad Server (master node) and Nomad Clients (slave nodes) are located at '/tmp/nomad_server_log' and '/tmp/nomad_client.log' respectively. Please look through these logs to see if there was a error setting up the Nomad cluster +The logs for the Nomad Server (primary node) and Nomad Clients (secondary nodes) are located at '/tmp/nomad_server_log' and '/tmp/nomad_client.log' respectively. Please look through these logs to see if there was an error setting up the Nomad cluster. ### Common Problems @@ -223,7 +223,7 @@ Error starting agent: Failed to start Consul server: Failed to start lan serf: F The Nomad server cannot determine the network address to advertise itself on. You will need to manually set that address. You can do that by modifying the configuration file: -~/.heron/conf/standalone/resources/master.hcl +~/.heron/conf/standalone/resources/primary.hcl You will need to add a stanza like: diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/schedulers-yarn.md b/website2/website/versioned_docs/version-0.20.0-incubating/schedulers-yarn.md index a2163638197..4c57808bbad 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/schedulers-yarn.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/schedulers-yarn.md @@ -44,7 +44,7 @@ Following steps are executed when a Heron topology is submitted: 1. The REEF client copies the `Heron Core package` and the `topology package` on the distributed file system. 1. It then starts the YARN Application Master (AM) for the topology. 1. The AM subsequently invokes the `Heron Scheduler` in the same process. -1. This is followed by container allocation for the topology's master and workers. As a result `N+2` +1. This is followed by container allocation for the topology's manager and workers. As a result `N+2` containers are allocated for each topology. ### Configuring the Heron client classpath @@ -150,10 +150,10 @@ Assuming HDFS as the file system, Heron logs and REEF logs can be found in the f 1. The scheduler's logs are created on the first/AM container: `/usercache/heron/appcache/application_1466548964728_0004/container_1466548964728_0004_01_000001/log-files` -1. Logs generated when the TMaster starts in its container: +1. Logs generated when the TManager starts in its container: `/userlogs/application_1466548964728_0004/container_1466548964728_0004_01_000002/evaluator.stderr` -1. The TMaster's logs are created on the second container owned by the topology app: +1.
The TManager's logs are created on the second container owned by the topology app: `/usercache/heron/appcache/application_1466548964728_0004/container_1466548964728_0004_01_000002/log-files` 1. Worker logs are created on the remaining containers in the YARN NodeManager's local directory. @@ -161,8 +161,8 @@ Assuming HDFS as the file system, Heron logs and REEF logs can be found in the f ## Work in Progress -1. The YARN Scheduler will restart any failed workers and TMaster containers. However [AM HA](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html) is not +1. The YARN Scheduler will restart any failed workers and TManager containers. However [AM HA](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html) is not supported yet. As a result AM failure will result in topology failure. Issue: [#949](https://github.com/apache/incubator-heron/issues/949) -1. TMaster and Scheduler are started in separate containers. Increased network latency can result +1. TManager and Scheduler are started in separate containers. Increased network latency can result in warnings or failures. Issue: [#951](https://github.com/apache/incubator-heron/issues/951) diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/state-managers-local-fs.md b/website2/website/versioned_docs/version-0.20.0-incubating/state-managers-local-fs.md index 265c88f191a..3c1a77b3df8 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/state-managers-local-fs.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/state-managers-local-fs.md @@ -40,11 +40,11 @@ state manager. You should set this to `org.apache.heron.statemgr.localfs.LocalFi * `heron.statemgr.root.path` --- The root path in the local file system where state information is stored. We recommend providing Heron with an exclusive directory; if you do not, make sure that -the following sub-directories are unused: `/tmasters`, `/topologies`, `/pplans`, `/executionstate`, +the following sub-directories are unused: `/tmanagers`, `/topologies`, `/pplans`, `/executionstate`, `/schedulers`. * `heron.statemgr.localfs.is.initialize.file.tree` --- Indicates whether the nodes under root -`/tmasters`, `/topologies`, `/pplans`, `/executionstate`, and `/schedulers` need to created, if they +`/tmanagers`, `/topologies`, `/pplans`, `/executionstate`, and `/schedulers` need to be created, if they are not found. Set it to `True`, if you would like Heron to create those directories. If those directories are already there, set it to `False`. The absence of this configuration implies `True`. diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/state-managers-zookeeper.md b/website2/website/versioned_docs/version-0.20.0-incubating/state-managers-zookeeper.md index cfde1835fdc..b66ccdabd12 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/state-managers-zookeeper.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/state-managers-zookeeper.md @@ -51,10 +51,10 @@ cluster (e.g) "127.0.0.1:2181". * `heron.statemgr.root.path` --- The root ZooKeeper node to be used by Heron. We recommend providing Heron with an exclusive root node; if you do not, make sure that the following child -nodes are unused: `/tmasters`, `/topologies`, `/pplans`, `/executionstate`, `/schedulers`. +nodes are unused: `/tmanagers`, `/topologies`, `/pplans`, `/executionstate`, `/schedulers`.
* `heron.statemgr.zookeeper.is.initialize.tree` --- Indicates whether the nodes under ZooKeeper -root `/tmasters`, `/topologies`, `/pplans`, `/executionstate`, and `/schedulers` need to created, +root `/tmanagers`, `/topologies`, `/pplans`, `/executionstate`, and `/schedulers` need to be created, if they are not found. Set it to `True` if you would like Heron to create those nodes. If those nodes are already there, set it to `False`. The absence of this configuration implies `True`. diff --git a/website2/website/versioned_docs/version-0.20.0-incubating/user-manuals-tracker-rest.md b/website2/website/versioned_docs/version-0.20.0-incubating/user-manuals-tracker-rest.md index 7d4a970e393..8dd511d0b9c 100644 --- a/website2/website/versioned_docs/version-0.20.0-incubating/user-manuals-tracker-rest.md +++ b/website2/website/versioned_docs/version-0.20.0-incubating/user-manuals-tracker-rest.md @@ -199,7 +199,7 @@ Each execution state object lists the following: topology * `release_version` --- Release version * `has_physical_plan` --- Whether the topology has a physical plan -* `has_tmaster_location` --- Whether the topology has a Topology Master Location +* `has_tmanager_location` --- Whether the topology has a Topology Manager Location * `has_scheduler_location` --- Whether the topology has a Scheduler Location * `viz` --- Metric visualization UI URL for the topology if it was [configured](user-manuals-heron-tracker-runbook) @@ -225,8 +225,8 @@ $ curl "http://heron-tracker-url/topologies/states?cluster=cluster1&environ=deve ### /topologies/info Returns a JSON representation of a dictionary containing logical plan, physical plan, -execution state, scheduler location and TMaster location for a topology, as described above. -`TMasterLocation` is the location of the TMaster, including its host, +execution state, scheduler location and TManager location for a topology, as described above. +`TManagerLocation` is the location of the TManager, including its host, port, and the heron-shell port that it exposes. #### Parameters @@ -280,7 +280,7 @@ Returns a JSON map of instances of the topology to their respective metrics. To filter instances returned use the `instance` parameter discussed below. -Note that these metrics come from TMaster, which only holds metrics +Note that these metrics come from TManager, which only holds metrics for last 3 hours minutely data, as well as cumulative values. If the `interval` is greater than `10800` seconds, the values will be for all-time metrics. @@ -305,7 +305,7 @@ The difference between this and `/metrics` endpoint above, is that `/metrics` wi cumulative value over the period of `interval` provided. On the other hand, `/metricstimeline` endpoint will report minutely values for each metricname for each instance. -Note that these metrics come from TMaster, which only holds metrics +Note that these metrics come from TManager, which only holds metrics for last 3 hours minutely data, as well as cumulative all-time values. If the starttime is older than 3 hours ago, those minutes would not be part of the response. @@ -326,7 +326,7 @@ is older than 3 hours ago, those minutes would not be part of the response. Executes the metrics query for the topology and returns the result in form of minutely timeseries. A detailed description of query language is given [below](#metricsquery).
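For example, a minimal sketch of issuing such a query directly with `curl`, assuming the endpoint accepts the same `cluster`/`environ`/`topology` parameters as the endpoints above plus `starttime`, `endtime`, and `query` (the epoch timestamps are placeholders):

```bash
# Hypothetical query: the minutely __emit-count timeseries for every
# instance of component1 on stream1 over a one-hour window.
curl "http://heron-tracker-url/topologies/metricsquery?cluster=cluster1&environ=devel&topology=ExclamationTopology&starttime=1508400000&endtime=1508403600&query=TS(component1,*,__emit-count/stream1)"
```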
-Note that these metrics come from TMaster, which only holds metrics +Note that these metrics come from TManager, which only holds metrics for last 3 hours minutely data, as well as cumulative all-time values. If the starttime is older than 3 hours ago, those minutes would not be part of the response. @@ -472,7 +472,7 @@ Example: TS(component1, *, __emit-count/stream1) ``` -Time Series Operator. This is the basic operator that is responsible for getting metrics from TMaster. +Time Series Operator. This is the basic operator that is responsible for getting metrics from TManager. Accepts a list of 3 elements: 1. componentName diff --git a/website2/website/versioned_sidebars/version-0.20.0-incubating-sidebars.json b/website2/website/versioned_sidebars/version-0.20.0-incubating-sidebars.json index 81d7befa9b1..e500f42a339 100644 --- a/website2/website/versioned_sidebars/version-0.20.0-incubating-sidebars.json +++ b/website2/website/versioned_sidebars/version-0.20.0-incubating-sidebars.json @@ -67,7 +67,7 @@ "version-0.20.0-incubating-cluster-config-instance", "version-0.20.0-incubating-cluster-config-metrics", "version-0.20.0-incubating-cluster-config-stream", - "version-0.20.0-incubating-cluster-config-tmaster" + "version-0.20.0-incubating-cluster-config-tmanager" ], "Observability": [ "version-0.20.0-incubating-observability-prometheus", From dd34e3d1313211f49973f953676c85d74b6cdeb2 Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 16:18:56 -0400 Subject: [PATCH 07/32] renaming "topology master" to "topology manager" in scripts --- scripts/packages/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/packages/BUILD b/scripts/packages/BUILD index db1198bb872..3ed2619df58 100644 --- a/scripts/packages/BUILD +++ b/scripts/packages/BUILD @@ -78,7 +78,7 @@ pkg_tar( "//heron/instance/src/python:heron-python-instance", "//heron/shell/src/python:heron-shell", "//heron/stmgr/src/cpp:heron-stmgr", - "//heron/tmaster/src/cpp:heron-tmaster", + "//heron/tmanager/src/cpp:heron-tmanager", ], package_dir = "heron-core/bin", ) From e929f6ad22fa926286719a60f5f6e194c79e092d Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 19:27:34 -0400 Subject: [PATCH 08/32] renaming "topology master" to "topology manager" in heron/scheduler-core --- .../apache/heron/scheduler/ExecutorFlag.java | 10 ++--- .../heron/scheduler/RuntimeManagerMain.java | 6 +-- .../heron/scheduler/RuntimeManagerRunner.java | 44 +++++++++---------- .../scheduler/UpdateTopologyManager.java | 26 +++++------ .../heron/scheduler/utils/SchedulerUtils.java | 30 ++++++------- .../scheduler/RuntimeManagerRunnerTest.java | 30 ++++++------- .../scheduler/UpdateTopologyManagerTest.java | 20 ++++----- 7 files changed, 83 insertions(+), 83 deletions(-) diff --git a/heron/scheduler-core/src/java/org/apache/heron/scheduler/ExecutorFlag.java b/heron/scheduler-core/src/java/org/apache/heron/scheduler/ExecutorFlag.java index 62aa448793b..8fe0a06f0fd 100644 --- a/heron/scheduler-core/src/java/org/apache/heron/scheduler/ExecutorFlag.java +++ b/heron/scheduler-core/src/java/org/apache/heron/scheduler/ExecutorFlag.java @@ -28,14 +28,14 @@ public enum ExecutorFlag { StateManagerConnection("state-manager-connection"), StateManagerRoot("state-manager-root"), StateManagerConfigFile("state-manager-config-file"), - TMasterBinary("tmaster-binary"), + TManagerBinary("tmanager-binary"), StmgrBinary("stmgr-binary"), MetricsManagerClasspath("metrics-manager-classpath"), InstanceJvmOpts("instance-jvm-opts"), Classpath("classpath"), 
- MasterPort("master-port"), - TMasterControllerPort("tmaster-controller-port"), - TMasterStatsPort("tmaster-stats-port"), + ServerPort("server-port"), + TManagerControllerPort("tmanager-controller-port"), + TManagerStatsPort("tmanager-stats-port"), HeronInternalsConfigFile("heron-internals-config-file"), OverrideConfigFile("override-config-file"), ComponentRamMap("component-ram-map"), @@ -56,7 +56,7 @@ public enum ExecutorFlag { PythonInstanceBinary("python-instance-binary"), CppInstanceBinary("cpp-instance-binary"), MetricsCacheManagerClasspath("metricscache-manager-classpath"), - MetricsCacheManagerMasterPort("metricscache-manager-master-port"), + MetricsCacheManagerServerPort("metricscache-manager-server-port"), MetricsCacheManagerStatsPort("metricscache-manager-stats-port"), MetricsCacheManagerMode("metricscache-manager-mode"), IsStateful("is-stateful"), diff --git a/heron/scheduler-core/src/java/org/apache/heron/scheduler/RuntimeManagerMain.java b/heron/scheduler-core/src/java/org/apache/heron/scheduler/RuntimeManagerMain.java index 221e133d675..56df04e250c 100644 --- a/heron/scheduler-core/src/java/org/apache/heron/scheduler/RuntimeManagerMain.java +++ b/heron/scheduler-core/src/java/org/apache/heron/scheduler/RuntimeManagerMain.java @@ -49,7 +49,7 @@ import org.apache.heron.spi.statemgr.IStateManager; import org.apache.heron.spi.statemgr.SchedulerStateManagerAdaptor; import org.apache.heron.spi.utils.ReflectionUtils; -import org.apache.heron.spi.utils.TMasterException; +import org.apache.heron.spi.utils.TManagerException; public class RuntimeManagerMain { private static final Logger LOG = Logger.getLogger(RuntimeManagerMain.class.getName()); @@ -371,7 +371,7 @@ public RuntimeManagerMain( * 3. Complete the runtime management for a specific command */ public void manageTopology() - throws TopologyRuntimeManagementException, TMasterException, PackingException { + throws TopologyRuntimeManagementException, TManagerException, PackingException { String topologyName = Context.topologyName(config); // 1. Do prepare work // create an instance of state manager @@ -488,7 +488,7 @@ protected void callRuntimeManagerRunner( Config runtime, ISchedulerClient schedulerClient, boolean potentialStaleExecutionData) - throws TopologyRuntimeManagementException, TMasterException, PackingException { + throws TopologyRuntimeManagementException, TManagerException, PackingException { // create an instance of the runner class RuntimeManagerRunner runtimeManagerRunner = new RuntimeManagerRunner(config, runtime, command, schedulerClient, diff --git a/heron/scheduler-core/src/java/org/apache/heron/scheduler/RuntimeManagerRunner.java b/heron/scheduler-core/src/java/org/apache/heron/scheduler/RuntimeManagerRunner.java index 38c4a94c1d1..83ac8ae3d39 100644 --- a/heron/scheduler-core/src/java/org/apache/heron/scheduler/RuntimeManagerRunner.java +++ b/heron/scheduler-core/src/java/org/apache/heron/scheduler/RuntimeManagerRunner.java @@ -45,8 +45,8 @@ import org.apache.heron.spi.statemgr.SchedulerStateManagerAdaptor; import org.apache.heron.spi.utils.NetworkUtils; import org.apache.heron.spi.utils.ReflectionUtils; -import org.apache.heron.spi.utils.TMasterException; -import org.apache.heron.spi.utils.TMasterUtils; +import org.apache.heron.spi.utils.TManagerException; +import org.apache.heron.spi.utils.TManagerUtils; public class RuntimeManagerRunner { // Internal config keys. 
They are used internally only to pass command line arguments @@ -80,7 +80,7 @@ public RuntimeManagerRunner(Config config, } public void call() - throws TMasterException, TopologyRuntimeManagementException, + throws TManagerException, TopologyRuntimeManagementException, PackingException, UpdateDryRunResponse { // execute the appropriate command String topologyName = Context.topologyName(config); @@ -108,24 +108,24 @@ public void call() /** * Handler to activate a topology */ - private void activateTopologyHandler(String topologyName) throws TMasterException { + private void activateTopologyHandler(String topologyName) throws TManagerException { assert !potentialStaleExecutionData; NetworkUtils.TunnelConfig tunnelConfig = NetworkUtils.TunnelConfig.build(config, NetworkUtils.HeronSystem.SCHEDULER); - TMasterUtils.transitionTopologyState(topologyName, - TMasterUtils.TMasterCommand.ACTIVATE, Runtime.schedulerStateManagerAdaptor(runtime), + TManagerUtils.transitionTopologyState(topologyName, + TManagerUtils.TManagerCommand.ACTIVATE, Runtime.schedulerStateManagerAdaptor(runtime), TopologyAPI.TopologyState.PAUSED, TopologyAPI.TopologyState.RUNNING, tunnelConfig); } /** * Handler to deactivate a topology */ - private void deactivateTopologyHandler(String topologyName) throws TMasterException { + private void deactivateTopologyHandler(String topologyName) throws TManagerException { assert !potentialStaleExecutionData; NetworkUtils.TunnelConfig tunnelConfig = NetworkUtils.TunnelConfig.build(config, NetworkUtils.HeronSystem.SCHEDULER); - TMasterUtils.transitionTopologyState(topologyName, - TMasterUtils.TMasterCommand.DEACTIVATE, Runtime.schedulerStateManagerAdaptor(runtime), + TManagerUtils.transitionTopologyState(topologyName, + TManagerUtils.TManagerCommand.DEACTIVATE, Runtime.schedulerStateManagerAdaptor(runtime), TopologyAPI.TopologyState.RUNNING, TopologyAPI.TopologyState.PAUSED, tunnelConfig); } @@ -141,16 +141,16 @@ void restartTopologyHandler(String topologyName) throws TopologyRuntimeManagemen .setTopologyName(topologyName) .setContainerIndex(containerId) .build(); - // If we restart the container including TMaster, wee need to clean TMasterLocation, - // since when starting up, TMaster expects no other existing TMaster, - // i.e. TMasterLocation does not exist + // If we restart the container including TManager, we need to clean TManagerLocation, + // since when starting up, TManager expects no other existing TManager, + // i.e. TManagerLocation does not exist if (containerId == -1 || containerId == 0) { // get the instance of state manager to clean state SchedulerStateManagerAdaptor stateManager = Runtime.schedulerStateManagerAdaptor(runtime); - Boolean result = stateManager.deleteTMasterLocation(topologyName); + Boolean result = stateManager.deleteTManagerLocation(topologyName); if (result == null || !result) { throw new TopologyRuntimeManagementException( - "Failed to clear TMaster location. Check whether TMaster set it correctly."); + "Failed to clear TManager location.
Check whether TManager set it correctly."); } } @@ -314,17 +314,17 @@ void updateTopologyContainerCount(String topologyName, @VisibleForTesting void updateTopologyUserRuntimeConfig(String topologyName, String userRuntimeConfig) - throws TopologyRuntimeManagementException, TMasterException { + throws TopologyRuntimeManagementException, TManagerException { String[] runtimeConfigs = parseUserRuntimeConfigParam(userRuntimeConfig); if (runtimeConfigs.length == 0) { throw new TopologyRuntimeManagementException("No user config is found"); } - // Send user runtime config to TMaster + // Send user runtime config to TManager NetworkUtils.TunnelConfig tunnelConfig = NetworkUtils.TunnelConfig.build(config, NetworkUtils.HeronSystem.SCHEDULER); - TMasterUtils.sendRuntimeConfig(topologyName, - TMasterUtils.TMasterCommand.RUNTIME_CONFIG_UPDATE, + TManagerUtils.sendRuntimeConfig(topologyName, + TManagerUtils.TManagerCommand.RUNTIME_CONFIG_UPDATE, Runtime.schedulerStateManagerAdaptor(runtime), runtimeConfigs, tunnelConfig); @@ -333,7 +333,7 @@ void updateTopologyUserRuntimeConfig(String topologyName, String userRuntimeConf /** * Clean all states of a heron topology * 1. Topology def and ExecutionState are required to exist to delete - * 2. TMasterLocation, SchedulerLocation and PhysicalPlan may not exist to delete + * 2. TManagerLocation, SchedulerLocation and PhysicalPlan may not exist to delete */ protected void cleanState( String topologyName, @@ -342,10 +342,10 @@ protected void cleanState( Boolean result; - result = statemgr.deleteTMasterLocation(topologyName); + result = statemgr.deleteTManagerLocation(topologyName); if (result == null || !result) { throw new TopologyRuntimeManagementException( - "Failed to clear TMaster location. Check whether TMaster set it correctly."); + "Failed to clear TManager location. Check whether TManager set it correctly."); } result = statemgr.deleteMetricsCacheLocation(topologyName); @@ -363,7 +363,7 @@ protected void cleanState( result = statemgr.deletePhysicalPlan(topologyName); if (result == null || !result) { throw new TopologyRuntimeManagementException( - "Failed to clear physical plan. Check whether TMaster set it correctly."); + "Failed to clear physical plan. 
Check whether TManager set it correctly."); } result = statemgr.deleteSchedulerLocation(topologyName); diff --git a/heron/scheduler-core/src/java/org/apache/heron/scheduler/UpdateTopologyManager.java b/heron/scheduler-core/src/java/org/apache/heron/scheduler/UpdateTopologyManager.java index e7fb8668814..059c0e058b1 100644 --- a/heron/scheduler-core/src/java/org/apache/heron/scheduler/UpdateTopologyManager.java +++ b/heron/scheduler-core/src/java/org/apache/heron/scheduler/UpdateTopologyManager.java @@ -53,8 +53,8 @@ import org.apache.heron.spi.statemgr.Lock; import org.apache.heron.spi.statemgr.SchedulerStateManagerAdaptor; import org.apache.heron.spi.utils.NetworkUtils; -import org.apache.heron.spi.utils.TMasterException; -import org.apache.heron.spi.utils.TMasterUtils; +import org.apache.heron.spi.utils.TManagerException; +import org.apache.heron.spi.utils.TManagerUtils; import static org.apache.heron.api.Config.TOPOLOGY_UPDATE_DEACTIVATE_WAIT_SECS; import static org.apache.heron.api.Config.TOPOLOGY_UPDATE_REACTIVATE_WAIT_SECS; @@ -155,7 +155,7 @@ private void updateTopology(final PackingPlans.PackingPlan existingProtoPackingP // deactivate and sleep if (initiallyRunning) { // Update the topology since the state should have changed from RUNNING to PAUSED - // Will throw exceptions internally if tmaster fails to deactivate + // Will throw exceptions internally if tmanager fails to deactivate deactivateTopology(stateManager, topology, proposedPackingPlan); } @@ -192,11 +192,11 @@ private void updateTopology(final PackingPlans.PackingPlan existingProtoPackingP // reactivate topology if (initiallyRunning) { - // wait before reactivating to give the tmaster a chance to receive the packing update and - // delete the packing plan. Instead we could message tmaster to invalidate the physical plan + // wait before reactivating to give the tmanager a chance to receive the packing update and + // delete the packing plan. Instead we could message tmanager to invalidate the physical plan // and/or possibly even update the packing plan directly SysUtils.sleep(Duration.ofSeconds(10)); - // Will throw exceptions internally if tmaster fails to deactivate + // Will throw exceptions internally if tmanager fails to deactivate reactivateTopology(stateManager, topology, removableContainerCount); } @@ -209,7 +209,7 @@ private void updateTopology(final PackingPlans.PackingPlan existingProtoPackingP void deactivateTopology(SchedulerStateManagerAdaptor stateManager, final TopologyAPI.Topology topology, PackingPlan proposedPackingPlan) - throws InterruptedException, TMasterException { + throws InterruptedException, TManagerException { List topologyConfig = topology.getTopologyConfig().getKvsList(); long deactivateSleepSeconds = TopologyUtils.getConfigWithDefault( @@ -218,8 +218,8 @@ void deactivateTopology(SchedulerStateManagerAdaptor stateManager, logInfo("Deactivating topology %s before handling update request", topology.getName()); NetworkUtils.TunnelConfig tunnelConfig = NetworkUtils.TunnelConfig.build(config, NetworkUtils.HeronSystem.SCHEDULER); - TMasterUtils.transitionTopologyState( - topology.getName(), TMasterUtils.TMasterCommand.DEACTIVATE, stateManager, + TManagerUtils.transitionTopologyState( + topology.getName(), TManagerUtils.TManagerCommand.DEACTIVATE, stateManager, TopologyAPI.TopologyState.RUNNING, TopologyAPI.TopologyState.PAUSED, tunnelConfig); if (deactivateSleepSeconds > 0) { logInfo("Deactivated topology %s. 
Sleeping for %d seconds before handling update request", @@ -301,10 +301,10 @@ public synchronized void run() { NetworkUtils.TunnelConfig tunnelConfig = NetworkUtils.TunnelConfig.build(config, NetworkUtils.HeronSystem.SCHEDULER); try { - TMasterUtils.transitionTopologyState( - topologyName, TMasterUtils.TMasterCommand.ACTIVATE, stateManager, + TManagerUtils.transitionTopologyState( + topologyName, TManagerUtils.TManagerCommand.ACTIVATE, stateManager, TopologyAPI.TopologyState.PAUSED, TopologyAPI.TopologyState.RUNNING, tunnelConfig); - } catch (TMasterException e) { + } catch (TManagerException e) { if (removableContainerCount < 1) { throw new TopologyRuntimeManagementException(String.format( "Topology reactivation failed for topology %s after topology update", @@ -326,7 +326,7 @@ public synchronized void run() { cancel(); } else { logInfo("Couldn't fetch physical plan for topology %s. This is probably because stream " - + "managers are still registering with TMaster. Will sleep and try again", + + "managers are still registering with TManager. Will sleep and try again", topologyName); } } diff --git a/heron/scheduler-core/src/java/org/apache/heron/scheduler/utils/SchedulerUtils.java b/heron/scheduler-core/src/java/org/apache/heron/scheduler/utils/SchedulerUtils.java index bd34c3c601c..7240707842b 100644 --- a/heron/scheduler-core/src/java/org/apache/heron/scheduler/utils/SchedulerUtils.java +++ b/heron/scheduler-core/src/java/org/apache/heron/scheduler/utils/SchedulerUtils.java @@ -56,13 +56,13 @@ public final class SchedulerUtils { */ public enum ExecutorPort { - MASTER_PORT("master", true), - TMASTER_CONTROLLER_PORT("tmaster-ctl", true), - TMASTER_STATS_PORT("tmaster-stats", true), + SERVER_PORT("server", true), + TMANAGER_CONTROLLER_PORT("tmanager-ctl", true), + TMANAGER_STATS_PORT("tmanager-stats", true), SHELL_PORT("shell-port", true), METRICS_MANAGER_PORT("metrics-mgr", true), SCHEDULER_PORT("scheduler", true), - METRICS_CACHE_MASTER_PORT("metrics-cache-m", true), + METRICS_CACHE_SERVER_PORT("metrics-cache-m", true), METRICS_CACHE_STATS_PORT("metrics-cache-s", true), CHECKPOINT_MANAGER_PORT("ckptmgr", true), JVM_REMOTE_DEBUGGER_PORTS("jvm-remote-debugger", false); @@ -260,7 +260,7 @@ public static void addExecutorTopologyArgs(List args, Config config, Con Context.stateManagerRootPath(config))); args.add(createCommandArg(ExecutorFlag.StateManagerConfigFile, Context.stateManagerFile(config))); - args.add(createCommandArg(ExecutorFlag.TMasterBinary, Context.tmasterBinary(config))); + args.add(createCommandArg(ExecutorFlag.TManagerBinary, Context.tmanagerBinary(config))); args.add(createCommandArg(ExecutorFlag.StmgrBinary, Context.stmgrBinary(config))); args.add(createCommandArg(ExecutorFlag.MetricsManagerClasspath, Context.metricsManagerClassPath(config))); @@ -335,15 +335,15 @@ public static void addExecutorContainerArgs( List args, Map ports, String containerIndex) { - String masterPort = ExecutorPort.getPort(ExecutorPort.MASTER_PORT, ports); - String tmasterControllerPort = ExecutorPort.getPort( - ExecutorPort.TMASTER_CONTROLLER_PORT, ports); - String tmasterStatsPort = ExecutorPort.getPort(ExecutorPort.TMASTER_STATS_PORT, ports); + String serverPort = ExecutorPort.getPort(ExecutorPort.SERVER_PORT, ports); + String tmanagerControllerPort = ExecutorPort.getPort( + ExecutorPort.TMANAGER_CONTROLLER_PORT, ports); + String tmanagerStatsPort = ExecutorPort.getPort(ExecutorPort.TMANAGER_STATS_PORT, ports); String shellPort = ExecutorPort.getPort(ExecutorPort.SHELL_PORT, ports); String 
metricsmgrPort = ExecutorPort.getPort(ExecutorPort.METRICS_MANAGER_PORT, ports); String schedulerPort = ExecutorPort.getPort(ExecutorPort.SCHEDULER_PORT, ports); - String metricsCacheMasterPort = ExecutorPort.getPort( - ExecutorPort.METRICS_CACHE_MASTER_PORT, ports); + String metricsCacheServerPort = ExecutorPort.getPort( + ExecutorPort.METRICS_CACHE_SERVER_PORT, ports); String metricsCacheStatsPort = ExecutorPort.getPort( ExecutorPort.METRICS_CACHE_STATS_PORT, ports); String ckptmgrPort = ExecutorPort.getPort(ExecutorPort.CHECKPOINT_MANAGER_PORT, ports); @@ -353,13 +353,13 @@ public static void addExecutorContainerArgs( if (containerIndex != null) { args.add(createCommandArg(ExecutorFlag.Shard, containerIndex)); } - args.add(createCommandArg(ExecutorFlag.MasterPort, masterPort)); - args.add(createCommandArg(ExecutorFlag.TMasterControllerPort, tmasterControllerPort)); - args.add(createCommandArg(ExecutorFlag.TMasterStatsPort, tmasterStatsPort)); + args.add(createCommandArg(ExecutorFlag.ServerPort, serverPort)); + args.add(createCommandArg(ExecutorFlag.TManagerControllerPort, tmanagerControllerPort)); + args.add(createCommandArg(ExecutorFlag.TManagerStatsPort, tmanagerStatsPort)); args.add(createCommandArg(ExecutorFlag.ShellPort, shellPort)); args.add(createCommandArg(ExecutorFlag.MetricsManagerPort, metricsmgrPort)); args.add(createCommandArg(ExecutorFlag.SchedulerPort, schedulerPort)); - args.add(createCommandArg(ExecutorFlag.MetricsCacheManagerMasterPort, metricsCacheMasterPort)); + args.add(createCommandArg(ExecutorFlag.MetricsCacheManagerServerPort, metricsCacheServerPort)); args.add(createCommandArg(ExecutorFlag.MetricsCacheManagerStatsPort, metricsCacheStatsPort)); args.add(createCommandArg(ExecutorFlag.CheckpointManagerPort, ckptmgrPort)); if (remoteDebuggerPorts != null) { diff --git a/heron/scheduler-core/tests/java/org/apache/heron/scheduler/RuntimeManagerRunnerTest.java b/heron/scheduler-core/tests/java/org/apache/heron/scheduler/RuntimeManagerRunnerTest.java index eb3ea3e92a7..bfe4b01b85b 100644 --- a/heron/scheduler-core/tests/java/org/apache/heron/scheduler/RuntimeManagerRunnerTest.java +++ b/heron/scheduler-core/tests/java/org/apache/heron/scheduler/RuntimeManagerRunnerTest.java @@ -35,7 +35,7 @@ import org.apache.heron.packing.roundrobin.RoundRobinPacking; import org.apache.heron.proto.scheduler.Scheduler; import org.apache.heron.proto.system.PackingPlans; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.scheduler.client.ISchedulerClient; import org.apache.heron.scheduler.utils.Runtime; import org.apache.heron.spi.common.Config; @@ -101,7 +101,7 @@ public void testRestartTopologyHandlerFailRestartTopology() { SchedulerStateManagerAdaptor adaptor = mock(SchedulerStateManagerAdaptor.class); RuntimeManagerRunner runner = newRuntimeManagerRunner(Command.RESTART, client); - // Restart container 1, not containing TMaster + // Restart container 1, not containing TManager Scheduler.RestartTopologyRequest restartTopologyRequest = Scheduler.RestartTopologyRequest.newBuilder() .setTopologyName(TOPOLOGY_NAME).setContainerIndex(1).build(); @@ -110,7 +110,7 @@ public void testRestartTopologyHandlerFailRestartTopology() { try { runner.restartTopologyHandler(TOPOLOGY_NAME); } finally { - verify(adaptor, never()).deleteTMasterLocation(TOPOLOGY_NAME); + verify(adaptor, never()).deleteTManagerLocation(TOPOLOGY_NAME); } } @@ -120,7 +120,7 @@ public void testRestartTopologyHandlerSuccRestartTopology() { 
SchedulerStateManagerAdaptor adaptor = mock(SchedulerStateManagerAdaptor.class); RuntimeManagerRunner runner = newRuntimeManagerRunner(Command.RESTART, client); - // Restart container 1, not containing TMaster + // Restart container 1, not containing TManager Scheduler.RestartTopologyRequest restartTopologyRequest = Scheduler.RestartTopologyRequest.newBuilder() .setTopologyName(TOPOLOGY_NAME).setContainerIndex(1).build(); @@ -129,30 +129,30 @@ public void testRestartTopologyHandlerSuccRestartTopology() { // Success case when(client.restartTopology(restartTopologyRequest)).thenReturn(true); runner.restartTopologyHandler(TOPOLOGY_NAME); - // Should not invoke DeleteTMasterLocation - verify(adaptor, never()).deleteTMasterLocation(TOPOLOGY_NAME); + // Should not invoke DeleteTManagerLocation + verify(adaptor, never()).deleteTManagerLocation(TOPOLOGY_NAME); } @Test(expected = TopologyRuntimeManagementException.class) - public void testRestartTopologyHandlerFailDeleteTMasterLoc() { + public void testRestartTopologyHandlerFailDeleteTManagerLoc() { ISchedulerClient client = mock(ISchedulerClient.class); SchedulerStateManagerAdaptor adaptor = mock(SchedulerStateManagerAdaptor.class); RuntimeManagerRunner runner = newRuntimeManagerRunner(Command.RESTART, client); - // Restart container 1, not containing TMaster + // Restart container 1, not containing TManager Scheduler.RestartTopologyRequest restartTopologyRequest = Scheduler.RestartTopologyRequest.newBuilder() .setTopologyName(TOPOLOGY_NAME).setContainerIndex(1).build(); when(config.getIntegerValue(Key.TOPOLOGY_CONTAINER_ID)).thenReturn(1); - // Restart container 0, containing TMaster + // Restart container 0, containing TManager when(config.getIntegerValue(Key.TOPOLOGY_CONTAINER_ID)).thenReturn(0); when(runtime.get(Key.SCHEDULER_STATE_MANAGER_ADAPTOR)).thenReturn(adaptor); - when(adaptor.deleteTMasterLocation(TOPOLOGY_NAME)).thenReturn(false); + when(adaptor.deleteTManagerLocation(TOPOLOGY_NAME)).thenReturn(false); try { runner.restartTopologyHandler(TOPOLOGY_NAME); } finally { - // DeleteTMasterLocation should be invoked - verify(adaptor).deleteTMasterLocation(TOPOLOGY_NAME); + // DeleteTManagerLocation should be invoked + verify(adaptor).deleteTManagerLocation(TOPOLOGY_NAME); } } @@ -268,10 +268,10 @@ public void testUpdateTopologyUserRuntimeConfig() throws Exception { SchedulerStateManagerAdaptor manager = mock(SchedulerStateManagerAdaptor.class); HttpURLConnection connection = mock(HttpURLConnection.class); RuntimeManagerRunner runner = newRuntimeManagerRunner(Command.UPDATE, client); - TopologyMaster.TMasterLocation location = TopologyMaster.TMasterLocation.newBuilder(). + TopologyManager.TManagerLocation location = TopologyManager.TManagerLocation.newBuilder(). setTopologyName("topology-name").setTopologyId("topology-id"). 
- setHost("host").setControllerPort(1).setMasterPort(2).build(); - when(manager.getTMasterLocation(TOPOLOGY_NAME)).thenReturn(location); + setHost("host").setControllerPort(1).setServerPort(2).build(); + when(manager.getTManagerLocation(TOPOLOGY_NAME)).thenReturn(location); when(connection.getResponseCode()).thenReturn(HttpURLConnection.HTTP_OK); PowerMockito.mockStatic(Runtime.class); diff --git a/heron/scheduler-core/tests/java/org/apache/heron/scheduler/UpdateTopologyManagerTest.java b/heron/scheduler-core/tests/java/org/apache/heron/scheduler/UpdateTopologyManagerTest.java index 17f70e5e075..ff03eb26b1f 100644 --- a/heron/scheduler-core/tests/java/org/apache/heron/scheduler/UpdateTopologyManagerTest.java +++ b/heron/scheduler-core/tests/java/org/apache/heron/scheduler/UpdateTopologyManagerTest.java @@ -53,7 +53,7 @@ import org.apache.heron.spi.statemgr.SchedulerStateManagerAdaptor; import org.apache.heron.spi.utils.NetworkUtils; import org.apache.heron.spi.utils.PackingTestUtils; -import org.apache.heron.spi.utils.TMasterUtils; +import org.apache.heron.spi.utils.TManagerUtils; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; @@ -153,7 +153,7 @@ public void testContainerDelta() { * Test scalable scheduler invocation */ @Test - @PrepareForTest(TMasterUtils.class) + @PrepareForTest(TManagerUtils.class) public void requestsToAddAndRemoveContainers() throws Exception { Lock lock = mockLock(true); SchedulerStateManagerAdaptor mockStateMgr = mockStateManager( @@ -168,14 +168,14 @@ public void requestsToAddAndRemoveContainers() throws Exception { UpdateTopologyManager spyUpdateManager = spyUpdateManager(mockStateMgr, mockScheduler, testTopology); - PowerMockito.spy(TMasterUtils.class); - PowerMockito.doNothing().when(TMasterUtils.class, "sendToTMaster", + PowerMockito.spy(TManagerUtils.class); + PowerMockito.doNothing().when(TManagerUtils.class, "sendToTManager", any(String.class), eq(TOPOLOGY_NAME), eq(mockStateMgr), any(NetworkUtils.TunnelConfig.class)); // reactivation won't happen since topology state is still running due to mock state manager - PowerMockito.doNothing().when(TMasterUtils.class, "transitionTopologyState", - eq(TOPOLOGY_NAME), eq(TMasterUtils.TMasterCommand.ACTIVATE), eq(mockStateMgr), + PowerMockito.doNothing().when(TManagerUtils.class, "transitionTopologyState", + eq(TOPOLOGY_NAME), eq(TManagerUtils.TManagerCommand.ACTIVATE), eq(mockStateMgr), eq(TopologyAPI.TopologyState.PAUSED), eq(TopologyAPI.TopologyState.RUNNING), any(NetworkUtils.TunnelConfig.class)); @@ -190,14 +190,14 @@ public void requestsToAddAndRemoveContainers() throws Exception { verify(lock).unlock(); PowerMockito.verifyStatic(times(1)); - TMasterUtils.transitionTopologyState(eq(TOPOLOGY_NAME), - eq(TMasterUtils.TMasterCommand.DEACTIVATE), eq(mockStateMgr), + TManagerUtils.transitionTopologyState(eq(TOPOLOGY_NAME), + eq(TManagerUtils.TManagerCommand.DEACTIVATE), eq(mockStateMgr), eq(TopologyAPI.TopologyState.RUNNING), eq(TopologyAPI.TopologyState.PAUSED), any(NetworkUtils.TunnelConfig.class)); PowerMockito.verifyStatic(times(1)); - TMasterUtils.transitionTopologyState(eq(TOPOLOGY_NAME), - eq(TMasterUtils.TMasterCommand.ACTIVATE), eq(mockStateMgr), + TManagerUtils.transitionTopologyState(eq(TOPOLOGY_NAME), + eq(TManagerUtils.TManagerCommand.ACTIVATE), eq(mockStateMgr), eq(TopologyAPI.TopologyState.PAUSED), eq(TopologyAPI.TopologyState.RUNNING), any(NetworkUtils.TunnelConfig.class)); } From a1c7d546599159bf48da281f9a65d7f7a61004f3 Mon Sep 17 00:00:00 2001 From: Jim Bo 
Date: Sun, 25 Oct 2020 20:11:53 -0400
Subject: [PATCH 09/32] renaming "topology master" to "topology manager" in heron/tools

---
 heron/tools/admin/src/python/standalone.py    | 222 ++++++++---------
 heron/tools/explorer/tests/python/info.json   |   6 +-
 .../src/python/handlers/exceptionhandler.py   |  20 +-
 .../handlers/exceptionsummaryhandler.py       |  20 +-
 .../src/python/handlers/metricshandler.py     |  24 +-
 .../python/handlers/metricsqueryhandler.py    |   6 +-
 .../python/handlers/metricstimelinehandler.py |   2 +-
 .../python/handlers/runtimestatehandler.py    |  22 +-
 .../tracker/src/python/metricstimeline.py     |  24 +-
 heron/tools/tracker/src/python/query.py       |  12 +-
 .../tracker/src/python/query_operators.py     |  50 ++--
 heron/tools/tracker/src/python/topology.py    |   8 +-
 heron/tools/tracker/src/python/tracker.py     |  52 ++--
 .../tools/tracker/tests/python/mock_proto.py  |   8 +-
 .../tests/python/query_operator_unittest.py   | 224 +++++++++---------
 .../tracker/tests/python/topology_unittest.py |  14 +-
 .../tracker/tests/python/tracker_unittest.py  |   2 +-
 .../ui/resources/static/js/alltopologies.js   |   6 +-
 18 files changed, 361 insertions(+), 361 deletions(-)

diff --git a/heron/tools/admin/src/python/standalone.py b/heron/tools/admin/src/python/standalone.py
index ec0d2703aa4..9ec2d815a51 100644
--- a/heron/tools/admin/src/python/standalone.py
+++ b/heron/tools/admin/src/python/standalone.py
@@ -51,8 +51,8 @@ class Action:
 
 class Role:
   ZOOKEEPERS = "zookeepers"
-  MASTERS = "masters"
-  SLAVES = "slaves"
+  PRIMARIES = "primaries"
+  SECONDARIES = "secondaries"
   CLUSTER = "cluster"
 
 class Cluster:
@@ -92,7 +92,7 @@ def create_parser(subparsers):
 
   parser_set = parser_action.add_parser(
       Action.SET,
-      help='Set configurations for standalone cluster e.g. master or slave nodes',
+      help='Set configurations for standalone cluster e.g. primary or secondary nodes',
       add_help=True,
       formatter_class=argparse.RawTextHelpFormatter
   )
@@ -205,71 +205,71 @@ def update_config_files(cl_args):
   Log.info("Updating config files...")
   roles = read_and_parse_roles(cl_args)
   Log.debug("roles: %s" % roles)
-  masters = list(roles[Role.MASTERS])
+  primaries = list(roles[Role.PRIMARIES])
   zookeepers = list(roles[Role.ZOOKEEPERS])
-  template_slave_hcl(cl_args, masters)
-  template_scheduler_yaml(cl_args, masters)
-  template_uploader_yaml(cl_args, masters)
-  template_apiserver_hcl(cl_args, masters, zookeepers)
+  template_secondary_hcl(cl_args, primaries)
+  template_scheduler_yaml(cl_args, primaries)
+  template_uploader_yaml(cl_args, primaries)
+  template_apiserver_hcl(cl_args, primaries, zookeepers)
   template_statemgr_yaml(cl_args, zookeepers)
-  template_heron_tools_hcl(cl_args, masters, zookeepers)
+  template_heron_tools_hcl(cl_args, primaries, zookeepers)
 
 ##################### Templating functions ######################################
 
-def template_slave_hcl(cl_args, masters):
+def template_secondary_hcl(cl_args, primaries):
   '''
-  Template slave config file
+  Template secondary config file
   '''
-  slave_config_template = "%s/standalone/templates/slave.template.hcl" % cl_args["config_path"]
-  slave_config_actual = "%s/standalone/resources/slave.hcl" % cl_args["config_path"]
-  masters_in_quotes = ['"%s"' % master for master in masters]
-  template_file(slave_config_template, slave_config_actual,
-                {"": ", ".join(masters_in_quotes)})
+  secondary_config_template = "%s/standalone/templates/secondary.template.hcl" % cl_args["config_path"]
+  secondary_config_actual = "%s/standalone/resources/secondary.hcl" % cl_args["config_path"]
+  primaries_in_quotes = ['"%s"' % primary for primary in primaries]
+  template_file(secondary_config_template, secondary_config_actual,
+                {"": ", ".join(primaries_in_quotes)})
 
-def template_scheduler_yaml(cl_args, masters):
+def template_scheduler_yaml(cl_args, primaries):
   '''
   Template scheduler.yaml
   '''
-  single_master = masters[0]
+  single_primary = primaries[0]
   scheduler_config_actual = "%s/standalone/scheduler.yaml" % cl_args["config_path"]
   scheduler_config_template = "%s/standalone/templates/scheduler.template.yaml" \
                               % cl_args["config_path"]
 
   template_file(scheduler_config_template, scheduler_config_actual,
-                {"": "http://%s:4646" % single_master})
+                {"": "http://%s:4646" % single_primary})
 
-def template_uploader_yaml(cl_args, masters):
+def template_uploader_yaml(cl_args, primaries):
   '''
   Template uploader.yaml
   '''
-  single_master = masters[0]
+  single_primary = primaries[0]
   uploader_config_template = "%s/standalone/templates/uploader.template.yaml" \
                              % cl_args["config_path"]
   uploader_config_actual = "%s/standalone/uploader.yaml" % cl_args["config_path"]
 
   template_file(uploader_config_template, uploader_config_actual,
-                {"": "http://%s:9000/api/v1/file/upload" % single_master})
+                {"": "http://%s:9000/api/v1/file/upload" % single_primary})
 
-def template_apiserver_hcl(cl_args, masters, zookeepers):
+def template_apiserver_hcl(cl_args, primaries, zookeepers):
   """
   template apiserver.hcl
   """
-  single_master = masters[0]
+  single_primary = primaries[0]
  apiserver_config_template = "%s/standalone/templates/apiserver.template.hcl" \
                               % cl_args["config_path"]
   apiserver_config_actual = "%s/standalone/resources/apiserver.hcl" % cl_args["config_path"]
 
   replacements = {
-      "": '"%s"' % get_hostname(single_master, cl_args),
+      "": '"%s"' % get_hostname(single_primary, cl_args),
       "": '"%s/heron-apiserver"' % config.get_heron_bin_dir()
-            if is_self(single_master)
+            if is_self(single_primary)
             else '"%s/.heron/bin/heron-apiserver"'
-                 % get_remote_home(single_master, cl_args),
+                 % get_remote_home(single_primary, cl_args),
       "": ",".join(
           ['%s' % zk if ":" in zk else '%s:2181' % zk for zk in zookeepers]),
-      "": "http://%s:4646" % single_master
+      "": "http://%s:4646" % single_primary
   }
   template_file(apiserver_config_template, apiserver_config_actual, replacements)
 
@@ -287,7 +287,7 @@ def template_statemgr_yaml(cl_args, zookeepers):
                 {"": ",".join(
                     ['"%s"' % zk if ":" in zk else '"%s:2181"' % zk for zk in zookeepers])})
 
-def template_heron_tools_hcl(cl_args, masters, zookeepers):
+def template_heron_tools_hcl(cl_args, primaries, zookeepers):
   '''
   template heron tools
   '''
@@ -296,13 +296,13 @@ def template_heron_tools_hcl(cl_args, masters, zookeepers):
   heron_tools_hcl_actual = "%s/standalone/resources/heron_tools.hcl" \
                            % cl_args["config_path"]
 
-  single_master = masters[0]
+  single_primary = primaries[0]
 
   template_file(heron_tools_hcl_template, heron_tools_hcl_actual, {
       "": ",".join(
          ['%s' % zk if ":" in zk else '%s:2181' % zk for zk in zookeepers]),
       "": '"%s/heron-tracker"' % config.get_heron_bin_dir(),
-      "": '"%s"' % get_hostname(single_master, cl_args),
+      "": '"%s"' % get_hostname(single_primary, cl_args),
       "": '"%s/heron-ui"' % config.get_heron_bin_dir()
   })
 
@@ -330,29 +330,29 @@ def get_service_url(cl_args):
   get service url for standalone cluster
   '''
   roles = read_and_parse_roles(cl_args)
-  return "http://%s:9000" % list(roles[Role.MASTERS])[0]
+  return "http://%s:9000" % list(roles[Role.PRIMARIES])[0]
 
 def get_heron_tracker_url(cl_args):
   '''
   get service url for standalone cluster
   '''
   roles = read_and_parse_roles(cl_args)
-  return "http://%s:8888" % list(roles[Role.MASTERS])[0]
+  return "http://%s:8888" % list(roles[Role.PRIMARIES])[0]
 
 def get_heron_ui_url(cl_args):
   '''
   get service url for standalone cluster
   '''
   roles = read_and_parse_roles(cl_args)
-  return "http://%s:8889" % list(roles[Role.MASTERS])[0]
+  return "http://%s:8889" % list(roles[Role.PRIMARIES])[0]
 
 def print_cluster_info(cl_args):
   '''
   get cluster info for standalone cluster
   '''
   parsed_roles = read_and_parse_roles(cl_args)
-  masters = list(parsed_roles[Role.MASTERS])
-  slaves = list(parsed_roles[Role.SLAVES])
+  primaries = list(parsed_roles[Role.PRIMARIES])
+  secondaries = list(parsed_roles[Role.SECONDARIES])
   zookeepers = list(parsed_roles[Role.ZOOKEEPERS])
   cluster = list(parsed_roles[Role.CLUSTER])
 
@@ -362,8 +362,8 @@ def print_cluster_info(cl_args):
   info['numNodes'] = len(cluster)
   info['nodes'] = cluster
   roles = OrderedDict()
-  roles['masters'] = masters
-  roles['slaves'] = slaves
+  roles['primaries'] = primaries
+  roles['secondaries'] = secondaries
   roles['zookeepers'] = zookeepers
   urls = OrderedDict()
   urls['serviceUrl'] = get_service_url(cl_args)
@@ -393,19 +393,19 @@ def stop_cluster(cl_args):
   Log.info("Terminating cluster...")
 
   roles = read_and_parse_roles(cl_args)
-  masters = roles[Role.MASTERS]
-  slaves = roles[Role.SLAVES]
-  dist_nodes = masters.union(slaves)
+  primaries = roles[Role.PRIMARIES]
+  secondaries = roles[Role.SECONDARIES]
+  dist_nodes = primaries.union(secondaries)
 
   # stop all jobs
-  if masters:
+  if primaries:
     try:
-      single_master = list(masters)[0]
-      jobs = get_jobs(cl_args, single_master)
+      single_primary = list(primaries)[0]
+      jobs = get_jobs(cl_args, single_primary)
       for job in jobs:
         job_id = job["ID"]
         Log.info("Terminating job %s" % job_id)
-        delete_job(cl_args, job_id, single_master)
+        delete_job(cl_args, job_id, single_primary)
     except:
       Log.debug("Error stopping jobs")
       Log.debug(sys.exc_info()[0])
 
@@ -431,7 +431,7 @@ def stop_cluster(cl_args):
       Log.debug("return code: %s output: %s" % (return_code, output))
 
     Log.info("Cleaning up directories on %s" % node)
-    cmd = "rm -rf /tmp/slave ; rm -rf /tmp/master"
+    cmd = "rm -rf /tmp/secondary ; rm -rf /tmp/primary"
     if not is_self(node):
      cmd = ssh_remote_execute(cmd, node, cl_args)
     Log.debug(cmd)
@@ -450,18 +450,18 @@ def start_cluster(cl_args):
   Start a Heron standalone cluster
   '''
   roles = read_and_parse_roles(cl_args)
-  masters = roles[Role.MASTERS]
-  slaves = roles[Role.SLAVES]
+  primaries = roles[Role.PRIMARIES]
+  secondaries = roles[Role.SECONDARIES]
   zookeepers = roles[Role.ZOOKEEPERS]
   Log.info("Roles:")
-  Log.info(" - Master Servers: %s" % list(masters))
-  Log.info(" - Slave Servers: %s" % list(slaves))
+  Log.info(" - Primary Servers: %s" % list(primaries))
+  Log.info(" - Secondary Servers: %s" % list(secondaries))
   Log.info(" - Zookeeper Servers: %s" % list(zookeepers))
 
-  if not masters:
-    Log.error("No master servers specified!")
+  if not primaries:
+    Log.error("No primary servers specified!")
     sys.exit(-1)
-  if not slaves:
-    Log.error("No slave servers specified!")
+  if not secondaries:
+    Log.error("No secondary servers specified!")
     sys.exit(-1)
   if not zookeepers:
     Log.error("No zookeeper servers specified!")
@@ -469,30 +469,30 @@ def start_cluster(cl_args):
   # make sure configs are templated
   update_config_files(cl_args)
 
-  dist_nodes = list(masters.union(slaves))
+  dist_nodes = list(primaries.union(secondaries))
   # if just local deployment
   if not (len(dist_nodes) == 1 and is_self(dist_nodes[0])):
     distribute_package(roles, cl_args)
-  start_master_nodes(masters, cl_args)
-  start_slave_nodes(slaves, cl_args)
-  start_api_server(masters, cl_args)
-  start_heron_tools(masters, cl_args)
+  start_primary_nodes(primaries, cl_args)
+  start_secondary_nodes(secondaries, cl_args)
+  start_api_server(primaries, cl_args)
+  start_heron_tools(primaries, cl_args)
   Log.info("Heron standalone cluster complete!")
 
-def start_api_server(masters, cl_args):
+def start_api_server(primaries, cl_args):
   '''
   Start the Heron API server
   '''
   # make sure nomad cluster is up
-  single_master = list(masters)[0]
-  wait_for_master_to_start(single_master)
+  single_primary = list(primaries)[0]
+  wait_for_primary_to_start(single_primary)
 
   cmd = "%s run %s >> /tmp/apiserver_start.log 2>&1 &" \
         % (get_nomad_path(cl_args), get_apiserver_job_file(cl_args))
-  Log.info("Starting Heron API Server on %s" % single_master)
+  Log.info("Starting Heron API Server on %s" % single_primary)
 
-  if not is_self(single_master):
-    cmd = ssh_remote_execute(cmd, single_master, cl_args)
+  if not is_self(single_primary):
+    cmd = ssh_remote_execute(cmd, single_primary, cl_args)
   Log.debug(cmd)
   pid = subprocess.Popen(cmd,
                          shell=True,
@@ -504,25 +504,25 @@ def start_api_server(masters, cl_args):
   output = pid.communicate()
   Log.debug("return code: %s output: %s" % (return_code, output))
   if return_code != 0:
-    Log.error("Failed to start API server on %s with error:\n%s" % (single_master, output[1]))
+    Log.error("Failed to start API server on %s with error:\n%s" % (single_primary, output[1]))
     sys.exit(-1)
 
-  wait_for_job_to_start(single_master, "apiserver")
+  wait_for_job_to_start(single_primary, "apiserver")
   Log.info("Done starting Heron API Server")
 
-def start_heron_tools(masters, cl_args):
+def start_heron_tools(primaries, cl_args):
   '''
   Start Heron tracker and UI
   '''
-  single_master = list(masters)[0]
-  wait_for_master_to_start(single_master)
+  single_primary = list(primaries)[0]
+  wait_for_primary_to_start(single_primary)
 
   cmd = "%s run %s >> /tmp/heron_tools_start.log 2>&1 &" \
         % (get_nomad_path(cl_args), get_heron_tools_job_file(cl_args))
-  Log.info("Starting Heron Tools on %s" % single_master)
+  Log.info("Starting Heron Tools on %s" % single_primary)
 
-  if not is_self(single_master):
-    cmd = ssh_remote_execute(cmd, single_master, cl_args)
+  if not is_self(single_primary):
+    cmd = ssh_remote_execute(cmd, single_primary, cl_args)
   Log.debug(cmd)
   pid = subprocess.Popen(cmd,
                          shell=True,
@@ -534,10 +534,10 @@ def start_heron_tools(masters, cl_args):
   output = pid.communicate()
   Log.debug("return code: %s output: %s" % (return_code, output))
   if return_code != 0:
-    Log.error("Failed to start Heron Tools on %s with error:\n%s" % (single_master, output[1]))
+    Log.error("Failed to start Heron Tools on %s with error:\n%s" % (single_primary, output[1]))
     sys.exit(-1)
 
-  wait_for_job_to_start(single_master, "heron-tools")
+  wait_for_job_to_start(single_primary, "heron-tools")
   Log.info("Done starting Heron Tools")
 
 def distribute_package(roles, cl_args):
@@ -545,24 +545,24 @@ def distribute_package(roles, cl_args):
   distribute Heron packages to all nodes
   '''
   Log.info("Distributing heron package to nodes (this might take a while)...")
-  masters = roles[Role.MASTERS]
-  slaves = roles[Role.SLAVES]
+  primaries = roles[Role.PRIMARIES]
+  secondaries = roles[Role.SECONDARIES]
 
   tar_file = tempfile.NamedTemporaryFile(suffix=".tmp").name
   Log.debug("TAR file %s to %s" % (cl_args["heron_dir"], tar_file))
   make_tarfile(tar_file, cl_args["heron_dir"])
 
-  dist_nodes = masters.union(slaves)
+  dist_nodes = primaries.union(secondaries)
   scp_package(tar_file, dist_nodes, cl_args)
 
-def wait_for_master_to_start(single_master):
+def wait_for_primary_to_start(single_primary):
   '''
-  Wait for a nomad master to start
+  Wait for a nomad primary to start
   '''
   i = 0
   while True:
     try:
-      r = requests.get("http://%s:4646/v1/status/leader" % single_master)
+      r = requests.get("http://%s:4646/v1/status/leader" % single_primary)
       if r.status_code == 200:
         break
     except:
@@ -574,14 +574,14 @@ def wait_for_master_to_start(single_master):
       sys.exit(-1)
     i = i + 1
 
-def wait_for_job_to_start(single_master, job):
+def wait_for_job_to_start(single_primary, job):
   '''
   Wait for a Nomad job to start
   '''
   i = 0
   while True:
     try:
-      r = requests.get("http://%s:4646/v1/job/%s" % (single_master, job))
+      r = requests.get("http://%s:4646/v1/job/%s" % (single_primary, job))
       if r.status_code == 200 and r.json()["Status"] == "running":
         break
       raise RuntimeError()
@@ -642,24 +642,24 @@ def make_tarfile(output_filename, source_dir):
   with tarfile.open(output_filename, "w:gz") as tar:
     tar.add(source_dir, arcname=os.path.basename(source_dir))
 
-def start_master_nodes(masters, cl_args):
+def start_primary_nodes(primaries, cl_args):
   '''
-  Start master nodes
+  Start primary nodes
   '''
   pids = []
-  for master in masters:
-    Log.info("Starting master on %s" % master)
+  for primary in primaries:
+    Log.info("Starting primary on %s" % primary)
     cmd = "%s agent -config %s >> /tmp/nomad_server_log 2>&1 &" \
-          % (get_nomad_path(cl_args), get_nomad_master_config_file(cl_args))
-    if not is_self(master):
-      cmd = ssh_remote_execute(cmd, master, cl_args)
+          % (get_nomad_path(cl_args), get_nomad_primary_config_file(cl_args))
+    if not is_self(primary):
+      cmd = ssh_remote_execute(cmd, primary, cl_args)
     Log.debug(cmd)
     pid = subprocess.Popen(cmd,
                            shell=True,
                           universal_newlines=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
-    pids.append({"pid": pid, "dest": master})
+    pids.append({"pid": pid, "dest": primary})
 
   errors = []
   for entry in pids:
@@ -668,33 +668,33 @@ def start_master_nodes(masters, cl_args):
     output = pid.communicate()
     Log.debug("return code: %s output: %s" % (return_code, output))
     if return_code != 0:
-      errors.append("Failed to start master on %s with error:\n%s" % (entry["dest"], output[1]))
+      errors.append("Failed to start primary on %s with error:\n%s" % (entry["dest"], output[1]))
 
   if errors:
     for error in errors:
      Log.error(error)
     sys.exit(-1)
 
-  Log.info("Done starting masters")
+  Log.info("Done starting primaries")
 
-def start_slave_nodes(slaves, cl_args):
+def start_secondary_nodes(secondaries, cl_args):
   '''
-  Star slave nodes
+  Start secondary nodes
   '''
   pids = []
-  for slave in slaves:
-    Log.info("Starting slave on %s" % slave)
+  for secondary in secondaries:
+    Log.info("Starting secondary on %s" % secondary)
     cmd = "%s agent -config %s >> /tmp/nomad_client.log 2>&1 &" \
-          % (get_nomad_path(cl_args), get_nomad_slave_config_file(cl_args))
-    if not is_self(slave):
-      cmd = ssh_remote_execute(cmd, slave, cl_args)
+          % (get_nomad_path(cl_args), get_nomad_secondary_config_file(cl_args))
+    if not is_self(secondary):
+      cmd = ssh_remote_execute(cmd, secondary, cl_args)
     Log.debug(cmd)
     pid = subprocess.Popen(cmd,
                            shell=True,
                            universal_newlines=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
-    pids.append({"pid": pid, "dest": slave})
+    pids.append({"pid": pid, "dest": secondary})
 
   errors = []
   for entry in pids:
@@ -703,14 +703,14 @@ def start_slave_nodes(slaves, cl_args):
     output = pid.communicate()
     Log.debug("return code: %s output: %s" % (return_code, output))
     if return_code != 0:
-      errors.append("Failed to start slave on %s with error:\n%s" % (entry["dest"], output[1]))
+      errors.append("Failed to start secondary on %s with error:\n%s" % (entry["dest"], output[1]))
 
   if errors:
     for error in errors:
       Log.error(error)
     sys.exit(-1)
 
-  Log.info("Done starting slaves")
+  Log.info("Done starting secondaries")
 
 def read_and_parse_roles(cl_args):
@@ -735,8 +735,8 @@ def read_and_parse_roles(cl_args):
     sys.exit(-1)
 
   # Set roles
-  roles[Role.MASTERS] = set([roles[Role.CLUSTER][0]])
-  roles[Role.SLAVES] = set(roles[Role.CLUSTER])
+  roles[Role.PRIMARIES] = set([roles[Role.CLUSTER][0]])
+  roles[Role.SECONDARIES] = set(roles[Role.CLUSTER])
   roles[Role.ZOOKEEPERS] = set(roles[Role.ZOOKEEPERS])
   roles[Role.CLUSTER] = set(roles[Role.CLUSTER])
 
@@ -788,17 +788,17 @@ def get_nomad_path(cl_args):
   return "%s/heron-nomad" % config.get_heron_bin_dir()
 
-def get_nomad_master_config_file(cl_args):
+def get_nomad_primary_config_file(cl_args):
   '''
-  get path to nomad master config file
+  get path to nomad primary config file
   '''
-  return "%s/standalone/resources/master.hcl" % config.get_heron_conf_dir()
+  return "%s/standalone/resources/primary.hcl" % config.get_heron_conf_dir()
 
-def get_nomad_slave_config_file(cl_args):
+def get_nomad_secondary_config_file(cl_args):
   '''
-  get path to nomad slave config file
+  get path to nomad secondary config file
   '''
-  return "%s/standalone/resources/slave.hcl" % config.get_heron_conf_dir()
+  return "%s/standalone/resources/secondary.hcl" % config.get_heron_conf_dir()
 
 def get_apiserver_job_file(cl_args):
   '''
diff --git a/heron/tools/explorer/tests/python/info.json b/heron/tools/explorer/tests/python/info.json
index 73abc7cb41e..5f69a35f687 100644
--- a/heron/tools/explorer/tests/python/info.json
+++ b/heron/tools/explorer/tests/python/info.json
@@ -1,10 +1,10 @@
 {
   "name": "ExclamationTopology",
"tmaster_location": { + "tmanager_location": { "name": "ExclamationTopology", "stats_port": 61362, "host": "tw-mbp-rli", - "master_port": 61360, + "server_port": 61360, "id": "ExclamationTopology2cd540ae-0f43-4ae5-b304-9055910ff7e7", "controller_port": 61361 }, @@ -122,7 +122,7 @@ }, "execution_state": { "release_username": "rli", - "has_tmaster_location": true, + "has_tmanager_location": true, "jobname": "ExclamationTopology", "release_version": "explorer", "submission_user": "rli", diff --git a/heron/tools/tracker/src/python/handlers/exceptionhandler.py b/heron/tools/tracker/src/python/handlers/exceptionhandler.py index c43f28555df..9b4920764e4 100644 --- a/heron/tools/tracker/src/python/handlers/exceptionhandler.py +++ b/heron/tools/tracker/src/python/handlers/exceptionhandler.py @@ -25,7 +25,7 @@ from heron.common.src.python.utils.log import Log from heron.proto import common_pb2 -from heron.proto import tmaster_pb2 +from heron.proto import tmanager_pb2 from heron.tools.tracker.src.python import constants from heron.tools.tracker.src.python.handlers import BaseHandler @@ -63,7 +63,7 @@ def get(self): cluster, role, environ, topName) instances = self.get_arguments(constants.PARAM_INSTANCE) exceptions_logs = yield tornado.gen.Task(self.getComponentException, - topology.tmaster, component, instances) + topology.tmanager, component, instances) self.write_success_response(exceptions_logs) except Exception as e: Log.debug(traceback.format_exc()) @@ -72,22 +72,22 @@ def get(self): # pylint: disable=bad-option-value, dangerous-default-value, no-self-use, # pylint: disable=unused-argument @tornado.gen.coroutine - def getComponentException(self, tmaster, component_name, instances=[], callback=None): + def getComponentException(self, tmanager, component_name, instances=[], callback=None): """ Get all (last 1000) exceptions for 'component_name' of the topology. Returns an Array of exception logs on success. Returns json with message on failure. """ - if not tmaster or not tmaster.host or not tmaster.stats_port: + if not tmanager or not tmanager.host or not tmanager.stats_port: return - exception_request = tmaster_pb2.ExceptionLogRequest() + exception_request = tmanager_pb2.ExceptionLogRequest() exception_request.component_name = component_name if len(instances) > 0: exception_request.instances.extend(instances) request_str = exception_request.SerializeToString() - port = str(tmaster.stats_port) - host = tmaster.host + port = str(tmanager.stats_port) + host = tmanager.host url = "http://{0}:{1}/exceptions".format(host, port) request = tornado.httpclient.HTTPRequest(url, body=request_str, @@ -104,14 +104,14 @@ def getComponentException(self, tmaster, component_name, instances=[], callback= # Check the response code - error if it is in 400s or 500s responseCode = result.code if responseCode >= 400: - message = "Error in getting exceptions from Tmaster, code: " + responseCode + message = "Error in getting exceptions from Tmanager, code: " + responseCode Log.error(message) raise tornado.gen.Return({ "message": message }) - # Parse the response from tmaster. - exception_response = tmaster_pb2.ExceptionLogResponse() + # Parse the response from tmanager. 
+ exception_response = tmanager_pb2.ExceptionLogResponse() exception_response.ParseFromString(result.body) if exception_response.status.status == common_pb2.NOTOK: diff --git a/heron/tools/tracker/src/python/handlers/exceptionsummaryhandler.py b/heron/tools/tracker/src/python/handlers/exceptionsummaryhandler.py index b613807a397..cc8d14744dc 100644 --- a/heron/tools/tracker/src/python/handlers/exceptionsummaryhandler.py +++ b/heron/tools/tracker/src/python/handlers/exceptionsummaryhandler.py @@ -25,7 +25,7 @@ from heron.common.src.python.utils.log import Log from heron.proto import common_pb2 -from heron.proto import tmaster_pb2 +from heron.proto import tmanager_pb2 from heron.tools.tracker.src.python import constants from heron.tools.tracker.src.python.handlers import BaseHandler @@ -64,7 +64,7 @@ def get(self): cluster, role, environ, topology_name) instances = self.get_arguments(constants.PARAM_INSTANCE) exceptions_summary = yield tornado.gen.Task(self.getComponentExceptionSummary, - topology.tmaster, component, instances) + topology.tmanager, component, instances) self.write_success_response(exceptions_summary) except Exception as e: Log.debug(traceback.format_exc()) @@ -72,20 +72,20 @@ def get(self): # pylint: disable=dangerous-default-value, no-self-use, unused-argument @tornado.gen.coroutine - def getComponentExceptionSummary(self, tmaster, component_name, instances=[], callback=None): + def getComponentExceptionSummary(self, tmanager, component_name, instances=[], callback=None): """ Get the summary of exceptions for component_name and list of instances. Empty instance list will fetch all exceptions. """ - if not tmaster or not tmaster.host or not tmaster.stats_port: + if not tmanager or not tmanager.host or not tmanager.stats_port: return - exception_request = tmaster_pb2.ExceptionLogRequest() + exception_request = tmanager_pb2.ExceptionLogRequest() exception_request.component_name = component_name if len(instances) > 0: exception_request.instances.extend(instances) request_str = exception_request.SerializeToString() - port = str(tmaster.stats_port) - host = tmaster.host + port = str(tmanager.stats_port) + host = tmanager.host url = "http://{0}:{1}/exceptionsummary".format(host, port) Log.debug("Creating request object.") request = tornado.httpclient.HTTPRequest(url, @@ -103,14 +103,14 @@ def getComponentExceptionSummary(self, tmaster, component_name, instances=[], ca # Check the response code - error if it is in 400s or 500s responseCode = result.code if responseCode >= 400: - message = "Error in getting exceptions from Tmaster, code: " + responseCode + message = "Error in getting exceptions from Tmanager, code: " + responseCode Log.error(message) raise tornado.gen.Return({ "message": message }) - # Parse the response from tmaster. - exception_response = tmaster_pb2.ExceptionLogResponse() + # Parse the response from tmanager. 
+ exception_response = tmanager_pb2.ExceptionLogResponse() exception_response.ParseFromString(result.body) if exception_response.status.status == common_pb2.NOTOK: diff --git a/heron/tools/tracker/src/python/handlers/metricshandler.py b/heron/tools/tracker/src/python/handlers/metricshandler.py index 5f79f6206a6..db0aef5ac29 100644 --- a/heron/tools/tracker/src/python/handlers/metricshandler.py +++ b/heron/tools/tracker/src/python/handlers/metricshandler.py @@ -25,7 +25,7 @@ from heron.common.src.python.utils.log import Log from heron.proto import common_pb2 -from heron.proto import tmaster_pb2 +from heron.proto import tmanager_pb2 from heron.tools.tracker.src.python import constants from heron.tools.tracker.src.python.handlers import BaseHandler @@ -72,7 +72,7 @@ def get(self): metrics = yield tornado.gen.Task( self.getComponentMetrics, - topology.tmaster, component, metric_names, instances, interval) + topology.tmanager, component, metric_names, instances, interval) self.write_success_response(metrics) except Exception as e: @@ -82,7 +82,7 @@ def get(self): # pylint: disable=too-many-locals, no-self-use, unused-argument @tornado.gen.coroutine def getComponentMetrics(self, - tmaster, + tmanager, componentName, metricNames, instances, @@ -105,13 +105,13 @@ def getComponentMetrics(self, Raises exception on failure. """ - if not tmaster or not tmaster.host or not tmaster.stats_port: - raise Exception("No Tmaster found") + if not tmanager or not tmanager.host or not tmanager.stats_port: + raise Exception("No Tmanager found") - host = tmaster.host - port = tmaster.stats_port + host = tmanager.host + port = tmanager.stats_port - metricRequest = tmaster_pb2.MetricRequest() + metricRequest = tmanager_pb2.MetricRequest() metricRequest.component_name = componentName if len(instances) > 0: for instance in instances: @@ -142,17 +142,17 @@ def getComponentMetrics(self, # Check the response code - error if it is in 400s or 500s responseCode = result.code if responseCode >= 400: - message = "Error in getting metrics from Tmaster, code: " + responseCode + message = "Error in getting metrics from Tmanager, code: " + responseCode Log.error(message) raise Exception(message) - # Parse the response from tmaster. - metricResponse = tmaster_pb2.MetricResponse() + # Parse the response from tmanager. + metricResponse = tmanager_pb2.MetricResponse() metricResponse.ParseFromString(result.body) if metricResponse.status.status == common_pb2.NOTOK: if metricResponse.status.HasField("message"): - Log.warn("Received response from Tmaster: %s", metricResponse.status.message) + Log.warn("Received response from Tmanager: %s", metricResponse.status.message) # Form the response. 
ret = {} diff --git a/heron/tools/tracker/src/python/handlers/metricsqueryhandler.py b/heron/tools/tracker/src/python/handlers/metricsqueryhandler.py index a0e367a063c..5c1ea5ac0d0 100644 --- a/heron/tools/tracker/src/python/handlers/metricsqueryhandler.py +++ b/heron/tools/tracker/src/python/handlers/metricsqueryhandler.py @@ -67,7 +67,7 @@ def get(self): query = self.get_argument_query() metrics = yield tornado.gen.Task(self.executeMetricsQuery, - topology.tmaster, query, int(start_time), int(end_time)) + topology.tmanager, query, int(start_time), int(end_time)) self.write_success_response(metrics) except Exception as e: Log.debug(traceback.format_exc()) @@ -75,7 +75,7 @@ def get(self): # pylint: disable=unused-argument @tornado.gen.coroutine - def executeMetricsQuery(self, tmaster, queryString, start_time, end_time, callback=None): + def executeMetricsQuery(self, tmanager, queryString, start_time, end_time, callback=None): """ Get the specified metrics for the given query in this topology. Returns the following dict on success: @@ -101,7 +101,7 @@ def executeMetricsQuery(self, tmaster, queryString, start_time, end_time, callba """ query = Query(self.tracker) - metrics = yield query.execute_query(tmaster, queryString, start_time, end_time) + metrics = yield query.execute_query(tmanager, queryString, start_time, end_time) # Parse the response ret = {} diff --git a/heron/tools/tracker/src/python/handlers/metricstimelinehandler.py b/heron/tools/tracker/src/python/handlers/metricstimelinehandler.py index 267734e48da..5e91cde53c7 100644 --- a/heron/tools/tracker/src/python/handlers/metricstimelinehandler.py +++ b/heron/tools/tracker/src/python/handlers/metricstimelinehandler.py @@ -71,7 +71,7 @@ def get(self): topology = self.tracker.getTopologyByClusterRoleEnvironAndName( cluster, role, environ, topology_name) metrics = yield tornado.gen.Task(metricstimeline.getMetricsTimeline, - topology.tmaster, component, metric_names, + topology.tmanager, component, metric_names, instances, int(start_time), int(end_time)) self.write_success_response(metrics) except Exception as e: diff --git a/heron/tools/tracker/src/python/handlers/runtimestatehandler.py b/heron/tools/tracker/src/python/handlers/runtimestatehandler.py index 44e2fc1b055..e2e05898592 100644 --- a/heron/tools/tracker/src/python/handlers/runtimestatehandler.py +++ b/heron/tools/tracker/src/python/handlers/runtimestatehandler.py @@ -24,7 +24,7 @@ import tornado.web from heron.common.src.python.utils.log import Log -from heron.proto import tmaster_pb2 +from heron.proto import tmanager_pb2 from heron.tools.tracker.src.python.handlers import BaseHandler # pylint: disable=attribute-defined-outside-init @@ -43,7 +43,7 @@ class RuntimeStateHandler(BaseHandler): Example JSON response: { - has_tmaster_location: true, + has_tmanager_location: true, stmgrs_reg_summary: { registered_stmgrs: [ "stmgr-1", @@ -61,16 +61,16 @@ def initialize(self, tracker): # pylint: disable=dangerous-default-value, no-self-use, unused-argument @tornado.gen.coroutine - def getStmgrsRegSummary(self, tmaster, callback=None): + def getStmgrsRegSummary(self, tmanager, callback=None): """ Get summary of stream managers registration summary """ - if not tmaster or not tmaster.host or not tmaster.stats_port: + if not tmanager or not tmanager.host or not tmanager.stats_port: return - reg_request = tmaster_pb2.StmgrsRegistrationSummaryRequest() + reg_request = tmanager_pb2.StmgrsRegistrationSummaryRequest() request_str = reg_request.SerializeToString() - port = 
str(tmaster.stats_port) - host = tmaster.host + port = str(tmanager.stats_port) + host = tmanager.host url = "http://{0}:{1}/stmgrsregistrationsummary".format(host, port) request = tornado.httpclient.HTTPRequest(url, body=request_str, @@ -86,13 +86,13 @@ def getStmgrsRegSummary(self, tmaster, callback=None): # Check the response code - error if it is in 400s or 500s responseCode = result.code if responseCode >= 400: - message = "Error in getting exceptions from Tmaster, code: " + responseCode + message = "Error in getting exceptions from Tmanager, code: " + responseCode Log.error(message) raise tornado.gen.Return({ "message": message }) - # Parse the response from tmaster. - reg_response = tmaster_pb2.StmgrsRegistrationSummaryResponse() + # Parse the response from tmanager. + reg_response = tmanager_pb2.StmgrsRegistrationSummaryResponse() reg_response.ParseFromString(result.body) # Send response ret = {} @@ -115,7 +115,7 @@ def get(self): runtime_state["topology_version"] = topology_info["metadata"]["release_version"] topology = self.tracker.getTopologyByClusterRoleEnvironAndName( cluster, role, environ, topology_name) - reg_summary = yield tornado.gen.Task(self.getStmgrsRegSummary, topology.tmaster) + reg_summary = yield tornado.gen.Task(self.getStmgrsRegSummary, topology.tmanager) for stmgr, reg in list(reg_summary.items()): runtime_state["stmgrs"].setdefault(stmgr, {})["is_registered"] = reg self.write_success_response(runtime_state) diff --git a/heron/tools/tracker/src/python/metricstimeline.py b/heron/tools/tracker/src/python/metricstimeline.py index 29fcc2a3cba..a1a1f033acd 100644 --- a/heron/tools/tracker/src/python/metricstimeline.py +++ b/heron/tools/tracker/src/python/metricstimeline.py @@ -23,11 +23,11 @@ from heron.common.src.python.utils.log import Log from heron.proto import common_pb2 -from heron.proto import tmaster_pb2 +from heron.proto import tmanager_pb2 # pylint: disable=too-many-locals, too-many-branches, unused-argument @tornado.gen.coroutine -def getMetricsTimeline(tmaster, +def getMetricsTimeline(tmanager, component_name, metric_names, instances, @@ -58,16 +58,16 @@ def getMetricsTimeline(tmaster, "message": "..." } """ - # Tmaster is the proto object and must have host and port for stats. - if not tmaster or not tmaster.host or not tmaster.stats_port: - raise Exception("No Tmaster found") + # Tmanager is the proto object and must have host and port for stats. + if not tmanager or not tmanager.host or not tmanager.stats_port: + raise Exception("No Tmanager found") - host = tmaster.host - port = tmaster.stats_port + host = tmanager.host + port = tmanager.stats_port # Create the proto request object to get metrics. - metricRequest = tmaster_pb2.MetricRequest() + metricRequest = tmanager_pb2.MetricRequest() metricRequest.component_name = component_name # If no instances are give, metrics for all instances @@ -107,17 +107,17 @@ def getMetricsTimeline(tmaster, # Check the response code - error if it is in 400s or 500s responseCode = result.code if responseCode >= 400: - message = "Error in getting metrics from Tmaster, code: " + responseCode + message = "Error in getting metrics from Tmanager, code: " + responseCode Log.error(message) raise Exception(message) - # Parse the response from tmaster. - metricResponse = tmaster_pb2.MetricResponse() + # Parse the response from tmanager. 
+  metricResponse = tmanager_pb2.MetricResponse()
   metricResponse.ParseFromString(result.body)
 
   if metricResponse.status.status == common_pb2.NOTOK:
     if metricResponse.status.HasField("message"):
-      Log.warn("Received response from Tmaster: %s", metricResponse.status.message)
+      Log.warn("Received response from Tmanager: %s", metricResponse.status.message)
 
   # Form the response.
   ret = {}
diff --git a/heron/tools/tracker/src/python/query.py b/heron/tools/tracker/src/python/query.py
index 5d89a525284..bb6dea01d9e 100644
--- a/heron/tools/tracker/src/python/query.py
+++ b/heron/tools/tracker/src/python/query.py
@@ -35,7 +35,7 @@ class Query:
   individual metrics that are part of the query.
   Example usage:
     query = Query(tracker)
-    result = query.execute(tmaster, query_string)"""
+    result = query.execute(tmanager, query_string)"""
   # pylint: disable=undefined-variable
   def __init__(self, tracker):
     self.tracker = tracker
@@ -54,13 +54,13 @@ def __init__(self, tracker):
 
   # pylint: disable=attribute-defined-outside-init, no-member
   @tornado.gen.coroutine
-  def execute_query(self, tmaster, query_string, start, end):
+  def execute_query(self, tmanager, query_string, start, end):
     """ execute query """
-    if not tmaster:
-      raise Exception("No tmaster found")
-    self.tmaster = tmaster
+    if not tmanager:
+      raise Exception("No tmanager found")
+    self.tmanager = tmanager
     root = self.parse_query_string(query_string)
-    metrics = yield root.execute(self.tracker, self.tmaster, start, end)
+    metrics = yield root.execute(self.tracker, self.tmanager, start, end)
     raise tornado.gen.Return(metrics)
 
   def find_closing_braces(self, query):
diff --git a/heron/tools/tracker/src/python/query_operators.py b/heron/tools/tracker/src/python/query_operators.py
index dd460c5aca9..0257d90b7b0 100644
--- a/heron/tools/tracker/src/python/query_operators.py
+++ b/heron/tools/tracker/src/python/query_operators.py
@@ -67,9 +67,9 @@ def setDefault(self, constant, start, end):
     endtime = end // 60 * 60
     while starttime <= endtime:
       # STREAMCOMP-1559
-      # Second check is a work around, because the response from tmaster
+      # Second check is a work around, because the response from tmanager
       # contains value 0, if it is queries for the current timestamp,
-      # since the bucket is created in the tmaster, but is not filled
+      # since the bucket is created in the tmanager, but is not filled
       # by the metrics.
       if starttime not in self.timeline or self.timeline[starttime] == 0:
         self.timeline[starttime] = constant
@@ -87,7 +87,7 @@ def __init__(self, _):
 
   # pylint: disable=unused-argument
   @tornado.gen.coroutine
-  def execute(self, tracker, tmaster, start, end):
+  def execute(self, tracker, tmanager, start, end):
     """ execute """
     raise Exception("Not implemented exception")
 
@@ -98,7 +98,7 @@ def isOperator(self):
 
 class TS(Operator):
   """Time Series Operator. This is the basic operator that is
-  responsible for getting metrics from tmaster.
+  responsible for getting metrics from tmanager.
   Accepts a list of 3 elements:
   1. componentName
   2. instance - can be "*" for all instances, or a single instance ID
@@ -123,12 +123,12 @@ def __init__(self, children):
       raise Exception("TS expects metric name as third argument")
 
   @tornado.gen.coroutine
-  def execute(self, tracker, tmaster, start, end):
+  def execute(self, tracker, tmanager, start, end):
     # Fetch metrics for start-60 to end+60 because the minute mark
     # may be a little skewed. By getting a couple more values,
     # we can then truncate based on the interval needed.
     metrics = yield getMetricsTimeline(
-        tmaster, self.component, [self.metricName], self.instances,
+        tmanager, self.component, [self.metricName], self.instances,
         start - 60, end + 60)
     if not metrics:
       return
@@ -182,8 +182,8 @@ def __init__(self, children):
           "Second argument to DEFAULT must be an operator, but is " + str(type(self.timeseries)))
 
   @tornado.gen.coroutine
-  def execute(self, tracker, tmaster, start, end):
-    allMetrics = yield self.timeseries.execute(tracker, tmaster, start, end)
+  def execute(self, tracker, tmanager, start, end):
+    allMetrics = yield self.timeseries.execute(tracker, tmanager, start, end)
     if is_str_instance(allMetrics):
       raise Exception(allMetrics)
     for metric in allMetrics:
@@ -203,7 +203,7 @@ def __init__(self, children):
     self.timeSeriesList = children
 
   @tornado.gen.coroutine
-  def execute(self, tracker, tmaster, start, end):
+  def execute(self, tracker, tmanager, start, end):
     # Initialize the metric to be returned with sum of all the constants.
     retMetrics = Metrics(None, None, None, start, end, {})
     constants = [ts for ts in self.timeSeriesList if isinstance(ts, float)]
@@ -212,7 +212,7 @@ def execute(self, tracker, tmaster, start, end):
 
     futureMetrics = []
     for timeseries in leftOverTimeSeries:
-      futureMetrics.append(timeseries.execute(tracker, tmaster, start, end))
+      futureMetrics.append(timeseries.execute(tracker, tmanager, start, end))
     metrics = yield futureMetrics
 
     # Get all the timeseries metrics
@@ -245,7 +245,7 @@ def __init__(self, children):
     self.timeSeriesList = children
 
   @tornado.gen.coroutine
-  def execute(self, tracker, tmaster, start, end):
+  def execute(self, tracker, tmanager, start, end):
     # Initialize the metric to be returned with max of all the constants.
     retMetrics = Metrics(None, None, None, start, end, {})
     constants = [ts for ts in self.timeSeriesList if isinstance(ts, float)]
@@ -255,7 +255,7 @@ def execute(self, tracker, tmaster, start, end):
 
     futureMetrics = []
     for timeseries in leftOverTimeSeries:
-      futureMetrics.append(timeseries.execute(tracker, tmaster, start, end))
+      futureMetrics.append(timeseries.execute(tracker, tmanager, start, end))
 
     metrics = yield futureMetrics
 
@@ -300,12 +300,12 @@ def __init__(self, children):
     self.timeSeriesList = children[1:]
 
   @tornado.gen.coroutine
-  def execute(self, tracker, tmaster, start, end):
+  def execute(self, tracker, tmanager, start, end):
     leftOverTimeSeries = [ts for ts in self.timeSeriesList if not isinstance(ts, float)]
 
     futureMetrics = []
     for timeseries in leftOverTimeSeries:
-      futureMetrics.append(timeseries.execute(tracker, tmaster, start, end))
+      futureMetrics.append(timeseries.execute(tracker, tmanager, start, end))
 
     metrics = yield futureMetrics
 
@@ -369,13 +369,13 @@ def __init__(self, children):
 
   # pylint: disable=too-many-branches, too-many-statements
   @tornado.gen.coroutine
-  def execute(self, tracker, tmaster, start, end):
+  def execute(self, tracker, tmanager, start, end):
     # Future metrics so as to execute them in parallel
     futureMetrics = []
     if not isinstance(self.timeSeries1, float):
-      futureMetrics.append(self.timeSeries1.execute(tracker, tmaster, start, end))
+      futureMetrics.append(self.timeSeries1.execute(tracker, tmanager, start, end))
     if not isinstance(self.timeSeries2, float):
-      futureMetrics.append(self.timeSeries2.execute(tracker, tmaster, start, end))
+      futureMetrics.append(self.timeSeries2.execute(tracker, tmanager, start, end))
 
     futureResolvedMetrics = yield futureMetrics
 
@@ -494,13 +494,13 @@ def __init__(self, children):
 
   # pylint: disable=too-many-branches, too-many-statements
   @tornado.gen.coroutine
-  def execute(self, tracker, tmaster, start, end):
+  def execute(self, tracker, tmanager, start, end):
     # Future metrics so as to execute them in parallel
     futureMetrics = []
     if not isinstance(self.timeSeries1, float):
-      futureMetrics.append(self.timeSeries1.execute(tracker, tmaster, start, end))
+      futureMetrics.append(self.timeSeries1.execute(tracker, tmanager, start, end))
     if not isinstance(self.timeSeries2, float):
-      futureMetrics.append(self.timeSeries2.execute(tracker, tmaster, start, end))
+      futureMetrics.append(self.timeSeries2.execute(tracker, tmanager, start, end))
 
     futureResolvedMetrics = yield futureMetrics
 
@@ -617,13 +617,13 @@ def __init__(self, children):
 
   # pylint: disable=too-many-branches, too-many-statements
   @tornado.gen.coroutine
-  def execute(self, tracker, tmaster, start, end):
+  def execute(self, tracker, tmanager, start, end):
     # Future metrics so as to execute them in parallel
     futureMetrics = []
     if not isinstance(self.timeSeries1, float):
-      futureMetrics.append(self.timeSeries1.execute(tracker, tmaster, start, end))
+      futureMetrics.append(self.timeSeries1.execute(tracker, tmanager, start, end))
     if not isinstance(self.timeSeries2, float):
-      futureMetrics.append(self.timeSeries2.execute(tracker, tmaster, start, end))
+      futureMetrics.append(self.timeSeries2.execute(tracker, tmanager, start, end))
 
     futureResolvedMetrics = yield futureMetrics
 
@@ -720,9 +720,9 @@ def __init__(self, children):
     self.timeSeries = children[0]
 
   @tornado.gen.coroutine
-  def execute(self, tracker, tmaster, start, end):
+  def execute(self, tracker, tmanager, start, end):
     # Get 1 previous data point to be able to apply rate on the first data
-    metrics = yield self.timeSeries.execute(tracker, tmaster, start-60, end)
+    metrics = yield self.timeSeries.execute(tracker, tmanager, start-60, end)
 
     # Apply rate on all of them
     for metric in metrics:
diff --git a/heron/tools/tracker/src/python/topology.py b/heron/tools/tracker/src/python/topology.py
index ab513746285..4b9f077406e 100644
--- a/heron/tools/tracker/src/python/topology.py
+++ b/heron/tools/tracker/src/python/topology.py
@@ -38,7 +38,7 @@ class Topology:
   The watches are the callbacks that are called when there is
   any change in the topology instance using set_physical_plan, set_execution_state,
-  set_tmaster, and set_scheduler_location. Any other means of changing will
+  set_tmanager, and set_scheduler_location. Any other means of changing will
   not call the watches.
""" @@ -52,7 +52,7 @@ def __init__(self, name, state_manager_name): self.id = None self.cluster = None self.environ = None - self.tmaster = None + self.tmanager = None self.scheduler_location = None # A map from UUIDs to the callback @@ -162,9 +162,9 @@ def set_execution_state(self, execution_state): self.zone = cluster self.trigger_watches() - def set_tmaster(self, tmaster): + def set_tmanager(self, tmanager): """ set exectuion state """ - self.tmaster = tmaster + self.tmanager = tmanager self.trigger_watches() def set_scheduler_location(self, scheduler_location): diff --git a/heron/tools/tracker/src/python/tracker.py b/heron/tools/tracker/src/python/tracker.py index 6d701e6b5cb..b48afccfcf4 100644 --- a/heron/tools/tracker/src/python/tracker.py +++ b/heron/tools/tracker/src/python/tracker.py @@ -215,10 +215,10 @@ def on_topology_execution_state(data): if not data: Log.debug("No data to be set") - def on_topology_tmaster(data): - """set tmaster""" - Log.info("Watch triggered for topology tmaster: " + topologyName) - topology.set_tmaster(data) + def on_topology_tmanager(data): + """set tmanager""" + Log.info("Watch triggered for topology tmanager: " + topologyName) + topology.set_tmanager(data) if not data: Log.debug("No data to be set") @@ -229,11 +229,11 @@ def on_topology_scheduler_location(data): if not data: Log.debug("No data to be set") - # Set watches on the pplan, execution_state, tmaster and scheduler_location. + # Set watches on the pplan, execution_state, tmanager and scheduler_location. state_manager.get_pplan(topologyName, on_topology_pplan) state_manager.get_packing_plan(topologyName, on_topology_packing_plan) state_manager.get_execution_state(topologyName, on_topology_execution_state) - state_manager.get_tmaster(topologyName, on_topology_tmaster) + state_manager.get_tmanager(topologyName, on_topology_tmanager) state_manager.get_scheduler_location(topologyName, on_topology_scheduler_location) def removeTopology(self, topology_name, state_manager_name): @@ -270,7 +270,7 @@ def extract_execution_state(self, topology): "release_tag": execution_state.release_state.release_tag, "release_version": execution_state.release_state.release_version, "has_physical_plan": None, - "has_tmaster_location": None, + "has_tmanager_location": None, "has_scheduler_location": None, "extra_links": [], } @@ -313,7 +313,7 @@ def extract_runtime_state(topology): runtime_state = {} runtime_state["has_physical_plan"] = bool(topology.physical_plan) runtime_state["has_packing_plan"] = bool(topology.packing_plan) - runtime_state["has_tmaster_location"] = bool(topology.tmaster) + runtime_state["has_tmanager_location"] = bool(topology.tmanager) runtime_state["has_scheduler_location"] = bool(topology.scheduler_location) # "stmgrs" listed runtime state for each stream manager # however it is possible that physical plan is not complete @@ -343,28 +343,28 @@ def extract_scheduler_location(self, topology): return schedulerLocation - def extract_tmaster(self, topology): + def extract_tmanager(self, topology): """ - Returns the representation of tmaster that will + Returns the representation of tmanager that will be returned from Tracker. 
""" - tmasterLocation = { + tmanagerLocation = { "name": None, "id": None, "host": None, "controller_port": None, - "master_port": None, + "server_port": None, "stats_port": None, } - if topology.tmaster: - tmasterLocation["name"] = topology.tmaster.topology_name - tmasterLocation["id"] = topology.tmaster.topology_id - tmasterLocation["host"] = topology.tmaster.host - tmasterLocation["controller_port"] = topology.tmaster.controller_port - tmasterLocation["master_port"] = topology.tmaster.master_port - tmasterLocation["stats_port"] = topology.tmaster.stats_port + if topology.tmanager: + tmanagerLocation["name"] = topology.tmanager.topology_name + tmanagerLocation["id"] = topology.tmanager.topology_id + tmanagerLocation["host"] = topology.tmanager.host + tmanagerLocation["controller_port"] = topology.tmanager.controller_port + tmanagerLocation["server_port"] = topology.tmanager.server_port + tmanagerLocation["stats_port"] = topology.tmanager.stats_port - return tmasterLocation + return tmanagerLocation # pylint: disable=too-many-locals def extract_logical_plan(self, topology): @@ -618,9 +618,9 @@ def setTopologyInfo(self, topology): if not topology.packing_plan: has_packing_plan = False - has_tmaster_location = True - if not topology.tmaster: - has_tmaster_location = False + has_tmanager_location = True + if not topology.tmanager: + has_tmanager_location = False has_scheduler_location = True if not topology.scheduler_location: @@ -633,14 +633,14 @@ def setTopologyInfo(self, topology): "physical_plan": None, "packing_plan": None, "execution_state": None, - "tmaster_location": None, + "tmanager_location": None, "scheduler_location": None, } executionState = self.extract_execution_state(topology) executionState["has_physical_plan"] = has_physical_plan executionState["has_packing_plan"] = has_packing_plan - executionState["has_tmaster_location"] = has_tmaster_location + executionState["has_tmanager_location"] = has_tmanager_location executionState["has_scheduler_location"] = has_scheduler_location executionState["status"] = topology.get_status() @@ -651,7 +651,7 @@ def setTopologyInfo(self, topology): topologyInfo["logical_plan"] = self.extract_logical_plan(topology) topologyInfo["physical_plan"] = self.extract_physical_plan(topology) topologyInfo["packing_plan"] = self.extract_packing_plan(topology) - topologyInfo["tmaster_location"] = self.extract_tmaster(topology) + topologyInfo["tmanager_location"] = self.extract_tmanager(topology) topologyInfo["scheduler_location"] = self.extract_scheduler_location(topology) self.topologyInfos[(topology.name, topology.state_manager_name)] = topologyInfo diff --git a/heron/tools/tracker/tests/python/mock_proto.py b/heron/tools/tracker/tests/python/mock_proto.py index 82cd4def0bb..f939395d0e9 100644 --- a/heron/tools/tracker/tests/python/mock_proto.py +++ b/heron/tools/tracker/tests/python/mock_proto.py @@ -19,7 +19,7 @@ import heron.proto.execution_state_pb2 as protoEState import heron.proto.physical_plan_pb2 as protoPPlan import heron.proto.packing_plan_pb2 as protoPackingPlan -import heron.proto.tmaster_pb2 as protoTmaster +import heron.proto.tmanager_pb2 as protoTmanager import heron.proto.topology_pb2 as protoTopology # pylint: disable=no-self-use, missing-docstring @@ -223,9 +223,9 @@ def create_mock_execution_state(self): estate.environ = MockProto.environ return estate - def create_mock_tmaster(self): - tmaster = protoTmaster.TMasterLocation() - return tmaster + def create_mock_tmanager(self): + tmanager = protoTmanager.TManagerLocation() + return 
tmanager def add_topology_config(self, topology, key, value): kv = topology.topology_config.kvs.add() diff --git a/heron/tools/tracker/tests/python/query_operator_unittest.py b/heron/tools/tracker/tests/python/query_operator_unittest.py index f55dc8f6cb3..b995db73b7b 100644 --- a/heron/tools/tracker/tests/python/query_operator_unittest.py +++ b/heron/tools/tracker/tests/python/query_operator_unittest.py @@ -31,7 +31,7 @@ class QueryOperatorTests(tornado.testing.AsyncTestCase): @tornado.testing.gen_test def test_TS_execute(self): ts = TS(["a", "b", "c"]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -39,7 +39,7 @@ def test_TS_execute(self): # Return mocked timeline @tornado.gen.coroutine def getMetricTimelineSideEffect(*args): - self.assertEqual((tmaster, "a", ["c"], ["b"], 40, 360), args) + self.assertEqual((tmanager, "a", ["c"], ["b"], 40, 360), args) raise tornado.gen.Return({ "starttime": 40, "endtime": 360, @@ -60,7 +60,7 @@ def getMetricTimelineSideEffect(*args): with patch("heron.tools.tracker.src.python.query_operators.getMetricsTimeline", side_effect=getMetricTimelineSideEffect): - metrics = yield ts.execute(tracker, tmaster, start, end) + metrics = yield ts.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertEqual("b", metrics[0].instance) self.assertEqual("c", metrics[0].metricName) @@ -75,7 +75,7 @@ def getMetricTimelineSideEffect(*args): @tornado.testing.gen_test def test_TS_execute_when_no_timeline(self): ts = TS(["a", "b", "c"]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -83,7 +83,7 @@ def test_TS_execute_when_no_timeline(self): # If no timeline is returned @tornado.gen.coroutine def getMetricTimelineSideEffect(*args): - self.assertEqual((tmaster, "a", ["c"], ["b"], 40, 360), args) + self.assertEqual((tmanager, "a", ["c"], ["b"], 40, 360), args) raise tornado.gen.Return({ "message": "some_exception" }) @@ -92,12 +92,12 @@ def getMetricTimelineSideEffect(*args): with self.assertRaises(Exception): with patch("heron.tools.tracker.src.python.query_operators.getMetricsTimeline", side_effect=getMetricTimelineSideEffect): - metrics = yield ts.execute(tracker, tmaster, start, end) + metrics = yield ts.execute(tracker, tmanager, start, end) @tornado.testing.gen_test def test_TS_execute_with_multiple_instances(self): ts = TS(["a", "b", "c"]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -105,7 +105,7 @@ def test_TS_execute_with_multiple_instances(self): # With multiple instances @tornado.gen.coroutine def getMetricTimelineSideEffect(*args): - self.assertEqual((tmaster, "a", ["c"], [], 40, 360), args) + self.assertEqual((tmanager, "a", ["c"], [], 40, 360), args) raise tornado.gen.Return({ "starttime": 40, "endtime": 360, @@ -136,7 +136,7 @@ def getMetricTimelineSideEffect(*args): with patch("heron.tools.tracker.src.python.query_operators.getMetricsTimeline", side_effect=getMetricTimelineSideEffect): ts = TS(["a", "*", "c"]) - metrics = yield ts.execute(tracker, tmaster, start, end) + metrics = yield ts.execute(tracker, tmanager, start, end) self.assertEqual(2, len(metrics)) metric1 = metrics[0] metric2 = metrics[1] @@ -166,7 +166,7 @@ def getMetricTimelineSideEffect(*args): def test_DEFAULT_execute(self): ts = Mock() default = Default([float(0), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -174,7 +174,7 @@ def test_DEFAULT_execute(self): # Return mocked timeline @tornado.gen.coroutine def 
ts_side_effect(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 1.0, @@ -185,7 +185,7 @@ def ts_side_effect(*args): ]) ts.execute.side_effect = ts_side_effect - metrics = yield default.execute(tracker, tmaster, start, end) + metrics = yield default.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertEqual("instance", metrics[0].instance) self.assertEqual("metricName", metrics[0].metricName) @@ -201,7 +201,7 @@ def ts_side_effect(*args): def test_DEFAULT_execute_when_exception(self): ts = Mock() default = Default([float(0), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -213,13 +213,13 @@ def ts_side_effect2(*args): ts.execute.side_effect = ts_side_effect2 with self.assertRaises(Exception): - metrics = yield default.execute(tracker, tmaster, start, end) + metrics = yield default.execute(tracker, tmanager, start, end) @tornado.testing.gen_test def test_DEFAULT_execute_when_missing_value(self): ts = Mock() default = Default([float(0), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -227,7 +227,7 @@ def test_DEFAULT_execute_when_missing_value(self): # When missing a value @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 180: 1.0, @@ -237,7 +237,7 @@ def ts_side_effect3(*args): ]) ts.execute.side_effect = ts_side_effect3 - metrics = yield default.execute(tracker, tmaster, start, end) + metrics = yield default.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertEqual("instance", metrics[0].instance) self.assertEqual("metricName", metrics[0].metricName) @@ -253,7 +253,7 @@ def ts_side_effect3(*args): def test_DEFAULT_execute_with_multiple_ts(self): ts = Mock() default = Default([float(0), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -261,7 +261,7 @@ def test_DEFAULT_execute_with_multiple_ts(self): # Multiple timelines missing some values @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { # 120: 1.0, # Missing @@ -278,7 +278,7 @@ def ts_side_effect3(*args): ]) ts.execute.side_effect = ts_side_effect3 - metrics = yield default.execute(tracker, tmaster, start, end) + metrics = yield default.execute(tracker, tmanager, start, end) self.assertEqual(2, len(metrics)) for metric in metrics: if metric.instance == "instance": @@ -308,7 +308,7 @@ def ts_side_effect3(*args): def test_SUM_execute(self): ts = Mock() operator = Sum([float(10), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -316,7 +316,7 @@ def test_SUM_execute(self): # Return mocked timeline @tornado.gen.coroutine def ts_side_effect(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 1.0, @@ -327,7 +327,7 @@ def ts_side_effect(*args): ]) ts.execute.side_effect = ts_side_effect - metrics = yield operator.execute(tracker, 
tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertDictEqual({ 120: 11.0, @@ -340,7 +340,7 @@ def ts_side_effect(*args): def test_SUM_execute_when_exception(self): ts = Mock() operator = Sum([float(10), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -352,13 +352,13 @@ def ts_side_effect2(*args): ts.execute.side_effect = ts_side_effect2 with self.assertRaises(Exception): - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) @tornado.testing.gen_test def test_SUM_execute_when_missing_value(self): ts = Mock() operator = Sum([float(10), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -366,7 +366,7 @@ def test_SUM_execute_when_missing_value(self): # When missing a value @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 180: 1.0, @@ -376,7 +376,7 @@ def ts_side_effect3(*args): ]) ts.execute.side_effect = ts_side_effect3 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertDictEqual({ 120: 10, # Missing value filled @@ -389,7 +389,7 @@ def ts_side_effect3(*args): def test_SUM_execute_with_multiple_ts(self): ts = Mock() operator = Sum([float(10), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -397,7 +397,7 @@ def test_SUM_execute_with_multiple_ts(self): # Multiple timelines missing some values @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { # 120: 1.0, # Missing @@ -414,7 +414,7 @@ def ts_side_effect3(*args): ]) ts.execute.side_effect = ts_side_effect3 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertDictEqual({ 120: 12.0, @@ -427,7 +427,7 @@ def ts_side_effect3(*args): def test_MAX_execute(self): ts = Mock() operator = Max([ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -435,7 +435,7 @@ def test_MAX_execute(self): # Return mocked timeline @tornado.gen.coroutine def ts_side_effect(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 1.0, @@ -446,7 +446,7 @@ def ts_side_effect(*args): ]) ts.execute.side_effect = ts_side_effect - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertDictEqual({ 120: 1.0, @@ -459,7 +459,7 @@ def ts_side_effect(*args): def test_MAX_execute_when_exception(self): ts = Mock() operator = Max([ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -471,13 +471,13 @@ def ts_side_effect2(*args): ts.execute.side_effect = ts_side_effect2 with self.assertRaises(Exception): - metrics = yield operator.execute(tracker, tmaster, 
start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) @tornado.testing.gen_test def test_MAX_execute_when_missing_values(self): ts = Mock() operator = Max([ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -485,7 +485,7 @@ def test_MAX_execute_when_missing_values(self): # When missing a value @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 180: 1.0, @@ -495,7 +495,7 @@ def ts_side_effect3(*args): ]) ts.execute.side_effect = ts_side_effect3 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertDictEqual({ 180: 1.0, @@ -507,7 +507,7 @@ def ts_side_effect3(*args): def test_MAX_execute_with_multiple_ts(self): ts = Mock() operator = Max([ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -515,7 +515,7 @@ def test_MAX_execute_with_multiple_ts(self): # Multiple timelines missing some values @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { # 120: 1.0, # Missing @@ -532,7 +532,7 @@ def ts_side_effect3(*args): ]) ts.execute.side_effect = ts_side_effect3 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertDictEqual({ 120: 2.0, @@ -545,7 +545,7 @@ def ts_side_effect3(*args): def test_PERCENTILE_execute(self): ts = Mock() operator = Percentile([float(90), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -553,7 +553,7 @@ def test_PERCENTILE_execute(self): # Return mocked timeline @tornado.gen.coroutine def ts_side_effect(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 1.0, @@ -564,7 +564,7 @@ def ts_side_effect(*args): ]) ts.execute.side_effect = ts_side_effect - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertDictEqual({ 120: 1.0, @@ -577,7 +577,7 @@ def ts_side_effect(*args): def test_PERCENTILE_execute_when_exception(self): ts = Mock() operator = Percentile([float(90), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -589,13 +589,13 @@ def ts_side_effect2(*args): ts.execute.side_effect = ts_side_effect2 with self.assertRaises(Exception): - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) @tornado.testing.gen_test def test_PERCENTILE_execute_when_missing_values(self): ts = Mock() operator = Percentile([float(90), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -603,7 +603,7 @@ def test_PERCENTILE_execute_when_missing_values(self): # When missing a value @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 
100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 180: 1.0, @@ -613,7 +613,7 @@ def ts_side_effect3(*args): ]) ts.execute.side_effect = ts_side_effect3 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertDictEqual({ 180: 1.0, @@ -625,7 +625,7 @@ def ts_side_effect3(*args): def test_PERCENTILE_execute_with_multiple_ts(self): ts = Mock() operator = Percentile([float(90), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -633,7 +633,7 @@ def test_PERCENTILE_execute_with_multiple_ts(self): # Multiple timelines missing some values @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 1.0, @@ -668,7 +668,7 @@ def ts_side_effect3(*args): ]) ts.execute.side_effect = ts_side_effect3 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertDictEqual({ 120: 4.0, @@ -681,7 +681,7 @@ def ts_side_effect3(*args): def test_DIVIDE_execute(self): ts = Mock() operator = Divide([float(100), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -689,7 +689,7 @@ def test_DIVIDE_execute(self): # Return mocked timeline @tornado.gen.coroutine def ts_side_effect(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 1.0, @@ -700,7 +700,7 @@ def ts_side_effect(*args): ]) ts.execute.side_effect = ts_side_effect - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertEqual("instance", metrics[0].instance) self.assertDictEqual({ @@ -714,7 +714,7 @@ def ts_side_effect(*args): def test_DIVIDE_execute_when_exception(self): ts = Mock() operator = Divide([float(100), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -726,13 +726,13 @@ def ts_side_effect2(*args): ts.execute.side_effect = ts_side_effect2 with self.assertRaises(Exception): - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) @tornado.testing.gen_test def test_DIVIDE_execute_when_missing_values(self): ts = Mock() operator = Divide([float(100), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -740,7 +740,7 @@ def test_DIVIDE_execute_when_missing_values(self): # When missing a value @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 180: 2.0, @@ -750,7 +750,7 @@ def ts_side_effect3(*args): ]) ts.execute.side_effect = ts_side_effect3 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertEqual("instance", metrics[0].instance) self.assertDictEqual({ @@ -764,7 +764,7 @@ def 
test_DIVIDE_execute_with_multiple_ts(self): ts = Mock() ts2 = Mock() operator = Divide([ts, ts2]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -772,7 +772,7 @@ def test_DIVIDE_execute_with_multiple_ts(self): # Multiple timelines @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 1.0, @@ -809,7 +809,7 @@ def ts_side_effect3(*args): @tornado.gen.coroutine def ts_side_effect4(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 2.0, @@ -844,7 +844,7 @@ def ts_side_effect4(*args): ]) ts2.execute.side_effect = ts_side_effect4 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(5, len(metrics)) for metric in metrics: # All should have same value - 0.5 @@ -860,7 +860,7 @@ def test_DIVIDE_execute_with_mulitiple_ts_when_instances_do_not_match(self): ts = Mock() ts2 = Mock() operator = Divide([ts, ts2]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -868,7 +868,7 @@ def test_DIVIDE_execute_with_mulitiple_ts_when_instances_do_not_match(self): # Multiple timelines @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 1.0, @@ -906,7 +906,7 @@ def ts_side_effect3(*args): # When instances do not match @tornado.gen.coroutine def ts_side_effect4(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 2.0, @@ -923,7 +923,7 @@ def ts_side_effect4(*args): ]) ts2.execute.side_effect = ts_side_effect4 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(2, len(metrics)) instances = [] for metric in metrics: @@ -940,7 +940,7 @@ def ts_side_effect4(*args): def test_MULTIPLY_execute(self): ts = Mock() operator = Multiply([float(100), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -948,7 +948,7 @@ def test_MULTIPLY_execute(self): # Return mocked timeline @tornado.gen.coroutine def ts_side_effect(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 1.0, @@ -959,7 +959,7 @@ def ts_side_effect(*args): ]) ts.execute.side_effect = ts_side_effect - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertEqual("instance", metrics[0].instance) self.assertDictEqual({ @@ -973,7 +973,7 @@ def ts_side_effect(*args): def test_MULTIPLY_execute_when_exception(self): ts = Mock() operator = Multiply([float(100), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -985,13 +985,13 @@ def ts_side_effect2(*args): 
ts.execute.side_effect = ts_side_effect2 with self.assertRaises(Exception): - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) @tornado.testing.gen_test def test_MULTIPLY_execute_when_missing_values(self): ts = Mock() operator = Multiply([float(100), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -999,7 +999,7 @@ def test_MULTIPLY_execute_when_missing_values(self): # When missing a value @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 180: 2.0, @@ -1009,7 +1009,7 @@ def ts_side_effect3(*args): ]) ts.execute.side_effect = ts_side_effect3 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertEqual("instance", metrics[0].instance) self.assertDictEqual({ @@ -1023,7 +1023,7 @@ def test_MULTIPLY_execute_with_multiple_ts(self): ts = Mock() ts2 = Mock() operator = Multiply([ts, ts2]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -1031,7 +1031,7 @@ def test_MULTIPLY_execute_with_multiple_ts(self): # Multiple timelines @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 1.0, @@ -1068,7 +1068,7 @@ def ts_side_effect3(*args): @tornado.gen.coroutine def ts_side_effect4(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 2.0, @@ -1103,7 +1103,7 @@ def ts_side_effect4(*args): ]) ts2.execute.side_effect = ts_side_effect4 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(5, len(metrics)) for metric in metrics: if metric.instance == "instance": @@ -1147,7 +1147,7 @@ def test_MULTIPLY_execute_with_multiple_ts_when_instances_do_not_match(self): ts = Mock() ts2 = Mock() operator = Multiply([ts, ts2]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -1155,7 +1155,7 @@ def test_MULTIPLY_execute_with_multiple_ts_when_instances_do_not_match(self): # Multiple timelines @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 1.0, @@ -1193,7 +1193,7 @@ def ts_side_effect3(*args): # When instances do not match @tornado.gen.coroutine def ts_side_effect4(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 2.0, @@ -1210,7 +1210,7 @@ def ts_side_effect4(*args): ]) ts2.execute.side_effect = ts_side_effect4 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(2, len(metrics)) instances = [] for metric in metrics: @@ -1235,7 
+1235,7 @@ def ts_side_effect4(*args): def test_SUBTRACT_execute(self): ts = Mock() operator = Subtract([float(100), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -1243,7 +1243,7 @@ def test_SUBTRACT_execute(self): # Return mocked timeline @tornado.gen.coroutine def ts_side_effect(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 1.0, @@ -1254,7 +1254,7 @@ def ts_side_effect(*args): ]) ts.execute.side_effect = ts_side_effect - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertEqual("instance", metrics[0].instance) self.assertDictEqual({ @@ -1268,7 +1268,7 @@ def ts_side_effect(*args): def test_SUBTRACT_execute_when_exception(self): ts = Mock() operator = Subtract([float(100), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -1280,13 +1280,13 @@ def ts_side_effect2(*args): ts.execute.side_effect = ts_side_effect2 with self.assertRaises(Exception): - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) @tornado.testing.gen_test def test_SUBTRACT_execute_when_missing_values(self): ts = Mock() operator = Subtract([float(100), ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -1294,7 +1294,7 @@ def test_SUBTRACT_execute_when_missing_values(self): # When missing a value @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 180: 2.0, @@ -1304,7 +1304,7 @@ def ts_side_effect3(*args): ]) ts.execute.side_effect = ts_side_effect3 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertEqual("instance", metrics[0].instance) self.assertDictEqual({ @@ -1318,7 +1318,7 @@ def test_SUBTRACT_execute_with_multiple_ts(self): ts = Mock() ts2 = Mock() operator = Subtract([ts, ts2]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -1326,7 +1326,7 @@ def test_SUBTRACT_execute_with_multiple_ts(self): # Multiple timelines @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 1.0, @@ -1363,7 +1363,7 @@ def ts_side_effect3(*args): @tornado.gen.coroutine def ts_side_effect4(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 2.0, @@ -1398,7 +1398,7 @@ def ts_side_effect4(*args): ]) ts2.execute.side_effect = ts_side_effect4 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(5, len(metrics)) for metric in metrics: if metric.instance == "instance": @@ -1442,7 +1442,7 @@ def test_SUBTRACT_execute_with_multiple_ts_when_instances_do_not_match(self): ts = Mock() ts2 = 
Mock() operator = Subtract([ts, ts2]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -1450,7 +1450,7 @@ def test_SUBTRACT_execute_with_multiple_ts_when_instances_do_not_match(self): # Multiple timelines @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 1.0, @@ -1488,7 +1488,7 @@ def ts_side_effect3(*args): # When instances do not match @tornado.gen.coroutine def ts_side_effect4(*args): - self.assertEqual((tracker, tmaster, 100, 300), args) + self.assertEqual((tracker, tmanager, 100, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start, end, { 120: 2.0, @@ -1505,7 +1505,7 @@ def ts_side_effect4(*args): ]) ts2.execute.side_effect = ts_side_effect4 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(2, len(metrics)) instances = [] for metric in metrics: @@ -1530,7 +1530,7 @@ def ts_side_effect4(*args): def test_RATE_execute(self): ts = Mock() operator = Rate([ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -1538,7 +1538,7 @@ def test_RATE_execute(self): # Return mocked timeline @tornado.gen.coroutine def ts_side_effect(*args): - self.assertEqual((tracker, tmaster, 40, 300), args) + self.assertEqual((tracker, tmanager, 40, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start-60, end, { 60: 0.0, @@ -1550,7 +1550,7 @@ def ts_side_effect(*args): ]) ts.execute.side_effect = ts_side_effect - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertEqual("instance", metrics[0].instance) self.assertDictEqual({ @@ -1564,7 +1564,7 @@ def ts_side_effect(*args): def test_RATE_execute_when_exception(self): ts = Mock() operator = Rate([ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -1576,13 +1576,13 @@ def ts_side_effect2(*args): ts.execute.side_effect = ts_side_effect2 with self.assertRaises(Exception): - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) @tornado.testing.gen_test def test_RATE_execute_when_missing_values(self): ts = Mock() operator = Rate([ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -1590,7 +1590,7 @@ def test_RATE_execute_when_missing_values(self): # When missing a value @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 40, 300), args) + self.assertEqual((tracker, tmanager, 40, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start-60, end, { 60: 0.0, @@ -1602,7 +1602,7 @@ def ts_side_effect3(*args): ]) ts.execute.side_effect = ts_side_effect3 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(1, len(metrics)) self.assertEqual("instance", metrics[0].instance) self.assertDictEqual({ @@ -1615,7 +1615,7 @@ def ts_side_effect3(*args): def test_RATE_execute_with_multiple_ts(self): ts = Mock() operator = Rate([ts]) - tmaster = Mock() + tmanager = Mock() tracker = Mock() start = 100 end = 300 @@ -1623,7 +1623,7 
@@ def test_RATE_execute_with_multiple_ts(self): # Multiple timelines @tornado.gen.coroutine def ts_side_effect3(*args): - self.assertEqual((tracker, tmaster, 40, 300), args) + self.assertEqual((tracker, tmanager, 40, 300), args) raise tornado.gen.Return([ Metrics("component", "metricName", "instance", start-60, end, { 60: 0.0, @@ -1663,7 +1663,7 @@ def ts_side_effect3(*args): ]) ts.execute.side_effect = ts_side_effect3 - metrics = yield operator.execute(tracker, tmaster, start, end) + metrics = yield operator.execute(tracker, tmanager, start, end) self.assertEqual(5, len(metrics)) for metric in metrics: if metric.instance == "instance": diff --git a/heron/tools/tracker/tests/python/topology_unittest.py b/heron/tools/tracker/tests/python/topology_unittest.py index 8ac8bdd20be..8bf3f3219d2 100644 --- a/heron/tools/tracker/tests/python/topology_unittest.py +++ b/heron/tools/tracker/tests/python/topology_unittest.py @@ -70,14 +70,14 @@ def test_set_execution_state(self): self.assertEqual(MockProto.cluster, self.topology.cluster) self.assertEqual(MockProto.environ, self.topology.environ) - def test_set_tmaster(self): + def test_set_tmanager(self): # Set it to None - self.topology.set_tmaster(None) - self.assertIsNone(self.topology.tmaster) + self.topology.set_tmanager(None) + self.assertIsNone(self.topology.tmanager) - tmaster = MockProto().create_mock_tmaster() - self.topology.set_tmaster(tmaster) - self.assertEqual(tmaster, self.topology.tmaster) + tmanager = MockProto().create_mock_tmanager() + self.topology.set_tmanager(tmanager) + self.assertEqual(tmanager, self.topology.tmanager) def test_spouts(self): # When pplan is not set @@ -140,7 +140,7 @@ def callback(something): scope["is_called"] = False self.assertFalse(scope["is_called"]) - self.topology.set_tmaster(None) + self.topology.set_tmanager(None) self.assertTrue(scope["is_called"]) def test_unregister_watch(self): diff --git a/heron/tools/tracker/tests/python/tracker_unittest.py b/heron/tools/tracker/tests/python/tracker_unittest.py index 767d93f8138..ea3bda5abf2 100644 --- a/heron/tools/tracker/tests/python/tracker_unittest.py +++ b/heron/tools/tracker/tests/python/tracker_unittest.py @@ -215,7 +215,7 @@ def test_add_new_topology(self): self.assertEqual(2, mock_state_manager_1.get_pplan.call_count) self.assertEqual(2, mock_state_manager_1.get_execution_state.call_count) - self.assertEqual(2, mock_state_manager_1.get_tmaster.call_count) + self.assertEqual(2, mock_state_manager_1.get_tmanager.call_count) def test_remove_topology(self): self.fill_tracker_topologies() diff --git a/heron/tools/ui/resources/static/js/alltopologies.js b/heron/tools/ui/resources/static/js/alltopologies.js index fa3e8c813aa..a45534a5fe5 100644 --- a/heron/tools/ui/resources/static/js/alltopologies.js +++ b/heron/tools/ui/resources/static/js/alltopologies.js @@ -38,14 +38,14 @@ var TopologyItem = React.createClass({ } var state_class = "gradeX normal"; - if (!topology.has_tmaster_location) { + if (!topology.has_tmanager_location) { state_class = "gradeX dead"; } else if (!topology.has_physical_plan) { state_class = "gradeX weird"; } var starting_duration = 5 * 60 * 1000; // 5 minutes - if ((!topology.has_tmaster_location || !topology.has_physical_plan) + if ((!topology.has_tmanager_location || !topology.has_physical_plan) && topology.submission_time * 1000 > new Date().getTime() - starting_duration) { state_class = "gradeX starting"; } @@ -93,7 +93,7 @@ var TopologyTable = React.createClass({ environ: env, role: estate.role, has_physical_plan: 
estate.has_physical_plan, - has_tmaster_location: estate.has_tmaster_location, + has_tmanager_location: estate.has_tmanager_location, release_version: estate.release_version, submission_time: estate.submission_time, submission_user: estate.submission_user, From d4cd831d2b597c2c40e0a976e34b6115ff0b4e35 Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 20:19:46 -0400 Subject: [PATCH 10/32] renaming "topology master" to "topology manager" in heron/proto --- heron/proto/BUILD | 14 ++--- heron/proto/ckptmgr.proto | 54 +++++++++---------- heron/proto/common.proto | 2 +- heron/proto/messages.h | 2 +- heron/proto/metrics.proto | 12 ++--- heron/proto/physical_plan.proto | 2 +- heron/proto/stmgr.proto | 2 +- heron/proto/{tmaster.proto => tmanager.proto} | 26 ++++----- heron/proto/topology.proto | 6 +-- 9 files changed, 60 insertions(+), 60 deletions(-) rename heron/proto/{tmaster.proto => tmanager.proto} (92%) diff --git a/heron/proto/BUILD b/heron/proto/BUILD index 7bab2b7e2ec..822756f2fda 100644 --- a/heron/proto/BUILD +++ b/heron/proto/BUILD @@ -97,8 +97,8 @@ proto_library( ) proto_library( - name = "proto_tmaster", - src = "tmaster.proto", + name = "proto_tmanager", + src = "tmanager.proto", gen_cc = 1, gen_java = 1, gen_py = 1, @@ -121,7 +121,7 @@ proto_library( includes = ["heron/proto"], deps = [ ":proto_common", - ":proto_tmaster", + ":proto_tmanager", ], ) @@ -171,7 +171,7 @@ java_library( ":proto_scheduler_java", ":proto_stats_java", ":proto_stmgr_java", - ":proto_tmaster_java", + ":proto_tmanager_java", ":proto_topology_java", ":proto_tuple_java", "@com_google_protobuf//:protobuf_java", @@ -194,7 +194,7 @@ pex_library( ":proto_scheduler_py", ":proto_stats_py", ":proto_stmgr_py", - ":proto_tmaster_py", + ":proto_tmanager_py", ":proto_topology_py", ":proto_tuple_py", ], @@ -215,7 +215,7 @@ cc_library( "scheduler.pb.h", "stats.pb.h", "stmgr.pb.h", - "tmaster.pb.h", + "tmanager.pb.h", "topology.pb.h", "tuple.pb.h", ], @@ -230,7 +230,7 @@ cc_library( ":proto_scheduler_cc", ":proto_stats_cc", ":proto_stmgr_cc", - ":proto_tmaster_cc", + ":proto_tmanager_cc", ":proto_topology_cc", ":proto_tuple_cc", "@com_google_protobuf//:protobuf", diff --git a/heron/proto/ckptmgr.proto b/heron/proto/ckptmgr.proto index ea6162c562b..737255bb895 100644 --- a/heron/proto/ckptmgr.proto +++ b/heron/proto/ckptmgr.proto @@ -29,19 +29,19 @@ import "physical_plan.proto"; // // On Startup // For topologies needing effectively once, the startup sequence of -// tmaster/stmgr/instance is a little different. Normally, upon -// startup, all stmgrs connect to tmaster and then tmaster computes pplan +// tmanager/stmgr/instance is a little different. Normally, upon +// startup, all stmgrs connect to tmanager and then tmanager computes pplan // and distributes to stmgrs, which in turn send it to all their // local instances. As soon as instances get their pplan, they can // start processing(spouts emitting tuples, etc). However for // stateful topologies, this is a little different. First -// in addition to the pplan, tmaster needs to send the last +// in addition to the pplan, tmanager needs to send the last // consistent checkpoint id(if any) to recover from as well. // Secondly instances need to recover their prev state before // they can process any tuples. // WRT messages below, the startup sequence is the following. 
 // From the StatefulConsistentCheckpoints message stored in the
-// state manager, Tmaster picks the latest checkpoint_id and
+// state manager, Tmanager picks the latest checkpoint_id and
 // sends RestoreTopologyStateRequest to all stmgrs. Each stmgr
 // does a GetInstanceStateRequest/GetInstanceStateResponse
 // dance with its local checkpoint manager for all of its local
@@ -49,8 +49,8 @@ import "physical_plan.proto";
 // RestoreInstanceStateRequest/RestoreInstanceStateResponse
 // dance with the corresponding instances to restore the
 // state. Upon completion, stmgr sends out the
-// RestoreTopologyStateResponse message to the tmaster.
-// Once the tmaster receives this from all stmgrs, it sends
+// RestoreTopologyStateResponse message to the tmanager.
+// Once the tmanager receives this from all stmgrs, it sends
 // out the StartStmgrStatefulProcessing message to all
 // stmgrs, which in turn send out StartInstanceStatefulProcessing
 // to all their local instances. This gets the topology
 //
 // Periodic checkpoints
 // Every so often (as dictated by TOPOLOGY_STATEFUL_CHECKPOINT_INTERVAL)
-// tmaster sends out StartStatefulCheckpoint message to all
+// tmanager sends out StartStatefulCheckpoint message to all
 // stmgrs. Each stmgr sends out InitiateStatefulCheckpoint
 // message to each of its local spout instances. Because of the best effort
 // nature, this is delivered as a message. After the instance does its
@@ -67,18 +67,18 @@ import "physical_plan.proto";
 // SaveInstanceStateRequest/SaveInstanceStateResponse dance with its
 // local checkpoint manager. Upon getting a successful
 // SaveInstanceStateResponse message, stmgr sends out an
-// InstanceStateStored message notifying tmaster that the
+// InstanceStateStored message notifying tmanager that the
 // state of a particular instance has been saved. In parallel,
 // stmgr also sends out DownstreamStatefulCheckpoint message
 // to each of the downstream components to do the distributed
-// Lamport style checkpointing. Meanwhile at tmaster, when it
+// Lamport style checkpointing. Meanwhile at tmanager, when it
 // receives InstanceStateStored from all instances for a particular
 // checkpoint_id, it has reached a globally consistent checkpoint
 // and it stores it in the state manager as StatefulConsistentCheckpoint
 //
 // Failure Recovery
 // Either when a stmgr dies, or when an instance dies (which is notified
-// by the stmgr using the ResetTopologyState message), tmaster initiates
+// by the stmgr using the ResetTopologyState message), tmanager initiates
 // the recovery mechanism. The recovery mechanism is exactly the same
 // as the initial startup described above
@@ -90,7 +90,7 @@ message StatefulConsistentCheckpoint {
   // One can add more meta data about this ckpt later
 }
 
-// message stored in the state manager by the tmaster
+// message stored in the state manager by the tmanager
 message StatefulConsistentCheckpoints {
   // An ordered list of the globally consistent checkpoints
   // that have been snapshotted and can be recovered from.
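For quick reference, the restore sequence described in the comments above reduces to the following message flow (every name is a message defined in this file):

    state manager  --StatefulConsistentCheckpoints-->          tmanager
    tmanager       --RestoreTopologyStateRequest-->            every stmgr
    stmgr          <--GetInstanceStateRequest/Response-->      ckptmgr (per local instance)
    stmgr          <--RestoreInstanceStateRequest/Response-->  local instances
    stmgr          --RestoreTopologyStateResponse-->           tmanager
    tmanager       --StartStmgrStatefulProcessing-->           every stmgr
    stmgr          --StartInstanceStatefulProcessing-->        local instances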
@@ -100,10 +100,10 @@ message StatefulConsistentCheckpoints { } /* - * tmaster <-> stmgr messages + * tmanager <-> stmgr messages */ -// Message sent to stmgrs by the tmaster to initiate checkpointing +// Message sent to stmgrs by the tmanager to initiate checkpointing message StartStatefulCheckpoint { required string checkpoint_id = 1; } @@ -114,19 +114,19 @@ message StatefulConsistentCheckpointSaved { required StatefulConsistentCheckpoint consistent_checkpoint = 1; } -// Message sent by tmaster to stmgr asking them to reset their instances +// Message sent by tmanager to stmgr asking them to reset their instances // to this checkpoint message RestoreTopologyStateRequest { required string checkpoint_id = 1; - // Every Attempt made by Tmaster to restore to a globally consistent + // Every Attempt made by Tmanager to restore to a globally consistent // checkpoint is tagged with a unique id. The stmgrs include this in their - // RestoreTopologyStateResponse below. This way tmaster could + // RestoreTopologyStateResponse below. This way tmanager could // try restoring to the same checkpoint_id multiple times and still // keep track of for which attempt the restore was in the responses required int64 restore_txid = 2; } -// Message that stmgr sends to tmaster after it restores +// Message that stmgr sends to tmanager after it restores // all of its local instances to a checkpoint_id message RestoreTopologyStateResponse { required heron.proto.system.Status status = 1; @@ -134,7 +134,7 @@ message RestoreTopologyStateResponse { required int64 restore_txid = 3; } -// Message sent by stmgr to tmaster asking it to reset the topology +// Message sent by stmgr to tmanager asking it to reset the topology // to some valid checkpoint. This is sent either if stmgr dies // and comes back up or if an instance dies. message ResetTopologyState { @@ -149,7 +149,7 @@ message ResetTopologyState { required string reason = 3; } -// Message sent by stmgr to tmaster informing it about +// Message sent by stmgr to tmanager informing it about // the fact that we stored a checkpoint belonging // to the instance message InstanceStateStored { @@ -157,7 +157,7 @@ message InstanceStateStored { required heron.proto.system.Instance instance = 2; } -// Message sent by tmaster to stmgr to start processing +// Message sent by tmanager to stmgr to start processing // For stateful processing, all the topology components // should be rolled back to a consistent state before they can // start processing. @@ -166,25 +166,25 @@ message StartStmgrStatefulProcessing { } /* - * tmaster -> ckptmgr messages + * tmanager -> ckptmgr messages */ -// This is the message that a tmaster sends +// This is the message that a tmanager sends // when it wants to register itself // with checkpoint manager -message RegisterTMasterRequest { +message RegisterTManagerRequest { required string topology_name = 1; required string topology_id = 2; } // This is the message that checkpoint manager // sends when it receives the register request -// from tmaster -message RegisterTMasterResponse { +// from tmanager +message RegisterTManagerResponse { required heron.proto.system.Status status = 1; } -// Message sent by tmaster to ckptmgr to cleanup +// Message sent by tmanager to ckptmgr to cleanup // old checkpoint state. 
message CleanStatefulCheckpointRequest { // Any checkpoints older than this can be cleaned @@ -192,7 +192,7 @@ message CleanStatefulCheckpointRequest { optional bool clean_all_checkpoints = 2; } -// Message sent by ckptmgr to tmaster about the cleanup request +// Message sent by ckptmgr to tmanager about the cleanup request message CleanStatefulCheckpointResponse { required heron.proto.system.Status status = 1; repeated string cleaned_checkpoint_ids = 2; diff --git a/heron/proto/common.proto b/heron/proto/common.proto index e49b5772670..c27ba3bbb6a 100644 --- a/heron/proto/common.proto +++ b/heron/proto/common.proto @@ -39,7 +39,7 @@ enum StatusCode { // State specific errors PATH_DOES_NOT_EXIST = 2000; PATH_ALREADY_EXISTS = 2001; - TMASTERLOCATION_ALREADY_EXISTS = 2002; + TMANAGERLOCATION_ALREADY_EXISTS = 2002; STATE_CORRUPTED = 2003; STATE_READ_ERROR = 2004; STATE_WRITE_ERROR = 2005; diff --git a/heron/proto/messages.h b/heron/proto/messages.h index 44196b315ab..6d7ac93859d 100644 --- a/heron/proto/messages.h +++ b/heron/proto/messages.h @@ -26,7 +26,7 @@ #include "proto/execution_state.pb.h" #include "proto/physical_plan.pb.h" #include "proto/tuple.pb.h" -#include "proto/tmaster.pb.h" +#include "proto/tmanager.pb.h" #include "proto/scheduler.pb.h" #include "proto/stmgr.pb.h" #include "proto/metrics.pb.h" diff --git a/heron/proto/metrics.proto b/heron/proto/metrics.proto index d439c24688f..51dd35e9920 100644 --- a/heron/proto/metrics.proto +++ b/heron/proto/metrics.proto @@ -22,11 +22,11 @@ option java_package = "org.apache.heron.proto.system"; option java_outer_classname = "Metrics"; import "common.proto"; -import "tmaster.proto"; +import "tmanager.proto"; // This file defines the various protocol buffers needed // for sending metrics from all the system components(workers, -// (stmgrs, tmasters). The components first register themselves +// (stmgrs, tmanagers). The components first register themselves // with MetricPublisherRegisterRequest. And then everytime // they want to send a metric, they use the // MetricPublisherPublishMessage to send out the actual message @@ -76,12 +76,12 @@ message MetricPublisherPublishMessage { } -// This is the tmaster location refresh message sent +// This is the tmanager location refresh message sent // by the stmgrs to the metricsmgrs -message TMasterLocationRefreshMessage { - required heron.proto.tmaster.TMasterLocation tmaster = 1; +message TManagerLocationRefreshMessage { + required heron.proto.tmanager.TManagerLocation tmanager = 1; } message MetricsCacheLocationRefreshMessage { - required heron.proto.tmaster.MetricsCacheLocation metricscache = 1; + required heron.proto.tmanager.MetricsCacheLocation metricscache = 1; } diff --git a/heron/proto/physical_plan.proto b/heron/proto/physical_plan.proto index 2094fa5e09b..417e4ec95d4 100644 --- a/heron/proto/physical_plan.proto +++ b/heron/proto/physical_plan.proto @@ -41,7 +41,7 @@ message StMgr { // Http port to connect to heron-shell. // The port is not used by stmgr itself, except to - // pass it on to tmaster to be included in pplan. + // pass it on to tmanager to be included in pplan. 
optional int32 shell_port = 7; } diff --git a/heron/proto/stmgr.proto b/heron/proto/stmgr.proto index a5258b3dc02..0ff7e8dc0b9 100644 --- a/heron/proto/stmgr.proto +++ b/heron/proto/stmgr.proto @@ -24,7 +24,7 @@ import "common.proto"; import "physical_plan.proto"; // -// Messages sent by topologymaster +// Messages sent by topologymanager // message NewPhysicalPlanMessage { diff --git a/heron/proto/tmaster.proto b/heron/proto/tmanager.proto similarity index 92% rename from heron/proto/tmaster.proto rename to heron/proto/tmanager.proto index 5c727eeccc4..8043b83e19f 100644 --- a/heron/proto/tmaster.proto +++ b/heron/proto/tmanager.proto @@ -16,28 +16,28 @@ // under the License. syntax = "proto2"; -package heron.proto.tmaster; +package heron.proto.tmanager; -option java_package = "org.apache.heron.proto.tmaster"; -option java_outer_classname = "TopologyMaster"; +option java_package = "org.apache.heron.proto.tmanager"; +option java_outer_classname = "TopologyManager"; import "common.proto"; import "stats.proto"; import "physical_plan.proto"; -message TMasterLocation { +message TManagerLocation { required string topology_name = 1; required string topology_id = 2; required string host = 3; - // The port to talk to the tmaster + // The port to talk to the tmanager // for topology control actions // like Activate/DeActivate/Drain etc required int32 controller_port = 4; // The port that different components - // of the topology use to talk to the tmaster + // of the topology use to talk to the tmanager // like stmgr for getting pplans // and metrics mgr for sending stats - required int32 master_port = 5; + required int32 server_port = 5; // The port that is a http endpoint // to publish stats about the topology optional int32 stats_port = 6; @@ -54,7 +54,7 @@ message MetricsCacheLocation { // The port that different components // of the topology use to talk to the metricscache // like metrics mgr for sending stats - required int32 master_port = 5; + required int32 server_port = 5; // The port that is a http endpoint // to publish stats about the topology optional int32 stats_port = 6; @@ -103,7 +103,7 @@ message MetricDatum { required int64 timestamp = 5; } -message TmasterExceptionLog { +message TmanagerExceptionLog { // Source of exception. required string component_name = 1; // Current hostname. @@ -126,12 +126,12 @@ message TmasterExceptionLog { message PublishMetrics { repeated MetricDatum metrics = 1; - repeated TmasterExceptionLog exceptions = 2; + repeated TmanagerExceptionLog exceptions = 2; } // // interface called by the UI -// TMaster exposes a web endpoint +// TManager exposes a web endpoint // You send a post request with the protobuf // as the data. The response will be MetricResponse // @@ -187,10 +187,10 @@ message MetricResponse { message ExceptionLogResponse { required heron.proto.system.Status status = 1; // List of exceptions. - repeated TmasterExceptionLog exceptions = 2; + repeated TmanagerExceptionLog exceptions = 2; } -// Request to fetch exception from tmaster store of exception and metrics +// Request to fetch exception from tmanager store of exception and metrics message ExceptionLogRequest { // TODO: Make this repeated so that single request can send data for multiple components. 
   required string component_name = 1;
diff --git a/heron/proto/topology.proto b/heron/proto/topology.proto
index 226ffc5e014..bbe85212397 100644
--- a/heron/proto/topology.proto
+++ b/heron/proto/topology.proto
@@ -24,14 +24,14 @@ option java_outer_classname = "TopologyAPI";
 // This file defines the logic plan of a topology, including
 // components definition, stream schema and others.
 // 1. Heron submitter pushes the message Topology to state manager at node: topologies/{topology_name}
-// 2. When TMaster first time starts, it reads Topology from state manager at node topologies/{topology_name},
-// 3. TMaster constructs and distribute PhysicalPlan basing on Topology and StrMgrHelloRequest.
+// 2. When the TManager starts for the first time, it reads Topology from state manager at node topologies/{topology_name},
+// 3. The TManager constructs and distributes the PhysicalPlan based on Topology and StrMgrHelloRequest.
 //    It also pushes the PhysicalPlan to state manager at node: pplans/{topology_name}
 //
 // Note:
 // 1. message PhysicalPlan also contains a copy of message Topology. We distinguish them:
 //    - topologies/{topology_name} consists of the topology logic plan first submitted by user,
-//      it shall only be used to construct the PhysicalPlan when TMaster first time starts
+//      it shall only be used to construct the PhysicalPlan when the TManager starts for the first time
 //    - pplans/{topology_name} reflects the dynamic state of the topology. Initially, it shall be the same
 //      as what user has submitted.
 //      Any runtime changes to Topology are made to the Topology inside the node pplans/{topology_name}.

From 9c5c483487003c96ee3d4628497f3ced80a53192 Mon Sep 17 00:00:00 2001
From: Jim Bo
Date: Sun, 25 Oct 2020 20:55:36 -0400
Subject: [PATCH 11/32] renaming "topology master" to "topology manager" in
 heron/config

---
 .../config/src/yaml/conf/aurora/heron.aurora  |  10 +-
 .../src/yaml/conf/aurora/heron_internals.yaml |  48 ++---
 .../src/yaml/conf/aurora/metrics_sinks.yaml   |  38 ++--
 .../src/yaml/conf/aurora/scheduler.yaml       |   2 +-
 .../yaml/conf/examples/heron_internals.yaml   |  48 ++---
 .../src/yaml/conf/examples/metrics_sinks.yaml |  38 ++--
 .../yaml/conf/kubernetes/heron_internals.yaml |  48 ++---
 .../yaml/conf/kubernetes/metrics_sinks.yaml   |  38 ++--
 .../src/yaml/conf/local/heron_internals.yaml  |  48 ++---
 .../src/yaml/conf/local/metrics_sinks.yaml    |  38 ++--
 heron/config/src/yaml/conf/localzk/README     |   2 +-
 .../yaml/conf/localzk/heron_internals.yaml    |  48 ++---
 .../src/yaml/conf/localzk/metrics_sinks.yaml  |  38 ++--
 .../yaml/conf/marathon/heron_internals.yaml   |  48 ++---
 .../src/yaml/conf/marathon/metrics_sinks.yaml |  38 ++--
 .../src/yaml/conf/mesos/heron_internals.yaml  |  48 ++---
 .../src/yaml/conf/mesos/metrics_sinks.yaml    |  38 ++--
 .../src/yaml/conf/nomad/heron_internals.yaml  |  48 ++---
 .../src/yaml/conf/nomad/metrics_sinks.yaml    |  38 ++--
 .../yaml/conf/sandbox/heron_internals.yaml    |  48 ++---
 .../src/yaml/conf/sandbox/metrics_sinks.yaml  |  38 ++--
 .../src/yaml/conf/slurm/heron_internals.yaml  | 164 +++++++++---------
 .../src/yaml/conf/slurm/metrics_sinks.yaml    |  38 ++--
 .../yaml/conf/standalone/heron_internals.yaml |  48 ++---
 .../yaml/conf/standalone/metrics_sinks.yaml   |  38 ++--
 .../resources/{master.hcl => primary.hcl}     |   2 +-
 ...ve.template.hcl => secondary.template.hcl} |   4 +-
 .../yaml/conf/test/test_heron_internals.yaml  |  48 ++---
 .../yaml/conf/test/test_metrics_sinks.yaml    |  36 ++--
 .../src/yaml/conf/yarn/heron_internals.yaml   |  48 ++---
 .../src/yaml/conf/yarn/metrics_sinks.yaml     |  38 ++--
 31 files changed, 626 insertions(+), 626 deletions(-)
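The topology.proto comments above describe a two-node lifecycle in the state manager: the submitter writes the logical Topology under topologies/{topology_name}, and the TManager, on first start, derives a PhysicalPlan from it and writes that under pplans/{topology_name}. Below is a minimal sketch of that flow against a toy in-memory state manager; the class and helper names (InMemoryStateManager, build_physical_plan) are hypothetical illustrations, not Heron's actual client API.

```python
class InMemoryStateManager:
    """Toy stand-in for Heron's ZooKeeper/localfs state managers:
    maps node paths (e.g. topologies/{name}) to stored blobs."""

    def __init__(self):
        self.nodes = {}

    def put(self, path, blob):
        self.nodes[path] = blob

    def get(self, path):
        return self.nodes.get(path)


def build_physical_plan(topology):
    # The real TManager also folds in StMgrHelloRequest info from the
    # stream managers; here we only wrap the logical plan to show that
    # the two state-manager nodes hold distinct documents.
    return {"topology": topology, "stmgrs": []}


state = InMemoryStateManager()

# 1. Submitter pushes the logical plan once; this node is never mutated later.
state.put("topologies/word-count",
          {"spouts": ["sentence-spout"], "bolts": ["split-bolt"]})

# 2./3. TManager first start: read the logical plan, derive the physical
# plan, and publish it under the separate pplans/ node.
topology = state.get("topologies/word-count")
state.put("pplans/word-count", build_physical_plan(topology))

# Runtime changes (scaling, reassignment) touch only pplans/word-count;
# topologies/word-count stays the pristine, as-submitted baseline.
print(state.get("pplans/word-count"))
```

Keeping the submitted plan immutable under topologies/{topology_name} means a restarted TManager can always rebuild from a known-good baseline, while pplans/{topology_name} absorbs all runtime mutation.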
rename heron/config/src/yaml/conf/standalone/resources/{master.hcl => primary.hcl} (97%) rename heron/config/src/yaml/conf/standalone/templates/{slave.template.hcl => secondary.template.hcl} (93%) diff --git a/heron/config/src/yaml/conf/aurora/heron.aurora b/heron/config/src/yaml/conf/aurora/heron.aurora index d939fa83690..1826e7faa61 100644 --- a/heron/config/src/yaml/conf/aurora/heron.aurora +++ b/heron/config/src/yaml/conf/aurora/heron.aurora @@ -17,7 +17,7 @@ """ Launch the topology as a single aurora job with multiple instances. -The heron-executor is responsible for starting a tmaster (container 0) +The heron-executor is responsible for starting a tmanager (container 0) and regular stmgr/metricsmgr/instances (container index > 0). """ @@ -40,13 +40,13 @@ fetch_user_package = Process( command_to_start_executor = \ '{{EXECUTOR_BINARY}}' \ ' --shard={{mesos.instance}}' \ - ' --master-port={{thermos.ports[port1]}}' \ - ' --tmaster-controller-port={{thermos.ports[port2]}}' \ - ' --tmaster-stats-port={{thermos.ports[port3]}}' \ + ' --server-port={{thermos.ports[port1]}}' \ + ' --tmanager-controller-port={{thermos.ports[port2]}}' \ + ' --tmanager-stats-port={{thermos.ports[port3]}}' \ ' --shell-port={{thermos.ports[http]}}' \ ' --metrics-manager-port={{thermos.ports[port4]}}' \ ' --scheduler-port={{thermos.ports[scheduler]}}' \ - ' --metricscache-manager-master-port={{thermos.ports[metricscachemgr_masterport]}}' \ + ' --metricscache-manager-server-port={{thermos.ports[metricscachemgr_serverport]}}' \ ' --metricscache-manager-stats-port={{thermos.ports[metricscachemgr_statsport]}}' \ ' --checkpoint-manager-port={{thermos.ports[ckptmgr_port]}}' \ ' {{TOPOLOGY_ARGUMENTS}}' diff --git a/heron/config/src/yaml/conf/aurora/heron_internals.yaml b/heron/config/src/yaml/conf/aurora/heron_internals.yaml index 0870cfbe334..22685e4e1a1 100644 --- a/heron/config/src/yaml/conf/aurora/heron_internals.yaml +++ b/heron/config/src/yaml/conf/aurora/heron_internals.yaml @@ -40,8 +40,8 @@ heron.logging.maximum.size.mb: 100 # The maximum number of log files heron.logging.maximum.files: 5 -# The interval in seconds after which to check if the tmaster location has been fetched or not -heron.check.tmaster.location.interval.sec: 120 +# The interval in seconds after which to check if the tmanager location has been fetched or not +heron.check.tmanager.location.interval.sec: 120 # The interval in seconds to prune logging files in C++ heron.logging.prune.interval.sec: 300 @@ -86,16 +86,16 @@ heron.streammgr.mempool.max.message.number: 512 heron.streammgr.client.reconnect.interval.sec: 1 # The reconnect interval to tamster in second for stream manager client -heron.streammgr.client.reconnect.tmaster.interval.sec: 10 +heron.streammgr.client.reconnect.tmanager.interval.sec: 10 -# The max reconnect attempts to tmaster for stream manager client -heron.streammgr.client.reconnect.tmaster.max.attempts: 30 +# The max reconnect attempts to tmanager for stream manager client +heron.streammgr.client.reconnect.tmanager.max.attempts: 30 # The maximum packet size in MB of stream manager's network options heron.streammgr.network.options.maximum.packet.mb: 10 # The interval in seconds to send heartbeat -heron.streammgr.tmaster.heartbeat.interval.sec: 10 +heron.streammgr.tmanager.heartbeat.interval.sec: 10 # Maximum batch size in MB to read by stream manager from socket heron.streammgr.connection.read.batch.size.mb: 1 @@ -114,38 +114,38 @@ heron.streammgr.network.backpressure.highwatermark.mb: 100 
heron.streammgr.network.backpressure.lowwatermark.mb: 50 ################################################################################ -# Configs related to Topology Master, starts with heron.tmaster.* +# Configs related to Topology Manager, starts with heron.tmanager.* ################################################################################ -# The maximum interval in minutes of metrics to be kept in tmaster -heron.tmaster.metrics.collector.maximum.interval.min: 180 +# The maximum interval in minutes of metrics to be kept in tmanager +heron.tmanager.metrics.collector.maximum.interval.min: 180 -# The maximum time to retry to establish the tmaster -heron.tmaster.establish.retry.times: 30 +# The maximum time to retry to establish the tmanager +heron.tmanager.establish.retry.times: 30 -# The interval to retry to establish the tmaster -heron.tmaster.establish.retry.interval.sec: 1 +# The interval to retry to establish the tmanager +heron.tmanager.establish.retry.interval.sec: 1 -# Maximum packet size in MB of tmaster's network options to connect to stream managers -heron.tmaster.network.master.options.maximum.packet.mb: 16 +# Maximum packet size in MB of tmanager's network options to connect to stream managers +heron.tmanager.network.server.options.maximum.packet.mb: 16 -# Maximum packet size in MB of tmaster's network options to connect to scheduler -heron.tmaster.network.controller.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options to connect to scheduler +heron.tmanager.network.controller.options.maximum.packet.mb: 1 -# Maximum packet size in MB of tmaster's network options for stat queries -heron.tmaster.network.stats.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options for stat queries +heron.tmanager.network.stats.options.maximum.packet.mb: 1 -# The interval for tmaster to purge metrics from socket -heron.tmaster.metrics.collector.purge.interval.sec: 60 +# The interval for tmanager to purge metrics from socket +heron.tmanager.metrics.collector.purge.interval.sec: 60 # The maximum # of exceptions to be stored in tmetrics collector, to prevent potential OOM -heron.tmaster.metrics.collector.maximum.exception: 256 +heron.tmanager.metrics.collector.maximum.exception: 256 # Should the metrics reporter bind on all interfaces -heron.tmaster.metrics.network.bindallinterfaces: False +heron.tmanager.metrics.network.bindallinterfaces: False # The timeout in seconds for stream mgr, compared with (current time - last heartbeat time) -heron.tmaster.stmgr.state.timeout.sec: 60 +heron.tmanager.stmgr.state.timeout.sec: 60 ################################################################################ # Configs related to Metrics Manager, starts with heron.metricsmgr.* diff --git a/heron/config/src/yaml/conf/aurora/metrics_sinks.yaml b/heron/config/src/yaml/conf/aurora/metrics_sinks.yaml index b87e270de61..65302090974 100644 --- a/heron/config/src/yaml/conf/aurora/metrics_sinks.yaml +++ b/heron/config/src/yaml/conf/aurora/metrics_sinks.yaml @@ -20,7 +20,7 @@ # We would specify the unique sink-id first sinks: - file-sink - - tmaster-sink + - tmanager-sink - metricscache-sink ########### Now we would specify the detailed configuration for every unique sink @@ -43,22 +43,22 @@ file-sink: filename-output: "metrics.json" # File for metrics to write to file-maximum: 5 # maximum number of file saved in disk -### Config for tmaster-sink -tmaster-sink: - class: "org.apache.heron.metricsmgr.sink.tmaster.TMasterSink" +### Config for 
tmanager-sink +tmanager-sink: + class: "org.apache.heron.metricsmgr.sink.tmanager.TManagerSink" flush-frequency-ms: 60000 sink-restart-attempts: -1 # Forever - tmaster-location-check-interval-sec: 5 - tmaster-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + tmanager-location-check-interval-sec: 5 + tmanager-client: + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes - tmaster-metrics-type: + tmanager-metrics-type: "__emit-count": SUM "__execute-count": SUM "__fail-count": SUM @@ -81,12 +81,12 @@ metricscache-sink: sink-restart-attempts: -1 # Forever metricscache-location-check-interval-sec: 5 metricscache-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received 
buffer size in bytes metricscache-metrics-type: diff --git a/heron/config/src/yaml/conf/aurora/scheduler.yaml b/heron/config/src/yaml/conf/aurora/scheduler.yaml index 4d113d1e373..a3a7a5a53de 100644 --- a/heron/config/src/yaml/conf/aurora/scheduler.yaml +++ b/heron/config/src/yaml/conf/aurora/scheduler.yaml @@ -42,7 +42,7 @@ heron.scheduler.job.kill.retry.interval.ms: 2000 #################################################################### # Following are config for tunneling #################################################################### -# Whether we should attempt to tunnel if there is no direct access to a remote host (e.g. TMaster) +# Whether we should attempt to tunnel if there is no direct access to a remote host (e.g. TManager) heron.scheduler.is.tunnel.needed: False # The connection timeout in ms when testing if we can connect to remote host diff --git a/heron/config/src/yaml/conf/examples/heron_internals.yaml b/heron/config/src/yaml/conf/examples/heron_internals.yaml index 0870cfbe334..22685e4e1a1 100644 --- a/heron/config/src/yaml/conf/examples/heron_internals.yaml +++ b/heron/config/src/yaml/conf/examples/heron_internals.yaml @@ -40,8 +40,8 @@ heron.logging.maximum.size.mb: 100 # The maximum number of log files heron.logging.maximum.files: 5 -# The interval in seconds after which to check if the tmaster location has been fetched or not -heron.check.tmaster.location.interval.sec: 120 +# The interval in seconds after which to check if the tmanager location has been fetched or not +heron.check.tmanager.location.interval.sec: 120 # The interval in seconds to prune logging files in C++ heron.logging.prune.interval.sec: 300 @@ -86,16 +86,16 @@ heron.streammgr.mempool.max.message.number: 512 heron.streammgr.client.reconnect.interval.sec: 1 # The reconnect interval to tamster in second for stream manager client -heron.streammgr.client.reconnect.tmaster.interval.sec: 10 +heron.streammgr.client.reconnect.tmanager.interval.sec: 10 -# The max reconnect attempts to tmaster for stream manager client -heron.streammgr.client.reconnect.tmaster.max.attempts: 30 +# The max reconnect attempts to tmanager for stream manager client +heron.streammgr.client.reconnect.tmanager.max.attempts: 30 # The maximum packet size in MB of stream manager's network options heron.streammgr.network.options.maximum.packet.mb: 10 # The interval in seconds to send heartbeat -heron.streammgr.tmaster.heartbeat.interval.sec: 10 +heron.streammgr.tmanager.heartbeat.interval.sec: 10 # Maximum batch size in MB to read by stream manager from socket heron.streammgr.connection.read.batch.size.mb: 1 @@ -114,38 +114,38 @@ heron.streammgr.network.backpressure.highwatermark.mb: 100 heron.streammgr.network.backpressure.lowwatermark.mb: 50 ################################################################################ -# Configs related to Topology Master, starts with heron.tmaster.* +# Configs related to Topology Manager, starts with heron.tmanager.* ################################################################################ -# The maximum interval in minutes of metrics to be kept in tmaster -heron.tmaster.metrics.collector.maximum.interval.min: 180 +# The maximum interval in minutes of metrics to be kept in tmanager +heron.tmanager.metrics.collector.maximum.interval.min: 180 -# The maximum time to retry to establish the tmaster -heron.tmaster.establish.retry.times: 30 +# The maximum time to retry to establish the tmanager +heron.tmanager.establish.retry.times: 30 -# The interval to retry to establish the tmaster 
-heron.tmaster.establish.retry.interval.sec: 1 +# The interval to retry to establish the tmanager +heron.tmanager.establish.retry.interval.sec: 1 -# Maximum packet size in MB of tmaster's network options to connect to stream managers -heron.tmaster.network.master.options.maximum.packet.mb: 16 +# Maximum packet size in MB of tmanager's network options to connect to stream managers +heron.tmanager.network.server.options.maximum.packet.mb: 16 -# Maximum packet size in MB of tmaster's network options to connect to scheduler -heron.tmaster.network.controller.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options to connect to scheduler +heron.tmanager.network.controller.options.maximum.packet.mb: 1 -# Maximum packet size in MB of tmaster's network options for stat queries -heron.tmaster.network.stats.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options for stat queries +heron.tmanager.network.stats.options.maximum.packet.mb: 1 -# The interval for tmaster to purge metrics from socket -heron.tmaster.metrics.collector.purge.interval.sec: 60 +# The interval for tmanager to purge metrics from socket +heron.tmanager.metrics.collector.purge.interval.sec: 60 # The maximum # of exceptions to be stored in tmetrics collector, to prevent potential OOM -heron.tmaster.metrics.collector.maximum.exception: 256 +heron.tmanager.metrics.collector.maximum.exception: 256 # Should the metrics reporter bind on all interfaces -heron.tmaster.metrics.network.bindallinterfaces: False +heron.tmanager.metrics.network.bindallinterfaces: False # The timeout in seconds for stream mgr, compared with (current time - last heartbeat time) -heron.tmaster.stmgr.state.timeout.sec: 60 +heron.tmanager.stmgr.state.timeout.sec: 60 ################################################################################ # Configs related to Metrics Manager, starts with heron.metricsmgr.* diff --git a/heron/config/src/yaml/conf/examples/metrics_sinks.yaml b/heron/config/src/yaml/conf/examples/metrics_sinks.yaml index 4a80da6ef7b..9d36164cf25 100644 --- a/heron/config/src/yaml/conf/examples/metrics_sinks.yaml +++ b/heron/config/src/yaml/conf/examples/metrics_sinks.yaml @@ -20,7 +20,7 @@ # We would specify the unique sink-id first sinks: - file-sink - - tmaster-sink + - tmanager-sink - metricscache-sink ########### Now we would specify the detailed configuration for every unique sink @@ -43,22 +43,22 @@ file-sink: filename-output: "metrics.json" # File for metrics to write to file-maximum: 5 # maximum number of file saved in disk -### Config for tmaster-sink -tmaster-sink: - class: "org.apache.heron.metricsmgr.sink.tmaster.TMasterSink" +### Config for tmanager-sink +tmanager-sink: + class: "org.apache.heron.metricsmgr.sink.tmanager.TManagerSink" flush-frequency-ms: 60000 sink-restart-attempts: -1 # Forever - tmaster-location-check-interval-sec: 5 - tmaster-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics 
Manager to write to TMaster per attempt + tmanager-location-check-interval-sec: 5 + tmanager-client: + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes - tmaster-metrics-type: + tmanager-metrics-type: "__emit-count": SUM "__execute-count": SUM "__fail-count": SUM @@ -81,12 +81,12 @@ metricscache-sink: sink-restart-attempts: -1 # Forever metricscache-location-check-interval-sec: 5 metricscache-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes metricscache-metrics-type: diff --git a/heron/config/src/yaml/conf/kubernetes/heron_internals.yaml b/heron/config/src/yaml/conf/kubernetes/heron_internals.yaml index 791f76dcd74..a0358e64ae9 100644 --- a/heron/config/src/yaml/conf/kubernetes/heron_internals.yaml +++ b/heron/config/src/yaml/conf/kubernetes/heron_internals.yaml @@ -40,8 +40,8 @@ heron.logging.maximum.size.mb: 100 # The maximum number of log files heron.logging.maximum.files: 5 -# The interval in seconds after which to check if the tmaster location has been fetched or not -heron.check.tmaster.location.interval.sec: 120 +# The interval in seconds after which to check if the tmanager location has been fetched or not +heron.check.tmanager.location.interval.sec: 120 # The interval in seconds to prune logging files in C++ 
heron.logging.prune.interval.sec: 300 @@ -86,16 +86,16 @@ heron.streammgr.mempool.max.message.number: 512 heron.streammgr.client.reconnect.interval.sec: 1 # The reconnect interval to tamster in second for stream manager client -heron.streammgr.client.reconnect.tmaster.interval.sec: 10 +heron.streammgr.client.reconnect.tmanager.interval.sec: 10 -# The max reconnect attempts to tmaster for stream manager client -heron.streammgr.client.reconnect.tmaster.max.attempts: 30 +# The max reconnect attempts to tmanager for stream manager client +heron.streammgr.client.reconnect.tmanager.max.attempts: 30 # The maximum packet size in MB of stream manager's network options heron.streammgr.network.options.maximum.packet.mb: 10 # The interval in seconds to send heartbeat -heron.streammgr.tmaster.heartbeat.interval.sec: 10 +heron.streammgr.tmanager.heartbeat.interval.sec: 10 # Maximum batch size in MB to read by stream manager from socket heron.streammgr.connection.read.batch.size.mb: 1 @@ -114,38 +114,38 @@ heron.streammgr.network.backpressure.highwatermark.mb: 100 heron.streammgr.network.backpressure.lowwatermark.mb: 50 ################################################################################ -# Configs related to Topology Master, starts with heron.tmaster.* +# Configs related to Topology Manager, starts with heron.tmanager.* ################################################################################ -# The maximum interval in minutes of metrics to be kept in tmaster -heron.tmaster.metrics.collector.maximum.interval.min: 180 +# The maximum interval in minutes of metrics to be kept in tmanager +heron.tmanager.metrics.collector.maximum.interval.min: 180 -# The maximum time to retry to establish the tmaster -heron.tmaster.establish.retry.times: 30 +# The maximum time to retry to establish the tmanager +heron.tmanager.establish.retry.times: 30 -# The interval to retry to establish the tmaster -heron.tmaster.establish.retry.interval.sec: 1 +# The interval to retry to establish the tmanager +heron.tmanager.establish.retry.interval.sec: 1 -# Maximum packet size in MB of tmaster's network options to connect to stream managers -heron.tmaster.network.master.options.maximum.packet.mb: 16 +# Maximum packet size in MB of tmanager's network options to connect to stream managers +heron.tmanager.network.server.options.maximum.packet.mb: 16 -# Maximum packet size in MB of tmaster's network options to connect to scheduler -heron.tmaster.network.controller.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options to connect to scheduler +heron.tmanager.network.controller.options.maximum.packet.mb: 1 -# Maximum packet size in MB of tmaster's network options for stat queries -heron.tmaster.network.stats.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options for stat queries +heron.tmanager.network.stats.options.maximum.packet.mb: 1 -# The interval for tmaster to purge metrics from socket -heron.tmaster.metrics.collector.purge.interval.sec: 60 +# The interval for tmanager to purge metrics from socket +heron.tmanager.metrics.collector.purge.interval.sec: 60 # The maximum # of exceptions to be stored in tmetrics collector, to prevent potential OOM -heron.tmaster.metrics.collector.maximum.exception: 256 +heron.tmanager.metrics.collector.maximum.exception: 256 # Should the metrics reporter bind on all interfaces -heron.tmaster.metrics.network.bindallinterfaces: False +heron.tmanager.metrics.network.bindallinterfaces: False # The timeout in seconds for 
stream mgr, compared with (current time - last heartbeat time) -heron.tmaster.stmgr.state.timeout.sec: 60 +heron.tmanager.stmgr.state.timeout.sec: 60 ################################################################################ # Configs related to Metrics Manager, starts with heron.metricsmgr.* diff --git a/heron/config/src/yaml/conf/kubernetes/metrics_sinks.yaml b/heron/config/src/yaml/conf/kubernetes/metrics_sinks.yaml index 0ce5457ed91..d6c5a39bbc2 100644 --- a/heron/config/src/yaml/conf/kubernetes/metrics_sinks.yaml +++ b/heron/config/src/yaml/conf/kubernetes/metrics_sinks.yaml @@ -20,7 +20,7 @@ # We would specify the unique sink-id first sinks: - file-sink - - tmaster-sink + - tmanager-sink - prometheus-sink - metricscache-sink @@ -44,22 +44,22 @@ file-sink: filename-output: "metrics.json" # File for metrics to write to file-maximum: 5 # maximum number of file saved in disk -### Config for tmaster-sink -tmaster-sink: - class: "org.apache.heron.metricsmgr.sink.tmaster.TMasterSink" +### Config for tmanager-sink +tmanager-sink: + class: "org.apache.heron.metricsmgr.sink.tmanager.TManagerSink" flush-frequency-ms: 60000 sink-restart-attempts: -1 # Forever - tmaster-location-check-interval-sec: 5 - tmaster-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + tmanager-location-check-interval-sec: 5 + tmanager-client: + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes - tmaster-metrics-type: + tmanager-metrics-type: "__emit-count": SUM "__execute-count": SUM "__fail-count": SUM @@ -91,12 +91,12 @@ metricscache-sink: sink-restart-attempts: -1 # Forever metricscache-location-check-interval-sec: 5 metricscache-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size 
based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes metricscache-metrics-type: diff --git a/heron/config/src/yaml/conf/local/heron_internals.yaml b/heron/config/src/yaml/conf/local/heron_internals.yaml index e82369c303d..59514ba354a 100644 --- a/heron/config/src/yaml/conf/local/heron_internals.yaml +++ b/heron/config/src/yaml/conf/local/heron_internals.yaml @@ -40,8 +40,8 @@ heron.logging.maximum.size.mb: 100 # The maximum number of log files heron.logging.maximum.files: 5 -# The interval in seconds after which to check if the tmaster location has been fetched or not -heron.check.tmaster.location.interval.sec: 120 +# The interval in seconds after which to check if the tmanager location has been fetched or not +heron.check.tmanager.location.interval.sec: 120 # The interval in seconds to prune logging files in C++ heron.logging.prune.interval.sec: 300 @@ -86,16 +86,16 @@ heron.streammgr.mempool.max.message.number: 512 heron.streammgr.client.reconnect.interval.sec: 1 # The reconnect interval to tamster in second for stream manager client -heron.streammgr.client.reconnect.tmaster.interval.sec: 10 +heron.streammgr.client.reconnect.tmanager.interval.sec: 10 -# The max reconnect attempts to tmaster for stream manager client -heron.streammgr.client.reconnect.tmaster.max.attempts: 30 +# The max reconnect attempts to tmanager for stream manager client +heron.streammgr.client.reconnect.tmanager.max.attempts: 30 # The maximum packet size in MB of stream manager's network options heron.streammgr.network.options.maximum.packet.mb: 10 # The interval in seconds to send heartbeat -heron.streammgr.tmaster.heartbeat.interval.sec: 10 +heron.streammgr.tmanager.heartbeat.interval.sec: 10 # Maximum batch size in MB to read by stream manager from socket heron.streammgr.connection.read.batch.size.mb: 1 @@ -114,38 +114,38 @@ heron.streammgr.network.backpressure.highwatermark.mb: 100 heron.streammgr.network.backpressure.lowwatermark.mb: 50 ################################################################################ -# Configs related to Topology Master, starts with heron.tmaster.* +# Configs related to Topology Manager, starts with heron.tmanager.* ################################################################################ -# The maximum interval in minutes of metrics to be kept in tmaster -heron.tmaster.metrics.collector.maximum.interval.min: 180 +# The maximum interval in minutes of metrics to be kept in tmanager +heron.tmanager.metrics.collector.maximum.interval.min: 180 -# The maximum time to retry to establish the tmaster 
-heron.tmaster.establish.retry.times: 30 +# The maximum time to retry to establish the tmanager +heron.tmanager.establish.retry.times: 30 -# The interval to retry to establish the tmaster -heron.tmaster.establish.retry.interval.sec: 1 +# The interval to retry to establish the tmanager +heron.tmanager.establish.retry.interval.sec: 1 -# Maximum packet size in MB of tmaster's network options to connect to stream managers -heron.tmaster.network.master.options.maximum.packet.mb: 16 +# Maximum packet size in MB of tmanager's network options to connect to stream managers +heron.tmanager.network.server.options.maximum.packet.mb: 16 -# Maximum packet size in MB of tmaster's network options to connect to scheduler -heron.tmaster.network.controller.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options to connect to scheduler +heron.tmanager.network.controller.options.maximum.packet.mb: 1 -# Maximum packet size in MB of tmaster's network options for stat queries -heron.tmaster.network.stats.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options for stat queries +heron.tmanager.network.stats.options.maximum.packet.mb: 1 -# The interval for tmaster to purge metrics from socket -heron.tmaster.metrics.collector.purge.interval.sec: 60 +# The interval for tmanager to purge metrics from socket +heron.tmanager.metrics.collector.purge.interval.sec: 60 # The maximum # of exceptions to be stored in tmetrics collector, to prevent potential OOM -heron.tmaster.metrics.collector.maximum.exception: 256 +heron.tmanager.metrics.collector.maximum.exception: 256 # Should the metrics reporter bind on all interfaces -heron.tmaster.metrics.network.bindallinterfaces: False +heron.tmanager.metrics.network.bindallinterfaces: False # The timeout in seconds for stream mgr, compared with (current time - last heartbeat time) -heron.tmaster.stmgr.state.timeout.sec: 60 +heron.tmanager.stmgr.state.timeout.sec: 60 ################################################################################ # Configs related to Metrics Manager, starts with heron.metricsmgr.* diff --git a/heron/config/src/yaml/conf/local/metrics_sinks.yaml b/heron/config/src/yaml/conf/local/metrics_sinks.yaml index b87e270de61..65302090974 100644 --- a/heron/config/src/yaml/conf/local/metrics_sinks.yaml +++ b/heron/config/src/yaml/conf/local/metrics_sinks.yaml @@ -20,7 +20,7 @@ # We would specify the unique sink-id first sinks: - file-sink - - tmaster-sink + - tmanager-sink - metricscache-sink ########### Now we would specify the detailed configuration for every unique sink @@ -43,22 +43,22 @@ file-sink: filename-output: "metrics.json" # File for metrics to write to file-maximum: 5 # maximum number of file saved in disk -### Config for tmaster-sink -tmaster-sink: - class: "org.apache.heron.metricsmgr.sink.tmaster.TMasterSink" +### Config for tmanager-sink +tmanager-sink: + class: "org.apache.heron.metricsmgr.sink.tmanager.TManagerSink" flush-frequency-ms: 60000 sink-restart-attempts: -1 # Forever - tmaster-location-check-interval-sec: 5 - tmaster-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - 
network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + tmanager-location-check-interval-sec: 5 + tmanager-client: + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes - tmaster-metrics-type: + tmanager-metrics-type: "__emit-count": SUM "__execute-count": SUM "__fail-count": SUM @@ -81,12 +81,12 @@ metricscache-sink: sink-restart-attempts: -1 # Forever metricscache-location-check-interval-sec: 5 metricscache-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes metricscache-metrics-type: diff --git a/heron/config/src/yaml/conf/localzk/README b/heron/config/src/yaml/conf/localzk/README index f2d19eb6017..61dee02b47e 100644 --- a/heron/config/src/yaml/conf/localzk/README +++ b/heron/config/src/yaml/conf/localzk/README @@ -2,7 +2,7 @@ This folder contains sample configs needed for using zookeeper in LocalScheduler In order to run LocalScheduler, you need to set up a running zookeeper server basing on the config inside statemgr.yaml: 1. Set up the appropriate connection string. 2. 
Create following required nodes in zookeeper (one time effort): - /{heron.statemgr.root.path}/tmasters + /{heron.statemgr.root.path}/tmanagers /{heron.statemgr.root.path}/topologies /{heron.statemgr.root.path}/pplans /{heron.statemgr.root.path}/executionstate diff --git a/heron/config/src/yaml/conf/localzk/heron_internals.yaml b/heron/config/src/yaml/conf/localzk/heron_internals.yaml index 0870cfbe334..22685e4e1a1 100644 --- a/heron/config/src/yaml/conf/localzk/heron_internals.yaml +++ b/heron/config/src/yaml/conf/localzk/heron_internals.yaml @@ -40,8 +40,8 @@ heron.logging.maximum.size.mb: 100 # The maximum number of log files heron.logging.maximum.files: 5 -# The interval in seconds after which to check if the tmaster location has been fetched or not -heron.check.tmaster.location.interval.sec: 120 +# The interval in seconds after which to check if the tmanager location has been fetched or not +heron.check.tmanager.location.interval.sec: 120 # The interval in seconds to prune logging files in C++ heron.logging.prune.interval.sec: 300 @@ -86,16 +86,16 @@ heron.streammgr.mempool.max.message.number: 512 heron.streammgr.client.reconnect.interval.sec: 1 # The reconnect interval to tamster in second for stream manager client -heron.streammgr.client.reconnect.tmaster.interval.sec: 10 +heron.streammgr.client.reconnect.tmanager.interval.sec: 10 -# The max reconnect attempts to tmaster for stream manager client -heron.streammgr.client.reconnect.tmaster.max.attempts: 30 +# The max reconnect attempts to tmanager for stream manager client +heron.streammgr.client.reconnect.tmanager.max.attempts: 30 # The maximum packet size in MB of stream manager's network options heron.streammgr.network.options.maximum.packet.mb: 10 # The interval in seconds to send heartbeat -heron.streammgr.tmaster.heartbeat.interval.sec: 10 +heron.streammgr.tmanager.heartbeat.interval.sec: 10 # Maximum batch size in MB to read by stream manager from socket heron.streammgr.connection.read.batch.size.mb: 1 @@ -114,38 +114,38 @@ heron.streammgr.network.backpressure.highwatermark.mb: 100 heron.streammgr.network.backpressure.lowwatermark.mb: 50 ################################################################################ -# Configs related to Topology Master, starts with heron.tmaster.* +# Configs related to Topology Manager, starts with heron.tmanager.* ################################################################################ -# The maximum interval in minutes of metrics to be kept in tmaster -heron.tmaster.metrics.collector.maximum.interval.min: 180 +# The maximum interval in minutes of metrics to be kept in tmanager +heron.tmanager.metrics.collector.maximum.interval.min: 180 -# The maximum time to retry to establish the tmaster -heron.tmaster.establish.retry.times: 30 +# The maximum time to retry to establish the tmanager +heron.tmanager.establish.retry.times: 30 -# The interval to retry to establish the tmaster -heron.tmaster.establish.retry.interval.sec: 1 +# The interval to retry to establish the tmanager +heron.tmanager.establish.retry.interval.sec: 1 -# Maximum packet size in MB of tmaster's network options to connect to stream managers -heron.tmaster.network.master.options.maximum.packet.mb: 16 +# Maximum packet size in MB of tmanager's network options to connect to stream managers +heron.tmanager.network.server.options.maximum.packet.mb: 16 -# Maximum packet size in MB of tmaster's network options to connect to scheduler -heron.tmaster.network.controller.options.maximum.packet.mb: 1 +# Maximum packet size in MB of 
tmanager's network options to connect to scheduler +heron.tmanager.network.controller.options.maximum.packet.mb: 1 -# Maximum packet size in MB of tmaster's network options for stat queries -heron.tmaster.network.stats.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options for stat queries +heron.tmanager.network.stats.options.maximum.packet.mb: 1 -# The interval for tmaster to purge metrics from socket -heron.tmaster.metrics.collector.purge.interval.sec: 60 +# The interval for tmanager to purge metrics from socket +heron.tmanager.metrics.collector.purge.interval.sec: 60 # The maximum # of exceptions to be stored in tmetrics collector, to prevent potential OOM -heron.tmaster.metrics.collector.maximum.exception: 256 +heron.tmanager.metrics.collector.maximum.exception: 256 # Should the metrics reporter bind on all interfaces -heron.tmaster.metrics.network.bindallinterfaces: False +heron.tmanager.metrics.network.bindallinterfaces: False # The timeout in seconds for stream mgr, compared with (current time - last heartbeat time) -heron.tmaster.stmgr.state.timeout.sec: 60 +heron.tmanager.stmgr.state.timeout.sec: 60 ################################################################################ # Configs related to Metrics Manager, starts with heron.metricsmgr.* diff --git a/heron/config/src/yaml/conf/localzk/metrics_sinks.yaml b/heron/config/src/yaml/conf/localzk/metrics_sinks.yaml index b87e270de61..65302090974 100644 --- a/heron/config/src/yaml/conf/localzk/metrics_sinks.yaml +++ b/heron/config/src/yaml/conf/localzk/metrics_sinks.yaml @@ -20,7 +20,7 @@ # We would specify the unique sink-id first sinks: - file-sink - - tmaster-sink + - tmanager-sink - metricscache-sink ########### Now we would specify the detailed configuration for every unique sink @@ -43,22 +43,22 @@ file-sink: filename-output: "metrics.json" # File for metrics to write to file-maximum: 5 # maximum number of file saved in disk -### Config for tmaster-sink -tmaster-sink: - class: "org.apache.heron.metricsmgr.sink.tmaster.TMasterSink" +### Config for tmanager-sink +tmanager-sink: + class: "org.apache.heron.metricsmgr.sink.tmanager.TManagerSink" flush-frequency-ms: 60000 sink-restart-attempts: -1 # Forever - tmaster-location-check-interval-sec: 5 - tmaster-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + tmanager-location-check-interval-sec: 5 + tmanager-client: + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to 
write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes - tmaster-metrics-type: + tmanager-metrics-type: "__emit-count": SUM "__execute-count": SUM "__fail-count": SUM @@ -81,12 +81,12 @@ metricscache-sink: sink-restart-attempts: -1 # Forever metricscache-location-check-interval-sec: 5 metricscache-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes metricscache-metrics-type: diff --git a/heron/config/src/yaml/conf/marathon/heron_internals.yaml b/heron/config/src/yaml/conf/marathon/heron_internals.yaml index 0870cfbe334..22685e4e1a1 100644 --- a/heron/config/src/yaml/conf/marathon/heron_internals.yaml +++ b/heron/config/src/yaml/conf/marathon/heron_internals.yaml @@ -40,8 +40,8 @@ heron.logging.maximum.size.mb: 100 # The maximum number of log files heron.logging.maximum.files: 5 -# The interval in seconds after which to check if the tmaster location has been fetched or not -heron.check.tmaster.location.interval.sec: 120 +# The interval in seconds after which to check if the tmanager location has been fetched or not +heron.check.tmanager.location.interval.sec: 120 # The interval in seconds to prune logging files in C++ heron.logging.prune.interval.sec: 300 @@ -86,16 +86,16 @@ heron.streammgr.mempool.max.message.number: 512 heron.streammgr.client.reconnect.interval.sec: 1 # The reconnect interval to tamster in second for stream manager client -heron.streammgr.client.reconnect.tmaster.interval.sec: 10 +heron.streammgr.client.reconnect.tmanager.interval.sec: 10 -# The max reconnect attempts to tmaster for stream manager client -heron.streammgr.client.reconnect.tmaster.max.attempts: 30 +# The max reconnect attempts to tmanager for stream manager client +heron.streammgr.client.reconnect.tmanager.max.attempts: 30 # The maximum packet size in MB of stream manager's network 
options heron.streammgr.network.options.maximum.packet.mb: 10 # The interval in seconds to send heartbeat -heron.streammgr.tmaster.heartbeat.interval.sec: 10 +heron.streammgr.tmanager.heartbeat.interval.sec: 10 # Maximum batch size in MB to read by stream manager from socket heron.streammgr.connection.read.batch.size.mb: 1 @@ -114,38 +114,38 @@ heron.streammgr.network.backpressure.highwatermark.mb: 100 heron.streammgr.network.backpressure.lowwatermark.mb: 50 ################################################################################ -# Configs related to Topology Master, starts with heron.tmaster.* +# Configs related to Topology Manager, starts with heron.tmanager.* ################################################################################ -# The maximum interval in minutes of metrics to be kept in tmaster -heron.tmaster.metrics.collector.maximum.interval.min: 180 +# The maximum interval in minutes of metrics to be kept in tmanager +heron.tmanager.metrics.collector.maximum.interval.min: 180 -# The maximum time to retry to establish the tmaster -heron.tmaster.establish.retry.times: 30 +# The maximum time to retry to establish the tmanager +heron.tmanager.establish.retry.times: 30 -# The interval to retry to establish the tmaster -heron.tmaster.establish.retry.interval.sec: 1 +# The interval to retry to establish the tmanager +heron.tmanager.establish.retry.interval.sec: 1 -# Maximum packet size in MB of tmaster's network options to connect to stream managers -heron.tmaster.network.master.options.maximum.packet.mb: 16 +# Maximum packet size in MB of tmanager's network options to connect to stream managers +heron.tmanager.network.server.options.maximum.packet.mb: 16 -# Maximum packet size in MB of tmaster's network options to connect to scheduler -heron.tmaster.network.controller.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options to connect to scheduler +heron.tmanager.network.controller.options.maximum.packet.mb: 1 -# Maximum packet size in MB of tmaster's network options for stat queries -heron.tmaster.network.stats.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options for stat queries +heron.tmanager.network.stats.options.maximum.packet.mb: 1 -# The interval for tmaster to purge metrics from socket -heron.tmaster.metrics.collector.purge.interval.sec: 60 +# The interval for tmanager to purge metrics from socket +heron.tmanager.metrics.collector.purge.interval.sec: 60 # The maximum # of exceptions to be stored in tmetrics collector, to prevent potential OOM -heron.tmaster.metrics.collector.maximum.exception: 256 +heron.tmanager.metrics.collector.maximum.exception: 256 # Should the metrics reporter bind on all interfaces -heron.tmaster.metrics.network.bindallinterfaces: False +heron.tmanager.metrics.network.bindallinterfaces: False # The timeout in seconds for stream mgr, compared with (current time - last heartbeat time) -heron.tmaster.stmgr.state.timeout.sec: 60 +heron.tmanager.stmgr.state.timeout.sec: 60 ################################################################################ # Configs related to Metrics Manager, starts with heron.metricsmgr.* diff --git a/heron/config/src/yaml/conf/marathon/metrics_sinks.yaml b/heron/config/src/yaml/conf/marathon/metrics_sinks.yaml index b87e270de61..65302090974 100644 --- a/heron/config/src/yaml/conf/marathon/metrics_sinks.yaml +++ b/heron/config/src/yaml/conf/marathon/metrics_sinks.yaml @@ -20,7 +20,7 @@ # We would specify the unique sink-id first sinks: - 
file-sink - - tmaster-sink + - tmanager-sink - metricscache-sink ########### Now we would specify the detailed configuration for every unique sink @@ -43,22 +43,22 @@ file-sink: filename-output: "metrics.json" # File for metrics to write to file-maximum: 5 # maximum number of file saved in disk -### Config for tmaster-sink -tmaster-sink: - class: "org.apache.heron.metricsmgr.sink.tmaster.TMasterSink" +### Config for tmanager-sink +tmanager-sink: + class: "org.apache.heron.metricsmgr.sink.tmanager.TManagerSink" flush-frequency-ms: 60000 sink-restart-attempts: -1 # Forever - tmaster-location-check-interval-sec: 5 - tmaster-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + tmanager-location-check-interval-sec: 5 + tmanager-client: + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes - tmaster-metrics-type: + tmanager-metrics-type: "__emit-count": SUM "__execute-count": SUM "__fail-count": SUM @@ -81,12 +81,12 @@ metricscache-sink: sink-restart-attempts: -1 # Forever metricscache-location-check-interval-sec: 5 metricscache-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write 
to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes metricscache-metrics-type: diff --git a/heron/config/src/yaml/conf/mesos/heron_internals.yaml b/heron/config/src/yaml/conf/mesos/heron_internals.yaml index 0870cfbe334..22685e4e1a1 100644 --- a/heron/config/src/yaml/conf/mesos/heron_internals.yaml +++ b/heron/config/src/yaml/conf/mesos/heron_internals.yaml @@ -40,8 +40,8 @@ heron.logging.maximum.size.mb: 100 # The maximum number of log files heron.logging.maximum.files: 5 -# The interval in seconds after which to check if the tmaster location has been fetched or not -heron.check.tmaster.location.interval.sec: 120 +# The interval in seconds after which to check if the tmanager location has been fetched or not +heron.check.tmanager.location.interval.sec: 120 # The interval in seconds to prune logging files in C++ heron.logging.prune.interval.sec: 300 @@ -86,16 +86,16 @@ heron.streammgr.mempool.max.message.number: 512 heron.streammgr.client.reconnect.interval.sec: 1 # The reconnect interval to tmanager in seconds for stream manager client -heron.streammgr.client.reconnect.tmaster.interval.sec: 10 +heron.streammgr.client.reconnect.tmanager.interval.sec: 10 -# The max reconnect attempts to tmaster for stream manager client -heron.streammgr.client.reconnect.tmaster.max.attempts: 30 +# The max reconnect attempts to tmanager for stream manager client +heron.streammgr.client.reconnect.tmanager.max.attempts: 30 # The maximum packet size in MB of stream manager's network options heron.streammgr.network.options.maximum.packet.mb: 10 # The interval in seconds to send heartbeat -heron.streammgr.tmaster.heartbeat.interval.sec: 10 +heron.streammgr.tmanager.heartbeat.interval.sec: 10 # Maximum batch size in MB to read by stream manager from socket heron.streammgr.connection.read.batch.size.mb: 1 @@ -114,38 +114,38 @@ heron.streammgr.network.backpressure.highwatermark.mb: 100 heron.streammgr.network.backpressure.lowwatermark.mb: 50 ################################################################################ -# Configs related to Topology Master, starts with heron.tmaster.* +# Configs related to Topology Manager, starts with heron.tmanager.* ################################################################################ -# The maximum interval in minutes of metrics to be kept in tmaster -heron.tmaster.metrics.collector.maximum.interval.min: 180 +# The maximum interval in minutes of metrics to be kept in tmanager +heron.tmanager.metrics.collector.maximum.interval.min: 180 -# The maximum time to retry to establish the tmaster -heron.tmaster.establish.retry.times: 30 +# The maximum time to retry to establish the tmanager +heron.tmanager.establish.retry.times: 30 -# The interval to retry to establish the tmaster -heron.tmaster.establish.retry.interval.sec: 1 +# The interval to retry to establish the tmanager +heron.tmanager.establish.retry.interval.sec: 1 -# Maximum packet size in MB of tmaster's network options to connect to stream managers 
+heron.tmanager.network.server.options.maximum.packet.mb: 16 -# Maximum packet size in MB of tmaster's network options to connect to scheduler -heron.tmaster.network.controller.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options to connect to scheduler +heron.tmanager.network.controller.options.maximum.packet.mb: 1 -# Maximum packet size in MB of tmaster's network options for stat queries -heron.tmaster.network.stats.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options for stat queries +heron.tmanager.network.stats.options.maximum.packet.mb: 1 -# The interval for tmaster to purge metrics from socket -heron.tmaster.metrics.collector.purge.interval.sec: 60 +# The interval for tmanager to purge metrics from socket +heron.tmanager.metrics.collector.purge.interval.sec: 60 # The maximum # of exceptions to be stored in tmetrics collector, to prevent potential OOM -heron.tmaster.metrics.collector.maximum.exception: 256 +heron.tmanager.metrics.collector.maximum.exception: 256 # Should the metrics reporter bind on all interfaces -heron.tmaster.metrics.network.bindallinterfaces: False +heron.tmanager.metrics.network.bindallinterfaces: False # The timeout in seconds for stream mgr, compared with (current time - last heartbeat time) -heron.tmaster.stmgr.state.timeout.sec: 60 +heron.tmanager.stmgr.state.timeout.sec: 60 ################################################################################ # Configs related to Metrics Manager, starts with heron.metricsmgr.* diff --git a/heron/config/src/yaml/conf/mesos/metrics_sinks.yaml b/heron/config/src/yaml/conf/mesos/metrics_sinks.yaml index b87e270de61..65302090974 100644 --- a/heron/config/src/yaml/conf/mesos/metrics_sinks.yaml +++ b/heron/config/src/yaml/conf/mesos/metrics_sinks.yaml @@ -20,7 +20,7 @@ # We would specify the unique sink-id first sinks: - file-sink - - tmaster-sink + - tmanager-sink - metricscache-sink ########### Now we would specify the detailed configuration for every unique sink @@ -43,22 +43,22 @@ file-sink: filename-output: "metrics.json" # File for metrics to write to file-maximum: 5 # maximum number of file saved in disk -### Config for tmaster-sink -tmaster-sink: - class: "org.apache.heron.metricsmgr.sink.tmaster.TMasterSink" +### Config for tmanager-sink +tmanager-sink: + class: "org.apache.heron.metricsmgr.sink.tmanager.TManagerSink" flush-frequency-ms: 60000 sink-restart-attempts: -1 # Forever - tmaster-location-check-interval-sec: 5 - tmaster-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + tmanager-location-check-interval-sec: 5 + tmanager-client: + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to 
TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes metricscache-metrics-type: diff --git a/heron/config/src/yaml/conf/nomad/heron_internals.yaml b/heron/config/src/yaml/conf/nomad/heron_internals.yaml index 35940508707..25c12a8123f 100644 --- a/heron/config/src/yaml/conf/nomad/heron_internals.yaml +++ b/heron/config/src/yaml/conf/nomad/heron_internals.yaml @@ -40,8 +40,8 @@ heron.logging.maximum.size.mb: 100 # The maximum number of log files heron.logging.maximum.files: 5 -# The interval in seconds after which to check if the tmaster location has been fetched or not -heron.check.tmaster.location.interval.sec: 120 +# The interval in seconds after which to check if the tmanager location has been fetched or not +heron.check.tmanager.location.interval.sec: 120 # The interval in seconds to prune logging files in C++ heron.logging.prune.interval.sec: 300 @@ -86,16 +86,16 @@ heron.streammgr.mempool.max.message.number: 512 heron.streammgr.client.reconnect.interval.sec: 1 # The reconnect interval to tmanager in seconds for stream manager client -heron.streammgr.client.reconnect.tmaster.interval.sec: 10 +heron.streammgr.client.reconnect.tmanager.interval.sec: 10 -# The max reconnect attempts to tmaster for stream manager client 
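The tmanager-client settings renamed in the metrics_sinks hunks above describe a flush rule where a batch is sent once either the byte budget or the time budget is hit, whichever comes first. The following is an illustrative Python sketch of that rule only; it is not Heron's actual Java sink client, and the class name is hypothetical.

```python
# Minimal sketch of "flush on whichever budget trips first", mirroring the
# network-write-batch-size-bytes / network-write-batch-time-ms comments above.
import time

class TimeOrSizeBatcher:
    def __init__(self, max_bytes=32768, max_ms=16):
        self.max_bytes = max_bytes
        self.max_ms = max_ms
        self.buf = []
        self.buf_bytes = 0
        self.started = None

    def add(self, packet: bytes):
        """Buffer a packet; return a joined batch if a budget was hit."""
        if self.started is None:
            self.started = time.monotonic()
        self.buf.append(packet)
        self.buf_bytes += len(packet)
        if self.should_flush():
            return self.flush()
        return None

    def should_flush(self) -> bool:
        elapsed_ms = (time.monotonic() - self.started) * 1000.0
        # Size based OR time based, whichever is reached first.
        return self.buf_bytes >= self.max_bytes or elapsed_ms >= self.max_ms

    def flush(self) -> bytes:
        batch, self.buf, self.buf_bytes, self.started = self.buf, [], 0, None
        return b"".join(batch)
```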
-heron.streammgr.client.reconnect.tmaster.max.attempts: 30 +# The max reconnect attempts to tmanager for stream manager client +heron.streammgr.client.reconnect.tmanager.max.attempts: 30 # The maximum packet size in MB of stream manager's network options heron.streammgr.network.options.maximum.packet.mb: 10 # The interval in seconds to send heartbeat -heron.streammgr.tmaster.heartbeat.interval.sec: 10 +heron.streammgr.tmanager.heartbeat.interval.sec: 10 # Maximum batch size in MB to read by stream manager from socket heron.streammgr.connection.read.batch.size.mb: 1 @@ -114,38 +114,38 @@ heron.streammgr.network.backpressure.highwatermark.mb: 100 heron.streammgr.network.backpressure.lowwatermark.mb: 50 ################################################################################ -# Configs related to Topology Master, starts with heron.tmaster.* +# Configs related to Topology Manager, starts with heron.tmanager.* ################################################################################ -# The maximum interval in minutes of metrics to be kept in tmaster -heron.tmaster.metrics.collector.maximum.interval.min: 180 +# The maximum interval in minutes of metrics to be kept in tmanager +heron.tmanager.metrics.collector.maximum.interval.min: 180 -# The maximum time to retry to establish the tmaster -heron.tmaster.establish.retry.times: 30 +# The maximum time to retry to establish the tmanager +heron.tmanager.establish.retry.times: 30 -# The interval to retry to establish the tmaster -heron.tmaster.establish.retry.interval.sec: 1 +# The interval to retry to establish the tmanager +heron.tmanager.establish.retry.interval.sec: 1 -# Maximum packet size in MB of tmaster's network options to connect to stream managers -heron.tmaster.network.master.options.maximum.packet.mb: 16 +# Maximum packet size in MB of tmanager's network options to connect to stream managers +heron.tmanager.network.server.options.maximum.packet.mb: 16 -# Maximum packet size in MB of tmaster's network options to connect to scheduler -heron.tmaster.network.controller.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options to connect to scheduler +heron.tmanager.network.controller.options.maximum.packet.mb: 1 -# Maximum packet size in MB of tmaster's network options for stat queries -heron.tmaster.network.stats.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options for stat queries +heron.tmanager.network.stats.options.maximum.packet.mb: 1 -# The interval for tmaster to purge metrics from socket -heron.tmaster.metrics.collector.purge.interval.sec: 60 +# The interval for tmanager to purge metrics from socket +heron.tmanager.metrics.collector.purge.interval.sec: 60 # The maximum # of exceptions to be stored in tmetrics collector, to prevent potential OOM -heron.tmaster.metrics.collector.maximum.exception: 256 +heron.tmanager.metrics.collector.maximum.exception: 256 # Should the metrics reporter bind on all interfaces -heron.tmaster.metrics.network.bindallinterfaces: False +heron.tmanager.metrics.network.bindallinterfaces: False # The timeout in seconds for stream mgr, compared with (current time - last heartbeat time) -heron.tmaster.stmgr.state.timeout.sec: 60 +heron.tmanager.stmgr.state.timeout.sec: 60 ################################################################################ # Configs related to Metrics Manager, starts with heron.metricsmgr.* diff --git a/heron/config/src/yaml/conf/nomad/metrics_sinks.yaml 
b/heron/config/src/yaml/conf/nomad/metrics_sinks.yaml index 260a9271280..6a3c4fb2c7a 100644 --- a/heron/config/src/yaml/conf/nomad/metrics_sinks.yaml +++ b/heron/config/src/yaml/conf/nomad/metrics_sinks.yaml @@ -20,7 +20,7 @@ # We would specify the unique sink-id first sinks: - file-sink - - tmaster-sink + - tmanager-sink - prometheus-sink - metricscache-sink @@ -44,22 +44,22 @@ file-sink: filename-output: "metrics.json" # File for metrics to write to file-maximum: 5 # maximum number of file saved in disk -### Config for tmaster-sink -tmaster-sink: - class: "org.apache.heron.metricsmgr.sink.tmaster.TMasterSink" +### Config for tmanager-sink +tmanager-sink: + class: "org.apache.heron.metricsmgr.sink.tmanager.TManagerSink" flush-frequency-ms: 60000 sink-restart-attempts: -1 # Forever - tmaster-location-check-interval-sec: 5 - tmaster-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + tmanager-location-check-interval-sec: 5 + tmanager-client: + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes - tmaster-metrics-type: + tmanager-metrics-type: "__emit-count": SUM "__execute-count": SUM "__fail-count": SUM @@ -92,12 +92,12 @@ metricscache-sink: sink-restart-attempts: -1 # Forever metricscache-location-check-interval-sec: 5 metricscache-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based 
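Each metrics_sinks.yaml in this patch follows the same shape: a top-level `sinks` list of sink ids, and one same-named mapping per id carrying at least a `class` and flush settings. As a hedged sketch (assuming PyYAML is available; `load_sinks` is a hypothetical helper, not part of Heron's Python API), such a file could be read like this:

```python
# Resolve each declared sink id to its configured class and flush frequency.
import yaml

def load_sinks(path: str):
    with open(path) as f:
        conf = yaml.safe_load(f)
    for sink_id in conf.get("sinks", []):
        sink_conf = conf.get(sink_id, {})
        yield sink_id, sink_conf.get("class"), sink_conf.get("flush-frequency-ms")

# Example: after this patch, the renamed sink resolves to
# org.apache.heron.metricsmgr.sink.tmanager.TManagerSink.
for sink_id, cls, flush_ms in load_sinks("metrics_sinks.yaml"):
    print(sink_id, cls, flush_ms)
```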
+ network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes metricscache-metrics-type: diff --git a/heron/config/src/yaml/conf/sandbox/heron_internals.yaml b/heron/config/src/yaml/conf/sandbox/heron_internals.yaml index e82369c303d..59514ba354a 100644 --- a/heron/config/src/yaml/conf/sandbox/heron_internals.yaml +++ b/heron/config/src/yaml/conf/sandbox/heron_internals.yaml @@ -40,8 +40,8 @@ heron.logging.maximum.size.mb: 100 # The maximum number of log files heron.logging.maximum.files: 5 -# The interval in seconds after which to check if the tmaster location has been fetched or not -heron.check.tmaster.location.interval.sec: 120 +# The interval in seconds after which to check if the tmanager location has been fetched or not +heron.check.tmanager.location.interval.sec: 120 # The interval in seconds to prune logging files in C++ heron.logging.prune.interval.sec: 300 @@ -86,16 +86,16 @@ heron.streammgr.mempool.max.message.number: 512 heron.streammgr.client.reconnect.interval.sec: 1 # The reconnect interval to tmanager in seconds for stream manager client -heron.streammgr.client.reconnect.tmaster.interval.sec: 10 +heron.streammgr.client.reconnect.tmanager.interval.sec: 10 -# The max reconnect attempts to tmaster for stream manager client -heron.streammgr.client.reconnect.tmaster.max.attempts: 30 +# The max reconnect attempts to tmanager for stream manager client +heron.streammgr.client.reconnect.tmanager.max.attempts: 30 # The maximum packet size in MB of stream manager's network options heron.streammgr.network.options.maximum.packet.mb: 10 # The interval in seconds to send heartbeat -heron.streammgr.tmaster.heartbeat.interval.sec: 10 +heron.streammgr.tmanager.heartbeat.interval.sec: 10 # Maximum batch size in MB to read by stream manager from socket heron.streammgr.connection.read.batch.size.mb: 1 @@ -114,38 +114,38 @@ heron.streammgr.network.backpressure.highwatermark.mb: 100 heron.streammgr.network.backpressure.lowwatermark.mb: 50 ################################################################################ -# Configs related to Topology Master, starts with heron.tmaster.* +# Configs related to Topology Manager, starts with heron.tmanager.* ################################################################################ -# The maximum interval in minutes of metrics to be kept in tmaster -heron.tmaster.metrics.collector.maximum.interval.min: 180 +# The maximum interval in minutes of metrics to be kept in tmanager +heron.tmanager.metrics.collector.maximum.interval.min: 180 -# The maximum time to retry to establish the tmaster -heron.tmaster.establish.retry.times: 30 +# The maximum time to retry to establish the tmanager +heron.tmanager.establish.retry.times: 30 -# The interval to retry to establish the tmaster -heron.tmaster.establish.retry.interval.sec: 1 +# The interval to retry to establish the tmanager +heron.tmanager.establish.retry.interval.sec: 1 -# Maximum packet size in MB of tmaster's network 
options to connect to stream managers -heron.tmaster.network.master.options.maximum.packet.mb: 16 +# Maximum packet size in MB of tmanager's network options to connect to stream managers +heron.tmanager.network.server.options.maximum.packet.mb: 16 -# Maximum packet size in MB of tmaster's network options to connect to scheduler -heron.tmaster.network.controller.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options to connect to scheduler +heron.tmanager.network.controller.options.maximum.packet.mb: 1 -# Maximum packet size in MB of tmaster's network options for stat queries -heron.tmaster.network.stats.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options for stat queries +heron.tmanager.network.stats.options.maximum.packet.mb: 1 -# The interval for tmaster to purge metrics from socket -heron.tmaster.metrics.collector.purge.interval.sec: 60 +# The interval for tmanager to purge metrics from socket +heron.tmanager.metrics.collector.purge.interval.sec: 60 # The maximum # of exceptions to be stored in tmetrics collector, to prevent potential OOM -heron.tmaster.metrics.collector.maximum.exception: 256 +heron.tmanager.metrics.collector.maximum.exception: 256 # Should the metrics reporter bind on all interfaces -heron.tmaster.metrics.network.bindallinterfaces: False +heron.tmanager.metrics.network.bindallinterfaces: False # The timeout in seconds for stream mgr, compared with (current time - last heartbeat time) -heron.tmaster.stmgr.state.timeout.sec: 60 +heron.tmanager.stmgr.state.timeout.sec: 60 ################################################################################ # Configs related to Metrics Manager, starts with heron.metricsmgr.* diff --git a/heron/config/src/yaml/conf/sandbox/metrics_sinks.yaml b/heron/config/src/yaml/conf/sandbox/metrics_sinks.yaml index b87e270de61..65302090974 100644 --- a/heron/config/src/yaml/conf/sandbox/metrics_sinks.yaml +++ b/heron/config/src/yaml/conf/sandbox/metrics_sinks.yaml @@ -20,7 +20,7 @@ # We would specify the unique sink-id first sinks: - file-sink - - tmaster-sink + - tmanager-sink - metricscache-sink ########### Now we would specify the detailed configuration for every unique sink @@ -43,22 +43,22 @@ file-sink: filename-output: "metrics.json" # File for metrics to write to file-maximum: 5 # maximum number of file saved in disk -### Config for tmaster-sink -tmaster-sink: - class: "org.apache.heron.metricsmgr.sink.tmaster.TMasterSink" +### Config for tmanager-sink +tmanager-sink: + class: "org.apache.heron.metricsmgr.sink.tmanager.TManagerSink" flush-frequency-ms: 60000 sink-restart-attempts: -1 # Forever - tmaster-location-check-interval-sec: 5 - tmaster-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + tmanager-location-check-interval-sec: 5 + tmanager-client: + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size 
of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes - tmaster-metrics-type: + tmanager-metrics-type: "__emit-count": SUM "__execute-count": SUM "__fail-count": SUM @@ -81,12 +81,12 @@ metricscache-sink: sink-restart-attempts: -1 # Forever metricscache-location-check-interval-sec: 5 metricscache-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes metricscache-metrics-type: diff --git a/heron/config/src/yaml/conf/slurm/heron_internals.yaml b/heron/config/src/yaml/conf/slurm/heron_internals.yaml index 0a859b4fa56..22685e4e1a1 100644 --- a/heron/config/src/yaml/conf/slurm/heron_internals.yaml +++ b/heron/config/src/yaml/conf/slurm/heron_internals.yaml @@ -18,10 +18,10 @@ ################################################################################ # Default values for various configs used inside Heron. ################################################################################ -# All the config associated with time is in the unit of milli-seconds, +# All the config associated with time is in the unit of milli-seconds, # unless otherwise specified. ################################################################################ -# All the config associated with data size is in the unit of bytes, unless +# All the config associated with data size is in the unit of bytes, unless # otherwise specified. 
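Because the heron_internals.yaml hunks in this patch change only key names, a deployment still carrying the old keys can be migrated mechanically. A minimal sketch of that mapping follows; the helper is hypothetical and not shipped with Heron. Note one key in these hunks is not a pure substring swap: the stream-manager-facing packet-size option also renames "master" to "server".

```python
# Map legacy tmaster-era keys to their tmanager equivalents, mirroring the
# hunks in this patch (e.g. heron.check.tmaster.location.interval.sec ->
# heron.check.tmanager.location.interval.sec).
def migrate_internals(old_conf: dict) -> dict:
    new_conf = {}
    for key, value in old_conf.items():
        key = key.replace("tmaster", "tmanager")
        # Special case visible in the diffs above: network.master -> network.server.
        if key == "heron.tmanager.network.master.options.maximum.packet.mb":
            key = "heron.tmanager.network.server.options.maximum.packet.mb"
        new_conf[key] = value
    return new_conf

assert migrate_internals(
    {"heron.streammgr.tmaster.heartbeat.interval.sec": 10}
) == {"heron.streammgr.tmanager.heartbeat.interval.sec": 10}
```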
################################################################################ @@ -32,25 +32,25 @@ ### heron.* configs are general configurations over all components # The relative path to the logging directory -heron.logging.directory: "log-files" +heron.logging.directory: "log-files" # The maximum log file size in MB -heron.logging.maximum.size.mb: 100 +heron.logging.maximum.size.mb: 100 # The maximum number of log files -heron.logging.maximum.files: 5 +heron.logging.maximum.files: 5 -# The interval in seconds after which to check if the tmaster location has been fetched or not -heron.check.tmaster.location.interval.sec: 120 +# The interval in seconds after which to check if the tmanager location has been fetched or not +heron.check.tmanager.location.interval.sec: 120 # The interval in seconds to prune logging files in C++ -heron.logging.prune.interval.sec: 300 +heron.logging.prune.interval.sec: 300 # The interval in seconds to flush log files in C++ -heron.logging.flush.interval.sec: 10 +heron.logging.flush.interval.sec: 10 # The threshold level to log error -heron.logging.err.threshold: 3 +heron.logging.err.threshold: 3 # The interval in seconds for different components to export metrics to metrics manager heron.metrics.export.interval.sec: 60 @@ -62,19 +62,19 @@ heron.metrics.max.exceptions.per.message.count: 1024 # Configs related to Stream Manager, starts with heron.streammgr.* ################################################################################ -# The tuple cache (used for batching) can be drained in two ways: -# (a) Time based +# The tuple cache (used for batching) can be drained in two ways: +# (a) Time based # (b) size based # The frequency in ms to drain the tuple cache in stream manager -heron.streammgr.cache.drain.frequency.ms: 10 +heron.streammgr.cache.drain.frequency.ms: 10 # The size-based threshold in MB for buffering data tuples # waiting for checkpoint markers before giving up heron.streammgr.stateful.buffer.size.mb: 100 # The size-based threshold in MB for draining the tuple cache -heron.streammgr.cache.drain.size.mb: 100 +heron.streammgr.cache.drain.size.mb: 100 # For efficient acknowledgements heron.streammgr.xormgr.rotatingmap.nbuckets: 3 @@ -83,27 +83,27 @@ heron.streammgr.mempool.max.message.number: 512 # The reconnect interval to other stream managers in secs for stream manager client -heron.streammgr.client.reconnect.interval.sec: 1 +heron.streammgr.client.reconnect.interval.sec: 1 # The reconnect interval to tmanager in seconds for stream manager client -heron.streammgr.client.reconnect.tmaster.interval.sec: 10 +heron.streammgr.client.reconnect.tmanager.interval.sec: 10 -# The max reconnect attempts to tmaster for stream manager client -heron.streammgr.client.reconnect.tmaster.max.attempts: 30 +# The max reconnect attempts to tmanager for stream manager client +heron.streammgr.client.reconnect.tmanager.max.attempts: 30 # The maximum packet size in MB of stream manager's network options heron.streammgr.network.options.maximum.packet.mb: 10 # The interval in seconds to send heartbeat -heron.streammgr.tmaster.heartbeat.interval.sec: 10 +heron.streammgr.tmanager.heartbeat.interval.sec: 10 # Maximum batch size in MB to read by stream manager from socket -heron.streammgr.connection.read.batch.size.mb: 1 +heron.streammgr.connection.read.batch.size.mb: 1 # Maximum batch size in MB to write by stream manager to socket -heron.streammgr.connection.write.batch.size.mb: 1 
+heron.streammgr.connection.write.batch.size.mb: 1 -# Number of times we should wait to see a buffer full while enqueueing data +# Number of times we should wait to see a buffer full while enqueueing data # before declaring start of back pressure heron.streammgr.network.backpressure.threshold: 3 @@ -114,68 +114,68 @@ heron.streammgr.network.backpressure.highwatermark.mb: 100 heron.streammgr.network.backpressure.lowwatermark.mb: 50 ################################################################################ -# Configs related to Topology Master, starts with heron.tmaster.* +# Configs related to Topology Manager, starts with heron.tmanager.* ################################################################################ -# The maximum interval in minutes of metrics to be kept in tmaster -heron.tmaster.metrics.collector.maximum.interval.min: 180 +# The maximum interval in minutes of metrics to be kept in tmanager +heron.tmanager.metrics.collector.maximum.interval.min: 180 -# The maximum time to retry to establish the tmaster -heron.tmaster.establish.retry.times: 30 +# The maximum time to retry to establish the tmanager +heron.tmanager.establish.retry.times: 30 -# The interval to retry to establish the tmaster -heron.tmaster.establish.retry.interval.sec: 1 +# The interval to retry to establish the tmanager +heron.tmanager.establish.retry.interval.sec: 1 -# Maximum packet size in MB of tmaster's network options to connect to stream managers -heron.tmaster.network.master.options.maximum.packet.mb: 16 +# Maximum packet size in MB of tmanager's network options to connect to stream managers +heron.tmanager.network.server.options.maximum.packet.mb: 16 -# Maximum packet size in MB of tmaster's network options to connect to scheduler -heron.tmaster.network.controller.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options to connect to scheduler +heron.tmanager.network.controller.options.maximum.packet.mb: 1 -# Maximum packet size in MB of tmaster's network options for stat queries -heron.tmaster.network.stats.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options for stat queries +heron.tmanager.network.stats.options.maximum.packet.mb: 1 -# The interval for tmaster to purge metrics from socket -heron.tmaster.metrics.collector.purge.interval.sec: 60 +# The interval for tmanager to purge metrics from socket +heron.tmanager.metrics.collector.purge.interval.sec: 60 # The maximum # of exceptions to be stored in tmetrics collector, to prevent potential OOM -heron.tmaster.metrics.collector.maximum.exception: 256 +heron.tmanager.metrics.collector.maximum.exception: 256 # Should the metrics reporter bind on all interfaces -heron.tmaster.metrics.network.bindallinterfaces: False +heron.tmanager.metrics.network.bindallinterfaces: False # The timeout in seconds for stream mgr, compared with (current time - last heartbeat time) -heron.tmaster.stmgr.state.timeout.sec: 60 +heron.tmanager.stmgr.state.timeout.sec: 60 ################################################################################ # Configs related to Metrics Manager, starts with heron.metricsmgr.* ################################################################################ -# The size of packets to read from socket will be determined by the minimal of: -# (a) time based +# The size of packets to read from socket will be determined by the minimal of: +# (a) time based # (b) size based # Time based, the maximum batch time in ms for metricsmgr to read from socket 
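The backpressure watermarks in the hunk above form a hysteresis band: back pressure starts once buffered bytes cross the high watermark (100 MB in most of these files) and is released only after draining below the low watermark (50 MB), so the gate does not flap between the two levels. A small illustrative sketch in Python, not Heron's actual C++ stream manager:

```python
# High/low watermark hysteresis, as described by
# heron.streammgr.network.backpressure.{high,low}watermark.mb above.
MB = 1024 * 1024

class BackpressureGate:
    def __init__(self, high_mb=100, low_mb=50):
        self.high = high_mb * MB
        self.low = low_mb * MB
        self.active = False

    def update(self, buffered_bytes: int) -> bool:
        if not self.active and buffered_bytes >= self.high:
            self.active = True   # start back pressure at the high watermark
        elif self.active and buffered_bytes <= self.low:
            self.active = False  # release only below the low watermark
        return self.active

gate = BackpressureGate()
assert gate.update(101 * MB) is True   # crosses the high watermark
assert gate.update(60 * MB) is True    # between the watermarks: stays on
assert gate.update(40 * MB) is False   # below the low watermark: released
```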
-heron.metricsmgr.network.read.batch.time.ms: 16 +heron.metricsmgr.network.read.batch.time.ms: 16 # Size based, the maximum batch size in bytes to read from socket -heron.metricsmgr.network.read.batch.size.bytes: 32768 +heron.metricsmgr.network.read.batch.size.bytes: 32768 -# The size of packets to write to socket will be determined by the minimum of -# (a) time based +# The size of packets to write to socket will be determined by the minimum of +# (a) time based # (b) size based # Time based, the maximum batch time in ms for metricsmgr to write to socket -heron.metricsmgr.network.write.batch.time.ms: 16 +heron.metricsmgr.network.write.batch.time.ms: 16 # Size based, the maximum batch size in bytes to write to socket -heron.metricsmgr.network.write.batch.size.bytes: 32768 +heron.metricsmgr.network.write.batch.size.bytes: 32768 # The maximum socket's send buffer size in bytes -heron.metricsmgr.network.options.socket.send.buffer.size.bytes: 6553600 +heron.metricsmgr.network.options.socket.send.buffer.size.bytes: 6553600 # The maximum socket's received buffer size in bytes of metricsmgr's network options -heron.metricsmgr.network.options.socket.received.buffer.size.bytes: 8738000 +heron.metricsmgr.network.options.socket.received.buffer.size.bytes: 8738000 # The maximum packet size that metrics mgr can read heron.metricsmgr.network.options.maximum.packetsize.bytes: 1048576 @@ -188,45 +188,45 @@ heron.metricsmgr.network.options.maximum.packetsize.bytes: 1048576 heron.instance.network.options.maximum.packetsize.bytes: 10485760 # The queue capacity (num of items) in bolt for buffer packets to read from stream manager -heron.instance.internal.bolt.read.queue.capacity: 128 +heron.instance.internal.bolt.read.queue.capacity: 128 # The queue capacity (num of items) in bolt for buffer packets to write to stream manager -heron.instance.internal.bolt.write.queue.capacity: 128 +heron.instance.internal.bolt.write.queue.capacity: 128 # The queue capacity (num of items) in spout for buffer packets to read from stream manager -heron.instance.internal.spout.read.queue.capacity: 1024 +heron.instance.internal.spout.read.queue.capacity: 1024 # The queue capacity (num of items) in spout for buffer packets to write to stream manager -heron.instance.internal.spout.write.queue.capacity: 128 +heron.instance.internal.spout.write.queue.capacity: 128 # The queue capacity (num of items) for metrics packets to write to metrics manager -heron.instance.internal.metrics.write.queue.capacity: 128 +heron.instance.internal.metrics.write.queue.capacity: 128 # The size of packets read from stream manager will be determined by the minimal of -# (a) time based +# (a) time based # (b) size based # Time based, the maximum batch time in ms for instance to read from stream manager per attempt -heron.instance.network.read.batch.time.ms: 16 +heron.instance.network.read.batch.time.ms: 16 # Size based, the maximum batch size in bytes to read from stream manager -heron.instance.network.read.batch.size.bytes: 32768 +heron.instance.network.read.batch.size.bytes: 32768 -# The size of packets written to stream manager will be determined by the minimum of -# (a) time based +# The size of packets written to stream manager will be determined by the minimum of +# (a) time based # (b) size based # Time based, the maximum batch time in ms for instance to write to stream manager per attempt -heron.instance.network.write.batch.time.ms: 16 +heron.instance.network.write.batch.time.ms: 16 # Size based, the maximum batch size in bytes to write to stream 
manager -heron.instance.network.write.batch.size.bytes: 32768 +heron.instance.network.write.batch.size.bytes: 32768 # The maximum socket's send buffer size in bytes -heron.instance.network.options.socket.send.buffer.size.bytes: 6553600 +heron.instance.network.options.socket.send.buffer.size.bytes: 6553600 # The maximum socket's received buffer size in bytes of instance's network options -heron.instance.network.options.socket.received.buffer.size.bytes: 8738000 +heron.instance.network.options.socket.received.buffer.size.bytes: 8738000 # The maximum # of data tuple to batch in a HeronDataTupleSet protobuf heron.instance.set.data.tuple.capacity: 1024 @@ -235,62 +235,62 @@ heron.instance.set.data.tuple.capacity: 1024 heron.instance.set.data.tuple.size.bytes: 8388608 # The maximum # of control tuple to batch in a HeronControlTupleSet protobuf -heron.instance.set.control.tuple.capacity: 1024 +heron.instance.set.control.tuple.capacity: 1024 -# The maximum time in ms for a spout to do acknowledgement per attempt, the ack batch could +# also break if there are no more ack tuples to process -heron.instance.ack.batch.time.ms: 128 +heron.instance.ack.batch.time.ms: 128 # The maximum time in ms for a spout instance to emit tuples per attempt -heron.instance.emit.batch.time.ms: 16 +heron.instance.emit.batch.time.ms: 16 # The maximum batch size in bytes for a spout to emit tuples per attempt -heron.instance.emit.batch.size.bytes: 32768 +heron.instance.emit.batch.size.bytes: 32768 # The maximum time in ms for a bolt instance to execute tuples per attempt -heron.instance.execute.batch.time.ms: 16 +heron.instance.execute.batch.time.ms: 16 # The maximum batch size in bytes for a bolt instance to execute tuples per attempt -heron.instance.execute.batch.size.bytes: 32768 +heron.instance.execute.batch.size.bytes: 32768 # The time to wait before the instance exits forcibly when uncaught exception happens -heron.instance.force.exit.timeout.ms: 2000 +heron.instance.force.exit.timeout.ms: 2000 # Interval in seconds to reconnect to the stream manager, including the request timeout in connecting -heron.instance.reconnect.streammgr.interval.sec: 5 +heron.instance.reconnect.streammgr.interval.sec: 5 heron.instance.reconnect.streammgr.times: 60 # Interval in seconds to reconnect to the metrics manager, including the request timeout in connecting -heron.instance.reconnect.metricsmgr.interval.sec: 5 +heron.instance.reconnect.metricsmgr.interval.sec: 5 heron.instance.reconnect.metricsmgr.times: 60 # The interval in seconds for an instance to sample its system metrics, for instance, CPU load. 
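The instance reconnect settings above (a 5-second interval and a cap of 60 attempts for both the stream manager and the metrics manager) bound the total reconnect effort. A hedged sketch of that bounded-retry behaviour; the helper name and the connect callable are illustrative, not Heron's Java instance code:

```python
# Retry a connect callable at a fixed interval, giving up after the
# configured number of attempts (interval.sec: 5, times: 60 above).
import time

def reconnect_with_budget(connect, interval_sec=5, times=60):
    for attempt in range(1, times + 1):
        try:
            return connect()
        except OSError as err:
            if attempt == times:
                raise RuntimeError(f"gave up after {times} attempts") from err
            time.sleep(interval_sec)
```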
heron.instance.metrics.system.sample.interval.sec: 10 # For efficient acknowledgement -heron.instance.acknowledgement.nbuckets: 10 +heron.instance.acknowledgement.nbuckets: 10 ################################################################################ -# For dynamically tuning the available sizes in the interval read & write queues +# For dynamically tuning the available sizes in the interval read & write queues # to provide high performance while avoiding GC issues ################################################################################ # The expected size on read queue in bolt -heron.instance.tuning.expected.bolt.read.queue.size: 8 +heron.instance.tuning.expected.bolt.read.queue.size: 8 # The expected size on write queue in bolt -heron.instance.tuning.expected.bolt.write.queue.size: 8 +heron.instance.tuning.expected.bolt.write.queue.size: 8 # The expected size on read queue in spout -heron.instance.tuning.expected.spout.read.queue.size: 512 +heron.instance.tuning.expected.spout.read.queue.size: 512 # The expected size on write queue in spout -heron.instance.tuning.expected.spout.write.queue.size: 8 +heron.instance.tuning.expected.spout.write.queue.size: 8 # The expected size on metrics write queue -heron.instance.tuning.expected.metrics.write.queue.size: 8 +heron.instance.tuning.expected.metrics.write.queue.size: 8 heron.instance.tuning.current.sample.weight: 0.8 # Interval in ms to tune the size of in & out data queue in instance -heron.instance.tuning.interval.ms: 100 +heron.instance.tuning.interval.ms: 100 diff --git a/heron/config/src/yaml/conf/slurm/metrics_sinks.yaml b/heron/config/src/yaml/conf/slurm/metrics_sinks.yaml index b87e270de61..65302090974 100644 --- a/heron/config/src/yaml/conf/slurm/metrics_sinks.yaml +++ b/heron/config/src/yaml/conf/slurm/metrics_sinks.yaml @@ -20,7 +20,7 @@ # We would specify the unique sink-id first sinks: - file-sink - - tmaster-sink + - tmanager-sink - metricscache-sink ########### Now we would specify the detailed configuration for every unique sink @@ -43,22 +43,22 @@ file-sink: filename-output: "metrics.json" # File for metrics to write to file-maximum: 5 # maximum number of file saved in disk -### Config for tmaster-sink -tmaster-sink: - class: "org.apache.heron.metricsmgr.sink.tmaster.TMasterSink" +### Config for tmanager-sink +tmanager-sink: + class: "org.apache.heron.metricsmgr.sink.tmanager.TManagerSink" flush-frequency-ms: 60000 sink-restart-attempts: -1 # Forever - tmaster-location-check-interval-sec: 5 - tmaster-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + tmanager-location-check-interval-sec: 5 + tmanager-client: + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write 
to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes metricscache-metrics-type: diff --git a/heron/config/src/yaml/conf/standalone/heron_internals.yaml b/heron/config/src/yaml/conf/standalone/heron_internals.yaml index 35940508707..25c12a8123f 100644 --- a/heron/config/src/yaml/conf/standalone/heron_internals.yaml +++ b/heron/config/src/yaml/conf/standalone/heron_internals.yaml @@ -40,8 +40,8 @@ heron.logging.maximum.size.mb: 100 # The maximum number of log files heron.logging.maximum.files: 5 -# The interval in seconds after which to check if the tmaster location has been fetched or not -heron.check.tmaster.location.interval.sec: 120 +# The interval in seconds after which to check if the tmanager location has been fetched or not +heron.check.tmanager.location.interval.sec: 120 # The interval in seconds to prune logging files in C++ heron.logging.prune.interval.sec: 300 @@ -86,16 +86,16 @@ heron.streammgr.mempool.max.message.number: 512 heron.streammgr.client.reconnect.interval.sec: 1 # The reconnect interval to tmanager in seconds for stream manager client -heron.streammgr.client.reconnect.tmaster.interval.sec: 10 +heron.streammgr.client.reconnect.tmanager.interval.sec: 10 -# The max reconnect attempts to tmaster for stream manager client 
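The instance-tuning knobs a few hunks above (expected queue sizes, a current-sample weight of 0.8, and a 100 ms tuning interval) suggest an exponentially weighted moving average over observed queue sizes, where recent samples dominate. This is a speculative reading of those settings, sketched in Python rather than Heron's Java:

```python
# Blend each new queue-size observation with the running estimate, assuming
# heron.instance.tuning.current.sample.weight (0.8) weights the newest sample.
def tune_queue_estimate(samples, weight=0.8, initial=0.0):
    estimate = initial
    for observed in samples:
        estimate = weight * observed + (1.0 - weight) * estimate
    return estimate

# With expected spout read-queue size 512, a run of larger observations pulls
# the estimate up quickly because recent samples dominate.
print(tune_queue_estimate([512, 600, 700, 800]))
```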
-heron.streammgr.client.reconnect.tmaster.max.attempts: 30 +# The max reconnect attempts to tmanager for stream manager client +heron.streammgr.client.reconnect.tmanager.max.attempts: 30 # The maximum packet size in MB of stream manager's network options heron.streammgr.network.options.maximum.packet.mb: 10 # The interval in seconds to send heartbeat -heron.streammgr.tmaster.heartbeat.interval.sec: 10 +heron.streammgr.tmanager.heartbeat.interval.sec: 10 # Maximum batch size in MB to read by stream manager from socket heron.streammgr.connection.read.batch.size.mb: 1 @@ -114,38 +114,38 @@ heron.streammgr.network.backpressure.highwatermark.mb: 100 heron.streammgr.network.backpressure.lowwatermark.mb: 50 ################################################################################ -# Configs related to Topology Master, starts with heron.tmaster.* +# Configs related to Topology Manager, starts with heron.tmanager.* ################################################################################ -# The maximum interval in minutes of metrics to be kept in tmaster -heron.tmaster.metrics.collector.maximum.interval.min: 180 +# The maximum interval in minutes of metrics to be kept in tmanager +heron.tmanager.metrics.collector.maximum.interval.min: 180 -# The maximum time to retry to establish the tmaster -heron.tmaster.establish.retry.times: 30 +# The maximum time to retry to establish the tmanager +heron.tmanager.establish.retry.times: 30 -# The interval to retry to establish the tmaster -heron.tmaster.establish.retry.interval.sec: 1 +# The interval to retry to establish the tmanager +heron.tmanager.establish.retry.interval.sec: 1 -# Maximum packet size in MB of tmaster's network options to connect to stream managers -heron.tmaster.network.master.options.maximum.packet.mb: 16 +# Maximum packet size in MB of tmanager's network options to connect to stream managers +heron.tmanager.network.server.options.maximum.packet.mb: 16 -# Maximum packet size in MB of tmaster's network options to connect to scheduler -heron.tmaster.network.controller.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options to connect to scheduler +heron.tmanager.network.controller.options.maximum.packet.mb: 1 -# Maximum packet size in MB of tmaster's network options for stat queries -heron.tmaster.network.stats.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options for stat queries +heron.tmanager.network.stats.options.maximum.packet.mb: 1 -# The interval for tmaster to purge metrics from socket -heron.tmaster.metrics.collector.purge.interval.sec: 60 +# The interval for tmanager to purge metrics from socket +heron.tmanager.metrics.collector.purge.interval.sec: 60 # The maximum # of exceptions to be stored in tmetrics collector, to prevent potential OOM -heron.tmaster.metrics.collector.maximum.exception: 256 +heron.tmanager.metrics.collector.maximum.exception: 256 # Should the metrics reporter bind on all interfaces -heron.tmaster.metrics.network.bindallinterfaces: False +heron.tmanager.metrics.network.bindallinterfaces: False # The timeout in seconds for stream mgr, compared with (current time - last heartbeat time) -heron.tmaster.stmgr.state.timeout.sec: 60 +heron.tmanager.stmgr.state.timeout.sec: 60 ################################################################################ # Configs related to Metrics Manager, starts with heron.metricsmgr.* diff --git a/heron/config/src/yaml/conf/standalone/metrics_sinks.yaml 
b/heron/config/src/yaml/conf/standalone/metrics_sinks.yaml index 260a9271280..6a3c4fb2c7a 100644 --- a/heron/config/src/yaml/conf/standalone/metrics_sinks.yaml +++ b/heron/config/src/yaml/conf/standalone/metrics_sinks.yaml @@ -20,7 +20,7 @@ # We would specify the unique sink-id first sinks: - file-sink - - tmaster-sink + - tmanager-sink - prometheus-sink - metricscache-sink @@ -44,22 +44,22 @@ file-sink: filename-output: "metrics.json" # File for metrics to write to file-maximum: 5 # maximum number of file saved in disk -### Config for tmaster-sink -tmaster-sink: - class: "org.apache.heron.metricsmgr.sink.tmaster.TMasterSink" +### Config for tmanager-sink +tmanager-sink: + class: "org.apache.heron.metricsmgr.sink.tmanager.TManagerSink" flush-frequency-ms: 60000 sink-restart-attempts: -1 # Forever - tmaster-location-check-interval-sec: 5 - tmaster-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + tmanager-location-check-interval-sec: 5 + tmanager-client: + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes - tmaster-metrics-type: + tmanager-metrics-type: "__emit-count": SUM "__execute-count": SUM "__fail-count": SUM @@ -92,12 +92,12 @@ metricscache-sink: sink-restart-attempts: -1 # Forever metricscache-location-check-interval-sec: 5 metricscache-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based 
(b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes metricscache-metrics-type: diff --git a/heron/config/src/yaml/conf/standalone/resources/master.hcl b/heron/config/src/yaml/conf/standalone/resources/primary.hcl similarity index 97% rename from heron/config/src/yaml/conf/standalone/resources/master.hcl rename to heron/config/src/yaml/conf/standalone/resources/primary.hcl index 0ee4bd137e1..b82d6fa9c10 100644 --- a/heron/config/src/yaml/conf/standalone/resources/master.hcl +++ b/heron/config/src/yaml/conf/standalone/resources/primary.hcl @@ -19,7 +19,7 @@ log_level = "DEBUG" # Setup data dir -data_dir = "/tmp/master" +data_dir = "/tmp/primary" # Enable the server server { diff --git a/heron/config/src/yaml/conf/standalone/templates/slave.template.hcl b/heron/config/src/yaml/conf/standalone/templates/secondary.template.hcl similarity index 93% rename from heron/config/src/yaml/conf/standalone/templates/slave.template.hcl rename to heron/config/src/yaml/conf/standalone/templates/secondary.template.hcl index 7de9b5950a5..270236b5f1a 100644 --- a/heron/config/src/yaml/conf/standalone/templates/slave.template.hcl +++ b/heron/config/src/yaml/conf/standalone/templates/secondary.template.hcl @@ -19,12 +19,12 @@ log_level = "DEBUG" # Setup data dir -data_dir = "/tmp/slave" +data_dir = "/tmp/secondary" # Enable the client client { enabled = true - servers = [] + servers = [] options = { "driver.raw_exec.enable" = "1" } diff --git a/heron/config/src/yaml/conf/test/test_heron_internals.yaml b/heron/config/src/yaml/conf/test/test_heron_internals.yaml index aa1dad39aef..6785ccba178 100644 --- a/heron/config/src/yaml/conf/test/test_heron_internals.yaml +++ b/heron/config/src/yaml/conf/test/test_heron_internals.yaml @@ -32,8 +32,8 @@ heron.logging.maximum.size.mb: 100 # The maximum number of log files heron.logging.maximum.files: 5 -# The interval in seconds after which to check if the tmaster location has been fetched or not -heron.check.tmaster.location.interval.sec: 120 +# The interval in seconds after which to check if the tmanager location has been fetched or not +heron.check.tmanager.location.interval.sec: 120 # The interval in seconds to prune logging files in C++ heron.logging.prune.interval.sec: 300 @@ -74,16 +74,16 @@ heron.streammgr.xormgr.rotatingmap.nbuckets: 3 heron.streammgr.client.reconnect.interval.sec: 1 # The reconnect interval to tmanager in seconds for stream manager client -heron.streammgr.client.reconnect.tmaster.interval.sec: 1 +heron.streammgr.client.reconnect.tmanager.interval.sec: 1 -# The max reconnect attempts to tmaster for stream manager client -heron.streammgr.client.reconnect.tmaster.max.attempts: 30 +# The max reconnect attempts to tmanager for stream manager client +heron.streammgr.client.reconnect.tmanager.max.attempts: 30 # The maximum packet size in MB of stream manager's network options heron.streammgr.network.options.maximum.packet.mb: 10 # The interval in 
seconds to send heartbeat
-heron.streammgr.tmaster.heartbeat.interval.sec: 10
+heron.streammgr.tmanager.heartbeat.interval.sec: 10
 
 # Maximum batch size in MB to read by stream manager from socket
 heron.streammgr.connection.read.batch.size.mb: 1
@@ -101,37 +101,37 @@ heron.streammgr.network.backpressure.highwatermark.mb: 50
 heron.streammgr.network.backpressure.lowwatermark.mb: 30
 
 
-### heron.tmaster.* configs are for the tmaster
+### heron.tmanager.* configs are for the tmanager
 
-# The maximum interval in minutes of metrics to be kept in tmaster
-heron.tmaster.metrics.collector.maximum.interval.min: 180
+# The maximum interval in minutes of metrics to be kept in tmanager
+heron.tmanager.metrics.collector.maximum.interval.min: 180
 
-# The maximum time to retry to establish the tmaster
-heron.tmaster.establish.retry.times: 30
+# The maximum time to retry to establish the tmanager
+heron.tmanager.establish.retry.times: 30
 
-# The interval to retry to establish the tmaster
-heron.tmaster.establish.retry.interval.sec: 1
+# The interval to retry to establish the tmanager
+heron.tmanager.establish.retry.interval.sec: 1
 
-# The maximum packet size in MB of tmaster's network options for stmgrs to connect to
-heron.tmaster.network.master.options.maximum.packet.mb: 16
+# The maximum packet size in MB of tmanager's network options for stmgrs to connect to
+heron.tmanager.network.server.options.maximum.packet.mb: 16
 
-# The maximum packet size in MB of tmaster's network options for scheduler to connect to
-heron.tmaster.network.controller.options.maximum.packet.mb: 1
+# The maximum packet size in MB of tmanager's network options for scheduler to connect to
+heron.tmanager.network.controller.options.maximum.packet.mb: 1
 
-# The maximum packet size in MB of tmaster's network options for stat queries
-heron.tmaster.network.stats.options.maximum.packet.mb: 1
+# The maximum packet size in MB of tmanager's network options for stat queries
+heron.tmanager.network.stats.options.maximum.packet.mb: 1
 
-# The inteval for tmaster to purge metrics from socket
-heron.tmaster.metrics.collector.purge.interval.sec: 60
+# The interval for tmanager to purge metrics from socket
+heron.tmanager.metrics.collector.purge.interval.sec: 60
 
 # The maximum # of exception to be stored in tmetrics collector, to prevent potential OOM
-heron.tmaster.metrics.collector.maximum.exception: 256
+heron.tmanager.metrics.collector.maximum.exception: 256
 
 # Should the metrics reporter bind on all interfaces
-heron.tmaster.metrics.network.bindallinterfaces: False
+heron.tmanager.metrics.network.bindallinterfaces: False
 
 # The timeout in seconds for stream mgr, compared with (current time - last heartbeat time)
-heron.tmaster.stmgr.state.timeout.sec: 60
+heron.tmanager.stmgr.state.timeout.sec: 60
 
 ### heron.metricsmgr.* configs are for the metrics manager
 
diff --git a/heron/config/src/yaml/conf/test/test_metrics_sinks.yaml b/heron/config/src/yaml/conf/test/test_metrics_sinks.yaml
index 00735fe50bd..6ae89b3362e 100644
--- a/heron/config/src/yaml/conf/test/test_metrics_sinks.yaml
+++ b/heron/config/src/yaml/conf/test/test_metrics_sinks.yaml
@@ -40,22 +40,22 @@ file-sink:
   filename-output: "metrics.json" # File for metrics to write to
   file-maximum: 5 # maximum number of file saved in disk
 
-### Config for tmaster-sink
-tmaster-sink:
-  class: "org.apache.heron.metricsmgr.sink.tmaster.TMasterSink"
+### Config for tmanager-sink
+tmanager-sink:
+  class: "org.apache.heron.metricsmgr.sink.tmanager.TManagerSink"
   flush-frequency-ms: 60000
   sink-restart-attempts: -1 #
Forever - tmaster-location-check-interval-sec: 5 - tmaster-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + tmanager-location-check-interval-sec: 5 + tmanager-client: + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes - tmaster-metrics-type: + tmanager-metrics-type: "__emit-count": SUM "__execute-count": SUM "__fail-count": SUM @@ -78,12 +78,12 @@ metricscache-sink: sink-restart-attempts: -1 # Forever metricscache-location-check-interval-sec: 5 metricscache-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes metricscache-metrics-type: diff --git a/heron/config/src/yaml/conf/yarn/heron_internals.yaml 
b/heron/config/src/yaml/conf/yarn/heron_internals.yaml index 0870cfbe334..22685e4e1a1 100644 --- a/heron/config/src/yaml/conf/yarn/heron_internals.yaml +++ b/heron/config/src/yaml/conf/yarn/heron_internals.yaml @@ -40,8 +40,8 @@ heron.logging.maximum.size.mb: 100 # The maximum number of log files heron.logging.maximum.files: 5 -# The interval in seconds after which to check if the tmaster location has been fetched or not -heron.check.tmaster.location.interval.sec: 120 +# The interval in seconds after which to check if the tmanager location has been fetched or not +heron.check.tmanager.location.interval.sec: 120 # The interval in seconds to prune logging files in C++ heron.logging.prune.interval.sec: 300 @@ -86,16 +86,16 @@ heron.streammgr.mempool.max.message.number: 512 heron.streammgr.client.reconnect.interval.sec: 1 # The reconnect interval to tamster in second for stream manager client -heron.streammgr.client.reconnect.tmaster.interval.sec: 10 +heron.streammgr.client.reconnect.tmanager.interval.sec: 10 -# The max reconnect attempts to tmaster for stream manager client -heron.streammgr.client.reconnect.tmaster.max.attempts: 30 +# The max reconnect attempts to tmanager for stream manager client +heron.streammgr.client.reconnect.tmanager.max.attempts: 30 # The maximum packet size in MB of stream manager's network options heron.streammgr.network.options.maximum.packet.mb: 10 # The interval in seconds to send heartbeat -heron.streammgr.tmaster.heartbeat.interval.sec: 10 +heron.streammgr.tmanager.heartbeat.interval.sec: 10 # Maximum batch size in MB to read by stream manager from socket heron.streammgr.connection.read.batch.size.mb: 1 @@ -114,38 +114,38 @@ heron.streammgr.network.backpressure.highwatermark.mb: 100 heron.streammgr.network.backpressure.lowwatermark.mb: 50 ################################################################################ -# Configs related to Topology Master, starts with heron.tmaster.* +# Configs related to Topology Manager, starts with heron.tmanager.* ################################################################################ -# The maximum interval in minutes of metrics to be kept in tmaster -heron.tmaster.metrics.collector.maximum.interval.min: 180 +# The maximum interval in minutes of metrics to be kept in tmanager +heron.tmanager.metrics.collector.maximum.interval.min: 180 -# The maximum time to retry to establish the tmaster -heron.tmaster.establish.retry.times: 30 +# The maximum time to retry to establish the tmanager +heron.tmanager.establish.retry.times: 30 -# The interval to retry to establish the tmaster -heron.tmaster.establish.retry.interval.sec: 1 +# The interval to retry to establish the tmanager +heron.tmanager.establish.retry.interval.sec: 1 -# Maximum packet size in MB of tmaster's network options to connect to stream managers -heron.tmaster.network.master.options.maximum.packet.mb: 16 +# Maximum packet size in MB of tmanager's network options to connect to stream managers +heron.tmanager.network.server.options.maximum.packet.mb: 16 -# Maximum packet size in MB of tmaster's network options to connect to scheduler -heron.tmaster.network.controller.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options to connect to scheduler +heron.tmanager.network.controller.options.maximum.packet.mb: 1 -# Maximum packet size in MB of tmaster's network options for stat queries -heron.tmaster.network.stats.options.maximum.packet.mb: 1 +# Maximum packet size in MB of tmanager's network options for stat queries 
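+# (stat queries carry much smaller protobufs than stream manager traffic,
+# hence a tighter cap here than the 16 MB server option above)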
+heron.tmanager.network.stats.options.maximum.packet.mb: 1 -# The interval for tmaster to purge metrics from socket -heron.tmaster.metrics.collector.purge.interval.sec: 60 +# The interval for tmanager to purge metrics from socket +heron.tmanager.metrics.collector.purge.interval.sec: 60 # The maximum # of exceptions to be stored in tmetrics collector, to prevent potential OOM -heron.tmaster.metrics.collector.maximum.exception: 256 +heron.tmanager.metrics.collector.maximum.exception: 256 # Should the metrics reporter bind on all interfaces -heron.tmaster.metrics.network.bindallinterfaces: False +heron.tmanager.metrics.network.bindallinterfaces: False # The timeout in seconds for stream mgr, compared with (current time - last heartbeat time) -heron.tmaster.stmgr.state.timeout.sec: 60 +heron.tmanager.stmgr.state.timeout.sec: 60 ################################################################################ # Configs related to Metrics Manager, starts with heron.metricsmgr.* diff --git a/heron/config/src/yaml/conf/yarn/metrics_sinks.yaml b/heron/config/src/yaml/conf/yarn/metrics_sinks.yaml index b87e270de61..65302090974 100644 --- a/heron/config/src/yaml/conf/yarn/metrics_sinks.yaml +++ b/heron/config/src/yaml/conf/yarn/metrics_sinks.yaml @@ -20,7 +20,7 @@ # We would specify the unique sink-id first sinks: - file-sink - - tmaster-sink + - tmanager-sink - metricscache-sink ########### Now we would specify the detailed configuration for every unique sink @@ -43,22 +43,22 @@ file-sink: filename-output: "metrics.json" # File for metrics to write to file-maximum: 5 # maximum number of file saved in disk -### Config for tmaster-sink -tmaster-sink: - class: "org.apache.heron.metricsmgr.sink.tmaster.TMasterSink" +### Config for tmanager-sink +tmanager-sink: + class: "org.apache.heron.metricsmgr.sink.tmanager.TManagerSink" flush-frequency-ms: 60000 sink-restart-attempts: -1 # Forever - tmaster-location-check-interval-sec: 5 - tmaster-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + tmanager-location-check-interval-sec: 5 + tmanager-client: + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes - 
tmaster-metrics-type: + tmanager-metrics-type: "__emit-count": SUM "__execute-count": SUM "__fail-count": SUM @@ -81,12 +81,12 @@ metricscache-sink: sink-restart-attempts: -1 # Forever metricscache-location-check-interval-sec: 5 metricscache-client: - reconnect-interval-second: 5 # The re-connect interval to TMaster from TMasterClient - # The size of packets written to TMaster will be determined by the minimal of: (a) time based (b) size based - network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt - network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TMaster - network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TMaster per attempt + reconnect-interval-second: 5 # The re-connect interval to TManager from TManagerClient + # The size of packets written to TManager will be determined by the minimal of: (a) time based (b) size based + network-write-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-write-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt + network-read-batch-size-bytes: 32768 # Size based, the maximum batch size in bytes to write to TManager + network-read-batch-time-ms: 16 # Time based, the maximum batch time in ms for Metrics Manager to write to TManager per attempt socket-send-buffer-size-bytes: 6553600 # The maximum socket's send buffer size in bytes socket-received-buffer-size-bytes: 8738000 # The maximum socket's received buffer size in bytes metricscache-metrics-type: From 3d5ebc870404b7adf5347d46d497413fa1a7c3c8 Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 21:18:43 -0400 Subject: [PATCH 12/32] renaming "topology master" to "topology manager" in heron/metricscachemgr --- .../metricscachemgr/MetricsCacheManager.java | 42 ++++++++--------- .../MetricsCacheManagerHttpServer.java | 46 +++++++++---------- .../MetricsCacheManagerServer.java | 16 +++---- .../metricscache/CacheCore.java | 20 ++++---- .../metricscache/MetricsCache.java | 46 +++++++++---------- .../metricscache/MetricsCacheQueryUtils.java | 44 +++++++++--------- heron/metricscachemgr/tests/java/BUILD | 2 +- .../metricscache/CacheCoreTest.java | 18 ++++---- .../MetricsCacheQueryUtilsTest.java | 44 +++++++++--------- .../metricscache/MetricsCacheTest.java | 10 ++-- 10 files changed, 144 insertions(+), 144 deletions(-) diff --git a/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/MetricsCacheManager.java b/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/MetricsCacheManager.java index 2c637f70ea1..e1c3a9f4c9f 100644 --- a/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/MetricsCacheManager.java +++ b/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/MetricsCacheManager.java @@ -40,7 +40,7 @@ import org.apache.heron.common.utils.logging.LoggingHelper; import org.apache.heron.metricscachemgr.metricscache.MetricsCache; import org.apache.heron.metricsmgr.MetricsSinksConfig; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.spi.common.Config; import org.apache.heron.spi.common.ConfigLoader; import org.apache.heron.spi.common.Context; @@ -76,7 +76,7 @@ public class 
MetricsCacheManager { private Config config; - private TopologyMaster.MetricsCacheLocation metricsCacheLocation; + private TopologyManager.MetricsCacheLocation metricsCacheLocation; /** * Constructor: MetricsCacheManager needs 4 type information: @@ -87,7 +87,7 @@ public class MetricsCacheManager { * * @param topologyName topology name * @param serverHost server host - * @param masterPort port to accept message from sink + * @param serverPort port to accept message from sink * @param statsPort port to respond to query request * @param systemConfig heron config * @param metricsSinkConfig sink config @@ -95,10 +95,10 @@ public class MetricsCacheManager { * @param metricsCacheLocation location for state mgr */ public MetricsCacheManager(String topologyName, - String serverHost, int masterPort, int statsPort, + String serverHost, int serverPort, int statsPort, SystemConfig systemConfig, MetricsSinksConfig metricsSinkConfig, Config configExpand, - TopologyMaster.MetricsCacheLocation metricsCacheLocation) + TopologyManager.MetricsCacheLocation metricsCacheLocation) throws IOException { this.topologyName = topologyName; this.config = configExpand; @@ -121,11 +121,11 @@ public MetricsCacheManager(String topologyName, // Construct the server to accepts messages from sinks metricsCacheManagerServer = new MetricsCacheManagerServer(metricsCacheManagerServerLoop, - serverHost, masterPort, serverSocketOptions, metricsCache); + serverHost, serverPort, serverSocketOptions, metricsCache); - metricsCacheManagerServer.registerOnMessage(TopologyMaster.PublishMetrics.newBuilder()); - metricsCacheManagerServer.registerOnRequest(TopologyMaster.MetricRequest.newBuilder()); - metricsCacheManagerServer.registerOnRequest(TopologyMaster.ExceptionLogRequest.newBuilder()); + metricsCacheManagerServer.registerOnMessage(TopologyManager.PublishMetrics.newBuilder()); + metricsCacheManagerServer.registerOnRequest(TopologyManager.MetricRequest.newBuilder()); + metricsCacheManagerServer.registerOnRequest(TopologyManager.ExceptionLogRequest.newBuilder()); // Construct the server to respond to query request metricsCacheManagerHttpServer = new MetricsCacheManagerHttpServer(metricsCache, statsPort); @@ -183,11 +183,11 @@ private static Options constructOptions() { .required() .build(); - Option masterPort = Option.builder("m") - .desc("Master port to accept the metric/exception messages from sinks") - .longOpt("master_port") + Option serverPort = Option.builder("m") + .desc("Server port to accept the metric/exception messages from sinks") + .longOpt("server_port") .hasArgs() - .argName("master port") + .argName("server port") .required() .build(); @@ -252,7 +252,7 @@ private static Options constructOptions() { options.addOption(cluster); options.addOption(role); options.addOption(environment); - options.addOption(masterPort); + options.addOption(serverPort); options.addOption(statsPort); options.addOption(systemConfig); options.addOption(overrideConfig); @@ -312,7 +312,7 @@ public static void main(String[] args) throws Exception { String cluster = cmd.getOptionValue("cluster"); String role = cmd.getOptionValue("role"); String environ = cmd.getOptionValue("environment"); - int masterPort = Integer.valueOf(cmd.getOptionValue("master_port")); + int serverPort = Integer.valueOf(cmd.getOptionValue("server_port")); int statsPort = Integer.valueOf(cmd.getOptionValue("stats_port")); String systemConfigFilename = cmd.getOptionValue("system_config_file"); String overrideConfigFilename = cmd.getOptionValue("override_config_file"); @@ -336,8 
+336,8 @@ public static void main(String[] args) throws Exception { LoggingHelper.addLoggingHandler(new ErrorReportLoggingHandler()); LOG.info(String.format("Starting MetricsCache for topology %s with topologyId %s with " - + "MetricsCache Id %s, master port: %d.", - topologyName, topologyId, metricsCacheMgrId, masterPort)); + + "MetricsCache Id %s, server port: %d.", + topologyName, topologyId, metricsCacheMgrId, serverPort)); LOG.info("System Config: " + systemConfig); @@ -359,18 +359,18 @@ public static void main(String[] args) throws Exception { LOG.info("Cli Config: " + config.toString()); // build metricsCache location - TopologyMaster.MetricsCacheLocation metricsCacheLocation = - TopologyMaster.MetricsCacheLocation.newBuilder() + TopologyManager.MetricsCacheLocation metricsCacheLocation = + TopologyManager.MetricsCacheLocation.newBuilder() .setTopologyName(topologyName) .setTopologyId(topologyId) .setHost(InetAddress.getLocalHost().getHostName()) .setControllerPort(-1) // not used for metricscache - .setMasterPort(masterPort) + .setServerPort(serverPort) .setStatsPort(statsPort) .build(); MetricsCacheManager metricsCacheManager = new MetricsCacheManager( - topologyName, METRICS_CACHE_HOST, masterPort, statsPort, + topologyName, METRICS_CACHE_HOST, serverPort, statsPort, systemConfig, sinksConfig, config, metricsCacheLocation); metricsCacheManager.start(); diff --git a/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/MetricsCacheManagerHttpServer.java b/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/MetricsCacheManagerHttpServer.java index ec4e21ba87c..07a3f897f4a 100644 --- a/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/MetricsCacheManagerHttpServer.java +++ b/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/MetricsCacheManagerHttpServer.java @@ -34,13 +34,13 @@ import com.sun.net.httpserver.HttpServer; import org.apache.heron.metricscachemgr.metricscache.MetricsCache; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.spi.utils.NetworkUtils; /** * MetricsCacheMgr http server: - * compatible with tmaster and tracker http interface for metrics + * compatible with tmanager and tracker http interface for metrics * http path: * "/stats" metric query * "/exceptions" exception query @@ -53,7 +53,7 @@ * MetricsCacheManagerHttpServer is a http server */ public class MetricsCacheManagerHttpServer { - // http path, compatible with tmaster stat interface + // http path, compatible with tmanager stat interface private static final String PATH_STATS = "/stats"; private static final String PATH_EXCEPTIONS = "/exceptions"; private static final String PATH_EXCEPTIONSUMMARY = "/exceptionsummary"; @@ -105,7 +105,7 @@ public static void main(String[] args) System.out.println("endpoint: " + url + "; component: " + args[1]); // construct query payload - byte[] requestData = TopologyMaster.MetricRequest.newBuilder() + byte[] requestData = TopologyManager.MetricRequest.newBuilder() .setComponentName(args[1]) .setMinutely(true) .setInterval(-1) @@ -118,7 +118,7 @@ public static void main(String[] args) byte[] responseData = NetworkUtils.readHttpResponse(con); // parse response data - TopologyMaster.MetricResponse response = TopologyMaster.MetricResponse.parseFrom(responseData); + TopologyManager.MetricResponse response = TopologyManager.MetricResponse.parseFrom(responseData); System.out.println(response.toString()); } @@ -157,50 +157,50 @@ public void 
handle(HttpExchange httpExchange) throws IOException { abstract U generateResponse(T request, MetricsCache metricsCache1); } - // compatible with tmaster stat interface: http+protobuf + // compatible with tmanager stat interface: http+protobuf class HandleStatsRequest - extends RequestHandler { + extends RequestHandler { @Override - public TopologyMaster.MetricRequest parseRequest(byte[] requestBytes) + public TopologyManager.MetricRequest parseRequest(byte[] requestBytes) throws InvalidProtocolBufferException { - return TopologyMaster.MetricRequest.parseFrom(requestBytes); + return TopologyManager.MetricRequest.parseFrom(requestBytes); } @Override - public TopologyMaster.MetricResponse generateResponse( - TopologyMaster.MetricRequest request, MetricsCache metricsCache1) { + public TopologyManager.MetricResponse generateResponse( + TopologyManager.MetricRequest request, MetricsCache metricsCache1) { return metricsCache1.getMetrics(request); } } - // compatible with tmaster exceptions interface: http+protobuf + // compatible with tmanager exceptions interface: http+protobuf public class HandleExceptionRequest extends - RequestHandler { + RequestHandler { @Override - public TopologyMaster.ExceptionLogRequest parseRequest(byte[] requestBytes) + public TopologyManager.ExceptionLogRequest parseRequest(byte[] requestBytes) throws InvalidProtocolBufferException { - return TopologyMaster.ExceptionLogRequest.parseFrom(requestBytes); + return TopologyManager.ExceptionLogRequest.parseFrom(requestBytes); } @Override - public TopologyMaster.ExceptionLogResponse generateResponse( - TopologyMaster.ExceptionLogRequest request, MetricsCache metricsCache1) { + public TopologyManager.ExceptionLogResponse generateResponse( + TopologyManager.ExceptionLogRequest request, MetricsCache metricsCache1) { return metricsCache1.getExceptions(request); } } - // compatible with tmaster exceptionsummary interface: http+protobuf + // compatible with tmanager exceptionsummary interface: http+protobuf public class HandleExceptionSummaryRequest extends - RequestHandler { + RequestHandler { @Override - public TopologyMaster.ExceptionLogRequest parseRequest(byte[] requestBytes) + public TopologyManager.ExceptionLogRequest parseRequest(byte[] requestBytes) throws InvalidProtocolBufferException { - return TopologyMaster.ExceptionLogRequest.parseFrom(requestBytes); + return TopologyManager.ExceptionLogRequest.parseFrom(requestBytes); } @Override - public TopologyMaster.ExceptionLogResponse generateResponse( - TopologyMaster.ExceptionLogRequest request, MetricsCache metricsCache1) { + public TopologyManager.ExceptionLogResponse generateResponse( + TopologyManager.ExceptionLogRequest request, MetricsCache metricsCache1) { return metricsCache1.getExceptionsSummary(request); } } diff --git a/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/MetricsCacheManagerServer.java b/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/MetricsCacheManagerServer.java index 517892db62b..6416c31ddb8 100644 --- a/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/MetricsCacheManagerServer.java +++ b/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/MetricsCacheManagerServer.java @@ -30,7 +30,7 @@ import org.apache.heron.common.network.REQID; import org.apache.heron.metricscachemgr.metricscache.MetricsCache; import org.apache.heron.metricsmgr.MetricsManagerServer; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; /** * server to accept 
metrics from a particular sink in metrics manager @@ -71,10 +71,10 @@ public void onRequest(REQID requestId, SocketChannel channel, Message request) { LOG.fine("MetricsCacheManagerServer onRequest from host:port " + channel.socket().getRemoteSocketAddress()); - if (request instanceof TopologyMaster.MetricRequest) { - LOG.fine("received request " + (TopologyMaster.MetricRequest) request); - TopologyMaster.MetricResponse resp = - metricsCache.getMetrics((TopologyMaster.MetricRequest) request); + if (request instanceof TopologyManager.MetricRequest) { + LOG.fine("received request " + (TopologyManager.MetricRequest) request); + TopologyManager.MetricResponse resp = + metricsCache.getMetrics((TopologyManager.MetricRequest) request); LOG.fine("query finished, to send response"); sendResponse(requestId, channel, resp); LOG.fine("queued response size " + resp.getSerializedSize()); @@ -89,9 +89,9 @@ public void onMessage(SocketChannel channel, Message message) { LOG.fine("MetricsCacheManagerServer onMessage from host:port " + channel.socket().getRemoteSocketAddress()); - if (message instanceof TopologyMaster.PublishMetrics) { - LOG.fine("received message " + (TopologyMaster.PublishMetrics) message); - metricsCache.addMetrics((TopologyMaster.PublishMetrics) message); + if (message instanceof TopologyManager.PublishMetrics) { + LOG.fine("received message " + (TopologyManager.PublishMetrics) message); + metricsCache.addMetrics((TopologyManager.PublishMetrics) message); } else { LOG.severe("Unknown kind of message received " + channel.socket().getRemoteSocketAddress() + "; " + message); diff --git a/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/metricscache/CacheCore.java b/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/metricscache/CacheCore.java index 900a1623ee7..09b744899fe 100644 --- a/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/metricscache/CacheCore.java +++ b/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/metricscache/CacheCore.java @@ -42,7 +42,7 @@ import org.apache.heron.metricscachemgr.metricscache.query.MetricTimeRangeValue; import org.apache.heron.metricscachemgr.metricscache.store.ExceptionDatapoint; import org.apache.heron.metricscachemgr.metricscache.store.MetricDatapoint; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.spi.metricsmgr.metrics.MetricsFilter; /** @@ -57,11 +57,11 @@ * 4. Index for exceptions: * component -(map)-> instance -(map)-> idxComponentInstance (int: locator) * 5. Query pattern: component-instance (equality), metricName (equality), timestamp (range) - * Different from tmaster: + * Different from tmanager: * 1. order bucket by metric timestamp rather than metric message arriving time * 2. free buckets for instances that are gone during scaling process * 3. lock for multiple threads - * Same as tmaster: + * Same as tmanager: * 1. 
support same protobuf message/request format */ public class CacheCore { @@ -154,15 +154,15 @@ private void assureMetricName(String name) { } /** - * compatible with heron::tmaster::TMetricsCollector + * compatible with heron::tmanager::TMetricsCollector * @param metrics The metrics to be added */ - public void addMetricException(TopologyMaster.PublishMetrics metrics) { + public void addMetricException(TopologyManager.PublishMetrics metrics) { synchronized (CacheCore.class) { - for (TopologyMaster.MetricDatum metricDatum : metrics.getMetricsList()) { + for (TopologyManager.MetricDatum metricDatum : metrics.getMetricsList()) { addMetric(metricDatum); } - for (TopologyMaster.TmasterExceptionLog exceptionLog : metrics.getExceptionsList()) { + for (TopologyManager.TmanagerExceptionLog exceptionLog : metrics.getExceptionsList()) { addException(exceptionLog); } } @@ -194,7 +194,7 @@ private long makeBucketId(int hi, int lo) { * * @param metricDatum the metric to be inserted */ - private void addMetric(TopologyMaster.MetricDatum metricDatum) { + private void addMetric(TopologyManager.MetricDatum metricDatum) { String componentName = metricDatum.getComponentName(); String instanceId = metricDatum.getInstanceId(); String metricName = metricDatum.getName(); @@ -224,7 +224,7 @@ private void addMetric(TopologyMaster.MetricDatum metricDatum) { } } - private void addException(TopologyMaster.TmasterExceptionLog exceptionLog) { + private void addException(TopologyManager.TmanagerExceptionLog exceptionLog) { String componentName = exceptionLog.getComponentName(); String instanceId = exceptionLog.getInstanceId(); assureComponentInstance(componentName, instanceId); @@ -359,7 +359,7 @@ private void getRawMetrics(List metricValue, } // end tree } - // we assume the metric value is Double: compatible with tmaster + // we assume the metric value is Double: compatible with tmanager @SuppressWarnings("fallthrough") private void getAggregatedMetrics(List metricValue, long startTime, long endTime, long bucketId, diff --git a/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/metricscache/MetricsCache.java b/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/metricscache/MetricsCache.java index 8bdd87e8ef2..366bb295abf 100644 --- a/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/metricscache/MetricsCache.java +++ b/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/metricscache/MetricsCache.java @@ -34,13 +34,13 @@ import org.apache.heron.metricscachemgr.metricscache.query.MetricResponse; import org.apache.heron.metricsmgr.MetricsSinksConfig; import org.apache.heron.proto.system.Common; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.spi.metricsmgr.metrics.MetricsFilter; /** * Interface for the cache core - * providing compatible interface with tmaster - * see heron/tmaster/src/cpp/manager/tmetrics-collector.h + * providing compatible interface with tmanager + * see heron/tmanager/src/cpp/manager/tmetrics-collector.h */ public class MetricsCache { public static final String METRICS_SINKS_METRICSCACHE_SINK = "metricscache-sink"; @@ -55,27 +55,27 @@ public MetricsCache(SystemConfig systemConfig, MetricsSinksConfig sinksConfig, WakeableLooper looper) { // metadata metricNameType = new MetricsFilter(); - Map sinksTMaster = + Map sinksTManager = sinksConfig.getConfigForSink(METRICS_SINKS_METRICSCACHE_SINK); @SuppressWarnings("unchecked") Map metricsTypes = - (Map) 
sinksTMaster.get(METRICS_SINKS_METRICSCACHE_METRICS); + (Map) sinksTManager.get(METRICS_SINKS_METRICSCACHE_METRICS); for (String metricName : metricsTypes.keySet()) { metricNameType.setMetricToType(metricName, translateFromString(metricsTypes.get(metricName))); } - Duration maxInterval = systemConfig.getTmasterMetricsCollectorMaximumInterval(); - Duration purgeInterval = systemConfig.getTmasterMetricsCollectorPurgeInterval(); - long maxExceptions = systemConfig.getTmasterMetricsCollectorMaximumException(); + Duration maxInterval = systemConfig.getTmanagerMetricsCollectorMaximumInterval(); + Duration purgeInterval = systemConfig.getTmanagerMetricsCollectorPurgeInterval(); + long maxExceptions = systemConfig.getTmanagerMetricsCollectorMaximumException(); cache = new CacheCore(maxInterval, purgeInterval, maxExceptions); cache.startPurge(looper); } - private static TopologyMaster.MetricResponse.Builder buildResponseNotOk(String message) { - TopologyMaster.MetricResponse.Builder builder = - TopologyMaster.MetricResponse.newBuilder(); + private static TopologyManager.MetricResponse.Builder buildResponseNotOk(String message) { + TopologyManager.MetricResponse.Builder builder = + TopologyManager.MetricResponse.newBuilder(); builder.setStatus(Common.Status.newBuilder() .setStatus(Common.StatusCode.NOTOK) .setMessage(message)); @@ -96,7 +96,7 @@ private MetricsFilter.MetricAggregationType translateFromString(String type) { * * @param metrics message from sinks */ - public void addMetrics(TopologyMaster.PublishMetrics metrics) { + public void addMetrics(TopologyManager.PublishMetrics metrics) { cache.addMetricException(metrics); } @@ -121,16 +121,16 @@ public ExceptionResponse getExceptions(ExceptionRequest request) { } /** - * compatible with tmaster interface + * compatible with tmanager interface * * @param request query request defined in protobuf * @return query result defined in protobuf */ - public TopologyMaster.ExceptionLogResponse getExceptions( - TopologyMaster.ExceptionLogRequest request) { + public TopologyManager.ExceptionLogResponse getExceptions( + TopologyManager.ExceptionLogRequest request) { ExceptionRequest request1 = MetricsCacheQueryUtils.fromProtobuf(request); ExceptionResponse response1 = cache.getExceptions(request1); - TopologyMaster.ExceptionLogResponse response = MetricsCacheQueryUtils.toProtobuf(response1); + TopologyManager.ExceptionLogResponse response = MetricsCacheQueryUtils.toProtobuf(response1); return response; } @@ -164,27 +164,27 @@ private ExceptionResponse summarizeException(ExceptionResponse response1) { } /** - * compatible with tmaster interface + * compatible with tmanager interface * * @param request query statement defined in protobuf * @return query result defined in protobuf */ - public TopologyMaster.ExceptionLogResponse getExceptionsSummary( - TopologyMaster.ExceptionLogRequest request) { + public TopologyManager.ExceptionLogResponse getExceptionsSummary( + TopologyManager.ExceptionLogRequest request) { ExceptionRequest request1 = MetricsCacheQueryUtils.fromProtobuf(request); ExceptionResponse response1 = cache.getExceptions(request1); ExceptionResponse response2 = summarizeException(response1); - TopologyMaster.ExceptionLogResponse response = MetricsCacheQueryUtils.toProtobuf(response2); + TopologyManager.ExceptionLogResponse response = MetricsCacheQueryUtils.toProtobuf(response2); return response; } /** - * compatible with tmaster interface + * compatible with tmanager interface * * @param request query statement defined in protobuf * @return query 
result defined in protobuf */ - public TopologyMaster.MetricResponse getMetrics(TopologyMaster.MetricRequest request) { + public TopologyManager.MetricResponse getMetrics(TopologyManager.MetricRequest request) { String componentName = request.getComponentName(); if (!cache.componentInstanceExists(componentName, null)) { return buildResponseNotOk( @@ -206,7 +206,7 @@ public TopologyMaster.MetricResponse getMetrics(TopologyMaster.MetricRequest req MetricRequest request1 = MetricsCacheQueryUtils.fromProtobuf(request); MetricResponse response1 = cache.getMetrics(request1, metricNameType); - TopologyMaster.MetricResponse response = MetricsCacheQueryUtils.toProtobuf(response1, request1); + TopologyManager.MetricResponse response = MetricsCacheQueryUtils.toProtobuf(response1, request1); return response; } } diff --git a/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/metricscache/MetricsCacheQueryUtils.java b/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/metricscache/MetricsCacheQueryUtils.java index 6a0862a692a..5233722eb02 100644 --- a/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/metricscache/MetricsCacheQueryUtils.java +++ b/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/metricscache/MetricsCacheQueryUtils.java @@ -35,7 +35,7 @@ import org.apache.heron.metricscachemgr.metricscache.query.MetricResponse; import org.apache.heron.metricscachemgr.metricscache.query.MetricTimeRangeValue; import org.apache.heron.proto.system.Common; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import static org.apache.heron.metricscachemgr.metricscache.query.MetricGranularity.AGGREGATE_ALL_METRICS; import static org.apache.heron.metricscachemgr.metricscache.query.MetricGranularity.AGGREGATE_BY_BUCKET; @@ -48,11 +48,11 @@ private MetricsCacheQueryUtils() { } /** - * compatible with org.apache.heron.proto.tmaster.TopologyMaster.MetricRequest + * compatible with org.apache.heron.proto.tmanager.TopologyManager.MetricRequest * @param request protobuf defined message * @return metricscache defined data structure */ - public static MetricRequest fromProtobuf(TopologyMaster.MetricRequest request) { + public static MetricRequest fromProtobuf(TopologyManager.MetricRequest request) { String componentName = request.getComponentName(); Map> componentNameInstanceId = new HashMap<>(); @@ -101,15 +101,15 @@ public static MetricRequest fromProtobuf(TopologyMaster.MetricRequest request) { } /** - * compatible with org.apache.heron.proto.tmaster.TopologyMaster.MetricResponse + * compatible with org.apache.heron.proto.tmanager.TopologyManager.MetricResponse * @param response metricscache defined data structure * @param request metricscache defined data structure * @return protobuf defined message */ - public static TopologyMaster.MetricResponse toProtobuf(MetricResponse response, + public static TopologyManager.MetricResponse toProtobuf(MetricResponse response, MetricRequest request) { - TopologyMaster.MetricResponse.Builder builder = - TopologyMaster.MetricResponse.newBuilder(); + TopologyManager.MetricResponse.Builder builder = + TopologyManager.MetricResponse.newBuilder(); builder.setInterval((request.getEndTime() - request.getStartTime()) / 1000); // in seconds // default OK if we have response to build already @@ -136,14 +136,14 @@ public static TopologyMaster.MetricResponse toProtobuf(MetricResponse response, // add TaskMetric for (String instanceId : aggregation.keySet()) { - 
TopologyMaster.MetricResponse.TaskMetric.Builder taskMetricBuilder = - TopologyMaster.MetricResponse.TaskMetric.newBuilder(); + TopologyManager.MetricResponse.TaskMetric.Builder taskMetricBuilder = + TopologyManager.MetricResponse.TaskMetric.newBuilder(); taskMetricBuilder.setInstanceId(instanceId); // add IndividualMetric for (String metricName : aggregation.get(instanceId).keySet()) { - TopologyMaster.MetricResponse.IndividualMetric.Builder individualMetricBuilder = - TopologyMaster.MetricResponse.IndividualMetric.newBuilder(); + TopologyManager.MetricResponse.IndividualMetric.Builder individualMetricBuilder = + TopologyManager.MetricResponse.IndividualMetric.newBuilder(); individualMetricBuilder.setName(metricName); // add value|IntervalValue @@ -152,12 +152,12 @@ public static TopologyMaster.MetricResponse toProtobuf(MetricResponse response, individualMetricBuilder.setValue(list.get(0).getValue()); } else { for (MetricTimeRangeValue v : list) { - TopologyMaster.MetricResponse.IndividualMetric.IntervalValue.Builder + TopologyManager.MetricResponse.IndividualMetric.IntervalValue.Builder intervalValueBuilder = - TopologyMaster.MetricResponse.IndividualMetric.IntervalValue.newBuilder(); + TopologyManager.MetricResponse.IndividualMetric.IntervalValue.newBuilder(); intervalValueBuilder.setValue(v.getValue()); - intervalValueBuilder.setInterval(TopologyMaster.MetricInterval.newBuilder() + intervalValueBuilder.setInterval(TopologyManager.MetricInterval.newBuilder() .setStart(v.getStartTime()).setEnd(v.getEndTime())); individualMetricBuilder.addIntervalValues(intervalValueBuilder); @@ -173,8 +173,8 @@ public static TopologyMaster.MetricResponse toProtobuf(MetricResponse response, return builder.build(); } - // compatible with org.apache.heron.proto.tmaster.TopologyMaster.ExceptionLogRequest - public static ExceptionRequest fromProtobuf(TopologyMaster.ExceptionLogRequest request) { + // compatible with org.apache.heron.proto.tmanager.TopologyManager.ExceptionLogRequest + public static ExceptionRequest fromProtobuf(TopologyManager.ExceptionLogRequest request) { String componentName = request.getComponentName(); Map> componentNameInstanceId = new HashMap<>(); @@ -191,16 +191,16 @@ public static ExceptionRequest fromProtobuf(TopologyMaster.ExceptionLogRequest r return new ExceptionRequest(componentNameInstanceId); } - // compatible with org.apache.heron.proto.tmaster.TopologyMaster.ExceptionLogResponse - public static TopologyMaster.ExceptionLogResponse toProtobuf(ExceptionResponse response) { - TopologyMaster.ExceptionLogResponse.Builder builder = - TopologyMaster.ExceptionLogResponse.newBuilder(); + // compatible with org.apache.heron.proto.tmanager.TopologyManager.ExceptionLogResponse + public static TopologyManager.ExceptionLogResponse toProtobuf(ExceptionResponse response) { + TopologyManager.ExceptionLogResponse.Builder builder = + TopologyManager.ExceptionLogResponse.newBuilder(); // default OK if we have response to build already builder.setStatus(Common.Status.newBuilder().setStatus(Common.StatusCode.OK)); for (ExceptionDatum e : response.getExceptionDatapointList()) { - TopologyMaster.TmasterExceptionLog.Builder exceptionBuilder = - TopologyMaster.TmasterExceptionLog.newBuilder(); + TopologyManager.TmanagerExceptionLog.Builder exceptionBuilder = + TopologyManager.TmanagerExceptionLog.newBuilder(); // ExceptionDatapoint exceptionBuilder.setComponentName(e.getComponentName()); exceptionBuilder.setHostname(e.getHostname()); diff --git a/heron/metricscachemgr/tests/java/BUILD 
b/heron/metricscachemgr/tests/java/BUILD index ba74d47dae5..60c9250885c 100644 --- a/heron/metricscachemgr/tests/java/BUILD +++ b/heron/metricscachemgr/tests/java/BUILD @@ -9,7 +9,7 @@ metricscachemgr_deps_files = [ "//heron/spi/src/java:metricsmgr-spi-java", "//heron/proto:proto_common_java", "//heron/proto:proto_metrics_java", - "//heron/proto:proto_tmaster_java", + "//heron/proto:proto_tmanager_java", "//third_party/java:jackson", "//third_party/java:cli", "//third_party/java:guava", diff --git a/heron/metricscachemgr/tests/java/org/apache/heron/metricscachemgr/metricscache/CacheCoreTest.java b/heron/metricscachemgr/tests/java/org/apache/heron/metricscachemgr/metricscache/CacheCoreTest.java index 7b29fa49a1c..a313308ad12 100644 --- a/heron/metricscachemgr/tests/java/org/apache/heron/metricscachemgr/metricscache/CacheCoreTest.java +++ b/heron/metricscachemgr/tests/java/org/apache/heron/metricscachemgr/metricscache/CacheCoreTest.java @@ -36,7 +36,7 @@ import org.apache.heron.metricscachemgr.metricscache.query.MetricRequest; import org.apache.heron.metricscachemgr.metricscache.query.MetricResponse; import org.apache.heron.metricscachemgr.metricscache.query.MetricTimeRangeValue; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.spi.metricsmgr.metrics.MetricsFilter; import static org.apache.heron.metricscachemgr.metricscache.query.MetricGranularity.RAW; @@ -99,7 +99,7 @@ private void prepareDataForHashIndex() { // although it may be slightly different from the time origin // in the CacheCore initialization. now = System.currentTimeMillis(); - TopologyMaster.PublishMetrics.Builder builder = TopologyMaster.PublishMetrics.newBuilder(); + TopologyManager.PublishMetrics.Builder builder = TopologyManager.PublishMetrics.newBuilder(); // should be in bucket 1 long ts = now - 90 * 1000; @@ -119,7 +119,7 @@ private void prepareDataForHashIndex() { for (String component : components) { for (String instance : instances) { for (String metric : metrics) { - builder.addMetrics(TopologyMaster.MetricDatum.newBuilder() + builder.addMetrics(TopologyManager.MetricDatum.newBuilder() .setTimestamp(ts) .setComponentName(component).setInstanceId(instance) .setName(metric) @@ -478,7 +478,7 @@ private void prepareDataForTreeIndex() { // although it may be slightly different from the time origin // in the CacheCore initialization. now = System.currentTimeMillis(); - TopologyMaster.PublishMetrics.Builder builder = TopologyMaster.PublishMetrics.newBuilder(); + TopologyManager.PublishMetrics.Builder builder = TopologyManager.PublishMetrics.newBuilder(); long[] ts = new long[]{ // the timestamp falls outside cache time window. too old to be in the cache now - 120 * 1000, @@ -501,7 +501,7 @@ private void prepareDataForTreeIndex() { "0.0", "0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7" }; for (int i = 0; i < ts.length; i++) { - builder.addMetrics(TopologyMaster.MetricDatum.newBuilder() + builder.addMetrics(TopologyManager.MetricDatum.newBuilder() .setTimestamp(ts[i]) .setComponentName("c1").setInstanceId("i1") .setName("m1") @@ -634,11 +634,11 @@ public void testPurge() throws InterruptedException { // in the CacheCore initialization. 
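     // Note: unlike the setup methods above, this test reads time from the
     // test's ticker rather than System.currentTimeMillis().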
now = ticker.read(); - TopologyMaster.PublishMetrics.Builder builder = TopologyMaster.PublishMetrics.newBuilder(); + TopologyManager.PublishMetrics.Builder builder = TopologyManager.PublishMetrics.newBuilder(); // should be in bucket 1 long ts = now - 9 * 1000; // c1-i1, m1: 0.1 - builder.addMetrics(TopologyMaster.MetricDatum.newBuilder() + builder.addMetrics(TopologyManager.MetricDatum.newBuilder() .setTimestamp(ts) .setComponentName("c1").setInstanceId("i1") .setName("m1") @@ -680,11 +680,11 @@ public void testPurge() throws InterruptedException { ); // insert-select after purge - TopologyMaster.PublishMetrics.Builder builder2 = TopologyMaster.PublishMetrics.newBuilder(); + TopologyManager.PublishMetrics.Builder builder2 = TopologyManager.PublishMetrics.newBuilder(); // should be in bucket 1 ts = now - 3 * 1000; // c1-i1, m1: 0.1 - builder2.addMetrics(TopologyMaster.MetricDatum.newBuilder() + builder2.addMetrics(TopologyManager.MetricDatum.newBuilder() .setTimestamp(ts) .setComponentName("c1").setInstanceId("i1") .setName("m1") diff --git a/heron/metricscachemgr/tests/java/org/apache/heron/metricscachemgr/metricscache/MetricsCacheQueryUtilsTest.java b/heron/metricscachemgr/tests/java/org/apache/heron/metricscachemgr/metricscache/MetricsCacheQueryUtilsTest.java index aa2d64de453..b3bdafb8c9c 100644 --- a/heron/metricscachemgr/tests/java/org/apache/heron/metricscachemgr/metricscache/MetricsCacheQueryUtilsTest.java +++ b/heron/metricscachemgr/tests/java/org/apache/heron/metricscachemgr/metricscache/MetricsCacheQueryUtilsTest.java @@ -37,7 +37,7 @@ import org.apache.heron.metricscachemgr.metricscache.query.MetricResponse; import org.apache.heron.metricscachemgr.metricscache.query.MetricTimeRangeValue; import org.apache.heron.proto.system.Common; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import static org.apache.heron.metricscachemgr.metricscache.MetricsCacheQueryUtils.toProtobuf; import static org.junit.Assert.assertEquals; @@ -83,8 +83,8 @@ private static void assertMetricRequest( @Test public void testFromProtoBufMetricInterval() { - TopologyMaster.MetricRequest request = - TopologyMaster.MetricRequest.newBuilder() + TopologyManager.MetricRequest request = + TopologyManager.MetricRequest.newBuilder() .setComponentName("c1") .addInstanceId("i1").addInstanceId("i2") .addMetric("m1").addMetric("m2") @@ -104,12 +104,12 @@ public void testFromProtoBufMetricInterval() { @Test public void testFromProtoBufMetricExplicitInterval() { - TopologyMaster.MetricRequest request = - TopologyMaster.MetricRequest.newBuilder() + TopologyManager.MetricRequest request = + TopologyManager.MetricRequest.newBuilder() .setComponentName("c1") .addInstanceId("i1").addInstanceId("i2") .addMetric("m1").addMetric("m2") - .setExplicitInterval(TopologyMaster.MetricInterval.newBuilder() + .setExplicitInterval(TopologyManager.MetricInterval.newBuilder() .setStart(100).setEnd(200)) // in seconds .build(); @@ -126,12 +126,12 @@ public void testFromProtoBufMetricExplicitInterval() { @Test public void testFromProtoBufMetricMinutely() { - TopologyMaster.MetricRequest request = - TopologyMaster.MetricRequest.newBuilder() + TopologyManager.MetricRequest request = + TopologyManager.MetricRequest.newBuilder() .setComponentName("c1") .addInstanceId("i1").addInstanceId("i2") .addMetric("m1").addMetric("m2") - .setExplicitInterval(TopologyMaster.MetricInterval.newBuilder() + .setExplicitInterval(TopologyManager.MetricInterval.newBuilder() .setStart(100).setEnd(200)) 
// in seconds .setMinutely(true) .build(); @@ -149,11 +149,11 @@ public void testFromProtoBufMetricMinutely() { @Test public void testFromProtoBufMetricEmptyMetrics() { - TopologyMaster.MetricRequest request = - TopologyMaster.MetricRequest.newBuilder() + TopologyManager.MetricRequest request = + TopologyManager.MetricRequest.newBuilder() .setComponentName("c1") .addInstanceId("i1").addInstanceId("i2") - .setExplicitInterval(TopologyMaster.MetricInterval.newBuilder() + .setExplicitInterval(TopologyManager.MetricInterval.newBuilder() .setStart(100).setEnd(200)) // in seconds .setMinutely(true) .build(); @@ -171,11 +171,11 @@ public void testFromProtoBufMetricEmptyMetrics() { @Test public void testFromProtoBufMetricEmptyInstanceIds() { - TopologyMaster.MetricRequest request = - TopologyMaster.MetricRequest.newBuilder() + TopologyManager.MetricRequest request = + TopologyManager.MetricRequest.newBuilder() .setComponentName("c1") .addMetric("m1").addMetric("m2") - .setExplicitInterval(TopologyMaster.MetricInterval.newBuilder() + .setExplicitInterval(TopologyManager.MetricInterval.newBuilder() .setStart(100).setEnd(200)) // in seconds .setMinutely(true) .build(); @@ -196,7 +196,7 @@ public void testToProtoBufMetric() { long endTime = 200 * 1000; MetricRequest request = new MetricRequest(null, null, startTime, endTime, null); - TopologyMaster.MetricResponse response1 = toProtobuf(response, request); + TopologyManager.MetricResponse response1 = toProtobuf(response, request); assertEquals(200 - 100, response1.getInterval()); assertEquals(Common.StatusCode.OK, response1.getStatus().getStatus()); @@ -219,7 +219,7 @@ public void testToProtoBufMetric2() { long endTime = 200 * 1000; MetricRequest request = new MetricRequest(null, null, startTime, endTime, null); - TopologyMaster.MetricResponse response1 = toProtobuf(response, request); + TopologyManager.MetricResponse response1 = toProtobuf(response, request); assertEquals(200 - 100, response1.getInterval()); assertEquals(Common.StatusCode.OK, response1.getStatus().getStatus()); @@ -242,8 +242,8 @@ public void testToProtoBufMetric2() { @Test public void testFromProtoBufException() { - TopologyMaster.ExceptionLogRequest request = - TopologyMaster.ExceptionLogRequest.newBuilder() + TopologyManager.ExceptionLogRequest request = + TopologyManager.ExceptionLogRequest.newBuilder() .setComponentName("c1") .addInstances("i1").addInstances("i2") .build(); @@ -260,8 +260,8 @@ public void testFromProtoBufException() { @Test public void testFromProtoBufExceptionEmptyInstances() { - TopologyMaster.ExceptionLogRequest request = - TopologyMaster.ExceptionLogRequest.newBuilder() + TopologyManager.ExceptionLogRequest request = + TopologyManager.ExceptionLogRequest.newBuilder() .setComponentName("c1") .build(); @@ -279,7 +279,7 @@ public void testToProtoBufException() { ExceptionDatum dp = new ExceptionDatum("c1", "i1", "h1", "s1", "lt1", "ft1", 10, "l1"); response.add(dp); - TopologyMaster.ExceptionLogResponse response1 = toProtobuf(new ExceptionResponse(response)); + TopologyManager.ExceptionLogResponse response1 = toProtobuf(new ExceptionResponse(response)); assertEquals(1, response1.getExceptionsCount()); assertEquals("c1", response1.getExceptions(0).getComponentName()); diff --git a/heron/metricscachemgr/tests/java/org/apache/heron/metricscachemgr/metricscache/MetricsCacheTest.java b/heron/metricscachemgr/tests/java/org/apache/heron/metricscachemgr/metricscache/MetricsCacheTest.java index dbfdc482b3b..b891b2b9790 100644 --- 
a/heron/metricscachemgr/tests/java/org/apache/heron/metricscachemgr/metricscache/MetricsCacheTest.java +++ b/heron/metricscachemgr/tests/java/org/apache/heron/metricscachemgr/metricscache/MetricsCacheTest.java @@ -26,7 +26,7 @@ import org.apache.heron.common.basics.NIOLooper; import org.apache.heron.common.config.SystemConfig; import org.apache.heron.metricsmgr.MetricsSinksConfig; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import static org.junit.Assert.assertEquals; @@ -47,11 +47,11 @@ public void testMetricCache() throws IOException { // initialize metric cache, except looper MetricsCache mc = new MetricsCache(systemConfig, sinksConfig, new NIOLooper()); - mc.addMetrics(TopologyMaster.PublishMetrics.newBuilder() - .addMetrics(TopologyMaster.MetricDatum.newBuilder() + mc.addMetrics(TopologyManager.PublishMetrics.newBuilder() + .addMetrics(TopologyManager.MetricDatum.newBuilder() .setComponentName("c1").setInstanceId("i1").setName("__jvm-uptime-secs") .setTimestamp(System.currentTimeMillis()).setValue("0.1")) - .addExceptions(TopologyMaster.TmasterExceptionLog.newBuilder() + .addExceptions(TopologyManager.TmanagerExceptionLog.newBuilder() .setComponentName("c1").setHostname("h1").setInstanceId("i1") .setStacktrace("s1").setLogging("l1") .setCount(1) @@ -60,7 +60,7 @@ public void testMetricCache() throws IOException { .build()); // query last 10 seconds - TopologyMaster.MetricResponse response = mc.getMetrics(TopologyMaster.MetricRequest.newBuilder() + TopologyManager.MetricResponse response = mc.getMetrics(TopologyManager.MetricRequest.newBuilder() .setComponentName("c1").addInstanceId("i1") .setInterval(10).addMetric("__jvm-uptime-secs") .build()); From 37a98a62ded687f91c257aa0df342a010b52b8ea Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 21:41:54 -0400 Subject: [PATCH 13/32] renaming "topology master" to "topology manager" in heron/common --- .../config/heron-internals-config-reader.cpp | 60 ++++++++--------- .../config/heron-internals-config-reader.h | 52 +++++++-------- .../config/heron-internals-config-vars.cpp | 62 +++++++++--------- .../cpp/config/heron-internals-config-vars.h | 54 ++++++++-------- .../src/cpp/config/metrics-sinks-reader.cpp | 10 +-- .../src/cpp/config/metrics-sinks-reader.h | 4 +- .../src/cpp/config/metrics-sinks-vars.cpp | 4 +- .../src/cpp/config/metrics-sinks-vars.h | 4 +- heron/common/src/cpp/metrics/BUILD | 4 +- .../common/src/cpp/metrics/metrics-mgr-st.cpp | 6 +- heron/common/src/cpp/metrics/metrics-mgr-st.h | 8 +-- heron/common/src/cpp/metrics/metrics.h | 2 +- .../src/cpp/metrics/metricsmgr-client.cpp | 42 ++++++------ .../src/cpp/metrics/metricsmgr-client.h | 14 ++-- ...aster-metrics.cpp => tmanager-metrics.cpp} | 14 ++-- .../{tmaster-metrics.h => tmanager-metrics.h} | 16 ++--- heron/common/src/cpp/setup/zk-setup.cpp | 6 +- .../{SlaveLooper.java => ExecutorLooper.java} | 8 +-- .../heron/common/config/SystemConfig.java | 12 ++-- .../heron/common/config/SystemConfigKey.java | 18 +++--- .../common/utils/metrics/JVMMetrics.java | 6 +- .../heron/common/utils/misc/ThreadNames.java | 2 +- .../heron/common/basics/CommunicatorTest.java | 4 +- .../common/basics/WakeableLooperTest.java | 64 +++++++++---------- 24 files changed, 238 insertions(+), 238 deletions(-) rename heron/common/src/cpp/metrics/{tmaster-metrics.cpp => tmanager-metrics.cpp} (80%) rename heron/common/src/cpp/metrics/{tmaster-metrics.h => tmanager-metrics.h} (83%) rename 
heron/common/src/java/org/apache/heron/common/basics/{SlaveLooper.java => ExecutorLooper.java} (92%) diff --git a/heron/common/src/cpp/config/heron-internals-config-reader.cpp b/heron/common/src/cpp/config/heron-internals-config-reader.cpp index 8926ae1a78b..4e77af36f0f 100644 --- a/heron/common/src/cpp/config/heron-internals-config-reader.cpp +++ b/heron/common/src/cpp/config/heron-internals-config-reader.cpp @@ -105,8 +105,8 @@ sp_int32 HeronInternalsConfigReader::GetHeronLoggingMaximumFiles() { return config_[HeronInternalsConfigVars::HERON_LOGGING_MAXIMUM_FILES].as(); } -sp_int32 HeronInternalsConfigReader::GetCheckTMasterLocationIntervalSec() { - return config_[HeronInternalsConfigVars::HERON_CHECK_TMASTER_LOCATION_INTERVAL_SEC].as(); +sp_int32 HeronInternalsConfigReader::GetCheckTManagerLocationIntervalSec() { + return config_[HeronInternalsConfigVars::HERON_CHECK_TMANAGER_LOCATION_INTERVAL_SEC].as(); } sp_int32 HeronInternalsConfigReader::GetHeronLoggingPruneIntervalSec() { @@ -158,8 +158,8 @@ sp_int32 HeronInternalsConfigReader::GetHeronMetricsmgrScribePeriodicFlushInterv .as(); } -sp_int32 HeronInternalsConfigReader::GetHeronMetricsmgrReconnectTmasterIntervalSec() { - return config_[HeronInternalsConfigVars::HERON_METRICSMGR_RECONNECT_TMASTER_INTERVAL_SEC] +sp_int32 HeronInternalsConfigReader::GetHeronMetricsmgrReconnectTmanagerIntervalSec() { + return config_[HeronInternalsConfigVars::HERON_METRICSMGR_RECONNECT_TMANAGER_INTERVAL_SEC] .as(); } @@ -168,52 +168,52 @@ sp_int32 HeronInternalsConfigReader::GetHeronMetricsmgrNetworkOptionsMaximumPack .as(); } -sp_int32 HeronInternalsConfigReader::GetHeronTmasterMetricsCollectorMaximumIntervalMin() { - return config_[HeronInternalsConfigVars::HERON_TMASTER_METRICS_COLLECTOR_MAXIMUM_INTERVAL_MIN] +sp_int32 HeronInternalsConfigReader::GetHeronTmanagerMetricsCollectorMaximumIntervalMin() { + return config_[HeronInternalsConfigVars::HERON_TMANAGER_METRICS_COLLECTOR_MAXIMUM_INTERVAL_MIN] .as(); } -sp_int32 HeronInternalsConfigReader::GetHeronTmasterEstablishRetryTimes() { - return config_[HeronInternalsConfigVars::HERON_TMASTER_ESTABLISH_RETRY_TIMES].as(); +sp_int32 HeronInternalsConfigReader::GetHeronTmanagerEstablishRetryTimes() { + return config_[HeronInternalsConfigVars::HERON_TMANAGER_ESTABLISH_RETRY_TIMES].as(); } -sp_int32 HeronInternalsConfigReader::GetHeronTmasterEstablishRetryIntervalSec() { - return config_[HeronInternalsConfigVars::HERON_TMASTER_ESTABLISH_RETRY_INTERVAL_SEC].as(); +sp_int32 HeronInternalsConfigReader::GetHeronTmanagerEstablishRetryIntervalSec() { + return config_[HeronInternalsConfigVars::HERON_TMANAGER_ESTABLISH_RETRY_INTERVAL_SEC].as(); } -sp_int32 HeronInternalsConfigReader::GetHeronTmasterNetworkMasterOptionsMaximumPacketMb() { - return config_[HeronInternalsConfigVars::HERON_TMASTER_NETWORK_MASTER_OPTIONS_MAXIMUM_PACKET_MB] +sp_int32 HeronInternalsConfigReader::GetHeronTmanagerNetworkServerOptionsMaximumPacketMb() { + return config_[HeronInternalsConfigVars::HERON_TMANAGER_NETWORK_SERVER_OPTIONS_MAXIMUM_PACKET_MB] .as(); } -sp_int32 HeronInternalsConfigReader::GetHeronTmasterNetworkControllerOptionsMaximumPacketMb() { +sp_int32 HeronInternalsConfigReader::GetHeronTmanagerNetworkControllerOptionsMaximumPacketMb() { return config_ - [HeronInternalsConfigVars::HERON_TMASTER_NETWORK_CONTROLLER_OPTIONS_MAXIMUM_PACKET_MB] + [HeronInternalsConfigVars::HERON_TMANAGER_NETWORK_CONTROLLER_OPTIONS_MAXIMUM_PACKET_MB] .as(); } -sp_int32 HeronInternalsConfigReader::GetHeronTmasterNetworkStatsOptionsMaximumPacketMb() { - 
return config_[HeronInternalsConfigVars::HERON_TMASTER_NETWORK_STATS_OPTIONS_MAXIMUM_PACKET_MB] +sp_int32 HeronInternalsConfigReader::GetHeronTmanagerNetworkStatsOptionsMaximumPacketMb() { + return config_[HeronInternalsConfigVars::HERON_TMANAGER_NETWORK_STATS_OPTIONS_MAXIMUM_PACKET_MB] .as(); } -sp_int32 HeronInternalsConfigReader::GetHeronTmasterMetricsCollectorPurgeIntervalSec() { - return config_[HeronInternalsConfigVars::HERON_TMASTER_METRICS_COLLECTOR_PURGE_INTERVAL_SEC] +sp_int32 HeronInternalsConfigReader::GetHeronTmanagerMetricsCollectorPurgeIntervalSec() { + return config_[HeronInternalsConfigVars::HERON_TMANAGER_METRICS_COLLECTOR_PURGE_INTERVAL_SEC] .as(); } -sp_int32 HeronInternalsConfigReader::GetHeronTmasterMetricsCollectorMaximumException() { - return config_[HeronInternalsConfigVars::HERON_TMASTER_METRICS_COLLECTOR_MAXIMUM_EXCEPTION] +sp_int32 HeronInternalsConfigReader::GetHeronTmanagerMetricsCollectorMaximumException() { + return config_[HeronInternalsConfigVars::HERON_TMANAGER_METRICS_COLLECTOR_MAXIMUM_EXCEPTION] .as(); } -bool HeronInternalsConfigReader::GetHeronTmasterMetricsNetworkBindAllInterfaces() { - return config_[HeronInternalsConfigVars::HERON_TMASTER_METRICS_NETWORK_BINDALLINTERFACES] +bool HeronInternalsConfigReader::GetHeronTmanagerMetricsNetworkBindAllInterfaces() { + return config_[HeronInternalsConfigVars::HERON_TMANAGER_METRICS_NETWORK_BINDALLINTERFACES] .as(); } -sp_int32 HeronInternalsConfigReader::GetHeronTmasterStmgrStateTimeoutSec() { - return config_[HeronInternalsConfigVars::HERON_TMASTER_STMGR_STATE_TIMEOUT_SEC].as(); +sp_int32 HeronInternalsConfigReader::GetHeronTmanagerStmgrStateTimeoutSec() { + return config_[HeronInternalsConfigVars::HERON_TMANAGER_STMGR_STATE_TIMEOUT_SEC].as(); } sp_int32 HeronInternalsConfigReader::GetHeronStreammgrCacheDrainFrequencyMs() { @@ -237,8 +237,8 @@ sp_int32 HeronInternalsConfigReader::GetHeronStreammgrXormgrRotatingmapNbuckets( return config_[HeronInternalsConfigVars::HERON_STREAMMGR_XORMGR_ROTATINGMAP_NBUCKETS].as(); } -sp_int32 HeronInternalsConfigReader::GetHeronStreammgrClientReconnectTmasterMaxAttempts() { - return config_[HeronInternalsConfigVars::HERON_STREAMMGR_CLIENT_RECONNECT_TMASTER_MAX_ATTEMPTS] +sp_int32 HeronInternalsConfigReader::GetHeronStreammgrClientReconnectTmanagerMaxAttempts() { + return config_[HeronInternalsConfigVars::HERON_STREAMMGR_CLIENT_RECONNECT_TMANAGER_MAX_ATTEMPTS] .as(); } @@ -246,8 +246,8 @@ sp_int32 HeronInternalsConfigReader::GetHeronStreammgrClientReconnectIntervalSec return config_[HeronInternalsConfigVars::HERON_STREAMMGR_CLIENT_RECONNECT_INTERVAL_SEC].as(); } -sp_int32 HeronInternalsConfigReader::GetHeronStreammgrClientReconnectTmasterIntervalSec() { - return config_[HeronInternalsConfigVars::HERON_STREAMMGR_CLIENT_RECONNECT_TMASTER_INTERVAL_SEC] +sp_int32 HeronInternalsConfigReader::GetHeronStreammgrClientReconnectTmanagerIntervalSec() { + return config_[HeronInternalsConfigVars::HERON_STREAMMGR_CLIENT_RECONNECT_TMANAGER_INTERVAL_SEC] .as(); } @@ -256,8 +256,8 @@ sp_int32 HeronInternalsConfigReader::GetHeronStreammgrNetworkOptionsMaximumPacke .as(); } -sp_int32 HeronInternalsConfigReader::GetHeronStreammgrTmasterHeartbeatIntervalSec() { - return config_[HeronInternalsConfigVars::HERON_STREAMMGR_TMASTER_HEARTBEAT_INTERVAL_SEC] +sp_int32 HeronInternalsConfigReader::GetHeronStreammgrTmanagerHeartbeatIntervalSec() { + return config_[HeronInternalsConfigVars::HERON_STREAMMGR_TMANAGER_HEARTBEAT_INTERVAL_SEC] .as(); } diff --git 
a/heron/common/src/cpp/config/heron-internals-config-reader.h b/heron/common/src/cpp/config/heron-internals-config-reader.h index 8e2447790f9..a03d4df1560 100644 --- a/heron/common/src/cpp/config/heron-internals-config-reader.h +++ b/heron/common/src/cpp/config/heron-internals-config-reader.h @@ -65,9 +65,9 @@ class HeronInternalsConfigReader : public YamlFileReader { // The maximum number of log files sp_int32 GetHeronLoggingMaximumFiles(); - // The interval after which we check if the tmaster location + // The interval after which we check if the tmanager location // has been set or not - sp_int32 GetCheckTMasterLocationIntervalSec(); + sp_int32 GetCheckTManagerLocationIntervalSec(); // The interval in seconds to prune logging files in C++ sp_int32 GetHeronLoggingPruneIntervalSec(); @@ -105,44 +105,44 @@ class HeronInternalsConfigReader : public YamlFileReader { // The interval in seconds to flush cached metrics to scribe sp_int32 GetHeronMetricsmgrScribePeriodicFlushIntervalSec(); - // The interval in seconds to reconnect to tmaster if a connection failure happens - sp_int32 GetHeronMetricsmgrReconnectTmasterIntervalSec(); + // The interval in seconds to reconnect to tmanager if a connection failure happens + sp_int32 GetHeronMetricsmgrReconnectTmanagerIntervalSec(); // The maximum packet size in MB of metrics manager's network options sp_int32 GetHeronMetricsmgrNetworkOptionsMaximumPacketMb(); /** - * Tmaster Config Getters + * Tmanager Config Getters **/ - // The maximum interval in minutes of metrics to be kept in tmaster - sp_int32 GetHeronTmasterMetricsCollectorMaximumIntervalMin(); + // The maximum interval in minutes of metrics to be kept in tmanager + sp_int32 GetHeronTmanagerMetricsCollectorMaximumIntervalMin(); - // The maximum time to retry to establish the tmaster - sp_int32 GetHeronTmasterEstablishRetryTimes(); + // The maximum number of times to retry to establish the tmanager + sp_int32 GetHeronTmanagerEstablishRetryTimes(); - // The interval to retry to establish the tmaster - sp_int32 GetHeronTmasterEstablishRetryIntervalSec(); + // The interval to retry to establish the tmanager + sp_int32 GetHeronTmanagerEstablishRetryIntervalSec(); - // The maximum packet size in MB of tmaster's network options for stmgrs to connect to - sp_int32 GetHeronTmasterNetworkMasterOptionsMaximumPacketMb(); + // The maximum packet size in MB of tmanager's network options for stmgrs to connect to + sp_int32 GetHeronTmanagerNetworkServerOptionsMaximumPacketMb(); - // The maximum packet size in MB of tmaster's network options for scheduler to connect to - sp_int32 GetHeronTmasterNetworkControllerOptionsMaximumPacketMb(); + // The maximum packet size in MB of tmanager's network options for scheduler to connect to + sp_int32 GetHeronTmanagerNetworkControllerOptionsMaximumPacketMb(); - // The maximum packet size in MB of tmaster's network options for stat queries - sp_int32 GetHeronTmasterNetworkStatsOptionsMaximumPacketMb(); + // The maximum packet size in MB of tmanager's network options for stat queries + sp_int32 GetHeronTmanagerNetworkStatsOptionsMaximumPacketMb(); - // The inteval for tmaster to purge metrics from socket - sp_int32 GetHeronTmasterMetricsCollectorPurgeIntervalSec(); + // The interval for tmanager to purge metrics from socket + sp_int32 GetHeronTmanagerMetricsCollectorPurgeIntervalSec(); // The maximum # of exceptions to be stored in tmetrics collector, to prevent potential OOM - sp_int32 GetHeronTmasterMetricsCollectorMaximumException(); + sp_int32 GetHeronTmanagerMetricsCollectorMaximumException();
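Every getter in this header wraps exactly one renamed YAML key. A hypothetical Java analogue of the same pattern (the class and method names here are illustrative, not from the patch; the YAML is assumed to be parsed into a flat map already):

```java
import java.util.Map;

// Hypothetical sketch of the getter pattern used above: one named key,
// one typed accessor, no state beyond the parsed configuration map.
public final class InternalsConfigSketch {
  private final Map<String, Object> config;

  public InternalsConfigSketch(Map<String, Object> parsedYaml) {
    this.config = parsedYaml;
  }

  // Mirrors GetHeronTmanagerEstablishRetryIntervalSec() on the C++ side.
  public int tmanagerEstablishRetryIntervalSec() {
    return (Integer) config.get("heron.tmanager.establish.retry.interval.sec");
  }
}
```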
// Should metrics server bind on all interfaces - bool GetHeronTmasterMetricsNetworkBindAllInterfaces(); + bool GetHeronTmanagerMetricsNetworkBindAllInterfaces(); // The timeout in seconds for stream mgr, compared with (current time - last heartbeat time) - sp_int32 GetHeronTmasterStmgrStateTimeoutSec(); + sp_int32 GetHeronTmanagerStmgrStateTimeoutSec(); /** * Stream manager Config Getters @@ -167,16 +167,16 @@ class HeronInternalsConfigReader : public YamlFileReader { sp_int32 GetHeronStreammgrClientReconnectIntervalSec(); // The reconnect interval to tmanager in seconds for stream manager client - sp_int32 GetHeronStreammgrClientReconnectTmasterIntervalSec(); + sp_int32 GetHeronStreammgrClientReconnectTmanagerIntervalSec(); - // The max reconnect attempts to tmaster for stream manager client - sp_int32 GetHeronStreammgrClientReconnectTmasterMaxAttempts(); + // The max reconnect attempts to tmanager for stream manager client + sp_int32 GetHeronStreammgrClientReconnectTmanagerMaxAttempts(); // The maximum packet size in MB of stream manager's network options sp_int32 GetHeronStreammgrNetworkOptionsMaximumPacketMb(); // The interval in seconds to send heartbeat - sp_int32 GetHeronStreammgrTmasterHeartbeatIntervalSec(); + sp_int32 GetHeronStreammgrTmanagerHeartbeatIntervalSec(); // Maximum batch size in MB to read by stream manager from socket sp_int32 GetHeronStreammgrConnectionReadBatchSizeMb(); diff --git a/heron/common/src/cpp/config/heron-internals-config-vars.cpp b/heron/common/src/cpp/config/heron-internals-config-vars.cpp index afef087b939..067675eac18 100644 --- a/heron/common/src/cpp/config/heron-internals-config-vars.cpp +++ b/heron/common/src/cpp/config/heron-internals-config-vars.cpp @@ -29,8 +29,8 @@ const sp_string HeronInternalsConfigVars::HERON_LOGGING_MAXIMUM_SIZE_MB = "heron.logging.maximum.size.mb"; const sp_string HeronInternalsConfigVars::HERON_LOGGING_MAXIMUM_FILES = "heron.logging.maximum.files"; -const sp_string HeronInternalsConfigVars::HERON_CHECK_TMASTER_LOCATION_INTERVAL_SEC = - "heron.check.tmaster.location.interval.sec"; +const sp_string HeronInternalsConfigVars::HERON_CHECK_TMANAGER_LOCATION_INTERVAL_SEC = + "heron.check.tmanager.location.interval.sec"; const sp_string HeronInternalsConfigVars::HERON_LOGGING_PRUNE_INTERVAL_SEC = "heron.logging.prune.interval.sec"; const sp_string HeronInternalsConfigVars::HERON_LOGGING_FLUSH_INTERVAL_SEC = @@ -56,33 +56,33 @@ const sp_string HeronInternalsConfigVars::HERON_METRICSMGR_SCRIBE_WRITE_TIMEOUT_ "heron.metricsmgr.scribe.write.timeout.sec"; const sp_string HeronInternalsConfigVars::HERON_METRICSMGR_SCRIBE_PERIODIC_FLUSH_INTERVAL_SEC = "heron.metricsmgr.scribe.periodic.flush.interval.sec"; -const sp_string HeronInternalsConfigVars::HERON_METRICSMGR_RECONNECT_TMASTER_INTERVAL_SEC = - "heron.metricsmgr.reconnect.tmaster.interval.sec"; +const sp_string HeronInternalsConfigVars::HERON_METRICSMGR_RECONNECT_TMANAGER_INTERVAL_SEC = + "heron.metricsmgr.reconnect.tmanager.interval.sec"; const sp_string HeronInternalsConfigVars::HERON_METRICSMGR_NETWORK_OPTIONS_MAXIMUM_PACKET_MB = "heron.metricsmgr.network.options.maximum.packet.mb"; -// heron.tmaster.* configs are for the metrics manager -const sp_string HeronInternalsConfigVars::HERON_TMASTER_METRICS_COLLECTOR_MAXIMUM_INTERVAL_MIN = - "heron.tmaster.metrics.collector.maximum.interval.min"; -const sp_string HeronInternalsConfigVars::HERON_TMASTER_ESTABLISH_RETRY_TIMES = - "heron.tmaster.establish.retry.times"; -const sp_string
HeronInternalsConfigVars::HERON_TMASTER_ESTABLISH_RETRY_INTERVAL_SEC = - "heron.tmaster.establish.retry.interval.sec"; -const sp_string HeronInternalsConfigVars::HERON_TMASTER_NETWORK_MASTER_OPTIONS_MAXIMUM_PACKET_MB = - "heron.tmaster.network.master.options.maximum.packet.mb"; +// heron.tmanager.* configs are for the metrics manager +const sp_string HeronInternalsConfigVars::HERON_TMANAGER_METRICS_COLLECTOR_MAXIMUM_INTERVAL_MIN = + "heron.tmanager.metrics.collector.maximum.interval.min"; +const sp_string HeronInternalsConfigVars::HERON_TMANAGER_ESTABLISH_RETRY_TIMES = + "heron.tmanager.establish.retry.times"; +const sp_string HeronInternalsConfigVars::HERON_TMANAGER_ESTABLISH_RETRY_INTERVAL_SEC = + "heron.tmanager.establish.retry.interval.sec"; +const sp_string HeronInternalsConfigVars::HERON_TMANAGER_NETWORK_SERVER_OPTIONS_MAXIMUM_PACKET_MB = + "heron.tmanager.network.server.options.maximum.packet.mb"; const sp_string - HeronInternalsConfigVars::HERON_TMASTER_NETWORK_CONTROLLER_OPTIONS_MAXIMUM_PACKET_MB = - "heron.tmaster.network.controller.options.maximum.packet.mb"; -const sp_string HeronInternalsConfigVars::HERON_TMASTER_NETWORK_STATS_OPTIONS_MAXIMUM_PACKET_MB = - "heron.tmaster.network.stats.options.maximum.packet.mb"; -const sp_string HeronInternalsConfigVars::HERON_TMASTER_METRICS_COLLECTOR_PURGE_INTERVAL_SEC = - "heron.tmaster.metrics.collector.purge.interval.sec"; -const sp_string HeronInternalsConfigVars::HERON_TMASTER_METRICS_COLLECTOR_MAXIMUM_EXCEPTION = - "heron.tmaster.metrics.collector.maximum.exception"; -const sp_string HeronInternalsConfigVars::HERON_TMASTER_METRICS_NETWORK_BINDALLINTERFACES = - "heron.tmaster.metrics.network.bindallinterfaces"; -const sp_string HeronInternalsConfigVars::HERON_TMASTER_STMGR_STATE_TIMEOUT_SEC = - "heron.tmaster.stmgr.state.timeout.sec"; + HeronInternalsConfigVars::HERON_TMANAGER_NETWORK_CONTROLLER_OPTIONS_MAXIMUM_PACKET_MB = + "heron.tmanager.network.controller.options.maximum.packet.mb"; +const sp_string HeronInternalsConfigVars::HERON_TMANAGER_NETWORK_STATS_OPTIONS_MAXIMUM_PACKET_MB = + "heron.tmanager.network.stats.options.maximum.packet.mb"; +const sp_string HeronInternalsConfigVars::HERON_TMANAGER_METRICS_COLLECTOR_PURGE_INTERVAL_SEC = + "heron.tmanager.metrics.collector.purge.interval.sec"; +const sp_string HeronInternalsConfigVars::HERON_TMANAGER_METRICS_COLLECTOR_MAXIMUM_EXCEPTION = + "heron.tmanager.metrics.collector.maximum.exception"; +const sp_string HeronInternalsConfigVars::HERON_TMANAGER_METRICS_NETWORK_BINDALLINTERFACES = + "heron.tmanager.metrics.network.bindallinterfaces"; +const sp_string HeronInternalsConfigVars::HERON_TMANAGER_STMGR_STATE_TIMEOUT_SEC = + "heron.tmanager.stmgr.state.timeout.sec"; // heron.streammgr.* configs are for the stream manager const sp_string HeronInternalsConfigVars::HERON_STREAMMGR_CACHE_DRAIN_FREQUENCY_MS = @@ -95,14 +95,14 @@ const sp_string HeronInternalsConfigVars::HERON_STREAMMGR_XORMGR_ROTATINGMAP_NBU "heron.streammgr.xormgr.rotatingmap.nbuckets"; const sp_string HeronInternalsConfigVars::HERON_STREAMMGR_CLIENT_RECONNECT_INTERVAL_SEC = "heron.streammgr.client.reconnect.interval.sec"; -const sp_string HeronInternalsConfigVars::HERON_STREAMMGR_CLIENT_RECONNECT_TMASTER_INTERVAL_SEC = - "heron.streammgr.client.reconnect.tmaster.interval.sec"; -const sp_string HeronInternalsConfigVars::HERON_STREAMMGR_CLIENT_RECONNECT_TMASTER_MAX_ATTEMPTS = - "heron.streammgr.client.reconnect.tmaster.max.attempts"; +const sp_string 
HeronInternalsConfigVars::HERON_STREAMMGR_CLIENT_RECONNECT_TMANAGER_INTERVAL_SEC = + "heron.streammgr.client.reconnect.tmanager.interval.sec"; +const sp_string HeronInternalsConfigVars::HERON_STREAMMGR_CLIENT_RECONNECT_TMANAGER_MAX_ATTEMPTS = + "heron.streammgr.client.reconnect.tmanager.max.attempts"; const sp_string HeronInternalsConfigVars::HERON_STREAMMGR_NETWORK_OPTIONS_MAXIMUM_PACKET_MB = "heron.streammgr.network.options.maximum.packet.mb"; -const sp_string HeronInternalsConfigVars::HERON_STREAMMGR_TMASTER_HEARTBEAT_INTERVAL_SEC = - "heron.streammgr.tmaster.heartbeat.interval.sec"; +const sp_string HeronInternalsConfigVars::HERON_STREAMMGR_TMANAGER_HEARTBEAT_INTERVAL_SEC = + "heron.streammgr.tmanager.heartbeat.interval.sec"; const sp_string HeronInternalsConfigVars::HERON_STREAMMGR_CONNECTION_READ_BATCH_SIZE_MB = "heron.streammgr.connection.read.batch.size.mb"; const sp_string HeronInternalsConfigVars::HERON_STREAMMGR_CONNECTION_WRITE_BATCH_SIZE_MB = diff --git a/heron/common/src/cpp/config/heron-internals-config-vars.h b/heron/common/src/cpp/config/heron-internals-config-vars.h index dd8391b296b..8feac420c86 100644 --- a/heron/common/src/cpp/config/heron-internals-config-vars.h +++ b/heron/common/src/cpp/config/heron-internals-config-vars.h @@ -51,9 +51,9 @@ class HeronInternalsConfigVars { // The maximum number of log files static const sp_string HERON_LOGGING_MAXIMUM_FILES; - // The interval in seconds after which to check if the tmaster location + // The interval in seconds after which to check if the tmanager location // has been fetched or not - static const sp_string HERON_CHECK_TMASTER_LOCATION_INTERVAL_SEC; + static const sp_string HERON_CHECK_TMANAGER_LOCATION_INTERVAL_SEC; // The interval in seconds to prune logging files in C++ static const sp_string HERON_LOGGING_PRUNE_INTERVAL_SEC; @@ -92,45 +92,45 @@ class HeronInternalsConfigVars { // The interval in seconds to flush cached metrics to scribe static const sp_string HERON_METRICSMGR_SCRIBE_PERIODIC_FLUSH_INTERVAL_SEC; - // The interval in seconds to reconnect to tmaster if a connection failure happens - static const sp_string HERON_METRICSMGR_RECONNECT_TMASTER_INTERVAL_SEC; + // The interval in seconds to reconnect to tmanager if a connection failure happens + static const sp_string HERON_METRICSMGR_RECONNECT_TMANAGER_INTERVAL_SEC; // The maximum packet size in MB of metrics manager's network options static const sp_string HERON_METRICSMGR_NETWORK_OPTIONS_MAXIMUM_PACKET_MB; /** - * HERON_TMASTER_* configs are for the metrics manager + * HERON_TMANAGER_* configs are for the metrics manager **/ - // The maximum interval in minutes of metrics to be kept in tmaster - static const sp_string HERON_TMASTER_METRICS_COLLECTOR_MAXIMUM_INTERVAL_MIN; + // The maximum interval in minutes of metrics to be kept in tmanager + static const sp_string HERON_TMANAGER_METRICS_COLLECTOR_MAXIMUM_INTERVAL_MIN; - // The maximum time to retry to establish the tmaster - static const sp_string HERON_TMASTER_ESTABLISH_RETRY_TIMES; + // The maximum number of times to retry to establish the tmanager + static const sp_string HERON_TMANAGER_ESTABLISH_RETRY_TIMES; - // The interval to retry to establish the tmaster - static const sp_string HERON_TMASTER_ESTABLISH_RETRY_INTERVAL_SEC; + // The interval to retry to establish the tmanager + static const sp_string HERON_TMANAGER_ESTABLISH_RETRY_INTERVAL_SEC;
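Since these constants are the literal YAML key strings, site-local override files that still say heron.tmaster.* need the same rename. A hypothetical one-off migration helper, using only key shapes visible in this patch:

```java
import java.util.LinkedHashMap;
import java.util.Map;

// Hypothetical helper: rewrites the renamed tmaster keys in a flat
// key -> value override map, e.g.
// heron.tmaster.establish.retry.times -> heron.tmanager.establish.retry.times
// heron.check.tmaster.location.interval.sec -> heron.check.tmanager.location.interval.sec
public final class KeyRenameSketch {
  public static Map<String, String> renameKeys(Map<String, String> overrides) {
    Map<String, String> out = new LinkedHashMap<>();
    for (Map.Entry<String, String> e : overrides.entrySet()) {
      // ".tmaster." matches both leading (heron.tmaster.*) and embedded uses.
      out.put(e.getKey().replace(".tmaster.", ".tmanager."), e.getValue());
    }
    return out;
  }
}
```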
- // The maximum packet size in MB of tmaster's network options for stmgrs to connect to - static const sp_string HERON_TMASTER_NETWORK_MASTER_OPTIONS_MAXIMUM_PACKET_MB; + // The maximum packet size in MB of tmanager's network options for stmgrs to connect to + static const sp_string HERON_TMANAGER_NETWORK_SERVER_OPTIONS_MAXIMUM_PACKET_MB; - // The maximum packet size in MB of tmaster's network options for scheduler to connect to - static const sp_string HERON_TMASTER_NETWORK_CONTROLLER_OPTIONS_MAXIMUM_PACKET_MB; + // The maximum packet size in MB of tmanager's network options for scheduler to connect to + static const sp_string HERON_TMANAGER_NETWORK_CONTROLLER_OPTIONS_MAXIMUM_PACKET_MB; - // The maximum packet size in MB of tmaster's network options for stat queries - static const sp_string HERON_TMASTER_NETWORK_STATS_OPTIONS_MAXIMUM_PACKET_MB; + // The maximum packet size in MB of tmanager's network options for stat queries + static const sp_string HERON_TMANAGER_NETWORK_STATS_OPTIONS_MAXIMUM_PACKET_MB; - // The inteval for tmaster to purge metrics from socket - static const sp_string HERON_TMASTER_METRICS_COLLECTOR_PURGE_INTERVAL_SEC; + // The interval for tmanager to purge metrics from socket + static const sp_string HERON_TMANAGER_METRICS_COLLECTOR_PURGE_INTERVAL_SEC; // The maximum # of exceptions to be stored in tmetrics collector, to prevent potential OOM - static const sp_string HERON_TMASTER_METRICS_COLLECTOR_MAXIMUM_EXCEPTION; + static const sp_string HERON_TMANAGER_METRICS_COLLECTOR_MAXIMUM_EXCEPTION; - // Whether tmaster's metrics server should bind on all interfaces - static const sp_string HERON_TMASTER_METRICS_NETWORK_BINDALLINTERFACES; + // Whether tmanager's metrics server should bind on all interfaces + static const sp_string HERON_TMANAGER_METRICS_NETWORK_BINDALLINTERFACES; // The timeout in seconds for stream mgr, compared with (current time - last heartbeat time) - static const sp_string HERON_TMASTER_STMGR_STATE_TIMEOUT_SEC; + static const sp_string HERON_TMANAGER_STMGR_STATE_TIMEOUT_SEC; /** * HERON_STREAMMGR_* configs are for the stream manager @@ -153,16 +153,16 @@ class HeronInternalsConfigVars { static const sp_string HERON_STREAMMGR_CLIENT_RECONNECT_INTERVAL_SEC; // The reconnect interval to tmanager in seconds for stream manager client - static const sp_string HERON_STREAMMGR_CLIENT_RECONNECT_TMASTER_INTERVAL_SEC; + static const sp_string HERON_STREAMMGR_CLIENT_RECONNECT_TMANAGER_INTERVAL_SEC; - // The max reconnect attempts to tmaster for stream manager client - static const sp_string HERON_STREAMMGR_CLIENT_RECONNECT_TMASTER_MAX_ATTEMPTS; + // The max reconnect attempts to tmanager for stream manager client + static const sp_string HERON_STREAMMGR_CLIENT_RECONNECT_TMANAGER_MAX_ATTEMPTS; // The maximum packet size in MB of stream manager's network options static const sp_string HERON_STREAMMGR_NETWORK_OPTIONS_MAXIMUM_PACKET_MB; // The interval in seconds to send heartbeat - static const sp_string HERON_STREAMMGR_TMASTER_HEARTBEAT_INTERVAL_SEC; + static const sp_string HERON_STREAMMGR_TMANAGER_HEARTBEAT_INTERVAL_SEC; // Maximum batch size in MB to read by stream manager from socket static const sp_string HERON_STREAMMGR_CONNECTION_READ_BATCH_SIZE_MB; diff --git a/heron/common/src/cpp/config/metrics-sinks-reader.cpp b/heron/common/src/cpp/config/metrics-sinks-reader.cpp index 9cf5ffb831c..175285724fd 100644 --- a/heron/common/src/cpp/config/metrics-sinks-reader.cpp +++ b/heron/common/src/cpp/config/metrics-sinks-reader.cpp @@ -42,11 +42,11 @@ MetricsSinksReader::MetricsSinksReader(std::shared_ptr<EventLoop> eventLoop, MetricsSinksReader::~MetricsSinksReader() {} -void
MetricsSinksReader::GetTMasterMetrics(std::list >& metrics) { - if (config_[MetricsSinksVars::METRICS_SINKS_TMASTER_SINK]) { - YAML::Node n = config_[MetricsSinksVars::METRICS_SINKS_TMASTER_SINK]; - if (n.IsMap() && n[MetricsSinksVars::METRICS_SINKS_TMASTER_METRICS]) { - YAML::Node m = n[MetricsSinksVars::METRICS_SINKS_TMASTER_METRICS]; +void MetricsSinksReader::GetTManagerMetrics(std::list >& metrics) { + if (config_[MetricsSinksVars::METRICS_SINKS_TMANAGER_SINK]) { + YAML::Node n = config_[MetricsSinksVars::METRICS_SINKS_TMANAGER_SINK]; + if (n.IsMap() && n[MetricsSinksVars::METRICS_SINKS_TMANAGER_METRICS]) { + YAML::Node m = n[MetricsSinksVars::METRICS_SINKS_TMANAGER_METRICS]; if (m.IsMap()) { for (YAML::const_iterator it = m.begin(); it != m.end(); ++it) { metrics.push_back(make_pair(it->first.as(), it->second.as())); diff --git a/heron/common/src/cpp/config/metrics-sinks-reader.h b/heron/common/src/cpp/config/metrics-sinks-reader.h index 5caf1a0341b..2bb0f9191d4 100644 --- a/heron/common/src/cpp/config/metrics-sinks-reader.h +++ b/heron/common/src/cpp/config/metrics-sinks-reader.h @@ -39,9 +39,9 @@ class MetricsSinksReader : public YamlFileReader { MetricsSinksReader(std::shared_ptr eventLoop, const sp_string& _defaults_file); virtual ~MetricsSinksReader(); - // Get the list of metrics whitelisted for tmaster along + // Get the list of metrics whitelisted for tmanager along // with their types - void GetTMasterMetrics(std::list >& metrics); + void GetTManagerMetrics(std::list >& metrics); virtual void OnConfigFileLoad(); }; diff --git a/heron/common/src/cpp/config/metrics-sinks-vars.cpp b/heron/common/src/cpp/config/metrics-sinks-vars.cpp index 959c33dbec7..ac25a17394c 100644 --- a/heron/common/src/cpp/config/metrics-sinks-vars.cpp +++ b/heron/common/src/cpp/config/metrics-sinks-vars.cpp @@ -28,7 +28,7 @@ namespace heron { namespace config { -const sp_string MetricsSinksVars::METRICS_SINKS_TMASTER_SINK = "tmaster-sink"; -const sp_string MetricsSinksVars::METRICS_SINKS_TMASTER_METRICS = "tmaster-metrics-type"; +const sp_string MetricsSinksVars::METRICS_SINKS_TMANAGER_SINK = "tmanager-sink"; +const sp_string MetricsSinksVars::METRICS_SINKS_TMANAGER_METRICS = "tmanager-metrics-type"; } // namespace config } // namespace heron diff --git a/heron/common/src/cpp/config/metrics-sinks-vars.h b/heron/common/src/cpp/config/metrics-sinks-vars.h index dd86d6bd509..4efcb5b0190 100644 --- a/heron/common/src/cpp/config/metrics-sinks-vars.h +++ b/heron/common/src/cpp/config/metrics-sinks-vars.h @@ -32,8 +32,8 @@ namespace config { class MetricsSinksVars { public: - static const sp_string METRICS_SINKS_TMASTER_SINK; - static const sp_string METRICS_SINKS_TMASTER_METRICS; + static const sp_string METRICS_SINKS_TMANAGER_SINK; + static const sp_string METRICS_SINKS_TMANAGER_METRICS; }; } // namespace config } // namespace heron diff --git a/heron/common/src/cpp/metrics/BUILD b/heron/common/src/cpp/metrics/BUILD index 994c0cbb23e..05bf2cd54ca 100644 --- a/heron/common/src/cpp/metrics/BUILD +++ b/heron/common/src/cpp/metrics/BUILD @@ -25,8 +25,8 @@ cc_library( "multi-mean-metric.h", "time-spent-metric.cpp", "time-spent-metric.h", - "tmaster-metrics.cpp", - "tmaster-metrics.h", + "tmanager-metrics.cpp", + "tmanager-metrics.h", ], copts = [ "-Iheron", diff --git a/heron/common/src/cpp/metrics/metrics-mgr-st.cpp b/heron/common/src/cpp/metrics/metrics-mgr-st.cpp index fe097c0d2d8..3219c7e9de8 100644 --- a/heron/common/src/cpp/metrics/metrics-mgr-st.cpp +++ b/heron/common/src/cpp/metrics/metrics-mgr-st.cpp @@ 
-64,12 +64,12 @@ void MetricsMgrSt::Start(const sp_string& _my_hostname, sp_int32 _my_port, -1, eventLoop_, options_); } -void MetricsMgrSt::RefreshTMasterLocation(const proto::tmaster::TMasterLocation& location) { - client_->SendTMasterLocation(location); +void MetricsMgrSt::RefreshTManagerLocation(const proto::tmanager::TManagerLocation& location) { + client_->SendTManagerLocation(location); } void MetricsMgrSt::RefreshMetricsCacheLocation( - const proto::tmaster::MetricsCacheLocation& location) { + const proto::tmanager::MetricsCacheLocation& location) { LOG(INFO) << "RefreshMetricsCacheLocation"; client_->SendMetricsCacheLocation(location); } diff --git a/heron/common/src/cpp/metrics/metrics-mgr-st.h b/heron/common/src/cpp/metrics/metrics-mgr-st.h index 637a09be75a..c71c3fd2d3d 100644 --- a/heron/common/src/cpp/metrics/metrics-mgr-st.h +++ b/heron/common/src/cpp/metrics/metrics-mgr-st.h @@ -35,8 +35,8 @@ namespace heron { namespace proto { -namespace tmaster { -class TMasterLocation; +namespace tmanager { +class TManagerLocation; } } } @@ -56,8 +56,8 @@ class MetricsMgrSt { void register_metric(const sp_string& _metric_name, shared_ptr _metric); void unregister_metric(const sp_string& _metric_name); - void RefreshTMasterLocation(const proto::tmaster::TMasterLocation& location); - void RefreshMetricsCacheLocation(const proto::tmaster::MetricsCacheLocation& location); + void RefreshTManagerLocation(const proto::tmanager::TManagerLocation& location); + void RefreshMetricsCacheLocation(const proto::tmanager::MetricsCacheLocation& location); /** Start MetricsMgrClient object diff --git a/heron/common/src/cpp/metrics/metrics.h b/heron/common/src/cpp/metrics/metrics.h index a8b40442c6e..e4655dcd0cf 100644 --- a/heron/common/src/cpp/metrics/metrics.h +++ b/heron/common/src/cpp/metrics/metrics.h @@ -30,6 +30,6 @@ #include "metrics/time-spent-metric.h" #include "metrics/metricsmgr-client.h" #include "metrics/metrics-mgr-st.h" -#include "metrics/tmaster-metrics.h" +#include "metrics/tmanager-metrics.h" #endif diff --git a/heron/common/src/cpp/metrics/metricsmgr-client.cpp b/heron/common/src/cpp/metrics/metricsmgr-client.cpp index bcd9687d9a4..6509ac9e137 100644 --- a/heron/common/src/cpp/metrics/metricsmgr-client.cpp +++ b/heron/common/src/cpp/metrics/metricsmgr-client.cpp @@ -42,7 +42,7 @@ MetricsMgrClient::MetricsMgrClient(const sp_string& _hostname, sp_int32 _port, component_name_(_component_name), instance_id_(_instance_id), instance_index_(_instance_index), - tmaster_location_(NULL), + tmanager_location_(NULL), metricscache_location_(NULL), registered_(false) { InstallResponseHandler(make_unique(), @@ -50,7 +50,7 @@ MetricsMgrClient::MetricsMgrClient(const sp_string& _hostname, sp_int32 _port, Start(); } -MetricsMgrClient::~MetricsMgrClient() { delete tmaster_location_; delete metricscache_location_; } +MetricsMgrClient::~MetricsMgrClient() { delete tmanager_location_; delete metricscache_location_; } void MetricsMgrClient::HandleConnect(NetworkErrorCode _status) { if (_status == OK) { @@ -95,12 +95,12 @@ void MetricsMgrClient::HandleRegisterResponse( registered_ = true; } - // Check if we need to send tmaster location - if (tmaster_location_) { - LOG(INFO) << "Sending TMaster Location to metricsmgr"; - InternalSendTMasterLocation(); + // Check if we need to send tmanager location + if (tmanager_location_) { + LOG(INFO) << "Sending TManager Location to metricsmgr"; + InternalSendTManagerLocation(); } else { - LOG(INFO) << "Do not have a TMasterLocation yet"; + LOG(INFO) << "Do not have a 
TManagerLocation yet"; } // Check if we need to send metricscache location if (metricscache_location_) { @@ -111,26 +111,26 @@ void MetricsMgrClient::HandleRegisterResponse( } } -void MetricsMgrClient::SendTMasterLocation(const proto::tmaster::TMasterLocation& location) { - if (tmaster_location_) { - delete tmaster_location_; +void MetricsMgrClient::SendTManagerLocation(const proto::tmanager::TManagerLocation& location) { + if (tmanager_location_) { + delete tmanager_location_; } - tmaster_location_ = new proto::tmaster::TMasterLocation(location); + tmanager_location_ = new proto::tmanager::TManagerLocation(location); if (registered_) { - LOG(INFO) << "Sending TMaster Location to metricsmgr"; - InternalSendTMasterLocation(); + LOG(INFO) << "Sending TManager Location to metricsmgr"; + InternalSendTManagerLocation(); } else { LOG(INFO) << "We have not yet registered to metricsmgr." - << " Holding off sending TMasterLocation"; + << " Holding off sending TManagerLocation"; } } void MetricsMgrClient::SendMetricsCacheLocation( - const proto::tmaster::MetricsCacheLocation& location) { + const proto::tmanager::MetricsCacheLocation& location) { if (metricscache_location_) { delete metricscache_location_; } - metricscache_location_ = new proto::tmaster::MetricsCacheLocation(location); + metricscache_location_ = new proto::tmanager::MetricsCacheLocation(location); if (registered_) { LOG(INFO) << "Sending MetricsCache Location to metricsmgr"; InternalSendMetricsCacheLocation(); @@ -146,11 +146,11 @@ void MetricsMgrClient::SendMetrics(proto::system::MetricPublisherPublishMessage* delete _message; } -void MetricsMgrClient::InternalSendTMasterLocation() { - CHECK(tmaster_location_); - proto::system::TMasterLocationRefreshMessage* m = - new proto::system::TMasterLocationRefreshMessage(); - m->mutable_tmaster()->CopyFrom(*tmaster_location_); +void MetricsMgrClient::InternalSendTManagerLocation() { + CHECK(tmanager_location_); + proto::system::TManagerLocationRefreshMessage* m = + new proto::system::TManagerLocationRefreshMessage(); + m->mutable_tmanager()->CopyFrom(*tmanager_location_); SendMessage(*m); delete m; diff --git a/heron/common/src/cpp/metrics/metricsmgr-client.h b/heron/common/src/cpp/metrics/metricsmgr-client.h index 28cd4223649..1e4b4a1da1f 100644 --- a/heron/common/src/cpp/metrics/metricsmgr-client.h +++ b/heron/common/src/cpp/metrics/metricsmgr-client.h @@ -28,8 +28,8 @@ namespace heron { namespace proto { -namespace tmaster { -class TMasterLocation; +namespace tmanager { +class TManagerLocation; } } } @@ -45,15 +45,15 @@ class MetricsMgrClient : public Client { ~MetricsMgrClient(); void SendMetrics(proto::system::MetricPublisherPublishMessage* _message); - void SendTMasterLocation(const proto::tmaster::TMasterLocation& location); - void SendMetricsCacheLocation(const proto::tmaster::MetricsCacheLocation& location); + void SendTManagerLocation(const proto::tmanager::TManagerLocation& location); + void SendMetricsCacheLocation(const proto::tmanager::MetricsCacheLocation& location); protected: virtual void HandleConnect(NetworkErrorCode status); virtual void HandleClose(NetworkErrorCode status); private: - void InternalSendTMasterLocation(); + void InternalSendTManagerLocation(); void InternalSendMetricsCacheLocation(); void ReConnect(); void SendRegisterRequest(); @@ -66,8 +66,8 @@ class MetricsMgrClient : public Client { sp_string component_name_; sp_string instance_id_; int instance_index_; - proto::tmaster::TMasterLocation* tmaster_location_; - proto::tmaster::MetricsCacheLocation* 
metricscache_location_; + proto::tmanager::TManagerLocation* tmanager_location_; + proto::tmanager::MetricsCacheLocation* metricscache_location_; // Tells if we have registered to metrics manager or not bool registered_; }; diff --git a/heron/common/src/cpp/metrics/tmaster-metrics.cpp b/heron/common/src/cpp/metrics/tmanager-metrics.cpp similarity index 80% rename from heron/common/src/cpp/metrics/tmaster-metrics.cpp rename to heron/common/src/cpp/metrics/tmanager-metrics.cpp index 8b7399e5fbd..6a36c50327d 100644 --- a/heron/common/src/cpp/metrics/tmaster-metrics.cpp +++ b/heron/common/src/cpp/metrics/tmanager-metrics.cpp @@ -17,7 +17,7 @@ * under the License. */ -#include "metrics/tmaster-metrics.h" +#include "metrics/tmanager-metrics.h" #include #include #include @@ -37,25 +37,25 @@ namespace common { using std::shared_ptr; -TMasterMetrics::TMasterMetrics(const sp_string& sinks_filename, shared_ptr<EventLoop> eventLoop) { +TManagerMetrics::TManagerMetrics(const sp_string& sinks_filename, shared_ptr<EventLoop> eventLoop) { sinks_reader_ = new config::MetricsSinksReader(eventLoop, sinks_filename); std::list<std::pair<sp_string, sp_string>> metrics; - sinks_reader_->GetTMasterMetrics(metrics); + sinks_reader_->GetTManagerMetrics(metrics); for (auto iter = metrics.begin(); iter != metrics.end(); ++iter) { metrics_prefixes_[iter->first] = TranslateFromString(iter->second); } } -TMasterMetrics::~TMasterMetrics() { delete sinks_reader_; } +TManagerMetrics::~TManagerMetrics() { delete sinks_reader_; } -bool TMasterMetrics::IsTMasterMetric(const sp_string& _name) { +bool TManagerMetrics::IsTManagerMetric(const sp_string& _name) { for (auto iter = metrics_prefixes_.begin(); iter != metrics_prefixes_.end(); ++iter) { if (_name.find(iter->first) == 0) return true; } return false; } -TMasterMetrics::MetricAggregationType TMasterMetrics::GetAggregationType(const sp_string& _name) { +TManagerMetrics::MetricAggregationType TManagerMetrics::GetAggregationType(const sp_string& _name) { for (auto iter = metrics_prefixes_.begin(); iter != metrics_prefixes_.end(); ++iter) { if (_name.find(iter->first) == 0) { return iter->second; @@ -64,7 +64,7 @@ TMasterMetrics::MetricAggregationType TMasterMetrics::GetAggregationType(const s return UNKNOWN; } -TMasterMetrics::MetricAggregationType TMasterMetrics::TranslateFromString(const sp_string& type) { +TManagerMetrics::MetricAggregationType TManagerMetrics::TranslateFromString(const sp_string& type) { if (type == "SUM") { return SUM; } else if (type == "AVG") {
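GetAggregationType above resolves a metric name by scanning the sink whitelist for a matching prefix. The same logic, transliterated to Java as a sketch (the whitelist entries shown are assumptions; in the real code they come from the metrics sinks YAML):

```java
import java.util.LinkedHashMap;
import java.util.Map;

// Sketch of the prefix matching in TManagerMetrics::GetAggregationType.
public final class AggregationSketch {
  public enum MetricAggregationType { UNKNOWN, SUM, AVG, LAST }

  private final Map<String, MetricAggregationType> prefixes = new LinkedHashMap<>();

  public AggregationSketch() {
    // Assumed whitelist entries for illustration only.
    prefixes.put("__jvm-uptime-secs", MetricAggregationType.LAST);
    prefixes.put("__emit-count", MetricAggregationType.SUM);
  }

  public MetricAggregationType getAggregationType(String name) {
    for (Map.Entry<String, MetricAggregationType> e : prefixes.entrySet()) {
      if (name.startsWith(e.getKey())) {  // same test as _name.find(prefix) == 0
        return e.getValue();
      }
    }
    return MetricAggregationType.UNKNOWN;
  }
}
```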
diff --git a/heron/common/src/cpp/metrics/tmaster-metrics.h b/heron/common/src/cpp/metrics/tmanager-metrics.h similarity index 83% rename from heron/common/src/cpp/metrics/tmaster-metrics.h rename to heron/common/src/cpp/metrics/tmanager-metrics.h index f680a3441ec..b944e464816 100644 --- a/heron/common/src/cpp/metrics/tmaster-metrics.h +++ b/heron/common/src/cpp/metrics/tmanager-metrics.h @@ -19,12 +19,12 @@ ////////////////////////////////////////////////////////////////////////////// // -// tmaster-metric.h +// tmanager-metrics.h // -// Defines all the metrics that needs to be sent to tmaster +// Defines all the metrics that need to be sent to tmanager ////////////////////////////////////////////////////////////////////////////// -#ifndef __TMASTER_METRICS_H_ -#define __TMASTER_METRICS_H_ +#ifndef __TMANAGER_METRICS_H_ +#define __TMANAGER_METRICS_H_ #include #include "metrics/imetric.h" @@ -41,7 +41,7 @@ class MetricsSinksReader; namespace heron { namespace common { -class TMasterMetrics { +class TManagerMetrics { public: enum MetricAggregationType { UNKNOWN = -1, @@ -49,10 +49,10 @@ class TMasterMetrics { AVG, LAST // We only care about the last value }; - TMasterMetrics(const sp_string& metrics_sinks, std::shared_ptr<EventLoop> eventLoop); - ~TMasterMetrics(); + TManagerMetrics(const sp_string& metrics_sinks, std::shared_ptr<EventLoop> eventLoop); + ~TManagerMetrics(); - bool IsTMasterMetric(const sp_string& _name); + bool IsTManagerMetric(const sp_string& _name); MetricAggregationType GetAggregationType(const sp_string& _name); private: diff --git a/heron/common/src/cpp/setup/zk-setup.cpp b/heron/common/src/cpp/setup/zk-setup.cpp index cf2f223475e..bba3c7d03e2 100644 --- a/heron/common/src/cpp/setup/zk-setup.cpp +++ b/heron/common/src/cpp/setup/zk-setup.cpp @@ -52,7 +52,7 @@ void AllDone(sp_int32 _status) { } } -void TMastersDone(sp_int32 _status) { +void TManagersDone(sp_int32 _status) { if (_status == ZNODEEXISTS || _status == ZOK) { zkclient->CreateNode(zkroot + "/executionstate", "Heron Cluster " + clustername, false, [](sp_int32 status) { AllDone(status); }); @@ -64,8 +64,8 @@ void PplansDone(sp_int32 _status) { if (_status == ZNODEEXISTS || _status == ZOK) { - zkclient->CreateNode(zkroot + "/tmasters", "Heron Cluster " + clustername, false, - [](sp_int32 status) { TMastersDone(status); }); + zkclient->CreateNode(zkroot + "/tmanagers", "Heron Cluster " + clustername, false, + [](sp_int32 status) { TManagersDone(status); }); } else { LOG(ERROR) << "Error creating node in zk " << _status << std::endl; ::exit(1); diff --git a/heron/common/src/java/org/apache/heron/common/basics/SlaveLooper.java b/heron/common/src/java/org/apache/heron/common/basics/ExecutorLooper.java similarity index 92% rename from heron/common/src/java/org/apache/heron/common/basics/SlaveLooper.java rename to heron/common/src/java/org/apache/heron/common/basics/ExecutorLooper.java index 8c3e4e5205f..a727426419f 100644 --- a/heron/common/src/java/org/apache/heron/common/basics/SlaveLooper.java +++ b/heron/common/src/java/org/apache/heron/common/basics/ExecutorLooper.java @@ -22,17 +22,17 @@ import java.time.Duration; /** - * A SlaveLooper, implementing WakeableLooper, is a class wrapping object wait()/notify() to await/unblock a thread. + * An ExecutorLooper, implementing WakeableLooper, is a class wrapping object wait()/notify() to await/unblock a thread. * It extends WakeableLooper, so it will execute in a while loop unless the exitLoop() is called. * And in every execution, in tasksOnWakeup(), it will do nothing by default - * The SlaveLooper should start by calling {@code loop()} + * The ExecutorLooper should start by calling {@code loop()} */ -public class SlaveLooper extends WakeableLooper { +public class ExecutorLooper extends WakeableLooper { // The lock to implement the await/unblock private final RunnableLock lock; - public SlaveLooper() { + public ExecutorLooper() { this.lock = new RunnableLock(); }
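The rename keeps the looper contract unchanged: register tasks to run on wakeup, start with loop(), and break out with exitLoop(). A minimal usage sketch, grounded in the WakeableLooperTest cases further down (the task body is illustrative):

```java
import org.apache.heron.common.basics.ExecutorLooper;

// Sketch: the wakeup-task lifecycle that the tests below exercise.
public final class LooperSketch {
  public static void main(String[] args) {
    final ExecutorLooper looper = new ExecutorLooper();
    looper.addTasksOnWakeup(new Runnable() {
      @Override
      public void run() {
        System.out.println("woken up once, now exiting");
        looper.exitLoop();  // without this, loop() keeps waiting for wakeups
      }
    });
    looper.loop();          // runs the wakeup tasks until exitLoop() is called
  }
}
```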
diff --git a/heron/common/src/java/org/apache/heron/common/config/SystemConfig.java b/heron/common/src/java/org/apache/heron/common/config/SystemConfig.java index 2608de749d5..801d26b0f7e 100644 --- a/heron/common/src/java/org/apache/heron/common/config/SystemConfig.java +++ b/heron/common/src/java/org/apache/heron/common/config/SystemConfig.java @@ -230,20 +230,20 @@ public int getHeronMetricsMaxExceptionsPerMessageCount() { return getInteger(SystemConfigKey.HERON_METRICS_MAX_EXCEPTIONS_PER_MESSAGE_COUNT); } - public long getTmasterMetricsCollectorMaximumException() { + public long getTmanagerMetricsCollectorMaximumException() { try { - return getLong(SystemConfigKey.TMASTER_METRICS_COLLECTOR_MAXIMUM_EXCEPTION); + return getLong(SystemConfigKey.TMANAGER_METRICS_COLLECTOR_MAXIMUM_EXCEPTION); } catch (IllegalArgumentException e) { return 256; // default value if not found in config } } - public Duration getTmasterMetricsCollectorMaximumInterval() { - return getDuration(SystemConfigKey.TMASTER_METRICS_COLLECTOR_MAXIMUM_INTERVAL); + public Duration getTmanagerMetricsCollectorMaximumInterval() { + return getDuration(SystemConfigKey.TMANAGER_METRICS_COLLECTOR_MAXIMUM_INTERVAL); } - public Duration getTmasterMetricsCollectorPurgeInterval() { - return getDuration(SystemConfigKey.TMASTER_METRICS_COLLECTOR_PURGE_INTERVAL); + public Duration getTmanagerMetricsCollectorPurgeInterval() { + return getDuration(SystemConfigKey.TMANAGER_METRICS_COLLECTOR_PURGE_INTERVAL); } private String getString(SystemConfigKey key) { diff --git a/heron/common/src/java/org/apache/heron/common/config/SystemConfigKey.java b/heron/common/src/java/org/apache/heron/common/config/SystemConfigKey.java index 80ada01ffff..97beae3e474 100644 --- a/heron/common/src/java/org/apache/heron/common/config/SystemConfigKey.java +++ b/heron/common/src/java/org/apache/heron/common/config/SystemConfigKey.java @@ -309,23 +309,23 @@ public enum SystemConfigKey { "heron.metricsmgr.network.options.maximum.packetsize.bytes", Type.BYTE_AMOUNT), /** - *The maximum exception count be kept in tmaster + * The maximum exception count to be kept in tmanager */ - TMASTER_METRICS_COLLECTOR_MAXIMUM_EXCEPTION( - "heron.tmaster.metrics.collector.maximum.exception", Type.LONG), + TMANAGER_METRICS_COLLECTOR_MAXIMUM_EXCEPTION( + "heron.tmanager.metrics.collector.maximum.exception", Type.LONG), /** - * The maximum interval in minutes of metrics to be kept in tmaster + * The maximum interval in minutes of metrics to be kept in tmanager */ - TMASTER_METRICS_COLLECTOR_MAXIMUM_INTERVAL( - "heron.tmaster.metrics.collector.maximum.interval.min", + TMANAGER_METRICS_COLLECTOR_MAXIMUM_INTERVAL( + "heron.tmanager.metrics.collector.maximum.interval.min", ChronoUnit.MINUTES, Duration.ofHours(3)), /** - * The interval for tmaster to purge metrics from socket + * The interval for tmanager to purge metrics from socket */ - TMASTER_METRICS_COLLECTOR_PURGE_INTERVAL( - "heron.tmaster.metrics.collector.purge.interval.sec", + TMANAGER_METRICS_COLLECTOR_PURGE_INTERVAL( + "heron.tmanager.metrics.collector.purge.interval.sec",
ChronoUnit.SECONDS, Duration.ofMinutes(1)); diff --git a/heron/common/src/java/org/apache/heron/common/utils/metrics/JVMMetrics.java b/heron/common/src/java/org/apache/heron/common/utils/metrics/JVMMetrics.java index ed2384c10b2..6cb605a5d2f 100644 --- a/heron/common/src/java/org/apache/heron/common/utils/metrics/JVMMetrics.java +++ b/heron/common/src/java/org/apache/heron/common/utils/metrics/JVMMetrics.java @@ -105,7 +105,7 @@ public class JVMMetrics { */ private MultiAssignableMetric threadsCPUTimeNs; - // The CPU time used by threads other than SlaveThread and GatewayThread + // The CPU time used by threads other than ExecutorThread and GatewayThread private AssignableMetric otherThreadsCPUTimeNs; /* @@ -121,7 +121,7 @@ public class JVMMetrics { */ private MultiAssignableMetric threadsUserCPUTimeNs; - // The user CPU time used by threads other than SlaveThread and GatewayThread + // The user CPU time used by threads other than ExecutorThread and GatewayThread private AssignableMetric otherThreadsUserCPUTimeNs; /* @@ -383,7 +383,7 @@ private void getThreadsMetrics() { String threadName = threadInfo.getThreadName(); if (threadName.equals(ThreadNames.THREAD_GATEWAY_NAME) - || threadName.equals(ThreadNames.THREAD_SLAVE_NAME)) { + || threadName.equals(ThreadNames.THREAD_EXECUTOR_NAME)) { threadsCPUTimeNs.scope(threadName).setValue(cpuTime); threadsUserCPUTimeNs.scope(threadName).setValue(cpuUserTime); } else { diff --git a/heron/common/src/java/org/apache/heron/common/utils/misc/ThreadNames.java b/heron/common/src/java/org/apache/heron/common/utils/misc/ThreadNames.java index d575d76c3aa..d3bc47cfcf7 100644 --- a/heron/common/src/java/org/apache/heron/common/utils/misc/ThreadNames.java +++ b/heron/common/src/java/org/apache/heron/common/utils/misc/ThreadNames.java @@ -23,7 +23,7 @@ public final class ThreadNames { /** * Thread Name Constants */ - public static final String THREAD_SLAVE_NAME = "SlaveThread"; + public static final String THREAD_EXECUTOR_NAME = "ExecutorThread"; public static final String THREAD_GATEWAY_NAME = "GatewayThread"; private ThreadNames() { diff --git a/heron/common/tests/java/org/apache/heron/common/basics/CommunicatorTest.java b/heron/common/tests/java/org/apache/heron/common/basics/CommunicatorTest.java index 6868a2c34cf..4f357fb59a2 100644 --- a/heron/common/tests/java/org/apache/heron/common/basics/CommunicatorTest.java +++ b/heron/common/tests/java/org/apache/heron/common/basics/CommunicatorTest.java @@ -35,8 +35,8 @@ public class CommunicatorTest { @Before public void before() { - producer = new SlaveLooper(); - consumer = new SlaveLooper(); + producer = new ExecutorLooper(); + consumer = new ExecutorLooper(); communicator = new Communicator(producer, consumer); communicator.init(QUEUE_BUFFER_SIZE, QUEUE_BUFFER_SIZE, 0.5); } diff --git a/heron/common/tests/java/org/apache/heron/common/basics/WakeableLooperTest.java b/heron/common/tests/java/org/apache/heron/common/basics/WakeableLooperTest.java index 3a2d88f389b..b258d88e4b1 100644 --- a/heron/common/tests/java/org/apache/heron/common/basics/WakeableLooperTest.java +++ b/heron/common/tests/java/org/apache/heron/common/basics/WakeableLooperTest.java @@ -33,17 +33,17 @@ */ public class WakeableLooperTest { private static int globalValue; - private WakeableLooper slaveLooper; + private WakeableLooper executorLooper; @Before public void before() { - slaveLooper = new SlaveLooper(); + executorLooper = new ExecutorLooper(); globalValue = 6; } @After public void after() { - slaveLooper = null; + executorLooper = 
null; } /** @@ -57,15 +57,15 @@ public void testLoop() { @Override public void run() { globalValue += 10; - slaveLooper.wakeUp(); + executorLooper.wakeUp(); i--; if (i == 0) { - slaveLooper.exitLoop(); + executorLooper.exitLoop(); } } }; - slaveLooper.addTasksOnWakeup(r); - slaveLooper.loop(); + executorLooper.addTasksOnWakeup(r); + executorLooper.loop(); Assert.assertEquals(36, globalValue); } @@ -77,12 +77,12 @@ public void testAddTasksOnWakeup() { Runnable r = new Runnable() { @Override public void run() { - slaveLooper.exitLoop(); + executorLooper.exitLoop(); globalValue = 10; } }; - slaveLooper.addTasksOnWakeup(r); - slaveLooper.loop(); + executorLooper.addTasksOnWakeup(r); + executorLooper.loop(); Assert.assertEquals(10, globalValue); } @@ -94,15 +94,15 @@ public void testRegisterTimerEventInSeconds() { Runnable r = new Runnable() { @Override public void run() { - slaveLooper.exitLoop(); + executorLooper.exitLoop(); globalValue = 10; } }; long startTime = System.nanoTime(); Duration interval = Duration.ofSeconds(1); - slaveLooper.registerTimerEvent(interval, r); - slaveLooper.loop(); + executorLooper.registerTimerEvent(interval, r); + executorLooper.loop(); long endTime = System.nanoTime(); Assert.assertTrue(endTime - startTime - interval.toNanos() >= 0); Assert.assertEquals(10, globalValue); @@ -116,15 +116,15 @@ public void testRegisterTimerEventInNanoSeconds() { Runnable r = new Runnable() { @Override public void run() { - slaveLooper.exitLoop(); + executorLooper.exitLoop(); globalValue = 10; } }; long startTime = System.nanoTime(); Duration interval = Duration.ofMillis(6); - slaveLooper.registerTimerEvent(interval, r); - slaveLooper.loop(); + executorLooper.registerTimerEvent(interval, r); + executorLooper.loop(); long endTime = System.nanoTime(); Assert.assertTrue(endTime - startTime - interval.toNanos() >= 0); Assert.assertEquals(10, globalValue); @@ -138,12 +138,12 @@ public void testExitLoop() { Runnable r = new Runnable() { @Override public void run() { - slaveLooper.exitLoop(); + executorLooper.exitLoop(); globalValue = 10; } }; - slaveLooper.addTasksOnWakeup(r); - slaveLooper.loop(); + executorLooper.addTasksOnWakeup(r); + executorLooper.loop(); Assert.assertEquals(10, globalValue); } @@ -156,18 +156,18 @@ public void testGetNextTimeoutIntervalMs() Runnable r = new Runnable() { @Override public void run() { - slaveLooper.exitLoop(); + executorLooper.exitLoop(); globalValue = 10; } }; Duration interval = Duration.ofSeconds(6); - slaveLooper.registerTimerEvent(interval, r); + executorLooper.registerTimerEvent(interval, r); Method method = - slaveLooper.getClass().getSuperclass().getDeclaredMethod("getNextTimeoutInterval"); + executorLooper.getClass().getSuperclass().getDeclaredMethod("getNextTimeoutInterval"); method.setAccessible(true); - Duration res = (Duration) method.invoke(slaveLooper); + Duration res = (Duration) method.invoke(executorLooper); Assert.assertNotNull(res); @@ -186,11 +186,11 @@ public void run() { globalValue = 10; } }; - slaveLooper.addTasksOnWakeup(r); + executorLooper.addTasksOnWakeup(r); - Method method = slaveLooper.getClass().getSuperclass().getDeclaredMethod("runOnce"); + Method method = executorLooper.getClass().getSuperclass().getDeclaredMethod("runOnce"); method.setAccessible(true); - method.invoke(slaveLooper); + method.invoke(executorLooper); Assert.assertEquals(10, globalValue); } @@ -207,12 +207,12 @@ public void run() { globalValue = 10; } }; - slaveLooper.addTasksOnWakeup(r); + executorLooper.addTasksOnWakeup(r); Method method = - 
slaveLooper.getClass().getSuperclass().getDeclaredMethod("executeTasksOnWakeup"); + executorLooper.getClass().getSuperclass().getDeclaredMethod("executeTasksOnWakeup"); method.setAccessible(true); - method.invoke(slaveLooper); + method.invoke(executorLooper); Assert.assertEquals(10, globalValue); } @@ -231,14 +231,14 @@ public void run() { }; Duration interval = Duration.ofNanos(1); - slaveLooper.registerTimerEvent(interval, r); + executorLooper.registerTimerEvent(interval, r); Method method = - slaveLooper.getClass().getSuperclass().getDeclaredMethod( + executorLooper.getClass().getSuperclass().getDeclaredMethod( "triggerExpiredTimers", long.class); long current = System.nanoTime(); method.setAccessible(true); - method.invoke(slaveLooper, current); + method.invoke(executorLooper, current); Assert.assertEquals(10, globalValue); } From a4731dd2268e3179cddbd33593372200f3a6c5df Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 21:48:27 -0400 Subject: [PATCH 14/32] renaming "topology master" to "topology manager" in heron/simulator --- .../apache/heron/simulator/executors/InstanceExecutor.java | 6 +++--- .../apache/heron/simulator/executors/MetricsExecutor.java | 4 ++-- .../apache/heron/simulator/executors/StreamExecutor.java | 4 ++-- .../org/apache/heron/simulator/instance/BoltInstance.java | 4 ++-- .../org/apache/heron/simulator/instance/SpoutInstance.java | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/heron/simulator/src/java/org/apache/heron/simulator/executors/InstanceExecutor.java b/heron/simulator/src/java/org/apache/heron/simulator/executors/InstanceExecutor.java index 1479a433abe..51aebf208e1 100644 --- a/heron/simulator/src/java/org/apache/heron/simulator/executors/InstanceExecutor.java +++ b/heron/simulator/src/java/org/apache/heron/simulator/executors/InstanceExecutor.java @@ -26,7 +26,7 @@ import org.apache.heron.api.generated.TopologyAPI; import org.apache.heron.common.basics.Communicator; -import org.apache.heron.common.basics.SlaveLooper; +import org.apache.heron.common.basics.ExecutorLooper; import org.apache.heron.common.utils.metrics.MetricsCollector; import org.apache.heron.common.utils.misc.PhysicalPlanHelper; import org.apache.heron.instance.IInstance; @@ -49,7 +49,7 @@ public class InstanceExecutor implements Runnable { private final PhysicalPlanHelper physicalPlanHelper; - private final SlaveLooper looper; + private final ExecutorLooper looper; private final Communicator streamInQueue; private final Communicator streamOutQueue; @@ -68,7 +68,7 @@ public InstanceExecutor(PhysicalPlans.PhysicalPlan physicalPlan, streamInQueue = new Communicator<>(); streamOutQueue = new Communicator<>(); metricsOutQueue = new Communicator<>(); - looper = new SlaveLooper(); + looper = new ExecutorLooper(); MetricsCollector metricsCollector = new MetricsCollector(looper, metricsOutQueue); diff --git a/heron/simulator/src/java/org/apache/heron/simulator/executors/MetricsExecutor.java b/heron/simulator/src/java/org/apache/heron/simulator/executors/MetricsExecutor.java index e50a9fa8f3d..16e0c123513 100644 --- a/heron/simulator/src/java/org/apache/heron/simulator/executors/MetricsExecutor.java +++ b/heron/simulator/src/java/org/apache/heron/simulator/executors/MetricsExecutor.java @@ -24,7 +24,7 @@ import java.util.logging.Logger; import org.apache.heron.common.basics.Communicator; -import org.apache.heron.common.basics.SlaveLooper; +import org.apache.heron.common.basics.ExecutorLooper; import org.apache.heron.common.basics.WakeableLooper; import 
org.apache.heron.common.config.SystemConfig; import org.apache.heron.common.utils.metrics.JVMMetrics; @@ -135,6 +135,6 @@ protected void handleMetricPublisherPublishMessage( } protected WakeableLooper createWakeableLooper() { - return new SlaveLooper(); + return new ExecutorLooper(); } } diff --git a/heron/simulator/src/java/org/apache/heron/simulator/executors/StreamExecutor.java b/heron/simulator/src/java/org/apache/heron/simulator/executors/StreamExecutor.java index 4bb1dab6e58..bff954aa442 100644 --- a/heron/simulator/src/java/org/apache/heron/simulator/executors/StreamExecutor.java +++ b/heron/simulator/src/java/org/apache/heron/simulator/executors/StreamExecutor.java @@ -30,7 +30,7 @@ import com.google.protobuf.Message; import org.apache.heron.api.generated.TopologyAPI; -import org.apache.heron.common.basics.SlaveLooper; +import org.apache.heron.common.basics.ExecutorLooper; import org.apache.heron.common.basics.WakeableLooper; import org.apache.heron.proto.system.HeronTuples; import org.apache.heron.proto.system.PhysicalPlans; @@ -334,7 +334,7 @@ protected void sendMessageToInstance(int taskId, HeronTuples.HeronTupleSet messa } protected WakeableLooper createWakeableLooper() { - return new SlaveLooper(); + return new ExecutorLooper(); } protected Set createSpoutsSet(PhysicalPlans.PhysicalPlan physicalPlan) { diff --git a/heron/simulator/src/java/org/apache/heron/simulator/instance/BoltInstance.java b/heron/simulator/src/java/org/apache/heron/simulator/instance/BoltInstance.java index 271037668a9..c3e61ce6dbb 100644 --- a/heron/simulator/src/java/org/apache/heron/simulator/instance/BoltInstance.java +++ b/heron/simulator/src/java/org/apache/heron/simulator/instance/BoltInstance.java @@ -30,7 +30,7 @@ import org.apache.heron.common.basics.ByteAmount; import org.apache.heron.common.basics.Communicator; import org.apache.heron.common.basics.SingletonRegistry; -import org.apache.heron.common.basics.SlaveLooper; +import org.apache.heron.common.basics.ExecutorLooper; import org.apache.heron.common.config.SystemConfig; import org.apache.heron.common.utils.misc.PhysicalPlanHelper; import org.apache.heron.common.utils.tuple.TupleImpl; @@ -47,7 +47,7 @@ public class BoltInstance public BoltInstance(PhysicalPlanHelper helper, Communicator streamInQueue, Communicator streamOutQueue, - SlaveLooper looper) { + ExecutorLooper looper) { super(helper, streamInQueue, streamOutQueue, looper); SystemConfig systemConfig = (SystemConfig) SingletonRegistry.INSTANCE.getSingleton(SystemConfig.HERON_SYSTEM_CONFIG); diff --git a/heron/simulator/src/java/org/apache/heron/simulator/instance/SpoutInstance.java b/heron/simulator/src/java/org/apache/heron/simulator/instance/SpoutInstance.java index c5eb8d920c6..48cd9f6c11d 100644 --- a/heron/simulator/src/java/org/apache/heron/simulator/instance/SpoutInstance.java +++ b/heron/simulator/src/java/org/apache/heron/simulator/instance/SpoutInstance.java @@ -28,7 +28,7 @@ import org.apache.heron.common.basics.ByteAmount; import org.apache.heron.common.basics.Communicator; import org.apache.heron.common.basics.SingletonRegistry; -import org.apache.heron.common.basics.SlaveLooper; +import org.apache.heron.common.basics.ExecutorLooper; import org.apache.heron.common.basics.TypeUtils; import org.apache.heron.common.config.SystemConfig; import org.apache.heron.common.utils.misc.PhysicalPlanHelper; @@ -48,7 +48,7 @@ public class SpoutInstance @SuppressWarnings("deprecation") public SpoutInstance(PhysicalPlanHelper helper, Communicator streamInQueue, - Communicator 
streamOutQueue, SlaveLooper looper) { + Communicator streamOutQueue, ExecutorLooper looper) { super(helper, streamInQueue, streamOutQueue, looper); Map config = helper.getTopologyContext().getTopologyConfig(); SystemConfig systemConfig = From ded9f24b2adf352757434899c24f93f314b60317 Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 21:59:26 -0400 Subject: [PATCH 15/32] renaming "topology master" to "topology manager" in heron/spi --- heron/spi/src/java/BUILD | 2 +- .../org/apache/heron/spi/common/Context.java | 4 +- .../java/org/apache/heron/spi/common/Key.java | 2 +- .../heron/spi/statemgr/IStateManager.java | 40 ++++----- .../SchedulerStateManagerAdaptor.java | 18 ++-- ...rException.java => TManagerException.java} | 8 +- .../{TMasterUtils.java => TManagerUtils.java} | 82 +++++++++---------- .../heron/spi/common/ConfigLoaderTest.java | 2 +- .../apache/heron/spi/common/ContextTest.java | 2 +- .../org/apache/heron/spi/common/KeysTest.java | 4 +- 10 files changed, 82 insertions(+), 82 deletions(-) rename heron/spi/src/java/org/apache/heron/spi/utils/{TMasterException.java => TManagerException.java} (82%) rename heron/spi/src/java/org/apache/heron/spi/utils/{TMasterUtils.java => TManagerUtils.java} (68%) diff --git a/heron/spi/src/java/BUILD b/heron/spi/src/java/BUILD index b6865076acf..657ae28004b 100644 --- a/heron/spi/src/java/BUILD +++ b/heron/spi/src/java/BUILD @@ -36,7 +36,7 @@ java_library( "//heron/proto:proto_packing_plan_java", "//heron/proto:proto_physical_plan_java", "//heron/proto:proto_scheduler_java", - "//heron/proto:proto_tmaster_java", + "//heron/proto:proto_tmanager_java", "//heron/proto:proto_topology_java", "@com_google_protobuf//:protobuf_java", "@maven//:com_google_guava_guava", diff --git a/heron/spi/src/java/org/apache/heron/spi/common/Context.java b/heron/spi/src/java/org/apache/heron/spi/common/Context.java index c8a1d3c1214..fd55c1d3e3b 100644 --- a/heron/spi/src/java/org/apache/heron/spi/common/Context.java +++ b/heron/spi/src/java/org/apache/heron/spi/common/Context.java @@ -326,8 +326,8 @@ public static String stmgrBinary(Config cfg) { return cfg.getStringValue(Key.STMGR_BINARY); } - public static String tmasterBinary(Config cfg) { - return cfg.getStringValue(Key.TMASTER_BINARY); + public static String tmanagerBinary(Config cfg) { + return cfg.getStringValue(Key.TMANAGER_BINARY); } public static String shellBinary(Config cfg) { diff --git a/heron/spi/src/java/org/apache/heron/spi/common/Key.java b/heron/spi/src/java/org/apache/heron/spi/common/Key.java index db2bd96b723..e283c8ac209 100644 --- a/heron/spi/src/java/org/apache/heron/spi/common/Key.java +++ b/heron/spi/src/java/org/apache/heron/spi/common/Key.java @@ -182,7 +182,7 @@ public enum Key { //keys for config provided user binaries EXECUTOR_BINARY ("heron.binaries.executor", "${HERON_BIN}/heron-executor"), STMGR_BINARY ("heron.binaries.stmgr", "${HERON_BIN}/heron-stmgr"), - TMASTER_BINARY ("heron.binaries.tmaster", "${HERON_BIN}/heron-tmaster"), + TMANAGER_BINARY ("heron.binaries.tmanager", "${HERON_BIN}/heron-tmanager"), SHELL_BINARY ("heron.binaries.shell", "${HERON_BIN}/heron-shell"), PYTHON_INSTANCE_BINARY("heron.binaries.python.instance", "${HERON_BIN}/heron-python-instance"), CPP_INSTANCE_BINARY ("heron.binaries.cpp.instance", "${HERON_BIN}/heron-cpp-instance"), diff --git a/heron/spi/src/java/org/apache/heron/spi/statemgr/IStateManager.java b/heron/spi/src/java/org/apache/heron/spi/statemgr/IStateManager.java index d02c7f9190f..af7503a526c 100644 --- 
a/heron/spi/src/java/org/apache/heron/spi/statemgr/IStateManager.java +++ b/heron/spi/src/java/org/apache/heron/spi/statemgr/IStateManager.java @@ -29,7 +29,7 @@ import org.apache.heron.proto.system.ExecutionEnvironment; import org.apache.heron.proto.system.PackingPlans; import org.apache.heron.proto.system.PhysicalPlans; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.spi.common.Config; /** @@ -37,22 +37,22 @@ *

 * Services across Heron use HeronStateMgr to get/set state information. * Currently the primary things kept by state are: - * 1. Where is the the topology master running. - * The topology master is responsible for writing this information out + * 1. Where is the topology manager running. + * The topology manager is responsible for writing this information out * upon startup. The stream managers query this upon startup to find out - * who is their topology master. In case they loose connection with - * the topology master, the stream managers query this again to see - * if the topology master has changed. + * who is their topology manager. In case they lose connection with + * the topology manager, the stream managers query this again to see + * if the topology manager has changed. *

* 2. Topology and the current running state of the topology * This information is seeded by the topology submitter. - * The topology master updates this when the state of the topology + * The topology manager updates this when the state of the topology * changes. *

 * 3. Current assignment. - * This information is solely used by topology master. When it + * This information is solely used by the topology manager. When it * creates a new assignment or when the assignment changes, it writes - * out this information. This is required for topology master failover. + * out this information. This is required for topology manager failover. *
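(As a concrete illustration of the state layout described above: the sketch below is editorial, not part of the patch itself. It shows how a scheduler-side component would read the topology manager location through the renamed API of this series; getTManagerLocation, TManagerLocation, getHost, and getControllerPort are taken from the diffs, while the sketch's own class and method names are invented.)

// Illustrative sketch only, not part of this patch series.
import org.apache.heron.proto.tmanager.TopologyManager;
import org.apache.heron.spi.statemgr.SchedulerStateManagerAdaptor;

final class TManagerLocationExample {
  // Returns "host:controllerPort" for the topology's TManager, or null
  // if the TManager has not yet written its location to the state manager.
  static String controllerEndpoint(SchedulerStateManagerAdaptor stateManager, String topologyName) {
    TopologyManager.TManagerLocation location = stateManager.getTManagerLocation(topologyName);
    if (location == null) {
      return null;
    }
    return location.getHost() + ":" + location.getControllerPort();
  }
}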

 * Clients call the methods of the state passing a callback. The callback * is called with result code upon the completion of the operation. @@ -116,28 +116,28 @@ public String getName() { ListenableFuture deleteLocks(String topologyName); /** - * Set the location of Tmaster. + * Set the location of Tmanager. * * @return Boolean - Success or Failure */ - ListenableFuture setTMasterLocation( - TopologyMaster.TMasterLocation location, String topologyName); + ListenableFuture setTManagerLocation( + TopologyManager.TManagerLocation location, String topologyName); /** - * Get the tmaster location for the given topology + * Get the tmanager location for the given topology * * @param watcher @see org.apache.heron.spi.statemgr.WatchCallback - * @return TMasterLocation + * @return TManagerLocation */ - ListenableFuture getTMasterLocation( + ListenableFuture getTManagerLocation( WatchCallback watcher, String topologyName); /** - * Delete the tmaster location for the given topology + * Delete the tmanager location for the given topology * * @return Boolean - Success or Failure */ - ListenableFuture deleteTMasterLocation(String topologyName); + ListenableFuture deleteTManagerLocation(String topologyName); /** * Set the location of MetricsCache. @@ -145,15 +145,15 @@ ListenableFuture getTMasterLocation( * @return Boolean - Success or Failure */ ListenableFuture setMetricsCacheLocation( - TopologyMaster.MetricsCacheLocation location, String topologyName); + TopologyManager.MetricsCacheLocation location, String topologyName); /** * Get the MetricsCache location for the given topology * * @param watcher @see org.apache.heron.spi.statemgr.WatchCallback - * @return TMasterLocation + * @return MetricsCacheLocation */ - ListenableFuture getMetricsCacheLocation( + ListenableFuture getMetricsCacheLocation( WatchCallback watcher, String topologyName); /** diff --git a/heron/spi/src/java/org/apache/heron/spi/statemgr/SchedulerStateManagerAdaptor.java b/heron/spi/src/java/org/apache/heron/spi/statemgr/SchedulerStateManagerAdaptor.java index 8715cbb7dd5..80d2dd469a2 100644 --- a/heron/spi/src/java/org/apache/heron/spi/statemgr/SchedulerStateManagerAdaptor.java +++ b/heron/spi/src/java/org/apache/heron/spi/statemgr/SchedulerStateManagerAdaptor.java @@ -32,7 +32,7 @@ import org.apache.heron.proto.system.ExecutionEnvironment; import org.apache.heron.proto.system.PackingPlans; import org.apache.heron.proto.system.PhysicalPlans; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; /** * This file provides a Adaptor for Scheduler. 
@@ -169,12 +169,12 @@ public Boolean updatePackingPlan(PackingPlans.PackingPlan packingPlan, String to } /** - * Delete the tmaster location for the given topology + * Delete the tmanager location for the given topology * * @return Boolean - Success or Failure */ - public Boolean deleteTMasterLocation(String topologyName) { - return awaitResult(delegate.deleteTMasterLocation(topologyName)); + public Boolean deleteTManagerLocation(String topologyName) { + return awaitResult(delegate.deleteTManagerLocation(topologyName)); } /** @@ -240,12 +240,12 @@ public Boolean deleteStatefulCheckpoint(String topologyName) { } /** - * Get the tmaster location for the given topology + * Get the tmanager location for the given topology * - * @return TMasterLocation + * @return TManagerLocation */ - public TopologyMaster.TMasterLocation getTMasterLocation(String topologyName) { - return awaitResult(delegate.getTMasterLocation(null, topologyName)); + public TopologyManager.TManagerLocation getTManagerLocation(String topologyName) { + return awaitResult(delegate.getTManagerLocation(null, topologyName)); } /** @@ -262,7 +262,7 @@ public Scheduler.SchedulerLocation getSchedulerLocation(String topologyName) { * * @return MetricsCacheLocation */ - public TopologyMaster.MetricsCacheLocation getMetricsCacheLocation(String topologyName) { + public TopologyManager.MetricsCacheLocation getMetricsCacheLocation(String topologyName) { return awaitResult(delegate.getMetricsCacheLocation(null, topologyName)); } diff --git a/heron/spi/src/java/org/apache/heron/spi/utils/TMasterException.java b/heron/spi/src/java/org/apache/heron/spi/utils/TManagerException.java similarity index 82% rename from heron/spi/src/java/org/apache/heron/spi/utils/TMasterException.java rename to heron/spi/src/java/org/apache/heron/spi/utils/TManagerException.java index b1b5ec5df04..355a7a9044a 100644 --- a/heron/spi/src/java/org/apache/heron/spi/utils/TMasterException.java +++ b/heron/spi/src/java/org/apache/heron/spi/utils/TManagerException.java @@ -20,17 +20,17 @@ package org.apache.heron.spi.utils; /** - * Thrown to indicate a TMaster error + * Thrown to indicate a TManager error */ -public class TMasterException extends RuntimeException { +public class TManagerException extends RuntimeException { public static final long serialVersionUID = 2718487207462907715L; - public TMasterException(String message) { + public TManagerException(String message) { super(message); } - public TMasterException(String message, Throwable cause) { + public TManagerException(String message, Throwable cause) { super(message, cause); } } diff --git a/heron/spi/src/java/org/apache/heron/spi/utils/TMasterUtils.java b/heron/spi/src/java/org/apache/heron/spi/utils/TManagerUtils.java similarity index 68% rename from heron/spi/src/java/org/apache/heron/spi/utils/TMasterUtils.java rename to heron/spi/src/java/org/apache/heron/spi/utils/TManagerUtils.java index c16430cce96..da167b28cae 100644 --- a/heron/spi/src/java/org/apache/heron/spi/utils/TMasterUtils.java +++ b/heron/spi/src/java/org/apache/heron/spi/utils/TManagerUtils.java @@ -32,55 +32,55 @@ import org.apache.heron.api.generated.TopologyAPI; import org.apache.heron.proto.system.PhysicalPlans; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.spi.statemgr.SchedulerStateManagerAdaptor; -public final class TMasterUtils { - public enum TMasterCommand { +public final class TManagerUtils { + public enum TManagerCommand { ACTIVATE, 
DEACTIVATE, RUNTIME_CONFIG_UPDATE } - private static final Logger LOG = Logger.getLogger(TMasterUtils.class.getName()); + private static final Logger LOG = Logger.getLogger(TManagerUtils.class.getName()); - private TMasterUtils() { + private TManagerUtils() { } /** - * Communicate with TMaster with command + * Communicate with TManager with command * - * @param command the command requested to TMaster, activate or deactivate. + * @param command the command requested to TManager, activate or deactivate. */ @VisibleForTesting - public static void sendToTMaster(String command, + public static void sendToTManager(String command, String topologyName, SchedulerStateManagerAdaptor stateManager, NetworkUtils.TunnelConfig tunnelConfig) - throws TMasterException { + throws TManagerException { final List empty = new ArrayList(); - sendToTMasterWithArguments(command, topologyName, empty, stateManager, tunnelConfig); + sendToTManagerWithArguments(command, topologyName, empty, stateManager, tunnelConfig); } @VisibleForTesting - public static void sendToTMasterWithArguments(String command, + public static void sendToTManagerWithArguments(String command, String topologyName, List arguments, SchedulerStateManagerAdaptor stateManager, NetworkUtils.TunnelConfig tunnelConfig) - throws TMasterException { - // fetch the TMasterLocation for the topology - LOG.fine("Fetching TMaster location for topology: " + topologyName); + throws TManagerException { + // fetch the TManagerLocation for the topology + LOG.fine("Fetching TManager location for topology: " + topologyName); - TopologyMaster.TMasterLocation location = stateManager.getTMasterLocation(topologyName); + TopologyManager.TManagerLocation location = stateManager.getTManagerLocation(topologyName); if (location == null) { - throw new TMasterException("Failed to fetch TMaster location for topology: " + throw new TManagerException("Failed to fetch TManager location for topology: " + topologyName); } - LOG.fine("Fetched TMaster location for topology: " + topologyName); + LOG.fine("Fetched TManager location for topology: " + topologyName); - // for the url request to be sent to TMaster + // for the url request to be sent to TManager String url = String.format("http://%s:%d/%s?topologyid=%s", location.getHost(), location.getControllerPort(), command, location.getTopologyId()); // Append extra url arguments @@ -91,41 +91,41 @@ public static void sendToTMasterWithArguments(String command, try { URL endpoint = new URL(url); - LOG.fine("HTTP URL for TMaster: " + endpoint); + LOG.fine("HTTP URL for TManager: " + endpoint); sendGetRequest(endpoint, command, tunnelConfig); } catch (MalformedURLException e) { - throw new TMasterException("Invalid URL for TMaster endpoint: " + url, e); + throw new TManagerException("Invalid URL for TManager endpoint: " + url, e); } } private static void sendGetRequest(URL endpoint, String command, NetworkUtils.TunnelConfig tunnelConfig) - throws TMasterException { + throws TManagerException { // create a URL connection HttpURLConnection connection = NetworkUtils.getProxiedHttpConnectionIfNeeded(endpoint, tunnelConfig); if (connection == null) { - throw new TMasterException(String.format( - "Failed to get a HTTP connection to TMaster: %s", endpoint)); + throw new TManagerException(String.format( + "Failed to get a HTTP connection to TManager: %s", endpoint)); } - LOG.fine("Successfully opened HTTP connection to TMaster"); + LOG.fine("Successfully opened HTTP connection to TManager"); // now sent the http request 
NetworkUtils.sendHttpGetRequest(connection); - LOG.fine("Sent the HTTP payload to TMaster"); + LOG.fine("Sent the HTTP payload to TManager"); // get the response and check if it is successful try { int responseCode = connection.getResponseCode(); if (responseCode == HttpURLConnection.HTTP_OK) { - LOG.fine("Successfully got a HTTP response from TMaster using command: " + command); + LOG.fine("Successfully got a HTTP response from TManager using command: " + command); } else { - throw new TMasterException( - String.format("Non OK HTTP response %d from TMaster for command %s", + throw new TManagerException( + String.format("Non OK HTTP response %d from TManager for command %s", responseCode, command)); } } catch (IOException e) { - throw new TMasterException(String.format( - "Failed to receive HTTP response from TMaster using command: `%s`", command), e); + throw new TManagerException(String.format( + "Failed to receive HTTP response from TManager using command: `%s`", command), e); } finally { connection.disconnect(); } @@ -136,11 +136,11 @@ private static void sendGetRequest(URL endpoint, String command, */ private static TopologyAPI.TopologyState getRuntimeTopologyState( String topologyName, - SchedulerStateManagerAdaptor statemgr) throws TMasterException { + SchedulerStateManagerAdaptor statemgr) throws TManagerException { PhysicalPlans.PhysicalPlan plan = statemgr.getPhysicalPlan(topologyName); if (plan == null) { - throw new TMasterException(String.format( + throw new TManagerException(String.format( "Failed to get physical plan for topology '%s'", topologyName)); } @@ -148,15 +148,15 @@ private static TopologyAPI.TopologyState getRuntimeTopologyState( } public static void transitionTopologyState(String topologyName, - TMasterCommand topologyStateControlCommand, + TManagerCommand topologyStateControlCommand, SchedulerStateManagerAdaptor statemgr, TopologyAPI.TopologyState startState, TopologyAPI.TopologyState expectedState, NetworkUtils.TunnelConfig tunnelConfig) - throws TMasterException { - TopologyAPI.TopologyState state = TMasterUtils.getRuntimeTopologyState(topologyName, statemgr); + throws TManagerException { + TopologyAPI.TopologyState state = TManagerUtils.getRuntimeTopologyState(topologyName, statemgr); if (state == null) { - throw new TMasterException(String.format( + throw new TManagerException(String.format( "Topology '%s' is not initialized yet", topologyName)); } @@ -168,23 +168,23 @@ public static void transitionTopologyState(String topologyName, } if (state != startState) { - throw new TMasterException(String.format( + throw new TManagerException(String.format( "Topology '%s' is not in state '%s'", topologyName, startState)); } String command = topologyStateControlCommand.name().toLowerCase(); - TMasterUtils.sendToTMaster(command, topologyName, statemgr, tunnelConfig); + TManagerUtils.sendToTManager(command, topologyName, statemgr, tunnelConfig); LOG.log(Level.INFO, "Topology command {0} completed successfully.", topologyStateControlCommand); } public static void sendRuntimeConfig(String topologyName, - TMasterCommand topologyStateControlCommand, + TManagerCommand topologyStateControlCommand, SchedulerStateManagerAdaptor statemgr, String[] configs, NetworkUtils.TunnelConfig tunnelConfig) - throws TMasterException { + throws TManagerException { final String runtimeConfigKey = "runtime-config"; final String runtimeConfigUpdateEndpoint = "runtime_config/update"; @@ -193,7 +193,7 @@ public static void sendRuntimeConfig(String topologyName, arguments.add(runtimeConfigKey + 
"=" + config); } - TMasterUtils.sendToTMasterWithArguments( + TManagerUtils.sendToTManagerWithArguments( runtimeConfigUpdateEndpoint, topologyName, arguments, statemgr, tunnelConfig); LOG.log(Level.INFO, diff --git a/heron/spi/tests/java/org/apache/heron/spi/common/ConfigLoaderTest.java b/heron/spi/tests/java/org/apache/heron/spi/common/ConfigLoaderTest.java index 27ad696ef19..228b1aff5a9 100644 --- a/heron/spi/tests/java/org/apache/heron/spi/common/ConfigLoaderTest.java +++ b/heron/spi/tests/java/org/apache/heron/spi/common/ConfigLoaderTest.java @@ -131,7 +131,7 @@ private static void assertConfig(Config config, String binPath = config.getStringValue(Key.HERON_BIN); assertKeyValue(config, Key.EXECUTOR_BINARY, binPath + "/heron-executor"); assertKeyValue(config, Key.STMGR_BINARY, binPath + "/heron-stmgr"); - assertKeyValue(config, Key.TMASTER_BINARY, binPath + "/heron-tmaster"); + assertKeyValue(config, Key.TMANAGER_BINARY, binPath + "/heron-tmanager"); assertKeyValue(config, Key.SHELL_BINARY, binPath + "/heron-shell"); assertKeyValue(config, Key.PYTHON_INSTANCE_BINARY, binPath + "/heron-python-instance"); assertKeyValue(config, Key.CPP_INSTANCE_BINARY, binPath + "/heron-cpp-instance"); diff --git a/heron/spi/tests/java/org/apache/heron/spi/common/ContextTest.java b/heron/spi/tests/java/org/apache/heron/spi/common/ContextTest.java index 0585ec713bd..1d4c39c3bef 100644 --- a/heron/spi/tests/java/org/apache/heron/spi/common/ContextTest.java +++ b/heron/spi/tests/java/org/apache/heron/spi/common/ContextTest.java @@ -37,7 +37,7 @@ public void testBinaries() throws Exception { assertEquals(Key.EXECUTOR_BINARY.getDefault(), Context.executorBinary(props)); assertEquals(Key.STMGR_BINARY.getDefault(), Context.stmgrBinary(props)); - assertEquals(Key.TMASTER_BINARY.getDefault(), Context.tmasterBinary(props)); + assertEquals(Key.TMANAGER_BINARY.getDefault(), Context.tmanagerBinary(props)); assertEquals(Key.SHELL_BINARY.getDefault(), Context.shellBinary(props)); assertEquals( Key.PYTHON_INSTANCE_BINARY.getDefault(), diff --git a/heron/spi/tests/java/org/apache/heron/spi/common/KeysTest.java b/heron/spi/tests/java/org/apache/heron/spi/common/KeysTest.java index 6f3b2bd762b..32ebf1cd4f6 100644 --- a/heron/spi/tests/java/org/apache/heron/spi/common/KeysTest.java +++ b/heron/spi/tests/java/org/apache/heron/spi/common/KeysTest.java @@ -147,8 +147,8 @@ public void testBinaries() throws Exception { Key.STMGR_BINARY ); assertEquals( - "heron.binaries.tmaster", - Key.TMASTER_BINARY + "heron.binaries.tmanager", + Key.TMANAGER_BINARY ); assertEquals( "heron.binaries.shell", From 95f22729d9c9487eeb8f646547f358d356d40392 Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 22:15:26 -0400 Subject: [PATCH 16/32] renaming "topology master" to "topology manager" in heron/statemgrs --- .../cpp/statemgr/heron-localfilestatemgr.cpp | 40 +++---- .../cpp/statemgr/heron-localfilestatemgr.h | 16 +-- .../src/cpp/statemgr/heron-statemgr.cpp | 6 +- .../src/cpp/statemgr/heron-statemgr.h | 38 +++---- .../src/cpp/statemgr/heron-zkstatemgr.cpp | 104 +++++++++--------- .../src/cpp/statemgr/heron-zkstatemgr.h | 82 +++++++------- .../statemgr/FileSystemStateManager.java | 22 ++-- .../heron/statemgr/NullStateManager.java | 14 +-- .../localfs/LocalFileSystemStateManager.java | 14 +-- .../curator/CuratorStateManager.java | 12 +- .../statemgrs/src/python/filestatemanager.py | 30 ++--- heron/statemgrs/src/python/statemanager.py | 8 +- heron/statemgrs/src/python/zkstatemanager.py | 22 ++-- heron/statemgrs/tests/cpp/statetest.cpp | 6 
+- .../tests/cpp/zkstatemgr_unittest.cpp | 44 ++++---- 15 files changed, 229 insertions(+), 229 deletions(-) diff --git a/heron/statemgrs/src/cpp/statemgr/heron-localfilestatemgr.cpp b/heron/statemgrs/src/cpp/statemgr/heron-localfilestatemgr.cpp index 00080e03315..42a5f315cc2 100644 --- a/heron/statemgrs/src/cpp/statemgr/heron-localfilestatemgr.cpp +++ b/heron/statemgrs/src/cpp/statemgr/heron-localfilestatemgr.cpp @@ -50,7 +50,7 @@ void HeronLocalFileStateMgr::InitTree() { path += "/topologies"; FileUtils::makeDirectory(path); path = dpath; - path += "/tmasters"; + path += "/tmanagers"; FileUtils::makeDirectory(path); path = dpath; path += "/pplans"; @@ -66,14 +66,14 @@ void HeronLocalFileStateMgr::InitTree() { FileUtils::makeDirectory(path); } -void HeronLocalFileStateMgr::SetTMasterLocationWatch(const std::string& topology_name, +void HeronLocalFileStateMgr::SetTManagerLocationWatch(const std::string& topology_name, VCallback<> watcher) { CHECK(watcher); // We kind of cheat here. We check periodically - time_t tmaster_last_change = FileUtils::getModifiedTime(GetTMasterLocationPath(topology_name)); + time_t tmanager_last_change = FileUtils::getModifiedTime(GetTManagerLocationPath(topology_name)); - auto cb = [topology_name, tmaster_last_change, watcher, this](EventLoop::Status status) { - this->CheckTMasterLocation(topology_name, tmaster_last_change, std::move(watcher), status); + auto cb = [topology_name, tmanager_last_change, watcher, this](EventLoop::Status status) { + this->CheckTManagerLocation(topology_name, tmanager_last_change, std::move(watcher), status); }; CHECK_GT(eventLoop_->registerTimer(std::move(cb), false, 1000000), 0); @@ -83,11 +83,11 @@ void HeronLocalFileStateMgr::SetMetricsCacheLocationWatch(const std::string& top VCallback<> watcher) { CHECK(watcher); // We kind of cheat here. 
We check periodically - time_t tmaster_last_change = FileUtils::getModifiedTime( + time_t tmanager_last_change = FileUtils::getModifiedTime( GetMetricsCacheLocationPath(topology_name)); - auto cb = [topology_name, tmaster_last_change, watcher, this](EventLoop::Status status) { - this->CheckMetricsCacheLocation(topology_name, tmaster_last_change, std::move(watcher), status); + auto cb = [topology_name, tmanager_last_change, watcher, this](EventLoop::Status status) { + this->CheckMetricsCacheLocation(topology_name, tmanager_last_change, std::move(watcher), status); }; CHECK_GT(eventLoop_->registerTimer(std::move(cb), false, 1000000), 0); @@ -106,12 +106,12 @@ void HeronLocalFileStateMgr::SetPackingPlanWatch(const std::string& topology_nam CHECK_GT(eventLoop_->registerTimer(std::move(cb), false, 1000000), 0); } -void HeronLocalFileStateMgr::GetTMasterLocation(const std::string& _topology_name, - shared_ptr _return, +void HeronLocalFileStateMgr::GetTManagerLocation(const std::string& _topology_name, + shared_ptr _return, VCallback cb) { std::string contents; proto::system::StatusCode status = - ReadAllFileContents(GetTMasterLocationPath(_topology_name), contents); + ReadAllFileContents(GetTManagerLocationPath(_topology_name), contents); if (status == proto::system::OK) { if (!_return->ParseFromString(contents)) { status = proto::system::STATE_CORRUPTED; @@ -123,7 +123,7 @@ void HeronLocalFileStateMgr::GetTMasterLocation(const std::string& _topology_nam } void HeronLocalFileStateMgr::GetMetricsCacheLocation(const std::string& _topology_name, - shared_ptr _return, + shared_ptr _return, VCallback cb) { std::string contents; proto::system::StatusCode status = @@ -138,12 +138,12 @@ void HeronLocalFileStateMgr::GetMetricsCacheLocation(const std::string& _topolog CHECK_GT(eventLoop_->registerTimer(std::move(wCb), false, 0), 0); } -void HeronLocalFileStateMgr::SetTMasterLocation(const proto::tmaster::TMasterLocation& _location, +void HeronLocalFileStateMgr::SetTManagerLocation(const proto::tmanager::TManagerLocation& _location, VCallback cb) { // Note: Unlike Zk statemgr, we overwrite the location even if there is already one. - // This is because when running in simulator we control when a tmaster dies and + // This is because when running in simulator we control when a tmanager dies and // comes up deterministically. - std::string fname = GetTMasterLocationPath(_location.topology_name()); + std::string fname = GetTManagerLocationPath(_location.topology_name()); std::string contents; _location.SerializeToString(&contents); proto::system::StatusCode status = WriteToFile(fname, contents); @@ -152,10 +152,10 @@ void HeronLocalFileStateMgr::SetTMasterLocation(const proto::tmaster::TMasterLoc } void HeronLocalFileStateMgr::SetMetricsCacheLocation( - const proto::tmaster::MetricsCacheLocation& _location, + const proto::tmanager::MetricsCacheLocation& _location, VCallback cb) { // Note: Unlike Zk statemgr, we overwrite the location even if there is already one. - // This is because when running in simulator we control when a tmaster dies and + // This is because when running in simulator we control when a tmanager dies and // comes up deterministically. 
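(Editorial aside, not part of the patch: the "cheat" the comments above describe, emulating a location watch by periodically polling a file's modification time, boils down to the following minimal Java sketch. The class and names are invented; only the compare-and-fire logic mirrors the CheckTManagerLocation/CheckMetricsCacheLocation helpers in this file.)

// Illustrative sketch only: poll a file's mtime and fire a watcher when it advances.
import java.io.File;

final class MTimePollingWatch {
  private final File file;
  private final Runnable watcher;
  private long lastSeenModified;

  MTimePollingWatch(File file, Runnable watcher) {
    this.file = file;
    this.watcher = watcher;
    this.lastSeenModified = file.lastModified(); // 0 if the file does not exist yet
  }

  // Invoke periodically, e.g. once a second as the C++ timer above does.
  void poll() {
    long modified = file.lastModified();
    if (modified > lastSeenModified) {
      lastSeenModified = modified;
      watcher.run(); // the state written to this file (e.g. a TManagerLocation) changed
    }
  }
}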
std::string fname = GetMetricsCacheLocationPath(_location.topology_name()); std::string contents; @@ -464,9 +464,9 @@ proto::system::StatusCode HeronLocalFileStateMgr::MakeSureFileDoesNotExist( } } -void HeronLocalFileStateMgr::CheckTMasterLocation(std::string topology_name, time_t last_change, +void HeronLocalFileStateMgr::CheckTManagerLocation(std::string topology_name, time_t last_change, VCallback<> watcher, EventLoop::Status) { - time_t nlast_change = FileUtils::getModifiedTime(GetTMasterLocationPath(topology_name)); + time_t nlast_change = FileUtils::getModifiedTime(GetTManagerLocationPath(topology_name)); if (nlast_change > last_change) { watcher(); } else { @@ -474,7 +474,7 @@ void HeronLocalFileStateMgr::CheckTMasterLocation(std::string topology_name, tim } auto cb = [topology_name, nlast_change, watcher, this](EventLoop::Status status) { - this->CheckTMasterLocation(topology_name, nlast_change, std::move(watcher), status); + this->CheckTManagerLocation(topology_name, nlast_change, std::move(watcher), status); }; CHECK_GT(eventLoop_->registerTimer(std::move(cb), false, 1000000), 0); diff --git a/heron/statemgrs/src/cpp/statemgr/heron-localfilestatemgr.h b/heron/statemgrs/src/cpp/statemgr/heron-localfilestatemgr.h index 41c98b8577a..3af89105ffe 100644 --- a/heron/statemgrs/src/cpp/statemgr/heron-localfilestatemgr.h +++ b/heron/statemgrs/src/cpp/statemgr/heron-localfilestatemgr.h @@ -48,20 +48,20 @@ class HeronLocalFileStateMgr : public HeronStateMgr { // Sets up the basic filesystem tree at the given location void InitTree(); - void SetTMasterLocationWatch(const std::string& _topology_name, VCallback<> _watcher); + void SetTManagerLocationWatch(const std::string& _topology_name, VCallback<> _watcher); void SetMetricsCacheLocationWatch(const std::string& _topology_name, VCallback<> _watcher); void SetPackingPlanWatch(const std::string& _topology_name, VCallback<> _watcher); // implement the functions - void GetTMasterLocation(const std::string& _topology_name, - shared_ptr _return, + void GetTManagerLocation(const std::string& _topology_name, + shared_ptr _return, VCallback _cb); - void SetTMasterLocation(const proto::tmaster::TMasterLocation& _location, + void SetTManagerLocation(const proto::tmanager::TManagerLocation& _location, VCallback _cb); void GetMetricsCacheLocation(const std::string& _topology_name, - shared_ptr _return, + shared_ptr _return, VCallback _cb); - void SetMetricsCacheLocation(const proto::tmaster::MetricsCacheLocation& _location, + void SetMetricsCacheLocation(const proto::tmanager::MetricsCacheLocation& _location, VCallback _cb); void CreateTopology(const proto::api::Topology& _top, VCallback _cb); @@ -128,8 +128,8 @@ class HeronLocalFileStateMgr : public HeronStateMgr { // helper function to see if a file exists proto::system::StatusCode MakeSureFileDoesNotExist(const std::string& _filename); - // helper function to see if the tmaster location has changed - void CheckTMasterLocation(std::string _topology_name, time_t _last_change, VCallback<> _watcher, + // helper function to see if the tmanager location has changed + void CheckTManagerLocation(std::string _topology_name, time_t _last_change, VCallback<> _watcher, EventLoop::Status); void CheckMetricsCacheLocation(std::string _topology_name, time_t _last_change, VCallback<> _watcher, EventLoop::Status); diff --git a/heron/statemgrs/src/cpp/statemgr/heron-statemgr.cpp b/heron/statemgrs/src/cpp/statemgr/heron-statemgr.cpp index 60184891174..a0c5da30516 100644 --- 
a/heron/statemgrs/src/cpp/statemgr/heron-statemgr.cpp +++ b/heron/statemgrs/src/cpp/statemgr/heron-statemgr.cpp @@ -99,7 +99,7 @@ void HeronStateMgr::ListExecutionStateDone(std::vector _watcher) = 0; + virtual void SetTManagerLocationWatch(const std::string& _topology_name, VCallback<> _watcher) = 0; virtual void SetMetricsCacheLocationWatch( const std::string& _topology_name, VCallback<> _watcher) = 0; virtual void SetPackingPlanWatch(const std::string& _topology_name, VCallback<> _watcher) = 0; - // Sets/Gets the Tmaster - virtual void GetTMasterLocation(const std::string& _topology_name, - shared_ptr _return, + // Sets/Gets the Tmanager + virtual void GetTManagerLocation(const std::string& _topology_name, + shared_ptr _return, VCallback _cb) = 0; - virtual void SetTMasterLocation(const proto::tmaster::TMasterLocation& _location, + virtual void SetTManagerLocation(const proto::tmanager::TManagerLocation& _location, VCallback _cb) = 0; virtual void GetMetricsCacheLocation(const std::string& _topology_name, - shared_ptr _return, + shared_ptr _return, VCallback _cb) = 0; - virtual void SetMetricsCacheLocation(const proto::tmaster::MetricsCacheLocation& _location, + virtual void SetMetricsCacheLocation(const proto::tmanager::MetricsCacheLocation& _location, VCallback _cb) = 0; // Gets/Sets the Topology @@ -164,7 +164,7 @@ class HeronStateMgr { // // We define methods of where the records have to be placed // - std::string GetTMasterLocationPath(const std::string& _topology_name); + std::string GetTManagerLocationPath(const std::string& _topology_name); std::string GetMetricsCacheLocationPath(const std::string& _topology_name); std::string GetTopologyPath(const std::string& _topology_name); std::string GetPhysicalPlanPath(const std::string& _topology_name); @@ -172,7 +172,7 @@ class HeronStateMgr { std::string GetExecutionStatePath(const std::string& _topology_name); std::string GetStatefulCheckpointsPath(const std::string& _topology_name); - std::string GetTMasterLocationDir(); + std::string GetTManagerLocationDir(); std::string GetMetricsCacheLocationDir(); std::string GetTopologyDir(); std::string GetPhysicalPlanDir(); diff --git a/heron/statemgrs/src/cpp/statemgr/heron-zkstatemgr.cpp b/heron/statemgrs/src/cpp/statemgr/heron-zkstatemgr.cpp index 1bae943a65d..6f0158abe8b 100644 --- a/heron/statemgrs/src/cpp/statemgr/heron-zkstatemgr.cpp +++ b/heron/statemgrs/src/cpp/statemgr/heron-zkstatemgr.cpp @@ -38,7 +38,7 @@ HeronZKStateMgr::HeronZKStateMgr(const std::string& zkhostport, const std::strin zkclient_(NULL), zkclient_factory_(new DefaultZKClientFactory()), eventLoop_(eventLoop), - tmaster_location_watcher_info_(NULL), + tmanager_location_watcher_info_(NULL), exitOnSessionExpiry_(exitOnSessionExpiry) { Init(); } @@ -52,7 +52,7 @@ HeronZKStateMgr::HeronZKStateMgr(const std::string& zkhostport, const std::strin zkclient_(NULL), zkclient_factory_(zkclient_factory), eventLoop_(eventLoop), - tmaster_location_watcher_info_(NULL), + tmanager_location_watcher_info_(NULL), exitOnSessionExpiry_(exitOnSessionExpiry) { Init(); } @@ -73,7 +73,7 @@ void HeronZKStateMgr::Init() { HeronZKStateMgr::~HeronZKStateMgr() { delete zkclient_; delete zkclient_factory_; - delete tmaster_location_watcher_info_; + delete tmanager_location_watcher_info_; } void HeronZKStateMgr::InitTree() { @@ -81,13 +81,13 @@ void HeronZKStateMgr::InitTree() { CHECK(false); } -void HeronZKStateMgr::SetTMasterLocationWatch(const std::string& topology_name, +void HeronZKStateMgr::SetTManagerLocationWatch(const std::string& 
topology_name, VCallback<> watcher) { CHECK(watcher); CHECK(!topology_name.empty()); - tmaster_location_watcher_info_ = new TMasterLocationWatchInfo(std::move(watcher), topology_name); - SetTMasterLocationWatchInternal(); + tmanager_location_watcher_info_ = new TManagerLocationWatchInfo(std::move(watcher), topology_name); + SetTManagerLocationWatchInternal(); } void HeronZKStateMgr::SetMetricsCacheLocationWatch(const std::string& topology_name, @@ -95,7 +95,7 @@ void HeronZKStateMgr::SetMetricsCacheLocationWatch(const std::string& topology_n CHECK(watcher); CHECK(!topology_name.empty()); - metricscache_location_watcher_info_ = new TMasterLocationWatchInfo( + metricscache_location_watcher_info_ = new TManagerLocationWatchInfo( std::move(watcher), topology_name); SetMetricsCacheLocationWatchInternal(); } @@ -104,22 +104,22 @@ void HeronZKStateMgr::SetPackingPlanWatch(const std::string& topology_name, VCal CHECK(watcher); CHECK(!topology_name.empty()); - packing_plan_watcher_info_ = new TMasterLocationWatchInfo(std::move(watcher), topology_name); + packing_plan_watcher_info_ = new TManagerLocationWatchInfo(std::move(watcher), topology_name); SetPackingPlanWatchInternal(); } -void HeronZKStateMgr::SetTMasterLocation(const proto::tmaster::TMasterLocation& _location, +void HeronZKStateMgr::SetTManagerLocation(const proto::tmanager::TManagerLocation& _location, VCallback cb) { // Just try to create an ephimeral node - std::string path = GetTMasterLocationPath(_location.topology_name()); + std::string path = GetTManagerLocationPath(_location.topology_name()); std::string value; _location.SerializeToString(&value); - auto wCb = [cb, this](sp_int32 rc) { this->SetTMasterLocationDone(std::move(cb), rc); }; + auto wCb = [cb, this](sp_int32 rc) { this->SetTManagerLocationDone(std::move(cb), rc); }; zkclient_->CreateNode(path, value, true, std::move(wCb)); } -void HeronZKStateMgr::SetMetricsCacheLocation(const proto::tmaster::MetricsCacheLocation& _location, +void HeronZKStateMgr::SetMetricsCacheLocation(const proto::tmanager::MetricsCacheLocation& _location, VCallback cb) { // Just try to create an ephimeral node std::string path = GetMetricsCacheLocationPath(_location.topology_name()); @@ -130,21 +130,21 @@ void HeronZKStateMgr::SetMetricsCacheLocation(const proto::tmaster::MetricsCache zkclient_->CreateNode(path, value, true, std::move(wCb)); } -void HeronZKStateMgr::GetTMasterLocation(const std::string& _topology_name, - shared_ptr _return, +void HeronZKStateMgr::GetTManagerLocation(const std::string& _topology_name, + shared_ptr _return, VCallback cb) { - std::string path = GetTMasterLocationPath(_topology_name); + std::string path = GetTManagerLocationPath(_topology_name); std::string* contents = new std::string(); auto wCb = [contents, _return, cb, this](sp_int32 rc) { - this->GetTMasterLocationDone(contents, _return, std::move(cb), rc); + this->GetTManagerLocationDone(contents, _return, std::move(cb), rc); }; zkclient_->Get(path, contents, std::move(wCb)); } void HeronZKStateMgr::GetMetricsCacheLocation(const std::string& _topology_name, - shared_ptr _return, + shared_ptr _return, VCallback cb) { std::string path = GetMetricsCacheLocationPath(_topology_name); std::string* contents = new std::string(); @@ -362,13 +362,13 @@ void HeronZKStateMgr::GlobalWatchEventHandler(const ZKClient::ZkWatchEvent event LOG(INFO) << "Deleted current zk client, creating a new one..."; zkclient_ = zkclient_factory_->create(zkhostport_, eventLoop_, watch_event_cb_); LOG(INFO) << "New zk client created"; - // set 
tmaster watch and notify the client watcher + // set tmanager watch and notify the client watcher // NOTE: It isn't enough to just set the watch here, since we could - // have lost a tmaster node change when the session expired. This is needed + // have lost a tmanager node change when the session expired. This is needed // since the current zkclient design notifies only the "Connected_State" events to // the individual node watchers. Session expired events need explicit notification. - if (IsTmasterWatchDefined()) { - TMasterLocationWatch(); + if (IsTmanagerWatchDefined()) { + TManagerLocationWatch(); } } else { LOG(WARNING) << "Events other than session expired event are not" @@ -376,14 +376,14 @@ void HeronZKStateMgr::GlobalWatchEventHandler(const ZKClient::ZkWatchEvent event } } -void HeronZKStateMgr::SetTMasterLocationDone(VCallback cb, +void HeronZKStateMgr::SetTManagerLocationDone(VCallback cb, sp_int32 _rc) { proto::system::StatusCode code = proto::system::OK; if (_rc == ZNODEEXISTS) { - LOG(ERROR) << "Setting TMaster Location failed because another zmaster exists" << std::endl; - code = proto::system::TMASTERLOCATION_ALREADY_EXISTS; + LOG(ERROR) << "Setting TManager Location failed because another zprimary exists" << std::endl; + code = proto::system::TMANAGERLOCATION_ALREADY_EXISTS; } else if (_rc != ZOK) { - LOG(ERROR) << "Setting TMaster Location failed with error " << _rc << std::endl; + LOG(ERROR) << "Setting TManager Location failed with error " << _rc << std::endl; code = proto::system::STATE_WRITE_ERROR; } @@ -394,7 +394,7 @@ void HeronZKStateMgr::SetMetricsCacheLocationDone(VCallback _return, +void HeronZKStateMgr::GetTManagerLocationDone(std::string* _contents, + shared_ptr _return, VCallback cb, sp_int32 _rc) { proto::system::StatusCode code = proto::system::OK; if (_rc == ZOK) { if (!_return->ParseFromString(*_contents)) { - LOG(ERROR) << "Error parsing tmaster location" << std::endl; + LOG(ERROR) << "Error parsing tmanager location" << std::endl; code = proto::system::STATE_CORRUPTED; } } else if (_rc == ZNONODE) { - LOG(ERROR) << "Error getting tmaster location because the tmaster does not exist" << std::endl; + LOG(ERROR) << "Error getting tmanager location because the tmanager does not exist" << std::endl; code = proto::system::PATH_DOES_NOT_EXIST; } else { - LOG(ERROR) << "Getting TMaster Location failed with error " << _rc << std::endl; + LOG(ERROR) << "Getting TManager Location failed with error " << _rc << std::endl; code = proto::system::STATE_READ_ERROR; } delete _contents; @@ -427,7 +427,7 @@ void HeronZKStateMgr::GetTMasterLocationDone(std::string* _contents, } void HeronZKStateMgr::GetMetricsCacheLocationDone(std::string* _contents, - shared_ptr _return, + shared_ptr _return, VCallback cb, sp_int32 _rc) { proto::system::StatusCode code = proto::system::OK; @@ -713,9 +713,9 @@ void HeronZKStateMgr::ListExecutionStateTopologiesDone(VCallbackwatcher_cb && - !tmaster_location_watcher_info_->topology_name.empty()); +bool HeronZKStateMgr::IsTmanagerWatchDefined() { + return (tmanager_location_watcher_info_ != NULL && tmanager_location_watcher_info_->watcher_cb && + !tmanager_location_watcher_info_->topology_name.empty()); } bool HeronZKStateMgr::IsMetricsCacheWatchDefined() { @@ -744,19 +744,19 @@ bool HeronZKStateMgr::ShouldRetrySetWatch(sp_int32 rc) { } } -void HeronZKStateMgr::SetTMasterWatchCompletionHandler(sp_int32 rc) { +void HeronZKStateMgr::SetTManagerWatchCompletionHandler(sp_int32 rc) { if (rc == ZOK || rc == ZNONODE) { - // NoNode is when there is no 
tmaster up yet, but the watch is set. - LOG(INFO) << "Setting watch on tmaster location succeeded: " << zerror(rc) << std::endl; + // NoNode is when there is no tmanager up yet, but the watch is set. + LOG(INFO) << "Setting watch on tmanager location succeeded: " << zerror(rc) << std::endl; } else { // Any other return code should be treated as warning, since ideally // we shouldn't be in this state. - LOG(WARNING) << "Setting watch on tmaster location returned: " << zerror(rc) << std::endl; + LOG(WARNING) << "Setting watch on tmanager location returned: " << zerror(rc) << std::endl; if (ShouldRetrySetWatch(rc)) { LOG(INFO) << "Retrying after " << SET_WATCH_RETRY_INTERVAL_S << " seconds" << std::endl; - auto cb = [this](EventLoop::Status status) { this->CallSetTMasterLocationWatch(status); }; + auto cb = [this](EventLoop::Status status) { this->CallSetTManagerLocationWatch(status); }; eventLoop_->registerTimer(std::move(cb), false, SET_WATCH_RETRY_INTERVAL_S * 1000 * 1000); } @@ -765,7 +765,7 @@ void HeronZKStateMgr::SetTMasterWatchCompletionHandler(sp_int32 rc) { void HeronZKStateMgr::SetMetricsCacheWatchCompletionHandler(sp_int32 rc) { if (rc == ZOK || rc == ZNONODE) { - // NoNode is when there is no tmaster up yet, but the watch is set. + // NoNode is when there is no tmanager up yet, but the watch is set. LOG(INFO) << "Setting watch on metricscache location succeeded: " << zerror(rc) << std::endl; } else { // Any other return code should be treated as warning, since ideally @@ -801,8 +801,8 @@ void HeronZKStateMgr::SetPackingPlanWatchCompletionHandler(sp_int32 rc) { } } -void HeronZKStateMgr::CallSetTMasterLocationWatch(EventLoop::Status) { - SetTMasterLocationWatchInternal(); +void HeronZKStateMgr::CallSetTManagerLocationWatch(EventLoop::Status) { + SetTManagerLocationWatchInternal(); } void HeronZKStateMgr::CallSetMetricsCacheLocationWatch(EventLoop::Status) { @@ -813,14 +813,14 @@ void HeronZKStateMgr::CallSetPackingPlanWatch(EventLoop::Status) { SetPackingPlanWatchInternal(); } -void HeronZKStateMgr::SetTMasterLocationWatchInternal() { - CHECK(IsTmasterWatchDefined()); +void HeronZKStateMgr::SetTManagerLocationWatchInternal() { + CHECK(IsTmanagerWatchDefined()); - LOG(INFO) << "Setting watch on tmaster location " << std::endl; - std::string path = GetTMasterLocationPath(tmaster_location_watcher_info_->topology_name); + LOG(INFO) << "Setting watch on tmanager location " << std::endl; + std::string path = GetTManagerLocationPath(tmanager_location_watcher_info_->topology_name); - zkclient_->Exists(path, [this]() { this->TMasterLocationWatch(); }, - [this](sp_int32 rc) { this->SetTMasterWatchCompletionHandler(rc); }); + zkclient_->Exists(path, [this]() { this->TManagerLocationWatch(); }, + [this](sp_int32 rc) { this->SetTManagerWatchCompletionHandler(rc); }); } void HeronZKStateMgr::SetMetricsCacheLocationWatchInternal() { @@ -844,11 +844,11 @@ void HeronZKStateMgr::SetPackingPlanWatchInternal() { [this](sp_int32 rc) { this->SetPackingPlanWatchCompletionHandler(rc); }); } -void HeronZKStateMgr::TMasterLocationWatch() { +void HeronZKStateMgr::TManagerLocationWatch() { // First setup watch again - SetTMasterLocationWatchInternal(); + SetTManagerLocationWatchInternal(); // Then run the watcher - tmaster_location_watcher_info_->watcher_cb(); + tmanager_location_watcher_info_->watcher_cb(); } void HeronZKStateMgr::MetricsCacheLocationWatch() { diff --git a/heron/statemgrs/src/cpp/statemgr/heron-zkstatemgr.h b/heron/statemgrs/src/cpp/statemgr/heron-zkstatemgr.h index 1d67d08be72..f1f5f1b3656 
100644 --- a/heron/statemgrs/src/cpp/statemgr/heron-zkstatemgr.h +++ b/heron/statemgrs/src/cpp/statemgr/heron-zkstatemgr.h @@ -24,16 +24,16 @@ // // This file defines the ZK implenentation of the HeronStateMgr interface. // The details are -// 1. TopologyMasterLocation is kept as an ephimeral node. This way if the -// master goes away, the node is not there. Thus a create would -// succeed. If there is another tmaster, the createnode would fail -// which would give the indication that another tmaster was running. -// Thus TMasterServer can use just the set method to see if he is the -// only tmaster. +// 1. TopologyManagerLocation is kept as an ephemeral node. This way if the +// primary goes away, the node is not there. Thus a create would +// succeed. If there is another tmanager, the createnode would fail +// which would give the indication that another tmanager was running. +// Thus TManagerServer can use just the set method to see if it is the +// only tmanager. // 2. Once #1 is ensured, Topology and Assignment are straightforward // create/set operations // 3. The Topology node always exists. However the assignment may or -// may not exist. Currently TMaster always does get of assignment +// may not exist. Currently TManager always does a get of the assignment // to see if some assignment exists or not. We also keep track // of this. So that the next time a SetAssignment is called, // we know whether to do createnode or setnode @@ -66,21 +66,21 @@ class HeronZKStateMgr : public HeronStateMgr { void InitTree(); - // Sets up a watch on tmaster location change - void SetTMasterLocationWatch(const std::string& _topology_name, VCallback<> _watcher); + // Sets up a watch on tmanager location change + void SetTManagerLocationWatch(const std::string& _topology_name, VCallback<> _watcher); void SetMetricsCacheLocationWatch(const std::string& _topology_name, VCallback<> _watcher); void SetPackingPlanWatch(const std::string& _topology_name, VCallback<> _watcher); - // Sets the Tmaster - void SetTMasterLocation(const proto::tmaster::TMasterLocation& _location, + // Sets the Tmanager + void SetTManagerLocation(const proto::tmanager::TManagerLocation& _location, VCallback _cb); - void GetTMasterLocation(const std::string& _topology_name, - shared_ptr _return, + void GetTManagerLocation(const std::string& _topology_name, + shared_ptr _return, VCallback _cb); - void SetMetricsCacheLocation(const proto::tmaster::MetricsCacheLocation& _location, + void SetMetricsCacheLocation(const proto::tmanager::MetricsCacheLocation& _location, VCallback _cb); void GetMetricsCacheLocation(const std::string& _topology_name, - shared_ptr _return, + shared_ptr _return, VCallback _cb); // Gets/Sets the Topology @@ -143,13 +143,13 @@ class HeronZKStateMgr : public HeronStateMgr { private: // Done methods - void SetTMasterLocationDone(VCallback _cb, sp_int32 _rc); + void SetTManagerLocationDone(VCallback _cb, sp_int32 _rc); void SetMetricsCacheLocationDone(VCallback _cb, sp_int32 _rc); - void GetTMasterLocationDone(std::string* _contents, - shared_ptr _return, + void GetTManagerLocationDone(std::string* _contents, + shared_ptr _return, VCallback _cb, sp_int32 _rc); void GetMetricsCacheLocationDone(std::string* _contents, - shared_ptr _return, + shared_ptr _return, VCallback _cb, sp_int32 _rc); @@ -183,36 +183,36 @@ - // This is the callback passed to ZkClient, 
to handle tmaster location - // changes. It inturn calls the tmaster_location_watcher to notify the + // This is the callback passed to ZkClient, to handle tmanager location + // changes. It in turn calls the tmanager_location_watcher to notify the // clients about the change. - void TMasterLocationWatch(); + void TManagerLocationWatch(); void MetricsCacheLocationWatch(); void PackingPlanWatch(); // Handles global events from ZKClient. For now, it handles the session // expired event, by deleting the current client, creating a new one, - // setting the tmaster location watch, and notifying the client of a - // possible tmaster location change. + // setting the tmanager location watch, and notifying the client of a + // possible tmanager location change. void GlobalWatchEventHandler(const ZKClient::ZkWatchEvent event); - // Sets a tmaster location watch through the ZKClient Exists method. - void SetTMasterLocationWatchInternal(); + // Sets a tmanager location watch through the ZKClient Exists method. + void SetTManagerLocationWatchInternal(); void SetMetricsCacheLocationWatchInternal(); void SetPackingPlanWatchInternal(); // A wrapper to be passed to select server registerTimer call. - // Ignores the status and call SetTMasterLocationWatchInternal - void CallSetTMasterLocationWatch(EventLoop::Status status); + // Ignores the status and calls SetTManagerLocationWatchInternal + void CallSetTManagerLocationWatch(EventLoop::Status status); void CallSetMetricsCacheLocationWatch(EventLoop::Status status); void CallSetPackingPlanWatch(EventLoop::Status status); // A handler callback that gets called by ZkClient upon completion of - // setting Tmaster watch. If the return code indicates failure, we + // setting Tmanager watch. If the return code indicates failure, we // retry after SET_WATCH_RETRY_INTERVAL_S seconds. - void SetTMasterWatchCompletionHandler(sp_int32 rc); + void SetTManagerWatchCompletionHandler(sp_int32 rc); void SetMetricsCacheWatchCompletionHandler(sp_int32 rc); void SetPackingPlanWatchCompletionHandler(sp_int32 rc); - // Essentially tells you whether SetTmasterLocationWatch has been + // Essentially tells you whether SetTmanagerLocationWatch has been // called by the client or not. It gets this info through - // tmaster_location_watcher_info_ - bool IsTmasterWatchDefined(); + // tmanager_location_watcher_info_ + bool IsTmanagerWatchDefined(); bool IsMetricsCacheWatchDefined(); bool IsPackingPlanWatchDefined(); // Common functionality for c`tors. Should be called only once from c`tor @@ -232,21 +232,21 @@ class HeronZKStateMgr : public HeronStateMgr { // A permanent callback initialized to wrap the WatchEventHandler VCallback watch_event_cb_; - // Holds the tmaster location watch callback and the topology name - // passed by the client. Needed for recreating tmaster location watch - // on session expiry. Only set after 'SetTmasterLocationWatch' method + // Holds the tmanager location watch callback and the topology name + // passed by the client. Needed for recreating tmanager location watch + // on session expiry. Only set after 'SetTmanagerLocationWatch' method // is called. 
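(Editorial aside, not part of the patch: the ephemeral-node scheme the header comment of this file describes, where a create succeeds only if no other TManager holds the location and the node disappears with its owner's ZooKeeper session, can be sketched roughly as follows with Apache Curator, the library the Java CuratorStateManager later in this series is built on. The class, method, path, and payload names here are illustrative only.)

// Illustrative sketch only: first creator wins; the node vanishes with the session.
import org.apache.curator.framework.CuratorFramework;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;

final class EphemeralLocationSketch {
  // Returns true if this process registered itself as the topology's TManager.
  static boolean tryRegister(CuratorFramework client, String locationPath, byte[] serializedLocation)
      throws Exception {
    try {
      client.create().withMode(CreateMode.EPHEMERAL).forPath(locationPath, serializedLocation);
      return true;
    } catch (KeeperException.NodeExistsException e) {
      return false; // another TManager already holds the location
    }
  }
}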
- struct TMasterLocationWatchInfo { + struct TManagerLocationWatchInfo { VCallback<> watcher_cb; std::string topology_name; - TMasterLocationWatchInfo(VCallback<> watcher, std::string name) + TManagerLocationWatchInfo(VCallback<> watcher, std::string name) : watcher_cb(std::move(watcher)), topology_name(name) {} }; - const TMasterLocationWatchInfo* tmaster_location_watcher_info_; - const TMasterLocationWatchInfo* metricscache_location_watcher_info_; - const TMasterLocationWatchInfo* packing_plan_watcher_info_; + const TManagerLocationWatchInfo* tmanager_location_watcher_info_; + const TManagerLocationWatchInfo* metricscache_location_watcher_info_; + const TManagerLocationWatchInfo* packing_plan_watcher_info_; // If true, we exit on zookeeper session expired event const bool exitOnSessionExpiry_; // Retry interval if setting a watch on zk node fails. diff --git a/heron/statemgrs/src/java/org/apache/heron/statemgr/FileSystemStateManager.java b/heron/statemgrs/src/java/org/apache/heron/statemgr/FileSystemStateManager.java index 91400090563..2b46fca2400 100644 --- a/heron/statemgrs/src/java/org/apache/heron/statemgr/FileSystemStateManager.java +++ b/heron/statemgrs/src/java/org/apache/heron/statemgr/FileSystemStateManager.java @@ -33,7 +33,7 @@ import org.apache.heron.proto.system.ExecutionEnvironment; import org.apache.heron.proto.system.PackingPlans; import org.apache.heron.proto.system.PhysicalPlans; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.spi.common.Config; import org.apache.heron.spi.common.Context; import org.apache.heron.spi.common.Key; @@ -60,7 +60,7 @@ protected static void safeSetException(SettableFuture future, Throwable c protected String rootAddress; protected enum StateLocation { - TMASTER_LOCATION("tmasters", "TMaster location"), + TMANAGER_LOCATION("tmanagers", "TManager location"), METRICSCACHE_LOCATION("metricscaches", "MetricsCache location"), TOPOLOGY("topologies", "Topologies"), PACKING_PLAN("packingplans", "Packing plan"), @@ -171,17 +171,17 @@ public ListenableFuture getPhysicalPlan( } @Override - public ListenableFuture getTMasterLocation( + public ListenableFuture getTManagerLocation( WatchCallback watcher, String topologyName) { - return getNodeData(watcher, StateLocation.TMASTER_LOCATION, topologyName, - TopologyMaster.TMasterLocation.newBuilder()); + return getNodeData(watcher, StateLocation.TMANAGER_LOCATION, topologyName, + TopologyManager.TManagerLocation.newBuilder()); } @Override - public ListenableFuture getMetricsCacheLocation( + public ListenableFuture getMetricsCacheLocation( WatchCallback watcher, String topologyName) { return getNodeData(watcher, StateLocation.METRICSCACHE_LOCATION, topologyName, - TopologyMaster.MetricsCacheLocation.newBuilder()); + TopologyManager.MetricsCacheLocation.newBuilder()); } @Override @@ -192,8 +192,8 @@ public ListenableFuture getStat } @Override - public ListenableFuture deleteTMasterLocation(String topologyName) { - return deleteNode(StateLocation.TMASTER_LOCATION, topologyName); + public ListenableFuture deleteTManagerLocation(String topologyName) { + return deleteNode(StateLocation.TMANAGER_LOCATION, topologyName); } @Override @@ -311,9 +311,9 @@ protected void doMain(String[] args, Config config) print("SchedulerLocation node not found %s", e.getMessage()); } try { - print("==> TMasterLocation:\n%s", getTMasterLocation(null, topologyName).get()); + print("==> TManagerLocation:\n%s", getTManagerLocation(null, 
topologyName).get()); } catch (ExecutionException e) { - print("TMasterLocation node not found %s", e.getMessage()); + print("TManagerLocation node not found %s", e.getMessage()); } try { print("==> MetricsCacheLocation:\n%s", getMetricsCacheLocation(null, topologyName).get()); diff --git a/heron/statemgrs/src/java/org/apache/heron/statemgr/NullStateManager.java b/heron/statemgrs/src/java/org/apache/heron/statemgr/NullStateManager.java index c29641d7fa3..a7437f22592 100644 --- a/heron/statemgrs/src/java/org/apache/heron/statemgr/NullStateManager.java +++ b/heron/statemgrs/src/java/org/apache/heron/statemgr/NullStateManager.java @@ -32,7 +32,7 @@ import org.apache.heron.proto.system.ExecutionEnvironment; import org.apache.heron.proto.system.PackingPlans; import org.apache.heron.proto.system.PhysicalPlans; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.spi.common.Config; import org.apache.heron.spi.statemgr.IStateManager; import org.apache.heron.spi.statemgr.Lock; @@ -72,15 +72,15 @@ public ListenableFuture isTopologyRunning(String topologyName) { } @Override - public ListenableFuture setTMasterLocation( - TopologyMaster.TMasterLocation location, + public ListenableFuture setTManagerLocation( + TopologyManager.TManagerLocation location, String topologyName) { return nullFuture; } @Override public ListenableFuture setMetricsCacheLocation( - TopologyMaster.MetricsCacheLocation location, + TopologyManager.MetricsCacheLocation location, String topologyName) { return nullFuture; } @@ -121,7 +121,7 @@ public ListenableFuture setSchedulerLocation( } @Override - public ListenableFuture deleteTMasterLocation(String topologyName) { + public ListenableFuture deleteTManagerLocation(String topologyName) { return nullFuture; } @@ -156,14 +156,14 @@ public ListenableFuture deleteSchedulerLocation(String topologyName) { } @Override - public ListenableFuture getTMasterLocation( + public ListenableFuture getTManagerLocation( WatchCallback watcher, String topologyName) { return SettableFuture.create(); } @Override - public ListenableFuture getMetricsCacheLocation( + public ListenableFuture getMetricsCacheLocation( WatchCallback watcher, String topologyName) { return SettableFuture.create(); diff --git a/heron/statemgrs/src/java/org/apache/heron/statemgr/localfs/LocalFileSystemStateManager.java b/heron/statemgrs/src/java/org/apache/heron/statemgr/localfs/LocalFileSystemStateManager.java index d459bc1e01e..66d4b02a7b4 100644 --- a/heron/statemgrs/src/java/org/apache/heron/statemgr/localfs/LocalFileSystemStateManager.java +++ b/heron/statemgrs/src/java/org/apache/heron/statemgr/localfs/LocalFileSystemStateManager.java @@ -36,7 +36,7 @@ import org.apache.heron.proto.system.ExecutionEnvironment; import org.apache.heron.proto.system.PackingPlans; import org.apache.heron.proto.system.PhysicalPlans; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.spi.common.Config; import org.apache.heron.spi.common.Key; import org.apache.heron.spi.statemgr.Lock; @@ -184,19 +184,19 @@ public ListenableFuture setExecutionState( } @Override - public ListenableFuture setTMasterLocation( - TopologyMaster.TMasterLocation location, String topologyName) { + public ListenableFuture setTManagerLocation( + TopologyManager.TManagerLocation location, String topologyName) { // Note: Unlike Zk statemgr, we overwrite the location even if there is already one. 
- // This is because when running in simulator we control when a tmaster dies and + // This is because when running in simulator we control when a tmanager dies and // comes up deterministically. - return setData(StateLocation.TMASTER_LOCATION, topologyName, location.toByteArray(), true); + return setData(StateLocation.TMANAGER_LOCATION, topologyName, location.toByteArray(), true); } @Override public ListenableFuture setMetricsCacheLocation( - TopologyMaster.MetricsCacheLocation location, String topologyName) { + TopologyManager.MetricsCacheLocation location, String topologyName) { // Note: Unlike Zk statemgr, we overwrite the location even if there is already one. - // This is because when running in simulator we control when a tmaster dies and + // This is because when running in simulator we control when a tmanager dies and // comes up deterministically. LOG.info("setMetricsCacheLocation: "); return setData(StateLocation.METRICSCACHE_LOCATION, topologyName, location.toByteArray(), true); diff --git a/heron/statemgrs/src/java/org/apache/heron/statemgr/zookeeper/curator/CuratorStateManager.java b/heron/statemgrs/src/java/org/apache/heron/statemgr/zookeeper/curator/CuratorStateManager.java index 3e0e63f6d97..b1d3da9abc5 100644 --- a/heron/statemgrs/src/java/org/apache/heron/statemgr/zookeeper/curator/CuratorStateManager.java +++ b/heron/statemgrs/src/java/org/apache/heron/statemgr/zookeeper/curator/CuratorStateManager.java @@ -46,7 +46,7 @@ import org.apache.heron.proto.system.ExecutionEnvironment; import org.apache.heron.proto.system.PackingPlans; import org.apache.heron.proto.system.PhysicalPlans; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.spi.common.Config; import org.apache.heron.spi.common.Context; import org.apache.heron.spi.common.Key; @@ -348,15 +348,15 @@ protected Lock getLock(String path) { } @Override - public ListenableFuture setTMasterLocation( - TopologyMaster.TMasterLocation location, + public ListenableFuture setTManagerLocation( + TopologyManager.TManagerLocation location, String topologyName) { - return createNode(StateLocation.TMASTER_LOCATION, topologyName, location.toByteArray(), true); + return createNode(StateLocation.TMANAGER_LOCATION, topologyName, location.toByteArray(), true); } @Override public ListenableFuture setMetricsCacheLocation( - TopologyMaster.MetricsCacheLocation location, + TopologyManager.MetricsCacheLocation location, String topologyName) { client.getConnectionStateListenable().addListener(new ConnectionStateListener() { @Override @@ -419,7 +419,7 @@ public ListenableFuture setSchedulerLocation( } @Override - public ListenableFuture deleteTMasterLocation(String topologyName) { + public ListenableFuture deleteTManagerLocation(String topologyName) { // It is a EPHEMERAL node and would be removed automatically final SettableFuture result = SettableFuture.create(); safeSetFuture(result, true); diff --git a/heron/statemgrs/src/python/filestatemanager.py b/heron/statemgrs/src/python/filestatemanager.py index 3ece9071a87..01cf2fe7fb7 100644 --- a/heron/statemgrs/src/python/filestatemanager.py +++ b/heron/statemgrs/src/python/filestatemanager.py @@ -30,7 +30,7 @@ from heron.proto.packing_plan_pb2 import PackingPlan from heron.proto.physical_plan_pb2 import PhysicalPlan from heron.proto.scheduler_pb2 import SchedulerLocation -from heron.proto.tmaster_pb2 import TMasterLocation +from heron.proto.tmanager_pb2 import TManagerLocation from heron.proto.topology_pb2 
import Topology # pylint: disable=too-many-instance-attributes @@ -50,7 +50,7 @@ def __init__(self, name, rootpath): self.execution_state_directory = {} self.packing_plan_directory = {} self.pplan_directory = {} - self.tmaster_directory = {} + self.tmanager_directory = {} self.scheduler_location_directory = {} # The watches are triggered when there @@ -65,7 +65,7 @@ def __init__(self, name, rootpath): self.execution_state_watchers = defaultdict(lambda: []) self.packing_plan_watchers = defaultdict(lambda: []) self.pplan_watchers = defaultdict(lambda: []) - self.tmaster_watchers = defaultdict(lambda: []) + self.tmanager_watchers = defaultdict(lambda: []) self.scheduler_location_watchers = defaultdict(lambda: []) # Instantiate the monitoring thread. @@ -144,11 +144,11 @@ def trigger_watches_based_on_files(watchers, path, directory, ProtoClass): self.pplan_watchers, pplan_path, self.pplan_directory, PhysicalPlan) - # Get the directory name for tmaster - tmaster_path = os.path.dirname(self.get_tmaster_path("")) + # Get the directory name for tmanager + tmanager_path = os.path.dirname(self.get_tmanager_path("")) trigger_watches_based_on_files( - self.tmaster_watchers, tmaster_path, - self.tmaster_directory, TMasterLocation) + self.tmanager_watchers, tmanager_path, + self.tmanager_directory, TManagerLocation) # Get the directory name for scheduler location scheduler_location_path = os.path.dirname(self.get_scheduler_location_path("")) @@ -253,19 +253,19 @@ def delete_execution_state(self, topologyName): Delete path is currently not supported in file based state manager. """ - def get_tmaster(self, topologyName, callback=None): + def get_tmanager(self, topologyName, callback=None): """ - Get tmaster + Get tmanager """ if callback: - self.tmaster_watchers[topologyName].append(callback) + self.tmanager_watchers[topologyName].append(callback) else: - tmaster_path = self.get_tmaster_path(topologyName) - with open(tmaster_path, "rb") as f: + tmanager_path = self.get_tmanager_path(topologyName) + with open(tmanager_path, "rb") as f: data = f.read() - tmaster = TMasterLocation() - tmaster.ParseFromString(data) - return tmaster + tmanager = TManagerLocation() + tmanager.ParseFromString(data) + return tmanager return None def get_scheduler_location(self, topologyName, callback=None): diff --git a/heron/statemgrs/src/python/statemanager.py b/heron/statemgrs/src/python/statemanager.py index 1992a5ac0b9..f285dba610b 100644 --- a/heron/statemgrs/src/python/statemanager.py +++ b/heron/statemgrs/src/python/statemanager.py @@ -30,7 +30,7 @@ HERON_PACKING_PLANS_PREFIX = "{0}/packingplans/" HERON_PPLANS_PREFIX = "{0}/pplans/" HERON_SCHEDULER_LOCATION_PREFIX = "{0}/schedulers/" -HERON_TMASTER_PREFIX = "{0}/tmasters/" +HERON_TMANAGER_PREFIX = "{0}/tmanagers/" HERON_TOPOLOGIES_KEY = "{0}/topologies" # pylint: disable=too-many-public-methods, attribute-defined-outside-init @@ -146,8 +146,8 @@ def get_pplan_path(self, topologyName): def get_execution_state_path(self, topologyName): return HERON_EXECUTION_STATE_PREFIX.format(self.rootpath) + topologyName - def get_tmaster_path(self, topologyName): - return HERON_TMASTER_PREFIX.format(self.rootpath) + topologyName + def get_tmanager_path(self, topologyName): + return HERON_TMANAGER_PREFIX.format(self.rootpath) + topologyName def get_scheduler_location_path(self, topologyName): return HERON_SCHEDULER_LOCATION_PREFIX.format(self.rootpath) + topologyName @@ -202,7 +202,7 @@ def delete_execution_state(self, topologyName): pass @abc.abstractmethod - def get_tmaster(self, 
topologyName, callback=None): + def get_tmanager(self, topologyName, callback=None): pass @abc.abstractmethod diff --git a/heron/statemgrs/src/python/zkstatemanager.py b/heron/statemgrs/src/python/zkstatemanager.py index 24809d6318c..f8a932b8d11 100644 --- a/heron/statemgrs/src/python/zkstatemanager.py +++ b/heron/statemgrs/src/python/zkstatemanager.py @@ -25,7 +25,7 @@ from heron.proto.packing_plan_pb2 import PackingPlan from heron.proto.physical_plan_pb2 import PhysicalPlan from heron.proto.scheduler_pb2 import SchedulerLocation -from heron.proto.tmaster_pb2 import TMasterLocation +from heron.proto.tmanager_pb2 import TManagerLocation from heron.proto.topology_pb2 import Topology from heron.statemgrs.src.python.log import Log as LOG @@ -416,8 +416,8 @@ def delete_execution_state(self, topologyName): self.client.delete(path) return True - def get_tmaster(self, topologyName, callback=None): - """ get tmaster """ + def get_tmanager(self, topologyName, callback=None): + """ get tmanager """ isWatching = False # Temp dict used to return result @@ -434,29 +434,29 @@ def callback(data): """ ret["result"] = data - self._get_tmaster_with_watch(topologyName, callback, isWatching) + self._get_tmanager_with_watch(topologyName, callback, isWatching) # The topologies are now populated with the data. return ret["result"] - def _get_tmaster_with_watch(self, topologyName, callback, isWatching): + def _get_tmanager_with_watch(self, topologyName, callback, isWatching): """ Helper function to get pplan with a callback. The future watch is placed only if isWatching is True. """ - path = self.get_tmaster_path(topologyName) + path = self.get_tmanager_path(topologyName) if isWatching: LOG.info("Adding data watch for path: " + path) # pylint: disable=unused-variable, unused-argument @self.client.DataWatch(path) - def watch_tmaster(data, stats, event): - """ invoke callback to watch tmaster """ + def watch_tmanager(data, stats, event): + """ invoke callback to watch tmanager """ if data: - tmaster = TMasterLocation() - tmaster.ParseFromString(data) - callback(tmaster) + tmanager = TManagerLocation() + tmanager.ParseFromString(data) + callback(tmanager) else: callback(None) diff --git a/heron/statemgrs/tests/cpp/statetest.cpp b/heron/statemgrs/tests/cpp/statetest.cpp index ee232b59260..bec1498a13b 100644 --- a/heron/statemgrs/tests/cpp/statetest.cpp +++ b/heron/statemgrs/tests/cpp/statetest.cpp @@ -28,8 +28,8 @@ using heron::common::HeronStateMgr; -void TMasterLocationWatchHandler() { - std::cout << "TMasterLocationWatchHandler triggered " << std::endl; +void TManagerLocationWatchHandler() { + std::cout << "TManagerLocationWatchHandler triggered " << std::endl; } int main(int argc, char* argv[]) { @@ -55,7 +55,7 @@ int main(int argc, char* argv[]) { const std::string topology_name = "test_topology"; HeronStateMgr* state_mgr = HeronStateMgr::MakeStateMgr(host_port, top_level_dir, &ss); - state_mgr->SetTMasterLocationWatch(topology_name, []() { TMasterLocationWatchHandler(); }); + state_mgr->SetTManagerLocationWatch(topology_name, []() { TManagerLocationWatchHandler(); }); state_mgr->SetPackingPlanWatch(topology_name, []() { PackingPlanWatchHandler(); }); ss.loop(); return 0; diff --git a/heron/statemgrs/tests/cpp/zkstatemgr_unittest.cpp b/heron/statemgrs/tests/cpp/zkstatemgr_unittest.cpp index 9fc4c801b65..400a05ddb36 100644 --- a/heron/statemgrs/tests/cpp/zkstatemgr_unittest.cpp +++ b/heron/statemgrs/tests/cpp/zkstatemgr_unittest.cpp @@ -82,8 +82,8 @@ class HeronZKStateMgrTest : public ::testing::Test { } // a 
proxy for the call since the tests cannot call directly // (friendship inheritance is not supported) - static void CallTMasterLocationWatch(HeronZKStateMgr* heron_zkstatemgr) { - heron_zkstatemgr->TMasterLocationWatch(); + static void CallTManagerLocationWatch(HeronZKStateMgr* heron_zkstatemgr) { + heron_zkstatemgr->TManagerLocationWatch(); } static void CallPackingPlanWatch(HeronZKStateMgr* heron_zkstatemgr) { heron_zkstatemgr->PackingPlanWatch(); @@ -96,7 +96,7 @@ class HeronZKStateMgrTest : public ::testing::Test { heron_zkstatemgr->GlobalWatchEventHandler(event); } - static void TmasterLocationWatchHandler() { tmaster_watch_handler_count++; } + static void TmanagerLocationWatchHandler() { tmanager_watch_handler_count++; } static void PackingPlanWatchHandler() { packing_plan_watch_handler_count++; } MockZKClient* mock_zkclient; @@ -104,13 +104,13 @@ class HeronZKStateMgrTest : public ::testing::Test { std::shared_ptr ss; std::string hostportlist; std::string topleveldir; - // used to verify the number of calls to TmasterLocationWatchHandler - static int tmaster_watch_handler_count; + // used to verify the number of calls to TmanagerLocationWatchHandler + static int tmanager_watch_handler_count; static int packing_plan_watch_handler_count; }; // static member needs to be defined outside class... sigh :( -int HeronZKStateMgrTest::tmaster_watch_handler_count = 0; +int HeronZKStateMgrTest::tmanager_watch_handler_count = 0; int HeronZKStateMgrTest::packing_plan_watch_handler_count = 0; // Ensure that ZKClient is created and deleted appropriately. @@ -129,9 +129,9 @@ TEST_F(HeronZKStateMgrTest, testCreateDelete) { delete heron_zkstatemgr; } -TEST_F(HeronZKStateMgrTest, testSetTMasterLocationWatch) { +TEST_F(HeronZKStateMgrTest, testSetTManagerLocationWatch) { const std::string topology_name = "dummy_topology"; - const std::string expected_path = topleveldir + "/tmasters/" + topology_name; + const std::string expected_path = topleveldir + "/tmanagers/" + topology_name; // Calling the factory create method ensures that ZkClient is created once EXPECT_CALL(*mock_zkclient_factory, create(hostportlist, ss, _)).Times(1); @@ -139,10 +139,10 @@ TEST_F(HeronZKStateMgrTest, testSetTMasterLocationWatch) { HeronZKStateMgr* heron_zkstatemgr = new HeronZKStateMgrWithMock(hostportlist, topleveldir, ss, mock_zkclient_factory); - // Ensure that it sets a watch to the tmaster location + // Ensure that it sets a watch to the tmanager location EXPECT_CALL(*mock_zkclient, Exists(expected_path, _, _)).Times(1); - heron_zkstatemgr->SetTMasterLocationWatch(topology_name, []() { TmasterLocationWatchHandler(); }); + heron_zkstatemgr->SetTManagerLocationWatch(topology_name, []() { TmanagerLocationWatchHandler(); }); EXPECT_CALL(*mock_zkclient, Die()).Times(1); EXPECT_CALL(*mock_zkclient_factory, Die()).Times(1); @@ -150,22 +150,22 @@ TEST_F(HeronZKStateMgrTest, testSetTMasterLocationWatch) { delete heron_zkstatemgr; } -TEST_F(HeronZKStateMgrTest, testTMasterLocationWatch) { +TEST_F(HeronZKStateMgrTest, testTManagerLocationWatch) { const std::string topology_name = "dummy_topology"; - const std::string expected_path = topleveldir + "/tmasters/" + topology_name; + const std::string expected_path = topleveldir + "/tmanagers/" + topology_name; HeronZKStateMgr* heron_zkstatemgr = new HeronZKStateMgrWithMock(hostportlist, topleveldir, ss, mock_zkclient_factory); - heron_zkstatemgr->SetTMasterLocationWatch(topology_name, []() { TmasterLocationWatchHandler(); }); + heron_zkstatemgr->SetTManagerLocationWatch(topology_name, []() 
{ TmanagerLocationWatchHandler(); }); - // ensure TmasterLocationWatch resets the watch + // ensure TmanagerLocationWatch resets the watch EXPECT_CALL(*mock_zkclient, Exists(expected_path, _, _)).Times(1); - tmaster_watch_handler_count = 0; - CallTMasterLocationWatch(heron_zkstatemgr); + tmanager_watch_handler_count = 0; + CallTManagerLocationWatch(heron_zkstatemgr); // ensure watch handler is called. - ASSERT_EQ(tmaster_watch_handler_count, 1); + ASSERT_EQ(tmanager_watch_handler_count, 1); EXPECT_CALL(*mock_zkclient, Die()).Times(1); EXPECT_CALL(*mock_zkclient_factory, Die()).Times(1); @@ -182,7 +182,7 @@ TEST_F(HeronZKStateMgrTest, testPackingPlanWatch) { heron_zkstatemgr->SetPackingPlanWatch(topology_name, []() { PackingPlanWatchHandler(); }); - // ensure TmasterLocationWatch resets the watch + // ensure TmanagerLocationWatch resets the watch EXPECT_CALL(*mock_zkclient, Exists(expected_path, _, _)).Times(1); packing_plan_watch_handler_count = 0; @@ -198,14 +198,14 @@ TEST_F(HeronZKStateMgrTest, testPackingPlanWatch) { TEST_F(HeronZKStateMgrTest, testGlobalWatchEventHandler) { const std::string topology_name = "dummy_topology"; - const std::string expected_path = topleveldir + "/tmasters/" + topology_name; + const std::string expected_path = topleveldir + "/tmanagers/" + topology_name; heron::common::HeronZKStateMgr* heron_zkstatemgr = new HeronZKStateMgrWithMock(hostportlist, topleveldir, ss, mock_zkclient_factory); - heron_zkstatemgr->SetTMasterLocationWatch(topology_name, []() { TmasterLocationWatchHandler(); }); + heron_zkstatemgr->SetTManagerLocationWatch(topology_name, []() { TmanagerLocationWatchHandler(); }); - tmaster_watch_handler_count = 0; + tmanager_watch_handler_count = 0; const ZKClient::ZkWatchEvent session_expired_event = {ZOO_SESSION_EVENT, ZOO_EXPIRED_SESSION_STATE, ""}; @@ -217,7 +217,7 @@ TEST_F(HeronZKStateMgrTest, testGlobalWatchEventHandler) { CallGlobalWatchEventHandler(heron_zkstatemgr, session_expired_event); // Ensure watch handler is called - ASSERT_EQ(tmaster_watch_handler_count, 1); + ASSERT_EQ(tmanager_watch_handler_count, 1); EXPECT_CALL(*mock_zkclient_factory, Die()).Times(1); EXPECT_CALL(*mock_zkclient, Die()).Times(1); From 3215fe3fc9a6affea0a7e5cf6ee68074c27c4b4d Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 22:30:59 -0400 Subject: [PATCH 17/32] renaming "topology master" to "topology manager" in heron/healthmgr --- .../common/PhysicalPlanProvider.java | 16 ++++++------ .../sensors/MetricsCacheMetricsProvider.java | 24 ++++++++--------- .../MetricsCacheMetricsProviderTest.java | 26 +++++++++---------- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/heron/healthmgr/src/java/org/apache/heron/healthmgr/common/PhysicalPlanProvider.java b/heron/healthmgr/src/java/org/apache/heron/healthmgr/common/PhysicalPlanProvider.java index 3b20524a873..52b99b5b553 100644 --- a/heron/healthmgr/src/java/org/apache/heron/healthmgr/common/PhysicalPlanProvider.java +++ b/heron/healthmgr/src/java/org/apache/heron/healthmgr/common/PhysicalPlanProvider.java @@ -33,7 +33,7 @@ import org.apache.heron.api.generated.TopologyAPI; import org.apache.heron.proto.system.PhysicalPlans.PhysicalPlan; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.spi.statemgr.SchedulerStateManagerAdaptor; import org.apache.heron.spi.utils.NetworkUtils; @@ -41,7 +41,7 @@ /** * A topology's physical plan may get updated at runtime. 
This provider is used to - * fetch the latest version from the tmaster and provide to any dependent components. + * fetch the latest version from the tmanager and provide to any dependent components. */ public class PhysicalPlanProvider implements Provider { private static final Logger LOG = Logger.getLogger(PhysicalPlanProvider.class.getName()); @@ -62,7 +62,7 @@ public PhysicalPlanProvider(SchedulerStateManagerAdaptor stateManagerAdaptor, protected PhysicalPlan ParseResponseToPhysicalPlan(byte[] responseData) { // byte to base64 string String encodedString = new String(responseData); - LOG.fine("tmaster returns physical plan in base64 str: " + encodedString); + LOG.fine("tmanager returns physical plan in base64 str: " + encodedString); // base64 string to proto bytes byte[] decodedBytes = Base64.getDecoder().decode(encodedString); // construct proto obj from bytes @@ -77,14 +77,14 @@ protected PhysicalPlan ParseResponseToPhysicalPlan(byte[] responseData) { @Override public synchronized PhysicalPlan get() { - TopologyMaster.TMasterLocation tMasterLocation - = stateManagerAdaptor.getTMasterLocation(topologyName); - String host = tMasterLocation.getHost(); - int port = tMasterLocation.getControllerPort(); + TopologyManager.TManagerLocation tManagerLocation + = stateManagerAdaptor.getTManagerLocation(topologyName); + String host = tManagerLocation.getHost(); + int port = tManagerLocation.getControllerPort(); // construct metric cache stat url String url = "http://" + host + ":" + port + "/get_current_physical_plan"; - LOG.fine("tmaster physical plan query endpoint: " + url); + LOG.fine("tmanager physical plan query endpoint: " + url); // http communication HttpURLConnection con = NetworkUtils.getHttpConnection(url); diff --git a/heron/healthmgr/src/java/org/apache/heron/healthmgr/sensors/MetricsCacheMetricsProvider.java b/heron/healthmgr/src/java/org/apache/heron/healthmgr/sensors/MetricsCacheMetricsProvider.java index ac2498a28f3..c70c7b10e53 100644 --- a/heron/healthmgr/src/java/org/apache/heron/healthmgr/sensors/MetricsCacheMetricsProvider.java +++ b/heron/healthmgr/src/java/org/apache/heron/healthmgr/sensors/MetricsCacheMetricsProvider.java @@ -36,12 +36,12 @@ import com.microsoft.dhalion.core.Measurement; import org.apache.heron.proto.system.Common.StatusCode; -import org.apache.heron.proto.tmaster.TopologyMaster; -import org.apache.heron.proto.tmaster.TopologyMaster.MetricInterval; -import org.apache.heron.proto.tmaster.TopologyMaster.MetricResponse.IndividualMetric; -import org.apache.heron.proto.tmaster.TopologyMaster.MetricResponse.IndividualMetric.IntervalValue; -import org.apache.heron.proto.tmaster.TopologyMaster.MetricResponse.TaskMetric; -import org.apache.heron.proto.tmaster.TopologyMaster.MetricsCacheLocation; +import org.apache.heron.proto.tmanager.TopologyManager; +import org.apache.heron.proto.tmanager.TopologyManager.MetricInterval; +import org.apache.heron.proto.tmanager.TopologyManager.MetricResponse.IndividualMetric; +import org.apache.heron.proto.tmanager.TopologyManager.MetricResponse.IndividualMetric.IntervalValue; +import org.apache.heron.proto.tmanager.TopologyManager.MetricResponse.TaskMetric; +import org.apache.heron.proto.tmanager.TopologyManager.MetricsCacheLocation; import org.apache.heron.spi.statemgr.SchedulerStateManagerAdaptor; import org.apache.heron.spi.utils.NetworkUtils; @@ -73,7 +73,7 @@ public Collection getMeasurements(Instant startTime, Collection result = new ArrayList<>(); for (String metric : metricNames) { for (String component : components) { 
- TopologyMaster.MetricResponse response = + TopologyManager.MetricResponse response = getMetricsFromMetricsCache(metric, component, startTime, duration); Collection measurements = parse(response, component, metric, startTime); LOG.fine(String.format("%d measurements received for %s/%s", @@ -87,7 +87,7 @@ public Collection getMeasurements(Instant startTime, @VisibleForTesting @SuppressWarnings("unchecked") Collection parse( - TopologyMaster.MetricResponse response, String component, String metric, Instant startTime) { + TopologyManager.MetricResponse response, String component, String metric, Instant startTime) { Collection metricsData = new ArrayList(); if (response == null || !response.getStatus().getStatus().equals(StatusCode.OK)) { @@ -137,10 +137,10 @@ Collection parse( } @VisibleForTesting - TopologyMaster.MetricResponse getMetricsFromMetricsCache( + TopologyManager.MetricResponse getMetricsFromMetricsCache( String metric, String component, Instant start, Duration duration) { LOG.log(Level.FINE, "MetricsCache Query request metric name : {0}", metric); - TopologyMaster.MetricRequest request = TopologyMaster.MetricRequest.newBuilder() + TopologyManager.MetricRequest request = TopologyManager.MetricRequest.newBuilder() .setComponentName(component) .setExplicitInterval( MetricInterval.newBuilder() @@ -163,8 +163,8 @@ TopologyMaster.MetricResponse getMetricsFromMetricsCache( byte[] responseData = NetworkUtils.readHttpResponse(connection); try { - TopologyMaster.MetricResponse response = - TopologyMaster.MetricResponse.parseFrom(responseData); + TopologyManager.MetricResponse response = + TopologyManager.MetricResponse.parseFrom(responseData); LOG.log(Level.FINE, "MetricsCache Query response: \n{0}", response); return response; } catch (InvalidProtocolBufferException e) { diff --git a/heron/healthmgr/tests/java/org/apache/heron/healthmgr/sensors/MetricsCacheMetricsProviderTest.java b/heron/healthmgr/tests/java/org/apache/heron/healthmgr/sensors/MetricsCacheMetricsProviderTest.java index 1ec3d770fae..9723d24993b 100644 --- a/heron/healthmgr/tests/java/org/apache/heron/healthmgr/sensors/MetricsCacheMetricsProviderTest.java +++ b/heron/healthmgr/tests/java/org/apache/heron/healthmgr/sensors/MetricsCacheMetricsProviderTest.java @@ -36,12 +36,12 @@ import org.apache.heron.proto.system.Common.Status; import org.apache.heron.proto.system.Common.StatusCode; -import org.apache.heron.proto.tmaster.TopologyMaster; -import org.apache.heron.proto.tmaster.TopologyMaster.MetricInterval; -import org.apache.heron.proto.tmaster.TopologyMaster.MetricResponse.IndividualMetric; -import org.apache.heron.proto.tmaster.TopologyMaster.MetricResponse.IndividualMetric.IntervalValue; -import org.apache.heron.proto.tmaster.TopologyMaster.MetricResponse.TaskMetric; -import org.apache.heron.proto.tmaster.TopologyMaster.MetricsCacheLocation; +import org.apache.heron.proto.tmanager.TopologyManager; +import org.apache.heron.proto.tmanager.TopologyManager.MetricInterval; +import org.apache.heron.proto.tmanager.TopologyManager.MetricResponse.IndividualMetric; +import org.apache.heron.proto.tmanager.TopologyManager.MetricResponse.IndividualMetric.IntervalValue; +import org.apache.heron.proto.tmanager.TopologyManager.MetricResponse.TaskMetric; +import org.apache.heron.proto.tmanager.TopologyManager.MetricsCacheLocation; import org.apache.heron.spi.statemgr.SchedulerStateManagerAdaptor; import static org.junit.Assert.assertEquals; import static org.mockito.Mockito.doReturn; @@ -55,7 +55,7 @@ public void 
provides1Comp2InstanceMetricsFromMetricsCache() { String metric = "count"; String comp = "bolt"; - TopologyMaster.MetricResponse response = TopologyMaster.MetricResponse.newBuilder() + TopologyManager.MetricResponse response = TopologyManager.MetricResponse.newBuilder() .setStatus(Status.newBuilder().setStatus(StatusCode.OK)) .addMetric(TaskMetric.newBuilder() .setInstanceId("container_1_bolt_1") @@ -113,7 +113,7 @@ public void providesMultipleComponentMetricsFromMetricsCache() { String metric = "count"; String comp1 = "bolt-1"; - TopologyMaster.MetricResponse response1 = TopologyMaster.MetricResponse.newBuilder() + TopologyManager.MetricResponse response1 = TopologyManager.MetricResponse.newBuilder() .setStatus(Status.newBuilder().setStatus(StatusCode.OK)) .addMetric(TaskMetric.newBuilder() .setInstanceId("container_1_bolt-1_2") @@ -131,7 +131,7 @@ public void providesMultipleComponentMetricsFromMetricsCache() { metric, comp1, Instant.ofEpochSecond(10), Duration.ofSeconds(60)); String comp2 = "bolt-2"; - TopologyMaster.MetricResponse response2 = TopologyMaster.MetricResponse.newBuilder() + TopologyManager.MetricResponse response2 = TopologyManager.MetricResponse.newBuilder() .setStatus(Status.newBuilder().setStatus(StatusCode.OK)) .addMetric(TaskMetric.newBuilder() .setInstanceId("container_1_bolt-2_1") @@ -182,7 +182,7 @@ public void parsesBackPressureMetric() { String metric = "__time_spent_back_pressure_by_compid/container_1_split_1"; String comp = "__stmgr__"; - TopologyMaster.MetricResponse response = TopologyMaster.MetricResponse.newBuilder() + TopologyManager.MetricResponse response = TopologyManager.MetricResponse.newBuilder() .setStatus(Status.newBuilder().setStatus(StatusCode.OK)) .addMetric(TaskMetric.newBuilder() .setInstanceId("stmgr-1") @@ -216,7 +216,7 @@ public void handleMissingData() { String metric = "dummy"; String comp = "split"; - TopologyMaster.MetricResponse response = TopologyMaster.MetricResponse.newBuilder() + TopologyManager.MetricResponse response = TopologyManager.MetricResponse.newBuilder() .setStatus(Status.newBuilder().setStatus(StatusCode.OK)) .build(); @@ -238,7 +238,7 @@ private MetricsCacheMetricsProvider createMetricsProviderSpy() { .setTopologyId("topoId") .setHost("localhost") .setControllerPort(0) - .setMasterPort(0) + .setServerPort(0) .build(); SchedulerStateManagerAdaptor stateMgr = Mockito.mock(SchedulerStateManagerAdaptor.class); @@ -256,7 +256,7 @@ public void testGetTimeLineMetrics() { String metric = "count"; String comp = "bolt"; - TopologyMaster.MetricResponse response = TopologyMaster.MetricResponse.newBuilder() + TopologyManager.MetricResponse response = TopologyManager.MetricResponse.newBuilder() .setStatus(Status.newBuilder().setStatus(StatusCode.OK)) .addMetric(TaskMetric.newBuilder() .setInstanceId("container_1_bolt_1") From f494080c4f8fc55a52aeb6e4f7db13b702751f60 Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 22:33:44 -0400 Subject: [PATCH 18/32] renaming "topology master" to "topology manager" in heron/ckptmgr --- .../heron/ckptmgr/CheckpointManager.java | 2 +- .../ckptmgr/CheckpointManagerServer.java | 20 +++++++++---------- .../ckptmgr/CheckpointManagerServerTest.java | 14 ++++++------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/heron/ckptmgr/src/java/org/apache/heron/ckptmgr/CheckpointManager.java b/heron/ckptmgr/src/java/org/apache/heron/ckptmgr/CheckpointManager.java index e50b59e3b6c..2ee2c644f59 100644 --- a/heron/ckptmgr/src/java/org/apache/heron/ckptmgr/CheckpointManager.java +++ 
b/heron/ckptmgr/src/java/org/apache/heron/ckptmgr/CheckpointManager.java @@ -254,7 +254,7 @@ public static void main(String[] args) throws IOException, Level loggingLevel = Level.INFO; String loggingDir = systemConfig.getHeronLoggingDirectory(); - // Log to file and TMaster + // Log to file and TManager LoggingHelper.loggerInit(loggingLevel, true); LoggingHelper.addLoggingHandler( LoggingHelper.getFileHandler(ckptmgrId, loggingDir, true, diff --git a/heron/ckptmgr/src/java/org/apache/heron/ckptmgr/CheckpointManagerServer.java b/heron/ckptmgr/src/java/org/apache/heron/ckptmgr/CheckpointManagerServer.java index f6c3416a256..a349c5d9875 100644 --- a/heron/ckptmgr/src/java/org/apache/heron/ckptmgr/CheckpointManagerServer.java +++ b/heron/ckptmgr/src/java/org/apache/heron/ckptmgr/CheckpointManagerServer.java @@ -82,7 +82,7 @@ public CheckpointManagerServer( private void registerInitialization() { registerOnRequest(CheckpointManager.RegisterStMgrRequest.newBuilder()); - registerOnRequest(CheckpointManager.RegisterTMasterRequest.newBuilder()); + registerOnRequest(CheckpointManager.RegisterTManagerRequest.newBuilder()); registerOnRequest(CheckpointManager.SaveInstanceStateRequest.newBuilder()); @@ -101,9 +101,9 @@ public void onConnect(SocketChannel channel) { public void onRequest(REQID rid, SocketChannel channel, Message request) { if (request instanceof CheckpointManager.RegisterStMgrRequest) { handleStMgrRegisterRequest(rid, channel, (CheckpointManager.RegisterStMgrRequest) request); - } else if (request instanceof CheckpointManager.RegisterTMasterRequest) { - handleTMasterRegisterRequest(rid, channel, - (CheckpointManager.RegisterTMasterRequest) request); + } else if (request instanceof CheckpointManager.RegisterTManagerRequest) { + handleTManagerRegisterRequest(rid, channel, + (CheckpointManager.RegisterTManagerRequest) request); } else if (request instanceof CheckpointManager.SaveInstanceStateRequest) { handleSaveInstanceStateRequest( rid, channel, (CheckpointManager.SaveInstanceStateRequest) request); @@ -153,20 +153,20 @@ protected void handleCleanStatefulCheckpointRequest( sendResponse(rid, channel, responseBuilder.build()); } - protected void handleTMasterRegisterRequest( + protected void handleTManagerRegisterRequest( REQID rid, SocketChannel channel, - CheckpointManager.RegisterTMasterRequest request + CheckpointManager.RegisterTManagerRequest request ) { - LOG.info("Got a TMaster register request from TMaster host:port " + LOG.info("Got a TManager register request from TManager host:port " + channel.socket().getRemoteSocketAddress()); - CheckpointManager.RegisterTMasterResponse.Builder responseBuilder = - CheckpointManager.RegisterTMasterResponse.newBuilder(); + CheckpointManager.RegisterTManagerResponse.Builder responseBuilder = + CheckpointManager.RegisterTManagerResponse.newBuilder(); if (!checkRegistrationValidity(request.getTopologyName(), request.getTopologyId())) { - String errorMessage = String.format("The TMaster register message came with a different " + String errorMessage = String.format("The TManager register message came with a different " + "topologyName: %s and/or topologyId: %s", request.getTopologyName(), request.getTopologyId()); diff --git a/heron/ckptmgr/tests/java/org/apache/heron/ckptmgr/CheckpointManagerServerTest.java b/heron/ckptmgr/tests/java/org/apache/heron/ckptmgr/CheckpointManagerServerTest.java index 34eba639faa..47077c4482d 100644 --- a/heron/ckptmgr/tests/java/org/apache/heron/ckptmgr/CheckpointManagerServerTest.java +++ 
b/heron/ckptmgr/tests/java/org/apache/heron/ckptmgr/CheckpointManagerServerTest.java @@ -64,7 +64,7 @@ public class CheckpointManagerServerTest { private static CheckpointManager.GetInstanceStateRequest getInstanceStateRequest; private static CheckpointManager.CleanStatefulCheckpointRequest cleanStatefulCheckpointRequest; private static CheckpointManager.RegisterStMgrRequest registerStmgrRequest; - private static CheckpointManager.RegisterTMasterRequest registerTMasterRequest; + private static CheckpointManager.RegisterTManagerRequest registerTManagerRequest; private static PhysicalPlans.Instance instance; @@ -137,7 +137,7 @@ public static void setup() throws Exception { .setPhysicalPlan(pplan) .build(); - registerTMasterRequest = CheckpointManager.RegisterTMasterRequest.newBuilder() + registerTManagerRequest = CheckpointManager.RegisterTManagerRequest.newBuilder() .setTopologyId(TOPOLOGY_ID) .setTopologyName(TOPOLOGY_NAME) .build(); @@ -224,10 +224,10 @@ public void handleResponse(HeronClient client, StatusCode status, } @Test - public void testRegisterTMaster() throws Exception { - runTest(TestRequestHandler.RequestType.REGISTER_TMASTER, + public void testRegisterTManager() throws Exception { + runTest(TestRequestHandler.RequestType.REGISTER_TMANAGER, new HeronServerTester.SuccessResponseHandler( - CheckpointManager.RegisterTMasterResponse.class)); + CheckpointManager.RegisterTManagerResponse.class)); } @Test @@ -249,8 +249,8 @@ public enum RequestType { CheckpointManager.CleanStatefulCheckpointResponse.getDescriptor()), REGISTER_STMGR(registerStmgrRequest, CheckpointManager.RegisterStMgrResponse.getDescriptor()), - REGISTER_TMASTER(registerTMasterRequest, - CheckpointManager.RegisterTMasterResponse.getDescriptor()); + REGISTER_TMANAGER(registerTManagerRequest, + CheckpointManager.RegisterTManagerResponse.getDescriptor()); private Message requestMessage; private Descriptors.Descriptor responseMessageDescriptor; From d015ebbaf64d8c10976cf498f66cd07f517a4a92 Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 22:37:04 -0400 Subject: [PATCH 19/32] renaming "topology master" to "topology manager" in heron/api --- heron/api/src/java/org/apache/heron/api/bolt/IBolt.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/api/src/java/org/apache/heron/api/bolt/IBolt.java b/heron/api/src/java/org/apache/heron/api/bolt/IBolt.java index f294997d50a..7f3ec1bbf89 100644 --- a/heron/api/src/java/org/apache/heron/api/bolt/IBolt.java +++ b/heron/api/src/java/org/apache/heron/api/bolt/IBolt.java @@ -34,7 +34,7 @@ *

 * A bolt's lifecycle is as follows:
 *
 * IBolt object created on client machine. The IBolt is serialized into the topology
- * (using Java serialization) and submitted to the master machine of the cluster (Nimbus).
+ * (using Java serialization) and submitted to the primary machine of the cluster (Nimbus).
 * Nimbus then launches workers which deserialize the object, call prepare on it, and then
 * start processing tuples.
 *

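Taken together, the statemgrs and healthmgr patches above rename the three things a client touches: the state path prefix (HERON_TMANAGER_PREFIX, i.e. {root}/tmanagers/{topology}), the proto types (TManagerLocation from tmanager_pb2), and the TManager HTTP endpoint that PhysicalPlanProvider queries (/get_current_physical_plan, whose body is a base64-encoded PhysicalPlan). A minimal sketch of that lookup chain in Python, assuming a file-based state root; STATE_ROOT and fetch_physical_plan are illustrative names that appear in no patch:

    import base64
    import urllib.request

    from heron.proto.physical_plan_pb2 import PhysicalPlan
    from heron.proto.tmanager_pb2 import TManagerLocation

    STATE_ROOT = "/heron/state"  # hypothetical root; the real value comes from config

    def fetch_physical_plan(topology_name):
        # Read the TManagerLocation proto from the renamed state path
        # ({root}/tmanagers/{topology}), the same file filestatemanager.py reads.
        with open("%s/tmanagers/%s" % (STATE_ROOT, topology_name), "rb") as f:
            location = TManagerLocation()
            location.ParseFromString(f.read())

        # Query the controller endpoint used by PhysicalPlanProvider; the
        # response body is a base64 string wrapping PhysicalPlan proto bytes.
        url = "http://%s:%d/get_current_physical_plan" % (
            location.host, location.controller_port)
        encoded = urllib.request.urlopen(url).read()

        pplan = PhysicalPlan()
        pplan.ParseFromString(base64.b64decode(encoded))
        return pplan

Note that PhysicalPlanProvider reads the controller port, not the renamed server port: the controller port carries the TManager's HTTP endpoints, while the server port (formerly the master port) carries stream manager protocol traffic.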
From 4bc3d8df98741416258191ed34379f91b1ed6191 Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 22:46:38 -0400 Subject: [PATCH 20/32] renaming "topology master" to "topology manager" in heron/stmgr --- heron/stmgr/src/cpp/BUILD | 4 +- .../stmgr/src/cpp/manager/instance-server.cpp | 2 +- .../stmgr/src/cpp/manager/stateful-restorer.h | 2 +- heron/stmgr/src/cpp/manager/stmgr.cpp | 240 +++++------ heron/stmgr/src/cpp/manager/stmgr.h | 40 +- .../stmgr/src/cpp/manager/tmaster-client.cpp | 110 ++--- heron/stmgr/src/cpp/manager/tmaster-client.h | 40 +- heron/stmgr/tests/cpp/server/BUILD | 6 +- .../tests/cpp/server/dummy_metricsmgr.cpp | 22 +- .../stmgr/tests/cpp/server/dummy_metricsmgr.h | 18 +- heron/stmgr/tests/cpp/server/dummy_stmgr.cpp | 44 +- heron/stmgr/tests/cpp/server/dummy_stmgr.h | 14 +- .../stmgr/tests/cpp/server/stmgr_unittest.cpp | 386 +++++++++--------- 13 files changed, 464 insertions(+), 464 deletions(-) diff --git a/heron/stmgr/src/cpp/BUILD b/heron/stmgr/src/cpp/BUILD index f70057bd42f..fdc81daf003 100644 --- a/heron/stmgr/src/cpp/BUILD +++ b/heron/stmgr/src/cpp/BUILD @@ -88,8 +88,8 @@ cc_library( "manager/stmgr-server.h", "manager/stream-consumers.cpp", "manager/stream-consumers.h", - "manager/tmaster-client.cpp", - "manager/tmaster-client.h", + "manager/tmanager-client.cpp", + "manager/tmanager-client.h", ], copts = [ "-Iheron", diff --git a/heron/stmgr/src/cpp/manager/instance-server.cpp b/heron/stmgr/src/cpp/manager/instance-server.cpp index 047c634c54e..de205f2edb0 100644 --- a/heron/stmgr/src/cpp/manager/instance-server.cpp +++ b/heron/stmgr/src/cpp/manager/instance-server.cpp @@ -378,7 +378,7 @@ void InstanceServer::HandleRegisterInstanceRequest(REQID _reqid, Connection* _co // Have all the instances connected to us? if (HaveAllInstancesConnectedToUs()) { - // Notify to stmgr so that it might want to connect to tmaster + // Notify to stmgr so that it might want to connect to tmanager stmgr_->HandleAllInstancesConnected(); } } diff --git a/heron/stmgr/src/cpp/manager/stateful-restorer.h b/heron/stmgr/src/cpp/manager/stateful-restorer.h index b6c13c54845..7d97b9e6889 100644 --- a/heron/stmgr/src/cpp/manager/stateful-restorer.h +++ b/heron/stmgr/src/cpp/manager/stateful-restorer.h @@ -54,7 +54,7 @@ class TupleCache; class StMgrClientMgr; class CkptMgrClient; -// For Heron topologies running in effectively once semantics, the tmaster +// For Heron topologies running in effectively once semantics, the tmanager // could initiate restore topology to a certain globally consistent checkpoint. // This could be triggered either during startup or after failure of certain // topology components. 
StatefulRestorer implements the state machine of this recovery diff --git a/heron/stmgr/src/cpp/manager/stmgr.cpp b/heron/stmgr/src/cpp/manager/stmgr.cpp index aa6625e64e1..f60ffd240d9 100644 --- a/heron/stmgr/src/cpp/manager/stmgr.cpp +++ b/heron/stmgr/src/cpp/manager/stmgr.cpp @@ -45,7 +45,7 @@ #include "util/xor-manager.h" #include "util/neighbour-calculator.h" #include "manager/stateful-restorer.h" -#include "manager/tmaster-client.h" +#include "manager/tmanager-client.h" #include "util/tuple-cache.h" #include "manager/ckptmgr-client.h" @@ -71,7 +71,7 @@ const sp_string METRIC_TIME_SPENT_BACK_PRESSURE_INIT = "__server/__time_spent_back_pressure_initiated"; const sp_int64 PROCESS_METRICS_FREQUENCY = 10_s; const sp_int64 UPTIME_METRIC_FREQUENCY = 1_s; -const sp_int64 TMASTER_RETRY_FREQUENCY = 10_s; +const sp_int64 TMANAGER_RETRY_FREQUENCY = 10_s; StMgr::StMgr(shared_ptr eventLoop, const sp_string& _myhost, sp_int32 _data_port, sp_int32 _local_data_port, @@ -126,7 +126,7 @@ void StMgr::Init() { back_pressure_metric_initiated_ = make_shared(); metrics_manager_client_->register_metric(METRIC_TIME_SPENT_BACK_PRESSURE_INIT, back_pressure_metric_initiated_); - state_mgr_->SetTMasterLocationWatch(topology_name_, [this]() { this->FetchTMasterLocation(); }); + state_mgr_->SetTManagerLocationWatch(topology_name_, [this]() { this->FetchTManagerLocation(); }); if (0 != metricscachemgr_mode_.compare("disabled")) { state_mgr_->SetMetricsCacheLocationWatch( topology_name_, [this]() { this->FetchMetricsCacheLocation(); }); @@ -147,8 +147,8 @@ void StMgr::Init() { CHECK_GT( eventLoop_->registerTimer( - [this](EventLoop::Status status) { this->CheckTMasterLocation(status); }, false, - config::HeronInternalsConfigReader::Instance()->GetCheckTMasterLocationIntervalSec() * + [this](EventLoop::Status status) { this->CheckTManagerLocation(status); }, false, + config::HeronInternalsConfigReader::Instance()->GetCheckTManagerLocationIntervalSec() * 1_s), 0); // fire only once @@ -159,10 +159,10 @@ void StMgr::Init() { StartStmgrServer(); // Create and start InstanceServer StartInstanceServer(); - // FetchTMasterLocation() triggers the StMgr::CreateTMasterClient() where the TMasterClient - // constructor needs actual stmgr ports, thus put FetchTMasterLocation() + // FetchTManagerLocation() triggers the StMgr::CreateTManagerClient() where the TManagerClient + // constructor needs actual stmgr ports, thus put FetchTManagerLocation() // has to be after after StartStmgrServer and StartInstanceServer() - FetchTMasterLocation(); + FetchTManagerLocation(); if (0 != metricscachemgr_mode_.compare("disabled")) { FetchMetricsCacheLocation(); } @@ -221,9 +221,9 @@ const NetworkOptions& StMgr::GetInstanceServerNetworkOptions() const { return instance_server_->get_serveroptions(); } -void StMgr::CheckTMasterLocation(EventLoop::Status) { - if (!tmaster_client_) { - LOG(FATAL) << "Could not fetch the TMaster location in time. Exiting. "; +void StMgr::CheckTManagerLocation(EventLoop::Status) { + if (!tmanager_client_) { + LOG(FATAL) << "Could not fetch the TManager location in time. Exiting. 
"; } } @@ -246,20 +246,20 @@ void StMgr::UpdateProcessMetrics(EventLoop::Status) { stmgr_process_metrics_->scope(METRIC_MEM_USED)->SetValue(totalmemory); } -void StMgr::FetchTMasterLocation() { - LOG(INFO) << "Fetching TMaster Location"; - auto tmaster = make_shared(); +void StMgr::FetchTManagerLocation() { + LOG(INFO) << "Fetching TManager Location"; + auto tmanager = make_shared(); - auto cb = [tmaster, this](proto::system::StatusCode status) { - this->OnTMasterLocationFetch(tmaster, status); + auto cb = [tmanager, this](proto::system::StatusCode status) { + this->OnTManagerLocationFetch(tmanager, status); }; - state_mgr_->GetTMasterLocation(topology_name_, tmaster, std::move(cb)); + state_mgr_->GetTManagerLocation(topology_name_, tmanager, std::move(cb)); } void StMgr::FetchMetricsCacheLocation() { LOG(INFO) << "Fetching MetricsCache Location"; - auto metricscache = make_shared(); + auto metricscache = make_shared(); auto cb = [metricscache, this](proto::system::StatusCode status) { this->OnMetricsCacheLocationFetch(metricscache, status); @@ -334,20 +334,20 @@ void StMgr::CreateCheckpointMgrClient() { save_watcher, get_watcher, ckpt_watcher); } -void StMgr::CreateTMasterClient(shared_ptr tmasterLocation) { - CHECK(!tmaster_client_); - LOG(INFO) << "Creating Tmaster Client at " << tmasterLocation->host() << ":" - << tmasterLocation->master_port(); - NetworkOptions master_options; - master_options.set_host(tmasterLocation->host()); - master_options.set_port(tmasterLocation->master_port()); - master_options.set_socket_family(PF_INET); - master_options.set_max_packet_size( +void StMgr::CreateTManagerClient(shared_ptr tmanagerLocation) { + CHECK(!tmanager_client_); + LOG(INFO) << "Creating Tmanager Client at " << tmanagerLocation->host() << ":" + << tmanagerLocation->server_port(); + NetworkOptions client_options; + client_options.set_host(tmanagerLocation->host()); + client_options.set_port(tmanagerLocation->server_port()); + client_options.set_socket_family(PF_INET); + client_options.set_max_packet_size( config::HeronInternalsConfigReader::Instance() - ->GetHeronTmasterNetworkMasterOptionsMaximumPacketMb() * + ->GetHeronTmanagerNetworkServerOptionsMaximumPacketMb() * 1_MB); - master_options.set_high_watermark(high_watermark_); - master_options.set_low_watermark(low_watermark_); + client_options.set_high_watermark(high_watermark_); + client_options.set_low_watermark(low_watermark_); auto pplan_watch = [this](shared_ptr pplan) { this->NewPhysicalPlan(pplan); @@ -373,7 +373,7 @@ void StMgr::CreateTMasterClient(shared_ptr tmas this->BroadcastCheckpointSaved(_msg); }; - tmaster_client_ = make_shared(eventLoop_, master_options, stmgr_id_, stmgr_host_, + tmanager_client_ = make_shared(eventLoop_, client_options, stmgr_id_, stmgr_host_, data_port_, local_data_port_, shell_port_, std::move(pplan_watch), std::move(stateful_checkpoint_watch), @@ -393,140 +393,140 @@ void StMgr::CreateTupleCache() { tuple_cache_->RegisterCheckpointDrainer(&StMgr::DrainDownstreamCheckpoint, this); } -void StMgr::HandleNewTmaster(shared_ptr newTmasterLocation) { - // Lets delete the existing tmaster if we have one. - if (tmaster_client_) { - LOG(INFO) << "Destroying existing tmasterClient"; - tmaster_client_->Die(); - tmaster_client_ = NULL; +void StMgr::HandleNewTmanager(shared_ptr newTmanagerLocation) { + // Lets delete the existing tmanager if we have one. 
+ if (tmanager_client_) { + LOG(INFO) << "Destroying existing tmanagerClient"; + tmanager_client_->Die(); + tmanager_client_ = NULL; } - // Create the tmaster and the servers/clients but don't start the tmaster + // Create the tmanager and the servers/clients but don't start the tmanager // connection as yet. We'll do that once we connect to all the instances. - CreateTMasterClient(newTmasterLocation); + CreateTManagerClient(newTmanagerLocation); - // In the case where we are doing a tmaster refresh we may have already + // In the case where we are doing a tmanager refresh we may have already // connected to all of the instances if (instance_server_ && instance_server_->HaveAllInstancesConnectedToUs()) { - StartTMasterClient(); + StartTManagerClient(); } } -void StMgr::BroadcastTmasterLocation(shared_ptr tmasterLocation) { - // Notify metrics manager of the tmaster location changes +void StMgr::BroadcastTmanagerLocation(shared_ptr tmanagerLocation) { + // Notify metrics manager of the tmanager location changes // TODO(vikasr): What if the refresh fails? - metrics_manager_client_->RefreshTMasterLocation(*tmasterLocation); + metrics_manager_client_->RefreshTManagerLocation(*tmanagerLocation); } void StMgr::BroadcastMetricsCacheLocation( - shared_ptr tmasterLocation) { + shared_ptr tmanagerLocation) { // Notify metrics manager of the metricscache location changes // TODO(huijun): What if the refresh fails? LOG(INFO) << "BroadcastMetricsCacheLocation"; - metrics_manager_client_->RefreshMetricsCacheLocation(*tmasterLocation); + metrics_manager_client_->RefreshMetricsCacheLocation(*tmanagerLocation); } -void StMgr::OnTMasterLocationFetch(shared_ptr newTmasterLocation, +void StMgr::OnTManagerLocationFetch(shared_ptr newTmanagerLocation, proto::system::StatusCode _status) { if (_status != proto::system::OK) { - LOG(INFO) << "TMaster Location Fetch failed with status " << _status; - LOG(INFO) << "Retrying after " << TMASTER_RETRY_FREQUENCY << " micro seconds "; + LOG(INFO) << "TManager Location Fetch failed with status " << _status; + LOG(INFO) << "Retrying after " << TMANAGER_RETRY_FREQUENCY << " micro seconds "; CHECK_GT(eventLoop_->registerTimer([this](EventLoop::Status) { - this->FetchTMasterLocation(); - }, false, TMASTER_RETRY_FREQUENCY), 0); + this->FetchTManagerLocation(); + }, false, TMANAGER_RETRY_FREQUENCY), 0); } else { - // We got a new tmaster location. + // We got a new tmanager location. 
// Just verify that we are talking to the right entity - if (newTmasterLocation->topology_name() != topology_name_ || - newTmasterLocation->topology_id() != topology_id_) { - LOG(FATAL) << "Topology name/id mismatch between stmgr and TMaster " - << "We expected " << topology_name_ << " : " << topology_id_ << " but tmaster had " - << newTmasterLocation->topology_name() << " : " - << newTmasterLocation->topology_id(); + if (newTmanagerLocation->topology_name() != topology_name_ || + newTmanagerLocation->topology_id() != topology_id_) { + LOG(FATAL) << "Topology name/id mismatch between stmgr and TManager " + << "We expected " << topology_name_ << " : " << topology_id_ << " but tmanager had " + << newTmanagerLocation->topology_name() << " : " + << newTmanagerLocation->topology_id(); } - LOG(INFO) << "Fetched TMasterLocation to be " << newTmasterLocation->host() << ":" - << newTmasterLocation->master_port(); + LOG(INFO) << "Fetched TManagerLocation to be " << newTmanagerLocation->host() << ":" + << newTmanagerLocation->server_port(); - bool isNewTmaster = true; + bool isNewTmanager = true; - if (tmaster_client_) { - sp_string currentTmasterHostPort = tmaster_client_->getTmasterHostPort(); - std::string newTmasterHostPort = - newTmasterLocation->host() + ":" + std::to_string(newTmasterLocation->master_port()); + if (tmanager_client_) { + sp_string currentTmanagerHostPort = tmanager_client_->getTmanagerHostPort(); + std::string newTmanagerHostPort = + newTmanagerLocation->host() + ":" + std::to_string(newTmanagerLocation->server_port()); - if (currentTmasterHostPort == newTmasterHostPort) { - LOG(INFO) << "New tmaster location same as the current one. " + if (currentTmanagerHostPort == newTmanagerHostPort) { + LOG(INFO) << "New tmanager location same as the current one. " << "Nothing to do here... "; - isNewTmaster = false; + isNewTmanager = false; } else { - LOG(INFO) << "New tmaster location different from the current one." - << " Current one at " << currentTmasterHostPort << " and New one at " - << newTmasterHostPort; - isNewTmaster = true; + LOG(INFO) << "New tmanager location different from the current one." + << " Current one at " << currentTmanagerHostPort << " and New one at " + << newTmanagerHostPort; + isNewTmanager = true; } } - if (isNewTmaster) { - HandleNewTmaster(newTmasterLocation); + if (isNewTmanager) { + HandleNewTmanager(newTmanagerLocation); } // Stmgr doesn't know what other things might have changed, so it is important - // to broadcast the location, even though we know its the same tmaster. - BroadcastTmasterLocation(newTmasterLocation); + // to broadcast the location, even though we know its the same tmanager. + BroadcastTmanagerLocation(newTmanagerLocation); } } void StMgr::OnMetricsCacheLocationFetch( - shared_ptr newTmasterLocation, + shared_ptr newTmanagerLocation, proto::system::StatusCode _status) { if (_status != proto::system::OK) { LOG(INFO) << "MetricsCache Location Fetch failed with status " << _status; - LOG(INFO) << "Retrying after " << TMASTER_RETRY_FREQUENCY << " micro seconds "; + LOG(INFO) << "Retrying after " << TMANAGER_RETRY_FREQUENCY << " micro seconds "; CHECK_GT(eventLoop_->registerTimer([this](EventLoop::Status) { this->FetchMetricsCacheLocation(); - }, false, TMASTER_RETRY_FREQUENCY), 0); + }, false, TMANAGER_RETRY_FREQUENCY), 0); } else { // We got a new metricscache location. 
// Just verify that we are talking to the right entity - if (newTmasterLocation->topology_name() != topology_name_ || - newTmasterLocation->topology_id() != topology_id_) { + if (newTmanagerLocation->topology_name() != topology_name_ || + newTmanagerLocation->topology_id() != topology_id_) { LOG(FATAL) << "Topology name/id mismatch between stmgr and MetricsCache " << "We expected " << topology_name_ << " : " << topology_id_ << " but MetricsCache had " - << newTmasterLocation->topology_name() << " : " - << newTmasterLocation->topology_id() << std::endl; + << newTmanagerLocation->topology_name() << " : " + << newTmanagerLocation->topology_id() << std::endl; } - LOG(INFO) << "Fetched MetricsCacheLocation to be " << newTmasterLocation->host() << ":" - << newTmasterLocation->master_port(); + LOG(INFO) << "Fetched MetricsCacheLocation to be " << newTmanagerLocation->host() << ":" + << newTmanagerLocation->server_port(); // Stmgr doesn't know what other things might have changed, so it is important // to broadcast the location, even though we know its the same metricscache. - BroadcastMetricsCacheLocation(newTmasterLocation); + BroadcastMetricsCacheLocation(newTmanagerLocation); } } -// Start the tmaster client -void StMgr::StartTMasterClient() { - if (!tmaster_client_) { - LOG(INFO) << "We haven't received tmaster location yet" - << ", so tmaster_client_ hasn't been created" +// Start the tmanager client +void StMgr::StartTManagerClient() { + if (!tmanager_client_) { + LOG(INFO) << "We haven't received tmanager location yet" + << ", so tmanager_client_ hasn't been created" << "Once we get the location, it will be started"; // Nothing else to do here } else { std::vector all_instance_info; instance_server_->GetInstanceInfo(all_instance_info); - tmaster_client_->SetInstanceInfo(all_instance_info); - if (!tmaster_client_->IsConnected()) { - LOG(INFO) << "Connecting to the TMaster as all the instances have connected to us"; - tmaster_client_->Start(); + tmanager_client_->SetInstanceInfo(all_instance_info); + if (!tmanager_client_->IsConnected()) { + LOG(INFO) << "Connecting to the TManager as all the instances have connected to us"; + tmanager_client_->Start(); } } } void StMgr::NewPhysicalPlan(shared_ptr _pplan) { - LOG(INFO) << "Received a new physical plan from tmaster"; + LOG(INFO) << "Received a new physical plan from tmanager"; heron::config::TopologyConfigHelper::LogTopology(_pplan->topology()); // first make sure that we are part of the plan ;) bool found = false; @@ -842,8 +842,8 @@ void StMgr::DrainInstanceData(sp_int32 _task_id, proto::system::HeronTupleSet2* if (dropped && stateful_restorer_ && !stateful_restorer_->InProgress()) { LOG(INFO) << "We dropped some messages because we are not yet connected with stmgr " << dest_stmgr_id << " and we are not in restore. 
Hence sending Reset " - << "message to TMaster"; - tmaster_client_->SendResetTopologyState("", _task_id, "Dropped Instance Tuples"); + << "message to TManager"; + tmanager_client_->SendResetTopologyState("", _task_id, "Dropped Instance Tuples"); restore_initiated_metrics_->incr(); } __global_protobuf_pool_release__(_tuple); @@ -929,10 +929,10 @@ void StMgr::HandleDeadStMgrConnection(const sp_string& _stmgr_id) { // If we are stateful topology, we need to send a resetTopology message // in case we are not in 2pc if (stateful_restorer_) { - if (!stateful_restorer_->InProgress() && tmaster_client_) { + if (!stateful_restorer_->InProgress() && tmanager_client_) { LOG(INFO) << "We lost connection with stmgr " << _stmgr_id - << " and hence sending ResetTopology message to tmaster"; - tmaster_client_->SendResetTopologyState(_stmgr_id, -1, "Dead Stmgr"); + << " and hence sending ResetTopology message to tmanager"; + tmanager_client_->SendResetTopologyState(_stmgr_id, -1, "Dead Stmgr"); restore_initiated_metrics_->incr(); } else { // We are in restore @@ -954,22 +954,22 @@ void StMgr::HandleAllInstancesConnected() { if (stateful_restorer_->InProgress()) { // We are in the middle of a restore stateful_restorer_->HandleAllInstancesConnected(); - } else if (tmaster_client_ && tmaster_client_->IsConnected()) { - LOG(INFO) << "We are already connected to tmaster(which means we are not in" + } else if (tmanager_client_ && tmanager_client_->IsConnected()) { + LOG(INFO) << "We are already connected to tmanager(which means we are not in" << " initial startup), and we are not in the middle of restore." << " This means that while running normally, some instances" << " got reconnected to us and thus we might have lost some tuples in middle" << " We must reset the topology"; - tmaster_client_->SendResetTopologyState("", -1, "All Instances connected"); + tmanager_client_->SendResetTopologyState("", -1, "All Instances connected"); restore_initiated_metrics_->incr(); } else { - // This is the first time we came up when we haven't even connected to tmaster - // Now that all instances are connected to us, we should connect to tmaster - StartTMasterClient(); + // This is the first time we came up when we haven't even connected to tmanager + // Now that all instances are connected to us, we should connect to tmanager + StartTManagerClient(); } } else { - // Now we can connect to the tmaster - StartTMasterClient(); + // Now we can connect to the tmanager + StartTManagerClient(); } } @@ -979,8 +979,8 @@ void StMgr::HandleDeadInstance(sp_int32 _task_id) { stateful_restorer_->HandleDeadInstanceConnection(_task_id); } else { LOG(INFO) << "An instance " << _task_id << " died while we are not " - << "in restore. Sending ResetMessage to tmaster"; - tmaster_client_->SendResetTopologyState("", _task_id, "Dead Instance"); + << "in restore. 
Sending ResetMessage to tmanager"; + tmanager_client_->SendResetTopologyState("", _task_id, "Dead Instance"); restore_initiated_metrics_->incr(); } } @@ -1042,7 +1042,7 @@ void StMgr::HandleSavedInstanceState(const proto::system::Instance& _instance, LOG(INFO) << "Got notification from ckptmgr that we saved instance state for task " << _instance.info().task_id() << " for checkpoint " << _checkpoint_id; - tmaster_client_->SavedInstanceState(_instance, _checkpoint_id); + tmanager_client_->SavedInstanceState(_instance, _checkpoint_id); } // Invoked by CheckpointMgr Client when it retreives the state of an instance @@ -1073,10 +1073,10 @@ void StMgr::HandleDownStreamStatefulCheckpoint( _message.checkpoint_id()); } -// Called by TmasterClient when it receives directive from tmaster +// Called by TmanagerClient when it receives directive from tmanager // to restore the topology to _checkpoint_id checkpoint void StMgr::RestoreTopologyState(sp_string _checkpoint_id, sp_int64 _restore_txid) { - LOG(INFO) << "Got a Restore Topology State message from Tmaster for checkpoint " + LOG(INFO) << "Got a Restore Topology State message from Tmanager for checkpoint " << _checkpoint_id << " and txid " << _restore_txid; CHECK(stateful_restorer_); @@ -1092,14 +1092,14 @@ void StMgr::BroadcastCheckpointSaved( instance_server_->BroadcastStatefulCheckpointSaved(_msg); } -// Called by TmasterClient when it receives directive from tmaster +// Called by TmanagerClient when it receives directive from tmanager // to start processing after having previously recovered the state at _checkpoint_id void StMgr::StartStatefulProcessing(sp_string _checkpoint_id) { - LOG(INFO) << "Received StartProcessing message from tmaster for " + LOG(INFO) << "Received StartProcessing message from tmanager for " << _checkpoint_id; CHECK(stateful_restorer_); if (stateful_restorer_->InProgress()) { - LOG(FATAL) << "StartProcessing received from Tmaster for " + LOG(FATAL) << "StartProcessing received from Tmanager for " << _checkpoint_id << " when we are still in Restore"; } instance_server_->SendStartInstanceStatefulProcessing(_checkpoint_id); @@ -1110,16 +1110,16 @@ void StMgr::HandleRestoreInstanceStateResponse(sp_int32 _task_id, const std::string& _checkpoint_id) { // If we are stateful topology, we might want to see how the restore went // and if it was successful and all other local instances have recovered - // send back a success response to tmaster saying that we have recovered + // send back a success response to tmanager saying that we have recovered CHECK(stateful_restorer_); stateful_restorer_->HandleInstanceRestoredState(_task_id, _status.status(), _checkpoint_id); } // Called after we have recovered our state(either successfully or unsuccessfully) -// We need to let our tmaster know +// We need to let our tmanager know void StMgr::HandleStatefulRestoreDone(proto::system::StatusCode _status, std::string _checkpoint_id, sp_int64 _restore_txid) { - tmaster_client_->SendRestoreTopologyStateResponse(_status, _checkpoint_id, _restore_txid); + tmanager_client_->SendRestoreTopologyStateResponse(_status, _checkpoint_id, _restore_txid); } // Patch new physical plan with internal hydrated topology but keep new topology data: diff --git a/heron/stmgr/src/cpp/manager/stmgr.h b/heron/stmgr/src/cpp/manager/stmgr.h index 582ac3ad73e..74948f3bda8 100644 --- a/heron/stmgr/src/cpp/manager/stmgr.h +++ b/heron/stmgr/src/cpp/manager/stmgr.h @@ -52,7 +52,7 @@ using std::shared_ptr; class StMgrServer; class InstanceServer; class StMgrClientMgr; -class 
TMasterClient; +class TManagerClient; class StreamConsumers; class XorManager; class TupleCache; @@ -76,7 +76,7 @@ class StMgr { // All kinds of initialization like starting servers and clients void Init(); - // Called by tmaster client when a new physical plan is available + // Called by tmanager client when a new physical plan is available void NewPhysicalPlan(shared_ptr pplan); void HandleStreamManagerData(const sp_string& _stmgr_id, pool_unique_ptr _message); @@ -104,7 +104,7 @@ class StMgr { void SendStopBackPressureToOtherStMgrs(); void StartBackPressureOnSpouts(); void AttemptStopBackPressureFromSpouts(); - void StartTMasterClient(); + void StartTManagerClient(); bool DidAnnounceBackPressure(); bool DidOthersAnnounceBackPressure(); const NetworkOptions& GetStmgrServerNetworkOptions() const; @@ -124,14 +124,14 @@ class StMgr { const std::string& _checkpoint_id); private: - void OnTMasterLocationFetch(shared_ptr _tmaster, + void OnTManagerLocationFetch(shared_ptr _tmanager, proto::system::StatusCode); void OnMetricsCacheLocationFetch( - shared_ptr _tmaster, proto::system::StatusCode); - void FetchTMasterLocation(); + shared_ptr _tmanager, proto::system::StatusCode); + void FetchTManagerLocation(); void FetchMetricsCacheLocation(); - // A wrapper that calls FetchTMasterLocation. Needed for RegisterTimer - void CheckTMasterLocation(EventLoop::Status); + // A wrapper that calls FetchTManagerLocation. Needed for RegisterTimer + void CheckTManagerLocation(EventLoop::Status); void UpdateUptimeMetric(); void UpdateProcessMetrics(EventLoop::Status); @@ -167,28 +167,28 @@ class StMgr { sp_int32 ExtractTopologyTimeout(const proto::api::Topology& _topology); - void CreateTMasterClient(shared_ptr tmasterLocation); + void CreateTManagerClient(shared_ptr tmanagerLocation); void StartStmgrServer(); void StartInstanceServer(); void CreateTupleCache(); - // This is called when we receive a valid new Tmaster Location. - // Performs all the actions necessary to deal with new tmaster. - void HandleNewTmaster(shared_ptr newTmasterLocation); - // Broadcast the tmaster location changes to other components. (MM for now) - void BroadcastTmasterLocation(shared_ptr tmasterLocation); + // This is called when we receive a valid new Tmanager Location. + // Performs all the actions necessary to deal with new tmanager. + void HandleNewTmanager(shared_ptr newTmanagerLocation); + // Broadcast the tmanager location changes to other components. (MM for now) + void BroadcastTmanagerLocation(shared_ptr tmanagerLocation); void BroadcastMetricsCacheLocation( - shared_ptr tmasterLocation); + shared_ptr tmanagerLocation); - // Called when TMaster sends a InitiateStatefulCheckpoint message with a checkpoint_id + // Called when TManager sends an InitiateStatefulCheckpoint message with a checkpoint_id // This will send initiate checkpoint messages to local instances to capture their state. void InitiateStatefulCheckpoint(sp_string checkpoint_id); - // Invoked when TMaster asks us to restore all our local instances state to + // Invoked when TManager asks us to restore all our local instances' state to // the checkpoint represented by _checkpoint_id. This starts the // Restore state machine void RestoreTopologyState(sp_string _checkpoint_id, sp_int64 _restore_txid); - // Invoked when TMaster sends the StartStatefulProcessing request to kick + // Invoked when TManager sends the StartStatefulProcessing request to kick // start the computation. 
We send the StartStatefulProcessing to all our // local instances so that they can start the processing. void StartStatefulProcessing(sp_string _checkpoint_id); @@ -197,7 +197,7 @@ class StMgr { void HandleStatefulRestoreDone(proto::system::StatusCode _status, std::string _checkpoint_id, sp_int64 _restore_txid); - // Called when stmgr received StatefulConsistentCheckpointSaved message from TMaster + // Called when stmgr received StatefulConsistentCheckpointSaved message from TManager // Then, the stmgr will forward this fact to all heron instances connected to it void BroadcastCheckpointSaved(const proto::ckptmgr::StatefulConsistentCheckpointSaved& _msg); @@ -222,7 +222,7 @@ class StMgr { shared_ptr instance_server_; // Pushing data to other streammanagers shared_ptr clientmgr_; - shared_ptr tmaster_client_; + shared_ptr tmanager_client_; shared_ptr eventLoop_; // Map of task_id to stmgr_id diff --git a/heron/stmgr/src/cpp/manager/tmaster-client.cpp b/heron/stmgr/src/cpp/manager/tmaster-client.cpp index 657077d68d2..4b170b43352 100644 --- a/heron/stmgr/src/cpp/manager/tmaster-client.cpp +++ b/heron/stmgr/src/cpp/manager/tmaster-client.cpp @@ -17,7 +17,7 @@ * under the License. */ -#include "manager/tmaster-client.h" +#include "manager/tmanager-client.h" #include #include #include @@ -35,7 +35,7 @@ namespace heron { namespace stmgr { -TMasterClient::TMasterClient(shared_ptr eventLoop, const NetworkOptions& _options, +TManagerClient::TManagerClient(shared_ptr eventLoop, const NetworkOptions& _options, const sp_string& _stmgr_id, const sp_string& _stmgr_host, sp_int32 _data_port, sp_int32 _local_data_port, sp_int32 _shell_port, VCallback> _pplan_watch, @@ -59,33 +59,33 @@ TMasterClient::TMasterClient(shared_ptr eventLoop, const NetworkOptio reconnect_timer_id(0), heartbeat_timer_id(0), reconnect_attempts_(0) { - reconnect_tmaster_interval_sec_ = config::HeronInternalsConfigReader::Instance() - ->GetHeronStreammgrClientReconnectTmasterIntervalSec(); - stream_to_tmaster_heartbeat_interval_sec_ = config::HeronInternalsConfigReader::Instance() - ->GetHeronStreammgrTmasterHeartbeatIntervalSec(); + reconnect_tmanager_interval_sec_ = config::HeronInternalsConfigReader::Instance() + ->GetHeronStreammgrClientReconnectTmanagerIntervalSec(); + stream_to_tmanager_heartbeat_interval_sec_ = config::HeronInternalsConfigReader::Instance() + ->GetHeronStreammgrTmanagerHeartbeatIntervalSec(); reconnect_max_attempt_ = config::HeronInternalsConfigReader::Instance() - ->GetHeronStreammgrClientReconnectTmasterMaxAttempts(); + ->GetHeronStreammgrClientReconnectTmanagerMaxAttempts(); reconnect_timer_cb = [this]() { this->OnReConnectTimer(); }; heartbeat_timer_cb = [this]() { this->OnHeartbeatTimer(); }; - InstallResponseHandler(make_unique(), - &TMasterClient::HandleRegisterResponse); - InstallResponseHandler(make_unique(), - &TMasterClient::HandleHeartbeatResponse); - InstallMessageHandler(&TMasterClient::HandleNewAssignmentMessage); - InstallMessageHandler(&TMasterClient::HandleStatefulCheckpointMessage); - InstallMessageHandler(&TMasterClient::HandleRestoreTopologyStateRequest); - InstallMessageHandler(&TMasterClient::HandleStartStmgrStatefulProcessing); - InstallMessageHandler(&TMasterClient::HandleStatefulCheckpointSavedMessage); + InstallResponseHandler(make_unique(), + &TManagerClient::HandleRegisterResponse); + InstallResponseHandler(make_unique(), + &TManagerClient::HandleHeartbeatResponse); + InstallMessageHandler(&TManagerClient::HandleNewAssignmentMessage); + 
InstallMessageHandler(&TManagerClient::HandleStatefulCheckpointMessage); + InstallMessageHandler(&TManagerClient::HandleRestoreTopologyStateRequest); + InstallMessageHandler(&TManagerClient::HandleStartStmgrStatefulProcessing); + InstallMessageHandler(&TManagerClient::HandleStatefulCheckpointSavedMessage); } -TMasterClient::~TMasterClient() { +TManagerClient::~TManagerClient() { CleanInstances(); } -void TMasterClient::Die() { - LOG(INFO) << "Tmaster client is being destroyed " << std::endl; +void TManagerClient::Die() { + LOG(INFO) << "Tmanager client is being destroyed " << std::endl; to_die_ = true; Stop(); // Unregister the timers @@ -98,11 +98,11 @@ } } -sp_string TMasterClient::getTmasterHostPort() { +sp_string TManagerClient::getTmanagerHostPort() { return options_.get_host() + ":" + std::to_string(options_.get_port()); } -void TMasterClient::HandleConnect(NetworkErrorCode _status) { +void TManagerClient::HandleConnect(NetworkErrorCode _status) { if (_status == OK) { // reset the reconnect attempt once connection established reconnect_attempts_ = 0; @@ -111,7 +111,7 @@ Stop(); return; } - LOG(INFO) << "Connected to tmaster running at " << get_clientoptions().get_host() << ":" + LOG(INFO) << "Connected to tmanager running at " << get_clientoptions().get_host() << ":" << get_clientoptions().get_port() << std::endl; SendRegisterRequest(); } else { @@ -119,23 +119,23 @@ delete this; return; } - LOG(ERROR) << "Could not connect to tmaster at " << get_clientoptions().get_host() << ":" + LOG(ERROR) << "Could not connect to tmanager at " << get_clientoptions().get_host() << ":" << get_clientoptions().get_port() << ", Status code: " << _status << std::endl; LOG(INFO) << "Will retry again..." << std::endl; // Shouldn't be in a state where a previous timer is not cleared yet. if (reconnect_timer_id == 0) { - reconnect_timer_id = AddTimer(reconnect_timer_cb, reconnect_tmaster_interval_sec_ * 1000000); + reconnect_timer_id = AddTimer(reconnect_timer_cb, reconnect_tmanager_interval_sec_ * 1000000); } } } -void TMasterClient::HandleClose(NetworkErrorCode _code) { +void TManagerClient::HandleClose(NetworkErrorCode _code) { if (to_die_) { delete this; return; } - LOG(INFO) << "TMaster connection closed with code " << _code << std::endl; - LOG(INFO) << "Will try to reconnect again after " << reconnect_tmaster_interval_sec_ << "seconds" + LOG(INFO) << "TManager connection closed with code " << _code << std::endl; + LOG(INFO) << "Will try to reconnect again after " << reconnect_tmanager_interval_sec_ << " seconds" << std::endl; // Shouldn't be in a state where a previous timer is not cleared yet. 
CHECK_EQ(reconnect_timer_id, 0); @@ -146,15 +146,15 @@ void TMasterClient::HandleClose(NetworkErrorCode _code) { heartbeat_timer_id = 0; } - reconnect_timer_id = AddTimer(reconnect_timer_cb, reconnect_tmaster_interval_sec_ * 1000000); + reconnect_timer_id = AddTimer(reconnect_timer_cb, reconnect_tmanager_interval_sec_ * 1000000); } -void TMasterClient::HandleRegisterResponse( +void TManagerClient::HandleRegisterResponse( void*, - pool_unique_ptr _response, + pool_unique_ptr _response, NetworkErrorCode _status) { if (_status != OK) { - LOG(ERROR) << "non ok network stack code for Register Response from Tmaster" << std::endl; + LOG(ERROR) << "non-OK network stack code for Register Response from Tmanager" << std::endl; Stop(); return; } @@ -162,23 +162,23 @@ proto::system::StatusCode status = _response->status().status(); if (status != proto::system::OK) { - LOG(ERROR) << "Register with Tmaster failed with status " << status << std::endl; + LOG(ERROR) << "Register with Tmanager failed with status " << status << std::endl; Stop(); } else { - LOG(INFO) << "Registered successfully with Tmaster" << std::endl; + LOG(INFO) << "Registered successfully with Tmanager" << std::endl; if (_response->has_pplan()) { pplan_watch_(shared_ptr(_response->release_pplan())); } // Shouldn't be in a state where a previous timer is not cleared yet. CHECK_EQ(heartbeat_timer_id, 0); heartbeat_timer_id = - AddTimer(heartbeat_timer_cb, stream_to_tmaster_heartbeat_interval_sec_ * 1000000); + AddTimer(heartbeat_timer_cb, stream_to_tmanager_heartbeat_interval_sec_ * 1000000); } } -void TMasterClient::HandleHeartbeatResponse( +void TManagerClient::HandleHeartbeatResponse( void*, - pool_unique_ptr _response, + pool_unique_ptr _response, NetworkErrorCode _status) { if (_status != OK) { LOG(ERROR) << "NonOK response message for heartbeat Response" << std::endl; @@ -195,49 +195,49 @@ // Shouldn't be in a state where a previous timer is not cleared yet. CHECK_EQ(heartbeat_timer_id, 0); heartbeat_timer_id = - AddTimer(heartbeat_timer_cb, stream_to_tmaster_heartbeat_interval_sec_ * 1000000); + AddTimer(heartbeat_timer_cb, stream_to_tmanager_heartbeat_interval_sec_ * 1000000); } } -void TMasterClient::HandleNewAssignmentMessage( +void TManagerClient::HandleNewAssignmentMessage( pool_unique_ptr _message) { LOG(INFO) << "Got a new assignment" << std::endl; pplan_watch_(shared_ptr(_message->release_new_pplan())); } -void TMasterClient::HandleStatefulCheckpointMessage( +void TManagerClient::HandleStatefulCheckpointMessage( pool_unique_ptr _message) { - LOG(INFO) << "Got a new start stateful checkpoint message from tmaster with id " + LOG(INFO) << "Got a new start stateful checkpoint message from tmanager with id " << _message->checkpoint_id(); stateful_checkpoint_watch_(_message->checkpoint_id()); } -void TMasterClient::OnReConnectTimer() { +void TManagerClient::OnReConnectTimer() { // The timer has triggered the callback, so reset the timer_id; reconnect_timer_id = 0; if (++reconnect_attempts_ < reconnect_max_attempt_) { Start(); } else { - LOG(FATAL) << "Could not connect to tmaster after reaching" + LOG(FATAL) << "Could not connect to tmanager after reaching" << " the max reconnect attempts " << reconnect_max_attempt_ << ". 
Quitting..."; } } -void TMasterClient::OnHeartbeatTimer() { +void TManagerClient::OnHeartbeatTimer() { LOG(INFO) << "Sending heartbeat" << std::endl; // The timer has triggered the callback, so reset the timer_id; heartbeat_timer_id = 0; SendHeartbeatRequest(); } -void TMasterClient::CleanInstances() { +void TManagerClient::CleanInstances() { instances_.clear(); } -void TMasterClient::SendRegisterRequest() { - auto request = make_unique(); +void TManagerClient::SendRegisterRequest() { + auto request = make_unique(); sp_string cwd; FileUtils::getCwd(cwd); @@ -258,7 +258,7 @@ void TMasterClient::SendRegisterRequest() { return; } -void TMasterClient::SetInstanceInfo(const std::vector& _instances) { +void TManagerClient::SetInstanceInfo(const std::vector& _instances) { if (!instances_.empty()) { CleanInstances(); } @@ -270,8 +270,8 @@ void TMasterClient::SetInstanceInfo(const std::vector& } } -void TMasterClient::SendHeartbeatRequest() { - auto request = make_unique(); +void TManagerClient::SendHeartbeatRequest() { + auto request = make_unique(); request->set_heartbeat_time(time(nullptr)); // TODO(vikasr) Send actual stats request->mutable_stats(); @@ -279,7 +279,7 @@ void TMasterClient::SendHeartbeatRequest() { return; } -void TMasterClient::SavedInstanceState(const proto::system::Instance& _instance, +void TManagerClient::SavedInstanceState(const proto::system::Instance& _instance, const std::string& _checkpoint_id) { proto::ckptmgr::InstanceStateStored message; message.set_checkpoint_id(_checkpoint_id); @@ -287,7 +287,7 @@ void TMasterClient::SavedInstanceState(const proto::system::Instance& _instance, SendMessage(message); } -void TMasterClient::SendRestoreTopologyStateResponse(proto::system::StatusCode _status, +void TManagerClient::SendRestoreTopologyStateResponse(proto::system::StatusCode _status, const std::string& _ckpt_id, sp_int64 _txid) { proto::ckptmgr::RestoreTopologyStateResponse message; @@ -297,22 +297,22 @@ void TMasterClient::SendRestoreTopologyStateResponse(proto::system::StatusCode _ SendMessage(message); } -void TMasterClient::HandleRestoreTopologyStateRequest( +void TManagerClient::HandleRestoreTopologyStateRequest( pool_unique_ptr _message) { restore_topology_watch_(_message->checkpoint_id(), _message->restore_txid()); } -void TMasterClient::HandleStartStmgrStatefulProcessing( +void TManagerClient::HandleStartStmgrStatefulProcessing( pool_unique_ptr _message) { start_stateful_watch_(_message->checkpoint_id()); } -void TMasterClient::HandleStatefulCheckpointSavedMessage( +void TManagerClient::HandleStatefulCheckpointSavedMessage( pool_unique_ptr _msg) { broadcast_checkpoint_saved_(*_msg); } -void TMasterClient::SendResetTopologyState(const std::string& _dead_stmgr, +void TManagerClient::SendResetTopologyState(const std::string& _dead_stmgr, int32_t _dead_task, const std::string& _reason) { proto::ckptmgr::ResetTopologyState message; diff --git a/heron/stmgr/src/cpp/manager/tmaster-client.h b/heron/stmgr/src/cpp/manager/tmaster-client.h index 2457d7280a7..2fd1f7ffdb7 100644 --- a/heron/stmgr/src/cpp/manager/tmaster-client.h +++ b/heron/stmgr/src/cpp/manager/tmaster-client.h @@ -17,8 +17,8 @@ * under the License. 
*/ -#ifndef SRC_CPP_SVCS_STMGR_SRC_MANAGER_TMASTER_CLIENT_H_ -#define SRC_CPP_SVCS_STMGR_SRC_MANAGER_TMASTER_CLIENT_H_ +#ifndef SRC_CPP_SVCS_STMGR_SRC_MANAGER_TMANAGER_CLIENT_H_ +#define SRC_CPP_SVCS_STMGR_SRC_MANAGER_TMANAGER_CLIENT_H_ #include #include @@ -33,9 +33,9 @@ namespace stmgr { using std::shared_ptr; -class TMasterClient : public Client { +class TManagerClient : public Client { public: - TMasterClient(shared_ptr eventLoop, const NetworkOptions& _options, + TManagerClient(shared_ptr eventLoop, const NetworkOptions& _options, const sp_string& _stmgr_id, const sp_string& _stmgr_host, sp_int32 _data_port, sp_int32 _local_data_port, sp_int32 _shell_port, @@ -45,7 +45,7 @@ class TMasterClient : public Client { VCallback _start_stateful_watch, VCallback _broadcast_checkpoint_saved); - virtual ~TMasterClient(); + virtual ~TManagerClient(); // Told by the upper layer to disconnect and self destruct void Die(); @@ -53,19 +53,19 @@ // Sets the instances that belong to us void SetInstanceInfo(const std::vector& _instances); - // returns the tmaster address "host:port" form. - sp_string getTmasterHostPort(); + // returns the tmanager address in "host:port" form. + sp_string getTmanagerHostPort(); - // Send a InstanceStateStored message to tmaster + // Send an InstanceStateStored message to tmanager void SavedInstanceState(const proto::system::Instance& _instance, const std::string& _checkpoint_id); - // Send RestoreTopologyStateResponse to tmaster + // Send RestoreTopologyStateResponse to tmanager void SendRestoreTopologyStateResponse(proto::system::StatusCode _status, const std::string& _checkpoint_id, sp_int64 _txid); - // Send ResetTopologyState message to tmaster + // Send ResetTopologyState message to tmanager void SendResetTopologyState(const std::string& _dead_stmgr, int32_t _dead_instance, const std::string& _reason); @@ -76,10 +76,10 @@ private: void HandleRegisterResponse(void*, - pool_unique_ptr _response, + pool_unique_ptr _response, NetworkErrorCode); void HandleHeartbeatResponse(void*, - pool_unique_ptr response, + pool_unique_ptr response, NetworkErrorCode); void HandleNewAssignmentMessage(pool_unique_ptr _message); @@ -106,19 +106,19 @@ sp_int32 local_data_port_; sp_int32 shell_port_; - // Set of instances to be reported to tmaster + // Set of instances to be reported to tmanager std::set> instances_; bool to_die_; - // We invoke this callback upon a new physical plan from tmaster + // We invoke this callback upon a new physical plan from tmanager VCallback> pplan_watch_; - // We invoke this callback upon receiving a checkpoint message from tmaster + // We invoke this callback upon receiving a checkpoint message from tmanager // passing in the checkpoint id VCallback stateful_checkpoint_watch_; - // We invoke this callback upon receiving a restore topology message from tmaster + // We invoke this callback upon receiving a restore topology message from tmanager // passing in the checkpoint id and the txid VCallback restore_topology_watch_; - // We invoke this callback upon receiving a StartStatefulProcessing message from tmaster + // We invoke this callback upon receiving a StartStatefulProcessing message from tmanager // passing in the checkpoint id VCallback start_stateful_watch_; // This callback will be invoked upon receiving a StatefulConsistentCheckpointSaved message. 
@@ -126,8 +126,8 @@ class TMasterClient : public Client { VCallback broadcast_checkpoint_saved_; // Configs to be read - sp_int32 reconnect_tmaster_interval_sec_; - sp_int32 stream_to_tmaster_heartbeat_interval_sec_; + sp_int32 reconnect_tmanager_interval_sec_; + sp_int32 stream_to_tmanager_heartbeat_interval_sec_; sp_int64 reconnect_timer_id; sp_int64 heartbeat_timer_id; @@ -144,4 +144,4 @@ class TMasterClient : public Client { } // namespace stmgr } // namespace heron -#endif // SRC_CPP_SVCS_STMGR_SRC_MANAGER_TMASTER_CLIENT_H_ +#endif // SRC_CPP_SVCS_STMGR_SRC_MANAGER_TMANAGER_CLIENT_H_ diff --git a/heron/stmgr/tests/cpp/server/BUILD b/heron/stmgr/tests/cpp/server/BUILD index 7b6708e83ca..9d9d5c5ec26 100644 --- a/heron/stmgr/tests/cpp/server/BUILD +++ b/heron/stmgr/tests/cpp/server/BUILD @@ -20,7 +20,7 @@ cc_test( "-Iheron/statemgrs/src/cpp", "-Iheron/stmgr/src/cpp", "-Iheron/stmgr/tests/cpp", - "-Iheron/tmaster/src/cpp", + "-Iheron/tmanager/src/cpp", "-I$(GENDIR)/heron", "-I$(GENDIR)/heron/common/src/cpp", ], @@ -34,8 +34,8 @@ cc_test( "//heron/stmgr/src/cpp:manager-cxx", "//heron/stmgr/src/cpp:grouping-cxx", "//heron/stmgr/src/cpp:util-cxx", - # TODO: Stmgr unit tests should not depend on tmaster - "//heron/tmaster/src/cpp:tmaster-cxx", + # TODO: Stmgr unit tests should not depend on tmanager + "//heron/tmanager/src/cpp:tmanager-cxx", "@com_google_googletest//:gtest", ], ) diff --git a/heron/stmgr/tests/cpp/server/dummy_metricsmgr.cpp b/heron/stmgr/tests/cpp/server/dummy_metricsmgr.cpp index 4bd2b50e63f..12917371686 100644 --- a/heron/stmgr/tests/cpp/server/dummy_metricsmgr.cpp +++ b/heron/stmgr/tests/cpp/server/dummy_metricsmgr.cpp @@ -31,16 +31,16 @@ #include "server/dummy_metricsmgr.h" ///////////////////////////// DummyMtrMgr ///////////////////////////////////////////////// DummyMtrMgr::DummyMtrMgr(std::shared_ptr ss, const NetworkOptions& options, - const sp_string& stmgr_id, CountDownLatch* tmasterLatch, + const sp_string& stmgr_id, CountDownLatch* tmanagerLatch, CountDownLatch* connectionCloseLatch) : Server(ss, options), stmgr_id_expected_(stmgr_id), location_(NULL), - tmasterLatch_(tmasterLatch), + tmanagerLatch_(tmanagerLatch), connectionCloseLatch_(connectionCloseLatch) { InstallRequestHandler(&DummyMtrMgr::HandleMetricPublisherRegisterRequest); InstallMessageHandler(&DummyMtrMgr::HandleMetricPublisherPublishMessage); - InstallMessageHandler(&DummyMtrMgr::HandleTMasterLocationMessage); + InstallMessageHandler(&DummyMtrMgr::HandleTManagerLocationMessage); } DummyMtrMgr::~DummyMtrMgr() { delete location_; } @@ -67,16 +67,16 @@ void DummyMtrMgr::HandleMetricPublisherRegisterRequest(REQID id, Connection* con void DummyMtrMgr::HandleMetricPublisherPublishMessage( Connection*, pool_unique_ptr message) {} -void DummyMtrMgr::HandleTMasterLocationMessage( - Connection*, pool_unique_ptr message) { - location_ = message->release_tmaster(); +void DummyMtrMgr::HandleTManagerLocationMessage( + Connection*, pool_unique_ptr message) { + location_ = message->release_tmanager(); - LOG(INFO) << "Got tmaster location: " << location_->host() << ":" << location_->master_port(); + LOG(INFO) << "Got tmanager location: " << location_->host() << ":" << location_->server_port(); - if (tmasterLatch_ != NULL) { - // notify that we received tmaster location - tmasterLatch_->countDown(); + if (tmanagerLatch_ != NULL) { + // notify that we received tmanager location + tmanagerLatch_->countDown(); } } -heron::proto::tmaster::TMasterLocation* DummyMtrMgr::get_tmaster() { return location_; } 
+heron::proto::tmanager::TManagerLocation* DummyMtrMgr::get_tmanager() { return location_; } diff --git a/heron/stmgr/tests/cpp/server/dummy_metricsmgr.h b/heron/stmgr/tests/cpp/server/dummy_metricsmgr.h index 17710ef1103..674824b1808 100644 --- a/heron/stmgr/tests/cpp/server/dummy_metricsmgr.h +++ b/heron/stmgr/tests/cpp/server/dummy_metricsmgr.h @@ -25,8 +25,8 @@ namespace heron { namespace proto { -namespace tmaster { -class TMasterLocation; +namespace tmanager { +class TManagerLocation; } } } @@ -35,10 +35,10 @@ class DummyMtrMgr : public Server { public: DummyMtrMgr(std::shared_ptr ss, const NetworkOptions& options, const sp_string& stmgr_id, - CountDownLatch* tmasterLatch, CountDownLatch* connectionCloseLatch); + CountDownLatch* tmanagerLatch, CountDownLatch* connectionCloseLatch); virtual ~DummyMtrMgr(); - heron::proto::tmaster::TMasterLocation* get_tmaster(); + heron::proto::tmanager::TManagerLocation* get_tmanager(); protected: // handle an incoming connection from server @@ -53,14 +53,14 @@ class DummyMtrMgr : public Server { virtual void HandleMetricPublisherPublishMessage( Connection* _conn, pool_unique_ptr _message); - virtual void HandleTMasterLocationMessage( - Connection*, pool_unique_ptr _message); + virtual void HandleTManagerLocationMessage( + Connection*, pool_unique_ptr _message); private: sp_string stmgr_id_expected_; - heron::proto::tmaster::TMasterLocation* location_; - // Used to signal that tmaster location has been received - CountDownLatch* tmasterLatch_; + heron::proto::tmanager::TManagerLocation* location_; + // Used to signal that tmanager location has been received + CountDownLatch* tmanagerLatch_; // Used to signal that connection to stmgr has been closed CountDownLatch* connectionCloseLatch_; }; diff --git a/heron/stmgr/tests/cpp/server/dummy_stmgr.cpp b/heron/stmgr/tests/cpp/server/dummy_stmgr.cpp index 5f94fa4e04e..5444706a049 100644 --- a/heron/stmgr/tests/cpp/server/dummy_stmgr.cpp +++ b/heron/stmgr/tests/cpp/server/dummy_stmgr.cpp @@ -28,8 +28,8 @@ using std::shared_ptr; -///////////////////////////// DummyTMasterClient /////////////////////////////////////////// -DummyTMasterClient::DummyTMasterClient( +///////////////////////////// DummyTManagerClient /////////////////////////////////////////// +DummyTManagerClient::DummyTManagerClient( shared_ptr eventLoop, const NetworkOptions& _options, const sp_string& stmgr_id, const sp_string& stmgr_host, sp_int32 stmgr_port, sp_int32 shell_port, const std::vector>& _instances) @@ -39,21 +39,21 @@ DummyTMasterClient::DummyTMasterClient( stmgr_port_(stmgr_port), shell_port_(shell_port), instances_(_instances) { - InstallResponseHandler(make_unique(), - &DummyTMasterClient::HandleRegisterResponse); + InstallResponseHandler(make_unique(), + &DummyTManagerClient::HandleRegisterResponse); // Setup the call back function to be invoked when retrying retry_cb_ = [this]() { this->Retry(); }; } -DummyTMasterClient::~DummyTMasterClient() {} +DummyTManagerClient::~DummyTManagerClient() {} -void DummyTMasterClient::HandleRegisterResponse( +void DummyTManagerClient::HandleRegisterResponse( void*, - pool_unique_ptr response, + pool_unique_ptr response, NetworkErrorCode) { } -void DummyTMasterClient::HandleConnect(NetworkErrorCode _status) { +void DummyTManagerClient::HandleConnect(NetworkErrorCode _status) { if (_status == OK) { CreateAndSendRegisterRequest(); } else { @@ -62,10 +62,10 @@ void DummyTMasterClient::HandleConnect(NetworkErrorCode _status) { } } -void DummyTMasterClient::HandleClose(NetworkErrorCode) {} +void 
DummyTManagerClient::HandleClose(NetworkErrorCode) {} -void DummyTMasterClient::CreateAndSendRegisterRequest() { - auto request = make_unique(); +void DummyTManagerClient::CreateAndSendRegisterRequest() { + auto request = make_unique(); heron::proto::system::StMgr* stmgr = request->mutable_stmgr(); sp_string cwd; stmgr->set_id(stmgr_id_); @@ -85,16 +85,16 @@ void DummyTMasterClient::CreateAndSendRegisterRequest() { DummyStMgr::DummyStMgr(shared_ptr ss, const NetworkOptions& options, const sp_string& stmgr_id, const sp_string& stmgr_host, sp_int32 stmgr_port, - const sp_string& tmaster_host, sp_int32 tmaster_port, sp_int32 shell_port, + const sp_string& tmanager_host, sp_int32 tmanager_port, sp_int32 shell_port, const std::vector>& _instances) : Server(ss, options), num_start_bp_(0), num_stop_bp_(0) { - NetworkOptions tmaster_options; - tmaster_options.set_host(tmaster_host); - tmaster_options.set_port(tmaster_port); - tmaster_options.set_max_packet_size(1_MB); - tmaster_options.set_socket_family(PF_INET); + NetworkOptions tmanager_options; + tmanager_options.set_host(tmanager_host); + tmanager_options.set_port(tmanager_port); + tmanager_options.set_max_packet_size(1_MB); + tmanager_options.set_socket_family(PF_INET); - tmaster_client_ = new DummyTMasterClient(ss, tmaster_options, stmgr_id, stmgr_host, stmgr_port, + tmanager_client_ = new DummyTManagerClient(ss, tmanager_options, stmgr_id, stmgr_host, stmgr_port, shell_port, _instances); InstallRequestHandler(&DummyStMgr::HandleStMgrHelloRequest); InstallMessageHandler(&DummyStMgr::HandleStartBackPressureMessage); @@ -102,14 +102,14 @@ DummyStMgr::DummyStMgr(shared_ptr ss, const NetworkOptions& optio } DummyStMgr::~DummyStMgr() { - tmaster_client_->Stop(); - delete tmaster_client_; + tmanager_client_->Stop(); + delete tmanager_client_; } sp_int32 DummyStMgr::Start() { if (SP_OK == Server::Start()) { - tmaster_client_->setStmgrPort(get_serveroptions().get_port()); - tmaster_client_->Start(); + tmanager_client_->setStmgrPort(get_serveroptions().get_port()); + tmanager_client_->Start(); return SP_OK; } else { return SP_NOTOK; diff --git a/heron/stmgr/tests/cpp/server/dummy_stmgr.h b/heron/stmgr/tests/cpp/server/dummy_stmgr.h index bd0342b6783..1be63f2065e 100644 --- a/heron/stmgr/tests/cpp/server/dummy_stmgr.h +++ b/heron/stmgr/tests/cpp/server/dummy_stmgr.h @@ -23,13 +23,13 @@ #include #include "network/network_error.h" -class DummyTMasterClient : public Client { +class DummyTManagerClient : public Client { public: - DummyTMasterClient(std::shared_ptr eventLoop, const NetworkOptions& _options, + DummyTManagerClient(std::shared_ptr eventLoop, const NetworkOptions& _options, const sp_string& stmgr_id, const sp_string& stmgr_host, sp_int32 stmgr_port, sp_int32 shell_port, const std::vector>& instances); - virtual ~DummyTMasterClient(); + virtual ~DummyTManagerClient(); void setStmgrPort(sp_int32 stmgrPort) { stmgr_port_ = stmgrPort; @@ -43,7 +43,7 @@ class DummyTMasterClient : public Client { virtual void HandleClose(NetworkErrorCode _status); virtual void HandleRegisterResponse( void*, - pool_unique_ptr response, + pool_unique_ptr response, NetworkErrorCode); // Send worker request void CreateAndSendRegisterRequest(); @@ -61,8 +61,8 @@ class DummyStMgr : public Server { public: DummyStMgr(std::shared_ptr ss, const NetworkOptions& options, const sp_string& stmgr_id, - const sp_string& stmgr_host, sp_int32 stmgr_port, const sp_string& tmaster_host, - sp_int32 tmaster_port, sp_int32 shell_port, + const sp_string& stmgr_host, sp_int32 stmgr_port, 
const sp_string& tmanager_host, + sp_int32 tmanager_port, sp_int32 shell_port, const std::vector>& instances); virtual ~DummyStMgr(); @@ -90,7 +90,7 @@ class DummyStMgr : public Server { std::vector other_stmgrs_ids_; sp_int32 num_start_bp_; sp_int32 num_stop_bp_; - DummyTMasterClient* tmaster_client_; + DummyTManagerClient* tmanager_client_; }; #endif diff --git a/heron/stmgr/tests/cpp/server/stmgr_unittest.cpp b/heron/stmgr/tests/cpp/server/stmgr_unittest.cpp index fb446d9d8b4..ebbe6828c60 100644 --- a/heron/stmgr/tests/cpp/server/stmgr_unittest.cpp +++ b/heron/stmgr/tests/cpp/server/stmgr_unittest.cpp @@ -39,7 +39,7 @@ #include "config/physical-plan-helper.h" #include "statemgr/heron-statemgr.h" #include "statemgr/heron-localfilestatemgr.h" -#include "manager/tmaster.h" +#include "manager/tmanager.h" #include "manager/stmgr.h" #include "server/dummy_instance.h" #include "server/dummy_stmgr.h" @@ -208,7 +208,7 @@ void CreateLocalStateOnFS(heron::proto::api::Topology* topology, heron::proto::system::PackingPlan* packingPlan, sp_string dpath) { auto ss = std::make_shared(); - // Write the dummy topology/tmaster location out to the local file system via the state mgr + // Write the dummy topology/tmanager location out to the local file system via the state mgr heron::common::HeronLocalFileStateMgr state_mgr(dpath, ss); state_mgr.CreateTopology(*topology, NULL); state_mgr.CreatePackingPlan(topology->name(), *packingPlan, NULL); @@ -256,19 +256,19 @@ void StartServer(std::shared_ptr ss) { ss->loop(); } -void StartTMaster(std::shared_ptr& ss, heron::tmaster::TMaster*& tmaster, - std::thread*& tmaster_thread, const sp_string& zkhostportlist, +void StartTManager(std::shared_ptr& ss, heron::tmanager::TManager*& tmanager, + std::thread*& tmanager_thread, const sp_string& zkhostportlist, const sp_string& topology_name, const sp_string& topology_id, const sp_string& dpath, - sp_int32 tmaster_port, sp_int32 tmaster_controller_port, - sp_int32 tmaster_stats_port, sp_int32 metrics_mgr_port, + sp_int32 tmanager_port, sp_int32 tmanager_controller_port, + sp_int32 tmanager_stats_port, sp_int32 metrics_mgr_port, sp_int32 ckptmgr_port) { ss = std::make_shared(); - tmaster = new heron::tmaster::TMaster(zkhostportlist, topology_name, topology_id, dpath, - tmaster_controller_port, tmaster_port, tmaster_stats_port, + tmanager = new heron::tmanager::TManager(zkhostportlist, topology_name, topology_id, dpath, + tmanager_controller_port, tmanager_port, tmanager_stats_port, metrics_mgr_port, ckptmgr_port, metrics_sinks_config_filename, LOCALHOST, ss); - tmaster_thread = new std::thread(StartServer, ss); + tmanager_thread = new std::thread(StartServer, ss); } void StartStMgr(std::shared_ptr& ss, heron::stmgr::StMgr*& mgr, @@ -301,7 +301,7 @@ void StartStMgr(std::shared_ptr& ss, heron::stmgr::StMgr*& mgr, void StartDummyStMgr(std::shared_ptr& ss, DummyStMgr*& mgr, std::thread*& stmgr_thread, - sp_int32& stmgr_port, sp_int32 tmaster_port, sp_int32 shell_port, + sp_int32& stmgr_port, sp_int32 tmanager_port, sp_int32 shell_port, const sp_string& stmgr_id, const std::vector>& instances) { // Create the select server for this stmgr to use @@ -313,7 +313,7 @@ void StartDummyStMgr(std::shared_ptr& ss, DummyStMgr*& mgr, options.set_max_packet_size(1_MB); options.set_socket_family(PF_INET); - mgr = new DummyStMgr(ss, options, stmgr_id, LOCALHOST, stmgr_port, LOCALHOST, tmaster_port, + mgr = new DummyStMgr(ss, options, stmgr_id, LOCALHOST, stmgr_port, LOCALHOST, tmanager_port, shell_port, instances); EXPECT_EQ(0, stmgr_port); 
EXPECT_EQ(0, mgr->Start()) << "DummyStMgr bind " << LOCALHOST << ":" << stmgr_port; @@ -324,7 +324,7 @@ void StartDummyStMgr(std::shared_ptr& ss, DummyStMgr*& mgr, void StartDummyMtrMgr(std::shared_ptr& ss, DummyMtrMgr*& mgr, std::thread*& mtmgr_thread, - sp_int32& mtmgr_port, const sp_string& stmgr_id, CountDownLatch* tmasterLatch, + sp_int32& mtmgr_port, const sp_string& stmgr_id, CountDownLatch* tmanagerLatch, CountDownLatch* connectionCloseLatch) { // Create the select server for this stmgr to use ss = std::make_shared(); @@ -335,7 +335,7 @@ void StartDummyMtrMgr(std::shared_ptr& ss, DummyMtrMgr*& mgr, options.set_max_packet_size(10_MB); options.set_socket_family(PF_INET); - mgr = new DummyMtrMgr(ss, options, stmgr_id, tmasterLatch, connectionCloseLatch); + mgr = new DummyMtrMgr(ss, options, stmgr_id, tmanagerLatch, connectionCloseLatch); EXPECT_EQ(0, mgr->Start()) << "DummyMtrMgr bind " << LOCALHOST << ":" << mtmgr_port; mtmgr_port = mgr->get_serveroptions().get_port(); EXPECT_GT(mtmgr_port, 0); @@ -389,10 +389,10 @@ void StartDummyBoltInstance(std::shared_ptr& ss, DummyBoltInstanc struct CommonResources { // arguments - sp_string tmaster_host_; - sp_int32 tmaster_port_; - sp_int32 tmaster_controller_port_; - sp_int32 tmaster_stats_port_; + sp_string tmanager_host_; + sp_int32 tmanager_port_; + sp_int32 tmanager_controller_port_; + sp_int32 tmanager_stats_port_; sp_int32 metricsmgr_port_; sp_int32 shell_port_; sp_int32 ckptmgr_port_; @@ -419,8 +419,8 @@ struct CommonResources { heron::proto::api::Topology* topology_; heron::proto::system::PackingPlan* packing_plan_; - heron::tmaster::TMaster* tmaster_; - std::thread* tmaster_thread_; + heron::tmanager::TManager* tmanager_; + std::thread* tmanager_thread_; // Component std::vector spout_workers_list_; @@ -450,7 +450,7 @@ struct CommonResources { sp_int64 high_watermark_; sp_int64 low_watermark_; - CommonResources() : topology_(NULL), tmaster_(NULL), tmaster_thread_(NULL) { + CommonResources() : topology_(NULL), tmanager_(NULL), tmanager_thread_(NULL) { // Create the sington for heron_internals_config_reader // if it does not exist if (!heron::config::HeronInternalsConfigReader::Exists()) { @@ -478,7 +478,7 @@ struct CommonResources { } }; -void StartTMaster(CommonResources& common) { +void StartTManager(CommonResources& common) { // Generate a dummy topology common.topology_ = GenerateDummyTopology( common.topology_name_, common.topology_id_, common.num_spouts_, common.num_spout_instances_, @@ -496,14 +496,14 @@ void StartTMaster(CommonResources& common) { common.stmgrs_id_list_.push_back(id); } - // Start the tmaster - std::shared_ptr tmaster_eventLoop; + // Start the tmanager + std::shared_ptr tmanager_eventLoop; - StartTMaster(tmaster_eventLoop, common.tmaster_, common.tmaster_thread_, common.zkhostportlist_, + StartTManager(tmanager_eventLoop, common.tmanager_, common.tmanager_thread_, common.zkhostportlist_, common.topology_name_, common.topology_id_, common.dpath_, - common.tmaster_port_, common.tmaster_controller_port_, common.tmaster_stats_port_, + common.tmanager_port_, common.tmanager_controller_port_, common.tmanager_stats_port_, common.metricsmgr_port_, common.ckptmgr_port_); - common.ss_list_.push_back(tmaster_eventLoop); + common.ss_list_.push_back(tmanager_eventLoop); } void DistributeWorkersAcrossStmgrs(CommonResources& common) { @@ -629,7 +629,7 @@ void StartStMgrs(CommonResources& common) { heron::stmgr::StMgr* mgr = NULL; std::thread* stmgr_thread = NULL; - StartStMgr(stmgr_ss, mgr, stmgr_thread, 
common.tmaster_host_, common.stmgr_ports_[i], + StartStMgr(stmgr_ss, mgr, stmgr_thread, common.tmanager_host_, common.stmgr_ports_[i], common.local_data_ports_[i], common.topology_name_, common.topology_id_, common.topology_, common.stmgr_instance_id_list_[i], common.stmgrs_id_list_[i], common.zkhostportlist_, @@ -642,12 +642,12 @@ void StartStMgrs(CommonResources& common) { } } -void StartMetricsMgr(CommonResources& common, CountDownLatch* tmasterLatch, +void StartMetricsMgr(CommonResources& common, CountDownLatch* tmanagerLatch, CountDownLatch* connectionCloseLatch) { std::shared_ptr ss; DummyMtrMgr* mgr = NULL; std::thread* metrics_mgr = NULL; - StartDummyMtrMgr(ss, mgr, metrics_mgr, common.metricsmgr_port_, "stmgr", tmasterLatch, + StartDummyMtrMgr(ss, mgr, metrics_mgr, common.metricsmgr_port_, "stmgr", tmanagerLatch, connectionCloseLatch); common.ss_list_.push_back(ss); common.metrics_mgr_ = mgr; @@ -659,8 +659,8 @@ void StartMetricsMgr(CommonResources& common) { StartMetricsMgr(common, NULL, NU void TearCommonResources(CommonResources& common) { delete common.topology_; delete common.packing_plan_; - delete common.tmaster_thread_; - delete common.tmaster_; + delete common.tmanager_thread_; + delete common.tmanager_; delete common.metrics_mgr_thread_; delete common.metrics_mgr_; @@ -690,24 +690,24 @@ void TearCommonResources(CommonResources& common) { FileUtils::removeRecursive(common.dpath_, true); } -void VerifyMetricsMgrTMaster(CommonResources& common) { - EXPECT_NE(common.metrics_mgr_->get_tmaster(), (heron::proto::tmaster::TMasterLocation*)NULL); - EXPECT_EQ(common.metrics_mgr_->get_tmaster()->topology_name(), common.topology_name_); - EXPECT_EQ(common.metrics_mgr_->get_tmaster()->topology_id(), common.topology_id_); - EXPECT_EQ(common.metrics_mgr_->get_tmaster()->host(), LOCALHOST); - EXPECT_EQ(common.metrics_mgr_->get_tmaster()->controller_port(), common.tmaster_controller_port_); - EXPECT_EQ(common.metrics_mgr_->get_tmaster()->master_port(), common.tmaster_port_); - EXPECT_EQ(common.metrics_mgr_->get_tmaster()->stats_port(), common.tmaster_stats_port_); +void VerifyMetricsMgrTManager(CommonResources& common) { + EXPECT_NE(common.metrics_mgr_->get_tmanager(), (heron::proto::tmanager::TManagerLocation*)NULL); + EXPECT_EQ(common.metrics_mgr_->get_tmanager()->topology_name(), common.topology_name_); + EXPECT_EQ(common.metrics_mgr_->get_tmanager()->topology_id(), common.topology_id_); + EXPECT_EQ(common.metrics_mgr_->get_tmanager()->host(), LOCALHOST); + EXPECT_EQ(common.metrics_mgr_->get_tmanager()->controller_port(), common.tmanager_controller_port_); + EXPECT_EQ(common.metrics_mgr_->get_tmanager()->server_port(), common.tmanager_port_); + EXPECT_EQ(common.metrics_mgr_->get_tmanager()->stats_port(), common.tmanager_stats_port_); } // Test to make sure that the stmgr can decode the pplan TEST(StMgr, test_pplan_decode) { CommonResources common; // Initialize dummy params - common.tmaster_host_ = LOCALHOST; - common.tmaster_port_ = 10000; - common.tmaster_controller_port_ = 10001; - common.tmaster_stats_port_ = 10002; + common.tmanager_host_ = LOCALHOST; + common.tmanager_port_ = 10000; + common.tmanager_controller_port_ = 10001; + common.tmanager_stats_port_ = 10002; common.metricsmgr_port_ = 0; common.shell_port_ = 40000; common.ckptmgr_port_ = 50000; @@ -730,8 +730,8 @@ TEST(StMgr, test_pplan_decode) { // Start the metrics mgr StartMetricsMgr(common); - // Start the tmaster etc. - StartTMaster(common); + // Start the tmanager etc. 
+ StartTManager(common); // Distribute workers across stmgrs DistributeWorkersAcrossStmgrs(common); @@ -739,7 +739,7 @@ // Start the stream managers StartStMgrs(common); - // Start dummy worker to make the stmgr connect to the tmaster + // Start dummy worker to make the stmgr connect to the tmanager StartWorkerComponents(common, 0, 0); // Wait till we get the physical plan populated on at least one of the stmgrs @@ -752,7 +752,7 @@ } // Wait for the threads to terminate - common.tmaster_thread_->join(); + common.tmanager_thread_->join(); common.metrics_mgr_thread_->join(); for (size_t i = 0; i < common.stmgrs_threads_list_.size(); ++i) { common.stmgrs_threads_list_[i]->join(); } @@ -792,10 +792,10 @@ TEST(StMgr, test_tuple_route) { CommonResources common; // Initialize dummy params - common.tmaster_host_ = LOCALHOST; - common.tmaster_port_ = 15000; - common.tmaster_controller_port_ = 15001; - common.tmaster_stats_port_ = 15002; + common.tmanager_host_ = LOCALHOST; + common.tmanager_port_ = 15000; + common.tmanager_controller_port_ = 15001; + common.tmanager_stats_port_ = 15002; common.metricsmgr_port_ = 0; common.shell_port_ = 45000; common.ckptmgr_port_ = 55000; @@ -815,8 +815,8 @@ // Start the metrics mgr StartMetricsMgr(common); - // Start the tmaster etc. - StartTMaster(common); + // Start the tmanager etc. + StartTManager(common); int num_msgs_sent_by_spout_instance = 8; @@ -844,7 +844,7 @@ // Wait for the threads to terminate. We have already waited for the bolt // threads - common.tmaster_thread_->join(); + common.tmanager_thread_->join(); common.metrics_mgr_thread_->join(); for (size_t i = 0; i < common.stmgrs_threads_list_.size(); ++i) { common.stmgrs_threads_list_[i]->join(); } @@ -871,10 +871,10 @@ TEST(StMgr, test_custom_grouping_route) { CommonResources common; // Initialize dummy params - common.tmaster_host_ = LOCALHOST; - common.tmaster_port_ = 15500; - common.tmaster_controller_port_ = 15501; - common.tmaster_stats_port_ = 15502; + common.tmanager_host_ = LOCALHOST; + common.tmanager_port_ = 15500; + common.tmanager_controller_port_ = 15501; + common.tmanager_stats_port_ = 15502; common.metricsmgr_port_ = 0; common.shell_port_ = 45500; common.ckptmgr_port_ = 55500; @@ -894,8 +894,8 @@ // Start the metrics mgr StartMetricsMgr(common); - // Start the tmaster etc. - StartTMaster(common); + // Start the tmanager etc. + StartTManager(common); int num_msgs_sent_by_spout_instance = 8; @@ -923,7 +923,7 @@ // Wait for the threads to terminate. 
We have already waited for the bolt // threads - common.tmaster_thread_->join(); + common.tmanager_thread_->join(); common.metrics_mgr_thread_->join(); for (size_t i = 0; i < common.stmgrs_threads_list_.size(); ++i) { @@ -961,10 +961,10 @@ TEST(StMgr, test_back_pressure_instance) { CommonResources common; // Initialize dummy params - common.tmaster_host_ = LOCALHOST; - common.tmaster_port_ = 17000; - common.tmaster_controller_port_ = 17001; - common.tmaster_stats_port_ = 17002; + common.tmanager_host_ = LOCALHOST; + common.tmanager_port_ = 17000; + common.tmanager_controller_port_ = 17001; + common.tmanager_stats_port_ = 17002; common.metricsmgr_port_ = 0; common.shell_port_ = 47000; common.ckptmgr_port_ = 57000; @@ -986,8 +986,8 @@ TEST(StMgr, test_back_pressure_instance) { // Start the metrics mgr StartMetricsMgr(common); - // Start the tmaster etc. - StartTMaster(common); + // Start the tmanager etc. + StartTManager(common); // Distribute workers across stmgrs DistributeWorkersAcrossStmgrs(common); @@ -996,7 +996,7 @@ TEST(StMgr, test_back_pressure_instance) { std::shared_ptr regular_stmgr_ss; heron::stmgr::StMgr* regular_stmgr = NULL; std::thread* regular_stmgr_thread = NULL; - StartStMgr(regular_stmgr_ss, regular_stmgr, regular_stmgr_thread, common.tmaster_host_, + StartStMgr(regular_stmgr_ss, regular_stmgr, regular_stmgr_thread, common.tmanager_host_, common.stmgr_ports_[0], common.local_data_ports_[0], common.topology_name_, common.topology_id_, common.topology_, common.stmgr_instance_id_list_[0], common.stmgrs_id_list_[0], common.zkhostportlist_, @@ -1010,7 +1010,7 @@ TEST(StMgr, test_back_pressure_instance) { std::thread* dummy_stmgr_thread = NULL; StartDummyStMgr(dummy_stmgr_ss, dummy_stmgr, dummy_stmgr_thread, common.stmgr_ports_[1], - common.tmaster_port_, common.shell_port_, common.stmgrs_id_list_[1], + common.tmanager_port_, common.shell_port_, common.stmgrs_id_list_[1], common.stmgr_instance_list_[1]); common.ss_list_.push_back(dummy_stmgr_ss); @@ -1051,7 +1051,7 @@ TEST(StMgr, test_back_pressure_instance) { } // Wait for the threads to terminate - common.tmaster_thread_->join(); + common.tmanager_thread_->join(); common.metrics_mgr_thread_->join(); regular_stmgr_thread->join(); dummy_stmgr_thread->join(); @@ -1072,10 +1072,10 @@ TEST(StMgr, test_spout_death_under_backpressure) { CommonResources common; // Initialize dummy params - common.tmaster_host_ = LOCALHOST; - common.tmaster_port_ = 17300; - common.tmaster_controller_port_ = 17301; - common.tmaster_stats_port_ = 17302; + common.tmanager_host_ = LOCALHOST; + common.tmanager_port_ = 17300; + common.tmanager_controller_port_ = 17301; + common.tmanager_stats_port_ = 17302; common.metricsmgr_port_ = 0; common.shell_port_ = 47300; common.ckptmgr_port_ = 57300; @@ -1097,8 +1097,8 @@ TEST(StMgr, test_spout_death_under_backpressure) { // Start the metrics mgr StartMetricsMgr(common); - // Start the tmaster etc. - StartTMaster(common); + // Start the tmanager etc. 
+ StartTManager(common); // Distribute workers across stmgrs DistributeWorkersAcrossStmgrs(common); @@ -1107,7 +1107,7 @@ TEST(StMgr, test_spout_death_under_backpressure) { std::shared_ptr regular_stmgr_ss; heron::stmgr::StMgr* regular_stmgr = NULL; std::thread* regular_stmgr_thread = NULL; - StartStMgr(regular_stmgr_ss, regular_stmgr, regular_stmgr_thread, common.tmaster_host_, + StartStMgr(regular_stmgr_ss, regular_stmgr, regular_stmgr_thread, common.tmanager_host_, common.stmgr_ports_[0], common.local_data_ports_[0], common.topology_name_, common.topology_id_, common.topology_, common.stmgr_instance_id_list_[0], common.stmgrs_id_list_[0], common.zkhostportlist_, @@ -1120,7 +1120,7 @@ TEST(StMgr, test_spout_death_under_backpressure) { DummyStMgr* dummy_stmgr = NULL; std::thread* dummy_stmgr_thread = NULL; StartDummyStMgr(dummy_stmgr_ss, dummy_stmgr, dummy_stmgr_thread, common.stmgr_ports_[1], - common.tmaster_port_, common.shell_port_, common.stmgrs_id_list_[1], + common.tmanager_port_, common.shell_port_, common.stmgrs_id_list_[1], common.stmgr_instance_list_[1]); common.ss_list_.push_back(dummy_stmgr_ss); @@ -1186,7 +1186,7 @@ TEST(StMgr, test_spout_death_under_backpressure) { } // Wait for the threads to terminate - common.tmaster_thread_->join(); + common.tmanager_thread_->join(); common.metrics_mgr_thread_->join(); regular_stmgr_thread->join(); dummy_stmgr_thread->join(); @@ -1208,10 +1208,10 @@ TEST(StMgr, test_back_pressure_stmgr) { CommonResources common; // Initialize dummy params - common.tmaster_host_ = LOCALHOST; - common.tmaster_port_ = 18000; - common.tmaster_controller_port_ = 18001; - common.tmaster_stats_port_ = 18002; + common.tmanager_host_ = LOCALHOST; + common.tmanager_port_ = 18000; + common.tmanager_controller_port_ = 18001; + common.tmanager_stats_port_ = 18002; common.metricsmgr_port_ = 0; common.shell_port_ = 48000; common.ckptmgr_port_ = 58000; @@ -1236,8 +1236,8 @@ TEST(StMgr, test_back_pressure_stmgr) { // Start the metrics mgr StartMetricsMgr(common); - // Start the tmaster etc. - StartTMaster(common); + // Start the tmanager etc. 
+ StartTManager(common); // Distribute workers across stmgrs DistributeWorkersAcrossStmgrs(common); @@ -1247,7 +1247,7 @@ TEST(StMgr, test_back_pressure_stmgr) { heron::stmgr::StMgr* regular_stmgr1 = NULL; std::thread* regular_stmgr_thread1 = NULL; - StartStMgr(regular_stmgr_ss1, regular_stmgr1, regular_stmgr_thread1, common.tmaster_host_, + StartStMgr(regular_stmgr_ss1, regular_stmgr1, regular_stmgr_thread1, common.tmanager_host_, common.stmgr_ports_[0], common.local_data_ports_[0], common.topology_name_, common.topology_id_, common.topology_, common.stmgr_instance_id_list_[0], common.stmgrs_id_list_[0], common.zkhostportlist_, @@ -1259,7 +1259,7 @@ TEST(StMgr, test_back_pressure_stmgr) { heron::stmgr::StMgr* regular_stmgr2 = NULL; std::thread* regular_stmgr_thread2 = NULL; - StartStMgr(regular_stmgr_ss2, regular_stmgr2, regular_stmgr_thread2, common.tmaster_host_, + StartStMgr(regular_stmgr_ss2, regular_stmgr2, regular_stmgr_thread2, common.tmanager_host_, common.stmgr_ports_[1], common.local_data_ports_[1], common.topology_name_, common.topology_id_, common.topology_, common.stmgr_instance_id_list_[1], common.stmgrs_id_list_[1], @@ -1274,7 +1274,7 @@ TEST(StMgr, test_back_pressure_stmgr) { DummyStMgr* dummy_stmgr = NULL; std::thread* dummy_stmgr_thread = NULL; StartDummyStMgr(dummy_stmgr_ss, dummy_stmgr, dummy_stmgr_thread, common.stmgr_ports_[2], - common.tmaster_port_, common.shell_port_, common.stmgrs_id_list_[2], + common.tmanager_port_, common.shell_port_, common.stmgrs_id_list_[2], common.stmgr_instance_list_[2]); common.ss_list_.push_back(dummy_stmgr_ss); @@ -1307,7 +1307,7 @@ TEST(StMgr, test_back_pressure_stmgr) { } // Wait for the threads to terminate - common.tmaster_thread_->join(); + common.tmanager_thread_->join(); common.metrics_mgr_thread_->join(); regular_stmgr_thread1->join(); dummy_stmgr_thread->join(); @@ -1329,10 +1329,10 @@ TEST(StMgr, test_back_pressure_stmgr_reconnect) { CommonResources common; // Initialize dummy params - common.tmaster_host_ = LOCALHOST; - common.tmaster_port_ = 18500; - common.tmaster_controller_port_ = 18501; - common.tmaster_stats_port_ = 18502; + common.tmanager_host_ = LOCALHOST; + common.tmanager_port_ = 18500; + common.tmanager_controller_port_ = 18501; + common.tmanager_stats_port_ = 18502; common.metricsmgr_port_ = 0; common.shell_port_ = 49000; common.ckptmgr_port_ = 59000; @@ -1354,8 +1354,8 @@ TEST(StMgr, test_back_pressure_stmgr_reconnect) { // Start the metrics mgr StartMetricsMgr(common); - // Start the tmaster etc. - StartTMaster(common); + // Start the tmanager etc. 
+ StartTManager(common); // Distribute workers across stmgrs DistributeWorkersAcrossStmgrs(common); @@ -1364,7 +1364,7 @@ TEST(StMgr, test_back_pressure_stmgr_reconnect) { std::shared_ptr regular_stmgr_ss; heron::stmgr::StMgr* regular_stmgr = NULL; std::thread* regular_stmgr_thread = NULL; - StartStMgr(regular_stmgr_ss, regular_stmgr, regular_stmgr_thread, common.tmaster_host_, + StartStMgr(regular_stmgr_ss, regular_stmgr, regular_stmgr_thread, common.tmanager_host_, common.stmgr_ports_[0], common.local_data_ports_[0], common.topology_name_, common.topology_id_, common.topology_, common.stmgr_instance_id_list_[0], common.stmgrs_id_list_[0], common.zkhostportlist_, @@ -1377,7 +1377,7 @@ TEST(StMgr, test_back_pressure_stmgr_reconnect) { DummyStMgr* dummy_stmgr = NULL; std::thread* dummy_stmgr_thread = NULL; StartDummyStMgr(dummy_stmgr_ss, dummy_stmgr, dummy_stmgr_thread, common.stmgr_ports_[1], - common.tmaster_port_, common.shell_port_, common.stmgrs_id_list_[1], + common.tmanager_port_, common.shell_port_, common.stmgrs_id_list_[1], common.stmgr_instance_list_[1]); common.ss_list_.push_back(dummy_stmgr_ss); @@ -1406,7 +1406,7 @@ TEST(StMgr, test_back_pressure_stmgr_reconnect) { common.stmgr_ports_[1] = 0; StartDummyStMgr(dummy_stmgr_ss, dummy_stmgr, dummy_stmgr_thread, common.stmgr_ports_[1], - common.tmaster_port_, common.shell_port_, common.stmgrs_id_list_[1], + common.tmanager_port_, common.shell_port_, common.stmgrs_id_list_[1], common.stmgr_instance_list_[1]); common.ss_list_.push_back(dummy_stmgr_ss); @@ -1421,7 +1421,7 @@ TEST(StMgr, test_back_pressure_stmgr_reconnect) { } // Wait for the threads to terminate - common.tmaster_thread_->join(); + common.tmanager_thread_->join(); common.metrics_mgr_thread_->join(); regular_stmgr_thread->join(); dummy_stmgr_thread->join(); @@ -1440,14 +1440,14 @@ TEST(StMgr, test_back_pressure_stmgr_reconnect) { TearCommonResources(common); } -TEST(StMgr, test_tmaster_restart_on_new_address) { +TEST(StMgr, test_tmanager_restart_on_new_address) { CommonResources common; // Initialize dummy params - common.tmaster_host_ = LOCALHOST; - common.tmaster_port_ = 18500; - common.tmaster_controller_port_ = 18501; - common.tmaster_stats_port_ = 18502; + common.tmanager_host_ = LOCALHOST; + common.tmanager_port_ = 18500; + common.tmanager_controller_port_ = 18501; + common.tmanager_stats_port_ = 18502; common.metricsmgr_port_ = 0; common.shell_port_ = 49001; common.ckptmgr_port_ = 59001; @@ -1466,22 +1466,22 @@ TEST(StMgr, test_tmaster_restart_on_new_address) { int num_msgs_sent_by_spout_instance = 100 * 1000 * 1000; // 100M - // A countdown latch to wait on, until metric mgr receives tmaster location + // A countdown latch to wait on, until metric mgr receives tmanager location // The count is 4 here, since we need to ensure it is sent twice for stmgr: once at - // start, and once after receiving new tmaster location. Plus 2 from tmaster, total 4. + // start, and once after receiving new tmanager location. Plus 2 from tmanager, total 4. // 5-4=1 is used to avoid countdown on 0 - CountDownLatch* metricsMgrTmasterLatch = new CountDownLatch(5); + CountDownLatch* metricsMgrTmanagerLatch = new CountDownLatch(5); // Start the metrics mgr, common.ss_list_[0] - StartMetricsMgr(common, metricsMgrTmasterLatch, NULL); + StartMetricsMgr(common, metricsMgrTmanagerLatch, NULL); - // Start the tmaster etc. common.ss_list_[1] - StartTMaster(common); + // Start the tmanager etc. 
common.ss_list_[1] + StartTManager(common); // Check the count: should be 5-1=4 - // The Tmaster sends its location to MetircsMgr when MetircsMgrClient initializes. - EXPECT_TRUE(metricsMgrTmasterLatch->wait(4, std::chrono::seconds(5))); - EXPECT_EQ(static_cast(4), metricsMgrTmasterLatch->getCount()); + // The Tmanager sends its location to MetricsMgr when MetricsMgrClient initializes. + EXPECT_TRUE(metricsMgrTmanagerLatch->wait(4, std::chrono::seconds(5))); + EXPECT_EQ(static_cast(4), metricsMgrTmanagerLatch->getCount()); // Distribute workers across stmgrs DistributeWorkersAcrossStmgrs(common); @@ -1490,7 +1490,7 @@ std::shared_ptr regular_stmgr_ss; heron::stmgr::StMgr* regular_stmgr = NULL; std::thread* regular_stmgr_thread = NULL; - StartStMgr(regular_stmgr_ss, regular_stmgr, regular_stmgr_thread, common.tmaster_host_, + StartStMgr(regular_stmgr_ss, regular_stmgr, regular_stmgr_thread, common.tmanager_host_, common.stmgr_ports_[0], common.local_data_ports_[0], common.topology_name_, common.topology_id_, common.topology_, common.stmgr_instance_id_list_[0], common.stmgrs_id_list_[0], common.zkhostportlist_, @@ -1500,16 +1500,16 @@ common.ss_list_.push_back(regular_stmgr_ss); // Check the count: should be 4-1=3 - // The Stmgr sends Tmaster location to MetricsMgr when MetircsMgrClient initializes - EXPECT_TRUE(metricsMgrTmasterLatch->wait(3, std::chrono::seconds(5))); - EXPECT_EQ(static_cast(3), metricsMgrTmasterLatch->getCount()); + // The Stmgr sends Tmanager location to MetricsMgr when MetricsMgrClient initializes + EXPECT_TRUE(metricsMgrTmanagerLatch->wait(3, std::chrono::seconds(5))); + EXPECT_EQ(static_cast(3), metricsMgrTmanagerLatch->getCount()); // Start a dummy stmgr std::shared_ptr dummy_stmgr_ss; DummyStMgr* dummy_stmgr = NULL; std::thread* dummy_stmgr_thread = NULL; StartDummyStMgr(dummy_stmgr_ss, dummy_stmgr, dummy_stmgr_thread, common.stmgr_ports_[1], - common.tmaster_port_, common.shell_port_, common.stmgrs_id_list_[1], + common.tmanager_port_, common.shell_port_, common.stmgrs_id_list_[1], common.stmgr_instance_list_[1]); // common.ss_list_[3] common.ss_list_.push_back(dummy_stmgr_ss); @@ -1521,11 +1521,11 @@ // workers have connected while (!regular_stmgr->GetPhysicalPlan()) sleep(1); - // Kill current tmaster + // Kill current tmanager common.ss_list_[1]->loopExit(); - common.tmaster_thread_->join(); - delete common.tmaster_; - delete common.tmaster_thread_; + common.tmanager_thread_->join(); + delete common.tmanager_; + delete common.tmanager_thread_; // Killing dummy stmgr so that we can restart it on another port, to change // the physical plan. 
@@ -1534,31 +1534,31 @@ TEST(StMgr, test_tmaster_restart_on_new_address) { delete dummy_stmgr_thread; delete dummy_stmgr; - // Change the tmaster port - common.tmaster_port_ = 18511; + // Change the tmanager port + common.tmanager_port_ = 18511; // Start new dummy stmgr at different port, to generate a differnt pplan that we // can verify common.stmgr_ports_[1] = 0; StartDummyStMgr(dummy_stmgr_ss, dummy_stmgr, dummy_stmgr_thread, common.stmgr_ports_[1], - common.tmaster_port_, common.shell_port_, common.stmgrs_id_list_[1], + common.tmanager_port_, common.shell_port_, common.stmgrs_id_list_[1], common.stmgr_instance_list_[1]); common.ss_list_.push_back(dummy_stmgr_ss); - // Start tmaster on a different port - StartTMaster(common); + // Start tmanager on a different port + StartTManager(common); - // This confirms that metrics manager received the new tmaster location - // Tmaster sends its location to MetricsMgr when MetricsMgrClient initialize: 3-1=2 - // Stmgr-0 watches new tmaster location and sends it to MetricsMgr: 2-1=1 - EXPECT_TRUE(metricsMgrTmasterLatch->wait(1, std::chrono::seconds(5))); - EXPECT_EQ(static_cast(1), metricsMgrTmasterLatch->getCount()); + // This confirms that metrics manager received the new tmanager location + // Tmanager sends its location to MetricsMgr when MetricsMgrClient initialize: 3-1=2 + // Stmgr-0 watches new tmanager location and sends it to MetricsMgr: 2-1=1 + EXPECT_TRUE(metricsMgrTmanagerLatch->wait(1, std::chrono::seconds(5))); + EXPECT_EQ(static_cast(1), metricsMgrTmanagerLatch->getCount()); // Now wait until stmgr receives the new physical plan // No easy way to avoid sleep here. sleep(2); - // Ensure that Stmgr connected to the new tmaster and has received new physical plan + // Ensure that Stmgr connected to the new tmanager and has received new physical plan if (regular_stmgr->GetPhysicalPlan()->stmgrs(1).data_port() != common.stmgr_ports_[1]) { CHECK_EQ(regular_stmgr->GetPhysicalPlan()->stmgrs(0).data_port(), common.stmgr_ports_[1]); } @@ -1569,7 +1569,7 @@ TEST(StMgr, test_tmaster_restart_on_new_address) { } // Wait for the threads to terminate - common.tmaster_thread_->join(); + common.tmanager_thread_->join(); common.metrics_mgr_thread_->join(); regular_stmgr_thread->join(); dummy_stmgr_thread->join(); @@ -1585,18 +1585,18 @@ TEST(StMgr, test_tmaster_restart_on_new_address) { delete regular_stmgr; delete dummy_stmgr_thread; delete dummy_stmgr; - delete metricsMgrTmasterLatch; + delete metricsMgrTmanagerLatch; TearCommonResources(common); } -TEST(StMgr, test_tmaster_restart_on_same_address) { +TEST(StMgr, test_tmanager_restart_on_same_address) { CommonResources common; // Initialize dummy params - common.tmaster_host_ = LOCALHOST; - common.tmaster_port_ = 18500; - common.tmaster_controller_port_ = 18501; - common.tmaster_stats_port_ = 18502; + common.tmanager_host_ = LOCALHOST; + common.tmanager_port_ = 18500; + common.tmanager_controller_port_ = 18501; + common.tmanager_stats_port_ = 18502; common.metricsmgr_port_ = 0; common.shell_port_ = 49002; common.ckptmgr_port_ = 59002; @@ -1615,21 +1615,21 @@ TEST(StMgr, test_tmaster_restart_on_same_address) { int num_msgs_sent_by_spout_instance = 100 * 1000 * 1000; // 100M - // A countdown latch to wait on, until metric mgr receives tmaster location + // A countdown latch to wait on, until metric mgr receives tmanager location // The count is 2 here for stmgr, since we need to ensure it is sent twice: once at - // start, and once after receiving new tmaster location - CountDownLatch* 
metricsMgrTmasterLatch = new CountDownLatch(5); + // start, and once after receiving new tmanager location + CountDownLatch* metricsMgrTmanagerLatch = new CountDownLatch(5); // Start the metrics mgr - StartMetricsMgr(common, metricsMgrTmasterLatch, NULL); + StartMetricsMgr(common, metricsMgrTmanagerLatch, NULL); - // Start the tmaster etc. - StartTMaster(common); + // Start the tmanager etc. + StartTManager(common); // Check the count: should be 5-1=4 - // Tmaster send its location to MetricsMgr when MetricsMgrClient initializes - EXPECT_TRUE(metricsMgrTmasterLatch->wait(4, std::chrono::seconds(5))); - EXPECT_EQ(static_cast(4), metricsMgrTmasterLatch->getCount()); + // Tmanager sends its location to MetricsMgr when MetricsMgrClient initializes + EXPECT_TRUE(metricsMgrTmanagerLatch->wait(4, std::chrono::seconds(5))); + EXPECT_EQ(static_cast(4), metricsMgrTmanagerLatch->getCount()); // Distribute workers across stmgrs DistributeWorkersAcrossStmgrs(common); @@ -1638,7 +1638,7 @@ TEST(StMgr, test_tmaster_restart_on_same_address) { std::shared_ptr regular_stmgr_ss; heron::stmgr::StMgr* regular_stmgr = NULL; std::thread* regular_stmgr_thread = NULL; - StartStMgr(regular_stmgr_ss, regular_stmgr, regular_stmgr_thread, common.tmaster_host_, + StartStMgr(regular_stmgr_ss, regular_stmgr, regular_stmgr_thread, common.tmanager_host_, common.stmgr_ports_[0], common.local_data_ports_[0], common.topology_name_, common.topology_id_, common.topology_, common.stmgr_instance_id_list_[0], common.stmgrs_id_list_[0], common.zkhostportlist_, @@ -1647,16 +1647,16 @@ TEST(StMgr, test_tmaster_restart_on_same_address) { common.ss_list_.push_back(regular_stmgr_ss); // Check the count: should be 4-1=3 - // Stmgr-0 sends tmaster location to MetrcisMgr when MetricsMgrClient initializes. - EXPECT_TRUE(metricsMgrTmasterLatch->wait(3, std::chrono::seconds(5))); - EXPECT_EQ(static_cast(3), metricsMgrTmasterLatch->getCount()); + // Stmgr-0 sends tmanager location to MetricsMgr when MetricsMgrClient initializes. + EXPECT_TRUE(metricsMgrTmanagerLatch->wait(3, std::chrono::seconds(5))); + EXPECT_EQ(static_cast(3), metricsMgrTmanagerLatch->getCount()); // Start a dummy stmgr std::shared_ptr dummy_stmgr_ss; DummyStMgr* dummy_stmgr = NULL; std::thread* dummy_stmgr_thread = NULL; StartDummyStMgr(dummy_stmgr_ss, dummy_stmgr, dummy_stmgr_thread, common.stmgr_ports_[1], - common.tmaster_port_, common.shell_port_, common.stmgrs_id_list_[1], + common.tmanager_port_, common.shell_port_, common.stmgrs_id_list_[1], common.stmgr_instance_list_[1]); common.ss_list_.push_back(dummy_stmgr_ss); @@ -1667,11 +1667,11 @@ TEST(StMgr, test_tmaster_restart_on_same_address) { // workers have connected while (!regular_stmgr->GetPhysicalPlan()) sleep(1); - // Kill current tmaster + // Kill current tmanager common.ss_list_[1]->loopExit(); - common.tmaster_thread_->join(); - delete common.tmaster_; - delete common.tmaster_thread_; + common.tmanager_thread_->join(); + delete common.tmanager_; + delete common.tmanager_thread_; // Killing dummy stmgr so that we can restart it on another port, to change // the physical plan.
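Both restart tests tear the old tmanager down through the same three-step sequence before bringing up its replacement: exit the tmanager's event loop, join its thread, and delete the objects. A sketch of that sequence as a standalone helper; the tests inline these steps rather than calling such a function, and the template parameters merely stand in for the fixture's event-loop and tmanager types:

    #include <thread>

    // Hypothetical helper mirroring the inlined teardown above. `loop` stands
    // in for common.ss_list_[1] (the tmanager's event loop in these tests),
    // `thr` for common.tmanager_thread_ and `tmanager` for common.tmanager_.
    template <typename EventLoopT, typename TManagerT>
    void StopTManager(EventLoopT& loop, std::thread*& thr, TManagerT*& tmanager) {
      loop.loopExit();   // ask the event loop to return from loop()
      thr->join();       // wait for the tmanager thread to finish
      delete tmanager;
      delete thr;
      tmanager = nullptr;  // leave the fixture safe to reuse
      thr = nullptr;
    }

After this teardown, a test is free to reassign the port (the same one or a new one) and call StartTManager(common) again, which is exactly the restart behavior being exercised.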
@@ -1685,31 +1685,31 @@ TEST(StMgr, test_tmaster_restart_on_same_address) { sp_int32 stmgr_port_old = common.stmgr_ports_[1]; common.stmgr_ports_[1] = 0; StartDummyStMgr(dummy_stmgr_ss, dummy_stmgr, dummy_stmgr_thread, common.stmgr_ports_[1], - common.tmaster_port_, common.shell_port_, common.stmgrs_id_list_[1], + common.tmanager_port_, common.shell_port_, common.stmgrs_id_list_[1], common.stmgr_instance_list_[1]); common.ss_list_.push_back(dummy_stmgr_ss); - // Start tmaster on a different port - StartTMaster(common); + // Start tmanager on a different port + StartTManager(common); - // This confirms that metrics manager received the new tmaster location + // This confirms that metrics manager received the new tmanager location // Check the count: should be 3-2=1 - // Tmaster sends its location when MetricsMgrClient initialize - // Stmgr-0 watches and sends tmaster location - EXPECT_TRUE(metricsMgrTmasterLatch->wait(1, std::chrono::seconds(5))); - EXPECT_EQ(static_cast(1), metricsMgrTmasterLatch->getCount()); + // Tmanager sends its location when MetricsMgrClient initializes + // Stmgr-0 watches and sends tmanager location + EXPECT_TRUE(metricsMgrTmanagerLatch->wait(1, std::chrono::seconds(5))); + EXPECT_EQ(static_cast(1), metricsMgrTmanagerLatch->getCount()); // Now wait until stmgr receives the new physical plan. // No easy way to avoid sleep here. // Note: Here we sleep longer compared to the previous test as we need - // to tmasterClient could take upto 1 second (specified in test_heron_internals.yaml) - // to retry connecting to tmaster. + // to wait: the tmanagerClient could take up to 1 second (specified in test_heron_internals.yaml) + // to retry connecting to tmanager. int retries = 30; while (regular_stmgr->GetPhysicalPlan()->stmgrs(1).data_port() == stmgr_port_old && retries--) sleep(1); - // Ensure that Stmgr connected to the new tmaster and has received new physical plan + // Ensure that Stmgr connected to the new tmanager and has received new physical plan CHECK_EQ(regular_stmgr->GetPhysicalPlan()->stmgrs(1).data_port(), common.stmgr_ports_[1]); CHECK_EQ(regular_stmgr->GetPhysicalPlan()->stmgrs(1).local_data_port(), common.local_data_ports_[1]); @@ -1720,7 +1720,7 @@ TEST(StMgr, test_tmaster_restart_on_same_address) { } // Wait for the threads to terminate - common.tmaster_thread_->join(); + common.tmanager_thread_->join(); common.metrics_mgr_thread_->join(); regular_stmgr_thread->join(); dummy_stmgr_thread->join(); @@ -1736,19 +1736,19 @@ TEST(StMgr, test_tmaster_restart_on_same_address) { delete regular_stmgr; delete dummy_stmgr_thread; delete dummy_stmgr; - delete metricsMgrTmasterLatch; + delete metricsMgrTmanagerLatch; TearCommonResources(common); } // This tests to make sure that metrics mgr upon reconnect -// will get the tmaster location +// will get the tmanager location TEST(StMgr, test_metricsmgr_reconnect) { CommonResources common; // Initialize dummy params - common.tmaster_host_ = LOCALHOST; - common.tmaster_port_ = 19000; - common.tmaster_controller_port_ = 19001; - common.tmaster_stats_port_ = 19002; + common.tmanager_host_ = LOCALHOST; + common.tmanager_port_ = 19000; + common.tmanager_controller_port_ = 19001; + common.tmanager_stats_port_ = 19002; common.metricsmgr_port_ = 0; common.shell_port_ = 49500; common.ckptmgr_port_ = 59500; @@ -1767,18 +1767,18 @@ TEST(StMgr, test_metricsmgr_reconnect) { int num_msgs_sent_by_spout_instance = 100 * 1000 * 1000; // 100M - // A countdown latch to wait on, until metric mgr receives tmaster location - CountDownLatch*
metricsMgrTmasterLatch = new CountDownLatch(1); + // A countdown latch to wait on, until metric mgr receives tmanager location + CountDownLatch* metricsMgrTmanagerLatch = new CountDownLatch(1); // A countdown latch to wait on metrics manager to close connnection. CountDownLatch* metricsMgrConnectionCloseLatch = new CountDownLatch(1); // Start the metrics mgr - StartMetricsMgr(common, metricsMgrTmasterLatch, metricsMgrConnectionCloseLatch); + StartMetricsMgr(common, metricsMgrTmanagerLatch, metricsMgrConnectionCloseLatch); // lets remember this std::shared_ptr mmgr_ss = common.ss_list_.back(); - // Start the tmaster etc. - StartTMaster(common); + // Start the tmanager etc. + StartTManager(common); // Distribute workers across stmgrs DistributeWorkersAcrossStmgrs(common); @@ -1787,7 +1787,7 @@ TEST(StMgr, test_metricsmgr_reconnect) { std::shared_ptr regular_stmgr_ss; heron::stmgr::StMgr* regular_stmgr = NULL; std::thread* regular_stmgr_thread = NULL; - StartStMgr(regular_stmgr_ss, regular_stmgr, regular_stmgr_thread, common.tmaster_host_, + StartStMgr(regular_stmgr_ss, regular_stmgr, regular_stmgr_thread, common.tmanager_host_, common.stmgr_ports_[0], common.local_data_ports_[0], common.topology_name_, common.topology_id_, common.topology_, common.stmgr_instance_id_list_[0], common.stmgrs_id_list_[0], common.zkhostportlist_, @@ -1800,7 +1800,7 @@ TEST(StMgr, test_metricsmgr_reconnect) { DummyStMgr* dummy_stmgr = NULL; std::thread* dummy_stmgr_thread = NULL; StartDummyStMgr(dummy_stmgr_ss, dummy_stmgr, dummy_stmgr_thread, common.stmgr_ports_[1], - common.tmaster_port_, common.shell_port_, common.stmgrs_id_list_[1], + common.tmanager_port_, common.shell_port_, common.stmgrs_id_list_[1], common.stmgr_instance_list_[1]); common.ss_list_.push_back(dummy_stmgr_ss); @@ -1811,11 +1811,11 @@ TEST(StMgr, test_metricsmgr_reconnect) { // workers have connected while (!regular_stmgr->GetPhysicalPlan()) sleep(1); - // wait until metrics mgr also get time to get tmaster location - EXPECT_TRUE(metricsMgrTmasterLatch->wait(0, std::chrono::seconds(5))); + // wait until metrics mgr also get time to get tmanager location + EXPECT_TRUE(metricsMgrTmanagerLatch->wait(0, std::chrono::seconds(5))); // Check that metricsmgr got it - VerifyMetricsMgrTMaster(common); + VerifyMetricsMgrTManager(common); // Kill the metrics mgr for (auto iter = common.ss_list_.begin(); iter != common.ss_list_.end(); ++iter) { @@ -1842,17 +1842,17 @@ TEST(StMgr, test_metricsmgr_reconnect) { common.metrics_mgr_thread_ = NULL; delete common.metrics_mgr_; common.metrics_mgr_ = NULL; - delete metricsMgrTmasterLatch; + delete metricsMgrTmanagerLatch; delete metricsMgrConnectionCloseLatch; - metricsMgrTmasterLatch = new CountDownLatch(1); + metricsMgrTmanagerLatch = new CountDownLatch(1); metricsMgrConnectionCloseLatch = new CountDownLatch(1); // Start the metrics mgr again - StartMetricsMgr(common, metricsMgrTmasterLatch, metricsMgrConnectionCloseLatch); - EXPECT_TRUE(metricsMgrTmasterLatch->wait(0, std::chrono::seconds(5))); + StartMetricsMgr(common, metricsMgrTmanagerLatch, metricsMgrConnectionCloseLatch); + EXPECT_TRUE(metricsMgrTmanagerLatch->wait(0, std::chrono::seconds(5))); // Check that metricsmgr got it - VerifyMetricsMgrTMaster(common); + VerifyMetricsMgrTManager(common); // Stop the schedulers for (size_t i = 0; i < common.ss_list_.size(); ++i) { @@ -1860,7 +1860,7 @@ TEST(StMgr, test_metricsmgr_reconnect) { } // Wait for the threads to terminate - common.tmaster_thread_->join(); + common.tmanager_thread_->join(); 
regular_stmgr_thread->join(); dummy_stmgr_thread->join(); common.metrics_mgr_thread_->join(); @@ -1876,7 +1876,7 @@ TEST(StMgr, test_metricsmgr_reconnect) { delete regular_stmgr; delete dummy_stmgr_thread; delete dummy_stmgr; - delete metricsMgrTmasterLatch; + delete metricsMgrTmanagerLatch; delete metricsMgrConnectionCloseLatch; TearCommonResources(common); } From 1ca01a420a5621e5555102ac8fbed72a11bcb083 Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 22:57:34 -0400 Subject: [PATCH 21/32] renaming "topology master" to "topology manager" in heron/stmgr (missed renames) --- .../src/cpp/manager/{tmaster-client.cpp => tmanager-client.cpp} | 0 .../stmgr/src/cpp/manager/{tmaster-client.h => tmanager-client.h} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename heron/stmgr/src/cpp/manager/{tmaster-client.cpp => tmanager-client.cpp} (100%) rename heron/stmgr/src/cpp/manager/{tmaster-client.h => tmanager-client.h} (100%) diff --git a/heron/stmgr/src/cpp/manager/tmaster-client.cpp b/heron/stmgr/src/cpp/manager/tmanager-client.cpp similarity index 100% rename from heron/stmgr/src/cpp/manager/tmaster-client.cpp rename to heron/stmgr/src/cpp/manager/tmanager-client.cpp diff --git a/heron/stmgr/src/cpp/manager/tmaster-client.h b/heron/stmgr/src/cpp/manager/tmanager-client.h similarity index 100% rename from heron/stmgr/src/cpp/manager/tmaster-client.h rename to heron/stmgr/src/cpp/manager/tmanager-client.h From 06285d2efe40fc8cdef29090bf3bea6ae9eb4c8b Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Sun, 25 Oct 2020 23:13:35 -0400 Subject: [PATCH 22/32] renaming "topology master" to "topology manager" in heron/executor --- heron/executor/src/python/heron_executor.py | 74 +++++++++---------- .../tests/python/heron_executor_unittest.py | 38 +++++----- 2 files changed, 56 insertions(+), 56 deletions(-) diff --git a/heron/executor/src/python/heron_executor.py b/heron/executor/src/python/heron_executor.py index 491cbdc7ff0..f10de6ad6e9 100755 --- a/heron/executor/src/python/heron_executor.py +++ b/heron/executor/src/python/heron_executor.py @@ -75,12 +75,12 @@ @click.option("--instance-classpath", required=True) @click.option("--instance-jvm-opts", required=True) @click.option("--is-stateful", required=True) -@click.option("--master-port", required=True) +@click.option("--server-port", required=True) @click.option("--metrics-manager-classpath", required=True) @click.option("--metrics-manager-port", required=True) @click.option("--metrics-sinks-config-file", required=True) @click.option("--metricscache-manager-classpath", required=True) -@click.option("--metricscache-manager-master-port", required=True) +@click.option("--metricscache-manager-server-port", required=True) @click.option("--metricscache-manager-mode", required=False) @click.option("--metricscache-manager-stats-port", required=True) @click.option("--override-config-file", required=True) @@ -95,9 +95,9 @@ @click.option("--state-manager-root", required=True) @click.option("--stateful-config-file", required=True) @click.option("--stmgr-binary", required=True) -@click.option("--tmaster-binary", required=True) -@click.option("--tmaster-controller-port", required=True) -@click.option("--tmaster-stats-port", required=True) +@click.option("--tmanager-binary", required=True) +@click.option("--tmanager-controller-port", required=True) +@click.option("--tmanager-stats-port", required=True) @click.option("--topology-binary-file", required=True) @click.option("--topology-defn-file", required=True) @click.option("--topology-id", required=True) 
@@ -254,7 +254,7 @@ def __repr__(self): class HeronExecutor: """ Heron executor is a class that is responsible for running each of the process on a given container. Based on the container id and the instance distribution, it determines if the container - is a master node or a worker node and it starts processes accordingly.""" + is a primary node or a worker node and it starts processes accordingly.""" def init_from_parsed_args(self, parsed_args): """ initialize from parsed arguments """ self.shard = parsed_args.shard @@ -264,7 +264,7 @@ def init_from_parsed_args(self, parsed_args): self.state_manager_connection = parsed_args.state_manager_connection self.state_manager_root = parsed_args.state_manager_root self.state_manager_config_file = parsed_args.state_manager_config_file - self.tmaster_binary = parsed_args.tmaster_binary + self.tmanager_binary = parsed_args.tmanager_binary self.stmgr_binary = parsed_args.stmgr_binary self.metrics_manager_classpath = parsed_args.metrics_manager_classpath self.metricscache_manager_classpath = parsed_args.metricscache_manager_classpath @@ -281,14 +281,14 @@ def init_from_parsed_args(self, parsed_args): # id within docker, rather than the host's hostname. NOTE: this 'HOST' env variable is not # guaranteed to be set in all Docker executor environments (outside of Marathon) if is_kubernetes_environment(): - self.master_host = socket.getfqdn() + self.primary_host = socket.getfqdn() elif is_docker_environment(): - self.master_host = os.environ.get('HOST') if 'HOST' in os.environ else socket.gethostname() + self.primary_host = os.environ.get('HOST') if 'HOST' in os.environ else socket.gethostname() else: - self.master_host = socket.gethostname() - self.master_port = parsed_args.master_port - self.tmaster_controller_port = parsed_args.tmaster_controller_port - self.tmaster_stats_port = parsed_args.tmaster_stats_port + self.primary_host = socket.gethostname() + self.server_port = parsed_args.server_port + self.tmanager_controller_port = parsed_args.tmanager_controller_port + self.tmanager_stats_port = parsed_args.tmanager_stats_port self.heron_internals_config_file = parsed_args.heron_internals_config_file self.override_config_file = parsed_args.override_config_file self.component_ram_map = [{x.split(':')[0]:int(x.split(':')[1])} @@ -320,7 +320,7 @@ def init_from_parsed_args(self, parsed_args): self.shell_port = parsed_args.shell_port self.heron_shell_binary = parsed_args.heron_shell_binary self.metrics_manager_port = parsed_args.metrics_manager_port - self.metricscache_manager_master_port = parsed_args.metricscache_manager_master_port + self.metricscache_manager_server_port = parsed_args.metricscache_manager_server_port self.metricscache_manager_stats_port = parsed_args.metricscache_manager_stats_port self.cluster = parsed_args.cluster self.role = parsed_args.role @@ -389,7 +389,7 @@ def initialize(self): chmod_logs_dir = Command('chmod a+rx . 
&& chmod a+x %s' % self.log_dir, self.shell_env) self.run_command_or_exit(chmod_logs_dir) - chmod_x_binaries = [self.tmaster_binary, self.stmgr_binary, self.heron_shell_binary] + chmod_x_binaries = [self.tmanager_binary, self.stmgr_binary, self.heron_shell_binary] for binary in chmod_x_binaries: stat_result = os.stat(binary)[stat.ST_MODE] @@ -455,7 +455,7 @@ def _get_metrics_cache_cmd(self): self.metricscache_manager_classpath, metricscachemgr_main_class, "--metricscache_id", 'metricscache-0', - "--master_port", self.metricscache_manager_master_port, + "--server_port", self.metricscache_manager_server_port, "--stats_port", self.metricscache_manager_stats_port, "--topology_name", self.topology_name, "--topology_id", self.topology_id, @@ -492,34 +492,34 @@ def _get_healthmgr_cmd(self): healthmgr_cmd = self._get_java_gc_instance_cmd(healthmgr_cmd, 'healthmgr') return Command(healthmgr_cmd, self.shell_env) - def _get_tmaster_processes(self): - ''' get the command to start the tmaster processes ''' + def _get_tmanager_processes(self): + ''' get the command to start the tmanager processes ''' retval = {} - tmaster_cmd_lst = [ - self.tmaster_binary, + tmanager_cmd_lst = [ + self.tmanager_binary, '--topology_name=%s' % self.topology_name, '--topology_id=%s' % self.topology_id, '--zkhostportlist=%s' % self.state_manager_connection, '--zkroot=%s' % self.state_manager_root, - '--myhost=%s' % self.master_host, - '--master_port=%s' % str(self.master_port), - '--controller_port=%s' % str(self.tmaster_controller_port), - '--stats_port=%s' % str(self.tmaster_stats_port), + '--myhost=%s' % self.primary_host, + '--server_port=%s' % str(self.server_port), + '--controller_port=%s' % str(self.tmanager_controller_port), + '--stats_port=%s' % str(self.tmanager_stats_port), '--config_file=%s' % self.heron_internals_config_file, '--override_config_file=%s' % self.override_config_file, '--metrics_sinks_yaml=%s' % self.metrics_sinks_config_file, '--metricsmgr_port=%s' % str(self.metrics_manager_port), '--ckptmgr_port=%s' % str(self.checkpoint_manager_port)] - tmaster_env = self.shell_env.copy() if self.shell_env is not None else {} - tmaster_cmd = Command(tmaster_cmd_lst, tmaster_env) + tmanager_env = self.shell_env.copy() if self.shell_env is not None else {} + tmanager_cmd = Command(tmanager_cmd_lst, tmanager_env) if os.environ.get('ENABLE_HEAPCHECK') is not None: - tmaster_cmd.env.update({ + tmanager_cmd.env.update({ 'LD_PRELOAD': "/usr/lib/libtcmalloc.so", 'HEAPCHECK': "normal" }) - retval["heron-tmaster"] = tmaster_cmd + retval["heron-tmanager"] = tmanager_cmd if self.metricscache_manager_mode.lower() != "disabled": retval["heron-metricscache"] = self._get_metrics_cache_cmd() @@ -665,7 +665,7 @@ def _get_jvm_instance_arguments(self, instance_id, component_name, global_task_i '-task_id', str(global_task_id), '-component_index', str(component_index), '-stmgr_id', self.stmgr_ids[self.shard], - '-stmgr_port', self.tmaster_controller_port, + '-stmgr_port', self.tmanager_controller_port, '-metricsmgr_port', self.metrics_manager_port, '-system_config_file', self.heron_internals_config_file, '-override_config_file', self.override_config_file] @@ -707,7 +707,7 @@ def _get_python_instance_cmd(self, instance_info): '--task_id=%s' % str(global_task_id), '--component_index=%s' % str(component_index), '--stmgr_id=%s' % self.stmgr_ids[self.shard], - '--stmgr_port=%s' % self.tmaster_controller_port, + '--stmgr_port=%s' % self.tmanager_controller_port, '--metricsmgr_port=%s' % self.metrics_manager_port, '--sys_config=%s' % 
self.heron_internals_config_file, '--override_config=%s' % self.override_config_file, @@ -734,7 +734,7 @@ def _get_cpp_instance_cmd(self, instance_info): '--task_id=%s' % str(global_task_id), '--component_index=%s' % str(component_index), '--stmgr_id=%s' % self.stmgr_ids[self.shard], - '--stmgr_port=%s' % str(self.tmaster_controller_port), + '--stmgr_port=%s' % str(self.tmanager_controller_port), '--metricsmgr_port=%s' % str(self.metrics_manager_port), '--config_file=%s' % self.heron_internals_config_file, '--override_config_file=%s' % self.override_config_file, @@ -771,9 +771,9 @@ def _get_streaming_processes(self): '--zkroot=%s' % self.state_manager_root, '--stmgr_id=%s' % self.stmgr_ids[self.shard], '--instance_ids=%s' % ','.join([x[0] for x in instance_info]), - '--myhost=%s' % self.master_host, - '--data_port=%s' % str(self.master_port), - '--local_data_port=%s' % str(self.tmaster_controller_port), + '--myhost=%s' % self.primary_host, + '--data_port=%s' % str(self.server_port), + '--local_data_port=%s' % str(self.tmanager_controller_port), '--metricsmgr_port=%s' % str(self.metrics_manager_port), '--shell_port=%s' % str(self.shell_port), '--config_file=%s' % self.heron_internals_config_file, @@ -989,7 +989,7 @@ def start_process_monitor(self): def get_commands_to_run(self): """ - Prepare either TMaster or Streaming commands according to shard. + Prepare either TManager or Streaming commands according to shard. The Shell command is attached to all containers. The empty container plan and non-exist container plan are bypassed. """ @@ -1006,7 +1006,7 @@ def get_commands_to_run(self): return retval if self.shard == 0: - commands = self._get_tmaster_processes() + commands = self._get_tmanager_processes() else: self._untar_if_needed() commands = self._get_streaming_processes() @@ -1027,7 +1027,7 @@ def get_command_changes(self, current_commands, updated_commands): # if the current command has a matching command in the updated commands we keep it # otherwise we kill it for current_name, current_command in list(current_commands.items()): - # We don't restart tmaster since it watches the packing plan and updates itself. The stream + # We don't restart tmanager since it watches the packing plan and updates itself. 
The stream # manager is restarted just to reset state, but we could update it to do so without a restart if current_name in list(updated_commands.keys()) and \ current_command == updated_commands[current_name] and \ diff --git a/heron/executor/tests/python/heron_executor_unittest.py b/heron/executor/tests/python/heron_executor_unittest.py index 84481a1d9fc..7d6299f057a 100644 --- a/heron/executor/tests/python/heron_executor_unittest.py +++ b/heron/executor/tests/python/heron_executor_unittest.py @@ -133,7 +133,7 @@ def get_expected_metricscachemgr_command(): "-XX:+PrintHeapAtGC -XX:+HeapDumpOnOutOfMemoryError -XX:ParallelGCThreads=4 " \ "-Xloggc:log-files/gc.metricscache.log " \ "-cp metricscachemgr_classpath org.apache.heron.metricscachemgr.MetricsCacheManager " \ - "--metricscache_id metricscache-0 --master_port metricscachemgr_masterport " \ + "--metricscache_id metricscache-0 --server_port metricscachemgr_serverport " \ "--stats_port metricscachemgr_statsport --topology_name topname --topology_id topid " \ "--system_config_file %s --override_config_file %s " \ "--sink_config_file metrics_sinks_config_file " \ @@ -167,17 +167,17 @@ def get_expected_instance_command(component_name, instance_id, container_id): "-cp instance_classpath:classpath -XX:+HeapDumpOnOutOfMemoryError " \ "org.apache.heron.instance.HeronInstance -topology_name topname -topology_id topid " \ "-instance_id %s -component_name %s -task_id %d -component_index 0 -stmgr_id stmgr-%d " \ - "-stmgr_port tmaster_controller_port -metricsmgr_port metricsmgr_port " \ + "-stmgr_port tmanager_controller_port -metricsmgr_port metricsmgr_port " \ "-system_config_file %s -override_config_file %s" \ % (instance_name, instance_name, component_name, instance_id, container_id, INTERNAL_CONF_PATH, OVERRIDE_PATH) MockPOpen.set_next_pid(37) expected_processes_container_0 = [ - ProcessInfo(MockPOpen(), 'heron-tmaster', - 'tmaster_binary --topology_name=topname --topology_id=topid ' - '--zkhostportlist=zknode --zkroot=zkroot --myhost=%s --master_port=master_port ' - '--controller_port=tmaster_controller_port --stats_port=tmaster_stats_port ' + ProcessInfo(MockPOpen(), 'heron-tmanager', + 'tmanager_binary --topology_name=topname --topology_id=topid ' + '--zkhostportlist=zknode --zkroot=zkroot --myhost=%s --server_port=server_port ' + '--controller_port=tmanager_controller_port --stats_port=tmanager_stats_port ' '--config_file=%s --override_config_file=%s ' '--metrics_sinks_yaml=metrics_sinks_config_file ' '--metricsmgr_port=metricsmgr_port ' @@ -195,8 +195,8 @@ def get_expected_instance_command(component_name, instance_id, container_id): '--topologydefn_file=topdefnfile --zkhostportlist=zknode --zkroot=zkroot ' '--stmgr_id=stmgr-1 ' '--instance_ids=container_1_word_3,container_1_exclaim1_2,container_1_exclaim1_1 ' - '--myhost=%s --data_port=master_port ' - '--local_data_port=tmaster_controller_port --metricsmgr_port=metricsmgr_port ' + '--myhost=%s --data_port=server_port ' + '--local_data_port=tmanager_controller_port --metricsmgr_port=metricsmgr_port ' '--shell_port=shell-port --config_file=%s --override_config_file=%s ' '--ckptmgr_port=ckptmgr-port --ckptmgr_id=ckptmgr-1 ' '--metricscachemgr_mode=cluster' @@ -217,8 +217,8 @@ def get_expected_instance_command(component_name, instance_id, container_id): '--topologydefn_file=topdefnfile --zkhostportlist=zknode --zkroot=zkroot ' '--stmgr_id=stmgr-7 ' '--instance_ids=container_7_word_11,container_7_exclaim1_210 --myhost=%s ' - '--data_port=master_port ' - 
'--local_data_port=tmaster_controller_port --metricsmgr_port=metricsmgr_port ' + '--data_port=server_port ' + '--local_data_port=tmanager_controller_port --metricsmgr_port=metricsmgr_port ' '--shell_port=shell-port --config_file=%s --override_config_file=%s ' '--ckptmgr_port=ckptmgr-port --ckptmgr_id=ckptmgr-7 ' '--metricscachemgr_mode=cluster' @@ -242,9 +242,9 @@ def setUp(self): }) # ./heron-executor - # + # # - # + # # # # @@ -259,14 +259,14 @@ def get_args(shard_id): ("--state-manager-connection", "zknode"), ("--state-manager-root", "zkroot"), ("--state-manager-config-file", "state_manager_config_file"), - ("--tmaster-binary", "tmaster_binary"), + ("--tmanager-binary", "tmanager_binary"), ("--stmgr-binary", "stmgr_binary"), ("--metrics-manager-classpath", "metricsmgr_classpath"), ("--instance-jvm-opts", "LVhYOitIZWFwRHVtcE9uT3V0T2ZNZW1vcnlFcnJvcg(61)(61)"), ("--classpath", "classpath"), - ("--master-port", "master_port"), - ("--tmaster-controller-port", "tmaster_controller_port"), - ("--tmaster-stats-port", "tmaster_stats_port"), + ("--server-port", "server_port"), + ("--tmanager-controller-port", "tmanager_controller_port"), + ("--tmanager-stats-port", "tmanager_stats_port"), ("--heron-internals-config-file", INTERNAL_CONF_PATH), ("--override-config-file", OVERRIDE_PATH), ("--component-ram-map", "exclaim1:536870912,word:536870912"), @@ -287,7 +287,7 @@ def get_args(shard_id): ("--python-instance-binary", "python_instance_binary"), ("--cpp-instance-binary", "cpp_instance_binary"), ("--metricscache-manager-classpath", "metricscachemgr_classpath"), - ("--metricscache-manager-master-port", "metricscachemgr_masterport"), + ("--metricscache-manager-server-port", "metricscachemgr_serverport"), ("--metricscache-manager-stats-port", "metricscachemgr_statsport"), ("--is-stateful", "is_stateful_enabled"), ("--checkpoint-manager-classpath", "ckptmgr_classpath"), @@ -445,7 +445,7 @@ def get_expected_metricscachemgr_command(): " -Xlog:gc*,safepoint=info:file=log-files/gc.metricscache.log:tags,time,uptime," \ "level:filecount=5,filesize=100M " \ "-cp metricscachemgr_classpath org.apache.heron.metricscachemgr.MetricsCacheManager " \ - "--metricscache_id metricscache-0 --master_port metricscachemgr_masterport " \ + "--metricscache_id metricscache-0 --server_port metricscachemgr_serverport " \ "--stats_port metricscachemgr_statsport --topology_name topname --topology_id topid " \ "--system_config_file %s --override_config_file %s " \ "--sink_config_file metrics_sinks_config_file " \ @@ -476,7 +476,7 @@ def get_expected_instance_command(component_name, instance_id, container_id): "-cp instance_classpath:classpath -XX:+HeapDumpOnOutOfMemoryError " \ "org.apache.heron.instance.HeronInstance -topology_name topname -topology_id topid " \ "-instance_id %s -component_name %s -task_id %d -component_index 0 -stmgr_id stmgr-%d " \ - "-stmgr_port tmaster_controller_port -metricsmgr_port metricsmgr_port " \ + "-stmgr_port tmanager_controller_port -metricsmgr_port metricsmgr_port " \ "-system_config_file %s -override_config_file %s" \ % (instance_name, instance_name, component_name, instance_id, container_id, INTERNAL_CONF_PATH, OVERRIDE_PATH) From bb1b61e3798191d1d0c3f5bebce80d36dc8106a7 Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Mon, 26 Oct 2020 12:45:35 -0400 Subject: [PATCH 23/32] renaming "topology master" to "topology manager" in heron/instance --- heron/instance/src/cpp/BUILD | 18 ++--- .../src/cpp/boltimpl/bolt-instance.cpp | 16 ++--- .../instance/src/cpp/boltimpl/bolt-instance.h | 10 +-- 
.../boltimpl/bolt-output-collector-impl.cpp | 4 +- .../cpp/boltimpl/bolt-output-collector-impl.h | 6 +- heron/instance/src/cpp/boltimpl/tuple-impl.h | 2 +- .../slave.cpp => executor/executor.cpp} | 54 +++++++------- .../{slave/slave.h => executor/executor.h} | 32 ++++----- .../imetrics-registrar-impl.cpp | 8 +-- .../imetrics-registrar-impl.h | 8 +-- .../cpp/{slave => executor}/instance-base.h | 6 +- .../outgoing-tuple-collection.cpp | 10 +-- .../outgoing-tuple-collection.h | 10 +-- .../{slave => executor}/task-context-impl.cpp | 2 +- .../{slave => executor}/task-context-impl.h | 4 +- heron/instance/src/cpp/gateway/gateway.cpp | 42 +++++------ heron/instance/src/cpp/gateway/gateway.h | 38 +++++----- .../instance/src/cpp/gateway/stmgr-client.cpp | 4 +- heron/instance/src/cpp/instance-main.cpp | 40 +++++------ .../src/cpp/spoutimpl/spout-instance.cpp | 12 ++-- .../src/cpp/spoutimpl/spout-instance.h | 6 +- .../spoutimpl/spout-output-collector-impl.cpp | 4 +- .../spoutimpl/spout-output-collector-impl.h | 6 +- .../instance/{Slave.java => Executor.java} | 60 ++++++++-------- .../apache/heron/instance/HeronInstance.java | 46 ++++++------ .../heron/instance/bolt/BoltInstance.java | 6 +- .../heron/instance/spout/SpoutInstance.java | 6 +- .../heron/instance/util/InstanceUtils.java | 4 +- .../heron/network/MetricsManagerClient.java | 2 +- .../heron/network/StreamManagerClient.java | 4 +- .../grouping/AbstractTupleRoutingTest.java | 24 +++---- .../heron/instance/CommunicatorTester.java | 30 ++++---- .../{SlaveTester.java => ExecutorTester.java} | 20 +++--- .../heron/instance/bolt/BoltInstanceTest.java | 16 ++--- .../bolt/BoltStatefulInstanceTest.java | 50 ++++++------- .../spout/ActivateDeactivateTest.java | 16 ++--- .../instance/spout/SpoutInstanceTest.java | 72 +++++++++---------- .../spout/SpoutStatefulInstanceTest.java | 46 ++++++------ 38 files changed, 372 insertions(+), 372 deletions(-) rename heron/instance/src/cpp/{slave/slave.cpp => executor/executor.cpp} (73%) rename heron/instance/src/cpp/{slave/slave.h => executor/executor.h} (79%) rename heron/instance/src/cpp/{slave => executor}/imetrics-registrar-impl.cpp (95%) rename heron/instance/src/cpp/{slave => executor}/imetrics-registrar-impl.h (93%) rename heron/instance/src/cpp/{slave => executor}/instance-base.h (89%) rename heron/instance/src/cpp/{slave => executor}/outgoing-tuple-collection.cpp (95%) rename heron/instance/src/cpp/{slave => executor}/outgoing-tuple-collection.h (88%) rename heron/instance/src/cpp/{slave => executor}/task-context-impl.cpp (99%) rename heron/instance/src/cpp/{slave => executor}/task-context-impl.h (97%) rename heron/instance/src/java/org/apache/heron/instance/{Slave.java => Executor.java} (89%) rename heron/instance/tests/java/org/apache/heron/instance/{SlaveTester.java => ExecutorTester.java} (69%) diff --git a/heron/instance/src/cpp/BUILD b/heron/instance/src/cpp/BUILD index f590973379d..207ace412c6 100644 --- a/heron/instance/src/cpp/BUILD +++ b/heron/instance/src/cpp/BUILD @@ -22,15 +22,15 @@ cc_binary( "gateway/stmgr-client.cpp", "gateway/stmgr-client.h", "instance-main.cpp", - "slave/imetrics-registrar-impl.cpp", - "slave/imetrics-registrar-impl.h", - "slave/instance-base.h", - "slave/outgoing-tuple-collection.cpp", - "slave/outgoing-tuple-collection.h", - "slave/slave.cpp", - "slave/slave.h", - "slave/task-context-impl.cpp", - "slave/task-context-impl.h", + "executor/imetrics-registrar-impl.cpp", + "executor/imetrics-registrar-impl.h", + "executor/instance-base.h", + 
"executor/outgoing-tuple-collection.cpp", + "executor/outgoing-tuple-collection.h", + "executor/executor.cpp", + "executor/executor.h", + "executor/task-context-impl.cpp", + "executor/task-context-impl.h", "spoutimpl/root-tuple-info.h", "spoutimpl/spout-instance.cpp", "spoutimpl/spout-instance.h", diff --git a/heron/instance/src/cpp/boltimpl/bolt-instance.cpp b/heron/instance/src/cpp/boltimpl/bolt-instance.cpp index 37cba543639..a7916d45eeb 100644 --- a/heron/instance/src/cpp/boltimpl/bolt-instance.cpp +++ b/heron/instance/src/cpp/boltimpl/bolt-instance.cpp @@ -37,11 +37,11 @@ namespace instance { BoltInstance::BoltInstance(std::shared_ptr eventLoop, std::shared_ptr taskContext, - NotifyingCommunicator>* dataToSlave, - NotifyingCommunicator* dataFromSlave, + NotifyingCommunicator>* dataToExecutor, + NotifyingCommunicator* dataFromExecutor, void* dllHandle) - : taskContext_(taskContext), dataToSlave_(dataToSlave), - dataFromSlave_(dataFromSlave), eventLoop_(eventLoop), bolt_(NULL), active_(false), + : taskContext_(taskContext), dataToExecutor_(dataToExecutor), + dataFromExecutor_(dataFromExecutor), eventLoop_(eventLoop), bolt_(NULL), active_(false), tickTimer_(-1) { maxWriteBufferSize_ = config::HeronInternalsConfigReader::Instance() ->GetHeronInstanceInternalBoltWriteQueueCapacity(); @@ -61,7 +61,7 @@ BoltInstance::BoltInstance(std::shared_ptr eventLoop, taskContext_->getConfig())); metrics_.reset(new BoltMetrics(taskContext->getMetricsRegistrar())); collector_.reset(new BoltOutputCollectorImpl(serializer_, taskContext_, - dataFromSlave_, metrics_)); + dataFromExecutor_, metrics_)); } BoltInstance::~BoltInstance() { @@ -100,7 +100,7 @@ void BoltInstance::Deactivate() { } void BoltInstance::DoWork() { - dataToSlave_->resumeConsumption(); + dataToExecutor_->resumeConsumption(); } void BoltInstance::executeTuple(const proto::api::StreamId& stream, @@ -130,8 +130,8 @@ void BoltInstance::HandleGatewayTuples(pool_unique_ptrsize() > maxWriteBufferSize_) { - dataToSlave_->stopConsumption(); + if (dataFromExecutor_->size() > maxWriteBufferSize_) { + dataToExecutor_->stopConsumption(); } } diff --git a/heron/instance/src/cpp/boltimpl/bolt-instance.h b/heron/instance/src/cpp/boltimpl/bolt-instance.h index 31b949da3fe..8bcf73c5963 100644 --- a/heron/instance/src/cpp/boltimpl/bolt-instance.h +++ b/heron/instance/src/cpp/boltimpl/bolt-instance.h @@ -22,7 +22,7 @@ #include -#include "slave/instance-base.h" +#include "executor/instance-base.h" #include "proto/messages.h" #include "network/network.h" @@ -41,8 +41,8 @@ namespace instance { class BoltInstance : public InstanceBase { public: BoltInstance(std::shared_ptr eventLoop, std::shared_ptr taskContext, - NotifyingCommunicator>* dataToSlave, - NotifyingCommunicator* dataFromSlave, + NotifyingCommunicator>* dataToExecutor, + NotifyingCommunicator* dataFromExecutor, void* dllHandle); virtual ~BoltInstance(); @@ -62,8 +62,8 @@ class BoltInstance : public InstanceBase { const proto::system::HeronDataTuple& tup); std::shared_ptr taskContext_; - NotifyingCommunicator>* dataToSlave_; - NotifyingCommunicator* dataFromSlave_; + NotifyingCommunicator>* dataToExecutor_; + NotifyingCommunicator* dataFromExecutor_; std::shared_ptr eventLoop_; api::bolt::IBolt* bolt_; std::shared_ptr serializer_; diff --git a/heron/instance/src/cpp/boltimpl/bolt-output-collector-impl.cpp b/heron/instance/src/cpp/boltimpl/bolt-output-collector-impl.cpp index 4cc360515cf..102c0933080 100644 --- a/heron/instance/src/cpp/boltimpl/bolt-output-collector-impl.cpp +++ 
b/heron/instance/src/cpp/boltimpl/bolt-output-collector-impl.cpp @@ -45,10 +45,10 @@ namespace instance { BoltOutputCollectorImpl::BoltOutputCollectorImpl( std::shared_ptr serializer, std::shared_ptr taskContext, - NotifyingCommunicator* dataFromSlave, + NotifyingCommunicator* dataFromExecutor, std::shared_ptr metrics) : api::bolt::IBoltOutputCollector(serializer), metrics_(metrics) { - collector_ = new OutgoingTupleCollection(taskContext->getThisComponentName(), dataFromSlave); + collector_ = new OutgoingTupleCollection(taskContext->getThisComponentName(), dataFromExecutor); ackingEnabled_ = taskContext->isAckingEnabled(); taskId_ = taskContext->getThisTaskId(); } diff --git a/heron/instance/src/cpp/boltimpl/bolt-output-collector-impl.h b/heron/instance/src/cpp/boltimpl/bolt-output-collector-impl.h index 20effb7819a..48fcc2b5ed5 100644 --- a/heron/instance/src/cpp/boltimpl/bolt-output-collector-impl.h +++ b/heron/instance/src/cpp/boltimpl/bolt-output-collector-impl.h @@ -33,8 +33,8 @@ #include "utils/notifying-communicator.h" #include "bolt/ibolt-output-collector.h" #include "serializer/ipluggable-serializer.h" -#include "slave/task-context-impl.h" -#include "slave/outgoing-tuple-collection.h" +#include "executor/task-context-impl.h" +#include "executor/outgoing-tuple-collection.h" #include "boltimpl/bolt-metrics.h" namespace heron { @@ -44,7 +44,7 @@ class BoltOutputCollectorImpl : public api::bolt::IBoltOutputCollector { public: BoltOutputCollectorImpl(std::shared_ptr serializer, std::shared_ptr taskContext, - NotifyingCommunicator* dataFromSlave, + NotifyingCommunicator* dataFromExecutor, std::shared_ptr metrics); virtual ~BoltOutputCollectorImpl(); diff --git a/heron/instance/src/cpp/boltimpl/tuple-impl.h b/heron/instance/src/cpp/boltimpl/tuple-impl.h index f7f0835c8f3..cf5c890ec98 100644 --- a/heron/instance/src/cpp/boltimpl/tuple-impl.h +++ b/heron/instance/src/cpp/boltimpl/tuple-impl.h @@ -30,7 +30,7 @@ #include "topology/task-context.h" #include "serializer/ipluggable-serializer.h" -#include "slave/task-context-impl.h" +#include "executor/task-context-impl.h" namespace heron { namespace instance { diff --git a/heron/instance/src/cpp/slave/slave.cpp b/heron/instance/src/cpp/executor/executor.cpp similarity index 73% rename from heron/instance/src/cpp/slave/slave.cpp rename to heron/instance/src/cpp/executor/executor.cpp index f50e20109f6..af0e1eb8a65 100644 --- a/heron/instance/src/cpp/slave/slave.cpp +++ b/heron/instance/src/cpp/executor/executor.cpp @@ -24,22 +24,22 @@ #include "glog/logging.h" -#include "slave/slave.h" +#include "executor/executor.h" #include "proto/messages.h" #include "network/network.h" #include "basics/basics.h" -#include "slave/imetrics-registrar-impl.h" -#include "slave/task-context-impl.h" +#include "executor/imetrics-registrar-impl.h" +#include "executor/task-context-impl.h" #include "spoutimpl/spout-instance.h" #include "boltimpl/bolt-instance.h" namespace heron { namespace instance { -Slave::Slave(int myTaskId, const std::string& topologySo) +Executor::Executor(int myTaskId, const std::string& topologySo) : myTaskId_(myTaskId), taskContext_(new TaskContextImpl(myTaskId_)), - dataToSlave_(NULL), dataFromSlave_(NULL), metricsFromSlave_(NULL), + dataToExecutor_(NULL), dataFromExecutor_(NULL), metricsFromExecutor_(NULL), instance_(NULL), eventLoop_(std::make_shared()) { auto pplan = new proto::system::PhysicalPlan(); pplan_typename_ = pplan->GetTypeName(); @@ -51,38 +51,38 @@ Slave::Slave(int myTaskId, const std::string& topologySo) } } -Slave::~Slave() { 
+Executor::~Executor() { if (dlclose(dllHandle_) != 0) { LOG(FATAL) << "dlclose failed with error " << dlerror(); } } -void Slave::setCommunicators( - NotifyingCommunicator>* dataToSlave, - NotifyingCommunicator* dataFromSlave, - NotifyingCommunicator* metricsFromSlave) { - dataToSlave_ = dataToSlave; - dataFromSlave_ = dataFromSlave; - metricsFromSlave_ = metricsFromSlave; +void Executor::setCommunicators( + NotifyingCommunicator>* dataToExecutor, + NotifyingCommunicator* dataFromExecutor, + NotifyingCommunicator* metricsFromExecutor) { + dataToExecutor_ = dataToExecutor; + dataFromExecutor_ = dataFromExecutor; + metricsFromExecutor_ = metricsFromExecutor; std::shared_ptr registrar(new IMetricsRegistrarImpl(eventLoop_, - metricsFromSlave)); + metricsFromExecutor)); taskContext_->setMericsRegistrar(registrar); } -void Slave::Start() { - LOG(INFO) << "Creating slave thread"; - slaveThread_.reset(new std::thread(&Slave::InternalStart, this)); +void Executor::Start() { + LOG(INFO) << "Creating executor thread"; + executorThread_.reset(new std::thread(&Executor::InternalStart, this)); } -// This is the one thats running in the slave thread -void Slave::InternalStart() { - LOG(INFO) << "Slave thread started up"; +// This is the one thats running in the executor thread +void Executor::InternalStart() { + LOG(INFO) << "Executor thread started up"; eventLoop_->loop(); } -void Slave::HandleGatewayData(pool_unique_ptr msg) { +void Executor::HandleGatewayData(pool_unique_ptr msg) { if (msg->GetTypeName() == pplan_typename_) { - LOG(INFO) << "Slave Received a new pplan message from Gateway"; + LOG(INFO) << "Executor Received a new pplan message from Gateway"; auto pplan = pool_unique_ptr( static_cast(msg.release())); HandleNewPhysicalPlan(std::move(pplan)); @@ -93,7 +93,7 @@ void Slave::HandleGatewayData(pool_unique_ptr msg) { } } -void Slave::HandleNewPhysicalPlan(pool_unique_ptr pplan) { +void Executor::HandleNewPhysicalPlan(pool_unique_ptr pplan) { std::shared_ptr newPplan = std::move(pplan); taskContext_->newPhysicalPlan(newPplan); if (!instance_) { @@ -101,11 +101,11 @@ void Slave::HandleNewPhysicalPlan(pool_unique_ptr p if (taskContext_->isSpout()) { LOG(INFO) << "We are a spout"; instance_ = new SpoutInstance(eventLoop_, taskContext_, - dataFromSlave_, dllHandle_); + dataFromExecutor_, dllHandle_); } else { LOG(INFO) << "We are a bolt"; instance_ = new BoltInstance(eventLoop_, taskContext_, - dataToSlave_, dataFromSlave_, dllHandle_); + dataToExecutor_, dataFromExecutor_, dllHandle_); } if (newPplan->topology().state() == proto::api::TopologyState::RUNNING) { LOG(INFO) << "Starting the instance"; @@ -125,7 +125,7 @@ void Slave::HandleNewPhysicalPlan(pool_unique_ptr p } } -void Slave::HandleStMgrTuples(pool_unique_ptr tupleSet) { +void Executor::HandleStMgrTuples(pool_unique_ptr tupleSet) { if (instance_) { instance_->HandleGatewayTuples(std::move(tupleSet)); } else { @@ -133,7 +133,7 @@ void Slave::HandleStMgrTuples(pool_unique_ptr tup } } -void Slave::HandleGatewayDataConsumed() { +void Executor::HandleGatewayDataConsumed() { if (instance_) { instance_->DoWork(); } diff --git a/heron/instance/src/cpp/slave/slave.h b/heron/instance/src/cpp/executor/executor.h similarity index 79% rename from heron/instance/src/cpp/slave/slave.h rename to heron/instance/src/cpp/executor/executor.h index 3e5f9468e6d..72e6f97b1fd 100644 --- a/heron/instance/src/cpp/slave/slave.h +++ b/heron/instance/src/cpp/executor/executor.h @@ -17,8 +17,8 @@ * under the License. 
*/ -#ifndef HERON_INSTANCE_SLAVE_SLAVE_H_ -#define HERON_INSTANCE_SLAVE_SLAVE_H_ +#ifndef HERON_INSTANCE_EXECUTOR_EXECUTOR_H_ +#define HERON_INSTANCE_EXECUTOR_EXECUTOR_H_ #include #include @@ -27,25 +27,25 @@ #include "basics/basics.h" #include "utils/notifying-communicator.h" -#include "slave/task-context-impl.h" -#include "slave/instance-base.h" +#include "executor/task-context-impl.h" +#include "executor/instance-base.h" namespace heron { namespace instance { -class Slave { +class Executor { public: - Slave(int myTaskId, const std::string& topologySo); - ~Slave(); + Executor(int myTaskId, const std::string& topologySo); + ~Executor(); // This essentially fires a thread with internalStart void Start(); std::shared_ptr eventLoop() { return eventLoop_; } void setCommunicators( - NotifyingCommunicator>* dataToSlave, - NotifyingCommunicator* dataFromSlave, - NotifyingCommunicator* metricsFromSlave); + NotifyingCommunicator>* dataToExecutor, + NotifyingCommunicator* dataFromExecutor, + NotifyingCommunicator* metricsFromExecutor); // Handles data from gateway thread void HandleGatewayData(pool_unique_ptr msg); @@ -57,7 +57,7 @@ class Slave { void HandleGatewayMetricsConsumed() { } private: - // This is the one thats running in the slave thread + // This is the one thats running in the executor thread void InternalStart(); // Called when a new phyiscal plan is received void HandleNewPhysicalPlan(pool_unique_ptr pplan); @@ -66,17 +66,17 @@ class Slave { int myTaskId_; std::shared_ptr taskContext_; - NotifyingCommunicator>* dataToSlave_; - NotifyingCommunicator* dataFromSlave_; - NotifyingCommunicator* metricsFromSlave_; + NotifyingCommunicator>* dataToExecutor_; + NotifyingCommunicator* dataFromExecutor_; + NotifyingCommunicator* metricsFromExecutor_; InstanceBase* instance_; std::shared_ptr eventLoop_; void* dllHandle_; std::string pplan_typename_; - std::unique_ptr slaveThread_; + std::unique_ptr executorThread_; }; } // namespace instance } // namespace heron -#endif // HERON_INSTANCE_SLAVE_SLAVE_H_ +#endif // HERON_INSTANCE_EXECUTOR_EXECUTOR_H_ diff --git a/heron/instance/src/cpp/slave/imetrics-registrar-impl.cpp b/heron/instance/src/cpp/executor/imetrics-registrar-impl.cpp similarity index 95% rename from heron/instance/src/cpp/slave/imetrics-registrar-impl.cpp rename to heron/instance/src/cpp/executor/imetrics-registrar-impl.cpp index 907f68f63eb..279387b812e 100644 --- a/heron/instance/src/cpp/slave/imetrics-registrar-impl.cpp +++ b/heron/instance/src/cpp/executor/imetrics-registrar-impl.cpp @@ -21,7 +21,7 @@ #include #include #include -#include "slave/imetrics-registrar-impl.h" +#include "executor/imetrics-registrar-impl.h" #include "basics/basics.h" #include "proto/messages.h" #include "network/network.h" @@ -30,8 +30,8 @@ namespace heron { namespace instance { IMetricsRegistrarImpl::IMetricsRegistrarImpl(std::shared_ptr eventLoop, - NotifyingCommunicator* metricsFromSlave) - : eventLoop_(eventLoop), metricsFromSlave_(metricsFromSlave) { + NotifyingCommunicator* metricsFromExecutor) + : eventLoop_(eventLoop), metricsFromExecutor_(metricsFromExecutor) { } IMetricsRegistrarImpl::~IMetricsRegistrarImpl() { @@ -89,7 +89,7 @@ void IMetricsRegistrarImpl::sendMetrics(int timeBucketSizeInSecs) { } } } - metricsFromSlave_->enqueue(msg); + metricsFromExecutor_->enqueue(msg); } } // namespace instance diff --git a/heron/instance/src/cpp/slave/imetrics-registrar-impl.h b/heron/instance/src/cpp/executor/imetrics-registrar-impl.h similarity index 93% rename from 
heron/instance/src/cpp/slave/imetrics-registrar-impl.h rename to heron/instance/src/cpp/executor/imetrics-registrar-impl.h index e42d2e9b690..d79da22a3e9 100644 --- a/heron/instance/src/cpp/slave/imetrics-registrar-impl.h +++ b/heron/instance/src/cpp/executor/imetrics-registrar-impl.h @@ -18,8 +18,8 @@ */ -#ifndef HERON_INSTANCE_SLAVE_IMETRICS_REGISTRAR_IMPL_H_ -#define HERON_INSTANCE_SLAVE_IMETRICS_REGISTRAR_IMPL_H_ +#ifndef HERON_INSTANCE_EXECUTOR_IMETRICS_REGISTRAR_IMPL_H_ +#define HERON_INSTANCE_EXECUTOR_IMETRICS_REGISTRAR_IMPL_H_ #include #include @@ -43,7 +43,7 @@ namespace instance { class IMetricsRegistrarImpl : public api::metric::IMetricsRegistrar { public: explicit IMetricsRegistrarImpl(std::shared_ptr eventLoop, - NotifyingCommunicator* metricsFromSlave); + NotifyingCommunicator* metricsFromExecutor); virtual ~IMetricsRegistrarImpl(); virtual void registerMetric(const std::string& metricName, std::shared_ptr metric, @@ -58,7 +58,7 @@ class IMetricsRegistrarImpl : public api::metric::IMetricsRegistrar { std::map> multiMetrics_; std::map> timeBuckets_; std::shared_ptr eventLoop_; - NotifyingCommunicator* metricsFromSlave_; + NotifyingCommunicator* metricsFromExecutor_; }; } // namespace instance diff --git a/heron/instance/src/cpp/slave/instance-base.h b/heron/instance/src/cpp/executor/instance-base.h similarity index 89% rename from heron/instance/src/cpp/slave/instance-base.h rename to heron/instance/src/cpp/executor/instance-base.h index a10398fd5f4..40480d20cf2 100644 --- a/heron/instance/src/cpp/slave/instance-base.h +++ b/heron/instance/src/cpp/executor/instance-base.h @@ -17,8 +17,8 @@ * under the License. */ -#ifndef HERON_INSTANCE_SLAVE_INSTANCE_BASE_H_ -#define HERON_INSTANCE_SLAVE_INSTANCE_BASE_H_ +#ifndef HERON_INSTANCE_EXECUTOR_INSTANCE_BASE_H_ +#define HERON_INSTANCE_EXECUTOR_INSTANCE_BASE_H_ #include #include "proto/messages.h" @@ -42,4 +42,4 @@ class InstanceBase { } // namespace instance } // namespace heron -#endif // HERON_INSTANCE_SLAVE_INSTANCE_BASE_H_ +#endif // HERON_INSTANCE_EXECUTOR_INSTANCE_BASE_H_ diff --git a/heron/instance/src/cpp/slave/outgoing-tuple-collection.cpp b/heron/instance/src/cpp/executor/outgoing-tuple-collection.cpp similarity index 95% rename from heron/instance/src/cpp/slave/outgoing-tuple-collection.cpp rename to heron/instance/src/cpp/executor/outgoing-tuple-collection.cpp index 10b1b28e717..768336d5505 100644 --- a/heron/instance/src/cpp/slave/outgoing-tuple-collection.cpp +++ b/heron/instance/src/cpp/executor/outgoing-tuple-collection.cpp @@ -18,7 +18,7 @@ */ #include -#include "slave/outgoing-tuple-collection.h" +#include "executor/outgoing-tuple-collection.h" #include "proto/messages.h" #include "network/network.h" #include "basics/basics.h" @@ -29,8 +29,8 @@ namespace heron { namespace instance { OutgoingTupleCollection::OutgoingTupleCollection(const std::string& componentName, - NotifyingCommunicator* dataFromSlave) - : componentName_(componentName), dataFromSlave_(dataFromSlave), + NotifyingCommunicator* dataFromExecutor) + : componentName_(componentName), dataFromExecutor_(dataFromExecutor), currentDataTuple_(NULL), currentControlTuple_(NULL), totalDataSizeEmitted_(0), totalDataTuplesEmitted_(0), totalAckTuplesEmitted_(0), totalFailTuplesEmitted_(0), currentDataTupleSize_(0) { @@ -106,14 +106,14 @@ void OutgoingTupleCollection::flushRemaining() { if (currentDataTuple_) { auto msg = new proto::system::HeronTupleSet(); msg->set_allocated_data(currentDataTuple_); - dataFromSlave_->enqueue(msg); + dataFromExecutor_->enqueue(msg); 
currentDataTuple_ = NULL; currentDataTupleSize_ = 0; } if (currentControlTuple_) { auto msg = new proto::system::HeronTupleSet(); msg->set_allocated_control(currentControlTuple_); - dataFromSlave_->enqueue(msg); + dataFromExecutor_->enqueue(msg); currentControlTuple_ = NULL; } } diff --git a/heron/instance/src/cpp/slave/outgoing-tuple-collection.h b/heron/instance/src/cpp/executor/outgoing-tuple-collection.h similarity index 88% rename from heron/instance/src/cpp/slave/outgoing-tuple-collection.h rename to heron/instance/src/cpp/executor/outgoing-tuple-collection.h index 9718096f3f0..3c47c297d4c 100644 --- a/heron/instance/src/cpp/slave/outgoing-tuple-collection.h +++ b/heron/instance/src/cpp/executor/outgoing-tuple-collection.h @@ -17,8 +17,8 @@ * under the License. */ -#ifndef HERON_INSTANCE_SLAVE_OUTGOING_TUPLE_COLLECTION_H_ -#define HERON_INSTANCE_SLAVE_OUTGOING_TUPLE_COLLECTION_H_ +#ifndef HERON_INSTANCE_EXECUTOR_OUTGOING_TUPLE_COLLECTION_H_ +#define HERON_INSTANCE_EXECUTOR_OUTGOING_TUPLE_COLLECTION_H_ #include #include "proto/messages.h" @@ -33,7 +33,7 @@ namespace instance { class OutgoingTupleCollection { public: OutgoingTupleCollection(const std::string& componentName, - NotifyingCommunicator* dataFromSlave); + NotifyingCommunicator* dataFromExecutor); ~OutgoingTupleCollection(); void sendOutTuples(); @@ -54,7 +54,7 @@ class OutgoingTupleCollection { void flushRemaining(); std::string componentName_; - NotifyingCommunicator* dataFromSlave_; + NotifyingCommunicator* dataFromExecutor_; proto::system::HeronDataTupleSet* currentDataTuple_; proto::system::HeronControlTupleSet* currentControlTuple_; int64_t totalDataSizeEmitted_; @@ -70,4 +70,4 @@ class OutgoingTupleCollection { } // namespace instance } // namespace heron -#endif // HERON_INSTANCE_SLAVE_OUTGOING_TUPLE_COLLECTION_H_ +#endif // HERON_INSTANCE_EXECUTOR_OUTGOING_TUPLE_COLLECTION_H_ diff --git a/heron/instance/src/cpp/slave/task-context-impl.cpp b/heron/instance/src/cpp/executor/task-context-impl.cpp similarity index 99% rename from heron/instance/src/cpp/slave/task-context-impl.cpp rename to heron/instance/src/cpp/executor/task-context-impl.cpp index b54a3db08be..4d7bc47bfd4 100644 --- a/heron/instance/src/cpp/slave/task-context-impl.cpp +++ b/heron/instance/src/cpp/executor/task-context-impl.cpp @@ -22,7 +22,7 @@ #include #include #include -#include "slave/task-context-impl.h" +#include "executor/task-context-impl.h" #include "basics/basics.h" #include "proto/messages.h" #include "network/network.h" diff --git a/heron/instance/src/cpp/slave/task-context-impl.h b/heron/instance/src/cpp/executor/task-context-impl.h similarity index 97% rename from heron/instance/src/cpp/slave/task-context-impl.h rename to heron/instance/src/cpp/executor/task-context-impl.h index 2189f500059..27ba164d140 100644 --- a/heron/instance/src/cpp/slave/task-context-impl.h +++ b/heron/instance/src/cpp/executor/task-context-impl.h @@ -18,8 +18,8 @@ */ -#ifndef HERON_INSTANCE_SLAVE_TASK_CONTEXT_IMPL_H_ -#define HERON_INSTANCE_SLAVE_TASK_CONTEXT_IMPL_H_ +#ifndef HERON_INSTANCE_EXECUTOR_TASK_CONTEXT_IMPL_H_ +#define HERON_INSTANCE_EXECUTOR_TASK_CONTEXT_IMPL_H_ #include #include diff --git a/heron/instance/src/cpp/gateway/gateway.cpp b/heron/instance/src/cpp/gateway/gateway.cpp index 883cb6bed9f..94a7f9b2c94 100644 --- a/heron/instance/src/cpp/gateway/gateway.cpp +++ b/heron/instance/src/cpp/gateway/gateway.cpp @@ -39,10 +39,10 @@ Gateway::Gateway(const std::string& topologyName, const std::string& stmgrId, int stmgrPort, int metricsMgrPort, 
std::shared_ptr eventLoop) : topologyName_(topologyName), topologyId_(topologyId), stmgrPort_(stmgrPort), - metricsMgrPort_(metricsMgrPort), dataToSlave_(NULL), dataFromSlave_(NULL), - metricsFromSlave_(NULL), eventLoop_(eventLoop), + metricsMgrPort_(metricsMgrPort), dataToExecutor_(NULL), dataFromExecutor_(NULL), + metricsFromExecutor_(NULL), eventLoop_(eventLoop), maxReadBufferSize_(128), maxWriteBufferSize_(128), - readingFromSlave_(true) { + readingFromExecutor_(true) { maxPacketSize_ = config::HeronInternalsConfigReader::Instance() ->GetHeronStreammgrNetworkOptionsMaximumPacketMb() * 1_MB; instanceProto_.set_instance_id(instanceId); @@ -83,10 +83,10 @@ void Gateway::Start() { std::placeholders::_1))); stmgrClient_->Start(); - // Setup timer to periodically check for resumption of slave consumption + // Setup timer to periodically check for resumption of executor consumption CHECK_GT( eventLoop_->registerTimer( - [this](EventLoop::Status status) { this->ResumeConsumingFromSlaveTimer(); }, true, + [this](EventLoop::Status status) { this->ResumeConsumingFromExecutorTimer(); }, true, 10 * 1000), 0); eventLoop_->loop(); } @@ -106,47 +106,47 @@ void Gateway::HandleNewPhysicalPlan(pool_unique_ptr ->GetHeronInstanceInternalBoltWriteQueueCapacity(); } - dataToSlave_->enqueue(std::move(pplan)); + dataToExecutor_->enqueue(std::move(pplan)); } void Gateway::HandleStMgrTuples(pool_unique_ptr msg) { - dataToSlave_->enqueue(std::move(msg)); - if (dataToSlave_->size() > maxReadBufferSize_) { + dataToExecutor_->enqueue(std::move(msg)); + if (dataToExecutor_->size() > maxReadBufferSize_) { stmgrClient_->putBackPressure(); } } -void Gateway::HandleSlaveDataConsumed() { - if (dataToSlave_->size() < maxReadBufferSize_) { +void Gateway::HandleExecutorDataConsumed() { + if (dataToExecutor_->size() < maxReadBufferSize_) { stmgrClient_->removeBackPressure(); } } -void Gateway::HandleSlaveData(google::protobuf::Message* msg) { +void Gateway::HandleExecutorData(google::protobuf::Message* msg) { auto tupleSet = static_cast(msg); stmgrClient_->SendTupleMessage(*tupleSet); delete tupleSet; if (stmgrClient_->getOutstandingBytes() > (maxWriteBufferSize_ * maxPacketSize_) && - readingFromSlave_) { + readingFromExecutor_) { LOG(INFO) << "Gateway buffered too much data to be written to stmgr; " - << "Clamping down on consumption from slave"; - dataFromSlave_->stopConsumption(); - readingFromSlave_ = false; + << "Clamping down on consumption from executor"; + dataFromExecutor_->stopConsumption(); + readingFromExecutor_ = false; } } -void Gateway::HandleSlaveMetrics(google::protobuf::Message* msg) { +void Gateway::HandleExecutorMetrics(google::protobuf::Message* msg) { auto metrics = static_cast(msg); metricsMgrClient_->SendMetrics(metrics); } -void Gateway::ResumeConsumingFromSlaveTimer() { +void Gateway::ResumeConsumingFromExecutorTimer() { if (stmgrClient_->getOutstandingBytes() < (maxWriteBufferSize_ * maxPacketSize_) && - !readingFromSlave_) { + !readingFromExecutor_) { LOG(INFO) << "Gateway buffer now under max limit; " - << "Resuming consumption from slave"; - dataFromSlave_->resumeConsumption(); - readingFromSlave_ = true; + << "Resuming consumption from executor"; + dataFromExecutor_->resumeConsumption(); + readingFromExecutor_ = true; } } diff --git a/heron/instance/src/cpp/gateway/gateway.h b/heron/instance/src/cpp/gateway/gateway.h index cfd412099e9..a726fee0013 100644 --- a/heron/instance/src/cpp/gateway/gateway.h +++ b/heron/instance/src/cpp/gateway/gateway.h @@ -46,29 +46,29 @@ class Gateway { // All kinds 
of initialization like starting clients void Start(); - // Called when Slave indicates that it consumed some data - void HandleSlaveDataConsumed(); + // Called when Executor indicates that it consumed some data + void HandleExecutorDataConsumed(); - // Called when we need to consume data from slave - void HandleSlaveData(google::protobuf::Message* msg); + // Called when we need to consume data from executor + void HandleExecutorData(google::protobuf::Message* msg); - // Called when we need to consume metrics from slave - void HandleSlaveMetrics(google::protobuf::Message* msg); + // Called when we need to consume metrics from executor + void HandleExecutorMetrics(google::protobuf::Message* msg); std::shared_ptr eventLoop() { return eventLoop_; } void setCommunicators( - NotifyingCommunicator>* dataToSlave, - NotifyingCommunicator* dataFromSlave, - NotifyingCommunicator* metricsFromSlave) { - dataToSlave_ = dataToSlave; - dataFromSlave_ = dataFromSlave; - metricsFromSlave_ = metricsFromSlave; + NotifyingCommunicator>* dataToExecutor, + NotifyingCommunicator* dataFromExecutor, + NotifyingCommunicator* metricsFromExecutor) { + dataToExecutor_ = dataToExecutor; + dataFromExecutor_ = dataFromExecutor; + metricsFromExecutor_ = metricsFromExecutor; } private: void HandleNewPhysicalPlan(pool_unique_ptr pplan); void HandleStMgrTuples(pool_unique_ptr tuples); - void ResumeConsumingFromSlaveTimer(); + void ResumeConsumingFromExecutorTimer(); std::string topologyName_; std::string topologyId_; int stmgrPort_; @@ -77,20 +77,20 @@ class Gateway { std::shared_ptr stmgrClient_; std::shared_ptr metricsMgrClient_; std::shared_ptr gatewayMetrics_; - NotifyingCommunicator>* dataToSlave_; - NotifyingCommunicator* dataFromSlave_; - NotifyingCommunicator* metricsFromSlave_; + NotifyingCommunicator>* dataToExecutor_; + NotifyingCommunicator* dataFromExecutor_; + NotifyingCommunicator* metricsFromExecutor_; std::shared_ptr eventLoop_; // This is the max number of outstanding packets that are yet to be - // consumed by the Slave + // consumed by the Executor int maxReadBufferSize_; // This is the max number of outstanding packets that are buffered // to be sent to the stmgr int maxWriteBufferSize_; // The maximum size of a packet int maxPacketSize_; - // Are we actively reading from slaveQueue - bool readingFromSlave_; + // Are we actively reading from executorQueue + bool readingFromExecutor_; }; } // namespace instance diff --git a/heron/instance/src/cpp/gateway/stmgr-client.cpp b/heron/instance/src/cpp/gateway/stmgr-client.cpp index 5fe7adc9ba8..f2dbf745b40 100644 --- a/heron/instance/src/cpp/gateway/stmgr-client.cpp +++ b/heron/instance/src/cpp/gateway/stmgr-client.cpp @@ -172,7 +172,7 @@ void StMgrClient::SendTupleMessage(const proto::system::HeronTupleSet& msg) { void StMgrClient::putBackPressure() { auto conn = static_cast(conn_); if (!conn->isUnderBackPressure()) { - LOG(INFO) << "Buffer to Slave Thread at maximum capacity. Clamping down on reads from Stmgr"; + LOG(INFO) << "Buffer to Executor Thread at maximum capacity. Clamping down on reads from Stmgr"; conn->putBackPressure(); } } @@ -180,7 +180,7 @@ void StMgrClient::putBackPressure() { void StMgrClient::removeBackPressure() { auto conn = static_cast(conn_); if (conn->isUnderBackPressure()) { - LOG(INFO) << "Buffer to Slave Thread less than capacity. Resuming reads from stmgr"; + LOG(INFO) << "Buffer to Executor Thread less than capacity. 
Resuming reads from stmgr"; conn->removeBackPressure(); } } diff --git a/heron/instance/src/cpp/instance-main.cpp b/heron/instance/src/cpp/instance-main.cpp index d81897f39f4..73baab08e28 100644 --- a/heron/instance/src/cpp/instance-main.cpp +++ b/heron/instance/src/cpp/instance-main.cpp @@ -30,7 +30,7 @@ #include "config/heron-internals-config-reader.h" #include "gateway/gateway.h" -#include "slave/slave.h" +#include "executor/executor.h" DEFINE_string(topology_name, "", "Name of the topology"); DEFINE_string(topology_id, "", "Id of the topology"); @@ -61,36 +61,36 @@ int main(int argc, char* argv[]) { FLAGS_task_id, FLAGS_component_index, FLAGS_stmgr_id, FLAGS_stmgr_port, FLAGS_metricsmgr_port, eventLoop); - auto slave = new heron::instance::Slave(FLAGS_task_id, FLAGS_topology_binary); + auto executor = new heron::instance::Executor(FLAGS_task_id, FLAGS_topology_binary); - auto dataToSlave = + auto dataToExecutor = new heron::instance::NotifyingCommunicator>( - slave->eventLoop(), - std::bind(&heron::instance::Slave::HandleGatewayData, - slave, std::placeholders::_1), + executor->eventLoop(), + std::bind(&heron::instance::Executor::HandleGatewayData, + executor, std::placeholders::_1), gateway->eventLoop(), - std::bind(&heron::instance::Gateway::HandleSlaveDataConsumed, + std::bind(&heron::instance::Gateway::HandleExecutorDataConsumed, gateway)); - auto dataFromSlave = new heron::instance::NotifyingCommunicator( + auto dataFromExecutor = new heron::instance::NotifyingCommunicator( gateway->eventLoop(), - std::bind(&heron::instance::Gateway::HandleSlaveData, + std::bind(&heron::instance::Gateway::HandleExecutorData, gateway, std::placeholders::_1), - slave->eventLoop(), - std::bind(&heron::instance::Slave::HandleGatewayDataConsumed, - slave)); + executor->eventLoop(), + std::bind(&heron::instance::Executor::HandleGatewayDataConsumed, + executor)); - auto metricsFromSlave = new heron::instance::NotifyingCommunicator( + auto metricsFromExecutor = new heron::instance::NotifyingCommunicator( gateway->eventLoop(), - std::bind(&heron::instance::Gateway::HandleSlaveMetrics, + std::bind(&heron::instance::Gateway::HandleExecutorMetrics, gateway, std::placeholders::_1), - slave->eventLoop(), - std::bind(&heron::instance::Slave::HandleGatewayMetricsConsumed, - slave)); + executor->eventLoop(), + std::bind(&heron::instance::Executor::HandleGatewayMetricsConsumed, + executor)); - gateway->setCommunicators(dataToSlave, dataFromSlave, metricsFromSlave); - slave->setCommunicators(dataToSlave, dataFromSlave, metricsFromSlave); - slave->Start(); // goes off to a thread + gateway->setCommunicators(dataToExecutor, dataFromExecutor, metricsFromExecutor); + executor->setCommunicators(dataToExecutor, dataFromExecutor, metricsFromExecutor); + executor->Start(); // goes off to a thread gateway->Start(); // never returns return 0; } diff --git a/heron/instance/src/cpp/spoutimpl/spout-instance.cpp b/heron/instance/src/cpp/spoutimpl/spout-instance.cpp index 03747947f5a..e4412f575e6 100644 --- a/heron/instance/src/cpp/spoutimpl/spout-instance.cpp +++ b/heron/instance/src/cpp/spoutimpl/spout-instance.cpp @@ -35,10 +35,10 @@ namespace instance { SpoutInstance::SpoutInstance(std::shared_ptr eventLoop, std::shared_ptr taskContext, - NotifyingCommunicator* dataFromSlave, + NotifyingCommunicator* dataFromExecutor, void* dllHandle) : taskContext_(taskContext), - dataFromSlave_(dataFromSlave), eventLoop_(eventLoop), spout_(NULL), active_(false) { + dataFromExecutor_(dataFromExecutor), eventLoop_(eventLoop), spout_(NULL), 
active_(false) { maxWriteBufferSize_ = config::HeronInternalsConfigReader::Instance() ->GetHeronInstanceInternalSpoutWriteQueueCapacity(); maxEmitBatchIntervalMs_ = config::HeronInternalsConfigReader::Instance() @@ -63,7 +63,7 @@ SpoutInstance::SpoutInstance(std::shared_ptr eventLoop, serializer_.reset(api::serializer::IPluggableSerializer::createSerializer( taskContext_->getConfig())); metrics_.reset(new SpoutMetrics(taskContext->getMetricsRegistrar())); - collector_.reset(new SpoutOutputCollectorImpl(serializer_, taskContext_, dataFromSlave_)); + collector_.reset(new SpoutOutputCollectorImpl(serializer_, taskContext_, dataFromExecutor_)); LOG(INFO) << "Instantiated spout for component " << taskContext->getThisComponentName() << " with task_id " << taskContext->getThisTaskId() << " and maxWriteBufferSize_ " << maxWriteBufferSize_ << " and maxEmitBatchIntervalMs " << maxEmitBatchIntervalMs_ @@ -130,7 +130,7 @@ void SpoutInstance::DoWork() { } bool SpoutInstance::canProduceTuple() { - return (active_ && dataFromSlave_->size() < maxWriteBufferSize_); + return (active_ && dataFromExecutor_->size() < maxWriteBufferSize_); } void SpoutInstance::produceTuple() { @@ -173,8 +173,8 @@ bool SpoutInstance::canContinueWork() { int maxSpoutPending = atoi(taskContext_->getConfig() ->get(api::config::Config::TOPOLOGY_MAX_SPOUT_PENDING).c_str()); return active_ && ( - (!ackingEnabled_ && dataFromSlave_->size() < maxWriteBufferSize_) || - (ackingEnabled_ && dataFromSlave_->size() < maxWriteBufferSize_ && + (!ackingEnabled_ && dataFromExecutor_->size() < maxWriteBufferSize_) || + (ackingEnabled_ && dataFromExecutor_->size() < maxWriteBufferSize_ && collector_->numInFlight() < maxSpoutPending)); } diff --git a/heron/instance/src/cpp/spoutimpl/spout-instance.h b/heron/instance/src/cpp/spoutimpl/spout-instance.h index a93ec1481e7..3f14e2173e8 100644 --- a/heron/instance/src/cpp/spoutimpl/spout-instance.h +++ b/heron/instance/src/cpp/spoutimpl/spout-instance.h @@ -22,7 +22,7 @@ #include -#include "slave/instance-base.h" +#include "executor/instance-base.h" #include "proto/messages.h" #include "network/network.h" @@ -41,7 +41,7 @@ namespace instance { class SpoutInstance : public InstanceBase { public: SpoutInstance(std::shared_ptr eventLoop, std::shared_ptr taskContext, - NotifyingCommunicator* dataFromSlave, + NotifyingCommunicator* dataFromExecutor, void* dllHandle); virtual ~SpoutInstance(); @@ -62,7 +62,7 @@ class SpoutInstance : public InstanceBase { void handleAckTuple(const proto::system::AckTuple& ackTuple, bool isAck); std::shared_ptr taskContext_; - NotifyingCommunicator* dataFromSlave_; + NotifyingCommunicator* dataFromExecutor_; std::shared_ptr eventLoop_; api::spout::ISpout* spout_; std::shared_ptr serializer_; diff --git a/heron/instance/src/cpp/spoutimpl/spout-output-collector-impl.cpp b/heron/instance/src/cpp/spoutimpl/spout-output-collector-impl.cpp index 4adf6d75b77..2dd107dd3a6 100644 --- a/heron/instance/src/cpp/spoutimpl/spout-output-collector-impl.cpp +++ b/heron/instance/src/cpp/spoutimpl/spout-output-collector-impl.cpp @@ -33,9 +33,9 @@ namespace instance { SpoutOutputCollectorImpl::SpoutOutputCollectorImpl( std::shared_ptr serializer, std::shared_ptr taskContext, - NotifyingCommunicator* dataFromSlave) + NotifyingCommunicator* dataFromExecutor) : api::spout::ISpoutOutputCollector(serializer) { - collector_ = new OutgoingTupleCollection(taskContext->getThisComponentName(), dataFromSlave); + collector_ = new OutgoingTupleCollection(taskContext->getThisComponentName(), dataFromExecutor); 
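  // The collector never talks to the stream manager directly: emitted tuples
  // are batched by OutgoingTupleCollection and handed to the gateway thread
  // through the dataFromExecutor communicator shown above.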
ackingEnabled_ = taskContext->isAckingEnabled(); taskId_ = taskContext->getThisTaskId(); } diff --git a/heron/instance/src/cpp/spoutimpl/spout-output-collector-impl.h b/heron/instance/src/cpp/spoutimpl/spout-output-collector-impl.h index 93c8dac430c..bf71b0c9ea2 100644 --- a/heron/instance/src/cpp/spoutimpl/spout-output-collector-impl.h +++ b/heron/instance/src/cpp/spoutimpl/spout-output-collector-impl.h @@ -34,8 +34,8 @@ #include "spout/ispout-output-collector.h" #include "serializer/ipluggable-serializer.h" #include "spoutimpl/root-tuple-info.h" -#include "slave/task-context-impl.h" -#include "slave/outgoing-tuple-collection.h" +#include "executor/task-context-impl.h" +#include "executor/outgoing-tuple-collection.h" namespace heron { namespace instance { @@ -44,7 +44,7 @@ class SpoutOutputCollectorImpl : public api::spout::ISpoutOutputCollector { public: SpoutOutputCollectorImpl(std::shared_ptr serializer, std::shared_ptr taskContext, - NotifyingCommunicator* dataFromSlave); + NotifyingCommunicator* dataFromExecutor); virtual ~SpoutOutputCollectorImpl(); virtual void reportError(std::exception& except); diff --git a/heron/instance/src/java/org/apache/heron/instance/Slave.java b/heron/instance/src/java/org/apache/heron/instance/Executor.java similarity index 89% rename from heron/instance/src/java/org/apache/heron/instance/Slave.java rename to heron/instance/src/java/org/apache/heron/instance/Executor.java index 4e577725fca..fe4b176510f 100644 --- a/heron/instance/src/java/org/apache/heron/instance/Slave.java +++ b/heron/instance/src/java/org/apache/heron/instance/Executor.java @@ -34,7 +34,7 @@ import org.apache.heron.common.basics.Communicator; import org.apache.heron.common.basics.FileUtils; import org.apache.heron.common.basics.SingletonRegistry; -import org.apache.heron.common.basics.SlaveLooper; +import org.apache.heron.common.basics.ExecutorLooper; import org.apache.heron.common.config.SystemConfig; import org.apache.heron.common.utils.metrics.MetricsCollector; import org.apache.heron.common.utils.misc.PhysicalPlanHelper; @@ -47,15 +47,15 @@ import org.apache.heron.proto.system.Metrics; /** - * The slave, which in fact is a InstanceFactory, creates a new spout or bolt according to the PhysicalPlan. + * The executor, which in fact is an InstanceFactory, creates a new spout or bolt according to the PhysicalPlan. * First, if the instance is null, it will wait for the PhysicalPlan from inQueue and, if it receives one, * will instantiate a new instance (spout or bolt) according to the PhysicalPlanHelper in SingletonRegistry. - * It is a Runnable so it could be executed in a Thread. During run(), it will begin the SlaveLooper's loop(). + * It is a Runnable so it can be executed in a Thread. During run(), it will begin the ExecutorLooper's loop().
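 * The executor thread also consumes control messages from inControlQueue: a new PhysicalPlanHelper
 * triggers a (re)assignment, while stateful-processing requests drive the restore and
 * start-processing paths below. HeronInstance wires it up as
 * new Executor(executorLooper, inStreamQueue, outStreamQueue, inControlQueue, executorMetricsOut)
 * and runs it on its thread pool.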
*/ -public class Slave implements Runnable, AutoCloseable { - private static final Logger LOG = Logger.getLogger(Slave.class.getName()); +public class Executor implements Runnable, AutoCloseable { + private static final Logger LOG = Logger.getLogger(Executor.class.getName()); - private final SlaveLooper slaveLooper; + private final ExecutorLooper executorLooper; private MetricsCollector metricsCollector; // Communicator private final Communicator streamInCommunicator; @@ -72,12 +72,12 @@ public class Slave implements Runnable, AutoCloseable { private State instanceState; private boolean isStatefulProcessingStarted; - public Slave(SlaveLooper slaveLooper, + public Executor(ExecutorLooper executorLooper, final Communicator streamInCommunicator, final Communicator streamOutCommunicator, final Communicator inControlQueue, final Communicator metricsOutCommunicator) { - this.slaveLooper = slaveLooper; + this.executorLooper = executorLooper; this.streamInCommunicator = streamInCommunicator; this.streamOutCommunicator = streamOutCommunicator; this.inControlQueue = inControlQueue; @@ -92,7 +92,7 @@ public Slave(SlaveLooper slaveLooper, this.systemConfig = (SystemConfig) SingletonRegistry.INSTANCE.getSingleton(SystemConfig.HERON_SYSTEM_CONFIG); - this.metricsCollector = new MetricsCollector(slaveLooper, metricsOutCommunicator); + this.metricsCollector = new MetricsCollector(executorLooper, metricsOutCommunicator); handleControlMessage(); } @@ -137,7 +137,7 @@ public void run() { } }; - slaveLooper.addTasksOnWakeup(handleControlMessageTask); + executorLooper.addTasksOnWakeup(handleControlMessageTask); } private void handleGlobalCheckpointConsistent(String checkpointId) { @@ -148,8 +148,8 @@ private void handleGlobalCheckpointConsistent(String checkpointId) { private void resetCurrentAssignment() { helper.setTopologyContext(metricsCollector); instance = helper.getMySpout() != null - ? new SpoutInstance(helper, streamInCommunicator, streamOutCommunicator, slaveLooper) - : new BoltInstance(helper, streamInCommunicator, streamOutCommunicator, slaveLooper); + ? 
new SpoutInstance(helper, streamInCommunicator, streamOutCommunicator, executorLooper) + : new BoltInstance(helper, streamInCommunicator, streamOutCommunicator, executorLooper); startInstanceIfNeeded(); } @@ -164,10 +164,10 @@ private void handleNewAssignment() { SerializeDeSerializeHelper.getSerializer(helper.getTopologyContext().getTopologyConfig()); // During the initiation of instance, - // we would add a bunch of tasks to slaveLooper's tasksOnWakeup + // we would add a bunch of tasks to executorLooper's tasksOnWakeup if (helper.getMySpout() != null) { instance = - new SpoutInstance(helper, streamInCommunicator, streamOutCommunicator, slaveLooper); + new SpoutInstance(helper, streamInCommunicator, streamOutCommunicator, executorLooper); streamInCommunicator.init(systemConfig.getInstanceInternalSpoutReadQueueCapacity(), systemConfig.getInstanceTuningExpectedSpoutReadQueueSize(), @@ -177,7 +177,7 @@ private void handleNewAssignment() { systemConfig.getInstanceTuningCurrentSampleWeight()); } else { instance = - new BoltInstance(helper, streamInCommunicator, streamOutCommunicator, slaveLooper); + new BoltInstance(helper, streamInCommunicator, streamOutCommunicator, executorLooper); streamInCommunicator.init(systemConfig.getInstanceInternalBoltReadQueueCapacity(), systemConfig.getInstanceTuningExpectedBoltReadQueueSize(), @@ -196,9 +196,9 @@ private void handleNewAssignment() { @Override public void run() { - Thread.currentThread().setName(ThreadNames.THREAD_SLAVE_NAME); + Thread.currentThread().setName(ThreadNames.THREAD_EXECUTOR_NAME); - slaveLooper.loop(); + executorLooper.loop(); } @SuppressWarnings("unchecked") @@ -252,7 +252,7 @@ private void startInstanceIfNeeded() { } public void close() { - LOG.info("Closing the Slave Thread"); + LOG.info("Closing the Executor Thread"); this.metricsCollector.forceGatherAllMetrics(); LOG.info("Shutting down the instance"); if (instance != null) { @@ -260,7 +260,7 @@ public void close() { } // Clean the resources we own - slaveLooper.exitLoop(); + executorLooper.exitLoop(); streamInCommunicator.clear(); // The clean of out stream communicator will be handled by instance itself } @@ -277,7 +277,7 @@ private void handleStartInstanceStatefulProcessing(InstanceControlMsg instanceCo startInstanceIfNeeded(); } - private void cleanAndStopSlaveBeforeRestore(String checkpointId) { + private void cleanAndStopExecutorBeforeRestore(String checkpointId) { // Clear all queues streamInCommunicator.clear(); streamOutCommunicator.clear(); @@ -285,9 +285,9 @@ private void cleanAndStopSlaveBeforeRestore(String checkpointId) { // Flush out existing metrics metricsCollector.forceGatherAllMetrics(); - // Stop slave looper consuming data/control_msg - slaveLooper.clearTasksOnWakeup(); - slaveLooper.clearTimers(); + // Stop executor looper consuming data/control_msg + executorLooper.clearTasksOnWakeup(); + executorLooper.clearTimers(); if (instance != null) { instance.preRestore(checkpointId); @@ -297,9 +297,9 @@ private void cleanAndStopSlaveBeforeRestore(String checkpointId) { isStatefulProcessingStarted = false; } - private void registerTasksWithSlave() { - // Create a new MetricsCollector with the clean slaveLooper and register its task - metricsCollector = new MetricsCollector(slaveLooper, metricsOutCommunicator); + private void registerTasksWithExecutor() { + // Create a new MetricsCollector with the clean executorLooper and register its task + metricsCollector = new MetricsCollector(executorLooper, metricsOutCommunicator); // registering the handling of control msg
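    // (re-registration is required because cleanAndStopExecutorBeforeRestore() cleared
    // all of the executor looper's wakeup tasks and timers)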
handleControlMessage(); @@ -314,9 +314,9 @@ private void handleRestoreInstanceStateRequest(InstanceControlMsg instanceContro // ID of the checkpoint we are restoring to String checkpointId = request.getState().getCheckpointId(); - // Clean buffers and unregister tasks in slave looper + // Clean buffers and unregister tasks in executor looper if (isInstanceStarted) { - cleanAndStopSlaveBeforeRestore(checkpointId); + cleanAndStopExecutorBeforeRestore(checkpointId); } // Restore the state @@ -362,7 +362,7 @@ private void handleRestoreInstanceStateRequest(InstanceControlMsg instanceContro resetCurrentAssignment(); } - registerTasksWithSlave(); + registerTasksWithExecutor(); // Send back the response CheckpointManager.RestoreInstanceStateResponse response = @@ -407,7 +407,7 @@ private void handleNewPhysicalPlan(InstanceControlMsg instanceControlMsg) { + helper.getTopologyState()); } } else { - LOG.info("Topology state remains the same in Slave: " + oldTopologyState); + LOG.info("Topology state remains the same in Executor: " + oldTopologyState); } } } diff --git a/heron/instance/src/java/org/apache/heron/instance/HeronInstance.java b/heron/instance/src/java/org/apache/heron/instance/HeronInstance.java index 52ef0aff3a0..e86c8382e07 100644 --- a/heron/instance/src/java/org/apache/heron/instance/HeronInstance.java +++ b/heron/instance/src/java/org/apache/heron/instance/HeronInstance.java @@ -43,7 +43,7 @@ import org.apache.heron.common.basics.Communicator; import org.apache.heron.common.basics.NIOLooper; import org.apache.heron.common.basics.SingletonRegistry; -import org.apache.heron.common.basics.SlaveLooper; +import org.apache.heron.common.basics.ExecutorLooper; import org.apache.heron.common.basics.SysUtils; import org.apache.heron.common.config.SystemConfig; import org.apache.heron.common.utils.logging.ErrorReportLoggingHandler; @@ -62,7 +62,7 @@ public class HeronInstance { private static final int NUM_THREADS = 2; private final NIOLooper gatewayLooper; - private final SlaveLooper slaveLooper; + private final ExecutorLooper executorLooper; // Only one outStreamQueue, which is responsible for both control tuples and data tuples private final Communicator outStreamQueue; @@ -71,7 +71,7 @@ public class HeronInstance { // For spout, it will buffer Control tuple, while for bolt, it will buffer data tuple. 
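  // Each Communicator below is a one-way queue between two loopers: the gateway looper
  // produces into inStreamQueue and the executor looper consumes it, while outStreamQueue
  // is wired in the opposite direction (see the constructor).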
private final Communicator inStreamQueue; - // This queue is used to pass Control Message from Gateway to Slave + // This queue is used to pass Control Message from Gateway to Executor // TODO:- currently it would just pass the PhysicalPlanHelper // TODO:- we might handle more types of ControlMessage in future private final Communicator inControlQueue; @@ -82,7 +82,7 @@ public class HeronInstance { private final List> outMetricsQueues; private final Gateway gateway; - private final Slave slave; + private final Executor executor; private final ExecutorService threadsPool; @@ -114,16 +114,16 @@ public HeronInstance(String topologyName, String topologyId, // Two WakeableLooper gatewayLooper = new NIOLooper(); - slaveLooper = new SlaveLooper(); + executorLooper = new ExecutorLooper(); // Add the task on exit gatewayLooper.addTasksOnExit(new GatewayExitTask()); - slaveLooper.addTasksOnExit(new SlaveExitTask()); + executorLooper.addTasksOnExit(new ExecutorExitTask()); // For stream - inStreamQueue = new Communicator(gatewayLooper, slaveLooper); - outStreamQueue = new Communicator(slaveLooper, gatewayLooper); - inControlQueue = new Communicator(gatewayLooper, slaveLooper); + inStreamQueue = new Communicator(gatewayLooper, executorLooper); + outStreamQueue = new Communicator(executorLooper, gatewayLooper); + inControlQueue = new Communicator(gatewayLooper, executorLooper); // Now for metrics // No need in queues for metrics @@ -135,21 +135,21 @@ public HeronInstance(String topologyName, String topologyId, systemConfig.getInstanceTuningExpectedMetricsWriteQueueSize(), systemConfig.getInstanceTuningCurrentSampleWeight()); - Communicator slaveMetricsOut = - new Communicator(slaveLooper, gatewayLooper); - slaveMetricsOut.init(systemConfig.getInstanceInternalMetricsWriteQueueCapacity(), + Communicator executorMetricsOut = + new Communicator(executorLooper, gatewayLooper); + executorMetricsOut.init(systemConfig.getInstanceInternalMetricsWriteQueueCapacity(), systemConfig.getInstanceTuningExpectedMetricsWriteQueueSize(), systemConfig.getInstanceTuningCurrentSampleWeight()); outMetricsQueues.add(gatewayMetricsOut); - outMetricsQueues.add(slaveMetricsOut); + outMetricsQueues.add(executorMetricsOut); // We will new these two Runnable this.gateway = new Gateway(topologyName, topologyId, instance, streamPort, metricsPort, gatewayLooper, inStreamQueue, outStreamQueue, inControlQueue, outMetricsQueues); - this.slave = new Slave(slaveLooper, inStreamQueue, outStreamQueue, - inControlQueue, slaveMetricsOut); + this.executor = new Executor(executorLooper, inStreamQueue, outStreamQueue, + inControlQueue, executorMetricsOut); // New the ThreadPool and register it inside the SingletonRegistry threadsPool = Executors.newFixedThreadPool(NUM_THREADS); @@ -295,7 +295,7 @@ public static void main(String[] args) throws IOException { Level loggingLevel = Level.INFO; String loggingDir = systemConfig.getHeronLoggingDirectory(); - // Log to file and TMaster + // Log to file and TManager LoggingHelper.loggerInit(loggingLevel, true); LoggingHelper.addLoggingHandler( LoggingHelper.getFileHandler(instanceId, loggingDir, true, @@ -328,7 +328,7 @@ public void start() { // Get the Thread Pool and run it threadsPool.execute(gateway); - threadsPool.execute(slave); + threadsPool.execute(executor); } public void stop() { @@ -394,10 +394,10 @@ private void handleException(Thread thread, Throwable exception) { exitExecutor.execute(new ForceExitTask(exited, systemConfig.getInstanceForceExitTimeout())); // Clean up - if 
(thread.getName().equals(ThreadNames.THREAD_SLAVE_NAME)) { - // Run the SlaveExitTask here since the thread throw exceptions + if (thread.getName().equals(ThreadNames.THREAD_EXECUTOR_NAME)) { + // Run the ExecutorExitTask here since the thread throw exceptions // and this Task would never be invoked on exit in future - new SlaveExitTask().run(); + new ExecutorExitTask().run(); // And exit the GatewayLooper gatewayLooper.exitLoop(); @@ -424,12 +424,12 @@ public void run() { } } - // The Task to execute on Slave thread's exit - public class SlaveExitTask implements Runnable { + // The Task to execute on Executor thread's exit + public class ExecutorExitTask implements Runnable { @Override public void run() { - SysUtils.closeIgnoringExceptions(slave); + SysUtils.closeIgnoringExceptions(executor); } } diff --git a/heron/instance/src/java/org/apache/heron/instance/bolt/BoltInstance.java b/heron/instance/src/java/org/apache/heron/instance/bolt/BoltInstance.java index a53ed3d3377..a7b93893d79 100644 --- a/heron/instance/src/java/org/apache/heron/instance/bolt/BoltInstance.java +++ b/heron/instance/src/java/org/apache/heron/instance/bolt/BoltInstance.java @@ -43,7 +43,7 @@ import org.apache.heron.common.basics.Communicator; import org.apache.heron.common.basics.FileUtils; import org.apache.heron.common.basics.SingletonRegistry; -import org.apache.heron.common.basics.SlaveLooper; +import org.apache.heron.common.basics.ExecutorLooper; import org.apache.heron.common.basics.TypeUtils; import org.apache.heron.common.config.SystemConfig; import org.apache.heron.common.utils.metrics.FullBoltMetrics; @@ -81,7 +81,7 @@ public class BoltInstance implements IInstance { // The reference to topology's config private final Map config; - private final SlaveLooper looper; + private final ExecutorLooper looper; private final SystemConfig systemConfig; @@ -91,7 +91,7 @@ public class BoltInstance implements IInstance { public BoltInstance(PhysicalPlanHelper helper, Communicator streamInQueue, Communicator streamOutQueue, - SlaveLooper looper) { + ExecutorLooper looper) { this.helper = helper; this.looper = looper; this.streamInQueue = streamInQueue; diff --git a/heron/instance/src/java/org/apache/heron/instance/spout/SpoutInstance.java b/heron/instance/src/java/org/apache/heron/instance/spout/SpoutInstance.java index e9a0e131d52..5241f9bd2d2 100644 --- a/heron/instance/src/java/org/apache/heron/instance/spout/SpoutInstance.java +++ b/heron/instance/src/java/org/apache/heron/instance/spout/SpoutInstance.java @@ -43,7 +43,7 @@ import org.apache.heron.common.basics.Communicator; import org.apache.heron.common.basics.FileUtils; import org.apache.heron.common.basics.SingletonRegistry; -import org.apache.heron.common.basics.SlaveLooper; +import org.apache.heron.common.basics.ExecutorLooper; import org.apache.heron.common.basics.TypeUtils; import org.apache.heron.common.config.SystemConfig; import org.apache.heron.common.utils.metrics.FullSpoutMetrics; @@ -77,7 +77,7 @@ public class SpoutInstance implements IInstance { private State instanceState; - private final SlaveLooper looper; + private final ExecutorLooper looper; private final SystemConfig systemConfig; @@ -92,7 +92,7 @@ public class SpoutInstance implements IInstance { public SpoutInstance(PhysicalPlanHelper helper, Communicator streamInQueue, Communicator streamOutQueue, - SlaveLooper looper) { + ExecutorLooper looper) { this.helper = helper; this.looper = looper; this.streamInQueue = streamInQueue; diff --git 
a/heron/instance/src/java/org/apache/heron/instance/util/InstanceUtils.java b/heron/instance/src/java/org/apache/heron/instance/util/InstanceUtils.java index aa1d12c30da..1a979d20a74 100644 --- a/heron/instance/src/java/org/apache/heron/instance/util/InstanceUtils.java +++ b/heron/instance/src/java/org/apache/heron/instance/util/InstanceUtils.java @@ -24,7 +24,7 @@ import org.apache.heron.api.Config; import org.apache.heron.api.Pair; -import org.apache.heron.common.basics.SlaveLooper; +import org.apache.heron.common.basics.ExecutorLooper; import org.apache.heron.common.utils.misc.PhysicalPlanHelper; public final class InstanceUtils { @@ -32,7 +32,7 @@ private InstanceUtils() { } @SuppressWarnings("unchecked") - public static void prepareTimerEvents(SlaveLooper looper, PhysicalPlanHelper helper) { + public static void prepareTimerEvents(ExecutorLooper looper, PhysicalPlanHelper helper) { Map> timerEvents = (Map>) helper.getTopologyContext() .getTopologyConfig().get(Config.TOPOLOGY_TIMER_EVENTS); diff --git a/heron/instance/src/java/org/apache/heron/network/MetricsManagerClient.java b/heron/instance/src/java/org/apache/heron/network/MetricsManagerClient.java index 6025806feeb..18ef7cfd9cd 100644 --- a/heron/instance/src/java/org/apache/heron/network/MetricsManagerClient.java +++ b/heron/instance/src/java/org/apache/heron/network/MetricsManagerClient.java @@ -138,7 +138,7 @@ public void onError() { @Override public void onConnect(StatusCode status) { // We will not send registerRequest when we are onConnect - // We will send when we receive the PhysicalPlan sent by slave + // We will send when we receive the PhysicalPlan sent by executor if (status != StatusCode.OK) { LOG.log(Level.WARNING, "Cannot connect to the metrics port with status: {0}, Will Retry..", status); diff --git a/heron/instance/src/java/org/apache/heron/network/StreamManagerClient.java b/heron/instance/src/java/org/apache/heron/network/StreamManagerClient.java index fb9212b8336..53484aa28da 100644 --- a/heron/instance/src/java/org/apache/heron/network/StreamManagerClient.java +++ b/heron/instance/src/java/org/apache/heron/network/StreamManagerClient.java @@ -48,7 +48,7 @@ * 2. Send Register Request when it is onConnect() * 3. Handle relative response for requests * 4. if onIncomingMessage(message) is called, it will see whether it is NewAssignment or NewTuples. - * 5. If it is a new assignment, it will pass the PhysicalPlan to Slave, + * 5. If it is a new assignment, it will pass the PhysicalPlan to Executor, * which will new a corresponding instance. */ @@ -374,7 +374,7 @@ private void handleAssignmentMessage(PhysicalPlans.PhysicalPlan pplan) { helper.getTopologyState(), newHelper.getTopologyState())); } helper = newHelper; - LOG.info("Push to Slave"); + LOG.info("Push to Executor"); InstanceControlMsg instanceControlMsg = InstanceControlMsg.newBuilder(). setNewPhysicalPlanHelper(helper). 
build(); diff --git a/heron/instance/tests/java/org/apache/heron/grouping/AbstractTupleRoutingTest.java b/heron/instance/tests/java/org/apache/heron/grouping/AbstractTupleRoutingTest.java index aa74f64ea0b..3d8b411ff40 100644 --- a/heron/instance/tests/java/org/apache/heron/grouping/AbstractTupleRoutingTest.java +++ b/heron/instance/tests/java/org/apache/heron/grouping/AbstractTupleRoutingTest.java @@ -35,7 +35,7 @@ import org.apache.heron.common.testhelpers.HeronServerTester; import org.apache.heron.common.utils.misc.PhysicalPlanHelper; import org.apache.heron.instance.InstanceControlMsg; -import org.apache.heron.instance.SlaveTester; +import org.apache.heron.instance.ExecutorTester; import org.apache.heron.proto.system.HeronTuples; import org.apache.heron.proto.system.PhysicalPlans; import org.apache.heron.resource.TestBolt; @@ -55,7 +55,7 @@ public abstract class AbstractTupleRoutingTest { private volatile int tupleReceived; private volatile StringBuilder groupingInitInfo; private CountDownLatch outStreamQueueOfferLatch; - private SlaveTester slaveTester; + private ExecutorTester executorTester; // Test component info. Topology is SPOUT -> BOLT_A -> BOLT_B protected enum Component { @@ -86,13 +86,13 @@ public void before() { groupingInitInfo = new StringBuilder(); outStreamQueueOfferLatch = new CountDownLatch(1); - slaveTester = new SlaveTester(outStreamQueueOfferLatch); - slaveTester.start(); + executorTester = new ExecutorTester(outStreamQueueOfferLatch); + executorTester.start(); } @After public void after() throws NoSuchFieldException, IllegalAccessException { - slaveTester.stop(); + executorTester.stop(); } String getInitInfoKey(String componentName) { @@ -110,7 +110,7 @@ public void testRoundRobinRouting() throws Exception { .setNewPhysicalPlanHelper(physicalPlanHelper) .build(); - slaveTester.getInControlQueue().offer(instanceControlMsg); + executorTester.getInControlQueue().offer(instanceControlMsg); SingletonRegistry.INSTANCE.registerSingleton( getInitInfoKey(getComponentToVerify().getName()), groupingInitInfo); @@ -120,14 +120,14 @@ public void testRoundRobinRouting() throws Exception { @Override public void run() { HeronServerTester.await(outStreamQueueOfferLatch); - assertNotEquals(0, slaveTester.getOutStreamQueue().size()); + assertNotEquals(0, executorTester.getOutStreamQueue().size()); while (tupleReceived < expectedTuplesValidated) { - if (slaveTester.getOutStreamQueue().isEmpty()) { + if (executorTester.getOutStreamQueue().isEmpty()) { continue; } - Message msg = slaveTester.getOutStreamQueue().poll(); + Message msg = executorTester.getOutStreamQueue().poll(); assertTrue(msg instanceof HeronTuples.HeronTupleSet); HeronTuples.HeronTupleSet set = (HeronTuples.HeronTupleSet) msg; @@ -151,12 +151,12 @@ public void run() { assertEquals(expectedTuplesValidated, tupleReceived); assertEquals(getExpectedComponentInitInfo(), groupingInitInfo.toString()); - slaveTester.getTestLooper().exitLoop(); + executorTester.getTestLooper().exitLoop(); } }; - slaveTester.getTestLooper().addTasksOnWakeup(task); - slaveTester.getTestLooper().loop(); + executorTester.getTestLooper().addTasksOnWakeup(task); + executorTester.getTestLooper().loop(); assertEquals(expectedTuplesValidated, tupleReceived); } diff --git a/heron/instance/tests/java/org/apache/heron/instance/CommunicatorTester.java b/heron/instance/tests/java/org/apache/heron/instance/CommunicatorTester.java index 155ca3fbd55..e5d1000569f 100644 --- a/heron/instance/tests/java/org/apache/heron/instance/CommunicatorTester.java +++ 
b/heron/instance/tests/java/org/apache/heron/instance/CommunicatorTester.java @@ -26,7 +26,7 @@ import org.apache.heron.common.basics.Communicator; import org.apache.heron.common.basics.NIOLooper; -import org.apache.heron.common.basics.SlaveLooper; +import org.apache.heron.common.basics.ExecutorLooper; import org.apache.heron.common.basics.WakeableLooper; import org.apache.heron.common.testhelpers.CommunicatorTestHelper; import org.apache.heron.proto.system.Metrics; @@ -38,7 +38,7 @@ */ public class CommunicatorTester { private final WakeableLooper testLooper; - private final SlaveLooper slaveLooper; + private final ExecutorLooper executorLooper; // Only one outStreamQueue, which is responsible for both control tuples and data tuples private final Communicator outStreamQueue; @@ -47,7 +47,7 @@ public class CommunicatorTester { // For spout, it will buffer Control tuple, while for bolt, it will buffer data tuple. private final Communicator inStreamQueue; private final Communicator inControlQueue; - private final Communicator slaveMetricsOut; + private final Communicator executorMetricsOut; public CommunicatorTester(CountDownLatch inControlQueueOfferLatch, CountDownLatch inStreamQueueOfferLatch) throws IOException { @@ -64,17 +64,17 @@ private CommunicatorTester(WakeableLooper testLooper, final CountDownLatch outStreamQueueOfferLatch) { UnitTestHelper.addSystemConfigToSingleton(); this.testLooper = testLooper; - slaveLooper = new SlaveLooper(); + executorLooper = new ExecutorLooper(); outStreamQueue = initCommunicator( - new Communicator(slaveLooper, testLooper), + new Communicator(executorLooper, testLooper), outStreamQueueOfferLatch); inStreamQueue = initCommunicator( - new Communicator(testLooper, slaveLooper), + new Communicator(testLooper, executorLooper), inStreamQueueOfferLatch); inControlQueue = initCommunicator( - new Communicator(testLooper, slaveLooper), inControlQueueOfferLatch); - slaveMetricsOut = initCommunicator( - new Communicator(slaveLooper, testLooper), null); + new Communicator(testLooper, executorLooper), inControlQueueOfferLatch); + executorMetricsOut = initCommunicator( + new Communicator(executorLooper, testLooper), null); } private Communicator initCommunicator(Communicator communicator, @@ -93,21 +93,21 @@ public void stop() throws NoSuchFieldException, IllegalAccessException { if (testLooper != null) { testLooper.exitLoop(); } - if (slaveLooper != null) { - slaveLooper.exitLoop(); + if (executorLooper != null) { + executorLooper.exitLoop(); } } - public Communicator getSlaveMetricsOut() { - return slaveMetricsOut; + public Communicator getExecutorMetricsOut() { + return executorMetricsOut; } public WakeableLooper getTestLooper() { return testLooper; } - public SlaveLooper getSlaveLooper() { - return slaveLooper; + public ExecutorLooper getExecutorLooper() { + return executorLooper; } public Communicator getInControlQueue() { diff --git a/heron/instance/tests/java/org/apache/heron/instance/SlaveTester.java b/heron/instance/tests/java/org/apache/heron/instance/ExecutorTester.java similarity index 69% rename from heron/instance/tests/java/org/apache/heron/instance/SlaveTester.java rename to heron/instance/tests/java/org/apache/heron/instance/ExecutorTester.java index c65472d95c1..92bfa5640b7 100644 --- a/heron/instance/tests/java/org/apache/heron/instance/SlaveTester.java +++ b/heron/instance/tests/java/org/apache/heron/instance/ExecutorTester.java @@ -23,28 +23,28 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import 
org.apache.heron.common.basics.SlaveLooper; +import org.apache.heron.common.basics.ExecutorLooper; /** - * Class to help write tests that require Slave instances, loopers and communicators + * Class to help write tests that require Executor instances, loopers and communicators */ -public class SlaveTester extends CommunicatorTester { +public class ExecutorTester extends CommunicatorTester { private final ExecutorService threadsPool; - private final Slave slave; + private final Executor executor; - public SlaveTester() { + public ExecutorTester() { this(null); } - public SlaveTester(CountDownLatch outStreamQueueOfferLatch) { - super(new SlaveLooper(), outStreamQueueOfferLatch); - slave = new Slave(getSlaveLooper(), getInStreamQueue(), getOutStreamQueue(), - getInControlQueue(), getSlaveMetricsOut()); + public ExecutorTester(CountDownLatch outStreamQueueOfferLatch) { + super(new ExecutorLooper(), outStreamQueueOfferLatch); + executor = new Executor(getExecutorLooper(), getInStreamQueue(), getOutStreamQueue(), + getInControlQueue(), getExecutorMetricsOut()); threadsPool = Executors.newSingleThreadExecutor(); } public void start() { - threadsPool.execute(slave); + threadsPool.execute(executor); } public void stop() throws NoSuchFieldException, IllegalAccessException { diff --git a/heron/instance/tests/java/org/apache/heron/instance/bolt/BoltInstanceTest.java b/heron/instance/tests/java/org/apache/heron/instance/bolt/BoltInstanceTest.java index 2c5f2b0d60e..0829545893d 100644 --- a/heron/instance/tests/java/org/apache/heron/instance/bolt/BoltInstanceTest.java +++ b/heron/instance/tests/java/org/apache/heron/instance/bolt/BoltInstanceTest.java @@ -37,7 +37,7 @@ import org.apache.heron.common.testhelpers.HeronServerTester; import org.apache.heron.common.utils.misc.PhysicalPlanHelper; import org.apache.heron.instance.InstanceControlMsg; -import org.apache.heron.instance.SlaveTester; +import org.apache.heron.instance.ExecutorTester; import org.apache.heron.proto.system.HeronTuples; import org.apache.heron.proto.system.PhysicalPlans; import org.apache.heron.resource.Constants; @@ -45,7 +45,7 @@ /** * To test the Bolt's ReadTupleAndExecute() method, it will: - * 1. We will instantiate a slave with TestBolt's instance. + * 1. We will instantiate an executor with TestBolt's instance. * 2. Construct a bunch of mock Tuples as protobuf Message to be consumed by the TestBolt * 3. Offer those protobuf Message into inStreamQueue. * 4. The TestBolt should consume the Tuples, and behave as described in its comments. @@ -63,7 +63,7 @@ public class BoltInstanceTest { private AtomicInteger tupleExecutedCount; private volatile StringBuilder receivedStrings; - private SlaveTester slaveTester; + private ExecutorTester executorTester; static { serializer.initialize(null); } @@ -76,13 +76,13 @@ public void before() { tupleExecutedCount = new AtomicInteger(0); receivedStrings = new StringBuilder(); - slaveTester = new SlaveTester(); - slaveTester.start(); + executorTester = new ExecutorTester(); + executorTester.start(); } @After public void after() throws NoSuchFieldException, IllegalAccessException { - slaveTester.stop(); + executorTester.stop(); } /** @@ -97,7 +97,7 @@ public void testReadTupleAndExecute() { setNewPhysicalPlanHelper(physicalPlanHelper).
build(); - slaveTester.getInControlQueue().offer(instanceControlMsg); + executorTester.getInControlQueue().offer(instanceControlMsg); final int expectedTuples = 10; CountDownLatch executeLatch = new CountDownLatch(expectedTuples); @@ -133,7 +133,7 @@ public void testReadTupleAndExecute() { } heronTupleSet.setData(dataTupleSet); - slaveTester.getInStreamQueue().offer(heronTupleSet.build()); + executorTester.getInStreamQueue().offer(heronTupleSet.build()); // Wait the bolt's finishing HeronServerTester.await(executeLatch); diff --git a/heron/instance/tests/java/org/apache/heron/instance/bolt/BoltStatefulInstanceTest.java b/heron/instance/tests/java/org/apache/heron/instance/bolt/BoltStatefulInstanceTest.java index d5eac2c38d7..3b916f49251 100644 --- a/heron/instance/tests/java/org/apache/heron/instance/bolt/BoltStatefulInstanceTest.java +++ b/heron/instance/tests/java/org/apache/heron/instance/bolt/BoltStatefulInstanceTest.java @@ -35,7 +35,7 @@ import org.apache.heron.common.basics.SingletonRegistry; import org.apache.heron.common.utils.misc.PhysicalPlanHelper; import org.apache.heron.instance.InstanceControlMsg; -import org.apache.heron.instance.SlaveTester; +import org.apache.heron.instance.ExecutorTester; import org.apache.heron.proto.system.HeronTuples; import org.apache.heron.proto.system.PhysicalPlans; import org.apache.heron.resource.Constants; @@ -51,18 +51,18 @@ * Test if stateful bolt is able to respond to incoming control/data tuples as expected. */ public class BoltStatefulInstanceTest { - private SlaveTester slaveTester; + private ExecutorTester executorTester; private static IPluggableSerializer serializer = new JavaSerializer(); @Before public void before() { - slaveTester = new SlaveTester(); - slaveTester.start(); + executorTester = new ExecutorTester(); + executorTester.start(); } @After public void after() throws NoSuchFieldException, IllegalAccessException { - slaveTester.stop(); + executorTester.stop(); } @Test @@ -72,22 +72,22 @@ public void testPreSaveAndPostSave() throws Exception { SingletonRegistry.INSTANCE.registerSingleton(Constants.PRESAVE_LATCH, preSaveLatch); SingletonRegistry.INSTANCE.registerSingleton(Constants.POSTSAVE_LATCH, postSaveLatch); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0")); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0")); - slaveTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCBolt()); + executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0")); + executorTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0")); + executorTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCBolt()); // initially non of preSave or postSave are invoked yet assertEquals(1, preSaveLatch.getCount()); assertEquals(1, postSaveLatch.getCount()); // this should invoke preSave - slaveTester.getInStreamQueue().offer(UnitTestHelper.buildPersistStateMessage("c0")); + executorTester.getInStreamQueue().offer(UnitTestHelper.buildPersistStateMessage("c0")); assertTrue(preSaveLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS)); assertEquals(0, preSaveLatch.getCount()); assertEquals(1, postSaveLatch.getCount()); // this should invoke postSave - slaveTester.getInControlQueue().offer(UnitTestHelper.buildCheckpointSavedMessage("c0", "p0")); + executorTester.getInControlQueue().offer(UnitTestHelper.buildCheckpointSavedMessage("c0", "p0")); 
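    // the checkpoint-saved notification travels over the control queue, so postSave
    // fires on the executor thread and releases its latch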
assertTrue(postSaveLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS)); assertEquals(0, preSaveLatch.getCount()); assertEquals(0, postSaveLatch.getCount()); @@ -98,13 +98,13 @@ public void testPreRestore() throws InterruptedException { CountDownLatch preRestoreLatch = new CountDownLatch(1); SingletonRegistry.INSTANCE.registerSingleton(Constants.PRERESTORE_LATCH, preRestoreLatch); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0")); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0")); - slaveTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCBolt()); + executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0")); + executorTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0")); + executorTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCBolt()); assertEquals(1, preRestoreLatch.getCount()); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("cx")); + executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("cx")); assertTrue(preRestoreLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS)); assertEquals(0, preRestoreLatch.getCount()); @@ -125,9 +125,9 @@ public void testPostSaveBlockExecute() throws Exception { SingletonRegistry.INSTANCE.registerSingleton(Constants.POSTSAVE_LATCH, postSaveLatch); SingletonRegistry.INSTANCE.registerSingleton(Constants.EXECUTE_LATCH, executeLatch); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0")); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0")); - slaveTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCBolt()); + executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0")); + executorTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0")); + executorTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCBolt()); // initially non of preSave or postSave are invoked yet assertEquals(1, preSaveLatch.getCount()); @@ -135,10 +135,10 @@ public void testPostSaveBlockExecute() throws Exception { assertEquals(1, executeLatch.getCount()); // this should invoke preSave - slaveTester.getInStreamQueue().offer(UnitTestHelper.buildPersistStateMessage("c0")); + executorTester.getInStreamQueue().offer(UnitTestHelper.buildPersistStateMessage("c0")); // put a data tuple into the inStreamQueue - slaveTester.getInStreamQueue().offer(buildTupleSet()); + executorTester.getInStreamQueue().offer(buildTupleSet()); assertTrue(preSaveLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS)); assertEquals(0, preSaveLatch.getCount()); @@ -154,7 +154,7 @@ public void testPostSaveBlockExecute() throws Exception { assertEquals(1, executeLatch.getCount()); // this should invoke postSave - slaveTester.getInControlQueue().offer(UnitTestHelper.buildCheckpointSavedMessage("c0", "p0")); + executorTester.getInControlQueue().offer(UnitTestHelper.buildCheckpointSavedMessage("c0", "p0")); assertTrue(postSaveLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS)); assertTrue(executeLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS)); @@ -175,19 +175,19 @@ public void testExecuteNotBlocked() throws Exception { SingletonRegistry.INSTANCE.registerSingleton(Constants.PRESAVE_LATCH, preSaveLatch); 
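    // the latches are shared with the bolt via SingletonRegistry so the test thread
    // can observe callbacks that run on the executor thread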
SingletonRegistry.INSTANCE.registerSingleton(Constants.EXECUTE_LATCH, executeLatch); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0")); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0")); - slaveTester.getInControlQueue().offer(buildPhysicalPlanMessageForStatefulBolt()); + executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0")); + executorTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0")); + executorTester.getInControlQueue().offer(buildPhysicalPlanMessageForStatefulBolt()); // initially non of preSave or postSave are invoked yet assertEquals(1, preSaveLatch.getCount()); assertEquals(1, executeLatch.getCount()); // this should invoke preSave - slaveTester.getInStreamQueue().offer(UnitTestHelper.buildPersistStateMessage("c0")); + executorTester.getInStreamQueue().offer(UnitTestHelper.buildPersistStateMessage("c0")); // put a data tuple into the inStreamQueue - slaveTester.getInStreamQueue().offer(buildTupleSet()); + executorTester.getInStreamQueue().offer(buildTupleSet()); assertTrue(preSaveLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS)); assertEquals(0, preSaveLatch.getCount()); diff --git a/heron/instance/tests/java/org/apache/heron/instance/spout/ActivateDeactivateTest.java b/heron/instance/tests/java/org/apache/heron/instance/spout/ActivateDeactivateTest.java index 547ae171be2..0e66170b1b6 100644 --- a/heron/instance/tests/java/org/apache/heron/instance/spout/ActivateDeactivateTest.java +++ b/heron/instance/tests/java/org/apache/heron/instance/spout/ActivateDeactivateTest.java @@ -30,7 +30,7 @@ import org.apache.heron.common.basics.SingletonRegistry; import org.apache.heron.common.utils.misc.PhysicalPlanHelper; import org.apache.heron.instance.InstanceControlMsg; -import org.apache.heron.instance.SlaveTester; +import org.apache.heron.instance.ExecutorTester; import org.apache.heron.proto.system.PhysicalPlans; import org.apache.heron.resource.Constants; import org.apache.heron.resource.UnitTestHelper; @@ -40,17 +40,17 @@ public class ActivateDeactivateTest { private static final String SPOUT_INSTANCE_ID = "spout-id"; - private SlaveTester slaveTester; + private ExecutorTester executorTester; @Before public void before() { - slaveTester = new SlaveTester(); - slaveTester.start(); + executorTester = new ExecutorTester(); + executorTester.start(); } @After public void after() throws NoSuchFieldException, IllegalAccessException { - slaveTester.stop(); + executorTester.stop(); } /** @@ -64,20 +64,20 @@ public void testActivateAndDeactivate() throws Exception { SingletonRegistry.INSTANCE.registerSingleton(Constants.ACTIVATE_COUNT_LATCH, activateLatch); SingletonRegistry.INSTANCE.registerSingleton(Constants.DEACTIVATE_COUNT_LATCH, deactivateLatch); - slaveTester.getInControlQueue().offer(buildMessage(TopologyAPI.TopologyState.RUNNING)); + executorTester.getInControlQueue().offer(buildMessage(TopologyAPI.TopologyState.RUNNING)); // Now the activateLatch and deactivateLatch should be 1 assertEquals(1, activateLatch.getCount()); assertEquals(1, deactivateLatch.getCount()); // And we start the test - slaveTester.getInControlQueue().offer(buildMessage(TopologyAPI.TopologyState.PAUSED)); + executorTester.getInControlQueue().offer(buildMessage(TopologyAPI.TopologyState.PAUSED)); assertTrue(deactivateLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS)); assertEquals(1, activateLatch.getCount()); 
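    // the PAUSED transition triggers deactivate() only; activate() is expected to fire
    // once the topology state returns to RUNNING below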
assertEquals(0, deactivateLatch.getCount()); - slaveTester.getInControlQueue().offer(buildMessage(TopologyAPI.TopologyState.RUNNING)); + executorTester.getInControlQueue().offer(buildMessage(TopologyAPI.TopologyState.RUNNING)); assertTrue(activateLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS)); assertEquals(0, activateLatch.getCount()); diff --git a/heron/instance/tests/java/org/apache/heron/instance/spout/SpoutInstanceTest.java b/heron/instance/tests/java/org/apache/heron/instance/spout/SpoutInstanceTest.java index 5ae6cf2e305..18f0ae91679 100644 --- a/heron/instance/tests/java/org/apache/heron/instance/spout/SpoutInstanceTest.java +++ b/heron/instance/tests/java/org/apache/heron/instance/spout/SpoutInstanceTest.java @@ -40,7 +40,7 @@ import org.apache.heron.common.testhelpers.HeronServerTester; import org.apache.heron.common.utils.misc.PhysicalPlanHelper; import org.apache.heron.instance.InstanceControlMsg; -import org.apache.heron.instance.SlaveTester; +import org.apache.heron.instance.ExecutorTester; import org.apache.heron.proto.system.HeronTuples; import org.apache.heron.proto.system.Metrics; import org.apache.heron.proto.system.PhysicalPlans; @@ -50,9 +50,9 @@ /** * To test the SpoutInstance. - * We will test by instantiate a slave with TestSpout's instance: + * We will test by instantiating an executor with TestSpout's instance: * 1. nextTuple(). - * Check whether Message inside slaveTester.getOutStreamQueue() matches tuples emitted by TestSpout. + * Check whether Message inside executorTester.getOutStreamQueue() matches tuples emitted by TestSpout. * We will not enable acking system and not enable timeout. * 2. gatherMetrics() * We wait for the interval for gathering metrics, and check whether the Metrics Message contains @@ -77,7 +77,7 @@ public class SpoutInstanceTest { private AtomicInteger ackCount; private AtomicInteger failCount; - private SlaveTester slaveTester; + private ExecutorTester executorTester; private int tupleReceived; private List heronDataTupleList; @@ -93,13 +93,13 @@ public void before() { ackCount = new AtomicInteger(0); failCount = new AtomicInteger(0); - slaveTester = new SlaveTester(); - slaveTester.start(); + executorTester = new ExecutorTester(); + executorTester.start(); } @After public void after() throws NoSuchFieldException, IllegalAccessException { - slaveTester.stop(); + executorTester.stop(); } /** @@ -107,7 +107,7 @@ public void after() throws NoSuchFieldException, IllegalAccessException { */ @Test public void testNextTuple() { - initSpout(slaveTester, false, -1); + initSpout(executorTester, false, -1); Runnable task = new Runnable() { private String streamId = ""; @@ -117,8 +117,8 @@ public void testNextTuple() { @Override public void run() { for (int i = 0; i < Constants.RETRY_TIMES; i++) { - if (slaveTester.getOutStreamQueue().size() != 0) { - Message msg = slaveTester.getOutStreamQueue().poll(); + if (executorTester.getOutStreamQueue().size() != 0) { + Message msg = executorTester.getOutStreamQueue().poll(); if (msg instanceof HeronTuples.HeronTupleSet) { HeronTuples.HeronTupleSet set = (HeronTuples.HeronTupleSet) msg; @@ -144,7 +144,7 @@ public void run() { } if (tupleReceived == 10) { Assert.assertEquals("ABABABABAB", receivedTupleStrings); - slaveTester.getTestLooper().exitLoop(); + executorTester.getTestLooper().exitLoop(); break; } } } } }; - slaveTester.getTestLooper().addTasksOnWakeup(task); - slaveTester.getTestLooper().loop(); +
executorTester.getTestLooper().addTasksOnWakeup(task); + executorTester.getTestLooper().loop(); Assert.assertEquals(tupleReceived, 10); } @@ -163,14 +163,14 @@ public void run() { */ @Test public void testGatherMetrics() { - initSpout(slaveTester, false, -1); + initSpout(executorTester, false, -1); Runnable task = new Runnable() { @Override public void run() { for (int i = 0; i < Constants.RETRY_TIMES; i++) { - if (!slaveTester.getSlaveMetricsOut().isEmpty()) { - Metrics.MetricPublisherPublishMessage msg = slaveTester.getSlaveMetricsOut().poll(); + if (!executorTester.getExecutorMetricsOut().isEmpty()) { + Metrics.MetricPublisherPublishMessage msg = executorTester.getExecutorMetricsOut().poll(); Set metricsName = new HashSet<>(); for (Metrics.MetricDatum metricDatum : msg.getMetricsList()) { metricsName.add(metricDatum.getName()); @@ -182,15 +182,15 @@ public void run() { Assert.assertTrue(metricsName.contains("__next-tuple-latency")); Assert.assertTrue(metricsName.contains("__next-tuple-count")); - slaveTester.getTestLooper().exitLoop(); + executorTester.getTestLooper().exitLoop(); break; } } } }; - slaveTester.getTestLooper().addTasksOnWakeup(task); - slaveTester.getTestLooper().loop(); + executorTester.getTestLooper().addTasksOnWakeup(task); + executorTester.getTestLooper().loop(); } /** @@ -204,7 +204,7 @@ public void testDoImmediateAcks() { SingletonRegistry.INSTANCE.registerSingleton(Constants.ACK_COUNT, ackCount); SingletonRegistry.INSTANCE.registerSingleton(Constants.ACK_LATCH, ackLatch); - initSpout(slaveTester, false, -1); + initSpout(executorTester, false, -1); Runnable task = new Runnable() { @Override @@ -214,13 +214,13 @@ public void run() { // Wait until the acks are received HeronServerTester.await(ackLatch); Assert.assertEquals(tuplesExpected, ackCount.intValue()); - slaveTester.getTestLooper().exitLoop(); + executorTester.getTestLooper().exitLoop(); } } }; - slaveTester.getTestLooper().addTasksOnWakeup(task); - slaveTester.getTestLooper().loop(); + executorTester.getTestLooper().addTasksOnWakeup(task); + executorTester.getTestLooper().loop(); } @Test @@ -231,7 +231,7 @@ public void testLookForTimeouts() { SingletonRegistry.INSTANCE.registerSingleton(Constants.FAIL_COUNT, failCount); SingletonRegistry.INSTANCE.registerSingleton(Constants.FAIL_LATCH, failLatch); - initSpout(slaveTester, true, 1); + initSpout(executorTester, true, 1); Runnable task = new Runnable() { @Override @@ -240,12 +240,12 @@ public void run() { HeronServerTester.await(failLatch); Assert.assertEquals(tuplesExpected, failCount.intValue()); - slaveTester.getTestLooper().exitLoop(); + executorTester.getTestLooper().exitLoop(); } }; - slaveTester.getTestLooper().addTasksOnWakeup(task); - slaveTester.getTestLooper().loop(); + executorTester.getTestLooper().addTasksOnWakeup(task); + executorTester.getTestLooper().loop(); } /** @@ -264,7 +264,7 @@ public void testAckAndFail() { SingletonRegistry.INSTANCE.registerSingleton(Constants.FAIL_COUNT, failCount); SingletonRegistry.INSTANCE.registerSingleton(Constants.FAIL_LATCH, failLatch); - initSpout(slaveTester, true, -1); + initSpout(executorTester, true, -1); Runnable task = new Runnable() { @Override @@ -279,13 +279,13 @@ public void run() { Assert.assertEquals(acksExpected, ackCount.intValue()); Assert.assertEquals(failsExpected, failCount.intValue()); - slaveTester.getTestLooper().exitLoop(); + executorTester.getTestLooper().exitLoop(); } } }; - slaveTester.getTestLooper().addTasksOnWakeup(task); - slaveTester.getTestLooper().loop(); + 
executorTester.getTestLooper().addTasksOnWakeup(task); + executorTester.getTestLooper().loop(); } private void constructAndSendAcks() { @@ -319,12 +319,12 @@ private void constructAndSendAcks() { bldr.setControl(controlTupleSet); // We will send back to the SpoutInstance - slaveTester.getInStreamQueue().offer(bldr.build()); + executorTester.getInStreamQueue().offer(bldr.build()); } private void drainOutStream() { - while (slaveTester.getOutStreamQueue().size() != 0) { - Message msg = slaveTester.getOutStreamQueue().poll(); + while (executorTester.getOutStreamQueue().size() != 0) { + Message msg = executorTester.getOutStreamQueue().poll(); if (msg instanceof HeronTuples.HeronTupleSet) { HeronTuples.HeronTupleSet set = (HeronTuples.HeronTupleSet) msg; @@ -339,11 +339,11 @@ private void drainOutStream() { } } - private static void initSpout(SlaveTester slaveTester, boolean ackEnabled, int timeout) { + private static void initSpout(ExecutorTester executorTester, boolean ackEnabled, int timeout) { PhysicalPlans.PhysicalPlan physicalPlan = UnitTestHelper.getPhysicalPlan(ackEnabled, timeout); PhysicalPlanHelper physicalPlanHelper = new PhysicalPlanHelper(physicalPlan, SPOUT_INSTANCE_ID); - slaveTester.getInControlQueue().offer( + executorTester.getInControlQueue().offer( InstanceControlMsg.newBuilder().setNewPhysicalPlanHelper(physicalPlanHelper).build()); } } diff --git a/heron/instance/tests/java/org/apache/heron/instance/spout/SpoutStatefulInstanceTest.java b/heron/instance/tests/java/org/apache/heron/instance/spout/SpoutStatefulInstanceTest.java index 4d65aec84ab..228296f3ac7 100644 --- a/heron/instance/tests/java/org/apache/heron/instance/spout/SpoutStatefulInstanceTest.java +++ b/heron/instance/tests/java/org/apache/heron/instance/spout/SpoutStatefulInstanceTest.java @@ -34,7 +34,7 @@ import org.apache.heron.common.basics.SingletonRegistry; import org.apache.heron.common.utils.misc.PhysicalPlanHelper; import org.apache.heron.instance.InstanceControlMsg; -import org.apache.heron.instance.SlaveTester; +import org.apache.heron.instance.ExecutorTester; import org.apache.heron.proto.system.PhysicalPlans; import org.apache.heron.resource.Constants; import org.apache.heron.resource.MockPhysicalPlansBuilder; @@ -47,18 +47,18 @@ public class SpoutStatefulInstanceTest { - private SlaveTester slaveTester; + private ExecutorTester executorTester; private static IPluggableSerializer serializer = new JavaSerializer(); @Before public void before() { - slaveTester = new SlaveTester(); - slaveTester.start(); + executorTester = new ExecutorTester(); + executorTester.start(); } @After public void after() throws NoSuchFieldException, IllegalAccessException { - slaveTester.stop(); + executorTester.stop(); } @Test @@ -68,22 +68,22 @@ public void testPreSaveAndPostSave() throws Exception { SingletonRegistry.INSTANCE.registerSingleton(Constants.PRESAVE_LATCH, preSaveLatch); SingletonRegistry.INSTANCE.registerSingleton(Constants.POSTSAVE_LATCH, postSaveLatch); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0")); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0")); - slaveTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCSpout()); + executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0")); + executorTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0")); + executorTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCSpout()); // initially non of 
preSave or postSave are invoked yet assertEquals(1, preSaveLatch.getCount()); assertEquals(1, postSaveLatch.getCount()); // this should invoke preSave - slaveTester.getInStreamQueue().offer(UnitTestHelper.buildPersistStateMessage("c0")); + executorTester.getInStreamQueue().offer(UnitTestHelper.buildPersistStateMessage("c0")); assertTrue(preSaveLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS)); assertEquals(0, preSaveLatch.getCount()); assertEquals(1, postSaveLatch.getCount()); // this should invoke postSave - slaveTester.getInControlQueue().offer(UnitTestHelper.buildCheckpointSavedMessage("c0", "p0")); + executorTester.getInControlQueue().offer(UnitTestHelper.buildCheckpointSavedMessage("c0", "p0")); assertTrue(postSaveLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS)); assertEquals(0, preSaveLatch.getCount()); assertEquals(0, postSaveLatch.getCount()); @@ -94,13 +94,13 @@ public void testPreRestore() throws InterruptedException { CountDownLatch preRestoreLatch = new CountDownLatch(1); SingletonRegistry.INSTANCE.registerSingleton(Constants.PRERESTORE_LATCH, preRestoreLatch); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0")); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0")); - slaveTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCSpout()); + executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0")); + executorTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0")); + executorTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCSpout()); assertEquals(1, preRestoreLatch.getCount()); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("cx")); + executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("cx")); assertTrue(preRestoreLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS)); assertEquals(0, preRestoreLatch.getCount()); @@ -126,16 +126,16 @@ public void testPostSaveBlockExecute() throws Exception { SingletonRegistry.INSTANCE.registerSingleton(Constants.POSTSAVE_LATCH, postSaveLatch); SingletonRegistry.INSTANCE.registerSingleton(Constants.EMIT_LATCH, emitLatch); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0")); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0")); - slaveTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCSpout()); + executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0")); + executorTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0")); + executorTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCSpout()); // initially non of preSave or postSave are invoked yet assertEquals(1, preSaveLatch.getCount()); assertEquals(1, postSaveLatch.getCount()); // this should invoke preSave - slaveTester.getInStreamQueue().offer(UnitTestHelper.buildPersistStateMessage("c0")); + executorTester.getInStreamQueue().offer(UnitTestHelper.buildPersistStateMessage("c0")); // tell the spout to start emitting tuples assertFalse(shouldStartEmit.getAndSet(true)); @@ -155,7 +155,7 @@ public void testPostSaveBlockExecute() throws Exception { assertEquals(1, emitLatch.getCount()); // this should invoke postSave - slaveTester.getInControlQueue().offer(UnitTestHelper.buildCheckpointSavedMessage("c0", "p0")); + 
executorTester.getInControlQueue().offer(UnitTestHelper.buildCheckpointSavedMessage("c0", "p0")); assertTrue(postSaveLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS)); assertTrue(emitLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS)); @@ -182,16 +182,16 @@ public void testExecuteNotBlocked() throws Exception { SingletonRegistry.INSTANCE.registerSingleton(Constants.PRESAVE_LATCH, preSaveLatch); SingletonRegistry.INSTANCE.registerSingleton(Constants.EMIT_LATCH, emitLatch); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0")); - slaveTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0")); - slaveTester.getInControlQueue().offer(buildPhysicalPlanMessageForStatefulSpout()); + executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0")); + executorTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0")); + executorTester.getInControlQueue().offer(buildPhysicalPlanMessageForStatefulSpout()); // initially non of preSave or postSave are invoked yet assertEquals(1, preSaveLatch.getCount()); assertEquals(1, emitLatch.getCount()); // this should invoke preSave - slaveTester.getInStreamQueue().offer(UnitTestHelper.buildPersistStateMessage("c0")); + executorTester.getInStreamQueue().offer(UnitTestHelper.buildPersistStateMessage("c0")); // tell the spout to start emitting tuples assertFalse(shouldStartEmit.getAndSet(true)); From 9d3206c9ea3f1f58506c195656c0439c456c512a Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Mon, 26 Oct 2020 14:15:20 -0400 Subject: [PATCH 24/32] renaming "topology master" to "topology manager" in heron/packing --- .../org/apache/heron/packing/roundrobin/RoundRobinPacking.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/heron/packing/src/java/org/apache/heron/packing/roundrobin/RoundRobinPacking.java b/heron/packing/src/java/org/apache/heron/packing/roundrobin/RoundRobinPacking.java index 0b3f4c87870..bbbad2acbfe 100644 --- a/heron/packing/src/java/org/apache/heron/packing/roundrobin/RoundRobinPacking.java +++ b/heron/packing/src/java/org/apache/heron/packing/roundrobin/RoundRobinPacking.java @@ -66,7 +66,7 @@ * 2. The size of resource required by the whole topology is equal to * ((# of container specified in config) + 1) * (size of resource required for a single container). * The extra 1 is considered for Heron internal container, - * i.e. the one containing Scheduler and TMaster. + * i.e. the one containing Scheduler and TManager. *

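A minimal sketch of the container arithmetic described in point 2 above, using hypothetical numbers (4 requested containers at 2 GiB of RAM each); the class and method names are illustrative only and are not part of RoundRobinPacking:

    // Illustrative only: hypothetical names, not part of RoundRobinPacking.
    public final class ContainerMath {
      // Total RAM for the topology, per point 2 above: the configured
      // container count plus one internal container (Scheduler + TManager).
      static long totalTopologyRamBytes(int configuredContainers, long ramPerContainerBytes) {
        return (configuredContainers + 1L) * ramPerContainerBytes;
      }

      public static void main(String[] args) {
        // 4 requested containers at 2 GiB each -> (4 + 1) * 2 GiB = 10 GiB
        System.out.println(totalTopologyRamBytes(4, 2L * 1024 * 1024 * 1024));
      }
    }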
* 3. The disk required for a container is calculated as: * value for org.apache.heron.api.Config.TOPOLOGY_CONTAINER_DISK_REQUESTED if exists, otherwise, From afef9b5e7b313855d15e513996586681e64da4c6 Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Mon, 26 Oct 2020 15:26:49 -0400 Subject: [PATCH 25/32] renaming "topology master" to "topology manager" in heron/metricsmgr --- heron/metricsmgr/src/java/BUILD | 2 +- .../heron/metricsmgr/MetricsManager.java | 6 +- .../metricsmgr/MetricsManagerServer.java | 90 ++-- .../metricsmgr/executor/SinkExecutor.java | 18 +- .../sink/metricscache/MetricsCacheClient.java | 10 +- .../sink/metricscache/MetricsCacheSink.java | 100 ++-- .../TManagerClient.java} | 44 +- .../sink/tmanager/TManagerSink.java | 441 ++++++++++++++++++ .../metricsmgr/sink/tmaster/TMasterSink.java | 441 ------------------ heron/metricsmgr/tests/java/BUILD | 6 +- ...t.java => HandleTManagerLocationTest.java} | 80 ++-- .../metricsmgr/executor/SinkExecutorTest.java | 12 +- .../metricscache/MetricsCacheSinkTest.java | 20 +- .../TManagerSinkTest.java} | 114 ++--- 14 files changed, 692 insertions(+), 692 deletions(-) rename heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/{tmaster/TMasterClient.java => tmanager/TManagerClient.java} (69%) create mode 100644 heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSink.java delete mode 100644 heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmaster/TMasterSink.java rename heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/{HandleTMasterLocationTest.java => HandleTManagerLocationTest.java} (66%) rename heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/{tmaster/TMasterSinkTest.java => tmanager/TManagerSinkTest.java} (58%) diff --git a/heron/metricsmgr/src/java/BUILD b/heron/metricsmgr/src/java/BUILD index 77bcac16257..a734416c454 100644 --- a/heron/metricsmgr/src/java/BUILD +++ b/heron/metricsmgr/src/java/BUILD @@ -18,7 +18,7 @@ deps = [ "//heron/spi/src/java:metricsmgr-spi-java", "//heron/proto:proto_common_java", "//heron/proto:proto_metrics_java", - "//heron/proto:proto_tmaster_java", + "//heron/proto:proto_tmanager_java", "//third_party/java:guava", # only used in WebSink "//third_party/java:jackson", "//third_party/java:cli", diff --git a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/MetricsManager.java b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/MetricsManager.java index 076a50477ab..b4788c370e6 100644 --- a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/MetricsManager.java +++ b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/MetricsManager.java @@ -42,7 +42,7 @@ import org.apache.heron.common.basics.Communicator; import org.apache.heron.common.basics.NIOLooper; import org.apache.heron.common.basics.SingletonRegistry; -import org.apache.heron.common.basics.SlaveLooper; +import org.apache.heron.common.basics.ExecutorLooper; import org.apache.heron.common.basics.SysUtils; import org.apache.heron.common.basics.TypeUtils; import org.apache.heron.common.config.SystemConfig; @@ -373,7 +373,7 @@ public static void main(String[] args) throws Exception { Level loggingLevel = Level.INFO; String loggingDir = systemConfig.getHeronLoggingDirectory(); - // Log to file and TMaster + // Log to file and TManager LoggingHelper.loggerInit(loggingLevel, true); LoggingHelper.addLoggingHandler( LoggingHelper.getFileHandler(metricsmgrId, loggingDir, true, @@ -442,7 +442,7 @@ private SinkExecutor initSinkExecutor(String sinkId) { } catch (ClassNotFoundException e) { throw new 
RuntimeException(e + " IMetricsSink class must be a class path."); } - SlaveLooper sinkExecutorLoop = new SlaveLooper(); + ExecutorLooper sinkExecutorLoop = new ExecutorLooper(); Communicator executorInMetricsQueue = new Communicator(null, sinkExecutorLoop); diff --git a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/MetricsManagerServer.java b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/MetricsManagerServer.java index 8b9e6abf5db..fbe284948a0 100644 --- a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/MetricsManagerServer.java +++ b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/MetricsManagerServer.java @@ -41,7 +41,7 @@ import org.apache.heron.common.network.REQID; import org.apache.heron.proto.system.Common; import org.apache.heron.proto.system.Metrics; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.spi.metricsmgr.metrics.ExceptionInfo; import org.apache.heron.spi.metricsmgr.metrics.MetricsInfo; import org.apache.heron.spi.metricsmgr.metrics.MetricsRecord; @@ -49,19 +49,19 @@ public class MetricsManagerServer extends HeronServer { private static final Logger LOG = Logger.getLogger(MetricsManagerServer.class.getName()); - // Bean name to register the TMasterLocation object into SingletonRegistry - private static final String TMASTER_LOCATION_BEAN_NAME = - TopologyMaster.TMasterLocation.newBuilder().getDescriptorForType().getFullName(); + // Bean name to register the TManagerLocation object into SingletonRegistry + private static final String TMANAGER_LOCATION_BEAN_NAME = + TopologyManager.TManagerLocation.newBuilder().getDescriptorForType().getFullName(); public static final String METRICSCACHE_LOCATION_BEAN_NAME = - TopologyMaster.MetricsCacheLocation.newBuilder().getDescriptorForType().getFullName(); + TopologyManager.MetricsCacheLocation.newBuilder().getDescriptorForType().getFullName(); // Metrics Counter Name private static final String SERVER_CLOSE_PUBLISHER = "close-publisher"; private static final String SERVER_NEW_REGISTER = "new-register-request"; private static final String SERVER_METRICS_RECEIVED = "metrics-received"; private static final String SERVER_EXCEPTIONS_RECEIVED = "exceptions-received"; - private static final String SERVER_NEW_TMASTER_LOCATION = "new-tmaster-location"; - private static final String SERVER_TMASTER_LOCATION_RECEIVED = "tmaster-location-received"; + private static final String SERVER_NEW_TMANAGER_LOCATION = "new-tmanager-location"; + private static final String SERVER_TMANAGER_LOCATION_RECEIVED = "tmanager-location-received"; private static final String SERVER_COMMUNICATOR_OFFER = "communicator-offer"; private static final String SERVER_COMMUNICATOR_SIZE = "communicator-size"; @@ -114,11 +114,11 @@ private void registerInitialization() { // Register the Metrics Message registerOnMessage(Metrics.MetricPublisherPublishMessage.newBuilder()); - // Register the TMasterLocationRefreshMessage, which is used by TMasterSink - // We do this to avoid communication between TMasterSink and Zookeeper - // TODO -- Reading TMasterLocationRefreshMessage from StreamMgr is more a temp solution + // Register the TManagerLocationRefreshMessage, which is used by TManagerSink + // We do this to avoid communication between TManagerSink and Zookeeper + // TODO -- Reading TManagerLocationRefreshMessage from StreamMgr is more a temp solution // TODO -- It adds dependencies on internal broadcast service - 
registerOnMessage(Metrics.TMasterLocationRefreshMessage.newBuilder()); + registerOnMessage(Metrics.TManagerLocationRefreshMessage.newBuilder()); registerOnMessage(Metrics.MetricsCacheLocationRefreshMessage.newBuilder()); } @@ -175,10 +175,10 @@ public void onMessage(SocketChannel channel, Message message) { + channel.socket().getRemoteSocketAddress()); handleMetricsCacheLocationRefreshMessage( request, (Metrics.MetricsCacheLocationRefreshMessage) message); - } else if (message instanceof Metrics.TMasterLocationRefreshMessage) { - // LOG down where the TMaster Location comes from - LOG.info("TMaster Location is refresh from: " + channel.socket().getRemoteSocketAddress()); - handleTMasterLocationRefreshMessage(request, (Metrics.TMasterLocationRefreshMessage) message); + } else if (message instanceof Metrics.TManagerLocationRefreshMessage) { + // LOG down where the TManager Location comes from + LOG.info("TManager Location is refresh from: " + channel.socket().getRemoteSocketAddress()); + handleTManagerLocationRefreshMessage(request, (Metrics.TManagerLocationRefreshMessage) message); } else { LOG.severe("Unknown kind of message received from Metrics Manager"); } @@ -291,7 +291,7 @@ private void handlePublisherPublishMessage(Metrics.MetricPublisher request, MetricsRecord record = new MetricsRecord(source, metricsInfos, exceptionInfos); - // Push MetricsRecord to Communicator, which would wake up SlaveLooper bind with IMetricsSink + // Push MetricsRecord to Communicator, which would wake up ExecutorLooper bind with IMetricsSink synchronized (metricsSinkCommunicators) { Iterator itr = metricsSinkCommunicators.keySet().iterator(); while (itr.hasNext()) { @@ -304,68 +304,68 @@ private void handlePublisherPublishMessage(Metrics.MetricPublisher request, } } - // TMasterLocationRefreshMessage handler - private void handleTMasterLocationRefreshMessage( + // TManagerLocationRefreshMessage handler + private void handleTManagerLocationRefreshMessage( Metrics.MetricPublisher request, - Metrics.TMasterLocationRefreshMessage tMasterLocationRefreshMessage) { - TopologyMaster.TMasterLocation oldLocation = - (TopologyMaster.TMasterLocation) - SingletonRegistry.INSTANCE.getSingleton(TMASTER_LOCATION_BEAN_NAME); + Metrics.TManagerLocationRefreshMessage tManagerLocationRefreshMessage) { + TopologyManager.TManagerLocation oldLocation = + (TopologyManager.TManagerLocation) + SingletonRegistry.INSTANCE.getSingleton(TMANAGER_LOCATION_BEAN_NAME); - TopologyMaster.TMasterLocation newLocation = tMasterLocationRefreshMessage.getTmaster(); + TopologyManager.TManagerLocation newLocation = tManagerLocationRefreshMessage.getTmanager(); if (oldLocation == null) { - // The first time to get TMasterLocation + // The first time to get TManagerLocation // Register to the SingletonRegistry - LOG.info("We received a new TMasterLocation. Register it into SingletonRegistry"); - SingletonRegistry.INSTANCE.registerSingleton(TMASTER_LOCATION_BEAN_NAME, newLocation); + LOG.info("We received a new TManagerLocation. Register it into SingletonRegistry"); + SingletonRegistry.INSTANCE.registerSingleton(TMANAGER_LOCATION_BEAN_NAME, newLocation); // Update Metrics - serverMetricsCounters.scope(SERVER_NEW_TMASTER_LOCATION).incr(); + serverMetricsCounters.scope(SERVER_NEW_TMANAGER_LOCATION).incr(); } else if (oldLocation.equals(newLocation)) { // The new one is the same as old one. // Just Log. Do nothing - LOG.info("We received a new TMasterLocation the same as the old one. 
Do nothing."); + LOG.info("We received a new TManagerLocation the same as the old one. Do nothing."); } else { - // Have received TMasterLocation earlier, but it changed. + // Have received TManagerLocation earlier, but it changed. // We need update the SingletonRegistry - LOG.info("We received a new TMasterLocation. Replace the old one."); - LOG.info("Old TMasterLocation: " + oldLocation); - SingletonRegistry.INSTANCE.updateSingleton(TMASTER_LOCATION_BEAN_NAME, newLocation); + LOG.info("We received a new TManagerLocation. Replace the old one."); + LOG.info("Old TManagerLocation: " + oldLocation); + SingletonRegistry.INSTANCE.updateSingleton(TMANAGER_LOCATION_BEAN_NAME, newLocation); // Update Metrics - serverMetricsCounters.scope(SERVER_NEW_TMASTER_LOCATION).incr(); + serverMetricsCounters.scope(SERVER_NEW_TMANAGER_LOCATION).incr(); } - LOG.info("Current TMaster location: " + newLocation); + LOG.info("Current TManager location: " + newLocation); // Update Metrics - serverMetricsCounters.scope(SERVER_TMASTER_LOCATION_RECEIVED).incr(); + serverMetricsCounters.scope(SERVER_TMANAGER_LOCATION_RECEIVED).incr(); } private void handleMetricsCacheLocationRefreshMessage( Metrics.MetricPublisher request, - Metrics.MetricsCacheLocationRefreshMessage tMasterLocationRefreshMessage) { - TopologyMaster.MetricsCacheLocation oldLocation = - (TopologyMaster.MetricsCacheLocation) + Metrics.MetricsCacheLocationRefreshMessage tManagerLocationRefreshMessage) { + TopologyManager.MetricsCacheLocation oldLocation = + (TopologyManager.MetricsCacheLocation) SingletonRegistry.INSTANCE.getSingleton(METRICSCACHE_LOCATION_BEAN_NAME); - TopologyMaster.MetricsCacheLocation newLocation = - tMasterLocationRefreshMessage.getMetricscache(); + TopologyManager.MetricsCacheLocation newLocation = + tManagerLocationRefreshMessage.getMetricscache(); if (oldLocation == null) { - // The first time to get TMasterLocation + // The first time to get TManagerLocation // Register to the SingletonRegistry LOG.info("We received a new MetricsCacheLocation. Register it into SingletonRegistry"); SingletonRegistry.INSTANCE.registerSingleton(METRICSCACHE_LOCATION_BEAN_NAME, newLocation); // Update Metrics - serverMetricsCounters.scope(SERVER_NEW_TMASTER_LOCATION).incr(); + serverMetricsCounters.scope(SERVER_NEW_TMANAGER_LOCATION).incr(); } else if (oldLocation.equals(newLocation)) { // The new one is the same as old one. @@ -374,7 +374,7 @@ private void handleMetricsCacheLocationRefreshMessage( LOG.info("We received a new MetricsCacheLocation the same as the old one " + newLocation + " . Do nothing."); } else { - // Have received TMasterLocation earlier, but it changed. + // Have received TManagerLocation earlier, but it changed. 
// We need update the SingletonRegistry LOG.info("We received a new MetricsCacheLocation " + newLocation @@ -382,10 +382,10 @@ private void handleMetricsCacheLocationRefreshMessage( SingletonRegistry.INSTANCE.updateSingleton(METRICSCACHE_LOCATION_BEAN_NAME, newLocation); // Update Metrics - serverMetricsCounters.scope(SERVER_NEW_TMASTER_LOCATION).incr(); + serverMetricsCounters.scope(SERVER_NEW_TMANAGER_LOCATION).incr(); } // Update Metrics - serverMetricsCounters.scope(SERVER_TMASTER_LOCATION_RECEIVED).incr(); + serverMetricsCounters.scope(SERVER_TMANAGER_LOCATION_RECEIVED).incr(); } } diff --git a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/executor/SinkExecutor.java b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/executor/SinkExecutor.java index 2ac3a902177..38fa4354dd6 100644 --- a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/executor/SinkExecutor.java +++ b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/executor/SinkExecutor.java @@ -26,7 +26,7 @@ import java.util.Map; import org.apache.heron.common.basics.Communicator; -import org.apache.heron.common.basics.SlaveLooper; +import org.apache.heron.common.basics.ExecutorLooper; import org.apache.heron.common.basics.SysUtils; import org.apache.heron.common.basics.TypeUtils; import org.apache.heron.metricsmgr.MetricsSinksConfig; @@ -46,7 +46,7 @@ */ public class SinkExecutor implements Runnable, AutoCloseable { private final IMetricsSink metricsSink; - private final SlaveLooper slaveLooper; + private final ExecutorLooper executorLooper; // Communicator to read MetricsRecord private final Communicator metricsInSinkQueue; @@ -65,15 +65,15 @@ public class SinkExecutor implements Runnable, AutoCloseable { * * @param executorName the name of this executor used as the name of running thread * @param metricsSink the class implementing IMetricsSink - * @param slaveLooper the SlaveLoop to bind with + * @param executorLooper the ExecutorLooper to bind with * @param metricsInSinkQueue the queue to read MetricsRecord from */ public SinkExecutor(String executorName, IMetricsSink metricsSink, - SlaveLooper slaveLooper, Communicator metricsInSinkQueue, + ExecutorLooper executorLooper, Communicator metricsInSinkQueue, SinkContext sinkContext) { this.executorName = executorName; this.metricsSink = metricsSink; - this.slaveLooper = slaveLooper; + this.executorLooper = executorLooper; this.metricsInSinkQueue = metricsInSinkQueue; this.sinkContext = sinkContext; this.sinkConfig = new HashMap(); @@ -108,7 +108,7 @@ public void run() { metricsSink.init(Collections.unmodifiableMap(sinkConfig), sinkContext); - slaveLooper.loop(); + executorLooper.loop(); } // Add task to invoke processRecord method when the WakeableLooper is waken up @@ -122,7 +122,7 @@ public void run() { } }; - slaveLooper.addTasksOnWakeup(sinkTasks); + executorLooper.addTasksOnWakeup(sinkTasks); } // Add TimerTask to invoke flush() in IMetricsSink @@ -138,12 +138,12 @@ private void flushSinkAtInterval() { public void run() { metricsSink.flush(); //Plan itself in future - slaveLooper.registerTimerEvent(flushInterval, this); + executorLooper.registerTimerEvent(flushInterval, this); } }; // Plan the runnable explicitly at the first time - slaveLooper.registerTimerEvent(flushInterval, flushSink); + executorLooper.registerTimerEvent(flushInterval, flushSink); } } } diff --git a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheClient.java 
b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheClient.java index 5e0a3e95175..80761eb7813 100644 --- a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheClient.java +++ b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheClient.java @@ -29,15 +29,15 @@ import org.apache.heron.common.network.HeronClient; import org.apache.heron.common.network.HeronSocketOptions; import org.apache.heron.common.network.StatusCode; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; /** - * MetricsCacheClient connects to MetricsCache and then send TopologyMaster.PublishMetrics continuously. + * MetricsCacheClient connects to MetricsCache and then send TopologyManager.PublishMetrics continuously. * Note that MetricsCache will not send registerRequest or wait for registerResponse. */ public class MetricsCacheClient extends HeronClient implements Runnable { private static final Logger LOG = Logger.getLogger(MetricsCacheClient.class.getName()); - private final Communicator publishMetricsCommunicator; + private final Communicator publishMetricsCommunicator; private final String host; private final int port; private final Duration reconnectInterval; @@ -52,7 +52,7 @@ public class MetricsCacheClient extends HeronClient implements Runnable { */ public MetricsCacheClient( NIOLooper s, String host, int port, HeronSocketOptions options, - Communicator publishMetricsCommunicator, + Communicator publishMetricsCommunicator, Duration reconnectInterval) { super(s, host, port, options); this.host = host; @@ -93,7 +93,7 @@ private void addMetricsCacheClientTasksOnWakeUp() { Runnable task = new Runnable() { @Override public void run() { - TopologyMaster.PublishMetrics publishMetrics; + TopologyManager.PublishMetrics publishMetrics; while (true) { synchronized (publishMetricsCommunicator) { publishMetrics = publishMetricsCommunicator.poll(); diff --git a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSink.java b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSink.java index 0f049d1619c..fc7638bd49e 100644 --- a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSink.java +++ b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSink.java @@ -44,7 +44,7 @@ import org.apache.heron.common.network.HeronSocketOptions; import org.apache.heron.metricsmgr.MetricsManagerServer; import org.apache.heron.metricsmgr.MetricsUtil; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.spi.metricsmgr.metrics.ExceptionInfo; import org.apache.heron.spi.metricsmgr.metrics.MetricsFilter; import org.apache.heron.spi.metricsmgr.metrics.MetricsInfo; @@ -70,7 +70,7 @@ * external logic would take care of it. *

* -- MetricsCacheLocation changes (though in fact, metricsCacheClient might also throw exceptions in this case), - * in this case, we would invoke MetricsCacheService to start from tMasterLocationStarter's thread. + * in this case, we would invoke MetricsCacheService to start from tManagerLocationStarter's thread. * But the MetricsCacheService and metricsCacheClient still start within the thread they run. *

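As a sketch of the first restart scenario above (restart after an uncaught exception), assuming a hypothetical newClientRunnable() standing in for the client loop; the real wiring lives in MetricsCacheClientService further down in this patch:

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;

    // Hedged sketch with hypothetical names; not the sink's actual code.
    final class RestartingClientService {
      private final ExecutorService executor =
          Executors.newSingleThreadExecutor(this::newHandledThread);

      private Thread newHandledThread(Runnable r) {
        Thread t = new Thread(r, "metricscache-client");
        t.setUncaughtExceptionHandler((thread, cause) -> {
          // An exception escaping the client lands here; resubmit a fresh
          // client on the same single-thread executor. If this handler itself
          // throws, the thread dies without the handler being re-invoked --
          // which is why a failure during restart kills the whole sink.
          executor.submit(newClientRunnable());
        });
        return t;
      }

      void start() {
        executor.submit(newClientRunnable());
      }

      private Runnable newClientRunnable() {
        // Stand-in for the client loop that connects and pumps metrics
        // until a read/write error is thrown.
        return () -> { };
      }
    }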
* 3. When a new MetricsRecord comes by invoking processRecord, it would push the MetricsRecord @@ -86,10 +86,10 @@ public class MetricsCacheSink implements IMetricsSink { private static final int MAX_COMMUNICATOR_SIZE = 128; // These configs would be read from metrics-sink-configs.yaml - private static final String KEY_TMASTER_LOCATION_CHECK_INTERVAL_SEC = + private static final String KEY_TMANAGER_LOCATION_CHECK_INTERVAL_SEC = "metricscache-location-check-interval-sec"; - private static final String KEY_TMASTER = "metricscache-client"; - private static final String KEY_TMASTER_RECONNECT_INTERVAL_SEC = "reconnect-interval-second"; + private static final String KEY_TMANAGER = "metricscache-client"; + private static final String KEY_TMANAGER_RECONNECT_INTERVAL_SEC = "reconnect-interval-second"; private static final String KEY_NETWORK_WRITE_BATCH_SIZE_BYTES = "network-write-batch-size-bytes"; private static final String KEY_NETWORK_WRITE_BATCH_TIME_MS = "network-write-batch-time-ms"; private static final String KEY_NETWORK_READ_BATCH_SIZE_BYTES = "network-read-batch-size-bytes"; @@ -97,31 +97,31 @@ public class MetricsCacheSink implements IMetricsSink { private static final String KEY_SOCKET_SEND_BUFFER_BYTES = "socket-send-buffer-size-bytes"; private static final String KEY_SOCKET_RECEIVED_BUFFER_BYTES = "socket-received-buffer-size-bytes"; - private static final String KEY_TMASTER_METRICS_TYPE = "metricscache-metrics-type"; + private static final String KEY_TMANAGER_METRICS_TYPE = "metricscache-metrics-type"; // Bean name to fetch the MetricsCacheLocation object from SingletonRegistry -// private static final String TMASTER_LOCATION_BEAN_NAME = -// TopologyMaster.MetricsCacheLocation.newBuilder().getDescriptorForType().getFullName(); +// private static final String TMANAGER_LOCATION_BEAN_NAME = +// TopologyManager.MetricsCacheLocation.newBuilder().getDescriptorForType().getFullName(); // Metrics Counter Name private static final String METRICS_COUNT = "metrics-count"; private static final String EXCEPTIONS_COUNT = "exceptions-count"; private static final String RECORD_PROCESS_COUNT = "record-process-count"; private static final String METRICSMGR_RESTART_COUNT = "metricsmgr-restart-count"; private static final String METRICSMGR_LOCATION_UPDATE_COUNT = "metricsmgr-location-update-count"; - private final Communicator metricsCommunicator = + private final Communicator metricsCommunicator = new Communicator<>(); - private final MetricsFilter tMasterMetricsFilter = new MetricsFilter(); + private final MetricsFilter tManagerMetricsFilter = new MetricsFilter(); private final Map sinkConfig = new HashMap<>(); // A scheduled executor service to check whether the MetricsCacheLocation has changed // If so, restart the metricsCacheClientService with the new MetricsCacheLocation // Start of metricsCacheClientService will also be in this thread - private final ScheduledExecutorService tMasterLocationStarter = + private final ScheduledExecutorService tManagerLocationStarter = Executors.newSingleThreadScheduledExecutor(); private MetricsCacheClientService metricsCacheClientService; // We need to cache it locally to check whether the MetricsCacheLocation is changed // This field is changed only in ScheduledExecutorService's thread, // so no need to make it volatile - private TopologyMaster.MetricsCacheLocation currentMetricsCacheLocation = null; + private TopologyManager.MetricsCacheLocation currentMetricsCacheLocation = null; private SinkContext sinkContext; @Override @@ -132,24 +132,24 @@ public void 
init(Map conf, SinkContext context) { sinkContext = context; - // Fill the tMasterMetricsFilter according to metrics-sink-configs.yaml - Map tmasterMetricsType = - (Map) sinkConfig.get(KEY_TMASTER_METRICS_TYPE); - if (tmasterMetricsType != null) { - for (Map.Entry metricToType : tmasterMetricsType.entrySet()) { + // Fill the tManagerMetricsFilter according to metrics-sink-configs.yaml + Map tmanagerMetricsType = + (Map) sinkConfig.get(KEY_TMANAGER_METRICS_TYPE); + if (tmanagerMetricsType != null) { + for (Map.Entry metricToType : tmanagerMetricsType.entrySet()) { String value = metricToType.getValue(); MetricsFilter.MetricAggregationType type = MetricsFilter.MetricAggregationType.valueOf(value); - tMasterMetricsFilter.setPrefixToType(metricToType.getKey(), type); + tManagerMetricsFilter.setPrefixToType(metricToType.getKey(), type); } } // Construct the long-live metricsCacheClientService metricsCacheClientService = new MetricsCacheClientService((Map) - sinkConfig.get(KEY_TMASTER), metricsCommunicator); + sinkConfig.get(KEY_TMANAGER), metricsCommunicator); - // Start the tMasterLocationStarter + // Start the tManagerLocationStarter startMetricsCacheChecker(); } @@ -158,13 +158,13 @@ public void init(Map conf, SinkContext context) { // If so, restart the metricsCacheClientService with the new MetricsCacheLocation private void startMetricsCacheChecker() { final int checkIntervalSec = - TypeUtils.getInteger(sinkConfig.get(KEY_TMASTER_LOCATION_CHECK_INTERVAL_SEC)); + TypeUtils.getInteger(sinkConfig.get(KEY_TMANAGER_LOCATION_CHECK_INTERVAL_SEC)); Runnable runnable = new Runnable() { @Override public void run() { - TopologyMaster.MetricsCacheLocation location = - (TopologyMaster.MetricsCacheLocation) SingletonRegistry.INSTANCE.getSingleton( + TopologyManager.MetricsCacheLocation location = + (TopologyManager.MetricsCacheLocation) SingletonRegistry.INSTANCE.getSingleton( MetricsManagerServer.METRICSCACHE_LOCATION_BEAN_NAME); if (location != null) { @@ -173,7 +173,7 @@ public void run() { LOG.info("Update current MetricsCacheLocation to: " + location); currentMetricsCacheLocation = location; metricsCacheClientService.updateMetricsCacheLocation(currentMetricsCacheLocation); - metricsCacheClientService.startNewMasterClient(); + metricsCacheClientService.startNewPrimaryClient(); // Update Metrics sinkContext.exportCountMetric(METRICSMGR_LOCATION_UPDATE_COUNT, 1); @@ -181,19 +181,19 @@ public void run() { } // Schedule itself in future - tMasterLocationStarter.schedule(this, checkIntervalSec, TimeUnit.SECONDS); + tManagerLocationStarter.schedule(this, checkIntervalSec, TimeUnit.SECONDS); } }; // First Entry - tMasterLocationStarter.schedule(runnable, checkIntervalSec, TimeUnit.SECONDS); + tManagerLocationStarter.schedule(runnable, checkIntervalSec, TimeUnit.SECONDS); LOG.info("MetricsCacheChecker started with interval: " + checkIntervalSec); } @Override public void processRecord(MetricsRecord record) { LOG.info("metricscache sink processRecord"); - // Format it into TopologyMaster.PublishMetrics + // Format it into TopologyManager.PublishMetrics // The format of record is "host:port/componentName/instanceId" // So MetricsRecord.getSource().split("/") would be an array with 3 elements: @@ -203,12 +203,12 @@ public void processRecord(MetricsRecord record) { String componentName = sources[1]; String instanceId = sources[2]; - TopologyMaster.PublishMetrics.Builder publishMetrics = - TopologyMaster.PublishMetrics.newBuilder(); + TopologyManager.PublishMetrics.Builder publishMetrics = + 
TopologyManager.PublishMetrics.newBuilder(); - for (MetricsInfo metricsInfo : tMasterMetricsFilter.filter(record.getMetrics())) { + for (MetricsInfo metricsInfo : tManagerMetricsFilter.filter(record.getMetrics())) { // We would filter out unneeded metrics - TopologyMaster.MetricDatum metricDatum = TopologyMaster.MetricDatum.newBuilder(). + TopologyManager.MetricDatum metricDatum = TopologyManager.MetricDatum.newBuilder(). setComponentName(componentName).setInstanceId(instanceId).setName(metricsInfo.getName()). setValue(metricsInfo.getValue()).setTimestamp(record.getTimestamp()).build(); publishMetrics.addMetrics(metricDatum); @@ -219,8 +219,8 @@ public void processRecord(MetricsRecord record) { String[] exceptionStackTraceLines = exceptionStackTrace.split("\r\n|[\r\n]", 3); String exceptionStackTraceFirstTwoLines = String.join(System.lineSeparator(), exceptionStackTraceLines[0], exceptionStackTraceLines[1]); - TopologyMaster.TmasterExceptionLog exceptionLog = - TopologyMaster.TmasterExceptionLog.newBuilder() + TopologyManager.TmanagerExceptionLog exceptionLog = + TopologyManager.TmanagerExceptionLog.newBuilder() .setComponentName(componentName) .setHostname(hostPort) .setInstanceId(instanceId) @@ -246,7 +246,7 @@ public void processRecord(MetricsRecord record) { // Check if the communicator is full/overflow. Poll and drop extra elements that // are over the queue limit from the head. - public static void checkCommunicator(Communicator communicator, + public static void checkCommunicator(Communicator communicator, int maxSize) { synchronized (communicator) { int size = communicator.size(); @@ -287,9 +287,9 @@ MetricsCacheClient getMetricsCacheClient() { } @VisibleForTesting - void startNewMetricsCacheClient(TopologyMaster.MetricsCacheLocation location) { + void startNewMetricsCacheClient(TopologyManager.MetricsCacheLocation location) { metricsCacheClientService.updateMetricsCacheLocation(location); - metricsCacheClientService.startNewMasterClient(); + metricsCacheClientService.startNewPrimaryClient(); } @VisibleForTesting @@ -298,12 +298,12 @@ int getMetricsCacheStartedAttempts() { } @VisibleForTesting - TopologyMaster.MetricsCacheLocation getCurrentMetricsCacheLocation() { + TopologyManager.MetricsCacheLocation getCurrentMetricsCacheLocation() { return currentMetricsCacheLocation; } @VisibleForTesting - TopologyMaster.MetricsCacheLocation getCurrentMetricsCacheLocationInService() { + TopologyManager.MetricsCacheLocation getCurrentMetricsCacheLocationInService() { return metricsCacheClientService.getCurrentMetricsCacheLocation(); } @@ -312,17 +312,17 @@ TopologyMaster.MetricsCacheLocation getCurrentMetricsCacheLocationInService() { * It would automatically restart the metricsCacheClient connecting and communicating to the latest * MetricsCacheLocation if any uncaught exceptions throw. *

- * It provides startNewMasterClient(TopologyMaster.MetricsCacheLocation location), which would also + * It provides startNewPrimaryClient(TopologyManager.MetricsCacheLocation location), which would also * update the currentMetricsCacheLocation to the latest location. *

* So a new metricsCacheClient would start in two cases: * 1. The old one threw exceptions and died. - * 2. startNewMasterClient() is invoked externally with MetricsCacheLocation. + * 2. startNewPrimaryClient() is invoked externally with MetricsCacheLocation. */ private static final class MetricsCacheClientService { private final AtomicInteger startedAttempts = new AtomicInteger(0); private final Map metricsCacheClientConfig; - private final Communicator metricsCommunicator; + private final Communicator metricsCommunicator; private final ExecutorService metricsCacheClientExecutor = Executors.newSingleThreadExecutor(new MetricsCacheClientThreadFactory()); private volatile MetricsCacheClient metricsCacheClient; @@ -330,11 +330,11 @@ private static final class MetricsCacheClientService { // This value is set in ScheduledExecutorService' thread while // it is used in metricsCacheClientService thread, // so we need to make it volatile to guarantee the visiability. - private volatile TopologyMaster.MetricsCacheLocation currentMetricsCacheLocation; + private volatile TopologyManager.MetricsCacheLocation currentMetricsCacheLocation; private MetricsCacheClientService( Map metricsCacheClientConfig, - Communicator metricsCommunicator) { + Communicator metricsCommunicator) { this.metricsCacheClientConfig = metricsCacheClientConfig; this.metricsCommunicator = metricsCommunicator; } @@ -344,13 +344,13 @@ private MetricsCacheClientService( // currentMetricsCacheLocation is volatile and we just replace it. // In our scenario, it is only invoked when MetricsCacheLocation is changed, // i.e. this method is only invoked in scheduled executor thread. - public void updateMetricsCacheLocation(TopologyMaster.MetricsCacheLocation location) { + public void updateMetricsCacheLocation(TopologyManager.MetricsCacheLocation location) { currentMetricsCacheLocation = location; } // This method could be invoked by different threads // Make it synchronized to guarantee thread-safe - public synchronized void startNewMasterClient() { + public synchronized void startNewPrimaryClient() { // Exit any running metricsCacheClient if there is any to release // the thread in metricsCacheClientExecutor @@ -390,10 +390,10 @@ public synchronized void startNewMasterClient() { metricsCacheClient = new MetricsCacheClient(looper, currentMetricsCacheLocation.getHost(), - currentMetricsCacheLocation.getMasterPort(), + currentMetricsCacheLocation.getManagerPort(), socketOptions, metricsCommunicator, TypeUtils.getDuration( - metricsCacheClientConfig.get(KEY_TMASTER_RECONNECT_INTERVAL_SEC), + metricsCacheClientConfig.get(KEY_TMANAGER_RECONNECT_INTERVAL_SEC), ChronoUnit.SECONDS)); int attempts = startedAttempts.incrementAndGet(); @@ -419,7 +419,7 @@ int getMetricsCacheStartedAttempts() { } @VisibleForTesting - TopologyMaster.MetricsCacheLocation getCurrentMetricsCacheLocation() { + TopologyManager.MetricsCacheLocation getCurrentMetricsCacheLocation() { return currentMetricsCacheLocation; } @@ -439,14 +439,14 @@ public void uncaughtException(Thread t, Throwable e) { LOG.log(Level.SEVERE, "metricsCacheClient dies in thread: " + t, e); Duration reconnectInterval = TypeUtils.getDuration( - metricsCacheClientConfig.get(KEY_TMASTER_RECONNECT_INTERVAL_SEC), ChronoUnit.SECONDS); + metricsCacheClientConfig.get(KEY_TMANAGER_RECONNECT_INTERVAL_SEC), ChronoUnit.SECONDS); SysUtils.sleep(reconnectInterval); LOG.info("Restarting metricsCacheClient"); // We would use the MetricsCacheLocation in cache, since // the new metricsCacheClient is started due to 
exception thrown, // rather than MetricsCacheLocation changes - startNewMasterClient(); + startNewPrimaryClient(); } } } diff --git a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmaster/TMasterClient.java b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerClient.java similarity index 69% rename from heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmaster/TMasterClient.java rename to heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerClient.java index 48e0a32445d..bc1bce8ec4c 100644 --- a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmaster/TMasterClient.java +++ b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerClient.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.heron.metricsmgr.sink.tmaster; +package org.apache.heron.metricsmgr.sink.tmanager; import java.time.Duration; import java.util.logging.Logger; @@ -29,15 +29,15 @@ import org.apache.heron.common.network.HeronClient; import org.apache.heron.common.network.HeronSocketOptions; import org.apache.heron.common.network.StatusCode; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; /** - * TMasterClient connects to TMaster and then send TopologyMaster.PublishMetrics continuously. - * Note that TMaster will not send registerRequest or wait for registerResponse. + * TManagerClient connects to TManager and then send TopologyManager.PublishMetrics continuously. + * Note that TManager will not send registerRequest or wait for registerResponse. */ -public class TMasterClient extends HeronClient implements Runnable { - private static final Logger LOG = Logger.getLogger(TMasterClient.class.getName()); - private final Communicator publishMetricsCommunicator; +public class TManagerClient extends HeronClient implements Runnable { + private static final Logger LOG = Logger.getLogger(TManagerClient.class.getName()); + private final Communicator publishMetricsCommunicator; private final Duration reconnectInterval; /** @@ -46,10 +46,10 @@ public class TMasterClient extends HeronClient implements Runnable { * @param s the NIOLooper bind with this socket client * @param host the host of remote endpoint to communicate with * @param port the port of remote endpoint to communicate with - * @param publishMetricsCommunicator the queue to read PublishMetrics from and send to TMaster + * @param publishMetricsCommunicator the queue to read PublishMetrics from and send to TManager */ - public TMasterClient(NIOLooper s, String host, int port, HeronSocketOptions options, - Communicator publishMetricsCommunicator, + public TManagerClient(NIOLooper s, String host, int port, HeronSocketOptions options, + Communicator publishMetricsCommunicator, Duration reconnectInterval) { super(s, host, port, options); this.publishMetricsCommunicator = publishMetricsCommunicator; @@ -58,8 +58,8 @@ public TMasterClient(NIOLooper s, String host, int port, HeronSocketOptions opti @Override public void onError() { - LOG.severe("Disconnected from TMaster."); - throw new RuntimeException("Errors happened due to write or read failure from TMaster."); + LOG.severe("Disconnected from TManager."); + throw new RuntimeException("Errors happened due to write or read failure from TManager."); // We would not clear the publishMetricsCommunicator since we need to copy items from it // to the new one to avoid data loss } @@ -67,7 +67,7 @@ public void onError() { @Override public void 
onConnect(StatusCode status) { if (status != StatusCode.OK) { - LOG.severe("Cannot connect to the TMaster port, Will Retry.."); + LOG.severe("Cannot connect to the TManager port, Will Retry.."); if (reconnectInterval != Duration.ZERO) { Runnable r = new Runnable() { public void run() { @@ -79,16 +79,16 @@ public void run() { return; } - addTMasterClientTasksOnWakeUp(); + addTManagerClientTasksOnWakeUp(); - LOG.info("Connected to TMaster. Ready to send metrics"); + LOG.info("Connected to TManager. Ready to send metrics"); } - private void addTMasterClientTasksOnWakeUp() { + private void addTManagerClientTasksOnWakeUp() { Runnable task = new Runnable() { @Override public void run() { - TopologyMaster.PublishMetrics publishMetrics; + TopologyManager.PublishMetrics publishMetrics; while (true) { synchronized (publishMetricsCommunicator) { publishMetrics = publishMetricsCommunicator.poll(); @@ -97,9 +97,9 @@ public void run() { break; // No metrics left } - LOG.info(String.format("%d Metrics, %d Exceptions to send to TMaster", + LOG.info(String.format("%d Metrics, %d Exceptions to send to TManager", publishMetrics.getMetricsCount(), publishMetrics.getExceptionsCount())); - LOG.fine("Publish Metrics sending to TMaster: " + publishMetrics.toString()); + LOG.fine("Publish Metrics sending to TManager: " + publishMetrics.toString()); sendMessage(publishMetrics); } @@ -110,17 +110,17 @@ public void run() { @Override public void onResponse(StatusCode status, Object ctx, Message response) { - LOG.severe("TMasterClient got an unknown response from TMaster"); + LOG.severe("TManagerClient got an unknown response from TManager"); } @Override public void onIncomingMessage(Message message) { - LOG.severe("TMasterClient got an unknown message from TMaster"); + LOG.severe("TManagerClient got an unknown message from TManager"); } @Override public void onClose() { - LOG.info("TMasterClient exits"); + LOG.info("TManagerClient exits"); } @Override diff --git a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSink.java b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSink.java new file mode 100644 index 00000000000..0a09428adae --- /dev/null +++ b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSink.java @@ -0,0 +1,441 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.heron.metricsmgr.sink.tmanager; + +import java.io.IOException; +import java.time.Duration; +import java.time.temporal.ChronoUnit; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.logging.Level; +import java.util.logging.Logger; + +import org.apache.heron.common.basics.Communicator; +import org.apache.heron.common.basics.NIOLooper; +import org.apache.heron.common.basics.SingletonRegistry; +import org.apache.heron.common.basics.SysUtils; +import org.apache.heron.common.basics.TypeUtils; +import org.apache.heron.common.config.SystemConfig; +import org.apache.heron.common.network.HeronSocketOptions; +import org.apache.heron.metricsmgr.MetricsUtil; +import org.apache.heron.proto.tmanager.TopologyManager; +import org.apache.heron.spi.metricsmgr.metrics.ExceptionInfo; +import org.apache.heron.spi.metricsmgr.metrics.MetricsFilter; +import org.apache.heron.spi.metricsmgr.metrics.MetricsInfo; +import org.apache.heron.spi.metricsmgr.metrics.MetricsRecord; +import org.apache.heron.spi.metricsmgr.sink.IMetricsSink; +import org.apache.heron.spi.metricsmgr.sink.SinkContext; + +/** + * An IMetricsSink sends Metrics to TManager. + * 1. It gets the TManagerLocation + *

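Point 1 resolves the location from the process-wide SingletonRegistry rather than from ZooKeeper directly; a condensed sketch of that lookup, mirroring the bean-name convention used later in this file (LocationLookup itself is a hypothetical wrapper, not a class in this patch):

    import org.apache.heron.common.basics.SingletonRegistry;
    import org.apache.heron.proto.tmanager.TopologyManager;

    // Hypothetical wrapper around the lookup pattern used later in this file.
    final class LocationLookup {
      static TopologyManager.TManagerLocation currentTManagerLocation() {
        // The bean name is the proto descriptor's full name, matching how
        // MetricsManagerServer registers the location it receives.
        String beanName = TopologyManager.TManagerLocation.newBuilder()
            .getDescriptorForType().getFullName();
        return (TopologyManager.TManagerLocation)
            SingletonRegistry.INSTANCE.getSingleton(beanName);
      }
    }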
+ * 2. Then it would construct a long-lived Service running TManagerClient, which could automatically + * recover from uncaught exceptions, i.e. close the old one and start a new one. + * Also, it provides an API to update the TManagerLocation that TManagerClient needs to connect to and + * restart the TManagerClient. + * There are two scenarios where we need to restart a TManagerClient in our case: + *

+ * -- Uncaught exceptions happen within TManagerClient; then we would restart TManagerClient inside + * the same ExecutorService inside the UncaughtExceptionHandlers. + * Notice that, in Java, exceptions that occur inside UncaughtExceptionHandlers would not invoke + * UncaughtExceptionHandlers again; instead, they would kill the thread with that exception. + * So if an exception is thrown while restarting a new TManagerClient, this TManagerSink would die, and + * external logic would take care of it. + *

+ * -- TManagerLocation changes (though in fact, TManagerClient might also throw exceptions in this case), + * in this case, we would invoke TManagerService to start from tManagerLocationStarter's thread. + * But the TManagerService and TManagerClient still start within the thread they run. + * (A condensed sketch of this location check appears below.) + *

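The second scenario is driven by a periodic check; a simplified sketch using scheduleWithFixedDelay in place of the self-rescheduling Runnable the sink actually uses, with the location narrowed to a generic payload for brevity:

    import java.util.concurrent.Executors;
    import java.util.concurrent.ScheduledExecutorService;
    import java.util.concurrent.TimeUnit;
    import java.util.function.Consumer;
    import java.util.function.Supplier;

    // Simplified: the sink re-schedules a Runnable by hand and compares
    // TopologyManager.TManagerLocation objects; this sketch is generic.
    final class LocationChecker<T> {
      private final ScheduledExecutorService scheduler =
          Executors.newSingleThreadScheduledExecutor();
      private T current;  // touched only on the scheduler thread

      void start(long intervalSec, Supplier<T> lookup, Consumer<T> restartClient) {
        scheduler.scheduleWithFixedDelay(() -> {
          T latest = lookup.get();
          if (latest != null && !latest.equals(current)) {
            current = latest;             // cache to detect the next change
            restartClient.accept(latest); // start a new client on the new location
          }
        }, intervalSec, intervalSec, TimeUnit.SECONDS);
      }
    }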
+ * 3. When a new MetricsRecord comes in by invoking processRecord, it would push the MetricsRecord + * to the Communicator queue consumed by TManagerClient (sketched below). + *

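Point 3 is a plain producer/consumer handoff over Heron's Communicator; a sketch using only the calls visible in this patch (offer, poll, size), where MetricsHandoff itself is a hypothetical wrapper:

    import org.apache.heron.common.basics.Communicator;
    import org.apache.heron.proto.tmanager.TopologyManager;

    // Hypothetical wrapper; only offer/poll/size are assumed, all of which
    // appear in this patch.
    final class MetricsHandoff {
      private static final int MAX_SIZE = 128;  // mirrors MAX_COMMUNICATOR_SIZE
      private final Communicator<TopologyManager.PublishMetrics> queue =
          new Communicator<>();

      // Producer side: processRecord() pushes a built PublishMetrics.
      void produce(TopologyManager.PublishMetrics metrics) {
        queue.offer(metrics);
        // Bound the queue the same way checkCommunicator() does further down:
        // drop the oldest entries from the head once the cap is exceeded.
        synchronized (queue) {
          int size = queue.size();
          for (int i = 0; i < size - MAX_SIZE; ++i) {
            queue.poll();
          }
        }
      }

      // Consumer side: the TManagerClient wakeup task drains until empty.
      void drain() {
        TopologyManager.PublishMetrics m;
        while ((m = queue.poll()) != null) {
          // sendMessage(m) in the real client
        }
      }
    }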
+ * Notice that we would not send all metrics to TManager; we would use MetricsFilter to figure out + * needed metrics. + */ + +public class TManagerSink implements IMetricsSink { + private static final Logger LOG = Logger.getLogger(TManagerSink.class.getName()); + + private static final int MAX_COMMUNICATOR_SIZE = 128; + + // These configs would be read from metrics-sink-configs.yaml + private static final String KEY_TMANAGER_LOCATION_CHECK_INTERVAL_SEC = + "tmanager-location-check-interval-sec"; + private static final String KEY_TMANAGER = "tmanager-client"; + private static final String KEY_TMANAGER_RECONNECT_INTERVAL_SEC = "reconnect-interval-second"; + private static final String KEY_NETWORK_WRITE_BATCH_SIZE_BYTES = "network-write-batch-size-bytes"; + private static final String KEY_NETWORK_WRITE_BATCH_TIME_MS = "network-write-batch-time-ms"; + private static final String KEY_NETWORK_READ_BATCH_SIZE_BYTES = "network-read-batch-size-bytes"; + private static final String KEY_NETWORK_READ_BATCH_TIME_MS = "network-read-batch-time-ms"; + private static final String KEY_SOCKET_SEND_BUFFER_BYTES = "socket-send-buffer-size-bytes"; + private static final String KEY_SOCKET_RECEIVED_BUFFER_BYTES = + "socket-received-buffer-size-bytes"; + private static final String KEY_TMANAGER_METRICS_TYPE = "tmanager-metrics-type"; + + // Bean name to fetch the TManagerLocation object from SingletonRegistry + private static final String TMANAGER_LOCATION_BEAN_NAME = + TopologyManager.TManagerLocation.newBuilder().getDescriptorForType().getFullName(); + // Metrics Counter Name + private static final String METRICS_COUNT = "metrics-count"; + private static final String EXCEPTIONS_COUNT = "exceptions-count"; + private static final String RECORD_PROCESS_COUNT = "record-process-count"; + private static final String TMANAGER_RESTART_COUNT = "tmanager-restart-count"; + private static final String TMANAGER_LOCATION_UPDATE_COUNT = "tmanager-location-update-count"; + private final Communicator metricsCommunicator = + new Communicator<>(); + private final MetricsFilter tManagerMetricsFilter = new MetricsFilter(); + private final Map sinkConfig = new HashMap<>(); + // A scheduled executor service to check whether the TManagerLocation has changed + // If so, restart the TManagerClientService with the new TManagerLocation + // Start of TManagerClientService will also be in this thread + private final ScheduledExecutorService tManagerLocationStarter = + Executors.newSingleThreadScheduledExecutor(); + private TManagerClientService tManagerClientService; + // We need to cache it locally to check whether the TManagerLocation is changed + // This field is changed only in ScheduledExecutorService's thread, + // so no need to make it volatile + private TopologyManager.TManagerLocation currentTManagerLocation = null; + private SinkContext sinkContext; + + @Override + @SuppressWarnings("unchecked") + public void init(Map conf, SinkContext context) { + sinkConfig.putAll(conf); + + sinkContext = context; + + // Fill the tManagerMetricsFilter according to metrics-sink-configs.yaml + Map tmanagerMetricsType = + (Map) sinkConfig.get(KEY_TMANAGER_METRICS_TYPE); + if (tmanagerMetricsType != null) { + for (Map.Entry metricToType : tmanagerMetricsType.entrySet()) { + String value = metricToType.getValue(); + MetricsFilter.MetricAggregationType type; + if ("SUM".equals(value)) { + type = MetricsFilter.MetricAggregationType.SUM; + } else if ("AVG".equals(value)) { + type = MetricsFilter.MetricAggregationType.AVG; + } else if 
("LAST".equals(value)) { + type = MetricsFilter.MetricAggregationType.LAST; + } else { + type = MetricsFilter.MetricAggregationType.UNKNOWN; + } + tManagerMetricsFilter.setPrefixToType(metricToType.getKey(), type); + } + } + + // Construct the long-live TManagerClientService + tManagerClientService = + new TManagerClientService((Map) + sinkConfig.get(KEY_TMANAGER), metricsCommunicator); + + // Start the tManagerLocationStarter + startTManagerChecker(); + } + + // Start the TManagerCheck, which would check whether the TManagerLocation is changed + // at an interval. + // If so, restart the TManagerClientService with the new TManagerLocation + private void startTManagerChecker() { + final int checkIntervalSec = + TypeUtils.getInteger(sinkConfig.get(KEY_TMANAGER_LOCATION_CHECK_INTERVAL_SEC)); + + Runnable runnable = new Runnable() { + @Override + public void run() { + TopologyManager.TManagerLocation location = + (TopologyManager.TManagerLocation) SingletonRegistry.INSTANCE.getSingleton( + TMANAGER_LOCATION_BEAN_NAME); + + if (location != null) { + if (currentTManagerLocation == null || !location.equals(currentTManagerLocation)) { + LOG.info("Update current TManagerLocation to: " + location); + currentTManagerLocation = location; + tManagerClientService.updateTManagerLocation(currentTManagerLocation); + tManagerClientService.startNewPrimaryClient(); + + // Update Metrics + sinkContext.exportCountMetric(TMANAGER_LOCATION_UPDATE_COUNT, 1); + } + } + + // Schedule itself in future + tManagerLocationStarter.schedule(this, checkIntervalSec, TimeUnit.SECONDS); + } + }; + + // First Entry + tManagerLocationStarter.schedule(runnable, checkIntervalSec, TimeUnit.SECONDS); + LOG.info("TManagerChecker started with interval: " + checkIntervalSec); + } + + @Override + public void processRecord(MetricsRecord record) { + // Format it into TopologyManager.PublishMetrics + + // The format of record is "host:port/componentName/instanceId" + // So MetricsRecord.getSource().split("/") would be an array with 3 elements: + // ["host:port", componentName, instanceId] + String[] sources = MetricsUtil.splitRecordSource(record); + String hostPort = sources[0]; + String componentName = sources[1]; + String instanceId = sources[2]; + + TopologyManager.PublishMetrics.Builder publishMetrics = + TopologyManager.PublishMetrics.newBuilder(); + + for (MetricsInfo metricsInfo : tManagerMetricsFilter.filter(record.getMetrics())) { + // We would filter out unneeded metrics + TopologyManager.MetricDatum metricDatum = TopologyManager.MetricDatum.newBuilder(). + setComponentName(componentName).setInstanceId(instanceId).setName(metricsInfo.getName()). 
+ setValue(metricsInfo.getValue()).setTimestamp(record.getTimestamp()).build(); + publishMetrics.addMetrics(metricDatum); + } + + for (ExceptionInfo exceptionInfo : record.getExceptions()) { + TopologyManager.TmanagerExceptionLog exceptionLog = + TopologyManager.TmanagerExceptionLog.newBuilder() + .setComponentName(componentName) + .setHostname(hostPort) + .setInstanceId(instanceId) + .setStacktrace(exceptionInfo.getStackTrace()) + .setLasttime(exceptionInfo.getLastTime()) + .setFirsttime(exceptionInfo.getFirstTime()) + .setCount(exceptionInfo.getCount()) + .setLogging(exceptionInfo.getLogging()).build(); + publishMetrics.addExceptions(exceptionLog); + } + + metricsCommunicator.offer(publishMetrics.build()); + + // Update metrics + sinkContext.exportCountMetric(RECORD_PROCESS_COUNT, 1); + sinkContext.exportCountMetric(METRICS_COUNT, publishMetrics.getMetricsCount()); + sinkContext.exportCountMetric(EXCEPTIONS_COUNT, publishMetrics.getExceptionsCount()); + + checkCommunicator(metricsCommunicator, MAX_COMMUNICATOR_SIZE); + } + + // Check if the communicator is full/overflow. Poll and drop extra elements that + // are over the queue limit from the head. + public static void checkCommunicator(Communicator communicator, + int maxSize) { + synchronized (communicator) { + int size = communicator.size(); + + for (int i = 0; i < size - maxSize; ++i) { + communicator.poll(); + } + } + } + + @Override + public void flush() { + // We do nothing here but update metrics + sinkContext.exportCountMetric(TMANAGER_RESTART_COUNT, + tManagerClientService.startedAttempts.longValue()); + } + + @Override + public void close() { + tManagerClientService.close(); + metricsCommunicator.clear(); + } + + ///////////////////////////////////////////////////////// + // Following protected methods should be used only for unit testing + ///////////////////////////////////////////////////////// + protected TManagerClientService getTManagerClientService() { + return tManagerClientService; + } + + protected void createSimpleTManagerClientService(Map serviceConfig) { + tManagerClientService = + new TManagerClientService(serviceConfig, metricsCommunicator); + } + + protected TManagerClient getTManagerClient() { + return tManagerClientService.getTManagerClient(); + } + + protected void startNewTManagerClient(TopologyManager.TManagerLocation location) { + tManagerClientService.updateTManagerLocation(location); + tManagerClientService.startNewPrimaryClient(); + } + + protected int getTManagerStartedAttempts() { + return tManagerClientService.startedAttempts.get(); + } + + protected TopologyManager.TManagerLocation getCurrentTManagerLocation() { + return currentTManagerLocation; + } + + protected TopologyManager.TManagerLocation getCurrentTManagerLocationInService() { + return tManagerClientService.getCurrentTManagerLocation(); + } + + /** + * A long-live Service running TManagerClient + * It would automatically restart the TManagerClient connecting and communicating to the latest + * TManagerLocation if any uncaught exceptions throw. + *

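Stripped of Heron specifics, that self-restarting arrangement is an UncaughtExceptionHandler that resubmits the client task to the same single-thread executor; a minimal standalone sketch (all names here are illustrative, not from this patch):

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;

    class SelfRestartingService {
      private final ExecutorService executor = Executors.newSingleThreadExecutor(r -> {
        Thread t = new Thread(r);
        // Runs on the dying thread after an uncaught throwable; resubmitting
        // makes the executor replace the dead worker with a fresh thread.
        t.setUncaughtExceptionHandler((thread, err) -> startClient());
        return t;
      });

      synchronized void startClient() {
        executor.execute(() -> {
          // long-running client loop; throwing here triggers the handler above
        });
      }
    }

In the real class the handler additionally sleeps for the reconnect interval before restarting, and an exception thrown inside the handler itself would simply kill the thread, as the Javadoc of the removed TMasterSink below spells out.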
+ * It provides startNewPrimaryClient(TopologyManager.TManagerLocation location), which would also + * update the currentTManagerLocation to the latest location. + *

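Case 2 below is driven by the scheduled checker installed in startTManagerChecker(): a task that compares the registered location against the cached one, restarts the client on a change, and then re-arms itself. The bare pattern looks roughly like this (a sketch with illustrative names):

    import java.util.concurrent.Executors;
    import java.util.concurrent.ScheduledExecutorService;
    import java.util.concurrent.TimeUnit;

    class LocationChecker {
      private final ScheduledExecutorService scheduler =
          Executors.newSingleThreadScheduledExecutor();

      void start(final int intervalSec) {
        Runnable check = new Runnable() {
          @Override public void run() {
            // ...fetch location; if changed: update cache, restart client...
            scheduler.schedule(this, intervalSec, TimeUnit.SECONDS);  // re-arm
          }
        };
        scheduler.schedule(check, intervalSec, TimeUnit.SECONDS);  // first entry
      }
    }

Re-arming at the end of run() gives scheduleWithFixedDelay-style behaviour: a slow check postpones the next one rather than letting checks overlap.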
+ * So a new TManagerClient would start in two cases: + * 1. The old one threw exceptions and died. + * 2. startNewPrimaryClient() is invoked externally with TManagerLocation. + */ + private static final class TManagerClientService { + private final AtomicInteger startedAttempts = new AtomicInteger(0); + private final Map tmanagerClientConfig; + private final Communicator metricsCommunicator; + private final ExecutorService tmanagerClientExecutor = + Executors.newSingleThreadExecutor(new TManagerClientThreadFactory()); + private volatile TManagerClient tManagerClient; + // We need to cache TManagerLocation for failover case + // This value is set in ScheduledExecutorService' thread while + // it is used in TManagerClientService thread, + // so we need to make it volatile to guarantee the visiability. + private volatile TopologyManager.TManagerLocation currentTManagerLocation; + + private TManagerClientService(Map tmanagerClientConfig, + Communicator metricsCommunicator) { + this.tmanagerClientConfig = tmanagerClientConfig; + this.metricsCommunicator = metricsCommunicator; + } + + // Update the TManagerLocation to connect within the TManagerClient + // This method is thread-safe, since + // currentTManagerLocation is volatile and we just replace it. + // In our scenario, it is only invoked when TManagerLocation is changed, + // i.e. this method is only invoked in scheduled executor thread. + public void updateTManagerLocation(TopologyManager.TManagerLocation location) { + currentTManagerLocation = location; + } + + // This method could be invoked by different threads + // Make it synchronized to guarantee thread-safe + public synchronized void startNewPrimaryClient() { + + // Exit any running tManagerClient if there is any to release + // the thread in tmanagerClientExecutor + if (tManagerClient != null) { + tManagerClient.stop(); + tManagerClient.getNIOLooper().exitLoop(); + } + + // Construct the new TManagerClient + final NIOLooper looper; + try { + looper = new NIOLooper(); + } catch (IOException e) { + throw new RuntimeException("Could not create the NIOLooper", e); + } + + SystemConfig systemConfig = + (SystemConfig) SingletonRegistry.INSTANCE.getSingleton(SystemConfig.HERON_SYSTEM_CONFIG); + HeronSocketOptions socketOptions = + new HeronSocketOptions( + TypeUtils.getByteAmount(tmanagerClientConfig.get(KEY_NETWORK_WRITE_BATCH_SIZE_BYTES)), + TypeUtils.getDuration( + tmanagerClientConfig.get(KEY_NETWORK_WRITE_BATCH_TIME_MS), ChronoUnit.MILLIS), + TypeUtils.getByteAmount(tmanagerClientConfig.get(KEY_NETWORK_READ_BATCH_SIZE_BYTES)), + TypeUtils.getDuration( + tmanagerClientConfig.get(KEY_NETWORK_READ_BATCH_TIME_MS), ChronoUnit.MILLIS), + TypeUtils.getByteAmount(tmanagerClientConfig.get(KEY_SOCKET_SEND_BUFFER_BYTES)), + TypeUtils.getByteAmount(tmanagerClientConfig.get(KEY_SOCKET_RECEIVED_BUFFER_BYTES)), + systemConfig.getMetricsMgrNetworkOptionsMaximumPacketSize()); + + // Reset the Consumer + metricsCommunicator.setConsumer(looper); + + tManagerClient = + new TManagerClient(looper, + currentTManagerLocation.getHost(), + currentTManagerLocation.getManagerPort(), + socketOptions, metricsCommunicator, + TypeUtils.getDuration( + tmanagerClientConfig.get(KEY_TMANAGER_RECONNECT_INTERVAL_SEC), ChronoUnit.SECONDS)); + + int attempts = startedAttempts.incrementAndGet(); + LOG.severe(String.format("Starting TManagerClient for the %d time.", attempts)); + tmanagerClientExecutor.execute(tManagerClient); + } + + // This method could be invoked by different threads + // Make it synchronized to 
guarantee thread-safe + public synchronized void close() { + tManagerClient.getNIOLooper().exitLoop(); + tmanagerClientExecutor.shutdownNow(); + } + + ///////////////////////////////////////////////////////// + // Following protected methods should be used only for unit testing + ///////////////////////////////////////////////////////// + protected TManagerClient getTManagerClient() { + return tManagerClient; + } + + protected int getTManagerStartedAttempts() { + return startedAttempts.get(); + } + + protected TopologyManager.TManagerLocation getCurrentTManagerLocation() { + return currentTManagerLocation; + } + + // An UncaughtExceptionHandler, which would restart TManagerLocation with + // current TManagerLocation. + private class TManagerClientThreadFactory implements ThreadFactory { + @Override + public Thread newThread(Runnable r) { + final Thread thread = new Thread(r); + thread.setUncaughtExceptionHandler(new TManagerClientExceptionHandler()); + return thread; + } + + private class TManagerClientExceptionHandler implements Thread.UncaughtExceptionHandler { + @Override + public void uncaughtException(Thread t, Throwable e) { + LOG.log(Level.SEVERE, "TManagerClient dies in thread: " + t, e); + + Duration reconnectInterval = TypeUtils.getDuration( + tmanagerClientConfig.get(KEY_TMANAGER_RECONNECT_INTERVAL_SEC), ChronoUnit.SECONDS); + SysUtils.sleep(reconnectInterval); + LOG.info("Restarting TManagerClient"); + + // We would use the TManagerLocation in cache, since + // the new TManagerClient is started due to exception thrown, + // rather than TManagerLocation changes + startNewPrimaryClient(); + } + } + } + } +} diff --git a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmaster/TMasterSink.java b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmaster/TMasterSink.java deleted file mode 100644 index 129d5468793..00000000000 --- a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmaster/TMasterSink.java +++ /dev/null @@ -1,441 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.heron.metricsmgr.sink.tmaster; - -import java.io.IOException; -import java.time.Duration; -import java.time.temporal.ChronoUnit; -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.logging.Level; -import java.util.logging.Logger; - -import org.apache.heron.common.basics.Communicator; -import org.apache.heron.common.basics.NIOLooper; -import org.apache.heron.common.basics.SingletonRegistry; -import org.apache.heron.common.basics.SysUtils; -import org.apache.heron.common.basics.TypeUtils; -import org.apache.heron.common.config.SystemConfig; -import org.apache.heron.common.network.HeronSocketOptions; -import org.apache.heron.metricsmgr.MetricsUtil; -import org.apache.heron.proto.tmaster.TopologyMaster; -import org.apache.heron.spi.metricsmgr.metrics.ExceptionInfo; -import org.apache.heron.spi.metricsmgr.metrics.MetricsFilter; -import org.apache.heron.spi.metricsmgr.metrics.MetricsInfo; -import org.apache.heron.spi.metricsmgr.metrics.MetricsRecord; -import org.apache.heron.spi.metricsmgr.sink.IMetricsSink; -import org.apache.heron.spi.metricsmgr.sink.SinkContext; - -/** - * An IMetricsSink sends Metrics to TMaster. - * 1. It gets the TMasterLocation - *

- * 2. Then it would construct a long-live Service running TMasterClient, which could automatically - * recover from uncaught exceptions, i.e. close the old one and start a new one. - * Also, it provides API to update the TMasterLocation that TMasterClient need to connect and - * restart the TMasterClient. - * There are two scenarios we need to restart a TMasterClient in our case: - *

- * -- Uncaught exceptions happen within TMasterClient; then we would restart TMasterClient inside - * the same ExecutorService inside the UncaughtExceptionHandlers. - * Notice that, in java, exceptions occur inside UncaughtExceptionHandlers would not invoke - * UncaughtExceptionHandlers; instead, it would kill the thread with that exception. - * So if exceptions thrown during restart a new TMasterClient, this TMasterSink would die, and - * external logic would take care of it. - *

- * -- TMasterLocation changes (though in fact, TMasterClient might also throw exceptions in this case), - * in this case, we would invoke TMasterService to start from tMasterLocationStarter's thread. - * But the TMasterService and TMasterClient still start wihtin the thread they run. - *

- * 3. When a new MetricsRecord comes by invoking processRecord, it would push the MetricsRecord - * to the Communicator Queue to TMasterClient - *

- * Notice that we would not send all metrics to TMaster; we would use MetricsFilter to figure out - * needed metrics. - */ - -public class TMasterSink implements IMetricsSink { - private static final Logger LOG = Logger.getLogger(TMasterSink.class.getName()); - - private static final int MAX_COMMUNICATOR_SIZE = 128; - - // These configs would be read from metrics-sink-configs.yaml - private static final String KEY_TMASTER_LOCATION_CHECK_INTERVAL_SEC = - "tmaster-location-check-interval-sec"; - private static final String KEY_TMASTER = "tmaster-client"; - private static final String KEY_TMASTER_RECONNECT_INTERVAL_SEC = "reconnect-interval-second"; - private static final String KEY_NETWORK_WRITE_BATCH_SIZE_BYTES = "network-write-batch-size-bytes"; - private static final String KEY_NETWORK_WRITE_BATCH_TIME_MS = "network-write-batch-time-ms"; - private static final String KEY_NETWORK_READ_BATCH_SIZE_BYTES = "network-read-batch-size-bytes"; - private static final String KEY_NETWORK_READ_BATCH_TIME_MS = "network-read-batch-time-ms"; - private static final String KEY_SOCKET_SEND_BUFFER_BYTES = "socket-send-buffer-size-bytes"; - private static final String KEY_SOCKET_RECEIVED_BUFFER_BYTES = - "socket-received-buffer-size-bytes"; - private static final String KEY_TMASTER_METRICS_TYPE = "tmaster-metrics-type"; - - // Bean name to fetch the TMasterLocation object from SingletonRegistry - private static final String TMASTER_LOCATION_BEAN_NAME = - TopologyMaster.TMasterLocation.newBuilder().getDescriptorForType().getFullName(); - // Metrics Counter Name - private static final String METRICS_COUNT = "metrics-count"; - private static final String EXCEPTIONS_COUNT = "exceptions-count"; - private static final String RECORD_PROCESS_COUNT = "record-process-count"; - private static final String TMASTER_RESTART_COUNT = "tmaster-restart-count"; - private static final String TMASTER_LOCATION_UPDATE_COUNT = "tmaster-location-update-count"; - private final Communicator metricsCommunicator = - new Communicator<>(); - private final MetricsFilter tMasterMetricsFilter = new MetricsFilter(); - private final Map sinkConfig = new HashMap<>(); - // A scheduled executor service to check whether the TMasterLocation has changed - // If so, restart the TMasterClientService with the new TMasterLocation - // Start of TMasterClientService will also be in this thread - private final ScheduledExecutorService tMasterLocationStarter = - Executors.newSingleThreadScheduledExecutor(); - private TMasterClientService tMasterClientService; - // We need to cache it locally to check whether the TMasterLocation is changed - // This field is changed only in ScheduledExecutorService's thread, - // so no need to make it volatile - private TopologyMaster.TMasterLocation currentTMasterLocation = null; - private SinkContext sinkContext; - - @Override - @SuppressWarnings("unchecked") - public void init(Map conf, SinkContext context) { - sinkConfig.putAll(conf); - - sinkContext = context; - - // Fill the tMasterMetricsFilter according to metrics-sink-configs.yaml - Map tmasterMetricsType = - (Map) sinkConfig.get(KEY_TMASTER_METRICS_TYPE); - if (tmasterMetricsType != null) { - for (Map.Entry metricToType : tmasterMetricsType.entrySet()) { - String value = metricToType.getValue(); - MetricsFilter.MetricAggregationType type; - if ("SUM".equals(value)) { - type = MetricsFilter.MetricAggregationType.SUM; - } else if ("AVG".equals(value)) { - type = MetricsFilter.MetricAggregationType.AVG; - } else if ("LAST".equals(value)) { - type = 
MetricsFilter.MetricAggregationType.LAST; - } else { - type = MetricsFilter.MetricAggregationType.UNKNOWN; - } - tMasterMetricsFilter.setPrefixToType(metricToType.getKey(), type); - } - } - - // Construct the long-live TMasterClientService - tMasterClientService = - new TMasterClientService((Map) - sinkConfig.get(KEY_TMASTER), metricsCommunicator); - - // Start the tMasterLocationStarter - startTMasterChecker(); - } - - // Start the TMasterCheck, which would check whether the TMasterLocation is changed - // at an interval. - // If so, restart the TMasterClientService with the new TMasterLocation - private void startTMasterChecker() { - final int checkIntervalSec = - TypeUtils.getInteger(sinkConfig.get(KEY_TMASTER_LOCATION_CHECK_INTERVAL_SEC)); - - Runnable runnable = new Runnable() { - @Override - public void run() { - TopologyMaster.TMasterLocation location = - (TopologyMaster.TMasterLocation) SingletonRegistry.INSTANCE.getSingleton( - TMASTER_LOCATION_BEAN_NAME); - - if (location != null) { - if (currentTMasterLocation == null || !location.equals(currentTMasterLocation)) { - LOG.info("Update current TMasterLocation to: " + location); - currentTMasterLocation = location; - tMasterClientService.updateTMasterLocation(currentTMasterLocation); - tMasterClientService.startNewMasterClient(); - - // Update Metrics - sinkContext.exportCountMetric(TMASTER_LOCATION_UPDATE_COUNT, 1); - } - } - - // Schedule itself in future - tMasterLocationStarter.schedule(this, checkIntervalSec, TimeUnit.SECONDS); - } - }; - - // First Entry - tMasterLocationStarter.schedule(runnable, checkIntervalSec, TimeUnit.SECONDS); - LOG.info("TMasterChecker started with interval: " + checkIntervalSec); - } - - @Override - public void processRecord(MetricsRecord record) { - // Format it into TopologyMaster.PublishMetrics - - // The format of record is "host:port/componentName/instanceId" - // So MetricsRecord.getSource().split("/") would be an array with 3 elements: - // ["host:port", componentName, instanceId] - String[] sources = MetricsUtil.splitRecordSource(record); - String hostPort = sources[0]; - String componentName = sources[1]; - String instanceId = sources[2]; - - TopologyMaster.PublishMetrics.Builder publishMetrics = - TopologyMaster.PublishMetrics.newBuilder(); - - for (MetricsInfo metricsInfo : tMasterMetricsFilter.filter(record.getMetrics())) { - // We would filter out unneeded metrics - TopologyMaster.MetricDatum metricDatum = TopologyMaster.MetricDatum.newBuilder(). - setComponentName(componentName).setInstanceId(instanceId).setName(metricsInfo.getName()). 
- setValue(metricsInfo.getValue()).setTimestamp(record.getTimestamp()).build(); - publishMetrics.addMetrics(metricDatum); - } - - for (ExceptionInfo exceptionInfo : record.getExceptions()) { - TopologyMaster.TmasterExceptionLog exceptionLog = - TopologyMaster.TmasterExceptionLog.newBuilder() - .setComponentName(componentName) - .setHostname(hostPort) - .setInstanceId(instanceId) - .setStacktrace(exceptionInfo.getStackTrace()) - .setLasttime(exceptionInfo.getLastTime()) - .setFirsttime(exceptionInfo.getFirstTime()) - .setCount(exceptionInfo.getCount()) - .setLogging(exceptionInfo.getLogging()).build(); - publishMetrics.addExceptions(exceptionLog); - } - - metricsCommunicator.offer(publishMetrics.build()); - - // Update metrics - sinkContext.exportCountMetric(RECORD_PROCESS_COUNT, 1); - sinkContext.exportCountMetric(METRICS_COUNT, publishMetrics.getMetricsCount()); - sinkContext.exportCountMetric(EXCEPTIONS_COUNT, publishMetrics.getExceptionsCount()); - - checkCommunicator(metricsCommunicator, MAX_COMMUNICATOR_SIZE); - } - - // Check if the communicator is full/overflow. Poll and drop extra elements that - // are over the queue limit from the head. - public static void checkCommunicator(Communicator communicator, - int maxSize) { - synchronized (communicator) { - int size = communicator.size(); - - for (int i = 0; i < size - maxSize; ++i) { - communicator.poll(); - } - } - } - - @Override - public void flush() { - // We do nothing here but update metrics - sinkContext.exportCountMetric(TMASTER_RESTART_COUNT, - tMasterClientService.startedAttempts.longValue()); - } - - @Override - public void close() { - tMasterClientService.close(); - metricsCommunicator.clear(); - } - - ///////////////////////////////////////////////////////// - // Following protected methods should be used only for unit testing - ///////////////////////////////////////////////////////// - protected TMasterClientService getTMasterClientService() { - return tMasterClientService; - } - - protected void createSimpleTMasterClientService(Map serviceConfig) { - tMasterClientService = - new TMasterClientService(serviceConfig, metricsCommunicator); - } - - protected TMasterClient getTMasterClient() { - return tMasterClientService.getTMasterClient(); - } - - protected void startNewTMasterClient(TopologyMaster.TMasterLocation location) { - tMasterClientService.updateTMasterLocation(location); - tMasterClientService.startNewMasterClient(); - } - - protected int getTMasterStartedAttempts() { - return tMasterClientService.startedAttempts.get(); - } - - protected TopologyMaster.TMasterLocation getCurrentTMasterLocation() { - return currentTMasterLocation; - } - - protected TopologyMaster.TMasterLocation getCurrentTMasterLocationInService() { - return tMasterClientService.getCurrentTMasterLocation(); - } - - /** - * A long-live Service running TMasterClient - * It would automatically restart the TMasterClient connecting and communicating to the latest - * TMasterLocation if any uncaught exceptions throw. - *

- * It provides startNewMasterClient(TopologyMaster.TMasterLocation location), which would also - * update the currentTMasterLocation to the lastest location. - *

- * So a new TMasterClient would start in two cases: - * 1. The old one threw exceptions and died. - * 2. startNewMasterClient() is invoked externally with TMasterLocation. - */ - private static final class TMasterClientService { - private final AtomicInteger startedAttempts = new AtomicInteger(0); - private final Map tmasterClientConfig; - private final Communicator metricsCommunicator; - private final ExecutorService tmasterClientExecutor = - Executors.newSingleThreadExecutor(new TMasterClientThreadFactory()); - private volatile TMasterClient tMasterClient; - // We need to cache TMasterLocation for failover case - // This value is set in ScheduledExecutorService' thread while - // it is used in TMasterClientService thread, - // so we need to make it volatile to guarantee the visiability. - private volatile TopologyMaster.TMasterLocation currentTMasterLocation; - - private TMasterClientService(Map tmasterClientConfig, - Communicator metricsCommunicator) { - this.tmasterClientConfig = tmasterClientConfig; - this.metricsCommunicator = metricsCommunicator; - } - - // Update the TMasterLocation to connect within the TMasterClient - // This method is thread-safe, since - // currentTMasterLocation is volatile and we just replace it. - // In our scenario, it is only invoked when TMasterLocation is changed, - // i.e. this method is only invoked in scheduled executor thread. - public void updateTMasterLocation(TopologyMaster.TMasterLocation location) { - currentTMasterLocation = location; - } - - // This method could be invoked by different threads - // Make it synchronized to guarantee thread-safe - public synchronized void startNewMasterClient() { - - // Exit any running tMasterClient if there is any to release - // the thread in tmasterClientExecutor - if (tMasterClient != null) { - tMasterClient.stop(); - tMasterClient.getNIOLooper().exitLoop(); - } - - // Construct the new TMasterClient - final NIOLooper looper; - try { - looper = new NIOLooper(); - } catch (IOException e) { - throw new RuntimeException("Could not create the NIOLooper", e); - } - - SystemConfig systemConfig = - (SystemConfig) SingletonRegistry.INSTANCE.getSingleton(SystemConfig.HERON_SYSTEM_CONFIG); - HeronSocketOptions socketOptions = - new HeronSocketOptions( - TypeUtils.getByteAmount(tmasterClientConfig.get(KEY_NETWORK_WRITE_BATCH_SIZE_BYTES)), - TypeUtils.getDuration( - tmasterClientConfig.get(KEY_NETWORK_WRITE_BATCH_TIME_MS), ChronoUnit.MILLIS), - TypeUtils.getByteAmount(tmasterClientConfig.get(KEY_NETWORK_READ_BATCH_SIZE_BYTES)), - TypeUtils.getDuration( - tmasterClientConfig.get(KEY_NETWORK_READ_BATCH_TIME_MS), ChronoUnit.MILLIS), - TypeUtils.getByteAmount(tmasterClientConfig.get(KEY_SOCKET_SEND_BUFFER_BYTES)), - TypeUtils.getByteAmount(tmasterClientConfig.get(KEY_SOCKET_RECEIVED_BUFFER_BYTES)), - systemConfig.getMetricsMgrNetworkOptionsMaximumPacketSize()); - - // Reset the Consumer - metricsCommunicator.setConsumer(looper); - - tMasterClient = - new TMasterClient(looper, - currentTMasterLocation.getHost(), - currentTMasterLocation.getMasterPort(), - socketOptions, metricsCommunicator, - TypeUtils.getDuration( - tmasterClientConfig.get(KEY_TMASTER_RECONNECT_INTERVAL_SEC), ChronoUnit.SECONDS)); - - int attempts = startedAttempts.incrementAndGet(); - LOG.severe(String.format("Starting TMasterClient for the %d time.", attempts)); - tmasterClientExecutor.execute(tMasterClient); - } - - // This method could be invoked by different threads - // Make it synchronized to guarantee thread-safe - public synchronized void close() 
{ - tMasterClient.getNIOLooper().exitLoop(); - tmasterClientExecutor.shutdownNow(); - } - - ///////////////////////////////////////////////////////// - // Following protected methods should be used only for unit testing - ///////////////////////////////////////////////////////// - protected TMasterClient getTMasterClient() { - return tMasterClient; - } - - protected int getTMasterStartedAttempts() { - return startedAttempts.get(); - } - - protected TopologyMaster.TMasterLocation getCurrentTMasterLocation() { - return currentTMasterLocation; - } - - // An UncaughtExceptionHandler, which would restart TMasterLocation with - // current TMasterLocation. - private class TMasterClientThreadFactory implements ThreadFactory { - @Override - public Thread newThread(Runnable r) { - final Thread thread = new Thread(r); - thread.setUncaughtExceptionHandler(new TMasterClientExceptionHandler()); - return thread; - } - - private class TMasterClientExceptionHandler implements Thread.UncaughtExceptionHandler { - @Override - public void uncaughtException(Thread t, Throwable e) { - LOG.log(Level.SEVERE, "TMasterClient dies in thread: " + t, e); - - Duration reconnectInterval = TypeUtils.getDuration( - tmasterClientConfig.get(KEY_TMASTER_RECONNECT_INTERVAL_SEC), ChronoUnit.SECONDS); - SysUtils.sleep(reconnectInterval); - LOG.info("Restarting TMasterClient"); - - // We would use the TMasterLocation in cache, since - // the new TMasterClient is started due to exception thrown, - // rather than TMasterLocation changes - startNewMasterClient(); - } - } - } - } -} diff --git a/heron/metricsmgr/tests/java/BUILD b/heron/metricsmgr/tests/java/BUILD index 406798fc9c0..716dccb43d9 100644 --- a/heron/metricsmgr/tests/java/BUILD +++ b/heron/metricsmgr/tests/java/BUILD @@ -12,7 +12,7 @@ java_library( "//heron/metricsmgr/src/java:metricsmgr-java", "//heron/proto:proto_common_java", "//heron/proto:proto_metrics_java", - "//heron/proto:proto_tmaster_java", + "//heron/proto:proto_tmanager_java", "//heron/spi/src/java:metricsmgr-spi-java", "//third_party/java:junit4", "//third_party/java:mockito", @@ -26,10 +26,10 @@ java_tests( data = ["//heron/config/src/yaml:test-config-internals-yaml"], test_classes = [ "org.apache.heron.metricsmgr.MetricsUtilTests", - "org.apache.heron.metricsmgr.HandleTMasterLocationTest", + "org.apache.heron.metricsmgr.HandleTManagerLocationTest", "org.apache.heron.metricsmgr.MetricsManagerServerTest", "org.apache.heron.metricsmgr.executor.SinkExecutorTest", - "org.apache.heron.metricsmgr.sink.tmaster.TMasterSinkTest", + "org.apache.heron.metricsmgr.sink.tmanager.TManagerSinkTest", "org.apache.heron.metricsmgr.sink.metricscache.MetricsCacheSinkTest", "org.apache.heron.metricsmgr.sink.FileSinkTest", "org.apache.heron.metricsmgr.sink.WebSinkTest", diff --git a/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/HandleTMasterLocationTest.java b/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/HandleTManagerLocationTest.java similarity index 66% rename from heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/HandleTMasterLocationTest.java rename to heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/HandleTManagerLocationTest.java index 4317624200b..6d05a1e93c8 100644 --- a/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/HandleTMasterLocationTest.java +++ b/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/HandleTManagerLocationTest.java @@ -41,44 +41,44 @@ import org.apache.heron.common.network.StatusCode; import org.apache.heron.common.testhelpers.HeronServerTester; import 
org.apache.heron.proto.system.Metrics; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import static org.apache.heron.common.testhelpers.HeronServerTester.RESPONSE_RECEIVED_TIMEOUT; import static org.mockito.Mockito.spy; /** - * Test whether MetricsManagerServer could handle TMasterLocationRefreshMessage correctly. + * Test whether MetricsManagerServer could handle TManagerLocationRefreshMessage correctly. *

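Point 2 in the list below leans on Mockito's InOrder API; as a generic refresher (a hypothetical mock, nothing from this test):

    import static org.mockito.Mockito.inOrder;
    import static org.mockito.Mockito.mock;

    import java.util.List;
    import org.mockito.InOrder;

    class OrderSketch {
      @SuppressWarnings("unchecked")
      void demo() {
        List<String> calls = (List<String>) mock(List.class);
        calls.add("first");
        calls.add("second");

        InOrder ordered = inOrder(calls);
        ordered.verify(calls).add("first");   // passes only if the calls
        ordered.verify(calls).add("second");  // happened in exactly this order
      }
    }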
- * We make a SimpleTMasterLocationPublisher, which would send two TMasterLocationRefreshMessage, + * We make a SimpleTManagerLocationPublisher, which would send two TManagerLocationRefreshMessage, * (twice each) after it has connected and registered with * MetricsManagerServer, and then we check: * 1. Whether onMessage(...) is invoked 4 times, with correct arguments. *

* 2. Whether onMessage(...) is invoked 4 times, with correct order. *

- * 3. Whether eventually the TMasterLocation in SingletonRegistry should be the latest one. + * 3. Whether eventually the TManagerLocation in SingletonRegistry should be the latest one. */ -public class HandleTMasterLocationTest { +public class HandleTManagerLocationTest { - // Two TMasterLocationRefreshMessage to verify - private static final Metrics.TMasterLocationRefreshMessage TMASTERLOCATIONREFRESHMESSAGE0 = - Metrics.TMasterLocationRefreshMessage.newBuilder().setTmaster( - TopologyMaster.TMasterLocation.newBuilder(). + // Two TManagerLocationRefreshMessage to verify + private static final Metrics.TManagerLocationRefreshMessage TMANAGERLOCATIONREFRESHMESSAGE0 = + Metrics.TManagerLocationRefreshMessage.newBuilder().setTmanager( + TopologyManager.TManagerLocation.newBuilder(). setTopologyName("topology-name").setTopologyId("topology-id"). - setHost("host").setControllerPort(0).setMasterPort(0)). + setHost("host").setControllerPort(0).setManagerPort(0)). build(); - private static final Metrics.TMasterLocationRefreshMessage TMASTERLOCATIONREFRESHMESSAGE1 = - Metrics.TMasterLocationRefreshMessage.newBuilder().setTmaster( - TopologyMaster.TMasterLocation.newBuilder(). + private static final Metrics.TManagerLocationRefreshMessage TMANAGERLOCATIONREFRESHMESSAGE1 = + Metrics.TManagerLocationRefreshMessage.newBuilder().setTmanager( + TopologyManager.TManagerLocation.newBuilder(). setTopologyName("topology-name").setTopologyId("topology-id"). - setHost("host").setControllerPort(0).setMasterPort(1)). + setHost("host").setControllerPort(0).setManagerPort(1)). build(); - // Bean name to register the TMasterLocation object into SingletonRegistry - private static final String TMASTER_LOCATION_BEAN_NAME = - TopologyMaster.TMasterLocation.newBuilder().getDescriptorForType().getFullName(); + // Bean name to register the TManagerLocation object into SingletonRegistry + private static final String TMANAGER_LOCATION_BEAN_NAME = + TopologyManager.TManagerLocation.newBuilder().getDescriptorForType().getFullName(); private LatchedMultiCountMetric serverMetrics; private MetricsManagerServer metricsManagerServer; @@ -88,7 +88,7 @@ public class HandleTMasterLocationTest { public void before() throws IOException { // MetricsManagerServer increments this counter every time a location refresh message is // received, so we can await this counter getting to 4 before proceeding with the test - serverMetrics = new LatchedMultiCountMetric("tmaster-location-received", 4L); + serverMetrics = new LatchedMultiCountMetric("tmanager-location-received", 4L); // Spy it for unit test metricsManagerServer = @@ -117,32 +117,32 @@ public void after() throws NoSuchFieldException, IllegalAccessException { } @Test - public void testHandleTMasterLocation() throws InterruptedException { + public void testHandleTManagerLocation() throws InterruptedException { serverMetrics.await(Duration.ofSeconds(10)); // Verification - TopologyMaster.TMasterLocation tMasterLocation = (TopologyMaster.TMasterLocation) - SingletonRegistry.INSTANCE.getSingleton(TMASTER_LOCATION_BEAN_NAME); + TopologyManager.TManagerLocation tManagerLocation = (TopologyManager.TManagerLocation) + SingletonRegistry.INSTANCE.getSingleton(TMANAGER_LOCATION_BEAN_NAME); // Verify we received these message Mockito.verify(metricsManagerServer, Mockito.times(2)). 
- onMessage(Mockito.any(SocketChannel.class), Mockito.eq(TMASTERLOCATIONREFRESHMESSAGE0)); + onMessage(Mockito.any(SocketChannel.class), Mockito.eq(TMANAGERLOCATIONREFRESHMESSAGE0)); Mockito.verify(metricsManagerServer, Mockito.times(2)). - onMessage(Mockito.any(SocketChannel.class), Mockito.eq(TMASTERLOCATIONREFRESHMESSAGE1)); + onMessage(Mockito.any(SocketChannel.class), Mockito.eq(TMANAGERLOCATIONREFRESHMESSAGE1)); // Verify we received message in order InOrder inOrder = Mockito.inOrder(metricsManagerServer); inOrder.verify(metricsManagerServer, Mockito.times(2)). - onMessage(Mockito.any(SocketChannel.class), Mockito.eq(TMASTERLOCATIONREFRESHMESSAGE0)); + onMessage(Mockito.any(SocketChannel.class), Mockito.eq(TMANAGERLOCATIONREFRESHMESSAGE0)); inOrder.verify(metricsManagerServer, Mockito.times(2)). - onMessage(Mockito.any(SocketChannel.class), Mockito.eq(TMASTERLOCATIONREFRESHMESSAGE1)); + onMessage(Mockito.any(SocketChannel.class), Mockito.eq(TMANAGERLOCATIONREFRESHMESSAGE1)); - Assert.assertEquals("topology-name", tMasterLocation.getTopologyName()); - Assert.assertEquals("topology-id", tMasterLocation.getTopologyId()); - Assert.assertEquals("host", tMasterLocation.getHost()); - Assert.assertEquals(0, tMasterLocation.getControllerPort()); - Assert.assertEquals(1, tMasterLocation.getMasterPort()); + Assert.assertEquals("topology-name", tManagerLocation.getTopologyName()); + Assert.assertEquals("topology-id", tManagerLocation.getTopologyId()); + Assert.assertEquals("host", tManagerLocation.getHost()); + Assert.assertEquals(0, tManagerLocation.getControllerPort()); + Assert.assertEquals(1, tManagerLocation.getManagerPort()); } private static final class TestRequestHandler implements HeronServerTester.TestRequestHandler { @@ -151,7 +151,7 @@ public Message getRequestMessage() { Metrics.MetricPublisher publisher = Metrics.MetricPublisher.newBuilder(). setHostname("hostname"). setPort(0). - setComponentName("tmaster-location-publisher"). + setComponentName("tmanager-location-publisher"). setInstanceId("instance-id"). setInstanceIndex(1). build(); @@ -169,17 +169,17 @@ private static final class TestResponseHandler implements HeronServerTester.Test @Override public void handleResponse(HeronClient client, StatusCode status, Object ctx, Message response) { - // We send two TMasterLocationRefreshMessage twice each + // We send two TManagerLocationRefreshMessage twice each // Then we check: // 1. Whether onMessage(...) is invoked 4 times, with correct arguments. - // 2. Finally the TMasterLocation in SingletonRegistry should be the latest one. - // First send TMASTERLOCATIONREFRESHMESSAGE0 twice - client.sendMessage(TMASTERLOCATIONREFRESHMESSAGE0); - client.sendMessage(TMASTERLOCATIONREFRESHMESSAGE0); - - // Then send TMASTERLOCATIONREFRESHMESSAGE1 twice - client.sendMessage(TMASTERLOCATIONREFRESHMESSAGE1); - client.sendMessage(TMASTERLOCATIONREFRESHMESSAGE1); + // 2. Finally the TManagerLocation in SingletonRegistry should be the latest one. 
+ // First send TMANAGERLOCATIONREFRESHMESSAGE0 twice + client.sendMessage(TMANAGERLOCATIONREFRESHMESSAGE0); + client.sendMessage(TMANAGERLOCATIONREFRESHMESSAGE0); + + // Then send TMANAGERLOCATIONREFRESHMESSAGE1 twice + client.sendMessage(TMANAGERLOCATIONREFRESHMESSAGE1); + client.sendMessage(TMANAGERLOCATIONREFRESHMESSAGE1); } } } diff --git a/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/executor/SinkExecutorTest.java b/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/executor/SinkExecutorTest.java index 63b8af77be4..fd12cb91714 100644 --- a/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/executor/SinkExecutorTest.java +++ b/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/executor/SinkExecutorTest.java @@ -37,7 +37,7 @@ import org.apache.heron.api.metric.MultiCountMetric; import org.apache.heron.common.basics.Communicator; -import org.apache.heron.common.basics.SlaveLooper; +import org.apache.heron.common.basics.ExecutorLooper; import org.apache.heron.metricsmgr.MetricsSinksConfig; import org.apache.heron.metricsmgr.sink.SinkContextImpl; import org.apache.heron.spi.metricsmgr.metrics.ExceptionInfo; @@ -66,7 +66,7 @@ public class SinkExecutorTest { private volatile int flushInvoked = 0; private volatile int initInvoked = 0; private DummyMetricsSink metricsSink; - private SlaveLooper slaveLooper; + private ExecutorLooper executorLooper; private Communicator communicator; private SinkExecutor sinkExecutor; private ExecutorService threadsPool; @@ -74,21 +74,21 @@ public class SinkExecutorTest { @Before public void before() throws Exception { metricsSink = new DummyMetricsSink(EXPECTED_RECORDS, EXPECTED_FLUSHES); - slaveLooper = new SlaveLooper(); - communicator = new Communicator<>(null, slaveLooper); + executorLooper = new ExecutorLooper(); + communicator = new Communicator<>(null, executorLooper); SinkContext sinkContext = new SinkContextImpl("topology-name", "cluster", "role", "environment", "metricsmgr-id", "sink-id", new MultiCountMetric()); sinkExecutor = - new SinkExecutor("testSinkId", metricsSink, slaveLooper, communicator, sinkContext); + new SinkExecutor("testSinkId", metricsSink, executorLooper, communicator, sinkContext); } @After public void after() throws Exception { metricsSink = null; - slaveLooper = null; + executorLooper = null; communicator = null; sinkExecutor = null; } diff --git a/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSinkTest.java b/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSinkTest.java index b02669ecfd4..48bcf18c65b 100644 --- a/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSinkTest.java +++ b/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSinkTest.java @@ -37,7 +37,7 @@ import org.apache.heron.common.config.SystemConfig; import org.apache.heron.common.config.SystemConfigKey; import org.apache.heron.metricsmgr.sink.SinkContextImpl; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.spi.metricsmgr.sink.SinkContext; import static org.junit.Assert.assertEquals; @@ -51,7 +51,7 @@ public class MetricsCacheSinkTest { // Bean name to register the MetricsCacheLocation object into SingletonRegistry private static final String METRICSCACHE_LOCATION_BEAN_NAME = - TopologyMaster.MetricsCacheLocation.newBuilder().getDescriptorForType().getFullName(); + 
TopologyManager.MetricsCacheLocation.newBuilder().getDescriptorForType().getFullName(); private static final Duration RECONNECT_INTERVAL = Duration.ofSeconds(1); // Restart wait time is set at 2 times of reconnect time plus another second. The 2 times factor @@ -73,13 +73,13 @@ private static Map buildServiceConfig() { return serviceConfig; } - private static TopologyMaster.MetricsCacheLocation getMetricsCacheLocation(int masterPort) { + private static TopologyManager.MetricsCacheLocation getMetricsCacheLocation(int serverPort) { // Notice here we set host and port as invalid values // So MetricsCache would throw "java.nio.channels.UnresolvedAddressException" once it starts, // and then dies - return TopologyMaster.MetricsCacheLocation.newBuilder(). + return TopologyManager.MetricsCacheLocation.newBuilder(). setTopologyName("topology-name").setTopologyId("topology-id").setHost("host"). - setControllerPort(0).setMasterPort(masterPort).setStatsPort(0).build(); + setControllerPort(0).setManagerPort(serverPort).setStatsPort(0).build(); } @Before @@ -161,7 +161,7 @@ public void testHandleMetricsCacheLocation() throws Exception { metricsCacheSink.init(sinkConfig, sinkContext); // Put the MetricsCacheLocation into SingletonRegistry - TopologyMaster.MetricsCacheLocation oldLoc = getMetricsCacheLocation(0); + TopologyManager.MetricsCacheLocation oldLoc = getMetricsCacheLocation(0); SingletonRegistry.INSTANCE.registerSingleton(METRICSCACHE_LOCATION_BEAN_NAME, oldLoc); SysUtils.sleep(RESTART_WAIT_INTERVAL); @@ -172,7 +172,7 @@ public void testHandleMetricsCacheLocation() throws Exception { assertEquals(oldLoc, metricsCacheSink.getCurrentMetricsCacheLocationInService()); // Update it, the MetricsCacheSink should pick up the new one. - TopologyMaster.MetricsCacheLocation newLoc = getMetricsCacheLocation(1); + TopologyManager.MetricsCacheLocation newLoc = getMetricsCacheLocation(1); SingletonRegistry.INSTANCE.updateSingleton(METRICSCACHE_LOCATION_BEAN_NAME, newLoc); int lastMetricsCacheStartedAttempts = metricsCacheSink.getMetricsCacheStartedAttempts(); @@ -192,12 +192,12 @@ public void testHandleMetricsCacheLocation() throws Exception { @Test public void testCheckCommunicator() { - Communicator communicator = new Communicator<>(); + Communicator communicator = new Communicator<>(); int initSize = 16; int capSize = 10; - TopologyMaster.PublishMetrics.Builder publishMetrics = - TopologyMaster.PublishMetrics.newBuilder(); + TopologyManager.PublishMetrics.Builder publishMetrics = + TopologyManager.PublishMetrics.newBuilder(); for (int i = 0; i < initSize; ++i) { communicator.offer(publishMetrics.build()); } diff --git a/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/tmaster/TMasterSinkTest.java b/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSinkTest.java similarity index 58% rename from heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/tmaster/TMasterSinkTest.java rename to heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSinkTest.java index 3d57adfbbc4..b36db801737 100644 --- a/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/tmaster/TMasterSinkTest.java +++ b/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSinkTest.java @@ -17,7 +17,7 @@ * under the License. 
*/ -package org.apache.heron.metricsmgr.sink.tmaster; +package org.apache.heron.metricsmgr.sink.tmanager; import java.lang.reflect.Field; import java.nio.file.Paths; @@ -37,7 +37,7 @@ import org.apache.heron.common.config.SystemConfig; import org.apache.heron.common.config.SystemConfigKey; import org.apache.heron.metricsmgr.sink.SinkContextImpl; -import org.apache.heron.proto.tmaster.TopologyMaster; +import org.apache.heron.proto.tmanager.TopologyManager; import org.apache.heron.spi.metricsmgr.sink.SinkContext; import static org.junit.Assert.assertEquals; @@ -45,22 +45,22 @@ import static org.junit.Assert.assertTrue; /** - * TMasterSink Tester. + * TManagerSink Tester. */ -public class TMasterSinkTest { +public class TManagerSinkTest { - // Bean name to register the TMasterLocation object into SingletonRegistry - private static final String TMASTER_LOCATION_BEAN_NAME = - TopologyMaster.TMasterLocation.newBuilder().getDescriptorForType().getFullName(); + // Bean name to register the TManagerLocation object into SingletonRegistry + private static final String TMANAGER_LOCATION_BEAN_NAME = + TopologyManager.TManagerLocation.newBuilder().getDescriptorForType().getFullName(); private static final Duration RECONNECT_INTERVAL = Duration.ofSeconds(1); // Restart wait time is set at 2 times of reconnect time plus another second. The 2 times factor // is because of location checking event interval and the sleep of reconnect interval in // exception handling. private static final Duration RESTART_WAIT_INTERVAL = Duration.ofSeconds(3); - private static final Duration TMASTER_LOCATION_CHECK_INTERVAL = Duration.ofSeconds(1); + private static final Duration TMANAGER_LOCATION_CHECK_INTERVAL = Duration.ofSeconds(1); - // These are config for TMasterClient + // These are config for TManagerClient private static Map buildServiceConfig() { Map serviceConfig = new HashMap<>(); // Fill with necessary config @@ -74,13 +74,13 @@ private static Map buildServiceConfig() { return serviceConfig; } - private static TopologyMaster.TMasterLocation getTMasterLocation(int masterPort) { + private static TopologyManager.TManagerLocation getTManagerLocation(int serverPort) { // Notice here we set host and port as invalid values - // So TMaster would throw "java.nio.channels.UnresolvedAddressException" once it starts, + // So TManager would throw "java.nio.channels.UnresolvedAddressException" once it starts, // and then dies - return TopologyMaster.TMasterLocation.newBuilder(). + return TopologyManager.TManagerLocation.newBuilder(). setTopologyName("topology-name").setTopologyId("topology-id").setHost("host"). 
- setControllerPort(0).setMasterPort(masterPort).setStatsPort(0).build(); + setControllerPort(0).setManagerPort(serverPort).setStatsPort(0).build(); } @Before @@ -113,104 +113,104 @@ public void after() throws NoSuchFieldException, IllegalAccessException { } /** - * Test automatic recover from uncaught exceptions in TMasterClient + * Test automatic recover from uncaught exceptions in TManagerClient */ @Test - public void testTMasterClientService() throws InterruptedException { - // create a new TMasterClientService - TMasterSink tMasterSink = new TMasterSink(); - tMasterSink.createSimpleTMasterClientService(buildServiceConfig()); - tMasterSink.startNewTMasterClient(getTMasterLocation(0)); + public void testTManagerClientService() throws InterruptedException { + // create a new TManagerClientService + TManagerSink tManagerSink = new TManagerSink(); + tManagerSink.createSimpleTManagerClientService(buildServiceConfig()); + tManagerSink.startNewTManagerClient(getTManagerLocation(0)); // We wait for a while to let auto recover fully finish. SysUtils.sleep(RESTART_WAIT_INTERVAL); - // Then we check whether the TMasterService has restarted the TMasterClient for several times - // Take other factors into account, we would check whether the TMasterClient has restarted + // Then we check whether the TManagerService has restarted the TManagerClient for several times + // Take other factors into account, we would check whether the TManagerClient has restarted // at least half the RESTART_WAIT_INTERVAL/RECONNECT_INTERVAL - assertTrue(tMasterSink.getTMasterStartedAttempts() + assertTrue(tManagerSink.getTManagerStartedAttempts() >= (RESTART_WAIT_INTERVAL.getSeconds() / RECONNECT_INTERVAL.getSeconds() / 2)); - tMasterSink.close(); + tManagerSink.close(); } /** - * Test whether TMasterSink would handle TMasterLocation in SingletonRegistry automatically + * Test whether TManagerSink would handle TManagerLocation in SingletonRegistry automatically */ @Test - public void testHandleTMasterLocation() throws InterruptedException { - // create a new TMasterClientService - TMasterSink tMasterSink = new TMasterSink(); + public void testHandleTManagerLocation() throws InterruptedException { + // create a new TManagerClientService + TManagerSink tManagerSink = new TManagerSink(); Map sinkConfig = new HashMap<>(); // Fill with necessary config sinkConfig.put( - "tmaster-location-check-interval-sec", TMASTER_LOCATION_CHECK_INTERVAL.getSeconds()); + "tmanager-location-check-interval-sec", TMANAGER_LOCATION_CHECK_INTERVAL.getSeconds()); - sinkConfig.put("tmaster-client", buildServiceConfig()); + sinkConfig.put("tmanager-client", buildServiceConfig()); // It is null since we have not set it - Assert.assertNull(tMasterSink.getCurrentTMasterLocation()); + Assert.assertNull(tManagerSink.getCurrentTManagerLocation()); MultiCountMetric multiCountMetric = new MultiCountMetric(); SinkContext sinkContext = new SinkContextImpl("topology-name", "cluster", "role", "environment", "metricsmgr-id", "sink-id", multiCountMetric); - // Start the TMasterSink - tMasterSink.init(sinkConfig, sinkContext); + // Start the TManagerSink + tManagerSink.init(sinkConfig, sinkContext); - // Put the TMasterLocation into SingletonRegistry - TopologyMaster.TMasterLocation oldLoc = getTMasterLocation(0); - SingletonRegistry.INSTANCE.registerSingleton(TMASTER_LOCATION_BEAN_NAME, oldLoc); + // Put the TManagerLocation into SingletonRegistry + TopologyManager.TManagerLocation oldLoc = getTManagerLocation(0); + 
SingletonRegistry.INSTANCE.registerSingleton(TMANAGER_LOCATION_BEAN_NAME, oldLoc); SysUtils.sleep(RESTART_WAIT_INTERVAL); - // The TMasterService should start - assertTrue(tMasterSink.getTMasterStartedAttempts() > 0); - assertEquals(oldLoc, tMasterSink.getCurrentTMasterLocation()); - assertEquals(oldLoc, tMasterSink.getCurrentTMasterLocationInService()); + // The TManagerService should start + assertTrue(tManagerSink.getTManagerStartedAttempts() > 0); + assertEquals(oldLoc, tManagerSink.getCurrentTManagerLocation()); + assertEquals(oldLoc, tManagerSink.getCurrentTManagerLocationInService()); - // Update it, the TMasterSink should pick up the new one. - TopologyMaster.TMasterLocation newLoc = getTMasterLocation(1); - SingletonRegistry.INSTANCE.updateSingleton(TMASTER_LOCATION_BEAN_NAME, newLoc); + // Update it, the TManagerSink should pick up the new one. + TopologyManager.TManagerLocation newLoc = getTManagerLocation(1); + SingletonRegistry.INSTANCE.updateSingleton(TMANAGER_LOCATION_BEAN_NAME, newLoc); - int lastTMasterStartedAttempts = tMasterSink.getTMasterStartedAttempts(); + int lastTManagerStartedAttempts = tManagerSink.getTManagerStartedAttempts(); SysUtils.sleep(RESTART_WAIT_INTERVAL); - // The TMasterService should use the new TMasterLocation - assertTrue(tMasterSink.getTMasterStartedAttempts() > lastTMasterStartedAttempts); - assertNotSame(oldLoc, tMasterSink.getCurrentTMasterLocation()); - assertNotSame(oldLoc, tMasterSink.getCurrentTMasterLocationInService()); - assertEquals(newLoc, tMasterSink.getCurrentTMasterLocation()); - assertEquals(newLoc, tMasterSink.getCurrentTMasterLocationInService()); + // The TManagerService should use the new TManagerLocation + assertTrue(tManagerSink.getTManagerStartedAttempts() > lastTManagerStartedAttempts); + assertNotSame(oldLoc, tManagerSink.getCurrentTManagerLocation()); + assertNotSame(oldLoc, tManagerSink.getCurrentTManagerLocationInService()); + assertEquals(newLoc, tManagerSink.getCurrentTManagerLocation()); + assertEquals(newLoc, tManagerSink.getCurrentTManagerLocationInService()); - tMasterSink.close(); + tManagerSink.close(); } @Test public void testCheckCommunicator() { - Communicator communicator = new Communicator<>(); + Communicator communicator = new Communicator<>(); int initSize = 16; int capSize = 10; - TopologyMaster.PublishMetrics.Builder publishMetrics = - TopologyMaster.PublishMetrics.newBuilder(); + TopologyManager.PublishMetrics.Builder publishMetrics = + TopologyManager.PublishMetrics.newBuilder(); for (int i = 0; i < initSize; ++i) { communicator.offer(publishMetrics.build()); } assertEquals(communicator.size(), initSize); - TMasterSink.checkCommunicator(communicator, initSize + 1); + TManagerSink.checkCommunicator(communicator, initSize + 1); assertEquals(communicator.size(), initSize); - TMasterSink.checkCommunicator(communicator, initSize); + TManagerSink.checkCommunicator(communicator, initSize); assertEquals(communicator.size(), initSize); - TMasterSink.checkCommunicator(communicator, initSize - 1); + TManagerSink.checkCommunicator(communicator, initSize - 1); assertEquals(communicator.size(), initSize - 1); - TMasterSink.checkCommunicator(communicator, capSize); + TManagerSink.checkCommunicator(communicator, capSize); assertEquals(communicator.size(), capSize); } } From abda1eb5bc7ad78e81a3a137a578b691629d24c7 Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Mon, 26 Oct 2020 16:02:08 -0400 Subject: [PATCH 26/32] renaming "topology master" to "topology manager" in heron/schedulers --- 
.../kubernetes/KubernetesConstants.java | 16 ++--- .../heron/scheduler/local/LocalScheduler.java | 2 +- .../scheduler/marathon/MarathonConstants.java | 16 ++--- .../mesos/framework/LaunchableTask.java | 8 +-- .../heron/scheduler/nomad/NomadConstants.java | 28 ++++----- .../scheduler/yarn/HeronMasterDriver.java | 62 +++++++++---------- .../heron/scheduler/yarn/YarnScheduler.java | 2 +- .../scheduler/aurora/AuroraSchedulerTest.java | 2 +- .../scheduler/local/LocalSchedulerTest.java | 2 +- .../scheduler/yarn/HeronMasterDriverTest.java | 44 ++++++------- .../scheduler/yarn/YarnSchedulerTest.java | 4 +- 11 files changed, 93 insertions(+), 93 deletions(-) diff --git a/heron/schedulers/src/java/org/apache/heron/scheduler/kubernetes/KubernetesConstants.java b/heron/schedulers/src/java/org/apache/heron/scheduler/kubernetes/KubernetesConstants.java index d1a3b8be6f4..e411818f693 100644 --- a/heron/schedulers/src/java/org/apache/heron/scheduler/kubernetes/KubernetesConstants.java +++ b/heron/schedulers/src/java/org/apache/heron/scheduler/kubernetes/KubernetesConstants.java @@ -57,13 +57,13 @@ private KubernetesConstants() { public static final String PROMETHEUS_PORT = "8080"; - public static final int MASTER_PORT = 6001; - public static final int TMASTER_CONTROLLER_PORT = 6002; - public static final int TMASTER_STATS_PORT = 6003; + public static final int SERVER_PORT = 6001; + public static final int TMANAGER_CONTROLLER_PORT = 6002; + public static final int TMANAGER_STATS_PORT = 6003; public static final int SHELL_PORT = 6004; public static final int METRICSMGR_PORT = 6005; public static final int SCHEDULER_PORT = 6006; - public static final int METRICS_CACHE_MASTER_PORT = 6007; + public static final int METRICS_CACHE_SERVER_PORT = 6007; public static final int METRICS_CACHE_STATS_PORT = 6008; public static final int CHECKPOINT_MGR_PORT = 6009; // port number the start with when more than one port needed for remote debugging @@ -72,13 +72,13 @@ private KubernetesConstants() { public static final Map EXECUTOR_PORTS = new HashMap<>(); static { - EXECUTOR_PORTS.put(ExecutorPort.MASTER_PORT, MASTER_PORT); - EXECUTOR_PORTS.put(ExecutorPort.TMASTER_CONTROLLER_PORT, TMASTER_CONTROLLER_PORT); - EXECUTOR_PORTS.put(ExecutorPort.TMASTER_STATS_PORT, TMASTER_STATS_PORT); + EXECUTOR_PORTS.put(ExecutorPort.SERVER_PORT, SERVER_PORT); + EXECUTOR_PORTS.put(ExecutorPort.TMANAGER_CONTROLLER_PORT, TMANAGER_CONTROLLER_PORT); + EXECUTOR_PORTS.put(ExecutorPort.TMANAGER_STATS_PORT, TMANAGER_STATS_PORT); EXECUTOR_PORTS.put(ExecutorPort.SHELL_PORT, SHELL_PORT); EXECUTOR_PORTS.put(ExecutorPort.METRICS_MANAGER_PORT, METRICSMGR_PORT); EXECUTOR_PORTS.put(ExecutorPort.SCHEDULER_PORT, SCHEDULER_PORT); - EXECUTOR_PORTS.put(ExecutorPort.METRICS_CACHE_MASTER_PORT, METRICS_CACHE_MASTER_PORT); + EXECUTOR_PORTS.put(ExecutorPort.METRICS_CACHE_SERVER_PORT, METRICS_CACHE_SERVER_PORT); EXECUTOR_PORTS.put(ExecutorPort.METRICS_CACHE_STATS_PORT, METRICS_CACHE_STATS_PORT); EXECUTOR_PORTS.put(ExecutorPort.CHECKPOINT_MANAGER_PORT, CHECKPOINT_MGR_PORT); } diff --git a/heron/schedulers/src/java/org/apache/heron/scheduler/local/LocalScheduler.java b/heron/schedulers/src/java/org/apache/heron/scheduler/local/LocalScheduler.java index 5dd44323359..a049e268653 100644 --- a/heron/schedulers/src/java/org/apache/heron/scheduler/local/LocalScheduler.java +++ b/heron/schedulers/src/java/org/apache/heron/scheduler/local/LocalScheduler.java @@ -190,7 +190,7 @@ public boolean onSchedule(PackingPlan packing) { LOG.info("Starting to deploy topology: " + 
LocalContext.topologyName(config)); synchronized (processToContainer) { - LOG.info("Starting executor for TMaster"); + LOG.info("Starting executor for TManager"); startExecutor(0, null); // for each container, run its own executor diff --git a/heron/schedulers/src/java/org/apache/heron/scheduler/marathon/MarathonConstants.java b/heron/schedulers/src/java/org/apache/heron/scheduler/marathon/MarathonConstants.java index 296fd122f01..b365cc99cab 100644 --- a/heron/schedulers/src/java/org/apache/heron/scheduler/marathon/MarathonConstants.java +++ b/heron/schedulers/src/java/org/apache/heron/scheduler/marathon/MarathonConstants.java @@ -61,25 +61,25 @@ private MarathonConstants() { public static final String DOCKER_FORCE_PULL = "forcePullImage"; public static final String DOCKER_NETWORK_BRIDGE = "BRIDGE"; - public static final String MASTER_PORT = "$PORT0"; - public static final String TMASTER_CONTROLLER_PORT = "$PORT1"; - public static final String TMASTER_STATS_PORT = "$PORT2"; + public static final String SERVER_PORT = "$PORT0"; + public static final String TMANAGER_CONTROLLER_PORT = "$PORT1"; + public static final String TMANAGER_STATS_PORT = "$PORT2"; public static final String SHELL_PORT = "$PORT3"; public static final String METRICSMGR_PORT = "$PORT4"; public static final String SCHEDULER_PORT = "$PORT5"; - public static final String METRICS_CACHE_MASTER_PORT = "$PORT6"; + public static final String METRICS_CACHE_SERVER_PORT = "$PORT6"; public static final String METRICS_CACHE_STATS_PORT = "$PORT7"; public static final String CKPTMGR_PORT = "$PORT8"; public static final Map EXECUTOR_PORTS = new HashMap<>(); static { - EXECUTOR_PORTS.put(ExecutorPort.MASTER_PORT, MASTER_PORT); - EXECUTOR_PORTS.put(ExecutorPort.TMASTER_CONTROLLER_PORT, TMASTER_CONTROLLER_PORT); - EXECUTOR_PORTS.put(ExecutorPort.TMASTER_STATS_PORT, TMASTER_STATS_PORT); + EXECUTOR_PORTS.put(ExecutorPort.SERVER_PORT, SERVER_PORT); + EXECUTOR_PORTS.put(ExecutorPort.TMANAGER_CONTROLLER_PORT, TMANAGER_CONTROLLER_PORT); + EXECUTOR_PORTS.put(ExecutorPort.TMANAGER_STATS_PORT, TMANAGER_STATS_PORT); EXECUTOR_PORTS.put(ExecutorPort.SHELL_PORT, SHELL_PORT); EXECUTOR_PORTS.put(ExecutorPort.METRICS_MANAGER_PORT, METRICSMGR_PORT); EXECUTOR_PORTS.put(ExecutorPort.SCHEDULER_PORT, SCHEDULER_PORT); - EXECUTOR_PORTS.put(ExecutorPort.METRICS_CACHE_MASTER_PORT, METRICS_CACHE_MASTER_PORT); + EXECUTOR_PORTS.put(ExecutorPort.METRICS_CACHE_SERVER_PORT, METRICS_CACHE_SERVER_PORT); EXECUTOR_PORTS.put(ExecutorPort.METRICS_CACHE_STATS_PORT, METRICS_CACHE_STATS_PORT); EXECUTOR_PORTS.put(ExecutorPort.CHECKPOINT_MANAGER_PORT, CKPTMGR_PORT); } diff --git a/heron/schedulers/src/java/org/apache/heron/scheduler/mesos/framework/LaunchableTask.java b/heron/schedulers/src/java/org/apache/heron/scheduler/mesos/framework/LaunchableTask.java index 7cea0099e02..96e34762684 100644 --- a/heron/schedulers/src/java/org/apache/heron/scheduler/mesos/framework/LaunchableTask.java +++ b/heron/schedulers/src/java/org/apache/heron/scheduler/mesos/framework/LaunchableTask.java @@ -212,13 +212,13 @@ protected String join(String[] array, String delimiter) { protected String executorCommand( Config config, Config runtime, int containerIndex) { Map ports = new HashMap<>(); - ports.put(ExecutorPort.MASTER_PORT, String.valueOf(freePorts.get(0))); - ports.put(ExecutorPort.TMASTER_CONTROLLER_PORT, String.valueOf(freePorts.get(1))); - ports.put(ExecutorPort.TMASTER_STATS_PORT, String.valueOf(freePorts.get(2))); + ports.put(ExecutorPort.SERVER_PORT, String.valueOf(freePorts.get(0))); + 
ports.put(ExecutorPort.TMANAGER_CONTROLLER_PORT, String.valueOf(freePorts.get(1))); + ports.put(ExecutorPort.TMANAGER_STATS_PORT, String.valueOf(freePorts.get(2))); ports.put(ExecutorPort.SHELL_PORT, String.valueOf(freePorts.get(3))); ports.put(ExecutorPort.METRICS_MANAGER_PORT, String.valueOf(freePorts.get(4))); ports.put(ExecutorPort.SCHEDULER_PORT, String.valueOf(freePorts.get(5))); - ports.put(ExecutorPort.METRICS_CACHE_MASTER_PORT, String.valueOf(freePorts.get(6))); + ports.put(ExecutorPort.METRICS_CACHE_SERVER_PORT, String.valueOf(freePorts.get(6))); ports.put(ExecutorPort.METRICS_CACHE_STATS_PORT, String.valueOf(freePorts.get(7))); ports.put(ExecutorPort.CHECKPOINT_MANAGER_PORT, String.valueOf(freePorts.get(8))); diff --git a/heron/schedulers/src/java/org/apache/heron/scheduler/nomad/NomadConstants.java b/heron/schedulers/src/java/org/apache/heron/scheduler/nomad/NomadConstants.java index ec2c80d2afe..7e52339334c 100644 --- a/heron/schedulers/src/java/org/apache/heron/scheduler/nomad/NomadConstants.java +++ b/heron/schedulers/src/java/org/apache/heron/scheduler/nomad/NomadConstants.java @@ -67,13 +67,13 @@ public String getName() { public static final String HERON_TOPOLOGY_DOWNLOAD_CMD = "HERON_TOPOLOGY_DOWNLOAD_CMD"; public static final String HERON_EXECUTOR_CMD = "HERON_EXECUTOR_CMD"; //Ports - public static final String MASTER_PORT = String.format("${NOMAD_PORT_%s}", - SchedulerUtils.ExecutorPort.MASTER_PORT.getName()); - public static final String TMASTER_CONTROLLER_PORT = String.format("${NOMAD_PORT_%s}", - SchedulerUtils.ExecutorPort.TMASTER_CONTROLLER_PORT.getName() + public static final String SERVER_PORT = String.format("${NOMAD_PORT_%s}", + SchedulerUtils.ExecutorPort.SERVER_PORT.getName()); + public static final String TMANAGER_CONTROLLER_PORT = String.format("${NOMAD_PORT_%s}", + SchedulerUtils.ExecutorPort.TMANAGER_CONTROLLER_PORT.getName() .replace("-", "_")); - public static final String TMASTER_STATS_PORT = String.format("${NOMAD_PORT_%s}", - SchedulerUtils.ExecutorPort.TMASTER_STATS_PORT.getName() + public static final String TMANAGER_STATS_PORT = String.format("${NOMAD_PORT_%s}", + SchedulerUtils.ExecutorPort.TMANAGER_STATS_PORT.getName() .replace("-", "_")); public static final String SHELL_PORT = String.format("${NOMAD_PORT_%s}", SchedulerUtils.ExecutorPort.SHELL_PORT.getName() @@ -84,8 +84,8 @@ public String getName() { public static final String SCHEDULER_PORT = String.format("${NOMAD_PORT_%s}", SchedulerUtils.ExecutorPort.SCHEDULER_PORT.getName() .replace("-", "_")); - public static final String METRICS_CACHE_MASTER_PORT = String.format("${NOMAD_PORT_%s}", - SchedulerUtils.ExecutorPort.METRICS_CACHE_MASTER_PORT.getName() + public static final String METRICS_CACHE_SERVER_PORT = String.format("${NOMAD_PORT_%s}", + SchedulerUtils.ExecutorPort.METRICS_CACHE_SERVER_PORT.getName() .replace("-", "_")); public static final String METRICS_CACHE_STATS_PORT = String.format("${NOMAD_PORT_%s}", SchedulerUtils.ExecutorPort.METRICS_CACHE_STATS_PORT.getName() @@ -102,15 +102,15 @@ public String getName() { public static final Map EXECUTOR_PORTS = new HashMap<>(); static { - EXECUTOR_PORTS.put(SchedulerUtils.ExecutorPort.MASTER_PORT, MASTER_PORT); - EXECUTOR_PORTS.put(SchedulerUtils.ExecutorPort.TMASTER_CONTROLLER_PORT, - TMASTER_CONTROLLER_PORT); - EXECUTOR_PORTS.put(SchedulerUtils.ExecutorPort.TMASTER_STATS_PORT, TMASTER_STATS_PORT); + EXECUTOR_PORTS.put(SchedulerUtils.ExecutorPort.SERVER_PORT, SERVER_PORT); + 
EXECUTOR_PORTS.put(SchedulerUtils.ExecutorPort.TMANAGER_CONTROLLER_PORT, + TMANAGER_CONTROLLER_PORT); + EXECUTOR_PORTS.put(SchedulerUtils.ExecutorPort.TMANAGER_STATS_PORT, TMANAGER_STATS_PORT); EXECUTOR_PORTS.put(SchedulerUtils.ExecutorPort.SHELL_PORT, SHELL_PORT); EXECUTOR_PORTS.put(SchedulerUtils.ExecutorPort.METRICS_MANAGER_PORT, METRICS_MANAGER_PORT); EXECUTOR_PORTS.put(SchedulerUtils.ExecutorPort.SCHEDULER_PORT, SCHEDULER_PORT); - EXECUTOR_PORTS.put(SchedulerUtils.ExecutorPort.METRICS_CACHE_MASTER_PORT, - METRICS_CACHE_MASTER_PORT); + EXECUTOR_PORTS.put(SchedulerUtils.ExecutorPort.METRICS_CACHE_SERVER_PORT, + METRICS_CACHE_SERVER_PORT); EXECUTOR_PORTS.put(SchedulerUtils.ExecutorPort.METRICS_CACHE_STATS_PORT, METRICS_CACHE_STATS_PORT); EXECUTOR_PORTS.put(SchedulerUtils.ExecutorPort.CHECKPOINT_MANAGER_PORT, diff --git a/heron/schedulers/src/java/org/apache/heron/scheduler/yarn/HeronMasterDriver.java b/heron/schedulers/src/java/org/apache/heron/scheduler/yarn/HeronMasterDriver.java index 7322ec324e7..d166f3c4329 100644 --- a/heron/schedulers/src/java/org/apache/heron/scheduler/yarn/HeronMasterDriver.java +++ b/heron/schedulers/src/java/org/apache/heron/scheduler/yarn/HeronMasterDriver.java @@ -105,7 +105,7 @@ @Unit public class HeronMasterDriver { static final int TM_MEM_SIZE_MB = 1024; - static final int TMASTER_CONTAINER_ID = 0; + static final int TMANAGER_CONTAINER_ID = 0; static final int MB = 1024 * 1024; private static final Logger LOG = Logger.getLogger(HeronMasterDriver.class.getName()); private final String topologyPackageName; @@ -128,7 +128,7 @@ public class HeronMasterDriver { // looked up by heron's executor id or REEF's container id. private MultiKeyWorkerMap multiKeyWorkerMap; - private TMaster tMaster; + private TManager tManager; // TODO: https://github.com/apache/incubator-heron/issues/949: implement Driver HA @@ -185,11 +185,11 @@ void scheduleHeronWorkers(PackingPlan topologyPacking) throws ContainerAllocatio } /* - * Must be invoked after workers are scheduled. TMaster needs component RAM map. + * Must be invoked after workers are scheduled. TManager needs component RAM map. */ - void launchTMaster() { - tMaster = buildTMaster(Executors.newSingleThreadExecutor()); - tMaster.launch(); + void launchTManager() { + tManager = buildTManager(Executors.newSingleThreadExecutor()); + tManager.launch(); } /** @@ -266,7 +266,7 @@ public void killTopology() { LOG.log(Level.INFO, "Kill topology: {0}", topologyName); isTopologyKilled.set(true); - tMaster.killTMaster(); + tManager.killTManager(); for (HeronWorker worker : multiKeyWorkerMap.getHeronWorkers()) { AllocatedEvaluator evaluator = multiKeyWorkerMap.detachEvaluatorAndRemove(worker); @@ -397,10 +397,10 @@ Optional lookupByContainerPlan(int id) { } @VisibleForTesting - TMaster buildTMaster(ExecutorService executor) { - TMaster tMasterManager = new TMaster(); - tMasterManager.executor = executor; - return tMasterManager; + TManager buildTManager(ExecutorService executor) { + TManager tManagerManager = new TManager(); + tManagerManager.executor = executor; + return tManagerManager; } /** @@ -501,29 +501,29 @@ public ContainerAllocationException(String message, Exception e) { } /** - * This class manages the TMaster executor process, including launching the TMaster, monitoring it + * This class manages the TManager executor process, including launching the TManager, monitoring it * and killing it when needed. 
*/ @VisibleForTesting - class TMaster implements Runnable { + class TManager implements Runnable { private ExecutorService executor; - private Future<?> tMasterFuture; - private CountDownLatch tMasterErrorCounter = new CountDownLatch(3); + private Future<?> tManagerFuture; + private CountDownLatch tManagerErrorCounter = new CountDownLatch(3); void launch() { LOG.log(Level.INFO, "Launching executor for TM: {0}", topologyName); - tMasterFuture = executor.submit(this); + tManagerFuture = executor.submit(this); - // the following task will restart the tMaster if it fails + // the following task will restart the tManager if it fails executor.submit(new Runnable() { @Override public void run() { try { - tMasterFuture.get(); - LOG.log(Level.INFO, "TMaster executor terminated, {0}", topologyName); + tManagerFuture.get(); + LOG.log(Level.INFO, "TManager executor terminated, {0}", topologyName); } catch (InterruptedException | ExecutionException e) { - LOG.log(Level.WARNING, "Error while waiting for TMaster executor", e); + LOG.log(Level.WARNING, "Error while waiting for TManager executor", e); } if (isTopologyKilled.get()) { @@ -531,35 +531,35 @@ public void run() { return; } - tMasterErrorCounter.countDown(); - long counter = tMasterErrorCounter.getCount(); + tManagerErrorCounter.countDown(); + long counter = tManagerErrorCounter.getCount(); if (counter > 0) { - LOG.log(Level.WARNING, "Restarting TMaster, attempts left: {0}", counter); + LOG.log(Level.WARNING, "Restarting TManager, attempts left: {0}", counter); launch(); } } }); } - void killTMaster() { - LOG.log(Level.INFO, "Killing TMaster process: {0}", topologyName); - if (!tMasterFuture.isDone()) { - tMasterFuture.cancel(true); + void killTManager() { + LOG.log(Level.INFO, "Killing TManager process: {0}", topologyName); + if (!tManagerFuture.isDone()) { + tManagerFuture.cancel(true); } executor.shutdownNow(); } - HeronExecutorTask getTMasterExecutorTask() { - return new HeronExecutorTask(reefFileNames, TMASTER_CONTAINER_ID, + HeronExecutorTask getTManagerExecutorTask() { + return new HeronExecutorTask(reefFileNames, TMANAGER_CONTAINER_ID, cluster, role, topologyName, env, topologyPackageName, heronCorePackageName, topologyJar, getComponentRamMap(), verboseMode); } @Override public void run() { - HeronExecutorTask tMasterTask = getTMasterExecutorTask(); + HeronExecutorTask tManagerTask = getTManagerExecutorTask(); try { - tMasterTask.startExecutor(); + tManagerTask.startExecutor(); } catch (InvalidTopologyException e) { throw new RuntimeException(e); } diff --git a/heron/schedulers/src/java/org/apache/heron/scheduler/yarn/YarnScheduler.java b/heron/schedulers/src/java/org/apache/heron/scheduler/yarn/YarnScheduler.java index 6baae8f06a2..b9f565d3d43 100644 --- a/heron/schedulers/src/java/org/apache/heron/scheduler/yarn/YarnScheduler.java +++ b/heron/schedulers/src/java/org/apache/heron/scheduler/yarn/YarnScheduler.java @@ -58,7 +58,7 @@ public boolean onSchedule(PackingPlan packing) { HeronMasterDriver driver = HeronMasterDriverProvider.getInstance(); try { driver.scheduleHeronWorkers(packing); - driver.launchTMaster(); + driver.launchTManager(); return true; } catch (HeronMasterDriver.ContainerAllocationException e) { LOG.log(Level.ALL, "Failed to allocate containers for topology", e); diff --git a/heron/schedulers/tests/java/org/apache/heron/scheduler/aurora/AuroraSchedulerTest.java b/heron/schedulers/tests/java/org/apache/heron/scheduler/aurora/AuroraSchedulerTest.java index b32bb22ec59..a73a7e0676d 100644 ---
a/heron/schedulers/tests/java/org/apache/heron/scheduler/aurora/AuroraSchedulerTest.java +++ b/heron/schedulers/tests/java/org/apache/heron/scheduler/aurora/AuroraSchedulerTest.java @@ -312,7 +312,7 @@ public void testProperties() throws URISyntaxException { + " --state-manager-connection=null" + " --state-manager-root=null" + " --state-manager-config-file=" + expectedConf + "/statemgr.yaml" - + " --tmaster-binary=" + expectedBin + "/heron-tmaster" + + " --tmanager-binary=" + expectedBin + "/heron-tmanager" + " --stmgr-binary=" + expectedBin + "/heron-stmgr" + " --metrics-manager-classpath=" + expectedLib + "/metricsmgr/*" + " --instance-jvm-opts=\"\"" diff --git a/heron/schedulers/tests/java/org/apache/heron/scheduler/local/LocalSchedulerTest.java b/heron/schedulers/tests/java/org/apache/heron/scheduler/local/LocalSchedulerTest.java index 5778f2c43e6..0832795eae9 100644 --- a/heron/schedulers/tests/java/org/apache/heron/scheduler/local/LocalSchedulerTest.java +++ b/heron/schedulers/tests/java/org/apache/heron/scheduler/local/LocalSchedulerTest.java @@ -193,7 +193,7 @@ public void testRemoveContainer() throws Exception { Mockito.doReturn(processes[i]).when(scheduler) .startExecutorProcess(i, instances); if (i > 0) { - // ignore the container for TMaster. existing containers simulate the containers created + // ignore the container for TManager. existing containers simulate the containers created // by packing plan existingContainers.add(PackingTestUtils.testContainerPlan(i)); } diff --git a/heron/schedulers/tests/java/org/apache/heron/scheduler/yarn/HeronMasterDriverTest.java b/heron/schedulers/tests/java/org/apache/heron/scheduler/yarn/HeronMasterDriverTest.java index f191c30ef27..480dd1de96a 100644 --- a/heron/schedulers/tests/java/org/apache/heron/scheduler/yarn/HeronMasterDriverTest.java +++ b/heron/schedulers/tests/java/org/apache/heron/scheduler/yarn/HeronMasterDriverTest.java @@ -128,13 +128,13 @@ public void scheduleHeronWorkersRequestsContainersForPacking() throws Exception } @Test - public void onKillClosesContainersKillsTMaster() throws Exception { - HeronMasterDriver.TMaster mockTMaster = mock(HeronMasterDriver.TMaster.class); - when(spyDriver.buildTMaster(any(ExecutorService.class))).thenReturn(mockTMaster); + public void onKillClosesContainersKillsTManager() throws Exception { + HeronMasterDriver.TManager mockTManager = mock(HeronMasterDriver.TManager.class); + when(spyDriver.buildTManager(any(ExecutorService.class))).thenReturn(mockTManager); int numContainers = 3; AllocatedEvaluator[] mockEvaluators = createApplicationWithContainers(numContainers); - spyDriver.launchTMaster(); + spyDriver.launchTManager(); spyDriver.killTopology(); @@ -143,7 +143,7 @@ public void onKillClosesContainersKillsTMaster() throws Exception { assertFalse(spyDriver.lookupByEvaluatorId("e" + id).isPresent()); } - verify(mockTMaster, times(1)).killTMaster(); + verify(mockTManager, times(1)).killTManager(); } /** @@ -364,33 +364,33 @@ public void onNextAllocatedEvaluatorDiscardsExtraWorker() throws Exception { } @Test - public void tMasterLaunchLaunchesExecutorForTMaster() throws Exception { + public void tManagerLaunchLaunchesExecutorForTManager() throws Exception { ExecutorService executorService = mock(ExecutorService.class); - HeronMasterDriver.TMaster tMaster = spyDriver.buildTMaster(executorService); - doReturn(mock(Future.class)).when(executorService).submit(tMaster); - tMaster.launch(); - verify(executorService, times(1)).submit(tMaster); + HeronMasterDriver.TManager tManager = 
spyDriver.buildTManager(executorService); + doReturn(mock(Future.class)).when(executorService).submit(tManager); + tManager.launch(); + verify(executorService, times(1)).submit(tManager); } @Test - public void tMasterKillTerminatesTMaster() throws Exception { + public void tManagerKillTerminatesTManager() throws Exception { ExecutorService mockExecutorService = mock(ExecutorService.class); - HeronMasterDriver.TMaster tMaster = spyDriver.buildTMaster(mockExecutorService); + HeronMasterDriver.TManager tManager = spyDriver.buildTManager(mockExecutorService); Future mockFuture = mock(Future.class); - doReturn(mockFuture).when(mockExecutorService).submit(tMaster); + doReturn(mockFuture).when(mockExecutorService).submit(tManager); - tMaster.launch(); - tMaster.killTMaster(); + tManager.launch(); + tManager.killTManager(); verify(mockFuture, times(1)).cancel(true); verify(mockExecutorService, times(1)).shutdownNow(); } @Test - public void tMasterLaunchRestartsTMasterOnFailure() throws Exception { - HeronMasterDriver.TMaster tMaster = - spy(spyDriver.buildTMaster(Executors.newSingleThreadExecutor())); + public void tManagerLaunchRestartsTManagerOnFailure() throws Exception { + HeronMasterDriver.TManager tManager = + spy(spyDriver.buildTManager(Executors.newSingleThreadExecutor())); HeronExecutorTask mockTask = mock(HeronExecutorTask.class); final CountDownLatch testLatch = new CountDownLatch(1); @@ -401,19 +401,19 @@ public Object answer(InvocationOnMock invocation) throws Throwable { return null; } }).when(mockTask).startExecutor(); - doReturn(mockTask).when(tMaster).getTMasterExecutorTask(); + doReturn(mockTask).when(tManager).getTManagerExecutorTask(); - tMaster.launch(); + tManager.launch(); verify(mockTask, timeout(1000).times(1)).startExecutor(); testLatch.countDown(); - //retries if tmaster ends for some reason + //retries if tmanager ends for some reason verify(mockTask, timeout(1000).times(3)).startExecutor(); } @Test @PrepareForTest({HeronReefUtils.class, SchedulerMain.class}) - public void onNextStartTimeStartsSchedulerTMaster() throws Exception { + public void onNextStartTimeStartsSchedulerTManager() throws Exception { PowerMockito.spy(HeronReefUtils.class); PowerMockito.doNothing().when(HeronReefUtils.class, "extractPackageInSandbox", diff --git a/heron/schedulers/tests/java/org/apache/heron/scheduler/yarn/YarnSchedulerTest.java b/heron/schedulers/tests/java/org/apache/heron/scheduler/yarn/YarnSchedulerTest.java index a3409cd9782..44a55f51557 100644 --- a/heron/schedulers/tests/java/org/apache/heron/scheduler/yarn/YarnSchedulerTest.java +++ b/heron/schedulers/tests/java/org/apache/heron/scheduler/yarn/YarnSchedulerTest.java @@ -33,7 +33,7 @@ public class YarnSchedulerTest { public void delegatesToDriverOnSchedule() throws Exception { HeronMasterDriver mockHeronDriver = Mockito.mock(HeronMasterDriver.class); HeronMasterDriverProvider.setInstance(mockHeronDriver); - Mockito.doNothing().when(mockHeronDriver).launchTMaster(); + Mockito.doNothing().when(mockHeronDriver).launchTManager(); IScheduler scheduler = new YarnScheduler(); PackingPlan mockPacking = Mockito.mock(PackingPlan.class); @@ -41,7 +41,7 @@ public void delegatesToDriverOnSchedule() throws Exception { InOrder invocationOrder = Mockito.inOrder(mockHeronDriver); invocationOrder.verify(mockHeronDriver).scheduleHeronWorkers(mockPacking); - invocationOrder.verify(mockHeronDriver).launchTMaster(); + invocationOrder.verify(mockHeronDriver).launchTManager(); } @Test From 42ea9efcc7650500fdc6df56eb19abd6c69cf585 Mon Sep 17 00:00:00 
2001 From: Jim Bo Date: Mon, 26 Oct 2020 19:15:38 -0400 Subject: [PATCH 27/32] fixing rename of "master port" to "server port" in heron/metricsmgr --- .../metricsmgr/sink/metricscache/MetricsCacheSink.java | 2 +- .../apache/heron/metricsmgr/sink/tmanager/TManagerSink.java | 2 +- .../apache/heron/metricsmgr/HandleTManagerLocationTest.java | 6 +++--- .../metricsmgr/sink/metricscache/MetricsCacheSinkTest.java | 2 +- .../heron/metricsmgr/sink/tmanager/TManagerSinkTest.java | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSink.java b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSink.java index fc7638bd49e..1f348c89cd8 100644 --- a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSink.java +++ b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSink.java @@ -390,7 +390,7 @@ public synchronized void startNewPrimaryClient() { metricsCacheClient = new MetricsCacheClient(looper, currentMetricsCacheLocation.getHost(), - currentMetricsCacheLocation.getManagerPort(), + currentMetricsCacheLocation.getServerPort(), socketOptions, metricsCommunicator, TypeUtils.getDuration( metricsCacheClientConfig.get(KEY_TMANAGER_RECONNECT_INTERVAL_SEC), diff --git a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSink.java b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSink.java index 0a09428adae..b10f7a2d660 100644 --- a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSink.java +++ b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSink.java @@ -378,7 +378,7 @@ public synchronized void startNewPrimaryClient() { tManagerClient = new TManagerClient(looper, currentTManagerLocation.getHost(), - currentTManagerLocation.getManagerPort(), + currentTManagerLocation.getServerPort(), socketOptions, metricsCommunicator, TypeUtils.getDuration( tmanagerClientConfig.get(KEY_TMANAGER_RECONNECT_INTERVAL_SEC), ChronoUnit.SECONDS)); diff --git a/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/HandleTManagerLocationTest.java b/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/HandleTManagerLocationTest.java index 6d05a1e93c8..f02a3dc4625 100644 --- a/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/HandleTManagerLocationTest.java +++ b/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/HandleTManagerLocationTest.java @@ -66,14 +66,14 @@ public class HandleTManagerLocationTest { Metrics.TManagerLocationRefreshMessage.newBuilder().setTmanager( TopologyManager.TManagerLocation.newBuilder(). setTopologyName("topology-name").setTopologyId("topology-id"). - setHost("host").setControllerPort(0).setManagerPort(0)). + setHost("host").setControllerPort(0).setServerPort(0)). build(); private static final Metrics.TManagerLocationRefreshMessage TMANAGERLOCATIONREFRESHMESSAGE1 = Metrics.TManagerLocationRefreshMessage.newBuilder().setTmanager( TopologyManager.TManagerLocation.newBuilder(). setTopologyName("topology-name").setTopologyId("topology-id"). - setHost("host").setControllerPort(0).setManagerPort(1)). + setHost("host").setControllerPort(0).setServerPort(1)). 
build(); // Bean name to register the TManagerLocation object into SingletonRegistry @@ -142,7 +142,7 @@ public void testHandleTManagerLocation() throws InterruptedException { Assert.assertEquals("topology-id", tManagerLocation.getTopologyId()); Assert.assertEquals("host", tManagerLocation.getHost()); Assert.assertEquals(0, tManagerLocation.getControllerPort()); - Assert.assertEquals(1, tManagerLocation.getManagerPort()); + Assert.assertEquals(1, tManagerLocation.getServerPort()); } private static final class TestRequestHandler implements HeronServerTester.TestRequestHandler { diff --git a/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSinkTest.java b/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSinkTest.java index 48bcf18c65b..a28f8a1e0a3 100644 --- a/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSinkTest.java +++ b/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSinkTest.java @@ -79,7 +79,7 @@ private static TopologyManager.MetricsCacheLocation getMetricsCacheLocation(int // and then dies return TopologyManager.MetricsCacheLocation.newBuilder(). setTopologyName("topology-name").setTopologyId("topology-id").setHost("host"). - setControllerPort(0).setManagerPort(serverPort).setStatsPort(0).build(); + setControllerPort(0).setServerPort(serverPort).setStatsPort(0).build(); } @Before diff --git a/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSinkTest.java b/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSinkTest.java index b36db801737..130bae207b7 100644 --- a/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSinkTest.java +++ b/heron/metricsmgr/tests/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSinkTest.java @@ -80,7 +80,7 @@ private static TopologyManager.TManagerLocation getTManagerLocation(int serverPo // and then dies return TopologyManager.TManagerLocation.newBuilder(). setTopologyName("topology-name").setTopologyId("topology-id").setHost("host"). 
- setControllerPort(0).setManagerPort(serverPort).setStatsPort(0).build(); + setControllerPort(0).setServerPort(serverPort).setStatsPort(0).build(); } @Before From ac3b4fc28896e20f28fea3670d184814f591d732 Mon Sep 17 00:00:00 2001 From: Jim Bo Date: Mon, 26 Oct 2020 20:35:20 -0400 Subject: [PATCH 28/32] renaming "topology master" to "topology manager" in heron/tmaster --- heron/{tmaster => tmanager}/src/cpp/BUILD | 24 +- .../src/cpp/manager/ckptmgr-client.cpp | 16 +- .../src/cpp/manager/ckptmgr-client.h | 14 +- .../src/cpp/manager/stateful-checkpointer.cpp | 12 +- .../src/cpp/manager/stateful-checkpointer.h | 18 +- .../src/cpp/manager/stateful-controller.cpp | 10 +- .../src/cpp/manager/stateful-controller.h | 16 +- .../src/cpp/manager/stateful-restorer.cpp | 4 +- .../src/cpp/manager/stateful-restorer.h | 10 +- .../src/cpp/manager/stats-interface.cpp | 30 +- .../src/cpp/manager/stats-interface.h | 16 +- .../src/cpp/manager/stmgrstate.cpp | 8 +- .../src/cpp/manager/stmgrstate.h | 10 +- .../src/cpp/manager/tcontroller.cpp | 40 +-- .../src/cpp/manager/tcontroller.h | 16 +- .../src/cpp/manager/tmanager.cpp} | 278 +++++++++--------- .../src/cpp/manager/tmanager.h} | 58 ++-- .../src/cpp/manager/tmanagerserver.cpp} | 64 ++-- .../src/cpp/manager/tmanagerserver.h} | 36 +-- .../src/cpp/manager/tmetrics-collector.cpp | 82 +++--- .../src/cpp/manager/tmetrics-collector.h | 70 ++--- .../src/cpp/processor/processor.h | 6 +- .../processor/stmgr-heartbeat-processor.cpp | 22 +- .../cpp/processor/stmgr-heartbeat-processor.h | 10 +- .../processor/stmgr-register-processor.cpp | 24 +- .../cpp/processor/stmgr-register-processor.h | 10 +- .../src/cpp/processor/tmanager-processor.cpp} | 10 +- .../src/cpp/processor/tmanager-processor.h} | 14 +- .../src/cpp/server/tmanager-main.cpp} | 10 +- .../tests/cpp/server/BUILD | 32 +- .../tests/cpp/server/dummystmgr.cpp | 12 +- .../tests/cpp/server/dummystmgr.h | 4 +- .../tests/cpp/server/dummytmanager.cpp} | 26 +- .../tests/cpp/server/dummytmanager.h} | 20 +- .../server/stateful_checkpointer_unittest.cpp | 4 +- .../cpp/server/stateful_restorer_unittest.cpp | 82 +++--- .../tests/cpp/server/tcontroller_unittest.cpp | 2 +- .../tests/cpp/server/tmanager_unittest.cpp} | 116 ++++---- 38 files changed, 618 insertions(+), 618 deletions(-) rename heron/{tmaster => tmanager}/src/cpp/BUILD (84%) rename heron/{tmaster => tmanager}/src/cpp/manager/ckptmgr-client.cpp (93%) rename heron/{tmaster => tmanager}/src/cpp/manager/ckptmgr-client.h (88%) rename heron/{tmaster => tmanager}/src/cpp/manager/stateful-checkpointer.cpp (93%) rename heron/{tmaster => tmanager}/src/cpp/manager/stateful-checkpointer.h (85%) rename heron/{tmaster => tmanager}/src/cpp/manager/stateful-controller.cpp (98%) rename heron/{tmaster => tmanager}/src/cpp/manager/stateful-controller.h (94%) rename heron/{tmaster => tmanager}/src/cpp/manager/stateful-restorer.cpp (98%) rename heron/{tmaster => tmanager}/src/cpp/manager/stateful-restorer.h (92%) rename heron/{tmaster => tmanager}/src/cpp/manager/stats-interface.cpp (89%) rename heron/{tmaster => tmanager}/src/cpp/manager/stats-interface.h (85%) rename heron/{tmaster => tmanager}/src/cpp/manager/stmgrstate.cpp (98%) rename heron/{tmaster => tmanager}/src/cpp/manager/stmgrstate.h (96%) rename heron/{tmaster => tmanager}/src/cpp/manager/tcontroller.cpp (92%) rename heron/{tmaster => tmanager}/src/cpp/manager/tcontroller.h (94%) rename heron/{tmaster/src/cpp/manager/tmaster.cpp => tmanager/src/cpp/manager/tmanager.cpp} (80%) rename heron/{tmaster/src/cpp/manager/tmaster.h 
=> tmanager/src/cpp/manager/tmanager.h} (88%) rename heron/{tmaster/src/cpp/manager/tmasterserver.cpp => tmanager/src/cpp/manager/tmanagerserver.cpp} (55%) rename heron/{tmaster/src/cpp/manager/tmasterserver.h => tmanager/src/cpp/manager/tmanagerserver.h} (69%) rename heron/{tmaster => tmanager}/src/cpp/manager/tmetrics-collector.cpp (86%) rename heron/{tmaster => tmanager}/src/cpp/manager/tmetrics-collector.h (77%) rename heron/{tmaster => tmanager}/src/cpp/processor/processor.h (88%) rename heron/{tmaster => tmanager}/src/cpp/processor/stmgr-heartbeat-processor.cpp (69%) rename heron/{tmaster => tmanager}/src/cpp/processor/stmgr-heartbeat-processor.h (84%) rename heron/{tmaster => tmanager}/src/cpp/processor/stmgr-register-processor.cpp (73%) rename heron/{tmaster => tmanager}/src/cpp/processor/stmgr-register-processor.h (84%) rename heron/{tmaster/src/cpp/processor/tmaster-processor.cpp => tmanager/src/cpp/processor/tmanager-processor.cpp} (86%) rename heron/{tmaster/src/cpp/processor/tmaster-processor.h => tmanager/src/cpp/processor/tmanager-processor.h} (88%) rename heron/{tmaster/src/cpp/server/tmaster-main.cpp => tmanager/src/cpp/server/tmanager-main.cpp} (90%) rename heron/{tmaster => tmanager}/tests/cpp/server/BUILD (77%) rename heron/{tmaster => tmanager}/tests/cpp/server/dummystmgr.cpp (91%) rename heron/{tmaster => tmanager}/tests/cpp/server/dummystmgr.h (93%) rename heron/{tmaster/tests/cpp/server/dummytmaster.cpp => tmanager/tests/cpp/server/dummytmanager.cpp} (63%) rename heron/{tmaster/tests/cpp/server/dummytmaster.h => tmanager/tests/cpp/server/dummytmanager.h} (74%) rename heron/{tmaster => tmanager}/tests/cpp/server/stateful_checkpointer_unittest.cpp (98%) rename heron/{tmaster => tmanager}/tests/cpp/server/stateful_restorer_unittest.cpp (88%) rename heron/{tmaster => tmanager}/tests/cpp/server/tcontroller_unittest.cpp (95%) rename heron/{tmaster/tests/cpp/server/tmaster_unittest.cpp => tmanager/tests/cpp/server/tmanager_unittest.cpp} (90%) diff --git a/heron/tmaster/src/cpp/BUILD b/heron/tmanager/src/cpp/BUILD similarity index 84% rename from heron/tmaster/src/cpp/BUILD rename to heron/tmanager/src/cpp/BUILD index ee004e525ec..eb0131890a9 100644 --- a/heron/tmaster/src/cpp/BUILD +++ b/heron/tmanager/src/cpp/BUILD @@ -3,7 +3,7 @@ load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library") package(default_visibility = ["//visibility:public"]) cc_library( - name = "tmaster-cxx", + name = "tmanager-cxx", srcs = [ "manager/ckptmgr-client.cpp", "manager/ckptmgr-client.h", @@ -19,27 +19,27 @@ cc_library( "manager/stmgrstate.h", "manager/tcontroller.cpp", "manager/tcontroller.h", - "manager/tmaster.cpp", - "manager/tmasterserver.cpp", - "manager/tmasterserver.h", + "manager/tmanager.cpp", + "manager/tmanagerserver.cpp", + "manager/tmanagerserver.h", "manager/tmetrics-collector.cpp", "manager/tmetrics-collector.h", "processor/stmgr-heartbeat-processor.cpp", "processor/stmgr-heartbeat-processor.h", "processor/stmgr-register-processor.cpp", "processor/stmgr-register-processor.h", - "processor/tmaster-processor.cpp", - "processor/tmaster-processor.h", + "processor/tmanager-processor.cpp", + "processor/tmanager-processor.h", ], hdrs = [ - "manager/tmaster.h", + "manager/tmanager.h", "processor/processor.h", ], copts = [ "-Iheron", "-Iheron/common/src/cpp", "-Iheron/statemgrs/src/cpp", - "-Iheron/tmaster/src/cpp", + "-Iheron/tmanager/src/cpp", "-I$(GENDIR)/heron", "-I$(GENDIR)/heron/common/src/cpp", ], @@ -56,21 +56,21 @@ cc_library( ) cc_binary( - name = "heron-tmaster", + name = 
"heron-tmanager", srcs = [ - "server/tmaster-main.cpp", + "server/tmanager-main.cpp", ], copts = [ "-Iheron", "-Iheron/common/src/cpp", "-Iheron/statemgrs/src/cpp", - "-Iheron/tmaster/src/cpp", + "-Iheron/tmanager/src/cpp", "-I$(GENDIR)/heron", "-I$(GENDIR)/heron/common/src/cpp", ], linkstatic = 1, deps = [ - ":tmaster-cxx", + ":tmanager-cxx", "//config:config-cxx", "//heron/common/src/cpp/config:config-cxx", "//heron/common/src/cpp/metrics:metrics-cxx", diff --git a/heron/tmaster/src/cpp/manager/ckptmgr-client.cpp b/heron/tmanager/src/cpp/manager/ckptmgr-client.cpp similarity index 93% rename from heron/tmaster/src/cpp/manager/ckptmgr-client.cpp rename to heron/tmanager/src/cpp/manager/ckptmgr-client.cpp index eb4ac408b76..212567d84b5 100644 --- a/heron/tmaster/src/cpp/manager/ckptmgr-client.cpp +++ b/heron/tmanager/src/cpp/manager/ckptmgr-client.cpp @@ -24,7 +24,7 @@ #include "threads/threads.h" namespace heron { -namespace tmaster { +namespace tmanager { CkptMgrClient::CkptMgrClient(std::shared_ptr eventLoop, const NetworkOptions& _options, const sp_string& _topology_name, const sp_string& _topology_id, @@ -40,8 +40,8 @@ CkptMgrClient::CkptMgrClient(std::shared_ptr eventLoop, const Network - InstallResponseHandler(make_unique(), - &CkptMgrClient::HandleTMasterRegisterResponse); + InstallResponseHandler(make_unique(), + &CkptMgrClient::HandleTManagerRegisterResponse); InstallResponseHandler(make_unique(), &CkptMgrClient::HandleCleanStatefulCheckpointResponse); } @@ -98,9 +98,9 @@ void CkptMgrClient::HandleClose(NetworkErrorCode _status) { } } -void CkptMgrClient::HandleTMasterRegisterResponse( +void CkptMgrClient::HandleTManagerRegisterResponse( void*, - pool_unique_ptr _response, + pool_unique_ptr _response, NetworkErrorCode _status) { if (_status != OK) { LOG(ERROR) << "NonOK network code" << _status << " for register response from ckptmgr " @@ -127,8 +127,8 @@ void CkptMgrClient::HandleTMasterRegisterResponse( void CkptMgrClient::OnReconnectTimer() { Start(); } void CkptMgrClient::SendRegisterRequest() { - LOG(INFO) << "Sending RegisterTmasterRequest to ckptmgr" << std::endl; - auto request = make_unique(); + LOG(INFO) << "Sending RegisterTmanagerRequest to ckptmgr" << std::endl; + auto request = make_unique(); request->set_topology_name(topology_name_); request->set_topology_id(topology_id_); SendRequest(std::move(request), nullptr); @@ -161,5 +161,5 @@ void CkptMgrClient::HandleCleanStatefulCheckpointResponse( clean_response_watcher_(code); } -} // namespace tmaster +} // namespace tmanager } // namespace heron diff --git a/heron/tmaster/src/cpp/manager/ckptmgr-client.h b/heron/tmanager/src/cpp/manager/ckptmgr-client.h similarity index 88% rename from heron/tmaster/src/cpp/manager/ckptmgr-client.h rename to heron/tmanager/src/cpp/manager/ckptmgr-client.h index 8c187aa3267..5c2ea702f01 100644 --- a/heron/tmaster/src/cpp/manager/ckptmgr-client.h +++ b/heron/tmanager/src/cpp/manager/ckptmgr-client.h @@ -17,8 +17,8 @@ * under the License. 
*/ -#ifndef SRC_CPP_SVCS_TMASTER_SRC_CKPTMGR_CLIENT_H -#define SRC_CPP_SVCS_TMASTER_SRC_CKPTMGR_CLIENT_H +#ifndef SRC_CPP_SVCS_TMANAGER_SRC_CKPTMGR_CLIENT_H +#define SRC_CPP_SVCS_TMANAGER_SRC_CKPTMGR_CLIENT_H #include #include "basics/basics.h" @@ -27,7 +27,7 @@ #include "proto/messages.h" namespace heron { -namespace tmaster { +namespace tmanager { class CkptMgrClient : public Client { public: @@ -49,9 +49,9 @@ class CkptMgrClient : public Client { virtual void HandleClose(NetworkErrorCode status); private: - void HandleTMasterRegisterResponse( + void HandleTManagerRegisterResponse( void*, - pool_unique_ptr_response, + pool_unique_ptr_response, NetworkErrorCode _status); void SendRegisterRequest(); @@ -67,7 +67,7 @@ class CkptMgrClient : public Client { sp_int32 reconnect_ckptmgr_interval_sec_; }; -} // namespace tmaster +} // namespace tmanager } // namespace heron -#endif // SRC_CPP_SVCS_TMASTER_SRC_CKPTMGR_CLIENT_H +#endif // SRC_CPP_SVCS_TMANAGER_SRC_CKPTMGR_CLIENT_H diff --git a/heron/tmaster/src/cpp/manager/stateful-checkpointer.cpp b/heron/tmanager/src/cpp/manager/stateful-checkpointer.cpp similarity index 93% rename from heron/tmaster/src/cpp/manager/stateful-checkpointer.cpp rename to heron/tmanager/src/cpp/manager/stateful-checkpointer.cpp index fc4d4e6b9d7..0506085818d 100644 --- a/heron/tmaster/src/cpp/manager/stateful-checkpointer.cpp +++ b/heron/tmanager/src/cpp/manager/stateful-checkpointer.cpp @@ -23,16 +23,16 @@ #include #include #include "config/physical-plan-helper.h" -#include "manager/tmaster.h" +#include "manager/tmanager.h" #include "manager/stmgrstate.h" #include "errors/errors.h" namespace heron { -namespace tmaster { +namespace tmanager { StatefulCheckpointer::StatefulCheckpointer( - std::chrono::high_resolution_clock::time_point _tmaster_start_time) - : tmaster_start_time_(_tmaster_start_time) { + std::chrono::high_resolution_clock::time_point _tmanager_start_time) + : tmanager_start_time_(_tmanager_start_time) { // do nothing } @@ -41,7 +41,7 @@ StatefulCheckpointer::~StatefulCheckpointer() { } sp_string StatefulCheckpointer::GenerateCheckpointId() { // TODO(skukarni) Should we append any topology name/id stuff? std::ostringstream tag; - tag << tmaster_start_time_.time_since_epoch().count() + tag << tmanager_start_time_.time_since_epoch().count() << "-" << time(NULL); return tag.str(); } @@ -97,5 +97,5 @@ bool StatefulCheckpointer::HandleInstanceStateStored(const std::string& _checkpo return false; } } -} // namespace tmaster +} // namespace tmanager } // namespace heron diff --git a/heron/tmaster/src/cpp/manager/stateful-checkpointer.h b/heron/tmanager/src/cpp/manager/stateful-checkpointer.h similarity index 85% rename from heron/tmaster/src/cpp/manager/stateful-checkpointer.h rename to heron/tmanager/src/cpp/manager/stateful-checkpointer.h index 5ec708d4fa9..dd6e3a10630 100644 --- a/heron/tmaster/src/cpp/manager/stateful-checkpointer.h +++ b/heron/tmanager/src/cpp/manager/stateful-checkpointer.h @@ -17,14 +17,14 @@ * under the License. 
*/ -#ifndef __TMASTER_STATEFUL_CHECKPOINTER_H_ -#define __TMASTER_STATEFUL_CHECKPOINTER_H_ +#ifndef __TMANAGER_STATEFUL_CHECKPOINTER_H_ +#define __TMANAGER_STATEFUL_CHECKPOINTER_H_ #include #include #include "network/network.h" -#include "proto/tmaster.pb.h" -#include "manager/tmaster.h" +#include "proto/tmanager.pb.h" +#include "manager/tmanager.h" #include "basics/basics.h" @@ -35,7 +35,7 @@ class PhysicalPlan; } namespace heron { -namespace tmaster { +namespace tmanager { /** * A StatefulCheckpointer is responsible for sending NewStatefulCheckpoint @@ -45,13 +45,13 @@ namespace tmaster { */ class StatefulCheckpointer { public: - explicit StatefulCheckpointer(std::chrono::high_resolution_clock::time_point _tmaster_start_time); + explicit StatefulCheckpointer(std::chrono::high_resolution_clock::time_point _tmanager_start_time); virtual ~StatefulCheckpointer(); void RegisterNewPhysicalPlan(const proto::system::PhysicalPlan& _pplan); void StartCheckpoint(const StMgrMap& _stmgrs); - // Called by tmaster when a InstanceStateStored message is received + // Called by tmanager when a InstanceStateStored message is received // Return true if this completes a globally consistent checkpoint // for this _checkpoint_id bool HandleInstanceStateStored(const std::string& _checkpoint_id, @@ -60,7 +60,7 @@ class StatefulCheckpointer { private: sp_string GenerateCheckpointId(); - std::chrono::high_resolution_clock::time_point tmaster_start_time_; + std::chrono::high_resolution_clock::time_point tmanager_start_time_; // Current partially consistent checkpoint // for which still some more states need to be saved @@ -74,7 +74,7 @@ class StatefulCheckpointer { // consistent checkpoint std::unordered_set partial_checkpoint_remaining_tasks_; }; -} // namespace tmaster +} // namespace tmanager } // namespace heron #endif diff --git a/heron/tmaster/src/cpp/manager/stateful-controller.cpp b/heron/tmanager/src/cpp/manager/stateful-controller.cpp similarity index 98% rename from heron/tmaster/src/cpp/manager/stateful-controller.cpp rename to heron/tmanager/src/cpp/manager/stateful-controller.cpp index a348c108b7d..6db3acb6f53 100644 --- a/heron/tmaster/src/cpp/manager/stateful-controller.cpp +++ b/heron/tmanager/src/cpp/manager/stateful-controller.cpp @@ -25,14 +25,14 @@ #include #include "manager/stateful-checkpointer.h" #include "manager/stateful-restorer.h" -#include "manager/tmaster.h" +#include "manager/tmanager.h" #include "manager/stmgrstate.h" #include "metrics/metrics.h" #include "basics/basics.h" #include "errors/errors.h" namespace heron { -namespace tmaster { +namespace tmanager { using std::make_shared; @@ -53,7 +53,7 @@ const int32_t MOST_CHECKPOINTS_NUMBER = 5; StatefulController::StatefulController(const std::string& _topology_name, shared_ptr _ckpt, shared_ptr _state_mgr, - std::chrono::high_resolution_clock::time_point _tmaster_start_time, + std::chrono::high_resolution_clock::time_point _tmanager_start_time, shared_ptr _metrics_manager_client, std::function _ckpt_save_watcher) @@ -62,7 +62,7 @@ StatefulController::StatefulController(const std::string& _topology_name, state_mgr_(_state_mgr), metrics_manager_client_(_metrics_manager_client), ckpt_save_watcher_(_ckpt_save_watcher) { - checkpointer_ = make_unique(_tmaster_start_time); + checkpointer_ = make_unique(_tmanager_start_time); restorer_ = make_unique(); count_metrics_ = make_shared(); @@ -224,5 +224,5 @@ bool StatefulController::GotRestoreResponse(const std::string& _stmgr) const { bool StatefulController::RestoreInProgress() const { return 
restorer_->IsInProgress(); } -} // namespace tmaster +} // namespace tmanager } // namespace heron diff --git a/heron/tmaster/src/cpp/manager/stateful-controller.h b/heron/tmanager/src/cpp/manager/stateful-controller.h similarity index 94% rename from heron/tmaster/src/cpp/manager/stateful-controller.h rename to heron/tmanager/src/cpp/manager/stateful-controller.h index 4e3b61895f1..a5ad08ff8d8 100644 --- a/heron/tmaster/src/cpp/manager/stateful-controller.h +++ b/heron/tmanager/src/cpp/manager/stateful-controller.h @@ -17,13 +17,13 @@ * under the License. */ -#ifndef __TMASTER_STATEFUL_HELPER_H_ -#define __TMASTER_STATEFUL_HELPER_H_ +#ifndef __TMANAGER_STATEFUL_HELPER_H_ +#define __TMANAGER_STATEFUL_HELPER_H_ #include #include #include "network/network.h" -#include "manager/tmaster.h" +#include "manager/tmanager.h" #include "basics/basics.h" namespace heron { @@ -34,7 +34,7 @@ class MultiCountMetric; } // namespace heron namespace heron { -namespace tmaster { +namespace tmanager { using std::unique_ptr; using std::shared_ptr; @@ -42,7 +42,7 @@ using std::shared_ptr; class StatefulRestorer; class StatefulCheckpointer; -// For Heron topologies running in effectively once semantics, the tmaster +// For Heron topologies running in effectively once semantics, the tmanager // utilizes the stateful controller to handle all the work related with // checkpointing and restoring from checkpoints. The statful controller // offers methods to start checkpoint/restore. It also manages the state @@ -56,14 +56,14 @@ class StatefulController { explicit StatefulController(const std::string& _topology_name, shared_ptr _ckpt, shared_ptr _state_mgr, - std::chrono::high_resolution_clock::time_point _tmaster_start_time, + std::chrono::high_resolution_clock::time_point _tmanager_start_time, shared_ptr _metrics_manager_client, std::function _ckpt_save_watcher); virtual ~StatefulController(); // Start a new restore process void StartRestore(const StMgrMap& _stmgrs, bool _ignore_prev_checkpoints); - // Called by tmaster when a Stmgr responds back with a RestoreTopologyStateResponse + // Called by tmanager when a Stmgr responds back with a RestoreTopologyStateResponse void HandleStMgrRestored(const std::string& _stmgr_id, const std::string& _checkpoint_id, int64_t _restore_txid, @@ -105,7 +105,7 @@ class StatefulController { shared_ptr count_metrics_; std::function ckpt_save_watcher_; }; -} // namespace tmaster +} // namespace tmanager } // namespace heron #endif diff --git a/heron/tmaster/src/cpp/manager/stateful-restorer.cpp b/heron/tmanager/src/cpp/manager/stateful-restorer.cpp similarity index 98% rename from heron/tmaster/src/cpp/manager/stateful-restorer.cpp rename to heron/tmanager/src/cpp/manager/stateful-restorer.cpp index d6a76d0055d..123c2cb35a9 100644 --- a/heron/tmaster/src/cpp/manager/stateful-restorer.cpp +++ b/heron/tmanager/src/cpp/manager/stateful-restorer.cpp @@ -22,7 +22,7 @@ #include "manager/stmgrstate.h" namespace heron { -namespace tmaster { +namespace tmanager { StatefulRestorer::StatefulRestorer() : in_progress_(false), @@ -82,5 +82,5 @@ void StatefulRestorer::Finish2PhaseCommit(const StMgrMap& _stmgrs) { checkpoint_id_in_progress_ = ""; } -} // namespace tmaster +} // namespace tmanager } // namespace heron diff --git a/heron/tmaster/src/cpp/manager/stateful-restorer.h b/heron/tmanager/src/cpp/manager/stateful-restorer.h similarity index 92% rename from heron/tmaster/src/cpp/manager/stateful-restorer.h rename to heron/tmanager/src/cpp/manager/stateful-restorer.h index 
3871d4d47d6..c1b3ec5830a 100644 --- a/heron/tmaster/src/cpp/manager/stateful-restorer.h +++ b/heron/tmanager/src/cpp/manager/stateful-restorer.h @@ -17,15 +17,15 @@ * under the License. */ -#ifndef __TMASTER_STATEFUL_RESTORER_H_ -#define __TMASTER_STATEFUL_RESTORER_H_ +#ifndef __TMANAGER_STATEFUL_RESTORER_H_ +#define __TMANAGER_STATEFUL_RESTORER_H_ #include #include -#include "manager/tmaster.h" +#include "manager/tmanager.h" namespace heron { -namespace tmaster { +namespace tmanager { class StatefulRestorer { public: @@ -53,7 +53,7 @@ class StatefulRestorer { std::string checkpoint_id_in_progress_; std::set unreplied_stmgrs_; }; -} // namespace tmaster +} // namespace tmanager } // namespace heron #endif diff --git a/heron/tmaster/src/cpp/manager/stats-interface.cpp b/heron/tmanager/src/cpp/manager/stats-interface.cpp similarity index 89% rename from heron/tmaster/src/cpp/manager/stats-interface.cpp rename to heron/tmanager/src/cpp/manager/stats-interface.cpp index a34f2abb8bd..f2d0abd758f 100644 --- a/heron/tmaster/src/cpp/manager/stats-interface.cpp +++ b/heron/tmanager/src/cpp/manager/stats-interface.cpp @@ -17,24 +17,24 @@ * under the License. */ -#include "tmaster/src/cpp/manager/stats-interface.h" +#include "tmanager/src/cpp/manager/stats-interface.h" #include #include -#include "manager/tmaster.h" +#include "manager/tmanager.h" #include "manager/tmetrics-collector.h" -#include "metrics/tmaster-metrics.h" +#include "metrics/tmanager-metrics.h" #include "basics/basics.h" #include "errors/errors.h" #include "threads/threads.h" #include "network/network.h" -#include "proto/tmaster.pb.h" +#include "proto/tmanager.pb.h" namespace heron { -namespace tmaster { +namespace tmanager { StatsInterface::StatsInterface(std::shared_ptr eventLoop, const NetworkOptions& _options, - shared_ptr _collector, TMaster* _tmaster) - : metrics_collector_(_collector), tmaster_(_tmaster) { + shared_ptr _collector, TManager* _tmanager) + : metrics_collector_(_collector), tmanager_(_tmanager) { http_server_ = make_unique(eventLoop, _options); // Install the handlers auto cbHandleStats = [this](IncomingHTTPRequest* request) { this->HandleStatsRequest(request); }; @@ -70,14 +70,14 @@ void StatsInterface::HandleStatsRequest(IncomingHTTPRequest* _request) { LOG(INFO) << "Got a stats request " << _request->GetQuery(); // get the entire stuff unsigned char* pb = _request->ExtractFromPostData(0, _request->GetPayloadSize()); - proto::tmaster::MetricRequest req; + proto::tmanager::MetricRequest req; if (!req.ParseFromArray(pb, _request->GetPayloadSize())) { LOG(ERROR) << "Unable to deserialize post data specified in StatsRequest"; http_server_->SendErrorReply(_request, 400); delete _request; return; } - auto res = metrics_collector_->GetMetrics(req, tmaster_->getInitialTopology()); + auto res = metrics_collector_->GetMetrics(req, tmanager_->getInitialTopology()); sp_string response_string; CHECK(res->SerializeToString(&response_string)); auto response = make_unique(_request); @@ -93,14 +93,14 @@ void StatsInterface::HandleExceptionRequest(IncomingHTTPRequest* _request) { LOG(INFO) << "Request for exceptions" << _request->GetQuery(); // Get the Exception request proto. 
unsigned char* request_data = _request->ExtractFromPostData(0, _request->GetPayloadSize()); - heron::proto::tmaster::ExceptionLogRequest exception_request; + heron::proto::tmanager::ExceptionLogRequest exception_request; if (!exception_request.ParseFromArray(request_data, _request->GetPayloadSize())) { LOG(ERROR) << "Unable to deserialize post data specified in ExceptionRequest" << std::endl; http_server_->SendErrorReply(_request, 400); delete _request; return; } - unique_ptr exception_response = + unique_ptr exception_response = metrics_collector_->GetExceptions(exception_request); sp_string response_string; CHECK(exception_response->SerializeToString(&response_string)); @@ -116,7 +116,7 @@ void StatsInterface::HandleExceptionRequest(IncomingHTTPRequest* _request) { void StatsInterface::HandleExceptionSummaryRequest(IncomingHTTPRequest* _request) { LOG(INFO) << "Request for exception summary " << _request->GetQuery(); unsigned char* request_data = _request->ExtractFromPostData(0, _request->GetPayloadSize()); - heron::proto::tmaster::ExceptionLogRequest exception_request; + heron::proto::tmanager::ExceptionLogRequest exception_request; if (!exception_request.ParseFromArray(request_data, _request->GetPayloadSize())) { LOG(ERROR) << "Unable to deserialize post data specified in ExceptionRequest" << std::endl; http_server_->SendErrorReply(_request, 400); @@ -139,7 +139,7 @@ void StatsInterface::HandleStmgrsRegistrationSummaryRequest(IncomingHTTPRequest* LOG(INFO) << "Request for stream managers registration summary " << _request->GetQuery(); unsigned char* request_data = _request->ExtractFromPostData(0, _request->GetPayloadSize()); - heron::proto::tmaster::StmgrsRegistrationSummaryRequest stmgrs_reg_request; + heron::proto::tmanager::StmgrsRegistrationSummaryRequest stmgrs_reg_request; if (!stmgrs_reg_request.ParseFromArray(request_data, _request->GetPayloadSize())) { LOG(ERROR) << "Unable to deserialize post data specified in" << "StmgrsRegistrationSummaryRequest" << std::endl; @@ -147,7 +147,7 @@ void StatsInterface::HandleStmgrsRegistrationSummaryRequest(IncomingHTTPRequest* delete _request; return; } - auto stmgrs_reg_summary_response = tmaster_->GetStmgrsRegSummary(); + auto stmgrs_reg_summary_response = tmanager_->GetStmgrsRegSummary(); sp_string response_string; CHECK(stmgrs_reg_summary_response->SerializeToString(&response_string)); auto http_response = make_unique(_request); @@ -164,5 +164,5 @@ void StatsInterface::HandleUnknownRequest(IncomingHTTPRequest* _request) { http_server_->SendErrorReply(_request, 400); delete _request; } -} // namespace tmaster +} // namespace tmanager } // namespace heron diff --git a/heron/tmaster/src/cpp/manager/stats-interface.h b/heron/tmanager/src/cpp/manager/stats-interface.h similarity index 85% rename from heron/tmaster/src/cpp/manager/stats-interface.h rename to heron/tmanager/src/cpp/manager/stats-interface.h index fa7fa79d57b..e640ed56295 100644 --- a/heron/tmaster/src/cpp/manager/stats-interface.h +++ b/heron/tmanager/src/cpp/manager/stats-interface.h @@ -17,26 +17,26 @@ * under the License. 
*/ -#ifndef __TMASTER_STATS_INTERFACE_H_ -#define __TMASTER_STATS_INTERFACE_H_ +#ifndef __TMANAGER_STATS_INTERFACE_H_ +#define __TMANAGER_STATS_INTERFACE_H_ #include "network/network.h" -#include "proto/tmaster.pb.h" +#include "proto/tmanager.pb.h" #include "basics/basics.h" namespace heron { -namespace tmaster { +namespace tmanager { using std::unique_ptr; using std::shared_ptr; class TMetricsCollector; -class TMaster; +class TManager; class StatsInterface { public: StatsInterface(std::shared_ptr eventLoop, const NetworkOptions& options, - shared_ptr _collector, TMaster* tmaster); + shared_ptr _collector, TManager* tmanager); virtual ~StatsInterface(); private: @@ -48,9 +48,9 @@ class StatsInterface { unique_ptr http_server_; // Our http server shared_ptr metrics_collector_; - TMaster* tmaster_; + TManager* tmanager_; }; -} // namespace tmaster +} // namespace tmanager } // namespace heron #endif diff --git a/heron/tmaster/src/cpp/manager/stmgrstate.cpp b/heron/tmanager/src/cpp/manager/stmgrstate.cpp similarity index 98% rename from heron/tmaster/src/cpp/manager/stmgrstate.cpp rename to heron/tmanager/src/cpp/manager/stmgrstate.cpp index b144244b8d9..06f2a2b3d54 100644 --- a/heron/tmaster/src/cpp/manager/stmgrstate.cpp +++ b/heron/tmanager/src/cpp/manager/stmgrstate.cpp @@ -21,7 +21,7 @@ #include #include #include -#include "manager/tmasterserver.h" +#include "manager/tmanagerserver.h" #include "proto/messages.h" #include "basics/basics.h" #include "errors/errors.h" @@ -30,7 +30,7 @@ #include "config/heron-internals-config-reader.h" namespace heron { -namespace tmaster { +namespace tmanager { StMgrState::StMgrState(Connection* _conn, const proto::system::StMgr& _stmgr, const std::vector>& _instances, @@ -141,8 +141,8 @@ StMgrState::AddAssignment(const std::vector >& _assignmen bool StMgrState::TimedOut() const { sp_int32 timeout = - config::HeronInternalsConfigReader::Instance()->GetHeronTmasterStmgrStateTimeoutSec(); + config::HeronInternalsConfigReader::Instance()->GetHeronTmanagerStmgrStateTimeoutSec(); return (time(NULL) - last_heartbeat_) > timeout; } -} // namespace tmaster +} // namespace tmanager } // namespace heron diff --git a/heron/tmaster/src/cpp/manager/stmgrstate.h b/heron/tmanager/src/cpp/manager/stmgrstate.h similarity index 96% rename from heron/tmaster/src/cpp/manager/stmgrstate.h rename to heron/tmanager/src/cpp/manager/stmgrstate.h index d9d9159e19e..449670c79b8 100644 --- a/heron/tmaster/src/cpp/manager/stmgrstate.h +++ b/heron/tmanager/src/cpp/manager/stmgrstate.h @@ -23,7 +23,7 @@ #include #include #include "network/network.h" -#include "proto/tmaster.pb.h" +#include "proto/tmanager.pb.h" #include "proto/ckptmgr.pb.h" #include "basics/basics.h" @@ -38,11 +38,11 @@ class PhysicalPlan; } namespace heron { -namespace tmaster { +namespace tmanager { using std::shared_ptr; -class TMasterServer; +class TManagerServer; class StMgrState { public: @@ -96,10 +96,10 @@ class StMgrState { shared_ptr stmgr_; // The connection used by the nodemanager to contact us Connection* connection_; - // Our link to our TMaster + // Our link to our TManager Server& server_; }; -} // namespace tmaster +} // namespace tmanager } // namespace heron #endif diff --git a/heron/tmaster/src/cpp/manager/tcontroller.cpp b/heron/tmanager/src/cpp/manager/tcontroller.cpp similarity index 92% rename from heron/tmaster/src/cpp/manager/tcontroller.cpp rename to heron/tmanager/src/cpp/manager/tcontroller.cpp index 8bc4ffcf834..73fba429124 100644 --- a/heron/tmaster/src/cpp/manager/tcontroller.cpp +++ 
b/heron/tmanager/src/cpp/manager/tcontroller.cpp
@@ -29,20 +29,20 @@
 #include "cereal/external/base64.hpp"
 #include "config/topology-config-helper.h"
 #include "errors/errors.h"
-#include "manager/tmaster.h"
+#include "manager/tmanager.h"
 #include "network/network.h"
 #include "proto/messages.h"
 #include "threads/threads.h"

 namespace heron {
-namespace tmaster {
+namespace tmanager {

 /*
  * HTTP service controller.
  */
 TController::TController(std::shared_ptr eventLoop, const NetworkOptions& options,
-    TMaster* tmaster)
-    : tmaster_(tmaster) {
+    TManager* tmanager)
+    : tmanager_(tmanager) {
   http_server_ = make_unique(eventLoop, options);
   /*
    * Install the handlers
@@ -90,7 +90,7 @@ void TController::HandleActivateRequest(IncomingHTTPRequest* request) {
     return;
   }

-  if (tmaster_->GetTopologyState() != proto::api::PAUSED) {
+  if (tmanager_->GetTopologyState() != proto::api::PAUSED) {
     LOG(ERROR) << "Topology not in paused state";
     http_server_->SendErrorReply(request, 400);
     delete request;
@@ -101,7 +101,7 @@ void TController::HandleActivateRequest(IncomingHTTPRequest* request) {
     this->HandleActivateRequestDone(request, status);
   };

-  tmaster_->ActivateTopology(std::move(cb));
+  tmanager_->ActivateTopology(std::move(cb));
 }

 void TController::HandleActivateRequestDone(IncomingHTTPRequest* request,
@@ -129,7 +129,7 @@ void TController::HandleDeActivateRequest(IncomingHTTPRequest* request) {
     return;
   }

-  if (tmaster_->GetTopologyState() != proto::api::RUNNING) {
+  if (tmanager_->GetTopologyState() != proto::api::RUNNING) {
     LOG(ERROR) << "Topology not in running state";
     http_server_->SendErrorReply(request, 400);
     delete request;
@@ -140,7 +140,7 @@ void TController::HandleDeActivateRequest(IncomingHTTPRequest* request) {
     this->HandleDeActivateRequestDone(request, status);
   };

-  tmaster_->DeActivateTopology(std::move(cb));
+  tmanager_->DeActivateTopology(std::move(cb));
 }

 void TController::HandleDeActivateRequestDone(IncomingHTTPRequest* request,
@@ -179,7 +179,7 @@ void TController::HandleCleanStatefulCheckpointRequest(IncomingHTTPRequest* requ
     this->HandleCleanStatefulCheckpointRequestDone(request, status);
   };

-  tmaster_->CleanAllStatefulCheckpoint();
+  tmanager_->CleanAllStatefulCheckpoint();
 }

 void TController::HandleCleanStatefulCheckpointResponse(proto::system::StatusCode _status) {
@@ -236,7 +236,7 @@ void TController::HandleUpdateRuntimeConfigRequest(IncomingHTTPRequest* request)
   }

   // Validate them before applying
-  if (!tmaster_->ValidateRuntimeConfig(config)) {
+  if (!tmanager_->ValidateRuntimeConfig(config)) {
     http_server_->SendErrorReply(request, 400, "Failed to validate runtime configs");
     delete request;
     return;
@@ -246,7 +246,7 @@ void TController::HandleUpdateRuntimeConfigRequest(IncomingHTTPRequest* request)
     this->HandleUpdateRuntimeConfigRequestDone(request, status);
   };

-  if (!tmaster_->UpdateRuntimeConfig(config, std::move(cb))) {
+  if (!tmanager_->UpdateRuntimeConfig(config, std::move(cb))) {
     http_server_->SendErrorReply(request, 400, "Failed to update runtime configs");
     delete request;
     return;
@@ -275,18 +275,18 @@ void TController::HandleGetCurPPlanRequest(IncomingHTTPRequest* request) {
                        << request->GetRemotePort();

   // make sure all the stream managers are alive, because when a container fails,
-  // physical plan is still available at TMaster but not a valid one.
-  if (tmaster_->GetStmgrsRegSummary()->absent_stmgrs_size() != 0) {
+  // physical plan is still available at TManager but not a valid one.
+  if (tmanager_->GetStmgrsRegSummary()->absent_stmgrs_size() != 0) {
     http_server_->SendErrorReply(request, 400);
     delete request;
     return;
   }

-  if (tmaster_->getPhysicalPlan() == NULL) {
+  if (tmanager_->getPhysicalPlan() == NULL) {
     http_server_->SendErrorReply(request, 400);
   } else {
     std::string pplanString;
-    tmaster_->getPhysicalPlan()->SerializeToString(&pplanString);
+    tmanager_->getPhysicalPlan()->SerializeToString(&pplanString);

     // SerializeToString() returns the object in binary format, which needs to be encoded
     const unsigned char * encodeString = (unsigned char *)pplanString.c_str();
@@ -314,14 +314,14 @@ bool TController::ValidateTopology(const IncomingHTTPRequest* request, Validatio
     result.SetResult(400, "Missing 'topologyid' argument in the request");
     return false;
   }
-  if (id != tmaster_->GetTopologyId()) {
+  if (id != tmanager_->GetTopologyId()) {
     LOG(ERROR) << "Topology id does not match";
     result.SetResult(400, "Topology id does not match");
     return false;
   }
-  if (tmaster_->getPhysicalPlan() == NULL) {
-    LOG(ERROR) << "Tmaster still not initialized (physical plan is not available)";
-    result.SetResult(500, "Tmaster still not initialized (physical plan is not available)");
+  if (tmanager_->getPhysicalPlan() == NULL) {
+    LOG(ERROR) << "Tmanager still not initialized (physical plan is not available)";
+    result.SetResult(500, "Tmanager still not initialized (physical plan is not available)");
     return false;
   }

@@ -356,5 +356,5 @@ bool TController::ParseRuntimeConfig(const std::vector& paramters,
   return true;
 }

-}  // namespace tmaster
+}  // namespace tmanager
 }  // namespace heron
diff --git a/heron/tmaster/src/cpp/manager/tcontroller.h b/heron/tmanager/src/cpp/manager/tcontroller.h
similarity index 94%
rename from heron/tmaster/src/cpp/manager/tcontroller.h
rename to heron/tmanager/src/cpp/manager/tcontroller.h
index dec9d5eb2cf..398a0caf1d3 100644
--- a/heron/tmaster/src/cpp/manager/tcontroller.h
+++ b/heron/tmanager/src/cpp/manager/tcontroller.h
@@ -25,25 +25,25 @@
 #include
 #include "network/network.h"
-#include "proto/tmaster.pb.h"
+#include "proto/tmanager.pb.h"
 #include "basics/basics.h"

 namespace heron {
-namespace tmaster {
+namespace tmanager {

 using std::shared_ptr;

-class TMaster;
+class TManager;

 class TController {
 public:
-  TController(shared_ptr eventLoop, const NetworkOptions& options, TMaster* tmaster);
+  TController(shared_ptr eventLoop, const NetworkOptions& options, TManager* tmanager);
   virtual ~TController();

   // Starts the controller
   sp_int32 Start();

-  // Called by the tmaster when it gets response form ckptmgr
+  // Called by the tmanager when it gets a response from ckptmgr
   void HandleCleanStatefulCheckpointResponse(proto::system::StatusCode _status);

   // Parse and build a map of component name to config kv map from incoming runtime configs.
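An aside for anyone exercising the renamed controller by hand: HandleGetCurPPlanRequest above serializes the current physical plan to its binary protobuf form and base64-encodes it before replying. Below is a minimal, illustrative client-side sketch of the reverse path; it is not part of this patch, and the base64_decode helper is an assumption standing in for whatever decoder matches the cereal/external/base64.hpp encoder used above.

#include <string>
#include "proto/messages.h"

// Assumed helper: the decoding counterpart of the base64 encoder pulled in
// from cereal/external/base64.hpp; not defined anywhere in this patch.
std::string base64_decode(const std::string& encoded);

// Rebuild the PhysicalPlan proto from the HTTP body returned by the
// TManager controller's current-physical-plan handler.
bool DecodePPlanResponse(const std::string& http_body,
                         heron::proto::system::PhysicalPlan* pplan) {
  const std::string raw = base64_decode(http_body);
  // ParseFromString is the standard protobuf inverse of SerializeToString.
  return pplan->ParseFromString(raw);
}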
@@ -73,8 +73,8 @@ class TController { // We are a http server unique_ptr http_server_; - // our tmaster - TMaster* tmaster_; + // our tmanager + TManager* tmanager_; // The callback to be called upon receiving clean stateful checkpoint response std::function clean_stateful_checkpoint_cb_; @@ -101,7 +101,7 @@ class TController { bool ValidateTopology(const IncomingHTTPRequest* request, ValidationResult& result); }; -} // namespace tmaster +} // namespace tmanager } // namespace heron #endif diff --git a/heron/tmaster/src/cpp/manager/tmaster.cpp b/heron/tmanager/src/cpp/manager/tmanager.cpp similarity index 80% rename from heron/tmaster/src/cpp/manager/tmaster.cpp rename to heron/tmanager/src/cpp/manager/tmanager.cpp index f8dd296f4cd..88487483587 100644 --- a/heron/tmaster/src/cpp/manager/tmaster.cpp +++ b/heron/tmanager/src/cpp/manager/tmanager.cpp @@ -17,7 +17,7 @@ * under the License. */ -#include "manager/tmaster.h" +#include "manager/tmanager.h" #include #include #include @@ -28,7 +28,7 @@ #include "manager/tmetrics-collector.h" #include "manager/tcontroller.h" #include "manager/stats-interface.h" -#include "manager/tmasterserver.h" +#include "manager/tmanagerserver.h" #include "manager/stmgrstate.h" #include "manager/stateful-controller.h" #include "manager/ckptmgr-client.h" @@ -42,10 +42,10 @@ #include "config/helper.h" #include "config/heron-internals-config-reader.h" #include "statemgr/heron-statemgr.h" -#include "metrics/tmaster-metrics.h" +#include "metrics/tmanager-metrics.h" namespace heron { -namespace tmaster { +namespace tmanager { using std::unique_ptr; using std::make_shared; @@ -60,20 +60,20 @@ const sp_int64 PROCESS_METRICS_FREQUENCY = 60_s; const sp_int64 UPTIME_METRIC_FREQUENCY = 1_s; const sp_string METRIC_PREFIX = "__process"; -TMaster::TMaster(const std::string& _zk_hostport, const std::string& _topology_name, +TManager::TManager(const std::string& _zk_hostport, const std::string& _topology_name, const std::string& _topology_id, const std::string& _topdir, - sp_int32 _tmaster_controller_port, - sp_int32 _master_port, sp_int32 _stats_port, sp_int32 metricsMgrPort, + sp_int32 _tmanager_controller_port, + sp_int32 _server_port, sp_int32 _stats_port, sp_int32 metricsMgrPort, sp_int32 _ckptmgr_port, const std::string& _metrics_sinks_yaml, const std::string& _myhost_name, shared_ptr eventLoop) { start_time_ = std::chrono::high_resolution_clock::now(); zk_hostport_ = _zk_hostport; topdir_ = _topdir; - tmaster_controller_ = nullptr; - tmaster_controller_port_ = _tmaster_controller_port; - master_ = nullptr; - master_port_ = _master_port; + tmanager_controller_ = nullptr; + tmanager_controller_port_ = _tmanager_controller_port; + server_ = nullptr; + server_port_ = _server_port; stats_ = nullptr; stats_port_ = _stats_port; myhost_name_ = _myhost_name; @@ -82,7 +82,7 @@ TMaster::TMaster(const std::string& _zk_hostport, const std::string& _topology_n http_client_ = new HTTPClient(eventLoop_, dns_); metrics_collector_ = make_shared(config::HeronInternalsConfigReader::Instance() - ->GetHeronTmasterMetricsCollectorMaximumIntervalMin() * 60, + ->GetHeronTmanagerMetricsCollectorMaximumIntervalMin() * 60, eventLoop_, _metrics_sinks_yaml); mMetricsMgrPort = metricsMgrPort; @@ -92,11 +92,11 @@ TMaster::TMaster(const std::string& _zk_hostport, const std::string& _topology_n mMetricsMgrClient = make_shared( mMetricsMgrPort, metricsExportIntervalSec, eventLoop_); - mMetricsMgrClient->Start(myhost_name_, master_port_, "__tmaster__", - "0"); // MM expects task_id, so just giving 0 for 
tmaster. + mMetricsMgrClient->Start(myhost_name_, server_port_, "__tmanager__", + "0"); // MM expects task_id, so just giving 0 for tmanager. - tmasterProcessMetrics = make_shared(); - mMetricsMgrClient->register_metric(METRIC_PREFIX, tmasterProcessMetrics); + tmanagerProcessMetrics = make_shared(); + mMetricsMgrClient->register_metric(METRIC_PREFIX, tmanagerProcessMetrics); ckptmgr_port_ = _ckptmgr_port; ckptmgr_client_ = nullptr; @@ -104,7 +104,7 @@ TMaster::TMaster(const std::string& _zk_hostport, const std::string& _topology_n current_pplan_ = nullptr; // The topology as first submitted by the user - // It shall only be used to construct the physical plan when TMaster first time starts + // It shall only be used to construct the physical plan when TManager first time starts // Any runtime changes shall be made to current_pplan_->topology topology_ = nullptr; packing_plan_ = nullptr; @@ -113,19 +113,19 @@ TMaster::TMaster(const std::string& _zk_hostport, const std::string& _topology_n assignment_in_progress_ = false; do_reassign_ = false; - master_establish_attempts_ = 0; - tmaster_location_ = make_unique(); - tmaster_location_->set_topology_name(_topology_name); - tmaster_location_->set_topology_id(_topology_id); - tmaster_location_->set_host(myhost_name_); - tmaster_location_->set_controller_port(tmaster_controller_port_); - tmaster_location_->set_master_port(master_port_); - tmaster_location_->set_stats_port(stats_port_); - DCHECK(tmaster_location_->IsInitialized()); + server_establish_attempts_ = 0; + tmanager_location_ = make_unique(); + tmanager_location_->set_topology_name(_topology_name); + tmanager_location_->set_topology_id(_topology_id); + tmanager_location_->set_host(myhost_name_); + tmanager_location_->set_controller_port(tmanager_controller_port_); + tmanager_location_->set_server_port(server_port_); + tmanager_location_->set_stats_port(stats_port_); + DCHECK(tmanager_location_->IsInitialized()); FetchPackingPlan(); - // Send tmaster location to metrics mgr - mMetricsMgrClient->RefreshTMasterLocation(*tmaster_location_); + // Send tmanager location to metrics mgr + mMetricsMgrClient->RefreshTManagerLocation(*tmanager_location_); // Check for log pruning every 5 minutes CHECK_GT(eventLoop_->registerTimer( @@ -154,16 +154,16 @@ TMaster::TMaster(const std::string& _zk_hostport, const std::string& _topology_n stateful_controller_ = nullptr; } -void TMaster::FetchPackingPlan() { +void TManager::FetchPackingPlan() { auto packing_plan = make_shared(); - state_mgr_->GetPackingPlan(tmaster_location_->topology_name(), packing_plan, + state_mgr_->GetPackingPlan(tmanager_location_->topology_name(), packing_plan, [packing_plan, this](proto::system::StatusCode status) { this->OnPackingPlanFetch(packing_plan, status); }); } -void TMaster::OnPackingPlanFetch(shared_ptr newPackingPlan, +void TManager::OnPackingPlanFetch(shared_ptr newPackingPlan, proto::system::StatusCode _status) { if (_status != proto::system::OK) { LOG(INFO) << "PackingPlan Fetch failed with status " << _status; @@ -192,19 +192,19 @@ void TMaster::OnPackingPlanFetch(shared_ptr newPacki } // this is part of the initialization process. 
Since we've got a packing plan we will
-    // register our self as the master
-    EstablishTMaster(EventLoop::TIMEOUT_EVENT);
+    // register ourselves as the server
+    EstablishTManager(EventLoop::TIMEOUT_EVENT);
   } else {
-    // We must know for sure that we are TMaster before potentially deleting the physical plan
+    // We must know for sure that we are TManager before potentially deleting the physical plan
     // in state manager. We know this to be the case here because we initially fetch
-    // packing_plan_ before becoming master, but we register the packing plan watcher only after
-    // becoming master. That guarantees that if packing_plan_ is already set and this method is
-    // invoked, it's due to the watch and we're master here.
+    // packing_plan_ before becoming the server, but we register the packing plan watcher only after
+    // becoming the server. That guarantees that if packing_plan_ is already set and this method is
+    // invoked, it's due to the watch and we're the server here.
     if (packing_plan_ != newPackingPlan) {
-      LOG(INFO) << "Packing plan changed. Deleting physical plan and restarting TMaster to "
+      LOG(INFO) << "Packing plan changed. Deleting physical plan and restarting TManager to "
                 << "reset internal state. Exiting.";
-      state_mgr_->DeletePhysicalPlan(tmaster_location_->topology_name(),
+      state_mgr_->DeletePhysicalPlan(tmanager_location_->topology_name(),
                                      [this](proto::system::StatusCode status) {
         ::exit(1);
       });
@@ -215,24 +215,24 @@ void TMaster::OnPackingPlanFetch(shared_ptr newPacki
   }
 }

-void TMaster::EstablishTMaster(EventLoop::Status) {
-  auto cb = [this](proto::system::StatusCode code) { this->SetTMasterLocationDone(code); };
+void TManager::EstablishTManager(EventLoop::Status) {
+  auto cb = [this](proto::system::StatusCode code) { this->SetTManagerLocationDone(code); };

-  state_mgr_->SetTMasterLocation(*tmaster_location_, std::move(cb));
+  state_mgr_->SetTManagerLocation(*tmanager_location_, std::move(cb));

-  // if zk lost the tmaster location, tmaster quits to bail out and re-establish its location
+  // if zk lost the tmanager location, tmanager quits to bail out and re-establish its location
   auto cb2 = [this]() {
-    LOG(ERROR) << " lost tmaster location in zk state manager. Bailing out..." << std::endl;
+    LOG(ERROR) << " lost tmanager location in zk state manager. Bailing out..." << std::endl;
     ::exit(1);
   };
-  state_mgr_->SetTMasterLocationWatch(tmaster_location_->topology_name(), std::move(cb2));
+  state_mgr_->SetTManagerLocationWatch(tmanager_location_->topology_name(), std::move(cb2));

-  master_establish_attempts_++;
+  server_establish_attempts_++;
 }

-TMaster::~TMaster() {
-  if (master_) {
-    master_->Stop();
+TManager::~TManager() {
+  if (server_) {
+    server_->Stop();
   }

   for (StMgrMapIter iter = stmgrs_.begin(); iter != stmgrs_.end(); ++iter) {
@@ -247,68 +247,68 @@
   delete dns_;
 }

-void TMaster::UpdateUptimeMetric() {
+void TManager::UpdateUptimeMetric() {
   auto end_time = std::chrono::high_resolution_clock::now();
   auto uptime = std::chrono::duration_cast(end_time - start_time_).count();
-  tmasterProcessMetrics->scope(METRIC_UPTIME)->SetValue(uptime);
+  tmanagerProcessMetrics->scope(METRIC_UPTIME)->SetValue(uptime);
 }

-void TMaster::UpdateProcessMetrics(EventLoop::Status) {
+void TManager::UpdateProcessMetrics(EventLoop::Status) {
   // CPU
   struct rusage usage;
   ProcessUtils::getResourceUsage(&usage);
-  tmasterProcessMetrics->scope(METRIC_CPU_USER)
+  tmanagerProcessMetrics->scope(METRIC_CPU_USER)
       ->SetValue((usage.ru_utime.tv_sec * 1_s) + usage.ru_utime.tv_usec);
-  tmasterProcessMetrics->scope(METRIC_CPU_SYSTEM)
+  tmanagerProcessMetrics->scope(METRIC_CPU_SYSTEM)
      ->SetValue((usage.ru_stime.tv_sec * 1_s) + usage.ru_stime.tv_usec);
   // Memory
   size_t totalmemory = ProcessUtils::getTotalMemoryUsed();
-  tmasterProcessMetrics->scope(METRIC_MEM_USED)->SetValue(totalmemory);
+  tmanagerProcessMetrics->scope(METRIC_MEM_USED)->SetValue(totalmemory);
 }

-void TMaster::SetTMasterLocationDone(proto::system::StatusCode _code) {
+void TManager::SetTManagerLocationDone(proto::system::StatusCode _code) {
   if (_code != proto::system::OK) {
-    if (_code == proto::system::TMASTERLOCATION_ALREADY_EXISTS &&
-        master_establish_attempts_ <
-            config::HeronInternalsConfigReader::Instance()->GetHeronTmasterEstablishRetryTimes()) {
-      LOG(INFO) << "Topology Master node already exists. Maybe its "
+    if (_code == proto::system::TMANAGERLOCATION_ALREADY_EXISTS &&
+        server_establish_attempts_ <
+            config::HeronInternalsConfigReader::Instance()->GetHeronTmanagerEstablishRetryTimes()) {
+      LOG(INFO) << "Topology Manager node already exists. Maybe it's "
                << "because of our restart. Will try again" << std::endl;

       // Attempt again
-      auto cb = [this](EventLoop::Status status) { this->EstablishTMaster(status); };
+      auto cb = [this](EventLoop::Status status) { this->EstablishTManager(status); };
       eventLoop_->registerTimer(std::move(cb), false,
                                 config::HeronInternalsConfigReader::Instance()
-                                        ->GetHeronTmasterEstablishRetryIntervalSec() *
+                                        ->GetHeronTmanagerEstablishRetryIntervalSec() *
                                     1_s);
       return;
     }

     // There was an error setting our location
-    LOG(ERROR) << "For topology " << tmaster_location_->topology_name()
-               << " Error setting ourselves as TMaster. Error code is " << _code << std::endl;
+    LOG(ERROR) << "For topology " << tmanager_location_->topology_name()
+               << " Error setting ourselves as TManager. Error code is " << _code << std::endl;
     ::exit(1);
   }

-  master_establish_attempts_ = 0;
+  server_establish_attempts_ = 0;

-  // We are now the master
-  LOG(INFO) << "Successfully set ourselves as master\n";
+  // We are now the server
+  LOG(INFO) << "Successfully set ourselves as server\n";

   // Let's now read the topology
   topology_ = make_unique();
-  state_mgr_->GetTopology(tmaster_location_->topology_name(), *topology_,
+  state_mgr_->GetTopology(tmanager_location_->topology_name(), *topology_,
                           [this](proto::system::StatusCode code) { this->GetTopologyDone(code); });

   // and register packing plan watcher to pick up changes
-  state_mgr_->SetPackingPlanWatch(tmaster_location_->topology_name(), [this]() {
+  state_mgr_->SetPackingPlanWatch(tmanager_location_->topology_name(), [this]() {
     this->FetchPackingPlan();
   });
 }

-void TMaster::GetTopologyDone(proto::system::StatusCode _code) {
+void TManager::GetTopologyDone(proto::system::StatusCode _code) {
   if (_code != proto::system::OK) {
     // Without Topology we can't do much
-    LOG(ERROR) << "For topology " << tmaster_location_->topology_name()
+    LOG(ERROR) << "For topology " << tmanager_location_->topology_name()
               << " Error getting topology. Error code is " << _code << std::endl;
     ::exit(1);
   }
@@ -329,11 +329,11 @@ void TMaster::GetTopologyDone(proto::system::StatusCode _code) {
     ckpt_options.set_host("127.0.0.1");
     ckpt_options.set_port(ckptmgr_port_);
     ckpt_options.set_max_packet_size(config::HeronInternalsConfigReader::Instance()
-                                         ->GetHeronTmasterNetworkMasterOptionsMaximumPacketMb() *
+                                         ->GetHeronTmanagerNetworkServerOptionsMaximumPacketMb() *
                                      1024 * 1024);
     ckptmgr_client_ = make_unique(eventLoop_, ckpt_options,
                                   topology_->name(), topology_->id(),
-                                  std::bind(&TMaster::HandleCleanStatefulCheckpointResponse,
+                                  std::bind(&TManager::HandleCleanStatefulCheckpointResponse,
                                             this, std::placeholders::_1));
     // Start the client
     ckptmgr_client_->Start();
@@ -344,22 +344,22 @@ void TMaster::GetTopologyDone(proto::system::StatusCode _code) {
       this->GetStatefulCheckpointsDone(ckpt, code);
     };

-    state_mgr_->GetStatefulCheckpoints(tmaster_location_->topology_name(), ckpt, std::move(cb));
+    state_mgr_->GetStatefulCheckpoints(tmanager_location_->topology_name(), ckpt, std::move(cb));
   } else {
     // Now see if there is already a pplan
     FetchPhysicalPlan();
   }
 }

-void TMaster::GetStatefulCheckpointsDone(
+void TManager::GetStatefulCheckpointsDone(
     shared_ptr _ckpt, proto::system::StatusCode _code) {
   if (_code != proto::system::OK && _code != proto::system::PATH_DOES_NOT_EXIST) {
-    LOG(FATAL) << "For topology " << tmaster_location_->topology_name()
+    LOG(FATAL) << "For topology " << tmanager_location_->topology_name()
               << " Getting Stateful Checkpoint failed with error " << _code;
   }

   if (_code == proto::system::PATH_DOES_NOT_EXIST) {
-    LOG(INFO) << "For topology " << tmaster_location_->topology_name()
+    LOG(INFO) << "For topology " << tmanager_location_->topology_name()
              << " No existing globally consistent checkpoint found "
              << " inserting an empty one";
     // We need to set an empty one
@@ -371,10 +371,10 @@ void TMaster::GetStatefulCheckpointsDone(
       this->SetStatefulCheckpointsDone(code, ckpts);
     };

-    state_mgr_->CreateStatefulCheckpoints(tmaster_location_->topology_name(),
+    state_mgr_->CreateStatefulCheckpoints(tmanager_location_->topology_name(),
                                          ckpts, std::move(cb));
   } else {
-    LOG(INFO) << "For topology " << tmaster_location_->topology_name()
+    LOG(INFO) << "For topology " << tmanager_location_->topology_name()
              << " An existing globally consistent checkpoint found " <<
_ckpt->DebugString(); SetupStatefulController(std::move(_ckpt)); @@ -382,17 +382,17 @@ void TMaster::GetStatefulCheckpointsDone( } } -void TMaster::SetStatefulCheckpointsDone(proto::system::StatusCode _code, +void TManager::SetStatefulCheckpointsDone(proto::system::StatusCode _code, shared_ptr _ckpt) { if (_code != proto::system::OK) { - LOG(FATAL) << "For topology " << tmaster_location_->topology_name() + LOG(FATAL) << "For topology " << tmanager_location_->topology_name() << " Setting empty Stateful Checkpoint failed with error " << _code; } SetupStatefulController(_ckpt); FetchPhysicalPlan(); } -void TMaster::SetupStatefulController( +void TManager::SetupStatefulController( shared_ptr _ckpt) { sp_int64 stateful_checkpoint_interval = config::TopologyConfigHelper::GetStatefulCheckpointIntervalSecsWithDefault(*topology_, 300); @@ -412,7 +412,7 @@ void TMaster::SetupStatefulController( 0); } -void TMaster::ResetTopologyState(Connection* _conn, const std::string& _dead_stmgr, +void TManager::ResetTopologyState(Connection* _conn, const std::string& _dead_stmgr, int32_t _dead_instance, const std::string& _reason) { LOG(INFO) << "Got a reset topology request with dead_stmgr " << _dead_stmgr << " dead_instance " << _dead_instance << " and reason " << _reason; @@ -440,16 +440,16 @@ void TMaster::ResetTopologyState(Connection* _conn, const std::string& _dead_stm } } -void TMaster::FetchPhysicalPlan() { +void TManager::FetchPhysicalPlan() { auto pplan = make_shared(); auto cb = [pplan, this](proto::system::StatusCode code) { this->GetPhysicalPlanDone(pplan, code); }; - state_mgr_->GetPhysicalPlan(tmaster_location_->topology_name(), pplan, std::move(cb)); + state_mgr_->GetPhysicalPlan(tmanager_location_->topology_name(), pplan, std::move(cb)); } -void TMaster::SendCheckpointMarker() { +void TManager::SendCheckpointMarker() { if (!absent_stmgrs_.empty()) { LOG(INFO) << "Not sending checkpoint marker because not all stmgrs have connected to us"; return; @@ -457,7 +457,7 @@ void TMaster::SendCheckpointMarker() { stateful_controller_->StartCheckpoint(stmgrs_); } -void TMaster::HandleInstanceStateStored(const std::string& _checkpoint_id, +void TManager::HandleInstanceStateStored(const std::string& _checkpoint_id, const proto::system::Instance& _instance) { LOG(INFO) << "Got notification from stmgr that we saved checkpoint for task " << _instance.info().task_id() << " for checkpoint " << _checkpoint_id; @@ -466,7 +466,7 @@ void TMaster::HandleInstanceStateStored(const std::string& _checkpoint_id, } } -void TMaster::HandleRestoreTopologyStateResponse(Connection* _conn, +void TManager::HandleRestoreTopologyStateResponse(Connection* _conn, const std::string& _checkpoint_id, int64_t _restore_txid, proto::system::StatusCode _status) { @@ -481,14 +481,14 @@ void TMaster::HandleRestoreTopologyStateResponse(Connection* _conn, } } -void TMaster::GetPhysicalPlanDone(shared_ptr _pplan, +void TManager::GetPhysicalPlanDone(shared_ptr _pplan, proto::system::StatusCode _code) { // Physical plan need not exist. First check if some other error occurred. if (_code != proto::system::OK && _code != proto::system::PATH_DOES_NOT_EXIST) { // Something bad happened. Bail out! // TODO(kramasamy): This is not as bad as it seems. Maybe we can delete this assignment // and have a new assignment instead. - LOG(ERROR) << "For topology " << tmaster_location_->topology_name() + LOG(ERROR) << "For topology " << tmanager_location_->topology_name() << " Error getting assignment. 
Error code is " << _code << std::endl; ::exit(1); } @@ -507,53 +507,53 @@ void TMaster::GetPhysicalPlanDone(shared_ptr _pplan // Now that we have our state all setup, its time to start accepting requests // Port for the stmgrs to connect to - NetworkOptions master_options; - master_options.set_host(myhost_name_); - master_options.set_port(master_port_); - master_options.set_max_packet_size(config::HeronInternalsConfigReader::Instance() - ->GetHeronTmasterNetworkMasterOptionsMaximumPacketMb() * + NetworkOptions server_options; + server_options.set_host(myhost_name_); + server_options.set_port(server_port_); + server_options.set_max_packet_size(config::HeronInternalsConfigReader::Instance() + ->GetHeronTmanagerNetworkServerOptionsMaximumPacketMb() * 1_MB); - master_options.set_socket_family(PF_INET); - master_ = make_unique(eventLoop_, master_options, metrics_collector_, this); + server_options.set_socket_family(PF_INET); + server_ = make_unique(eventLoop_, server_options, metrics_collector_, this); - sp_int32 retval = master_->Start(); + sp_int32 retval = server_->Start(); if (retval != SP_OK) { - LOG(FATAL) << "Failed to start TMaster Master Server with rcode: " << retval; + LOG(FATAL) << "Failed to start TManager Server with rcode: " << retval; } // Port for the scheduler to connect to NetworkOptions controller_options; controller_options.set_host(myhost_name_); - controller_options.set_port(tmaster_controller_port_); + controller_options.set_port(tmanager_controller_port_); controller_options.set_max_packet_size( config::HeronInternalsConfigReader::Instance() - ->GetHeronTmasterNetworkControllerOptionsMaximumPacketMb() * + ->GetHeronTmanagerNetworkControllerOptionsMaximumPacketMb() * 1_MB); controller_options.set_socket_family(PF_INET); - tmaster_controller_ = make_unique(eventLoop_, controller_options, this); + tmanager_controller_ = make_unique(eventLoop_, controller_options, this); - retval = tmaster_controller_->Start(); + retval = tmanager_controller_->Start(); if (retval != SP_OK) { - LOG(FATAL) << "Failed to start TMaster Controller Server with rcode: " << retval; + LOG(FATAL) << "Failed to start TManager Controller Server with rcode: " << retval; } // Http port for stat queries NetworkOptions stats_options; if (config::HeronInternalsConfigReader::Instance() - ->GetHeronTmasterMetricsNetworkBindAllInterfaces()) { + ->GetHeronTmanagerMetricsNetworkBindAllInterfaces()) { stats_options.set_host("0.0.0.0"); } else { stats_options.set_host(myhost_name_); } stats_options.set_port(stats_port_); stats_options.set_max_packet_size(config::HeronInternalsConfigReader::Instance() - ->GetHeronTmasterNetworkStatsOptionsMaximumPacketMb() * + ->GetHeronTmanagerNetworkStatsOptionsMaximumPacketMb() * 1_MB); stats_options.set_socket_family(PF_INET); stats_ = make_unique(eventLoop_, stats_options, metrics_collector_, this); } -void TMaster::ActivateTopology(VCallback cb) { +void TManager::ActivateTopology(VCallback cb) { CHECK_EQ(current_pplan_->topology().state(), proto::api::PAUSED); DCHECK(current_pplan_->topology().IsInitialized()); @@ -570,7 +570,7 @@ void TMaster::ActivateTopology(VCallback cb) { state_mgr_->SetPhysicalPlan(*new_pplan, std::move(callback)); } -void TMaster::DeActivateTopology(VCallback cb) { +void TManager::DeActivateTopology(VCallback cb) { CHECK_EQ(current_pplan_->topology().state(), proto::api::RUNNING); DCHECK(current_pplan_->topology().IsInitialized()); @@ -587,7 +587,7 @@ void TMaster::DeActivateTopology(VCallback cb) { state_mgr_->SetPhysicalPlan(*new_pplan, 
std::move(callback));
 }

-bool TMaster::UpdateRuntimeConfig(const ComponentConfigMap& _config,
+bool TManager::UpdateRuntimeConfig(const ComponentConfigMap& _config,
                                   VCallback cb) {
   DCHECK(current_pplan_->topology().IsInitialized());
@@ -612,11 +612,11 @@ bool TMaster::UpdateRuntimeConfig(const ComponentConfigMap& _config,
   return true;
 }

-void TMaster::CleanAllStatefulCheckpoint() {
+void TManager::CleanAllStatefulCheckpoint() {
   ckptmgr_client_->SendCleanStatefulCheckpointRequest("", true);
 }

-void TMaster::HandleStatefulCheckpointSave(
+void TManager::HandleStatefulCheckpointSave(
     const proto::ckptmgr::StatefulConsistentCheckpoints &new_ckpts) {
   // broadcast globally consistent checkpoint completion
   proto::ckptmgr::StatefulConsistentCheckpointSaved msg;
@@ -635,14 +635,14 @@ void TMaster::HandleStatefulCheckpointSave(
 }

 // Called when ckptmgr completes the clean stateful checkpoint request
-void TMaster::HandleCleanStatefulCheckpointResponse(proto::system::StatusCode _status) {
-  tmaster_controller_->HandleCleanStatefulCheckpointResponse(_status);
+void TManager::HandleCleanStatefulCheckpointResponse(proto::system::StatusCode _status) {
+  tmanager_controller_->HandleCleanStatefulCheckpointResponse(_status);
 }

 // Update configurations in physical plan.
 // Return false if a config doesn't exist, but this shouldn't happen if the config has been
 // validated using the ValidateRuntimeConfig() function.
-bool TMaster::UpdateRuntimeConfigInTopology(proto::api::Topology* _topology,
+bool TManager::UpdateRuntimeConfigInTopology(proto::api::Topology* _topology,
                                             const ComponentConfigMap& _config) {
   DCHECK(_topology->IsInitialized());
@@ -662,11 +662,11 @@ bool TMaster::UpdateRuntimeConfigInTopology(proto::api::Topology* _topology,
   return true;
 }

-bool TMaster::ValidateRuntimeConfig(const ComponentConfigMap& _config) const {
+bool TManager::ValidateRuntimeConfig(const ComponentConfigMap& _config) const {
   return ValidateRuntimeConfigNames(_config);
 }

-void TMaster::KillContainer(const std::string& host_name,
+void TManager::KillContainer(const std::string& host_name,
                             sp_int32 shell_port, const std::string& stmgr_id) {
   LOG(INFO) << "Start killing " << stmgr_id << " on " << host_name << ":" << shell_port;
@@ -689,7 +689,7 @@ void TMaster::KillContainer(const std::string& host_name,
   return;
 }

-proto::system::Status* TMaster::RegisterStMgr(
+proto::system::Status* TManager::RegisterStMgr(
     const proto::system::StMgr& _stmgr, const std::vector>& _instances,
     Connection* _conn, shared_ptr& _pplan) {
@@ -738,7 +738,7 @@ proto::system::Status* TMaster::RegisterStMgr(
     return status;
   } else {
     // This guy was indeed expected
-    stmgrs_[stmgr_id] = make_shared(_conn, _stmgr, _instances, *master_);
+    stmgrs_[stmgr_id] = make_shared(_conn, _stmgr, _instances, *server_);
     connection_to_stmgr_id_[_conn] = stmgr_id;
     absent_stmgrs_.erase(stmgr_id);
   }
@@ -762,7 +762,7 @@ proto::system::Status* TMaster::RegisterStMgr(
   return status;
 }

-void TMaster::DoPhysicalPlan(EventLoop::Status) {
+void TManager::DoPhysicalPlan(EventLoop::Status) {
   do_reassign_ = false;

   if (!absent_stmgrs_.empty()) {
@@ -801,7 +801,7 @@ void TMaster::DoPhysicalPlan(EventLoop::Status) {
   }
 }

-void TMaster::SetPhysicalPlanDone(shared_ptr _pplan,
+void TManager::SetPhysicalPlanDone(shared_ptr _pplan,
                                   proto::system::StatusCode _code) {
   if (_code != proto::system::OK) {
     LOG(ERROR) << "Error writing assignment to statemgr. Error code is " << _code << std::endl;
@@ -832,7 +832,7 @@ void TMaster::SetPhysicalPlanDone(shared_ptr _pplan
   }
 }

-bool TMaster::DistributePhysicalPlan() {
+bool TManager::DistributePhysicalPlan() {
   if (current_pplan_) {
     // First validate the physical plan to distribute
     LOG(INFO) << "To distribute new physical plan:" << std::endl;
@@ -851,9 +851,9 @@ bool TMaster::DistributePhysicalPlan() {
   return false;
 }

-std::unique_ptr TMaster::GetStmgrsRegSummary() {
-  auto response = std::unique_ptr(
-      new proto::tmaster::StmgrsRegistrationSummaryResponse());
+std::unique_ptr TManager::GetStmgrsRegSummary() {
+  auto response = std::unique_ptr<proto::tmanager::StmgrsRegistrationSummaryResponse>(
+      new proto::tmanager::StmgrsRegistrationSummaryResponse());
   for (auto it = stmgrs_.begin(); it != stmgrs_.end(); ++it) {
     response->add_registered_stmgrs(it->first);
@@ -866,7 +866,7 @@ std::unique_ptr TMaster::GetS
   return response;
 }

-shared_ptr TMaster::MakePhysicalPlan() {
+shared_ptr TManager::MakePhysicalPlan() {
   // TODO(kramasamy): At some point, we need to talk to our scheduler
   // and do this scheduling
   if (current_pplan_) {
@@ -889,9 +889,9 @@ shared_ptr TMaster::MakePhysicalPlan() {
     return new_pplan;
   }

-  // TMaster does not really have any control over who does what.
+  // TManager does not really have any control over who does what.
   // That has already been decided while launching the jobs.
-  // TMaster just stitches the info together to pass to everyone
+  // TManager just stitches the info together to pass to everyone

   // Build the PhysicalPlan structure
   auto new_pplan = make_shared();
@@ -910,7 +910,7 @@ shared_ptr TMaster::MakePhysicalPlan() {
   return new_pplan;
 }

-proto::system::Status* TMaster::UpdateStMgrHeartbeat(Connection* _conn, sp_int64 _time,
+proto::system::Status* TManager::UpdateStMgrHeartbeat(Connection* _conn, sp_int64 _time,
                                                      proto::system::StMgrStats* _stats) {
   proto::system::Status* retval = new proto::system::Status();
   if (connection_to_stmgr_id_.find(_conn) == connection_to_stmgr_id_.end()) {
@@ -935,7 +935,7 @@ proto::system::Status* TMaster::UpdateStMgrHeartbeat(Connection* _conn, sp_int64
   return retval;
 }

-proto::system::StatusCode TMaster::RemoveStMgrConnection(Connection* _conn) {
+proto::system::StatusCode TManager::RemoveStMgrConnection(Connection* _conn) {
   if (connection_to_stmgr_id_.find(_conn) == connection_to_stmgr_id_.end()) {
     return proto::system::INVALID_STMGR;
   }
@@ -955,16 +955,16 @@ proto::system::StatusCode TMaster::RemoveStMgrConnection(Connection* _conn) {
 ////////////////////////////////////////////////////////////////////////////////
 // Below are validity checking functions
 ////////////////////////////////////////////////////////////////////////////////
-bool TMaster::ValidateTopology(const proto::api::Topology& _topology) {
-  if (tmaster_location_->topology_name() != _topology.name()) {
+bool TManager::ValidateTopology(const proto::api::Topology& _topology) {
+  if (tmanager_location_->topology_name() != _topology.name()) {
     LOG(ERROR) << "topology name mismatch! Expected topology name is "
-               << tmaster_location_->topology_name() << " but found in zk " << _topology.name()
+               << tmanager_location_->topology_name() << " but found in zk " << _topology.name()
               << std::endl;
     return false;
   }
-  if (tmaster_location_->topology_id() != _topology.id()) {
+  if (tmanager_location_->topology_id() != _topology.id()) {
     LOG(ERROR) << "topology id mismatch!
Expected topology id is " - << tmaster_location_->topology_id() << " but found in zk " << _topology.id() + << tmanager_location_->topology_id() << " but found in zk " << _topology.id() << std::endl; return false; } @@ -988,7 +988,7 @@ bool TMaster::ValidateTopology(const proto::api::Topology& _topology) { return true; } -bool TMaster::ValidateStMgrsWithPackingPlan() { +bool TManager::ValidateStMgrsWithPackingPlan() { // here we check to see if the total number of instances // across all stmgrs match up to all the spout/bolt // parallelism the packing plan has specified @@ -1005,7 +1005,7 @@ bool TMaster::ValidateStMgrsWithPackingPlan() { return ninstances == ntasks; } -bool TMaster::ValidateStMgrsWithPhysicalPlan(shared_ptr _pplan) { +bool TManager::ValidateStMgrsWithPhysicalPlan(shared_ptr _pplan) { std::map > stmgr_to_instance_map; for (sp_int32 i = 0; i < _pplan->instances_size(); ++i) { proto::system::Instance* instance = _pplan->mutable_instances(i); @@ -1035,7 +1035,7 @@ bool TMaster::ValidateStMgrsWithPhysicalPlan(shared_ptrtopology(); DCHECK(topology.IsInitialized()); @@ -1057,7 +1057,7 @@ bool TMaster::ValidateRuntimeConfigNames(const ComponentConfigMap& _config) cons return true; } -void TMaster::LogConfig(const ComponentConfigMap& _config) { +void TManager::LogConfig(const ComponentConfigMap& _config) { for (auto iter = _config.begin(); iter != _config.end(); ++iter) { LOG(INFO) << iter->first << " =>"; for (auto i = iter->second.begin(); i != iter->second.end(); ++i) { @@ -1066,5 +1066,5 @@ void TMaster::LogConfig(const ComponentConfigMap& _config) { } } -} // namespace tmaster +} // namespace tmanager } // namespace heron diff --git a/heron/tmaster/src/cpp/manager/tmaster.h b/heron/tmanager/src/cpp/manager/tmanager.h similarity index 88% rename from heron/tmaster/src/cpp/manager/tmaster.h rename to heron/tmanager/src/cpp/manager/tmanager.h index 3bdd25ef3a8..0d3cccd2af2 100644 --- a/heron/tmaster/src/cpp/manager/tmaster.h +++ b/heron/tmanager/src/cpp/manager/tmanager.h @@ -17,8 +17,8 @@ * under the License. 
*/ -#ifndef __TMASTER_H -#define __TMASTER_H +#ifndef __TMANAGER_H +#define __TMANAGER_H #include #include @@ -29,11 +29,11 @@ #include "metrics/metrics-mgr-st.h" #include "metrics/metrics.h" #include "network/network.h" -#include "proto/tmaster.pb.h" +#include "proto/tmanager.pb.h" #include "basics/basics.h" namespace heron { -namespace tmaster { +namespace tmanager { using std::unique_ptr; using std::shared_ptr; @@ -41,7 +41,7 @@ using std::shared_ptr; class StMgrState; class TController; class StatsInterface; -class TMasterServer; +class TManagerServer; class TMetricsCollector; class StatefulController; class CkptMgrClient; @@ -53,16 +53,16 @@ typedef std::map ConfigValueMap; // From component name to config/value pairs typedef std::map> ComponentConfigMap; -class TMaster { +class TManager { public: - TMaster(const std::string& _zk_hostport, const std::string& _topology_name, + TManager(const std::string& _zk_hostport, const std::string& _topology_name, const std::string& _topology_id, const std::string& _topdir, - sp_int32 _tmaster_controller_port, sp_int32 _master_port, + sp_int32 _tmanager_controller_port, sp_int32 _server_port, sp_int32 _stats_port, sp_int32 metricsMgrPort, sp_int32 _ckptmgr_port, const std::string& metrics_sinks_yaml, const std::string& _myhost_name, shared_ptr eventLoop); - virtual ~TMaster(); + virtual ~TManager(); const std::string& GetTopologyId() const { return current_pplan_->topology().id(); } const std::string& GetTopologyName() const { return current_pplan_->topology().name(); } @@ -93,12 +93,12 @@ class TMaster { void HandleCleanStatefulCheckpointResponse(proto::system::StatusCode); // Get stream managers registration summary - std::unique_ptr GetStmgrsRegSummary(); + std::unique_ptr GetStmgrsRegSummary(); // Accessors const shared_ptr getPhysicalPlan() const { return current_pplan_; } // TODO(mfu): Should we provide this? - // topology_ should only be used to construct physical plan when TMaster first starts + // topology_ should only be used to construct physical plan when TManager first starts // Providing an accessor is bug prone. // Now used in GetMetrics function in tmetrics-collector const proto::api::Topology& getInitialTopology() const { return *topology_; } @@ -106,17 +106,17 @@ class TMaster { // Timer function to start the stateful checkpoint process void SendCheckpointMarker(); - // Called by tmaster server when it gets InstanceStateStored message + // Called by tmanager server when it gets InstanceStateStored message void HandleInstanceStateStored(const std::string& _checkpoint_id, const proto::system::Instance& _instance); - // Called by tmaster server when it gets RestoreTopologyStateResponse message + // Called by tmanager server when it gets RestoreTopologyStateResponse message void HandleRestoreTopologyStateResponse(Connection* _conn, const std::string& _checkpoint_id, int64_t _restore_txid, proto::system::StatusCode _status); - // Called by tmaster server when it gets ResetTopologyState message + // Called by tmanager server when it gets ResetTopologyState message void ResetTopologyState(Connection* _conn, const std::string& _dead_stmgr, int32_t _dead_instance, const std::string& _reason); @@ -156,8 +156,8 @@ class TMaster { // 1. 
Distribute physical plan to all active stmgrs bool DistributePhysicalPlan(); - // Function called after we set the tmasterlocation - void SetTMasterLocationDone(proto::system::StatusCode _code); + // Function called after we set the tmanagerlocation + void SetTManagerLocationDone(proto::system::StatusCode _code); // Function called after we get the topology void GetTopologyDone(proto::system::StatusCode _code); @@ -178,8 +178,8 @@ class TMaster { void SetPhysicalPlanDone(shared_ptr _pplan, proto::system::StatusCode _code); - // Function called when we want to setup ourselves as tmaster - void EstablishTMaster(EventLoop::Status); + // Function called when we want to setup ourselves as tmanager + void EstablishTManager(EventLoop::Status); void EstablishPackingPlan(EventLoop::Status); void FetchPackingPlan(); @@ -216,7 +216,7 @@ class TMaster { shared_ptr current_pplan_; // The topology as first submitted by the user - // It shall only be used to construct the physical plan when TMaster first time starts + // It shall only be used to construct the physical plan when TManager first time starts // Any runtime changes shall be made to current_pplan_->topology unique_ptr topology_; @@ -225,8 +225,8 @@ class TMaster { // The statemgr where we store/retrieve our state shared_ptr state_mgr_; - // Our copy of the tmasterlocation - unique_ptr tmaster_location_; + // Our copy of the tmanagerlocation + unique_ptr tmanager_location_; // When we are in the middle of doing assignment // we set this to true @@ -238,17 +238,17 @@ class TMaster { std::string topdir_; // Servers that implement our services - unique_ptr tmaster_controller_; - sp_int32 tmaster_controller_port_; - unique_ptr master_; - sp_int32 master_port_; + unique_ptr tmanager_controller_; + sp_int32 tmanager_controller_port_; + unique_ptr server_; + sp_int32 server_port_; unique_ptr stats_; sp_int32 stats_port_; std::string myhost_name_; // how many times have we tried to establish - // ourselves as master - sp_int32 master_establish_attempts_; + // ourselves as server + sp_int32 server_establish_attempts_; // collector shared_ptr metrics_collector_; @@ -262,7 +262,7 @@ class TMaster { sp_int32 ckptmgr_port_; // Process related metrics - shared_ptr tmasterProcessMetrics; + shared_ptr tmanagerProcessMetrics; // The time at which the stmgr was started up std::chrono::high_resolution_clock::time_point start_time_; @@ -277,7 +277,7 @@ class TMaster { // Copy of the EventLoop shared_ptr eventLoop_; }; -} // namespace tmaster +} // namespace tmanager } // namespace heron #endif diff --git a/heron/tmaster/src/cpp/manager/tmasterserver.cpp b/heron/tmanager/src/cpp/manager/tmanagerserver.cpp similarity index 55% rename from heron/tmaster/src/cpp/manager/tmasterserver.cpp rename to heron/tmanager/src/cpp/manager/tmanagerserver.cpp index 82096ddc9c8..7242ac8a2d6 100644 --- a/heron/tmaster/src/cpp/manager/tmasterserver.cpp +++ b/heron/tmanager/src/cpp/manager/tmanagerserver.cpp @@ -17,90 +17,90 @@ * under the License. 
*/ -#include "manager/tmasterserver.h" +#include "manager/tmanagerserver.h" #include #include "manager/tmetrics-collector.h" -#include "manager/tmaster.h" +#include "manager/tmanager.h" #include "processor/processor.h" #include "proto/messages.h" #include "basics/basics.h" #include "errors/errors.h" #include "threads/threads.h" #include "network/network.h" -#include "metrics/tmaster-metrics.h" +#include "metrics/tmanager-metrics.h" namespace heron { -namespace tmaster { +namespace tmanager { using std::unique_ptr; using std::shared_ptr; -TMasterServer::TMasterServer(std::shared_ptr eventLoop, const NetworkOptions& _options, - shared_ptr _collector, TMaster* _tmaster) - : Server(eventLoop, _options), collector_(_collector), tmaster_(_tmaster) { +TManagerServer::TManagerServer(std::shared_ptr eventLoop, const NetworkOptions& _options, + shared_ptr _collector, TManager* _tmanager) + : Server(eventLoop, _options), collector_(_collector), tmanager_(_tmanager) { // Install the stmgr handlers - InstallRequestHandler(&TMasterServer::HandleStMgrRegisterRequest); - InstallRequestHandler(&TMasterServer::HandleStMgrHeartbeatRequest); - InstallMessageHandler(&TMasterServer::HandleInstanceStateStored); - InstallMessageHandler(&TMasterServer::HandleRestoreTopologyStateResponse); - InstallMessageHandler(&TMasterServer::HandleResetTopologyStateMessage); + InstallRequestHandler(&TManagerServer::HandleStMgrRegisterRequest); + InstallRequestHandler(&TManagerServer::HandleStMgrHeartbeatRequest); + InstallMessageHandler(&TManagerServer::HandleInstanceStateStored); + InstallMessageHandler(&TManagerServer::HandleRestoreTopologyStateResponse); + InstallMessageHandler(&TManagerServer::HandleResetTopologyStateMessage); // Install the metricsmgr handlers - InstallMessageHandler(&TMasterServer::HandleMetricsMgrStats); + InstallMessageHandler(&TManagerServer::HandleMetricsMgrStats); } -TMasterServer::~TMasterServer() { +TManagerServer::~TManagerServer() { // Nothing really } -void TMasterServer::HandleNewConnection(Connection* conn) { +void TManagerServer::HandleNewConnection(Connection* conn) { // There is nothing to be done here. 
Instead we wait for // the register message } -void TMasterServer::HandleConnectionClose(Connection* _conn, NetworkErrorCode) { - if (tmaster_->RemoveStMgrConnection(_conn) != proto::system::OK) { +void TManagerServer::HandleConnectionClose(Connection* _conn, NetworkErrorCode) { + if (tmanager_->RemoveStMgrConnection(_conn) != proto::system::OK) { LOG(WARNING) << "Unknown connection closed on us from " << _conn->getIPAddress() << ":" << _conn->getPort() << ", possibly metrics mgr"; return; } } -void TMasterServer::HandleStMgrRegisterRequest(REQID _reqid, Connection* _conn, - pool_unique_ptr _request) { +void TManagerServer::HandleStMgrRegisterRequest(REQID _reqid, Connection* _conn, + pool_unique_ptr _request) { unique_ptr processor = - make_unique(_reqid, _conn, std::move(_request), tmaster_, this); + make_unique(_reqid, _conn, std::move(_request), tmanager_, this); processor->Start(); } -void TMasterServer::HandleStMgrHeartbeatRequest(REQID _reqid, Connection* _conn, - pool_unique_ptr _request) { +void TManagerServer::HandleStMgrHeartbeatRequest(REQID _reqid, Connection* _conn, + pool_unique_ptr _request) { unique_ptr processor = - make_unique(_reqid, _conn, std::move(_request), tmaster_, this); + make_unique(_reqid, _conn, std::move(_request), tmanager_, this); processor->Start(); } -void TMasterServer::HandleMetricsMgrStats(Connection*, - pool_unique_ptr _request) { +void TManagerServer::HandleMetricsMgrStats(Connection*, + pool_unique_ptr _request) { collector_->AddMetric(*_request); } -void TMasterServer::HandleInstanceStateStored(Connection*, +void TManagerServer::HandleInstanceStateStored(Connection*, pool_unique_ptr _message) { - tmaster_->HandleInstanceStateStored(_message->checkpoint_id(), _message->instance()); + tmanager_->HandleInstanceStateStored(_message->checkpoint_id(), _message->instance()); } -void TMasterServer::HandleRestoreTopologyStateResponse(Connection* _conn, +void TManagerServer::HandleRestoreTopologyStateResponse(Connection* _conn, pool_unique_ptr _message) { - tmaster_->HandleRestoreTopologyStateResponse(_conn, _message->checkpoint_id(), + tmanager_->HandleRestoreTopologyStateResponse(_conn, _message->checkpoint_id(), _message->restore_txid(), _message->status().status()); } -void TMasterServer::HandleResetTopologyStateMessage(Connection* _conn, +void TManagerServer::HandleResetTopologyStateMessage(Connection* _conn, pool_unique_ptr _message) { - tmaster_->ResetTopologyState(_conn, _message->dead_stmgr(), + tmanager_->ResetTopologyState(_conn, _message->dead_stmgr(), _message->dead_taskid(), _message->reason()); } -} // namespace tmaster +} // namespace tmanager } // namespace heron diff --git a/heron/tmaster/src/cpp/manager/tmasterserver.h b/heron/tmanager/src/cpp/manager/tmanagerserver.h similarity index 69% rename from heron/tmaster/src/cpp/manager/tmasterserver.h rename to heron/tmanager/src/cpp/manager/tmanagerserver.h index 0524b612314..885980404aa 100644 --- a/heron/tmaster/src/cpp/manager/tmasterserver.h +++ b/heron/tmanager/src/cpp/manager/tmanagerserver.h @@ -17,28 +17,28 @@ * under the License. 
*/ -#ifndef __TMASTERSERVER_H -#define __TMASTERSERVER_H +#ifndef __TMANAGERSERVER_H +#define __TMANAGERSERVER_H #include "network/network_error.h" #include "network/network.h" -#include "proto/tmaster.pb.h" +#include "proto/tmanager.pb.h" #include "proto/ckptmgr.pb.h" #include "basics/basics.h" namespace heron { -namespace tmaster { +namespace tmanager { using std::shared_ptr; -class TMaster; +class TManager; class TMetricsCollector; -class TMasterServer : public Server { +class TManagerServer : public Server { public: - TMasterServer(std::shared_ptr eventLoop, const NetworkOptions& options, - shared_ptr _collector, TMaster* _tmaster); - virtual ~TMasterServer(); + TManagerServer(std::shared_ptr eventLoop, const NetworkOptions& options, + shared_ptr _collector, TManager* _tmanager); + virtual ~TManagerServer(); protected: virtual void HandleNewConnection(Connection* newConnection); @@ -47,28 +47,28 @@ class TMasterServer : public Server { private: // Various handlers for different requests void HandleStMgrRegisterRequest(REQID _id, Connection* _conn, - pool_unique_ptr _request); + pool_unique_ptr _request); void HandleStMgrHeartbeatRequest(REQID _id, Connection* _conn, - pool_unique_ptr _request); - void HandleMetricsMgrStats(Connection*, pool_unique_ptr _request); + pool_unique_ptr _request); + void HandleMetricsMgrStats(Connection*, pool_unique_ptr _request); - // Message sent by stmgr to tell tmaster that a particular checkpoint message - // was saved. This way the tmaster can keep track of which all instances have saved their + // Message sent by stmgr to tell tmanager that a particular checkpoint message + // was saved. This way the tmanager can keep track of which all instances have saved their // state for any given checkpoint id. void HandleInstanceStateStored(Connection*, pool_unique_ptr _message); // Handle response from stmgr for the RestoreTopologyStateRequest void HandleRestoreTopologyStateResponse(Connection*, pool_unique_ptr _message); - // Stmgr can request tmaster to reset the state of the topology in case it finds any errors. + // Stmgr can request tmanager to reset the state of the topology in case it finds any errors. 
void HandleResetTopologyStateMessage(Connection*, pool_unique_ptr _message); - // our tmaster + // our tmanager shared_ptr collector_; - TMaster* tmaster_; + TManager* tmanager_; }; -} // namespace tmaster +} // namespace tmanager } // namespace heron #endif diff --git a/heron/tmaster/src/cpp/manager/tmetrics-collector.cpp b/heron/tmanager/src/cpp/manager/tmetrics-collector.cpp similarity index 86% rename from heron/tmaster/src/cpp/manager/tmetrics-collector.cpp rename to heron/tmanager/src/cpp/manager/tmetrics-collector.cpp index ede0393f67f..f6b3300860f 100644 --- a/heron/tmaster/src/cpp/manager/tmetrics-collector.cpp +++ b/heron/tmanager/src/cpp/manager/tmetrics-collector.cpp @@ -22,41 +22,41 @@ #include #include #include -#include "metrics/tmaster-metrics.h" +#include "metrics/tmanager-metrics.h" #include "basics/basics.h" #include "errors/errors.h" #include "threads/threads.h" #include "network/network.h" #include "zookeeper/zkclient.h" #include "proto/metrics.pb.h" -#include "proto/tmaster.pb.h" +#include "proto/tmanager.pb.h" #include "proto/topology.pb.h" #include "config/heron-internals-config-reader.h" namespace { -typedef heron::common::TMasterMetrics TMasterMetrics; -typedef heron::proto::tmaster::ExceptionLogRequest ExceptionLogRequest; -typedef heron::proto::tmaster::ExceptionLogResponse ExceptionLogResponse; -typedef heron::proto::tmaster::MetricRequest MetricRequest; -typedef heron::proto::tmaster::MetricResponse MetricResponse; -typedef heron::proto::tmaster::MetricResponse::IndividualMetric IndividualMetric; -typedef heron::proto::tmaster::MetricResponse::IndividualMetric::IntervalValue IntervalValue; -typedef heron::proto::tmaster::TmasterExceptionLog TmasterExceptionLog; -typedef heron::proto::tmaster::PublishMetrics PublishMetrics; +typedef heron::common::TManagerMetrics TManagerMetrics; +typedef heron::proto::tmanager::ExceptionLogRequest ExceptionLogRequest; +typedef heron::proto::tmanager::ExceptionLogResponse ExceptionLogResponse; +typedef heron::proto::tmanager::MetricRequest MetricRequest; +typedef heron::proto::tmanager::MetricResponse MetricResponse; +typedef heron::proto::tmanager::MetricResponse::IndividualMetric IndividualMetric; +typedef heron::proto::tmanager::MetricResponse::IndividualMetric::IntervalValue IntervalValue; +typedef heron::proto::tmanager::TmanagerExceptionLog TmanagerExceptionLog; +typedef heron::proto::tmanager::PublishMetrics PublishMetrics; } // namespace namespace heron { -namespace tmaster { +namespace tmanager { TMetricsCollector::TMetricsCollector(sp_int32 _max_interval, std::shared_ptr eventLoop, const std::string& metrics_sinks_yaml) : max_interval_(_max_interval), eventLoop_(eventLoop), metrics_sinks_yaml_(metrics_sinks_yaml), - tmetrics_info_(make_unique(metrics_sinks_yaml, eventLoop)), + tmetrics_info_(make_unique(metrics_sinks_yaml, eventLoop)), start_time_(time(NULL)) { interval_ = config::HeronInternalsConfigReader::Instance() - ->GetHeronTmasterMetricsCollectorPurgeIntervalSec(); + ->GetHeronTmanagerMetricsCollectorPurgeIntervalSec(); CHECK_EQ(max_interval_ % interval_, 0); nintervals_ = max_interval_ / interval_; auto cb = [this](EventLoop::Status status) { this->Purge(status); }; @@ -75,16 +75,16 @@ void TMetricsCollector::Purge(EventLoop::Status) { } void TMetricsCollector::AddMetricsForComponent(const sp_string& component_name, - const proto::tmaster::MetricDatum& metrics_data) { + const proto::tmanager::MetricDatum& metrics_data) { auto component_metrics = GetOrCreateComponentMetrics(component_name); const sp_string& 
name = metrics_data.name(); - const TMasterMetrics::MetricAggregationType& type = tmetrics_info_->GetAggregationType(name); + const TManagerMetrics::MetricAggregationType& type = tmetrics_info_->GetAggregationType(name); component_metrics->AddMetricForInstance(metrics_data.instance_id(), name, type, metrics_data.value()); } void TMetricsCollector::AddExceptionsForComponent(const sp_string& component_name, - const TmasterExceptionLog& exception_log) { + const TmanagerExceptionLog& exception_log) { auto component_metrics = GetOrCreateComponentMetrics(component_name); component_metrics->AddExceptionForInstance(exception_log.instance_id(), exception_log); } @@ -216,21 +216,21 @@ void TMetricsCollector::AggregateExceptions(const ExceptionLogResponse& all_exce using std::map; using std::string; - map> exception_summary; // Owns exception log pointer. + map> exception_summary; // Owns exception log pointer. for (int i = 0; i < all_exceptions.exceptions_size(); ++i) { - const TmasterExceptionLog& log = all_exceptions.exceptions(i); + const TmanagerExceptionLog& log = all_exceptions.exceptions(i); // Get classname by splitting on first colon const std::string& stack_trace = log.stacktrace(); size_t pos = stack_trace.find_first_of(':'); if (pos != std::string::npos) { const std::string class_name = stack_trace.substr(0, pos); if (exception_summary.find(class_name) == exception_summary.end()) { - auto new_exception = make_unique(); + auto new_exception = make_unique(); new_exception->CopyFrom(log); new_exception->set_stacktrace(class_name); exception_summary[class_name] = std::move(new_exception); } else { - TmasterExceptionLog& prev_log = *exception_summary[class_name]; + TmanagerExceptionLog& prev_log = *exception_summary[class_name]; prev_log.set_count(log.count() + prev_log.count()); prev_log.set_lasttime(log.lasttime()); } @@ -266,14 +266,14 @@ void TMetricsCollector::ComponentMetrics::Purge() { } void TMetricsCollector::ComponentMetrics::AddMetricForInstance( - const sp_string& instance_id, const sp_string& name, TMasterMetrics::MetricAggregationType type, + const sp_string& instance_id, const sp_string& name, TManagerMetrics::MetricAggregationType type, const sp_string& value) { auto instance_metrics = GetOrCreateInstanceMetrics(instance_id); instance_metrics->AddMetricWithName(name, type, value); } void TMetricsCollector::ComponentMetrics::AddExceptionForInstance( - const sp_string& instance_id, const TmasterExceptionLog& exception) { + const sp_string& instance_id, const TmanagerExceptionLog& exception) { auto instance_metrics = GetOrCreateInstanceMetrics(instance_id); instance_metrics->AddExceptions(exception); } @@ -343,29 +343,29 @@ void TMetricsCollector::InstanceMetrics::Purge() { } void TMetricsCollector::InstanceMetrics::AddMetricWithName( - const sp_string& name, common::TMasterMetrics::MetricAggregationType type, + const sp_string& name, common::TManagerMetrics::MetricAggregationType type, const sp_string& value) { auto metric_data = GetOrCreateMetric(name, type); metric_data->AddValueToMetric(value); } // Creates a copy of exception and takes ownership of the pointer. -void TMetricsCollector::InstanceMetrics::AddExceptions(const TmasterExceptionLog& exception) { +void TMetricsCollector::InstanceMetrics::AddExceptions(const TmanagerExceptionLog& exception) { // TODO(kramasamy): Aggregate exceptions across minutely buckets. Try to avoid duplication of // hash-fuction // used to aggregate in heron-worker. 
@@ -266,14 +266,14 @@ void TMetricsCollector::ComponentMetrics::Purge() {
 }
 
 void TMetricsCollector::ComponentMetrics::AddMetricForInstance(
-    const sp_string& instance_id, const sp_string& name, TMasterMetrics::MetricAggregationType type,
+    const sp_string& instance_id, const sp_string& name, TManagerMetrics::MetricAggregationType type,
     const sp_string& value) {
   auto instance_metrics = GetOrCreateInstanceMetrics(instance_id);
   instance_metrics->AddMetricWithName(name, type, value);
 }
 
 void TMetricsCollector::ComponentMetrics::AddExceptionForInstance(
-    const sp_string& instance_id, const TmasterExceptionLog& exception) {
+    const sp_string& instance_id, const TmanagerExceptionLog& exception) {
   auto instance_metrics = GetOrCreateInstanceMetrics(instance_id);
   instance_metrics->AddExceptions(exception);
 }
@@ -343,29 +343,29 @@ void TMetricsCollector::InstanceMetrics::Purge() {
 }
 
 void TMetricsCollector::InstanceMetrics::AddMetricWithName(
-    const sp_string& name, common::TMasterMetrics::MetricAggregationType type,
+    const sp_string& name, common::TManagerMetrics::MetricAggregationType type,
     const sp_string& value) {
   auto metric_data = GetOrCreateMetric(name, type);
   metric_data->AddValueToMetric(value);
 }
 
 // Creates a copy of exception and takes ownership of the pointer.
-void TMetricsCollector::InstanceMetrics::AddExceptions(const TmasterExceptionLog& exception) {
+void TMetricsCollector::InstanceMetrics::AddExceptions(const TmanagerExceptionLog& exception) {
   // TODO(kramasamy): Aggregate exceptions across minutely buckets. Try to avoid duplication of
   // hash-function
   // used to aggregate in heron-worker.
-  auto new_exception = make_unique<TmasterExceptionLog>();
+  auto new_exception = make_unique<TmanagerExceptionLog>();
   new_exception->CopyFrom(exception);
   exceptions_.push_back(std::move(new_exception));
   sp_uint32 max_exception = config::HeronInternalsConfigReader::Instance()
-                                ->GetHeronTmasterMetricsCollectorMaximumException();
+                                ->GetHeronTmanagerMetricsCollectorMaximumException();
   while (exceptions_.size() > max_exception) {
     exceptions_.pop_front();
   }
 }
 
 shared_ptr<TMetricsCollector::Metric> TMetricsCollector::InstanceMetrics::GetOrCreateMetric(
-    const sp_string& name, TMasterMetrics::MetricAggregationType type) {
+    const sp_string& name, TManagerMetrics::MetricAggregationType type) {
   if (metrics_.find(name) == metrics_.end()) {
     metrics_[name] = std::make_shared<Metric>(name, type, nbuckets_, bucket_interval_);
   }
@@ -392,7 +392,7 @@ void TMetricsCollector::InstanceMetrics::GetExceptionLog(ExceptionLogResponse& r
 }
 
 TMetricsCollector::Metric::Metric(const sp_string& name,
-                                  common::TMasterMetrics::MetricAggregationType type,
+                                  common::TManagerMetrics::MetricAggregationType type,
                                   sp_int32 nbuckets, sp_int32 bucket_interval)
     : name_(name),
       metric_type_(type),
@@ -412,7 +412,7 @@ void TMetricsCollector::Metric::Purge() {
 }
 
 void TMetricsCollector::Metric::AddValueToMetric(const sp_string& _value) {
-  if (metric_type_ == common::TMasterMetrics::LAST) {
+  if (metric_type_ == common::TManagerMetrics::LAST) {
     // Just keep one value per time bucket
     data_.front()->data_.clear();
     data_.front()->data_.push_front(_value);
@@ -439,12 +439,12 @@ void TMetricsCollector::Metric::GetMetrics(bool minutely, sp_int64 start_time, s
     val->mutable_interval()->set_start(bucket.start_time_);
     val->mutable_interval()->set_end(bucket.end_time_);
     sp_double64 result = bucket.aggregate();
-    if (metric_type_ == common::TMasterMetrics::SUM) {
+    if (metric_type_ == common::TManagerMetrics::SUM) {
       val->set_value(std::to_string(result));
-    } else if (metric_type_ == common::TMasterMetrics::AVG) {
+    } else if (metric_type_ == common::TManagerMetrics::AVG) {
       sp_double64 avg = result / bucket.count();
       val->set_value(std::to_string(avg));
-    } else if (metric_type_ == common::TMasterMetrics::LAST) {
+    } else if (metric_type_ == common::TManagerMetrics::LAST) {
       val->set_value(std::to_string(result));
     } else {
       LOG(FATAL) << "Unknown metric type " << metric_type_;
@@ -458,11 +458,11 @@ void TMetricsCollector::Metric::GetMetrics(bool minutely, sp_int64 start_time, s
     sp_double64 result = 0;
     if (start_time <= 0) {
       // We want cumulative metrics
-      if (metric_type_ == common::TMasterMetrics::SUM) {
+      if (metric_type_ == common::TManagerMetrics::SUM) {
         result = all_time_cumulative_;
-      } else if (metric_type_ == common::TMasterMetrics::AVG) {
+      } else if (metric_type_ == common::TManagerMetrics::AVG) {
         result = all_time_cumulative_ / all_time_nitems_;
-      } else if (metric_type_ == common::TMasterMetrics::LAST) {
+      } else if (metric_type_ == common::TManagerMetrics::LAST) {
         result = all_time_cumulative_;
       } else {
         LOG(FATAL) << "Unknown metric type " << metric_type_;
@@ -477,16 +477,16 @@ void TMetricsCollector::Metric::GetMetrics(bool minutely, sp_int64 start_time, s
       if (bucket.overlaps(start_time, end_time)) {
         total_count += bucket.aggregate();
         total_items += bucket.count();
-        if (metric_type_ == TMasterMetrics::LAST) break;
+        if (metric_type_ == TManagerMetrics::LAST) break;
       }
       // The timebuckets are reverse chronologically arranged
       if (start_time > bucket.end_time_) break;
     }
-    if (metric_type_ == TMasterMetrics::SUM) {
+    if (metric_type_ == TManagerMetrics::SUM) {
       result = total_count;
-    } else if (metric_type_ == TMasterMetrics::AVG) {
+    } else if (metric_type_ == TManagerMetrics::AVG) {
       result = total_count / total_items;
-    } else if (metric_type_ == TMasterMetrics::LAST) {
+    } else if (metric_type_ == TManagerMetrics::LAST) {
       result = total_count;
     } else {
       LOG(FATAL) << "Unknown metric type " << metric_type_;
@@ -495,5 +495,5 @@ void TMetricsCollector::Metric::GetMetrics(bool minutely, sp_int64 start_time, s
     _response->set_value(std::to_string(result));
   }
 }
-}  // namespace tmaster
+}  // namespace tmanager
 }  // namespace heron
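
The aggregation logic renamed above folds a metric's time buckets into a single value: SUM adds every bucket, AVG divides by the item count, and LAST stops at the newest bucket. A minimal sketch of that fold:

```cpp
// Editor's sketch, not the Heron sources: how the three renamed aggregation
// types fold a bucketed metric into one value, mirroring Metric::GetMetrics.
// TimeBucket and Fold are illustrative stand-ins (no protobuf, no intervals).
#include <cstddef>
#include <numeric>
#include <vector>

enum class MetricAggregationType { SUM, AVG, LAST };

struct TimeBucket {
  std::vector<double> data;  // LAST-type metrics keep a single value per bucket
  double aggregate() const { return std::accumulate(data.begin(), data.end(), 0.0); }
  std::size_t count() const { return data.size(); }
};

// Buckets are ordered newest first, as in the collector.
double Fold(const std::vector<TimeBucket>& buckets, MetricAggregationType type) {
  double total = 0;
  std::size_t items = 0;
  for (const auto& bucket : buckets) {
    total += bucket.aggregate();
    items += bucket.count();
    if (type == MetricAggregationType::LAST) break;  // only the newest bucket counts
  }
  switch (type) {
    case MetricAggregationType::SUM:
    case MetricAggregationType::LAST:
      return total;
    case MetricAggregationType::AVG:
      return items == 0 ? 0.0 : total / items;  // guarded here; the collector divides directly
  }
  return 0.0;
}
```
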
diff --git a/heron/tmaster/src/cpp/manager/tmetrics-collector.h b/heron/tmanager/src/cpp/manager/tmetrics-collector.h
similarity index 77%
rename from heron/tmaster/src/cpp/manager/tmetrics-collector.h
rename to heron/tmanager/src/cpp/manager/tmetrics-collector.h
index 192b2a01dc7..5aa1078573c 100644
--- a/heron/tmaster/src/cpp/manager/tmetrics-collector.h
+++ b/heron/tmanager/src/cpp/manager/tmetrics-collector.h
@@ -26,12 +26,12 @@
 #include "basics/callback.h"
 #include "basics/sptypes.h"
 #include "network/event_loop.h"
-#include "proto/tmaster.pb.h"
+#include "proto/tmanager.pb.h"
 #include "proto/topology.pb.h"
-#include "metrics/tmaster-metrics.h"
+#include "metrics/tmanager-metrics.h"
 
 namespace heron {
-namespace tmaster {
+namespace tmanager {
 
 using std::unique_ptr;
 using std::shared_ptr;
@@ -50,40 +50,40 @@ class TMetricsCollector {
 
   // Initiated on receiving a new _metrics from metricsmanager. Will initiate appropriate calls
   // to add metrics/exception stored in '_metrics' to the respective components.
-  void AddMetric(const proto::tmaster::PublishMetrics& _metrics);
+  void AddMetric(const proto::tmanager::PublishMetrics& _metrics);
 
   // Returns a new response to fetch metrics. The request gets propagated to Component's and
   // Instance's get metrics. Doesn't own Response.
-  unique_ptr<proto::tmaster::MetricResponse> GetMetrics(
-      const proto::tmaster::MetricRequest& _request,
+  unique_ptr<proto::tmanager::MetricResponse> GetMetrics(
+      const proto::tmanager::MetricRequest& _request,
       const proto::api::Topology& _topology);
 
   // Returns response for fetching exceptions. Doesn't own response.
-  unique_ptr<proto::tmaster::ExceptionLogResponse> GetExceptions(
-      const proto::tmaster::ExceptionLogRequest& request);
+  unique_ptr<proto::tmanager::ExceptionLogResponse> GetExceptions(
+      const proto::tmanager::ExceptionLogRequest& request);
 
   // Returns exception summary response. Doesn't own response.
-  unique_ptr<proto::tmaster::ExceptionLogResponse> GetExceptionsSummary(
-      const proto::tmaster::ExceptionLogRequest& request);
+  unique_ptr<proto::tmanager::ExceptionLogResponse> GetExceptionsSummary(
+      const proto::tmanager::ExceptionLogRequest& request);
 
  private:
   // Fetches exceptions for ExceptionLogRequest. Save the returned exception in
   // 'all_exceptions'.
   // Doesn't own 'all_exceptions' pointer
-  void GetExceptionsHelper(const proto::tmaster::ExceptionLogRequest& request,
-                           proto::tmaster::ExceptionLogResponse& all_exceptions);
+  void GetExceptionsHelper(const proto::tmanager::ExceptionLogRequest& request,
+                           proto::tmanager::ExceptionLogResponse& all_exceptions);
 
   // Aggregate exceptions from 'all_exceptions' to 'aggregate_exceptions'.
   // Doesn't own 'aggregate_exceptions'.
-  void AggregateExceptions(const proto::tmaster::ExceptionLogResponse& all_exceptions,
-                           proto::tmaster::ExceptionLogResponse& aggregate_exceptions);
+  void AggregateExceptions(const proto::tmanager::ExceptionLogResponse& all_exceptions,
+                           proto::tmanager::ExceptionLogResponse& aggregate_exceptions);
 
   // Add metrics for 'component_name'
   void AddMetricsForComponent(const sp_string& component_name,
-                              const proto::tmaster::MetricDatum& metrics_data);
+                              const proto::tmanager::MetricDatum& metrics_data);
 
   // Add exception logs for 'component_name'
   void AddExceptionsForComponent(const sp_string& component_name,
-                                 const proto::tmaster::TmasterExceptionLog& exception_log);
+                                 const proto::tmanager::TmanagerExceptionLog& exception_log);
 
   // Clean all metrics.
   void Purge(EventLoop::Status _status);
@@ -126,7 +126,7 @@ class TMetricsCollector {
   class Metric {
    public:
     // TODO(kramasamy): Add ctor for default UNKNOWN type and give a set type function.
-    Metric(const sp_string& name, common::TMasterMetrics::MetricAggregationType type,
+    Metric(const sp_string& name, common::TManagerMetrics::MetricAggregationType type,
            sp_int32 nbuckets, sp_int32 bucket_interval);
 
     // Deletes all TimeBucket.
@@ -139,7 +139,7 @@ class TMetricsCollector {
 
     // Return past '_nbuckets' value for this metric.
     void GetMetrics(bool minutely, sp_int64 start_time, sp_int64 end_time,
-                    proto::tmaster::MetricResponse::IndividualMetric* response);
+                    proto::tmanager::MetricResponse::IndividualMetric* response);
 
    private:
     sp_string name_;
     std::list<unique_ptr<TimeBucket>> data_;
     // Type of metric. This can be SUM or AVG. It specifies how to aggregate these metrics for
     // display.
-    common::TMasterMetrics::MetricAggregationType metric_type_;
+    common::TManagerMetrics::MetricAggregationType metric_type_;
 
     sp_double64 all_time_cumulative_;
@@ -171,23 +171,23 @@ class TMetricsCollector {
 
     // Add metrics with name '_name' of type '_type' and value _value.
     void AddMetricWithName(const sp_string& name,
-                           common::TMasterMetrics::MetricAggregationType type,
+                           common::TManagerMetrics::MetricAggregationType type,
                            const sp_string& value);
 
-    // Add TmasterExceptionLog to the list of exceptions for this instance_id.
-    void AddExceptions(const proto::tmaster::TmasterExceptionLog& exception);
+    // Add TmanagerExceptionLog to the list of exceptions for this instance_id.
+    void AddExceptions(const proto::tmanager::TmanagerExceptionLog& exception);
 
     // Returns the metric's metrics. Doesn't own _response.
-    void GetMetrics(const proto::tmaster::MetricRequest& request, sp_int64 start_time,
-                    sp_int64 end_time, proto::tmaster::MetricResponse& response);
+    void GetMetrics(const proto::tmanager::MetricRequest& request, sp_int64 start_time,
+                    sp_int64 end_time, proto::tmanager::MetricResponse& response);
 
     // Fills response for fetching exceptions. Doesn't own response.
-    void GetExceptionLog(proto::tmaster::ExceptionLogResponse& response);
+    void GetExceptionLog(proto::tmanager::ExceptionLogResponse& response);
 
    private:
     // Create or return existing Metric. Retains ownership of Metric object returned.
     shared_ptr<Metric> GetOrCreateMetric(const sp_string& name,
-                                         common::TMasterMetrics::MetricAggregationType type);
+                                         common::TManagerMetrics::MetricAggregationType type);
 
     sp_string instance_id_;
     sp_int32 nbuckets_;
@@ -195,7 +195,7 @@ class TMetricsCollector {
     // map between metric name and its values
     std::map<sp_string, shared_ptr<Metric>> metrics_;
     // list of exceptions
-    std::list<unique_ptr<proto::tmaster::TmasterExceptionLog>> exceptions_;
+    std::list<unique_ptr<proto::tmanager::TmanagerExceptionLog>> exceptions_;
   };
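
The exceptions_ member renamed above is a bounded buffer: new logs are appended, and the oldest are evicted once the configured maximum is exceeded. A minimal sketch of that retention policy:

```cpp
// Editor's sketch, not the Heron sources: the retention policy behind the
// exceptions_ list above. BoundedExceptionLog is an illustrative stand-in that
// stores strings where the collector stores TmanagerExceptionLog protobufs.
#include <cstddef>
#include <deque>
#include <string>

class BoundedExceptionLog {
 public:
  explicit BoundedExceptionLog(std::size_t max_size) : max_size_(max_size) {}

  // Mirrors InstanceMetrics::AddExceptions: append, then evict the oldest
  // entries until the configured maximum is respected.
  void Add(const std::string& log) {
    logs_.push_back(log);
    while (logs_.size() > max_size_) {
      logs_.pop_front();
    }
  }

  const std::deque<std::string>& logs() const { return logs_; }

 private:
  std::size_t max_size_;
  std::deque<std::string> logs_;
};
```
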
 
   // Component level metrics. A component metrics is a map storing metrics for each of its
@@ -213,22 +213,22 @@ class TMetricsCollector {
 
     // Add metrics for an Instance 'instance_id' of this spout/bolt component.
     void AddMetricForInstance(const sp_string& instance_id, const sp_string& name,
-                              common::TMasterMetrics::MetricAggregationType type,
+                              common::TManagerMetrics::MetricAggregationType type,
                               const sp_string& value);
 
     // Add exception for an Instance 'instance_id' of this spout/bolt component.
     void AddExceptionForInstance(const sp_string& instance_id,
-                                 const proto::tmaster::TmasterExceptionLog& exception);
+                                 const proto::tmanager::TmanagerExceptionLog& exception);
 
     // Request aggregated metrics for this component for the '_nbucket' interval.
     // Doesn't own '_response' object.
-    void GetMetrics(const proto::tmaster::MetricRequest& request, sp_int64 start_time,
-                    sp_int64 end_time, proto::tmaster::MetricResponse& response);
+    void GetMetrics(const proto::tmanager::MetricRequest& request, sp_int64 start_time,
+                    sp_int64 end_time, proto::tmanager::MetricResponse& response);
 
     // Returns response for fetching exceptions. Doesn't own response.
     void GetExceptionsForInstance(const sp_string& instance_id,
-                                  proto::tmaster::ExceptionLogResponse& response);
+                                  proto::tmanager::ExceptionLogResponse& response);
 
-    void GetAllExceptions(proto::tmaster::ExceptionLogResponse& response);
+    void GetAllExceptions(proto::tmanager::ExceptionLogResponse& response);
 
    private:
     // Create or return existing mutable InstanceMetrics associated with 'instance_id'. This
@@ -254,10 +254,10 @@
   sp_int32 interval_;
   std::shared_ptr<EventLoop> eventLoop_;
   std::string metrics_sinks_yaml_;
-  std::unique_ptr<common::TMasterMetrics> tmetrics_info_;
+  std::unique_ptr<common::TManagerMetrics> tmetrics_info_;
   time_t start_time_;
 };
-}  // namespace tmaster
+}  // namespace tmanager
 }  // namespace heron
 
 #endif
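
Taken together, the header above describes a three-level containment hierarchy keyed by component name, instance id, and metric name. Stripped of protobuf and time buckets, it reduces to nested maps:

```cpp
// Editor's sketch, not the Heron sources: the containment hierarchy declared
// in the header above, reduced to nested maps. Collector, ComponentMetrics,
// InstanceMetrics and Metric are illustrative stand-ins for the real classes.
#include <map>
#include <memory>
#include <string>

struct Metric {};  // per-name, time-bucketed values in the real collector

struct InstanceMetrics {
  std::map<std::string, std::shared_ptr<Metric>> metrics;  // keyed by metric name
};

struct ComponentMetrics {
  std::map<std::string, std::shared_ptr<InstanceMetrics>> instances;  // keyed by instance id
};

struct Collector {
  std::map<std::string, std::shared_ptr<ComponentMetrics>> components;  // keyed by component name

  // The GetOrCreate* methods in the header all follow this idiom.
  std::shared_ptr<ComponentMetrics> GetOrCreateComponentMetrics(const std::string& name) {
    auto& slot = components[name];
    if (!slot) slot = std::make_shared<ComponentMetrics>();
    return slot;
  }
};
```
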
diff --git a/heron/tmaster/src/cpp/processor/processor.h b/heron/tmanager/src/cpp/processor/processor.h
similarity index 88%
rename from heron/tmaster/src/cpp/processor/processor.h
rename to heron/tmanager/src/cpp/processor/processor.h
index e573dd3b2b2..119d887a715 100644
--- a/heron/tmaster/src/cpp/processor/processor.h
+++ b/heron/tmanager/src/cpp/processor/processor.h
@@ -17,10 +17,10 @@
  * under the License.
  */
 
-#if !defined(__TMASTER_PROCESSORS_H_)
-#define __TMASTER_PROCESSORS_H_
+#if !defined(__TMANAGER_PROCESSORS_H_)
+#define __TMANAGER_PROCESSORS_H_
 
-#include "processor/tmaster-processor.h"
+#include "processor/tmanager-processor.h"
 #include "processor/stmgr-register-processor.h"
 #include "processor/stmgr-heartbeat-processor.h"
 
diff --git a/heron/tmaster/src/cpp/processor/stmgr-heartbeat-processor.cpp b/heron/tmanager/src/cpp/processor/stmgr-heartbeat-processor.cpp
similarity index 69%
rename from heron/tmaster/src/cpp/processor/stmgr-heartbeat-processor.cpp
rename to heron/tmanager/src/cpp/processor/stmgr-heartbeat-processor.cpp
index 1c98d706fb0..d4b33dfad24 100644
--- a/heron/tmaster/src/cpp/processor/stmgr-heartbeat-processor.cpp
+++ b/heron/tmanager/src/cpp/processor/stmgr-heartbeat-processor.cpp
@@ -19,8 +19,8 @@
 #include "processor/stmgr-heartbeat-processor.h"
 #include <iostream>
-#include "processor/tmaster-processor.h"
-#include "manager/tmaster.h"
+#include "processor/tmanager-processor.h"
+#include "manager/tmanager.h"
 #include "proto/messages.h"
 #include "basics/basics.h"
 #include "errors/errors.h"
@@ -28,27 +28,27 @@
 #include "network/network.h"
 
 namespace heron {
-namespace tmaster {
+namespace tmanager {
 
 StMgrHeartbeatProcessor::StMgrHeartbeatProcessor(REQID reqid, Connection* conn,
-                                pool_unique_ptr<proto::tmaster::StMgrHeartbeatRequest> request,
-                                TMaster* tmaster, Server* server)
-    : Processor(reqid, conn, std::move(request), tmaster, server) {}
+                                pool_unique_ptr<proto::tmanager::StMgrHeartbeatRequest> request,
+                                TManager* tmanager, Server* server)
+    : Processor(reqid, conn, std::move(request), tmanager, server) {}
 
 StMgrHeartbeatProcessor::~StMgrHeartbeatProcessor() {
   // nothing to be done here
 }
 
 void StMgrHeartbeatProcessor::Start() {
-  proto::tmaster::StMgrHeartbeatRequest* request =
-      static_cast<proto::tmaster::StMgrHeartbeatRequest*>(request_.get());
+  proto::tmanager::StMgrHeartbeatRequest* request =
+      static_cast<proto::tmanager::StMgrHeartbeatRequest*>(request_.get());
 
-  proto::system::Status* status = tmaster_->UpdateStMgrHeartbeat(
+  proto::system::Status* status = tmanager_->UpdateStMgrHeartbeat(
       GetConnection(), request->heartbeat_time(), request->release_stats());
 
-  proto::tmaster::StMgrHeartbeatResponse response;
+  proto::tmanager::StMgrHeartbeatResponse response;
   response.set_allocated_status(status);
 
   SendResponse(response);
 }
-}  // namespace tmaster
+}  // namespace tmanager
 }  // namespace heron
diff --git a/heron/tmaster/src/cpp/processor/stmgr-heartbeat-processor.h b/heron/tmanager/src/cpp/processor/stmgr-heartbeat-processor.h
similarity index 84%
rename from heron/tmaster/src/cpp/processor/stmgr-heartbeat-processor.h
rename to heron/tmanager/src/cpp/processor/stmgr-heartbeat-processor.h
index 6240bda3de4..9a2b9ed9b29 100644
--- a/heron/tmaster/src/cpp/processor/stmgr-heartbeat-processor.h
+++ b/heron/tmanager/src/cpp/processor/stmgr-heartbeat-processor.h
@@ -20,24 +20,24 @@
 #ifndef STMGR_HEARTBEAT_PROCESSOR_
 #define STMGR_HEARTBEAT_PROCESSOR_
 
-#include "processor/tmaster-processor.h"
+#include "processor/tmanager-processor.h"
 #include "proto/messages.h"
 #include "basics/basics.h"
 #include "network/network.h"
 
 namespace heron {
-namespace tmaster {
+namespace tmanager {
 
 class StMgrHeartbeatProcessor : public Processor {
  public:
   StMgrHeartbeatProcessor(REQID _reqid, Connection* _conn,
-                          pool_unique_ptr<proto::tmaster::StMgrHeartbeatRequest> _request,
-                          TMaster* _tmaster,
+                          pool_unique_ptr<proto::tmanager::StMgrHeartbeatRequest> _request,
+                          TManager* _tmanager,
                           Server* _server);
   virtual ~StMgrHeartbeatProcessor();
   void Start();
 };
-}  // namespace tmaster
+}  // namespace tmanager
 }  // namespace heron
 #endif
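
The heartbeat processor above and the register processor that follows share one request/response shape: the server hands each processor an opaque request, Start() downcasts it to the concrete proto type, asks the topology manager to act, and sends a status response back on the same connection. A condensed sketch of that pattern:

```cpp
// Editor's sketch, not the Heron sources: the request/response shape shared by
// the processors in this patch. All types here are simplified stand-ins; the
// real classes also carry REQID, Connection and Server, and use protobufs.
#include <memory>
#include <utility>

struct Request { virtual ~Request() = default; };
struct HeartbeatRequest : Request { long heartbeat_time = 0; };
struct Response { bool ok = false; };

struct Manager {  // stands in for TManager
  bool UpdateStMgrHeartbeat(long heartbeat_time) { return heartbeat_time >= 0; }
};

class Processor {
 public:
  Processor(std::unique_ptr<Request> request, Manager* manager)
      : request_(std::move(request)), manager_(manager) {}
  virtual ~Processor() = default;
  virtual void Start() = 0;  // each concrete processor handles one request type

 protected:
  void SendResponse(const Response& /*response*/) { /* writes to the connection */ }
  std::unique_ptr<Request> request_;  // owned, opaque until Start() downcasts it
  Manager* manager_;
};

class HeartbeatProcessor : public Processor {
 public:
  using Processor::Processor;
  void Start() override {
    // The downcast mirrors the static_cast in StMgrHeartbeatProcessor::Start().
    auto* request = static_cast<HeartbeatRequest*>(request_.get());
    Response response;
    response.ok = manager_->UpdateStMgrHeartbeat(request->heartbeat_time);
    SendResponse(response);
  }
};
```
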
diff --git a/heron/tmaster/src/cpp/processor/stmgr-register-processor.cpp b/heron/tmanager/src/cpp/processor/stmgr-register-processor.cpp
similarity index 73%
rename from heron/tmaster/src/cpp/processor/stmgr-register-processor.cpp
rename to heron/tmanager/src/cpp/processor/stmgr-register-processor.cpp
index c58dda87d0d..17889ecb7c4 100644
--- a/heron/tmaster/src/cpp/processor/stmgr-register-processor.cpp
+++ b/heron/tmanager/src/cpp/processor/stmgr-register-processor.cpp
@@ -20,8 +20,8 @@
 #include "processor/stmgr-register-processor.h"
 #include <iostream>
 #include <vector>
-#include "processor/tmaster-processor.h"
-#include "manager/tmaster.h"
+#include "processor/tmanager-processor.h"
+#include "manager/tmanager.h"
 #include "proto/messages.h"
 #include "basics/basics.h"
 #include "errors/errors.h"
@@ -29,12 +29,12 @@
 #include "network/network.h"
 
 namespace heron {
-namespace tmaster {
+namespace tmanager {
 
 StMgrRegisterProcessor::StMgrRegisterProcessor(REQID _reqid, Connection* _conn,
-                                pool_unique_ptr<proto::tmaster::StMgrRegisterRequest> _request,
-                                TMaster* _tmaster, Server* _server)
-    : Processor(_reqid, _conn, std::move(_request), _tmaster, _server) {}
+                                pool_unique_ptr<proto::tmanager::StMgrRegisterRequest> _request,
+                                TManager* _tmanager, Server* _server)
+    : Processor(_reqid, _conn, std::move(_request), _tmanager, _server) {}
 
 StMgrRegisterProcessor::~StMgrRegisterProcessor() {
   // nothing to be done here
@@ -42,9 +42,9 @@ StMgrRegisterProcessor::~StMgrRegisterProcessor() {
 
 void StMgrRegisterProcessor::Start() {
   // We got a new stream manager registering to us
-  // Get the relevant info and ask tmaster to register
-  proto::tmaster::StMgrRegisterRequest* request =
-      static_cast<proto::tmaster::StMgrRegisterRequest*>(request_.get());
+  // Get the relevant info and ask tmanager to register
+  proto::tmanager::StMgrRegisterRequest* request =
+      static_cast<proto::tmanager::StMgrRegisterRequest*>(request_.get());
   std::vector<shared_ptr<proto::system::Instance>> instances;
   for (sp_int32 i = 0; i < request->instances_size(); ++i) {
     auto instance = std::make_shared<proto::system::Instance>();
@@ -55,10 +55,10 @@
 
   shared_ptr<proto::system::PhysicalPlan> pplan;
   proto::system::Status* status =
-      tmaster_->RegisterStMgr(request->stmgr(), instances, GetConnection(), pplan);
+      tmanager_->RegisterStMgr(request->stmgr(), instances, GetConnection(), pplan);
 
   // Send the response
-  proto::tmaster::StMgrRegisterResponse response;
+  proto::tmanager::StMgrRegisterResponse response;
   response.set_allocated_status(status);
   if (status->status() == proto::system::OK) {
     if (pplan) {
@@ -68,5 +68,5 @@
   SendResponse(response);
   return;
 }
-}  // namespace tmaster
+}  // namespace tmanager
 }  // namespace heron
diff --git a/heron/tmaster/src/cpp/processor/stmgr-register-processor.h b/heron/tmanager/src/cpp/processor/stmgr-register-processor.h
similarity index 84%
rename from heron/tmaster/src/cpp/processor/stmgr-register-processor.h
rename to heron/tmanager/src/cpp/processor/stmgr-register-processor.h
index 208c09f48f8..def163c57c4 100644
--- a/heron/tmaster/src/cpp/processor/stmgr-register-processor.h
+++ b/heron/tmanager/src/cpp/processor/stmgr-register-processor.h
@@ -20,24 +20,24 @@
 #ifndef STMGR_REGISTER_PROCESSOR_H_
 #define STMGR_REGISTER_PROCESSOR_H_
 
-#include "processor/tmaster-processor.h"
+#include "processor/tmanager-processor.h"
 #include "proto/messages.h"
 #include "basics/basics.h"
 #include "network/network.h"
 
 namespace heron {
-namespace tmaster {
+namespace tmanager {
 
 class StMgrRegisterProcessor : public Processor {
  public:
   StMgrRegisterProcessor(REQID _reqid, Connection* _conn,
-                         pool_unique_ptr<proto::tmaster::StMgrRegisterRequest> _request,
-                         TMaster* _tmaster,
+                         pool_unique_ptr<proto::tmanager::StMgrRegisterRequest> _request,
+                         TManager* _tmanager,
                          Server* _server);
   virtual ~StMgrRegisterProcessor();
   void Start();
 };
-}  // namespace tmaster
+}  // namespace tmanager
 }  // namespace heron
 #endif
diff --git a/heron/tmaster/src/cpp/processor/tmaster-processor.cpp b/heron/tmanager/src/cpp/processor/tmanager-processor.cpp
similarity index 86%
rename from heron/tmaster/src/cpp/processor/tmaster-processor.cpp
rename to heron/tmanager/src/cpp/processor/tmanager-processor.cpp
index 0caaa8b8cc6..7101d61baae 100644
--- a/heron/tmaster/src/cpp/processor/tmaster-processor.cpp
+++ b/heron/tmanager/src/cpp/processor/tmanager-processor.cpp
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-#include "processor/tmaster-processor.h"
+#include "processor/tmanager-processor.h"
 #include <iostream>
 #include "proto/messages.h"
 #include "basics/basics.h"
@@ -26,12 +26,12 @@
 #include "network/network.h"
 
 namespace heron {
-namespace tmaster {
+namespace tmanager {
 
 Processor::Processor(REQID _reqid, Connection* _conn,
                      pool_unique_ptr<google::protobuf::Message> _request,
-                     TMaster* _tmaster, Server* _server)
-    : request_(std::move(_request)), tmaster_(_tmaster), server_(_server),
+                     TManager* _tmanager, Server* _server)
+    : request_(std::move(_request)), tmanager_(_tmanager), server_(_server),
       reqid_(_reqid), conn_(_conn) {}
 
 Processor::~Processor() {}
@@ -41,5 +41,5 @@ void Processor::SendResponse(const google::protobuf::Message& _response) {
 }
 
 void Processor::CloseConnection() { server_->CloseConnection(conn_); }
-}  // namespace tmaster
+}  // namespace tmanager
 }  // namespace heron
diff --git a/heron/tmaster/src/cpp/processor/tmaster-processor.h b/heron/tmanager/src/cpp/processor/tmanager-processor.h
similarity index 88%
rename from heron/tmaster/src/cpp/processor/tmaster-processor.h
rename to heron/tmanager/src/cpp/processor/tmanager-processor.h
index 70c7c207490..46be7ad693d 100644
--- a/heron/tmaster/src/cpp/processor/tmaster-processor.h
+++ b/heron/tmanager/src/cpp/processor/tmanager-processor.h
@@ -17,22 +17,22 @@
  * under the License.
*/ -#ifndef TMASTER_PROCESSOR_H_ -#define TMASTER_PROCESSOR_H_ +#ifndef TMANAGER_PROCESSOR_H_ +#define TMANAGER_PROCESSOR_H_ #include "proto/messages.h" #include "basics/basics.h" #include "network/network.h" namespace heron { -namespace tmaster { +namespace tmanager { -class TMaster; +class TManager; class Processor { public: Processor(REQID _reqid, Connection* _conn, pool_unique_ptr _request, - TMaster* _tmaster, + TManager* _tmanager, Server* _server); virtual ~Processor(); virtual void Start() = 0; @@ -42,13 +42,13 @@ class Processor { Connection* GetConnection() { return conn_; } void CloseConnection(); pool_unique_ptr request_; - TMaster* tmaster_; + TManager* tmanager_; Server* server_; private: REQID reqid_; Connection* conn_; }; -} // namespace tmaster +} // namespace tmanager } // namespace heron #endif diff --git a/heron/tmaster/src/cpp/server/tmaster-main.cpp b/heron/tmanager/src/cpp/server/tmanager-main.cpp similarity index 90% rename from heron/tmaster/src/cpp/server/tmaster-main.cpp rename to heron/tmanager/src/cpp/server/tmanager-main.cpp index 7c4343cb113..dd5a055f682 100644 --- a/heron/tmaster/src/cpp/server/tmaster-main.cpp +++ b/heron/tmanager/src/cpp/server/tmanager-main.cpp @@ -21,7 +21,7 @@ #include #include #include "gflags/gflags.h" -#include "manager/tmaster.h" +#include "manager/tmanager.h" #include "proto/messages.h" #include "basics/basics.h" #include "errors/errors.h" @@ -34,7 +34,7 @@ DEFINE_string(topology_id, "", "Id of the topology"); DEFINE_string(zkhostportlist, "", "Location of the zk"); DEFINE_string(zkroot, "", "Root of the zk"); DEFINE_string(myhost, "", "The hostname that I'm running"); -DEFINE_int32(master_port, 0, "The port used for communication with stmgrs"); +DEFINE_int32(server_port, 0, "The port used for communication with stmgrs"); DEFINE_int32(controller_port, 0, "The port used to activate/deactivate"); DEFINE_int32(stats_port, 0, "The port of the getting stats"); DEFINE_string(config_file, "", "The heron internals config file"); @@ -60,12 +60,12 @@ int main(int argc, char* argv[]) { heron::common::Initialize(argv[0], FLAGS_topology_id.c_str()); - LOG(INFO) << "Starting tmaster for topology " << FLAGS_topology_name << " with topology id " + LOG(INFO) << "Starting tmanager for topology " << FLAGS_topology_name << " with topology id " << FLAGS_topology_id << " zkhostport " << FLAGS_zkhostportlist << " and zkroot " << FLAGS_zkroot; - heron::tmaster::TMaster tmaster(FLAGS_zkhostportlist, FLAGS_topology_name, FLAGS_topology_id, - FLAGS_zkroot, FLAGS_controller_port, FLAGS_master_port, + heron::tmanager::TManager tmanager(FLAGS_zkhostportlist, FLAGS_topology_name, FLAGS_topology_id, + FLAGS_zkroot, FLAGS_controller_port, FLAGS_server_port, FLAGS_stats_port, FLAGS_metricsmgr_port, FLAGS_ckptmgr_port, FLAGS_metrics_sinks_yaml, FLAGS_myhost, ss); ss->loop(); diff --git a/heron/tmaster/tests/cpp/server/BUILD b/heron/tmanager/tests/cpp/server/BUILD similarity index 77% rename from heron/tmaster/tests/cpp/server/BUILD rename to heron/tmanager/tests/cpp/server/BUILD index 341fc6cfe6c..53d348374a2 100644 --- a/heron/tmaster/tests/cpp/server/BUILD +++ b/heron/tmanager/tests/cpp/server/BUILD @@ -3,12 +3,12 @@ load("@rules_cc//cc:defs.bzl", "cc_test") package(default_visibility = ["//visibility:public"]) cc_test( - name = "tmaster_unittest", + name = "tmanager_unittest", size = "small", srcs = [ "dummystmgr.cpp", "dummystmgr.h", - "tmaster_unittest.cpp", + "tmanager_unittest.cpp", ], args = ["$(location //heron/config/src/yaml:test-config-internals-yaml)"], 
copts = [ @@ -16,8 +16,8 @@ cc_test( "-Iheron/common/src/cpp", "-Iheron/statemgrs/src/cpp", "-Iheron/stmgr/src/cpp", - "-Iheron/tmaster/src/cpp", - "-Iheron/tmaster/tests/cpp", + "-Iheron/tmanager/src/cpp", + "-Iheron/tmanager/tests/cpp", "-I$(GENDIR)/heron", "-I$(GENDIR)/heron/common/src/cpp", ], @@ -28,7 +28,7 @@ cc_test( linkstatic = 1, deps = [ "//heron/stmgr/src/cpp:manager-cxx", - "//heron/tmaster/src/cpp:tmaster-cxx", + "//heron/tmanager/src/cpp:tmanager-cxx", "@com_google_googletest//:gtest", ], ) @@ -44,8 +44,8 @@ cc_test( "-Iheron/common/src/cpp", "-Iheron/statemgrs/src/cpp", "-Iheron/stmgr/src/cpp", - "-Iheron/tmaster/src/cpp", - "-Iheron/tmaster/tests/cpp", + "-Iheron/tmanager/src/cpp", + "-Iheron/tmanager/tests/cpp", "-I$(GENDIR)/heron", "-I$(GENDIR)/heron/common/src/cpp", ], @@ -53,7 +53,7 @@ cc_test( linkstatic = 1, deps = [ "//heron/stmgr/src/cpp:manager-cxx", - "//heron/tmaster/src/cpp:tmaster-cxx", + "//heron/tmanager/src/cpp:tmanager-cxx", "@com_google_googletest//:gtest", ], ) @@ -64,23 +64,23 @@ cc_test( srcs = [ "dummystmgr.cpp", "dummystmgr.h", - "dummytmaster.cpp", - "dummytmaster.h", + "dummytmanager.cpp", + "dummytmanager.h", "stateful_restorer_unittest.cpp", ], copts = [ "-Iheron", "-Iheron/common/src/cpp", "-Iheron/statemgrs/src/cpp", - "-Iheron/tmaster/src/cpp", - "-Iheron/tmaster/tests/cpp", + "-Iheron/tmanager/src/cpp", + "-Iheron/tmanager/tests/cpp", "-I$(GENDIR)/heron", "-I$(GENDIR)/heron/common/src/cpp", ], flaky = 1, linkstatic = 1, deps = [ - "//heron/tmaster/src/cpp:tmaster-cxx", + "//heron/tmanager/src/cpp:tmanager-cxx", "@com_google_googletest//:gtest", ], ) @@ -95,15 +95,15 @@ cc_test( "-Iheron", "-Iheron/common/src/cpp", "-Iheron/statemgrs/src/cpp", - "-Iheron/tmaster/src/cpp", - "-Iheron/tmaster/tests/cpp", + "-Iheron/tmanager/src/cpp", + "-Iheron/tmanager/tests/cpp", "-I$(GENDIR)/heron", "-I$(GENDIR)/heron/common/src/cpp", ], flaky = 0, linkstatic = 1, deps = [ - "//heron/tmaster/src/cpp:tmaster-cxx", + "//heron/tmanager/src/cpp:tmanager-cxx", "@com_google_googletest//:gtest", ], ) diff --git a/heron/tmaster/tests/cpp/server/dummystmgr.cpp b/heron/tmanager/tests/cpp/server/dummystmgr.cpp similarity index 91% rename from heron/tmaster/tests/cpp/server/dummystmgr.cpp rename to heron/tmanager/tests/cpp/server/dummystmgr.cpp index 13cf22ecfda..47bb1b11eb9 100644 --- a/heron/tmaster/tests/cpp/server/dummystmgr.cpp +++ b/heron/tmanager/tests/cpp/server/dummystmgr.cpp @@ -42,9 +42,9 @@ DummyStMgr::DummyStMgr(std::shared_ptr eventLoop, const NetworkOption pplan_(nullptr), got_restore_message_(false), got_start_message_(false) { - InstallResponseHandler(std::move(make_unique()), + InstallResponseHandler(std::move(make_unique()), &DummyStMgr::HandleRegisterResponse); - InstallResponseHandler(std::move(make_unique()), + InstallResponseHandler(std::move(make_unique()), &DummyStMgr::HandleHeartbeatResponse); InstallMessageHandler(&DummyStMgr::HandleNewAssignmentMessage); InstallMessageHandler(&DummyStMgr::HandleRestoreTopologyStateRequest); @@ -77,7 +77,7 @@ void DummyStMgr::HandleClose(NetworkErrorCode code) { void DummyStMgr::HandleRegisterResponse( void*, - pool_unique_ptr response, + pool_unique_ptr response, NetworkErrorCode status) { if (status != OK) { LOG(ERROR) << "NonOK response message for Register Response"; @@ -107,7 +107,7 @@ void DummyStMgr::HandleRegisterResponse( void DummyStMgr::HandleHeartbeatResponse( void*, - pool_unique_ptr response, + pool_unique_ptr response, NetworkErrorCode status) { if (status != OK) { LOG(ERROR) << "NonOK response 
message for Register Response"; @@ -144,7 +144,7 @@ void DummyStMgr::OnHeartbeatTimer() { } void DummyStMgr::SendRegisterRequest() { - auto request = make_unique(); + auto request = make_unique(); proto::system::StMgr* stmgr = request->mutable_stmgr(); stmgr->set_id(my_id_); stmgr->set_host_name(my_host_); @@ -159,7 +159,7 @@ void DummyStMgr::SendRegisterRequest() { } void DummyStMgr::SendHeartbeatRequest() { - auto request = make_unique(); + auto request = make_unique(); request->set_heartbeat_time(time(NULL)); request->mutable_stats(); SendRequest(std::move(request), nullptr); diff --git a/heron/tmaster/tests/cpp/server/dummystmgr.h b/heron/tmanager/tests/cpp/server/dummystmgr.h similarity index 93% rename from heron/tmaster/tests/cpp/server/dummystmgr.h rename to heron/tmanager/tests/cpp/server/dummystmgr.h index 06df9502190..e71f3011f60 100644 --- a/heron/tmaster/tests/cpp/server/dummystmgr.h +++ b/heron/tmanager/tests/cpp/server/dummystmgr.h @@ -53,10 +53,10 @@ class DummyStMgr : public Client { private: void HandleRegisterResponse(void*, - pool_unique_ptr response, + pool_unique_ptr response, NetworkErrorCode); void HandleHeartbeatResponse(void*, - pool_unique_ptr response, + pool_unique_ptr response, NetworkErrorCode); void HandleNewAssignmentMessage(pool_unique_ptr message); void HandleNewPhysicalPlan(const proto::system::PhysicalPlan& pplan); diff --git a/heron/tmaster/tests/cpp/server/dummytmaster.cpp b/heron/tmanager/tests/cpp/server/dummytmanager.cpp similarity index 63% rename from heron/tmaster/tests/cpp/server/dummytmaster.cpp rename to heron/tmanager/tests/cpp/server/dummytmanager.cpp index a5549985591..9cf797651c4 100644 --- a/heron/tmaster/tests/cpp/server/dummytmaster.cpp +++ b/heron/tmanager/tests/cpp/server/dummytmanager.cpp @@ -17,7 +17,7 @@ * under the License. 
*/ -#include "server/dummytmaster.h" +#include "server/dummytmanager.h" #include #include #include @@ -32,35 +32,35 @@ namespace heron { namespace testing { -DummyTMaster::DummyTMaster(std::shared_ptr eventLoop, const NetworkOptions& options) +DummyTManager::DummyTManager(std::shared_ptr eventLoop, const NetworkOptions& options) : Server(eventLoop, options) { - InstallRequestHandler(&DummyTMaster::HandleRegisterRequest); + InstallRequestHandler(&DummyTManager::HandleRegisterRequest); } -DummyTMaster::~DummyTMaster() {} +DummyTManager::~DummyTManager() {} -void DummyTMaster::HandleNewConnection(Connection* _conn) { +void DummyTManager::HandleNewConnection(Connection* _conn) { // Do nothing } -void DummyTMaster::HandleConnectionClose(Connection* _conn, NetworkErrorCode) { +void DummyTManager::HandleConnectionClose(Connection* _conn, NetworkErrorCode) { // Do Nothing } -void DummyTMaster::HandleRegisterRequest(REQID _id, Connection* _conn, - pool_unique_ptr _request) { +void DummyTManager::HandleRegisterRequest(REQID _id, Connection* _conn, + pool_unique_ptr _request) { std::vector> instances; stmgrs_[_request->stmgr().id()] = - std::make_shared(_conn, _request->stmgr(), instances, *this); - proto::tmaster::StMgrRegisterResponse response; + std::make_shared(_conn, _request->stmgr(), instances, *this); + proto::tmanager::StMgrRegisterResponse response; response.mutable_status()->set_status(proto::system::OK); SendResponse(_id, _conn, response); } -void DummyTMaster::HandleHeartbeatRequest(REQID _id, Connection* _conn, - proto::tmaster::StMgrHeartbeatRequest* _request) { +void DummyTManager::HandleHeartbeatRequest(REQID _id, Connection* _conn, + proto::tmanager::StMgrHeartbeatRequest* _request) { delete _request; - proto::tmaster::StMgrHeartbeatResponse response; + proto::tmanager::StMgrHeartbeatResponse response; response.mutable_status()->set_status(proto::system::OK); SendResponse(_id, _conn, response); } diff --git a/heron/tmaster/tests/cpp/server/dummytmaster.h b/heron/tmanager/tests/cpp/server/dummytmanager.h similarity index 74% rename from heron/tmaster/tests/cpp/server/dummytmaster.h rename to heron/tmanager/tests/cpp/server/dummytmanager.h index c00ade5b990..c02c956797b 100644 --- a/heron/tmaster/tests/cpp/server/dummytmaster.h +++ b/heron/tmanager/tests/cpp/server/dummytmanager.h @@ -17,8 +17,8 @@ * under the License. 
*/ -#ifndef __DUMMYTMASTER_H_ -#define __DUMMYTMASTER_H_ +#ifndef __DUMMYTMANAGER_H_ +#define __DUMMYTMANAGER_H_ #include #include @@ -29,17 +29,17 @@ #include "errors/errors.h" #include "threads/threads.h" #include "network/network.h" -#include "manager/tmaster.h" +#include "manager/tmanager.h" namespace heron { namespace testing { -class DummyTMaster : public Server { +class DummyTManager : public Server { public: - DummyTMaster(std::shared_ptr eventLoop, const NetworkOptions& options); - ~DummyTMaster(); + DummyTManager(std::shared_ptr eventLoop, const NetworkOptions& options); + ~DummyTManager(); - const tmaster::StMgrMap& stmgrs() const { return stmgrs_; } + const tmanager::StMgrMap& stmgrs() const { return stmgrs_; } protected: virtual void HandleNewConnection(Connection* _conn); @@ -47,10 +47,10 @@ class DummyTMaster : public Server { private: void HandleRegisterRequest(REQID _id, Connection* _conn, - pool_unique_ptr _request); + pool_unique_ptr _request); void HandleHeartbeatRequest(REQID _id, Connection* _conn, - proto::tmaster::StMgrHeartbeatRequest* _request); - tmaster::StMgrMap stmgrs_; + proto::tmanager::StMgrHeartbeatRequest* _request); + tmanager::StMgrMap stmgrs_; }; } // namespace testing } // namespace heron diff --git a/heron/tmaster/tests/cpp/server/stateful_checkpointer_unittest.cpp b/heron/tmanager/tests/cpp/server/stateful_checkpointer_unittest.cpp similarity index 98% rename from heron/tmaster/tests/cpp/server/stateful_checkpointer_unittest.cpp rename to heron/tmanager/tests/cpp/server/stateful_checkpointer_unittest.cpp index 67e46c343fb..0496a5e1cd5 100644 --- a/heron/tmaster/tests/cpp/server/stateful_checkpointer_unittest.cpp +++ b/heron/tmanager/tests/cpp/server/stateful_checkpointer_unittest.cpp @@ -193,7 +193,7 @@ heron::proto::system::PhysicalPlan* CreatePplan() { TEST(StatefulCheckpointer, test_stored_logic) { auto pplan = CreatePplan(); auto checkpointer = - new heron::tmaster::StatefulCheckpointer(std::chrono::high_resolution_clock::now()); + new heron::tmanager::StatefulCheckpointer(std::chrono::high_resolution_clock::now()); checkpointer->RegisterNewPhysicalPlan(*pplan); int32_t allButOne = pplan->instances_size() - 1; for (int32_t i = 0; i < allButOne; ++i) { @@ -208,7 +208,7 @@ TEST(StatefulCheckpointer, test_stored_logic) { TEST(StMgr, test_skip_to_newer_ckptid) { auto pplan = CreatePplan(); auto checkpointer = - new heron::tmaster::StatefulCheckpointer(std::chrono::high_resolution_clock::now()); + new heron::tmanager::StatefulCheckpointer(std::chrono::high_resolution_clock::now()); checkpointer->RegisterNewPhysicalPlan(*pplan); int32_t allButOne = pplan->instances_size() - 1; int32_t allButFive = pplan->instances_size() - 5; diff --git a/heron/tmaster/tests/cpp/server/stateful_restorer_unittest.cpp b/heron/tmanager/tests/cpp/server/stateful_restorer_unittest.cpp similarity index 88% rename from heron/tmaster/tests/cpp/server/stateful_restorer_unittest.cpp rename to heron/tmanager/tests/cpp/server/stateful_restorer_unittest.cpp index d5ea03e0ff4..db3bda4169a 100644 --- a/heron/tmaster/tests/cpp/server/stateful_restorer_unittest.cpp +++ b/heron/tmanager/tests/cpp/server/stateful_restorer_unittest.cpp @@ -22,7 +22,7 @@ #include #include #include "server/dummystmgr.h" -#include "server/dummytmaster.h" +#include "server/dummytmanager.h" #include "gtest/gtest.h" #include "proto/messages.h" #include "basics/basics.h" @@ -164,32 +164,32 @@ void StartServer(std::shared_ptr ss) { ss->loop(); } -void StartDummyTMaster(std::shared_ptr& ss, 
heron::testing::DummyTMaster*& mgr, - std::thread*& tmaster_thread, sp_int32 tmaster_port) { +void StartDummyTManager(std::shared_ptr& ss, heron::testing::DummyTManager*& mgr, + std::thread*& tmanager_thread, sp_int32 tmanager_port) { // Create the select server for this stmgr to use ss = std::make_shared(); NetworkOptions options; options.set_host(LOCALHOST); - options.set_port(tmaster_port); + options.set_port(tmanager_port); options.set_max_packet_size(1024 * 1024); options.set_socket_family(PF_INET); - mgr = new heron::testing::DummyTMaster(ss, options); + mgr = new heron::testing::DummyTManager(ss, options); mgr->Start(); - tmaster_thread = new std::thread(StartServer, ss); + tmanager_thread = new std::thread(StartServer, ss); } void StartDummyStMgr(std::shared_ptr& ss, heron::testing::DummyStMgr*& mgr, - std::thread*& stmgr_thread, const sp_string tmaster_host, - sp_int32 tmaster_port, const sp_string& stmgr_id, sp_int32 stmgr_port, + std::thread*& stmgr_thread, const sp_string tmanager_host, + sp_int32 tmanager_port, const sp_string& stmgr_id, sp_int32 stmgr_port, const std::vector& instances) { // Create the select server for this stmgr to use ss = std::make_shared(); NetworkOptions options; - options.set_host(tmaster_host); - options.set_port(tmaster_port); + options.set_host(tmanager_host); + options.set_port(tmanager_port); options.set_max_packet_size(1024 * 1024); options.set_socket_family(PF_INET); @@ -202,7 +202,7 @@ void StartDummyStMgr(std::shared_ptr& ss, heron::testing::DummySt struct CommonResources { // arguments - sp_int32 tmaster_port_; + sp_int32 tmanager_port_; sp_int32 stmgr_baseport_; sp_string topology_name_; sp_string topology_id_; @@ -219,10 +219,10 @@ struct CommonResources { std::vector stmgrs_id_list_; heron::proto::api::Topology* topology_; - // Tmaster - heron::testing::DummyTMaster* tmaster_; - std::thread* tmaster_thread_; - Piper* tmaster_piper_; + // Tmanager + heron::testing::DummyTManager* tmanager_; + std::thread* tmanager_thread_; + Piper* tmanager_piper_; // Stmgr std::vector stmgrs_list_; @@ -239,13 +239,13 @@ struct CommonResources { std::map instanceid_stmgr_; CommonResources() : topology_(nullptr), - tmaster_(nullptr), - tmaster_thread_(nullptr), - tmaster_piper_(nullptr) { + tmanager_(nullptr), + tmanager_thread_(nullptr), + tmanager_piper_(nullptr) { } }; -void StartDummyTMaster(CommonResources& common) { +void StartDummyTManager(CommonResources& common) { // Generate a dummy topology common.topology_ = GenerateDummyTopology( common.topology_name_, common.topology_id_, common.num_spouts_, common.num_spout_instances_, @@ -258,13 +258,13 @@ void StartDummyTMaster(CommonResources& common) { common.stmgrs_id_list_.push_back(id); } - // Start the tmaster - std::shared_ptr tmaster_eventloop = nullptr; + // Start the tmanager + std::shared_ptr tmanager_eventloop = nullptr; - StartDummyTMaster(tmaster_eventloop, common.tmaster_, common.tmaster_thread_, - common.tmaster_port_); - common.ss_list_.push_back(tmaster_eventloop); - common.tmaster_piper_ = new Piper(tmaster_eventloop); + StartDummyTManager(tmanager_eventloop, common.tmanager_, common.tmanager_thread_, + common.tmanager_port_); + common.ss_list_.push_back(tmanager_eventloop); + common.tmanager_piper_ = new Piper(tmanager_eventloop); } void DistributeWorkersAcrossStmgrs(CommonResources& common) { @@ -312,7 +312,7 @@ void StartStMgrs(CommonResources& common) { std::shared_ptr stmgr_ss = nullptr; heron::testing::DummyStMgr* mgr = nullptr; std::thread* stmgr_thread = nullptr; - 
StartDummyStMgr(stmgr_ss, mgr, stmgr_thread, LOCALHOST, common.tmaster_port_, + StartDummyStMgr(stmgr_ss, mgr, stmgr_thread, LOCALHOST, common.tmanager_port_, common.stmgrs_id_list_[i], common.stmgr_baseport_ + i, common.stmgr_instance_list_[i]); @@ -325,7 +325,7 @@ void StartStMgrs(CommonResources& common) { void SetUpCommonResources(CommonResources& common) { // Initialize dummy params - common.tmaster_port_ = 53001; + common.tmanager_port_ = 53001; common.stmgr_baseport_ = 53002; common.topology_name_ = "mytopology"; common.topology_id_ = "abcd-9999"; @@ -339,9 +339,9 @@ void SetUpCommonResources(CommonResources& common) { void TearCommonResources(CommonResources& common) { delete common.topology_; - delete common.tmaster_; - delete common.tmaster_thread_; - delete common.tmaster_piper_; + delete common.tmanager_; + delete common.tmanager_thread_; + delete common.tmanager_piper_; // Cleanup the stream managers for (size_t i = 0; i < common.stmgrs_list_.size(); ++i) { @@ -362,8 +362,8 @@ TEST(StatefulRestorer, test_restore_send) { CommonResources common; SetUpCommonResources(common); - // Start the tmaster etc. - StartDummyTMaster(common); + // Start the tmanager etc. + StartDummyTManager(common); // Distribute workers across stmgrs DistributeWorkersAcrossStmgrs(common); @@ -372,18 +372,18 @@ TEST(StatefulRestorer, test_restore_send) { StartStMgrs(common); // Wait until all stmgrs registered - while (common.tmaster_->stmgrs().size() != common.num_stmgrs_) sleep(1); + while (common.tmanager_->stmgrs().size() != common.num_stmgrs_) sleep(1); // Make sure that stmgrs have not gotten any restore message for (auto stmgr : common.stmgrs_list_) { EXPECT_FALSE(stmgr->GotRestoreMessage()); } // Start Restorer - auto restorer = new heron::tmaster::StatefulRestorer(); + auto restorer = new heron::tmanager::StatefulRestorer(); EXPECT_FALSE(restorer->IsInProgress()); - common.tmaster_piper_->ExecuteInEventLoop( - std::bind(&heron::tmaster::StatefulRestorer::StartRestore, - restorer, "ckpt-1", common.tmaster_->stmgrs())); + common.tmanager_piper_->ExecuteInEventLoop( + std::bind(&heron::tmanager::StatefulRestorer::StartRestore, + restorer, "ckpt-1", common.tmanager_->stmgrs())); sleep(1); // all stmgrs should have received restore message @@ -397,9 +397,9 @@ TEST(StatefulRestorer, test_restore_send) { for (auto stmgr : common.stmgrs_list_) { EXPECT_FALSE(restorer->GotResponse(stmgr->stmgrid())); EXPECT_FALSE(stmgr->GotStartProcessingMessage()); - common.tmaster_piper_->ExecuteInEventLoop( - std::bind(&heron::tmaster::StatefulRestorer::HandleStMgrRestored, - restorer, stmgr->stmgrid(), "ckpt-1", txid, common.tmaster_->stmgrs())); + common.tmanager_piper_->ExecuteInEventLoop( + std::bind(&heron::tmanager::StatefulRestorer::HandleStMgrRestored, + restorer, stmgr->stmgrid(), "ckpt-1", txid, common.tmanager_->stmgrs())); sleep(1); EXPECT_TRUE(restorer->GotResponse(stmgr->stmgrid())); } @@ -420,7 +420,7 @@ TEST(StatefulRestorer, test_restore_send) { } // Wait for the threads to terminate - common.tmaster_thread_->join(); + common.tmanager_thread_->join(); for (size_t i = 0; i < common.stmgrs_threads_list_.size(); ++i) { common.stmgrs_threads_list_[i]->join(); } diff --git a/heron/tmaster/tests/cpp/server/tcontroller_unittest.cpp b/heron/tmanager/tests/cpp/server/tcontroller_unittest.cpp similarity index 95% rename from heron/tmaster/tests/cpp/server/tcontroller_unittest.cpp rename to heron/tmanager/tests/cpp/server/tcontroller_unittest.cpp index ffe6669ea93..8e25b9accf9 100644 --- 
a/heron/tmaster/tests/cpp/server/tcontroller_unittest.cpp +++ b/heron/tmanager/tests/cpp/server/tcontroller_unittest.cpp @@ -35,7 +35,7 @@ TEST(TController, test_parse_runtime_config) { parameters.push_back("bolt:test:3"); std::map> conf; - heron::tmaster::TController::ParseRuntimeConfig(parameters, conf); + heron::tmanager::TController::ParseRuntimeConfig(parameters, conf); EXPECT_EQ(conf.size(), 3); EXPECT_EQ(conf["_topology_"]["test"], "1"); diff --git a/heron/tmaster/tests/cpp/server/tmaster_unittest.cpp b/heron/tmanager/tests/cpp/server/tmanager_unittest.cpp similarity index 90% rename from heron/tmaster/tests/cpp/server/tmaster_unittest.cpp rename to heron/tmanager/tests/cpp/server/tmanager_unittest.cpp index fc5612d394d..f3eaabc6dd0 100644 --- a/heron/tmaster/tests/cpp/server/tmaster_unittest.cpp +++ b/heron/tmanager/tests/cpp/server/tmanager_unittest.cpp @@ -38,7 +38,7 @@ #include "config/physical-plan-helper.h" #include "statemgr/heron-statemgr.h" #include "statemgr/heron-localfilestatemgr.h" -#include "manager/tmaster.h" +#include "manager/tmanager.h" #include "manager/stmgr.h" const sp_string SPOUT_NAME = "spout"; @@ -227,7 +227,7 @@ const std::string CreateLocalStateOnFS(heron::proto::api::Topology* topology, snprintf(dpath, sizeof(dpath), "%s", "/tmp/XXXXXX"); mkdtemp(dpath); - // Write the dummy topology/tmaster location out to the local file system + // Write the dummy topology/tmanager location out to the local file system // via thestate mgr heron::common::HeronLocalFileStateMgr state_mgr(dpath, ss); state_mgr.CreateTopology(*topology, NULL); @@ -279,30 +279,30 @@ void StartServer(std::shared_ptr ss) { ss->loop(); } -void StartTMaster(std::shared_ptr& ss, heron::tmaster::TMaster*& tmaster, - std::thread*& tmaster_thread, const std::string& zkhostportlist, +void StartTManager(std::shared_ptr& ss, heron::tmanager::TManager*& tmanager, + std::thread*& tmanager_thread, const std::string& zkhostportlist, const std::string& topology_name, const std::string& topology_id, - const std::string& dpath, const std::string& tmaster_host, sp_int32 tmaster_port, - sp_int32 tmaster_controller_port, sp_int32 ckptmgr_port) { + const std::string& dpath, const std::string& tmanager_host, sp_int32 tmanager_port, + sp_int32 tmanager_controller_port, sp_int32 ckptmgr_port) { ss = std::make_shared(); - tmaster = new heron::tmaster::TMaster(zkhostportlist, topology_name, topology_id, dpath, - tmaster_controller_port, tmaster_port, tmaster_port + 2, - tmaster_port + 3, ckptmgr_port, + tmanager = new heron::tmanager::TManager(zkhostportlist, topology_name, topology_id, dpath, + tmanager_controller_port, tmanager_port, tmanager_port + 2, + tmanager_port + 3, ckptmgr_port, metrics_sinks_config_filename, LOCALHOST, ss); - tmaster_thread = new std::thread(StartServer, ss); - // tmaster_thread->start(); + tmanager_thread = new std::thread(StartServer, ss); + // tmanager_thread->start(); } void StartDummyStMgr(std::shared_ptr& ss, heron::testing::DummyStMgr*& mgr, - std::thread*& stmgr_thread, const std::string tmaster_host, - sp_int32 tmaster_port, const std::string& stmgr_id, sp_int32 stmgr_port, + std::thread*& stmgr_thread, const std::string tmanager_host, + sp_int32 tmanager_port, const std::string& stmgr_id, sp_int32 stmgr_port, const std::vector& instances) { // Create the select server for this stmgr to use ss = std::make_shared(); NetworkOptions options; - options.set_host(tmaster_host); - options.set_port(tmaster_port); + options.set_host(tmanager_host); + options.set_port(tmanager_port); 
options.set_max_packet_size(1024 * 1024); options.set_socket_family(PF_INET); @@ -315,9 +315,9 @@ void StartDummyStMgr(std::shared_ptr& ss, heron::testing::DummySt struct CommonResources { // arguments - std::string tmaster_host_; - sp_int32 tmaster_port_; - sp_int32 tmaster_controller_port_; + std::string tmanager_host_; + sp_int32 tmanager_port_; + sp_int32 tmanager_controller_port_; sp_int32 ckptmgr_port_; sp_int32 stmgr_baseport_; std::string zkhostportlist_; @@ -338,8 +338,8 @@ struct CommonResources { heron::proto::api::Topology* topology_; heron::proto::system::PackingPlan* packing_plan_; - heron::tmaster::TMaster* tmaster_; - std::thread* tmaster_thread_; + heron::tmanager::TManager* tmanager_; + std::thread* tmanager_thread_; // Stmgr std::vector stmgrs_list_; @@ -356,7 +356,7 @@ struct CommonResources { std::map instanceid_stmgr_; - CommonResources() : topology_(NULL), tmaster_(NULL), tmaster_thread_(NULL) { + CommonResources() : topology_(NULL), tmanager_(NULL), tmanager_thread_(NULL) { // Create the sington for heron_internals_config_reader // if it does not exist if (!heron::config::HeronInternalsConfigReader::Exists()) { @@ -365,7 +365,7 @@ struct CommonResources { } }; -void StartTMaster(CommonResources& common) { +void StartTManager(CommonResources& common) { // Generate a dummy topology common.topology_ = GenerateDummyTopology( common.topology_name_, common.topology_id_, common.num_spouts_, common.num_spout_instances_, @@ -383,14 +383,14 @@ void StartTMaster(CommonResources& common) { common.stmgrs_id_list_.push_back(id); } - // Start the tmaster - std::shared_ptr tmaster_eventLoop; + // Start the tmanager + std::shared_ptr tmanager_eventLoop; - StartTMaster(tmaster_eventLoop, common.tmaster_, common.tmaster_thread_, common.zkhostportlist_, + StartTManager(tmanager_eventLoop, common.tmanager_, common.tmanager_thread_, common.zkhostportlist_, common.topology_name_, common.topology_id_, common.dpath_, - common.tmaster_host_, common.tmaster_port_, common.tmaster_controller_port_, + common.tmanager_host_, common.tmanager_port_, common.tmanager_controller_port_, common.ckptmgr_port_); - common.ss_list_.push_back(tmaster_eventLoop); + common.ss_list_.push_back(tmanager_eventLoop); } void DistributeWorkersAcrossStmgrs(CommonResources& common) { @@ -438,7 +438,7 @@ void StartStMgrs(CommonResources& common) { std::shared_ptr stmgr_ss; heron::testing::DummyStMgr* mgr = NULL; std::thread* stmgr_thread = NULL; - StartDummyStMgr(stmgr_ss, mgr, stmgr_thread, LOCALHOST, common.tmaster_port_, + StartDummyStMgr(stmgr_ss, mgr, stmgr_thread, LOCALHOST, common.tmanager_port_, common.stmgrs_id_list_[i], common.stmgr_baseport_ + i, common.stmgr_instance_list_[i]); @@ -450,9 +450,9 @@ void StartStMgrs(CommonResources& common) { void SetUpCommonResources(CommonResources& common) { // Initialize dummy params - common.tmaster_host_ = LOCALHOST; - common.tmaster_port_ = 53001; - common.tmaster_controller_port_ = 53002; + common.tmanager_host_ = LOCALHOST; + common.tmanager_port_ = 53001; + common.tmanager_controller_port_ = 53002; common.ckptmgr_port_ = 53003; common.stmgr_baseport_ = 53001; common.topology_name_ = "mytopology"; @@ -471,8 +471,8 @@ void SetUpCommonResources(CommonResources& common) { void TearCommonResources(CommonResources& common) { delete common.topology_; delete common.packing_plan_; - delete common.tmaster_thread_; - delete common.tmaster_; + delete common.tmanager_thread_; + delete common.tmanager_; // Cleanup the stream managers for (size_t i = 0; i < 
common.stmgrs_list_.size(); ++i) { @@ -562,7 +562,7 @@ void UpdateRuntimeConfig(std::string topology_id, } -// Test to make sure that the tmaster forms the right pplan +// Test to make sure that the tmanager forms the right pplan // and sends it to all stmgrs TEST(StMgr, test_pplan_distribute) { CommonResources common; @@ -570,8 +570,8 @@ TEST(StMgr, test_pplan_distribute) { sp_int8 num_workers_per_stmgr_ = (((common.num_spouts_ * common.num_spout_instances_) + (common.num_bolts_ * common.num_bolt_instances_)) / common.num_stmgrs_); - // Start the tmaster etc. - StartTMaster(common); + // Start the tmanager etc. + StartTManager(common); // Distribute workers across stmgrs DistributeWorkersAcrossStmgrs(common); @@ -589,7 +589,7 @@ TEST(StMgr, test_pplan_distribute) { } // Wait for the threads to terminate - common.tmaster_thread_->join(); + common.tmanager_thread_->join(); for (size_t i = 0; i < common.stmgrs_threads_list_.size(); ++i) { common.stmgrs_threads_list_[i]->join(); } @@ -609,7 +609,7 @@ TEST(StMgr, test_pplan_distribute) { } // Test to see if activate/deactivate works -// and that its distributed to tmasters +// and that its distributed to tmanagers TEST(StMgr, test_activate_deactivate) { CommonResources common; SetUpCommonResources(common); @@ -617,8 +617,8 @@ TEST(StMgr, test_activate_deactivate) { sp_int8 num_workers_per_stmgr_ = (((common.num_spouts_ * common.num_spout_instances_) + (common.num_bolts_ * common.num_bolt_instances_)) / common.num_stmgrs_); - // Start the tmaster etc. - StartTMaster(common); + // Start the tmanager etc. + StartTManager(common); // Distribute workers across stmgrs DistributeWorkersAcrossStmgrs(common); @@ -635,7 +635,7 @@ TEST(StMgr, test_activate_deactivate) { } std::thread* deactivate_thread = - new std::thread(ControlTopology, common.topology_id_, common.tmaster_controller_port_, false); + new std::thread(ControlTopology, common.topology_id_, common.tmanager_controller_port_, false); // deactivate_thread->start(); deactivate_thread->join(); delete deactivate_thread; @@ -648,7 +648,7 @@ TEST(StMgr, test_activate_deactivate) { } std::thread* activate_thread = - new std::thread(ControlTopology, common.topology_id_, common.tmaster_controller_port_, true); + new std::thread(ControlTopology, common.topology_id_, common.tmanager_controller_port_, true); // activate_thread->start(); activate_thread->join(); delete activate_thread; @@ -666,7 +666,7 @@ TEST(StMgr, test_activate_deactivate) { } // Wait for the threads to terminate - common.tmaster_thread_->join(); + common.tmanager_thread_->join(); for (size_t i = 0; i < common.stmgrs_threads_list_.size(); ++i) { common.stmgrs_threads_list_[i]->join(); } @@ -693,8 +693,8 @@ TEST(StMgr, test_runtime_config) { CommonResources common; SetUpCommonResources(common); - // Start the tmaster etc. - StartTMaster(common); + // Start the tmanager etc. + StartTManager(common); // Distribute workers across stmgrs DistributeWorkersAcrossStmgrs(common); @@ -703,8 +703,8 @@ TEST(StMgr, test_runtime_config) { StartStMgrs(common); // Wait till we get the physical plan populated on the stmgrs, then - // verify current config values in tmaster as well as stream managers. - // common.tmaster_->FetchPhysicalPlan(); + // verify current config values in tmanager as well as stream managers. 
+  // common.tmanager_->FetchPhysicalPlan();
   // auto t = init_pplan->topology();
   // auto c = t.topology_config();
   for (size_t i = 0; i < common.stmgrs_list_.size(); ++i) {
@@ -712,25 +712,25 @@ TEST(StMgr, test_runtime_config) {
   }
 
   // Test ValidateRuntimeConfig()
-  heron::tmaster::ComponentConfigMap validate_good_config_map;
+  heron::tmanager::ComponentConfigMap validate_good_config_map;
   std::map<sp_string, sp_string> validate_good_config;
   validate_good_config[topology_init_config_1] = "1";
   validate_good_config[topology_init_config_2] = "2";
   const char* topology_key = heron::config::TopologyConfigHelper::GetReservedTopologyConfigKey();
   validate_good_config_map[topology_key] = validate_good_config;
   validate_good_config_map["spout1"] = validate_good_config;
-  EXPECT_EQ(common.tmaster_->ValidateRuntimeConfig(validate_good_config_map), true);
+  EXPECT_EQ(common.tmanager_->ValidateRuntimeConfig(validate_good_config_map), true);
 
-  heron::tmaster::ComponentConfigMap validate_bad_config_map;
+  heron::tmanager::ComponentConfigMap validate_bad_config_map;
   std::map<sp_string, sp_string> validate_bad_config;
   validate_good_config[topology_init_config_1] = "1";
   validate_bad_config_map["unknown_component"] = validate_good_config;
-  EXPECT_EQ(common.tmaster_->ValidateRuntimeConfig(validate_bad_config_map), false);
+  EXPECT_EQ(common.tmanager_->ValidateRuntimeConfig(validate_bad_config_map), false);
 
   // Post runtime config request with no configs and expect 400 response.
   std::vector<sp_string> no_config;
   std::thread* no_config_update_thread = new std::thread(UpdateRuntimeConfig,
-      common.topology_id_, common.tmaster_controller_port_, no_config, 400, "no_config");
+      common.topology_id_, common.tmanager_controller_port_, no_config, 400, "no_config");
   no_config_update_thread->join();
   delete no_config_update_thread;
 
@@ -738,7 +738,7 @@ TEST(StMgr, test_runtime_config) {
   std::vector<sp_string> wrong_config1;
   wrong_config1.push_back("badformat");  // Bad format
   std::thread* wrong_config1_update_thread = new std::thread(UpdateRuntimeConfig,
-      common.topology_id_, common.tmaster_controller_port_, wrong_config1, 400, "wrong_config1");
+      common.topology_id_, common.tmanager_controller_port_, wrong_config1, 400, "wrong_config1");
   wrong_config1_update_thread->join();
   delete wrong_config1_update_thread;
 
@@ -747,7 +747,7 @@ TEST(StMgr, test_runtime_config) {
   // Component doesn't exist
   wrong_config2.push_back("bad_component:topology.runtime.bolt.test_config:1");
   std::thread* wrong_config2_update_thread = new std::thread(UpdateRuntimeConfig,
-      common.topology_id_, common.tmaster_controller_port_, wrong_config2, 400, "wrong_config2");
+      common.topology_id_, common.tmanager_controller_port_, wrong_config2, 400, "wrong_config2");
   wrong_config2_update_thread->join();
   delete wrong_config2_update_thread;
 
@@ -758,7 +758,7 @@ TEST(StMgr, test_runtime_config) {
   good_config.push_back(runtime_test_spout + ":" + spout_init_config + ":3");
   good_config.push_back(runtime_test_bolt + ":" + bolt_init_config + ":4");
   std::thread* good_config_update_thread = new std::thread(UpdateRuntimeConfig,
-      common.topology_id_, common.tmaster_controller_port_, good_config, 200, "good_config");
+      common.topology_id_, common.tmanager_controller_port_, good_config, 200, "good_config");
   good_config_update_thread->join();
   delete good_config_update_thread;
 
@@ -780,7 +780,7 @@ TEST(StMgr, test_runtime_config) {
     EXPECT_EQ(updated_bolt_config[bolt_init_config + ":runtime"], "4");
   }
   std::map<sp_string, sp_string> updated_config, updated_spout_config, updated_bolt_config;
-  const auto pplan = common.tmaster_->getPhysicalPlan();
+  const auto pplan = common.tmanager_->getPhysicalPlan();
   heron::config::TopologyConfigHelper::GetTopologyRuntimeConfig(pplan->topology(), updated_config);
   EXPECT_EQ(updated_config[topology_init_config_1 + ":runtime"], "1");
   EXPECT_EQ(updated_config[topology_init_config_2 + ":runtime"], "2");
@@ -797,7 +797,7 @@ TEST(StMgr, test_runtime_config) {
   }
 
   // Wait for the threads to terminate
-  common.tmaster_thread_->join();
+  common.tmanager_thread_->join();
   for (size_t i = 0; i < common.stmgrs_threads_list_.size(); ++i) {
     common.stmgrs_threads_list_[i]->join();
   }
@@ -810,7 +810,7 @@ int main(int argc, char** argv) {
   heron::common::Initialize(argv[0]);
 
-  std::cout << "Current working directory (to find tmaster logs) "
+  std::cout << "Current working directory (to find tmanager logs) "
            << ProcessUtils::getCurrentWorkingDirectory() << std::endl;
 
   testing::InitGoogleTest(&argc, argv);
   sp_string configFile = heron_internals_config_filename;

From 29d2be921abde0e8628dbb6457d35d853b576973 Mon Sep 17 00:00:00 2001
From: Jim Bo
Date: Mon, 2 Nov 2020 22:36:56 -0500
Subject: [PATCH 29/32] fixing linting errors from master => manager renames

---
 .../org/apache/heron/instance/Executor.java | 2 +-
 .../apache/heron/instance/HeronInstance.java | 2 +-
 .../heron/instance/bolt/BoltInstance.java | 2 +-
 .../heron/instance/spout/SpoutInstance.java | 2 +-
 .../grouping/AbstractTupleRoutingTest.java | 2 +-
 .../heron/instance/CommunicatorTester.java | 2 +-
 .../heron/instance/bolt/BoltInstanceTest.java | 2 +-
 .../bolt/BoltStatefulInstanceTest.java | 20 ++++++++++++-------
 .../spout/ActivateDeactivateTest.java | 2 +-
 .../instance/spout/SpoutInstanceTest.java | 5 +++--
 .../spout/SpoutStatefulInstanceTest.java | 20 ++++++++++++-------
 .../MetricsCacheManagerHttpServer.java | 3 ++-
 .../metricscache/MetricsCache.java | 3 ++-
 .../metricscache/MetricsCacheTest.java | 3 ++-
 .../heron/metricsmgr/MetricsManager.java | 2 +-
 .../metricsmgr/MetricsManagerServer.java | 3 ++-
 .../sink/metricscache/MetricsCacheSink.java | 3 ++-
 .../sink/tmanager/TManagerSink.java | 3 ++-
 .../simulator/instance/BoltInstance.java | 2 +-
 .../simulator/instance/SpoutInstance.java | 2 +-
 heron/tools/admin/src/python/standalone.py | 3 ++-
 21 files changed, 54 insertions(+), 34 deletions(-)

diff --git a/heron/instance/src/java/org/apache/heron/instance/Executor.java b/heron/instance/src/java/org/apache/heron/instance/Executor.java
index fe4b176510f..f3a2f1dbdff 100644
--- a/heron/instance/src/java/org/apache/heron/instance/Executor.java
+++ b/heron/instance/src/java/org/apache/heron/instance/Executor.java
@@ -32,9 +32,9 @@
 import org.apache.heron.api.state.HashMapState;
 import org.apache.heron.api.state.State;
 import org.apache.heron.common.basics.Communicator;
+import org.apache.heron.common.basics.ExecutorLooper;
 import org.apache.heron.common.basics.FileUtils;
 import org.apache.heron.common.basics.SingletonRegistry;
-import org.apache.heron.common.basics.ExecutorLooper;
 import org.apache.heron.common.config.SystemConfig;
 import org.apache.heron.common.utils.metrics.MetricsCollector;
 import org.apache.heron.common.utils.misc.PhysicalPlanHelper;
diff --git a/heron/instance/src/java/org/apache/heron/instance/HeronInstance.java b/heron/instance/src/java/org/apache/heron/instance/HeronInstance.java
index e86c8382e07..605026e18d1 100644
--- a/heron/instance/src/java/org/apache/heron/instance/HeronInstance.java
+++ b/heron/instance/src/java/org/apache/heron/instance/HeronInstance.java
@@ -41,9 +41,9 @@
 import org.apache.commons.cli.Options;
 import org.apache.commons.cli.ParseException;
 import org.apache.heron.common.basics.Communicator;
+import org.apache.heron.common.basics.ExecutorLooper;
 import org.apache.heron.common.basics.NIOLooper;
 import org.apache.heron.common.basics.SingletonRegistry;
-import org.apache.heron.common.basics.ExecutorLooper;
 import org.apache.heron.common.basics.SysUtils;
 import org.apache.heron.common.config.SystemConfig;
 import org.apache.heron.common.utils.logging.ErrorReportLoggingHandler;
diff --git a/heron/instance/src/java/org/apache/heron/instance/bolt/BoltInstance.java b/heron/instance/src/java/org/apache/heron/instance/bolt/BoltInstance.java
index a7b93893d79..75de4e086b0 100644
--- a/heron/instance/src/java/org/apache/heron/instance/bolt/BoltInstance.java
+++ b/heron/instance/src/java/org/apache/heron/instance/bolt/BoltInstance.java
@@ -41,9 +41,9 @@
 import org.apache.heron.api.topology.IUpdatable;
 import org.apache.heron.api.utils.Utils;
 import org.apache.heron.common.basics.Communicator;
+import org.apache.heron.common.basics.ExecutorLooper;
 import org.apache.heron.common.basics.FileUtils;
 import org.apache.heron.common.basics.SingletonRegistry;
-import org.apache.heron.common.basics.ExecutorLooper;
 import org.apache.heron.common.basics.TypeUtils;
 import org.apache.heron.common.config.SystemConfig;
 import org.apache.heron.common.utils.metrics.FullBoltMetrics;
diff --git a/heron/instance/src/java/org/apache/heron/instance/spout/SpoutInstance.java b/heron/instance/src/java/org/apache/heron/instance/spout/SpoutInstance.java
index 5241f9bd2d2..988b4cd84b5 100644
--- a/heron/instance/src/java/org/apache/heron/instance/spout/SpoutInstance.java
+++ b/heron/instance/src/java/org/apache/heron/instance/spout/SpoutInstance.java
@@ -41,9 +41,9 @@
 import org.apache.heron.api.utils.Utils;
 import org.apache.heron.common.basics.ByteAmount;
 import org.apache.heron.common.basics.Communicator;
+import org.apache.heron.common.basics.ExecutorLooper;
 import org.apache.heron.common.basics.FileUtils;
 import org.apache.heron.common.basics.SingletonRegistry;
-import org.apache.heron.common.basics.ExecutorLooper;
 import org.apache.heron.common.basics.TypeUtils;
 import org.apache.heron.common.config.SystemConfig;
 import org.apache.heron.common.utils.metrics.FullSpoutMetrics;
diff --git a/heron/instance/tests/java/org/apache/heron/grouping/AbstractTupleRoutingTest.java b/heron/instance/tests/java/org/apache/heron/grouping/AbstractTupleRoutingTest.java
index 3d8b411ff40..3f871dc679f 100644
--- a/heron/instance/tests/java/org/apache/heron/grouping/AbstractTupleRoutingTest.java
+++ b/heron/instance/tests/java/org/apache/heron/grouping/AbstractTupleRoutingTest.java
@@ -34,8 +34,8 @@
 import org.apache.heron.common.basics.SingletonRegistry;
 import org.apache.heron.common.testhelpers.HeronServerTester;
 import org.apache.heron.common.utils.misc.PhysicalPlanHelper;
-import org.apache.heron.instance.InstanceControlMsg;
 import org.apache.heron.instance.ExecutorTester;
+import org.apache.heron.instance.InstanceControlMsg;
 import org.apache.heron.proto.system.HeronTuples;
 import org.apache.heron.proto.system.PhysicalPlans;
 import org.apache.heron.resource.TestBolt;
diff --git a/heron/instance/tests/java/org/apache/heron/instance/CommunicatorTester.java b/heron/instance/tests/java/org/apache/heron/instance/CommunicatorTester.java
index e5d1000569f..003a7b361c4 100644
--- a/heron/instance/tests/java/org/apache/heron/instance/CommunicatorTester.java
+++ b/heron/instance/tests/java/org/apache/heron/instance/CommunicatorTester.java
@@ -25,8 +25,8 @@
 import com.google.protobuf.Message;
 
 import org.apache.heron.common.basics.Communicator;
-import org.apache.heron.common.basics.NIOLooper;
 import org.apache.heron.common.basics.ExecutorLooper;
+import org.apache.heron.common.basics.NIOLooper;
 import org.apache.heron.common.basics.WakeableLooper;
 import org.apache.heron.common.testhelpers.CommunicatorTestHelper;
 import org.apache.heron.proto.system.Metrics;
diff --git a/heron/instance/tests/java/org/apache/heron/instance/bolt/BoltInstanceTest.java b/heron/instance/tests/java/org/apache/heron/instance/bolt/BoltInstanceTest.java
index 0829545893d..80c903ec037 100644
--- a/heron/instance/tests/java/org/apache/heron/instance/bolt/BoltInstanceTest.java
+++ b/heron/instance/tests/java/org/apache/heron/instance/bolt/BoltInstanceTest.java
@@ -36,8 +36,8 @@
 import org.apache.heron.common.basics.SingletonRegistry;
 import org.apache.heron.common.testhelpers.HeronServerTester;
 import org.apache.heron.common.utils.misc.PhysicalPlanHelper;
-import org.apache.heron.instance.InstanceControlMsg;
 import org.apache.heron.instance.ExecutorTester;
+import org.apache.heron.instance.InstanceControlMsg;
 import org.apache.heron.proto.system.HeronTuples;
 import org.apache.heron.proto.system.PhysicalPlans;
 import org.apache.heron.resource.Constants;
diff --git a/heron/instance/tests/java/org/apache/heron/instance/bolt/BoltStatefulInstanceTest.java b/heron/instance/tests/java/org/apache/heron/instance/bolt/BoltStatefulInstanceTest.java
index 3b916f49251..6213c171ca6 100644
--- a/heron/instance/tests/java/org/apache/heron/instance/bolt/BoltStatefulInstanceTest.java
+++ b/heron/instance/tests/java/org/apache/heron/instance/bolt/BoltStatefulInstanceTest.java
@@ -34,8 +34,8 @@
 import org.apache.heron.api.serializer.JavaSerializer;
 import org.apache.heron.common.basics.SingletonRegistry;
 import org.apache.heron.common.utils.misc.PhysicalPlanHelper;
-import org.apache.heron.instance.InstanceControlMsg;
 import org.apache.heron.instance.ExecutorTester;
+import org.apache.heron.instance.InstanceControlMsg;
 import org.apache.heron.proto.system.HeronTuples;
 import org.apache.heron.proto.system.PhysicalPlans;
 import org.apache.heron.resource.Constants;
@@ -73,7 +73,8 @@ public void testPreSaveAndPostSave() throws Exception {
     SingletonRegistry.INSTANCE.registerSingleton(Constants.POSTSAVE_LATCH, postSaveLatch);
 
     executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0"));
-    executorTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0"));
+    executorTester.getInControlQueue().offer(
+        UnitTestHelper.buildStartInstanceProcessingMessage("c0"));
     executorTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCBolt());
 
     // initially non of preSave or postSave are invoked yet
@@ -87,7 +88,8 @@ public void testPreSaveAndPostSave() throws Exception {
     assertEquals(1, postSaveLatch.getCount());
 
     // this should invoke postSave
-    executorTester.getInControlQueue().offer(UnitTestHelper.buildCheckpointSavedMessage("c0", "p0"));
+    executorTester.getInControlQueue().offer(
+        UnitTestHelper.buildCheckpointSavedMessage("c0", "p0"));
     assertTrue(postSaveLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS));
     assertEquals(0, preSaveLatch.getCount());
     assertEquals(0, postSaveLatch.getCount());
@@ -99,7 +101,8 @@ public void testPreRestore() throws InterruptedException {
     SingletonRegistry.INSTANCE.registerSingleton(Constants.PRERESTORE_LATCH, preRestoreLatch);
 
     executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0"));
-    executorTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0"));
+    executorTester.getInControlQueue().offer(
+        UnitTestHelper.buildStartInstanceProcessingMessage("c0"));
     executorTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCBolt());
 
     assertEquals(1, preRestoreLatch.getCount());
@@ -126,7 +129,8 @@ public void testPostSaveBlockExecute() throws Exception {
     SingletonRegistry.INSTANCE.registerSingleton(Constants.EXECUTE_LATCH, executeLatch);
 
     executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0"));
-    executorTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0"));
+    executorTester.getInControlQueue().offer(
+        UnitTestHelper.buildStartInstanceProcessingMessage("c0"));
     executorTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCBolt());
 
     // initially non of preSave or postSave are invoked yet
@@ -154,7 +158,8 @@ public void testPostSaveBlockExecute() throws Exception {
     assertEquals(1, executeLatch.getCount());
 
     // this should invoke postSave
-    executorTester.getInControlQueue().offer(UnitTestHelper.buildCheckpointSavedMessage("c0", "p0"));
+    executorTester.getInControlQueue().offer(
+        UnitTestHelper.buildCheckpointSavedMessage("c0", "p0"));
     assertTrue(postSaveLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS));
     assertTrue(executeLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS));
 
@@ -176,7 +181,8 @@ public void testExecuteNotBlocked() throws Exception {
     SingletonRegistry.INSTANCE.registerSingleton(Constants.EXECUTE_LATCH, executeLatch);
 
     executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0"));
-    executorTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0"));
+    executorTester.getInControlQueue().offer(
+        UnitTestHelper.buildStartInstanceProcessingMessage("c0"));
     executorTester.getInControlQueue().offer(buildPhysicalPlanMessageForStatefulBolt());
 
     // initially non of preSave or postSave are invoked yet
diff --git a/heron/instance/tests/java/org/apache/heron/instance/spout/ActivateDeactivateTest.java b/heron/instance/tests/java/org/apache/heron/instance/spout/ActivateDeactivateTest.java
index 0e66170b1b6..fcd431edcf0 100644
--- a/heron/instance/tests/java/org/apache/heron/instance/spout/ActivateDeactivateTest.java
+++ b/heron/instance/tests/java/org/apache/heron/instance/spout/ActivateDeactivateTest.java
@@ -29,8 +29,8 @@
 import org.apache.heron.api.generated.TopologyAPI;
 import org.apache.heron.common.basics.SingletonRegistry;
 import org.apache.heron.common.utils.misc.PhysicalPlanHelper;
-import org.apache.heron.instance.InstanceControlMsg;
 import org.apache.heron.instance.ExecutorTester;
+import org.apache.heron.instance.InstanceControlMsg;
 import org.apache.heron.proto.system.PhysicalPlans;
 import org.apache.heron.resource.Constants;
 import org.apache.heron.resource.UnitTestHelper;
diff --git a/heron/instance/tests/java/org/apache/heron/instance/spout/SpoutInstanceTest.java b/heron/instance/tests/java/org/apache/heron/instance/spout/SpoutInstanceTest.java
index 18f0ae91679..835deb2ec0f 100644
--- a/heron/instance/tests/java/org/apache/heron/instance/spout/SpoutInstanceTest.java
+++ b/heron/instance/tests/java/org/apache/heron/instance/spout/SpoutInstanceTest.java
@@ -39,8 +39,8 @@
 import org.apache.heron.common.basics.SingletonRegistry;
 import org.apache.heron.common.testhelpers.HeronServerTester;
 import org.apache.heron.common.utils.misc.PhysicalPlanHelper;
-import org.apache.heron.instance.InstanceControlMsg;
 import org.apache.heron.instance.ExecutorTester;
+import org.apache.heron.instance.InstanceControlMsg;
 import org.apache.heron.proto.system.HeronTuples;
 import org.apache.heron.proto.system.Metrics;
 import org.apache.heron.proto.system.PhysicalPlans;
@@ -170,7 +170,8 @@ public void testGatherMetrics() {
       public void run() {
         for (int i = 0; i < Constants.RETRY_TIMES; i++) {
           if (!executorTester.getExecutorMetricsOut().isEmpty()) {
-            Metrics.MetricPublisherPublishMessage msg = executorTester.getExecutorMetricsOut().poll();
+            Metrics.MetricPublisherPublishMessage msg =
+                executorTester.getExecutorMetricsOut().poll();
             Set<String> metricsName = new HashSet<>();
             for (Metrics.MetricDatum metricDatum : msg.getMetricsList()) {
               metricsName.add(metricDatum.getName());
diff --git a/heron/instance/tests/java/org/apache/heron/instance/spout/SpoutStatefulInstanceTest.java b/heron/instance/tests/java/org/apache/heron/instance/spout/SpoutStatefulInstanceTest.java
index 228296f3ac7..fd7cfd482f8 100644
--- a/heron/instance/tests/java/org/apache/heron/instance/spout/SpoutStatefulInstanceTest.java
+++ b/heron/instance/tests/java/org/apache/heron/instance/spout/SpoutStatefulInstanceTest.java
@@ -33,8 +33,8 @@
 import org.apache.heron.api.spout.IRichSpout;
 import org.apache.heron.common.basics.SingletonRegistry;
 import org.apache.heron.common.utils.misc.PhysicalPlanHelper;
-import org.apache.heron.instance.InstanceControlMsg;
 import org.apache.heron.instance.ExecutorTester;
+import org.apache.heron.instance.InstanceControlMsg;
 import org.apache.heron.proto.system.PhysicalPlans;
 import org.apache.heron.resource.Constants;
 import org.apache.heron.resource.MockPhysicalPlansBuilder;
@@ -69,7 +69,8 @@ public void testPreSaveAndPostSave() throws Exception {
     SingletonRegistry.INSTANCE.registerSingleton(Constants.POSTSAVE_LATCH, postSaveLatch);
 
     executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0"));
-    executorTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0"));
+    executorTester.getInControlQueue().offer(
+        UnitTestHelper.buildStartInstanceProcessingMessage("c0"));
     executorTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCSpout());
 
     // initially non of preSave or postSave are invoked yet
@@ -83,7 +84,8 @@ public void testPreSaveAndPostSave() throws Exception {
     assertEquals(1, postSaveLatch.getCount());
 
     // this should invoke postSave
-    executorTester.getInControlQueue().offer(UnitTestHelper.buildCheckpointSavedMessage("c0", "p0"));
+    executorTester.getInControlQueue().offer(
+        UnitTestHelper.buildCheckpointSavedMessage("c0", "p0"));
     assertTrue(postSaveLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS));
     assertEquals(0, preSaveLatch.getCount());
     assertEquals(0, postSaveLatch.getCount());
@@ -95,7 +97,8 @@ public void testPreRestore() throws InterruptedException {
     SingletonRegistry.INSTANCE.registerSingleton(Constants.PRERESTORE_LATCH, preRestoreLatch);
 
     executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0"));
-    executorTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0"));
+    executorTester.getInControlQueue().offer(
+        UnitTestHelper.buildStartInstanceProcessingMessage("c0"));
     executorTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCSpout());
 
     assertEquals(1, preRestoreLatch.getCount());
@@ -127,7 +130,8 @@ public void testPostSaveBlockExecute() throws Exception {
     SingletonRegistry.INSTANCE.registerSingleton(Constants.EMIT_LATCH, emitLatch);
 
     executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0"));
-    executorTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0"));
+    executorTester.getInControlQueue().offer(
+        UnitTestHelper.buildStartInstanceProcessingMessage("c0"));
     executorTester.getInControlQueue().offer(buildPhysicalPlanMessageFor2PCSpout());
 
     // initially non of preSave or postSave are invoked yet
@@ -155,7 +159,8 @@ public void testPostSaveBlockExecute() throws Exception {
     assertEquals(1, emitLatch.getCount());
 
     // this should invoke postSave
-    executorTester.getInControlQueue().offer(UnitTestHelper.buildCheckpointSavedMessage("c0", "p0"));
+    executorTester.getInControlQueue().offer(
+        UnitTestHelper.buildCheckpointSavedMessage("c0", "p0"));
     assertTrue(postSaveLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS));
     assertTrue(emitLatch.await(Constants.TEST_WAIT_TIME.toMillis(), TimeUnit.MILLISECONDS));
 
@@ -183,7 +188,8 @@ public void testExecuteNotBlocked() throws Exception {
     SingletonRegistry.INSTANCE.registerSingleton(Constants.EMIT_LATCH, emitLatch);
 
     executorTester.getInControlQueue().offer(UnitTestHelper.buildRestoreInstanceState("c0"));
-    executorTester.getInControlQueue().offer(UnitTestHelper.buildStartInstanceProcessingMessage("c0"));
+    executorTester.getInControlQueue().offer(
+        UnitTestHelper.buildStartInstanceProcessingMessage("c0"));
     executorTester.getInControlQueue().offer(buildPhysicalPlanMessageForStatefulSpout());
 
     // initially non of preSave or postSave are invoked yet
diff --git a/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/MetricsCacheManagerHttpServer.java b/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/MetricsCacheManagerHttpServer.java
index 07a3f897f4a..e8113ddf84e 100644
--- a/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/MetricsCacheManagerHttpServer.java
+++ b/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/MetricsCacheManagerHttpServer.java
@@ -118,7 +118,8 @@ public static void main(String[] args)
     byte[] responseData = NetworkUtils.readHttpResponse(con);
 
     // parse response data
-    TopologyManager.MetricResponse response = TopologyManager.MetricResponse.parseFrom(responseData);
+    TopologyManager.MetricResponse response =
+        TopologyManager.MetricResponse.parseFrom(responseData);
     System.out.println(response.toString());
   }
 
diff --git a/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/metricscache/MetricsCache.java b/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/metricscache/MetricsCache.java
index 366bb295abf..da9f4af784b 100644
--- a/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/metricscache/MetricsCache.java
+++ b/heron/metricscachemgr/src/java/org/apache/heron/metricscachemgr/metricscache/MetricsCache.java
@@ -206,7 +206,8 @@ public TopologyManager.MetricResponse getMetrics(TopologyManager.MetricRequest r
 
     MetricRequest request1 = MetricsCacheQueryUtils.fromProtobuf(request);
     MetricResponse response1 = cache.getMetrics(request1, metricNameType);
-    TopologyManager.MetricResponse response = MetricsCacheQueryUtils.toProtobuf(response1, request1);
+    TopologyManager.MetricResponse response =
+        MetricsCacheQueryUtils.toProtobuf(response1, request1);
     return response;
   }
 }
diff --git a/heron/metricscachemgr/tests/java/org/apache/heron/metricscachemgr/metricscache/MetricsCacheTest.java b/heron/metricscachemgr/tests/java/org/apache/heron/metricscachemgr/metricscache/MetricsCacheTest.java
index b891b2b9790..d246c05bbaa 100644
--- a/heron/metricscachemgr/tests/java/org/apache/heron/metricscachemgr/metricscache/MetricsCacheTest.java
+++ b/heron/metricscachemgr/tests/java/org/apache/heron/metricscachemgr/metricscache/MetricsCacheTest.java
@@ -60,7 +60,8 @@ public void testMetricCache() throws IOException {
         .build());
 
     // query last 10 seconds
-    TopologyManager.MetricResponse response = mc.getMetrics(TopologyManager.MetricRequest.newBuilder()
+    TopologyManager.MetricResponse response = mc.getMetrics(
+        TopologyManager.MetricRequest.newBuilder()
         .setComponentName("c1").addInstanceId("i1")
         .setInterval(10).addMetric("__jvm-uptime-secs")
         .build());
diff --git a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/MetricsManager.java b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/MetricsManager.java
index b4788c370e6..374f2b553f6 100644
--- a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/MetricsManager.java
+++ b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/MetricsManager.java
@@ -40,9 +40,9 @@
 import org.apache.commons.cli.ParseException;
 import org.apache.heron.api.metric.MultiCountMetric;
 import org.apache.heron.common.basics.Communicator;
+import org.apache.heron.common.basics.ExecutorLooper;
 import org.apache.heron.common.basics.NIOLooper;
 import org.apache.heron.common.basics.SingletonRegistry;
-import org.apache.heron.common.basics.ExecutorLooper;
 import org.apache.heron.common.basics.SysUtils;
 import org.apache.heron.common.basics.TypeUtils;
 import org.apache.heron.common.config.SystemConfig;
diff --git a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/MetricsManagerServer.java b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/MetricsManagerServer.java
index fbe284948a0..c940c7fcf20 100644
--- a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/MetricsManagerServer.java
+++ b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/MetricsManagerServer.java
@@ -178,7 +178,8 @@ public void onMessage(SocketChannel channel, Message message) {
     } else if (message instanceof Metrics.TManagerLocationRefreshMessage) {
       // LOG down where the TManager Location comes from
      LOG.info("TManager Location is refresh from: " + channel.socket().getRemoteSocketAddress());
-      handleTManagerLocationRefreshMessage(request, (Metrics.TManagerLocationRefreshMessage) message);
+      handleTManagerLocationRefreshMessage(
+          request, (Metrics.TManagerLocationRefreshMessage) message);
     } else {
       LOG.severe("Unknown kind of message received from Metrics Manager");
     }
diff --git a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSink.java b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSink.java
index 1f348c89cd8..32a48f36a55 100644
--- a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSink.java
+++ b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/metricscache/MetricsCacheSink.java
@@ -439,7 +439,8 @@ public void uncaughtException(Thread t, Throwable e) {
         LOG.log(Level.SEVERE, "metricsCacheClient dies in thread: " + t, e);
 
         Duration reconnectInterval = TypeUtils.getDuration(
-            metricsCacheClientConfig.get(KEY_TMANAGER_RECONNECT_INTERVAL_SEC), ChronoUnit.SECONDS);
+            metricsCacheClientConfig.get(KEY_TMANAGER_RECONNECT_INTERVAL_SEC),
+            ChronoUnit.SECONDS);
         SysUtils.sleep(reconnectInterval);
 
         LOG.info("Restarting metricsCacheClient");
diff --git a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSink.java b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSink.java
index b10f7a2d660..f0b80d5ae33 100644
--- a/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSink.java
+++ b/heron/metricsmgr/src/java/org/apache/heron/metricsmgr/sink/tmanager/TManagerSink.java
@@ -381,7 +381,8 @@ public synchronized void startNewPrimaryClient() {
         currentTManagerLocation.getServerPort(), socketOptions, metricsCommunicator,
         TypeUtils.getDuration(
-            tmanagerClientConfig.get(KEY_TMANAGER_RECONNECT_INTERVAL_SEC), ChronoUnit.SECONDS));
+            tmanagerClientConfig.get(KEY_TMANAGER_RECONNECT_INTERVAL_SEC),
+            ChronoUnit.SECONDS));
 
     int attempts = startedAttempts.incrementAndGet();
     LOG.severe(String.format("Starting TManagerClient for the %d time.", attempts));
diff --git a/heron/simulator/src/java/org/apache/heron/simulator/instance/BoltInstance.java b/heron/simulator/src/java/org/apache/heron/simulator/instance/BoltInstance.java
index c3e61ce6dbb..f23070b0993 100644
--- a/heron/simulator/src/java/org/apache/heron/simulator/instance/BoltInstance.java
+++ b/heron/simulator/src/java/org/apache/heron/simulator/instance/BoltInstance.java
@@ -29,8 +29,8 @@
 import org.apache.heron.api.generated.TopologyAPI;
 import org.apache.heron.common.basics.ByteAmount;
 import org.apache.heron.common.basics.Communicator;
-import org.apache.heron.common.basics.SingletonRegistry;
 import org.apache.heron.common.basics.ExecutorLooper;
+import org.apache.heron.common.basics.SingletonRegistry;
 import org.apache.heron.common.config.SystemConfig;
 import org.apache.heron.common.utils.misc.PhysicalPlanHelper;
 import org.apache.heron.common.utils.tuple.TupleImpl;
diff --git a/heron/simulator/src/java/org/apache/heron/simulator/instance/SpoutInstance.java b/heron/simulator/src/java/org/apache/heron/simulator/instance/SpoutInstance.java
index 48cd9f6c11d..ffb123ef2d0 100644
--- a/heron/simulator/src/java/org/apache/heron/simulator/instance/SpoutInstance.java
+++ b/heron/simulator/src/java/org/apache/heron/simulator/instance/SpoutInstance.java
@@ -27,8 +27,8 @@
 import org.apache.heron.api.Config;
 import org.apache.heron.common.basics.ByteAmount;
 import org.apache.heron.common.basics.Communicator;
-import org.apache.heron.common.basics.SingletonRegistry;
 import org.apache.heron.common.basics.ExecutorLooper;
+import org.apache.heron.common.basics.SingletonRegistry;
 import org.apache.heron.common.basics.TypeUtils;
 import org.apache.heron.common.config.SystemConfig;
 import org.apache.heron.common.utils.misc.PhysicalPlanHelper;
diff --git a/heron/tools/admin/src/python/standalone.py b/heron/tools/admin/src/python/standalone.py
index 9ec2d815a51..2034a6b51d6 100644
--- a/heron/tools/admin/src/python/standalone.py
+++ b/heron/tools/admin/src/python/standalone.py
@@ -221,7 +221,8 @@ def template_secondary_hcl(cl_args, primaries):
   '''
   Template secondary config file
   '''
-  secondary_config_template = "%s/standalone/templates/secondary.template.hcl" % cl_args["config_path"]
+  secondary_config_template = "%s/standalone/templates/secondary.template.hcl" \
+    % cl_args["config_path"]
   secondary_config_actual = "%s/standalone/resources/secondary.hcl" % cl_args["config_path"]
   primaries_in_quotes = ['"%s"' % primary for primary in primaries]
   template_file(secondary_config_template, secondary_config_actual,

From 47594277c0808f706435f8ba2a78a086b17e1764 Mon Sep 17 00:00:00 2001
From: Jim Bo
Date: Mon, 2 Nov 2020 23:41:06 -0500
Subject: [PATCH 30/32] fixing reversions of master => manager in website2/website/versioned_docs/version-0.20.2-incubating

---
 .../compiling-code-organization.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/website2/website/versioned_docs/version-0.20.2-incubating/compiling-code-organization.md b/website2/website/versioned_docs/version-0.20.2-incubating/compiling-code-organization.md
index c2621529456..cef9b70284c 100644
--- a/website2/website/versioned_docs/version-0.20.2-incubating/compiling-code-organization.md
+++ b/website2/website/versioned_docs/version-0.20.2-incubating/compiling-code-organization.md
@@ -33,7 +33,7 @@ cluster, see [Building Topologies](topology-development-topology-api-java) inste
 The primary programming languages for Heron are C++, Java, and Python.
 
 * **C++ 11** is used for most of Heron's core components, including the
-[Topology Master](heron-architecture#topology-master), and
+[Topology Manager](heron-architecture#topology-manager), and
 [Stream Manager](heron-architecture#stream-manager).
 
 * **Java 11** is used primarily for Heron's [topology
@@ -92,11 +92,11 @@ Heron components.
 
 ## Topology Components
 
-### Topology Master
+### Topology Manager
 
 The C++ code for Heron's [Topology
-Master](heron-architecture#topology-master) is written in C++ can be
-found in [`heron/tmaster`](https://github.com/apache/incubator-heron/tree/master/heron/tmaster).
+Manager](heron-architecture#topology-manager) is written in C++ and can be
+found in [`heron/tmanager`](https://github.com/apache/incubator-heron/tree/master/heron/tmanager).
 
 ### Stream Manager
 

From 0ede4193bdb200de609401c610c92da0f2544277 Mon Sep 17 00:00:00 2001
From: Jim Bo
Date: Tue, 3 Nov 2020 10:34:05 -0500
Subject: [PATCH 31/32] fixing linting errors from master => manager renames in branch merge

---
 heron/tools/tracker/src/python/query.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/heron/tools/tracker/src/python/query.py b/heron/tools/tracker/src/python/query.py
index 28cf2e70614..c2207b6ad20 100644
--- a/heron/tools/tracker/src/python/query.py
+++ b/heron/tools/tracker/src/python/query.py
@@ -56,7 +56,13 @@ def __init__(self, tracker):
 
   # pylint: disable=attribute-defined-outside-init, no-member
   @tornado.gen.coroutine
-  def execute_query(self, tmanager: TManagerLocation, query_string: str, start: int, end: int) -> Any:
+  def execute_query(
+    self,
+    tmanager: TManagerLocation,
+    query_string: str,
+    start: int,
+    end: int
+  ) -> Any:
     """ execute query """
     if not tmanager:
       raise Exception("No tmanager found")

From 768720bd8e99b1327aa0392d528351fd18d4af5d Mon Sep 17 00:00:00 2001
From: Jim Bo
Date: Tue, 3 Nov 2020 17:03:32 -0500
Subject: [PATCH 32/32] re-indenting 0ede4193bdb200de609401c610c92da0f2544277

---
 heron/tools/tracker/src/python/query.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/heron/tools/tracker/src/python/query.py b/heron/tools/tracker/src/python/query.py
index c2207b6ad20..6143b11d176 100644
--- a/heron/tools/tracker/src/python/query.py
+++ b/heron/tools/tracker/src/python/query.py
@@ -57,11 +57,11 @@ def __init__(self, tracker):
   # pylint: disable=attribute-defined-outside-init, no-member
   @tornado.gen.coroutine
   def execute_query(
-    self,
-    tmanager: TManagerLocation,
-    query_string: str,
-    start: int,
-    end: int
+      self,
+      tmanager: TManagerLocation,
+      query_string: str,
+      start: int,
+      end: int
   ) -> Any:
     """ execute query """
     if not tmanager: