From 91fa952b7d3a2254672b6939b9442a097dacb545 Mon Sep 17 00:00:00 2001 From: Runhang Li Date: Tue, 27 Jun 2017 11:16:17 -0700 Subject: [PATCH 1/5] Enable heap prof of stmgr in heron-shell. --- heron/executor/src/python/heron_executor.py | 9 +++- heron/shell/src/python/handlers/__init__.py | 1 + .../python/handlers/stmgrheapprofhandler.py | 45 +++++++++++++++++++ heron/shell/src/python/main.py | 1 + heron/stmgr/src/cpp/BUILD | 2 + heron/stmgr/src/cpp/server/stmgr-main.cpp | 4 ++ third_party/gperftools/BUILD | 1 + 7 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 heron/shell/src/python/handlers/stmgrheapprofhandler.py diff --git a/heron/executor/src/python/heron_executor.py b/heron/executor/src/python/heron_executor.py index c7b5fcb856c..5623397716c 100755 --- a/heron/executor/src/python/heron_executor.py +++ b/heron/executor/src/python/heron_executor.py @@ -770,7 +770,7 @@ def start_process_monitor(self): Log.info("%s exited too many times" % name) sys.exit(1) time.sleep(self.interval_between_runs) - p = self._run_process(name, command) + p = self._run_process(name, command, self.shell_env) del self.processes_to_monitor[pid] self.processes_to_monitor[p.pid] =\ ProcessInfo(p, name, command, old_process_info.attempts + 1) @@ -892,6 +892,13 @@ def main(): # PEX_ROOT shell environment before forking the processes shell_env = os.environ.copy() shell_env["PEX_ROOT"] = os.path.join(os.path.abspath('.'), ".pex") + # Refer to https://gperftools.github.io/gperftools/heapprofile.html + # for details of settings of gperftools heap profiler + shell_env["HEAPPROFILE"] = "stmgr.hprof" + shell_env["HEAP_PROFILE_ALLOCATION_INTERVAL"] = "2147483648" + shell_env["HEAP_PROFILE_INUSE_INTERVAL"] = "1073741824" + shell_env["HEAPPROFILESIGNAL"] = str(signal.SIGUSR1) + # Instantiate the executor, bind it to signal handlers and launch it executor = HeronExecutor(sys.argv, shell_env) diff --git a/heron/shell/src/python/handlers/__init__.py 
b/heron/shell/src/python/handlers/__init__.py index 7c574d58272..195a1bc0c5a 100644 --- a/heron/shell/src/python/handlers/__init__.py +++ b/heron/shell/src/python/handlers/__init__.py @@ -8,3 +8,4 @@ from jstackhandler import JstackHandler from memoryhistogramhandler import MemoryHistogramHandler from pidhandler import PidHandler +from stmgrheapprofhandler import StmgrHeapProfHandler diff --git a/heron/shell/src/python/handlers/stmgrheapprofhandler.py b/heron/shell/src/python/handlers/stmgrheapprofhandler.py new file mode 100644 index 00000000000..c6464bf9649 --- /dev/null +++ b/heron/shell/src/python/handlers/stmgrheapprofhandler.py @@ -0,0 +1,45 @@ +# Copyright 2017 Twitter. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +''' stmgrheapprofhandler.py ''' +import glob +import json +import os +import signal +import tornado.web + +from heron.shell.src.python import utils + +class StmgrHeapProfHandler(tornado.web.RequestHandler): + """ + Responsible for getting the process ID for an instance. 
+ """ + + # pylint: disable=attribute-defined-outside-init + @tornado.web.asynchronous + def get(self): + ''' get method ''' + self.content_type = 'application/json' + stmgr_pid_files = glob.glob('stmgr*.pid') + try: + pid_file = stmgr_pid_files[0] + with open(pid_file, 'r') as f: + pid = f.read() + os.kill(int(pid), signal.SIGUSR1) + self.write('Performing heap profiling on stream manager...') + self.finish() + except: + self.write("Not stream manager found") + self.set_status(404) + self.finish() diff --git a/heron/shell/src/python/main.py b/heron/shell/src/python/main.py index cd3ad61ce43..1fbeebdb8ec 100644 --- a/heron/shell/src/python/main.py +++ b/heron/shell/src/python/main.py @@ -34,6 +34,7 @@ (r"^/filedata/(.*)", handlers.FileDataHandler), (r"^/filestats/(.*)", handlers.FileStatsHandler), (r"^/download/(.*)", handlers.DownloadHandler), + (r"^/stmgrheapprof", handlers.StmgrHeapProfHandler), ]) diff --git a/heron/stmgr/src/cpp/BUILD b/heron/stmgr/src/cpp/BUILD index 548581ac1c2..a726c594dd5 100644 --- a/heron/stmgr/src/cpp/BUILD +++ b/heron/stmgr/src/cpp/BUILD @@ -111,6 +111,7 @@ cc_binary( "server/stmgr-main.cpp", ], copts = [ + "-Ithird_party", "-Iheron", "-Iheron/common/src/cpp", "-Iheron/statemgrs/src/cpp", @@ -129,6 +130,7 @@ cc_binary( "//heron/common/src/cpp/metrics:metrics-cxx", "//heron/statemgrs/src/cpp:statemgrs-cxx", "//third_party/yaml-cpp:yaml-cxx", + "//third_party/gperftools:profiler-cxx", ], linkstatic = 1, ) diff --git a/heron/stmgr/src/cpp/server/stmgr-main.cpp b/heron/stmgr/src/cpp/server/stmgr-main.cpp index 4daebfe3774..c68614541cf 100644 --- a/heron/stmgr/src/cpp/server/stmgr-main.cpp +++ b/heron/stmgr/src/cpp/server/stmgr-main.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include #include @@ -55,6 +56,8 @@ int main(int argc, char* argv[]) { sp_int32 shell_port = atoi(argv[11]); sp_string heron_internals_config_filename = argv[12]; + HeapProfilerStart("stmgr"); + EventLoopImpl ss; // Read heron internals config from local file @@ -82,5 +85,6 @@ int main(int argc, char* argv[]) { high_watermark, low_watermark); mgr.Init(); ss.loop(); + HeapProfilerStop(); return 0; } diff --git a/third_party/gperftools/BUILD b/third_party/gperftools/BUILD index 195b2e2ed3b..5161b76c85b 100644 --- a/third_party/gperftools/BUILD +++ b/third_party/gperftools/BUILD @@ -100,6 +100,7 @@ cc_library( "lib/libprofiler.a", ], hdrs = [ + "include/gperftools/heap-profiler.h", "include/gperftools/profiler.h", ], includes = ["include"], From 6649e77a2f5edc12e93cc72a7f06a75e58da80c0 Mon Sep 17 00:00:00 2001 From: Runhang Li Date: Tue, 27 Jun 2017 13:45:32 -0700 Subject: [PATCH 2/5] Only profile when receiving signal. --- heron/executor/src/python/heron_executor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/heron/executor/src/python/heron_executor.py b/heron/executor/src/python/heron_executor.py index 5623397716c..5f4b62bc282 100755 --- a/heron/executor/src/python/heron_executor.py +++ b/heron/executor/src/python/heron_executor.py @@ -895,8 +895,8 @@ def main(): # Refer to https://gperftools.github.io/gperftools/heapprofile.html # for details of settings of gperftools heap profiler shell_env["HEAPPROFILE"] = "stmgr.hprof" - shell_env["HEAP_PROFILE_ALLOCATION_INTERVAL"] = "2147483648" - shell_env["HEAP_PROFILE_INUSE_INTERVAL"] = "1073741824" + shell_env["HEAP_PROFILE_ALLOCATION_INTERVAL"] = "0" + shell_env["HEAP_PROFILE_INUSE_INTERVAL"] = "0" shell_env["HEAPPROFILESIGNAL"] = str(signal.SIGUSR1) From 8217d9240b90d933d85d01fcd786df72cdf5f6c7 Mon Sep 17 00:00:00 2001 From: Runhang Li Date: Thu, 20 Jul 2017 11:35:42 -0700 Subject: [PATCH 3/5] Do not change website. 
--- website/.gitignore | 4 + website/Makefile | 9 +- website/assets/sass/_pygments.scss | 74 ++++ website/assets/sass/_typography.scss | 159 ++++++++ website/assets/sass/_variables.scss | 15 + website/assets/sass/bootstrap/_variables.scss | 5 - website/assets/sass/style.scss | 96 ++--- website/config.yaml | 9 +- website/content/docs/contributors/testing.md | 4 +- .../docs/developers/compiling/linux.md | 4 +- .../content/docs/developers/python/bolts.md | 31 +- .../content/docs/developers/python/spouts.md | 78 ++-- .../docs/developers/python/topologies.md | 338 +++++++++++++----- .../docs/getting-started-troubleshooting.md | 52 +-- website/content/docs/getting-started.md | 71 ++-- .../operators/deployment/configuration.md | 12 +- website/data/toc.yaml | 50 +-- website/gulpfile.js | 51 +-- website/layouts/docs/single.ace | 23 +- website/layouts/index.ace | 8 +- website/layouts/partials/footer.ace | 8 +- website/layouts/partials/navbar.ace | 2 - website/layouts/shortcodes/bazelVersion.html | 2 +- website/layouts/shortcodes/currentYear.html | 1 + website/layouts/shortcodes/githubMaster.html | 2 +- website/layouts/shortcodes/heronVersion.html | 2 +- .../layouts/shortcodes/heronpyVersion.html | 1 + website/package.json | 8 +- website/scripts/javadocs.sh | 6 +- website/scripts/python-doc-gen.sh | 16 + website/scripts/setup.sh | 4 +- 31 files changed, 761 insertions(+), 384 deletions(-) create mode 100644 website/assets/sass/_pygments.scss create mode 100644 website/assets/sass/_typography.scss create mode 100644 website/assets/sass/_variables.scss create mode 100644 website/layouts/shortcodes/currentYear.html create mode 100644 website/layouts/shortcodes/heronpyVersion.html create mode 100755 website/scripts/python-doc-gen.sh diff --git a/website/.gitignore b/website/.gitignore index d8b83df9cdb..c85c2fb564b 100644 --- a/website/.gitignore +++ b/website/.gitignore @@ -1 +1,5 @@ +# Generated artifacts for the website package-lock.json +node_modules/ +public/ +static/ diff 
--git a/website/Makefile b/website/Makefile index ff6aa0adaf7..61ed1ae23eb 100755 --- a/website/Makefile +++ b/website/Makefile @@ -5,7 +5,7 @@ clean: rm -rf public/* static/* serve: - @hugo server --watch --ignoreCache + @hugo serve develop-static-assets: @npm run develop @@ -23,16 +23,19 @@ linkchecker: javadocs: @scripts/javadocs.sh +python-docs: + @scripts/python-doc-gen.sh + site: @make clean @make build-static-assets @make pages @make javadocs - @make linkchecker + @make python-docs travis-site: @make clean @make build-static-assets @make pages @scripts/javadocs.sh --travis - @make linkchecker + @make python-docs diff --git a/website/assets/sass/_pygments.scss b/website/assets/sass/_pygments.scss new file mode 100644 index 00000000000..f21b0742475 --- /dev/null +++ b/website/assets/sass/_pygments.scss @@ -0,0 +1,74 @@ +$yellow: darken(yellow, 9%); + +.highlight pre code { + .hll { background-color: #49483e } + .c { color: #75715e; } /* Comment */ + .err { color: #960050; background-color: #1e0010 } /* Error */ + .k { color: #66d9ef } /* Keyword */ + .l { color: #ae81ff } /* Literal */ + .n { color: #f8f8f2 } /* Name */ + .o { color: #f92672 } /* Operator */ + .p { color: #f8f8f2 } /* Punctuation */ + .ch { color: #75715e } /* Comment.Hashbang */ + .cm { color: #75715e } /* Comment.Multiline */ + .cp { color: #75715e } /* Comment.Preproc */ + .cpf { color: #75715e } /* Comment.PreprocFile */ + .c1 { color: #75715e } /* Comment.Single */ + .cs { color: #75715e } /* Comment.Special */ + .gd { color: #f92672 } /* Generic.Deleted */ + .ge { font-style: italic } /* Generic.Emph */ + .gi { color: #a6e22e } /* Generic.Inserted */ + .gs { font-weight: bold } /* Generic.Strong */ + .gu { color: #75715e } /* Generic.Subheading */ + .kc { color: #66d9ef } /* Keyword.Constant */ + .kd { color: #66d9ef } /* Keyword.Declaration */ + .kn { color: #f92672 } /* Keyword.Namespace */ + .kp { color: #66d9ef } /* Keyword.Pseudo */ + .kr { color: #66d9ef } /* Keyword.Reserved */ + 
.kt { color: #66d9ef } /* Keyword.Type */ + .ld { color: $yellow; } /* Literal.Date */ + .m { color: #ae81ff } /* Literal.Number */ + .s { color: $yellow; } /* Literal.String */ + .na { color: #a6e22e } /* Name.Attribute */ + .nb { color: #f8f8f2 } /* Name.Builtin */ + .nc { color: #a6e22e } /* Name.Class */ + .no { color: #66d9ef } /* Name.Constant */ + .nd { color: #a6e22e } /* Name.Decorator */ + .ni { color: #f8f8f2 } /* Name.Entity */ + .ne { color: #a6e22e } /* Name.Exception */ + .nf { color: #a6e22e } /* Name.Function */ + .nl { color: #f8f8f2 } /* Name.Label */ + .nn { color: #f8f8f2 } /* Name.Namespace */ + .nx { color: #a6e22e } /* Name.Other */ + .py { color: #f8f8f2 } /* Name.Property */ + .nt { color: #f92672 } /* Name.Tag */ + .nv { color: #f8f8f2 } /* Name.Variable */ + .ow { color: #f92672 } /* Operator.Word */ + .w { color: #f8f8f2 } /* Text.Whitespace */ + .mb { color: #ae81ff } /* Literal.Number.Bin */ + .mf { color: #ae81ff } /* Literal.Number.Float */ + .mh { color: #ae81ff } /* Literal.Number.Hex */ + .mi { color: #ae81ff } /* Literal.Number.Integer */ + .mo { color: #ae81ff } /* Literal.Number.Oct */ + .sb { color: $yellow; } /* Literal.String.Backtick */ + .sc { color: $yellow; } /* Literal.String.Char */ + .sd { color: $yellow; } /* Literal.String.Doc */ + .s2 { color: $yellow; } /* Literal.String.Double */ + .se { color: #ae81ff } /* Literal.String.Escape */ + .sh { color: $yellow; } /* Literal.String.Heredoc */ + .si { color: $yellow; } /* Literal.String.Interpol */ + .sx { color: $yellow; } /* Literal.String.Other */ + .sr { color: $yellow; } /* Literal.String.Regex */ + .s1 { color: $yellow; } /* Literal.String.Single */ + .ss { color: $yellow; } /* Literal.String.Symbol */ + .bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */ + .vc { color: #f8f8f2 } /* Name.Variable.Class */ + .vg { color: #f8f8f2 } /* Name.Variable.Global */ + .vi { color: #f8f8f2 } /* Name.Variable.Instance */ + .il { color: #ae81ff } /* Literal.Number.Integer.Long */ 
+ + &.language-bash { + .nb, .k { color: $light-gray; } + .o { color: $heron-blue-green; } + } +} diff --git a/website/assets/sass/_typography.scss b/website/assets/sass/_typography.scss new file mode 100644 index 00000000000..78d30b882e4 --- /dev/null +++ b/website/assets/sass/_typography.scss @@ -0,0 +1,159 @@ +$base-margin: 1.5rem; +$base-padding: 1rem; +$base-font-size: 18px; +$base-line-height: 1.5; + +article.hn-docs-content { + code { + color: $heron-blue-green; + background-color: lighten($light-gray, 25%); + } + + h1 { + font-size: $base-font-size * 2; + margin-bottom: $base-margin * 1.3; + } + + h2 { + font-size: $base-font-size * 1.8; + margin-bottom: $base-margin * 1.2; + } + + h3 { + font-size: $base-font-size * 1.6; + margin-bottom: $base-margin * 1.1; + } + + h4 { + font-size: $base-font-size * 1.4; + margin-bottom: $base-margin; + } + + img { + width: 100%; + margin: $base-margin 0; + + &:hover { + width: 125%; + } + } + + p { + line-height: $base-line-height; + font-size: $base-font-size; + + & + p { + margin-top: $base-margin; + } + + & + { + h1, h2, h3, h4 { + margin-top: $base-margin * 1.5; + } + } + + code { + font-size: inherit; + } + } + + blockquote { + margin: $base-margin 0; + padding: 20px; + border-left: .5rem solid $heron-blue-green; + background-color: lighten($light-gray, 22%); + + h4 { + font-size: $base-font-size * 1.5; + + & + p { + margin-top: $base-margin; + } + } + + p { + font-size: $base-font-size * 1.1; + } + + ul { + margin-bottom: 0; + } + } + + table { + margin: $base-margin 0; + + thead { + font-size: $base-font-size * 1.1; + border-bottom: 1px solid black; + + tr th { + padding-right: 5%; + } + } + + tbody { + tr { + border-bottom: 1px solid $light-gray; + + td { + padding: 1.5% 7.5% 1.5% 0; + + code { + font-size: $base-font-size; + } + } + + &:hover { + background-color: lighten($light-gray, 27%); + } + } + } + } + + ol { + li { + font-size: $base-font-size * .9; + } + } + + ul { + margin: $base-margin 0; + + li { + 
font-size: $base-font-size; + list-style-type: square; + padding-bottom: 0; + + ul { + margin-top: 0; + } + + & + li { + margin-top: $base-margin * .15; + } + } + } + + .highlight { + overflow-x: auto; + margin: $base-margin * .8 0; + + pre { + color: $light-gray; + padding: $base-padding * 1.25; + + code { + font-size: $base-font-size; + color: $light-gray; + background-color: #263238; + } + } + + & + { + h1, h2, h3, h4 { + margin-top: $base-margin * 1.75; + } + } + } +} diff --git a/website/assets/sass/_variables.scss b/website/assets/sass/_variables.scss new file mode 100644 index 00000000000..1acf0068e4f --- /dev/null +++ b/website/assets/sass/_variables.scss @@ -0,0 +1,15 @@ +// Set sans serif font +@import url(https://fonts.googleapis.com/css?family=Open+Sans); +$font-family-sans-serif: "Open Sans", sans-serif; + +// Set monospaced font +@import url(https://fonts.googleapis.com/css?family=Inconsolata); +$font-family-monospaced: "Inconsolata", monospace; + +// Heron-specific colors +$heron-blue-green: #4F8992; +$light-gray: lighten(#9d9d9d, 10%); +$heron-blue-gray: #263238; + +// Dimensions +$docs-main-bottom-margin: 8rem; diff --git a/website/assets/sass/bootstrap/_variables.scss b/website/assets/sass/bootstrap/_variables.scss index 91abccf95d5..cef4b37db53 100644 --- a/website/assets/sass/bootstrap/_variables.scss +++ b/website/assets/sass/bootstrap/_variables.scss @@ -14,11 +14,6 @@ $gray: lighten($gray-base, 33.5%) !default; // #555 $gray-light: lighten($gray-base, 46.7%) !default; // #777 $gray-lighter: lighten($gray-base, 93.5%) !default; // #eee -// Heron-specific colors -$heron-blue-green: #4F8992; -$light-gray: #9d9d9d; -$heron-blue-gray: #263238; - $brand-primary: $heron-blue-green !default; $brand-success: #5cb85c !default; $brand-info: #5bc0de !default; diff --git a/website/assets/sass/style.scss b/website/assets/sass/style.scss index f28688800dd..bd131c0dca4 100644 --- a/website/assets/sass/style.scss +++ b/website/assets/sass/style.scss @@ -1,7 
+1,7 @@ +@import "variables"; +@import "pygments"; @import "bootstrap"; - -// Import Open Sans font -@import url(https://fonts.googleapis.com/css?family=Open+Sans); +@import "typography"; // Helpers div[class^="landing-feature"] { @@ -19,9 +19,30 @@ div[class^="landing-feature"] { } } +section.main-page-lower-section { + width: 95%; +} + // Set global attributes html, body { - font-family: "Open Sans", sans-serif; + font-family: $font-family-sans-serif; + font-size: 1rem; +} + +pre, code { + font-family: $font-family-monospaced; +} + +// This set of parameters ensures a "sticky" footer at the bottom +// of the page using flexbox +body.body { + display: flex; + min-height: 100vh; + flex-direction: column; + + main.main { + flex: 1; + } } a:hover { @@ -147,77 +168,12 @@ aside.hn-sidebar { // Main content on docs pages section.hn-docs-main { - padding-bottom: 20px; - padding-left: 20px; - padding-top: 0; - + margin-bottom: $docs-main-bottom-margin; header.hn-docs-header { .hn-docs-description { font-weight: bolder; } } - - article.hn-docs-content { - code { - padding: 2px; - border-radius: 2px; - } - - p { - font-size: 1.2em; - } - - ul li, ul li p, ol p { - font-size: 1.1em; - } - - h1 { - font-size: 1.8em; - padding-bottom: 5px; - } - - h2 { - font-size: 1.5em; - padding-bottom: 5px; - } - - h3 { - font-size: 1.3em; - padding-bottom: 5px; - } - - h4 { - font-size: 1.2em; - padding-bottom: 3px; - } - - .highlight { - border-radius: 10px; - - pre { - color: lighten($light-gray, 15%); - } - } - - img { - max-width: 90%; - } - - table { - thead { - font-size: 16px; - border-bottom: 1px solid gray; - } - - tr { - margin-right: 5px; - } - - tbody { - font-size: 12px; - } - } - } } @media (min-width: 768px) { diff --git a/website/config.yaml b/website/config.yaml index ac3aa68b110..b6184f5c69f 100755 --- a/website/config.yaml +++ b/website/config.yaml @@ -2,7 +2,7 @@ languageCode: en-us title: Heron author: Luc Perkins canonifyurls: true -baseurl: /heron +baseurl: 
https://twitter.github.io/heron # Site-level config metadataformat: yaml @@ -11,8 +11,7 @@ taxonomies: tag: tags # Pygments syntax highlighting -pygmentsstyle: monokai -pygmentsuseclasses: false +pygmentsuseclasses: true pygmentscodefences: true # Site-level parameters available through .Site.Params.{key} @@ -20,8 +19,9 @@ params: author: Twitter, Inc. description: A realtime, distributed, fault-tolerant stream processing engine from Twitter versions: - heron: 0.14.7 + heron: 0.14.9 bazel: 0.3.1 + heronpy: 0.0.1 assets: favicon: small: /img/favicon-16x16.png @@ -32,7 +32,6 @@ params: css: - /css/style.min.css - /css/font-awesome.min.css - - /css/pygments.css js: - https://code.jquery.com/jquery-2.2.1.min.js - /js/app.min.js diff --git a/website/content/docs/contributors/testing.md b/website/content/docs/contributors/testing.md index 386241a9954..61484a36b87 100644 --- a/website/content/docs/contributors/testing.md +++ b/website/content/docs/contributors/testing.md @@ -68,6 +68,6 @@ Integration tests are divided into two categories: To run the failure integration tests on a Mac OS X, do the following: ```bash - $ bazel build --config=darwin integration-test/src/... - $ ./bazel-bin/integration-test/src/python/local_test_runner/local-test-runner + $ bazel build --config=darwin integration_test/src/... 
+ $ ./bazel-bin/integration_test/src/python/local_test_runner/local-test-runner ``` diff --git a/website/content/docs/developers/compiling/linux.md b/website/content/docs/developers/compiling/linux.md index 0c9953529d7..e55f682d467 100644 --- a/website/content/docs/developers/compiling/linux.md +++ b/website/content/docs/developers/compiling/linux.md @@ -21,8 +21,8 @@ $ sudo apt-get upgrade -y ### Step 2 --- Install required libraries ```bash -$ sudo apt-get install git build-essential automake cmake libtool zip \ - libunwind-setjmp0-dev zlib1g-dev unzip pkg-config -y +$ sudo apt-get install git build-essential automake cmake libtool-bin zip \ + libunwind-setjmp0-dev zlib1g-dev unzip pkg-config python-setuptools -y ``` #### Step 3 --- Set the following environment variables diff --git a/website/content/docs/developers/python/bolts.md b/website/content/docs/developers/python/bolts.md index b2eb214b873..f6bf7a65353 100644 --- a/website/content/docs/developers/python/bolts.md +++ b/website/content/docs/developers/python/bolts.md @@ -1,18 +1,21 @@ --- -title: Implementing a Python Bolt +title: Implementing Python bolts --- +> #### Python API docs +> You can find API docs for the [`heronpy`](https://pypi.python.org/pypi/heronpy) library [here](/api/python). + Bolts must implement the `Bolt` interface, which has the following methods. ```python class Bolt(BaseBolt): def initialize(self, config, context) - + def process(self, tup) ``` * The `initialize()` method is called when the bolt is first initialized and -provides the bolt with the executing environment. It is equivalent to `prepare()` +provides the bolt with the executing environment. It is equivalent to `prepare()` method of the [`IBolt`](/api/com/twitter/heron/api/bolt/IBolt.html) interface in Java. 
Note that you should not override `__init__()` constructor of `Bolt` class for initialization of custom variables, since it is used internally by HeronInstance; instead, @@ -29,17 +32,17 @@ class BaseBolt: def emit(self, tup, stream="default", anchors=None, direct_task=None, need_task_ids=False) def ack(self, tup) def fail(self, tup) - + @staticmethod def is_tick(tup) - + def log(self, message, level=None) - + @classmethod def spec(cls, name=None, inputs=None, par=1, config=None) ``` -* The `emit()` method is used to emit a given `tup`, which can be a `list` or `tuple` of +* The `emit()` method is used to emit a given `tup`, which can be a `list` or `tuple` of any python objects. Unlike the Java implementation, `OutputCollector` doesn't exist in the Python implementation. @@ -51,10 +54,10 @@ doesn't exist in the Python implementation. * The `log()` method is used to log an arbitrary message, and its outputs are redirected to the log file of the component. It accepts an optional argument - which specifies the logging level. By default, its logging level is `info`. + which specifies the logging level. By default, its logging level is `info`. **Warning:** due to internal issue, you should **NOT** output anything to - `sys.stdout` or `sys.stderr`; instead, you should use this method to log anything you want. + `sys.stdout` or `sys.stderr`; instead, you should use this method to log anything you want. * In order to declare the output fields of this bolt, you need to place a class attribute `outputs` as a list of `str` or `Stream`. Note that unlike Java, @@ -67,23 +70,17 @@ For further information, refer to [this page](../topologies). of this bolt within the topology, as well as to give component-specific configurations. For the usage of this method, refer to [this page](../topologies). 
- -For further information about the API, refer to the Streamparse API documentation, -although there are some methods in the Streamparse API that are not supported or are -invalid in Heron. Additionally, there are a number of example implementations -under `heron/examples/src/python` directory. - The following is an example implementation of a bolt in Python. ```python from collections import Counter -from pyheron import Bolt +from heronpy import Bolt class CountBolt(Bolt): outputs = ["word", "count"] def initialize(self, config, context): self.counter = Counter() - + def process(self, tup): word = tup.values[0] self.counter[word] += 1 diff --git a/website/content/docs/developers/python/spouts.md b/website/content/docs/developers/python/spouts.md index 81254830177..9de60029949 100644 --- a/website/content/docs/developers/python/spouts.md +++ b/website/content/docs/developers/python/spouts.md @@ -1,20 +1,27 @@ --- -title: Implementing a Python Spout +title: Implementing Python Spouts --- -Spouts must implement the `Spout` interface, which has the following methods. +> #### Python API docs +> You can find API docs for the [`heronpy`](https://pypi.python.org/pypi/heronpy) library [here](/api/python). + +To create a spout for a Heron topology, you need to subclass the [`Spout`](/api/python/spout/spout.m.html#heronpy.spout.spout.Spout) class, which has the following methods. ```python class Spout(BaseSpout): - def initialize(self, config, context) - def next_tuple(self) - def ack(self, tup_id) - def fail(self, tup_id) - def activate(self) - def deactivate(self) - def close(self) + def initialize(self, config, context) + def next_tuple(self) + def ack(self, tup_id) + def fail(self, tup_id) + def activate(self) + def deactivate(self) + def close(self) ``` +## `Spout` class methods + +The [`Spout`](/api/python/spout/spout.m.html#heronpy.spout.spout.Spout) class provides a number of methods that you should implement when subclassing. 
+ * The `initialize()` method is called when the spout is first initialized and provides the spout with the executing environment. It is equivalent to `open()` method of [`ISpout`](/api/com/twitter/heron/api/spout/ISpout.html). @@ -28,7 +35,7 @@ emit fetched tuples by calling `self.emit()`, as described below. * The `ack()` method is called when the `HeronTuple` with the `tup_id` emitted by this spout is successfully processed. -* The `fail()` method is called when the `HeronTuple` with the `tup_id` emitted +* The `fail()` method is called when the `HeronTuple` with the `tup_id` emitted by this spout is not processed successfully. * The `activate()` method is called when the spout is asked to back into @@ -40,28 +47,26 @@ state. * The `close()` method is called when when the spout is shutdown. There is no guarantee that this method is called due to how the instance is killed. -In addition, `BaseSpout` class provides you with the following methods. +## `BaseSpout` class methods + +The `Spout` class inherits from the [`BaseSpout`](/api/python/spout/base_spout.m.html#heronpy.spout.base_spout.BaseSpout) class, which also provides you methods you can use in your spouts. ```python class BaseSpout: - def emit(self, tup, tup_id=None, stream="default", direct_task=None, need_task_ids=False) - - def log(self, message, level=None) + def emit(self, tup, tup_id=None, stream="default", direct_task=None, need_task_ids=False) - @classmethod - def spec(cls, name=None, par=1, config=None) + def log(self, message, level=None) + + @classmethod + def spec(cls, name=None, par=1, config=None) ``` -* The `emit()` method is used to emit a given `tup`, which can be a `list` or `tuple` of -any python objects. Unlike the Java implementation, `OutputCollector` -doesn't exist in the Python implementation. +* The `emit()` method is used to emit a given tuple, which can be a `list` or `tuple` of any Python objects. 
Unlike in the Java implementation, there is no `OutputCollector` in the Python implementation. -* The `log()` method is used to log an arbitrary message, and its outputs are redirected - to the log file of the component. It accepts an optional argument - which specifies the logging level. By default, its logging level is `info`. +* The `log()` method is used to log an arbitrary message, and its outputs are redirected to the log file of the component. It accepts an optional argument which specifies the logging level. By default, its logging level is `info`. **Warning:** due to internal issue, you should **NOT** output anything to - `sys.stdout` or `sys.stderr`; instead, you should use this method to log anything you want. + `sys.stdout` or `sys.stderr`; instead, you should use this method to log anything you want. * In order to declare the output fields of this spout, you need to place a class attribute `outputs` as a list of `str` or `Stream`. Note that unlike Java, @@ -73,25 +78,22 @@ For further information, refer to [this page](../topologies). of this spout within the topology, as well as to give component-specific configurations. For the usage of this method, refer to [this page](../topologies). -For further information about the API, refer to the Streamparse API documentation, -although there are some methods in the Streamparse API that are not supported or are -invalid in Heron. Additionally, there are a number of example implementations -under `heron/examples/src/python` directory. +## Example spout The following is an example implementation of a spout in Python. 
```python from itertools import cycle -from pyheron import Spout +from heronpy import Spout class WordSpout(Spout): - outputs = ['word'] - - def initialize(self, config, context): - self.words = cycle(["hello", "world", "heron", "storm"]) - self.log("In initialize() of WordSpout) - - def next_tuple(self): - word = next(self.words) - self.emit([word]) -``` \ No newline at end of file + outputs = ['word'] + + def initialize(self, config, context): + self.words = cycle(["hello", "world", "heron", "storm"]) + self.log("Initializing WordSpout...") + + def next_tuple(self): + word = next(self.words) + self.emit([word]) +``` diff --git a/website/content/docs/developers/python/topologies.md b/website/content/docs/developers/python/topologies.md index 205b090be91..c1d781a9837 100644 --- a/website/content/docs/developers/python/topologies.md +++ b/website/content/docs/developers/python/topologies.md @@ -1,112 +1,248 @@ --- -title: Writing and Launching a Topology in Python +title: Python Topologies --- -Currently, support for developing a Heron topology in Python is still experimental. -It is compatible with the Streamparse API, so Python topologies written -for the Streamparse can be deployed on Heron with ease. -This page describes how to write and launch a topology in Python, as well as -how to convert a Streamparse topology to a PyHeron topology. +> The current version of `heronpy` is [{{% heronpyVersion %}}](https://pypi.python.org/pypi/heronpy/{{% heronpyVersion %}}). -Note that a Python topology is known to be approximately 20-40 times slower -than a topology written in Java. This performance issue will be resolved in later releases. +Support for developing Heron topologies in Python is provided by a Python library called [`heronpy`](https://pypi.python.org/pypi/heronpy). -You need to first download `PyHeron` library and include it in your project. +> #### Python API docs +> You can find API docs for the `heronpy` library [here](/api/python). 
-# Writing your own topology in Python +## Setup -[Spouts](../spouts) and [Bolts](../bolts) discuss how to implement spouts and -bolts in Python, respectively. +First, you need to install the `heronpy` library using [pip](https://pip.pypa.io/en/stable/), [EasyInstall](https://wiki.python.org/moin/EasyInstall), or an analogous tool: -After defining the spouts and bolts, a topology can be composed by two ways: +```shell +$ pip install heronpy +$ easy_install heronpy +``` + +Then you can include `heronpy` in your project files. Here's an example: + +```python +from heronpy import Bolt, Spout, Topology +``` -* Using `TopologyBuilder` (not compatible with the Streamparse API) -* Subclassing `Topology` class (compatible with the Streamparse API) +## Writing topologies in Python -## Defining a topology using a TopologyBuilder +Heron [topologies](../../../concepts/topologies) are networks of [spouts](../spouts) that pull data into a topology and [bolts](../bolts) that process that ingested data. -This way of defining a topology is similar to defining a topology in Java, -and is not compatible with the Streamparse API. +> You can see how to create Python spouts in the [Implementing Python Spouts](../spouts) guide and how to create Python bolts in the [Implementing Python Bolts](../bolts) guide. -The `TopologyBuilder` has two major methods to specify the components: +Once you've defined spouts and bolts for a topology, you can then compose the topology in one of two ways: -* `add_spout(self, name, spout_cls, par, config=None)` - * `name` is `str` specifying the unique identifier that is assigned to this spout. - * `spout_cls` is a subclass of `Spout` that defines this spout. - * `par` is `int` specifying the number of instances of this spout. - * `config` is `dict` specifying this spout-specific configuration. +* You can use the [`TopologyBuilder`](/api/python/topology.m.html#heronpy.topology.TopologyBuilder) class inside of a main function. 
+ Here's an example: -* `add_bolt(self, name, bolt_cls, par, inputs, config=None)` - * `name` is `str` specifying the unique identifier that is assigned to this bolt. - * `bolt_cls` is a subclass of `Bolt` that defines this bolt. - * `par` is `int` specifying the number of instances of this bolt. - * `inputs` is either `dict` mapping from `HeronComponentSpec` to `Grouping`; - or `list` of `HeronComponentSpec`, in which case the shuffle grouping is used. - * `config` is `dict` specifying this bolt-specific configuration. + ```python + from heronpy import TopologyBuilder -Each method returns the corresponding `HeronComponentSpec` object. + if __name__ == '__main__': + builder = TopologyBuilder("MyTopology") + # Add spouts and bolts + builder.build_and_submit() + ``` -The following is an example implementation of WordCountTopology in Python. +* You can subclass the [`Topology`](/api/python/topology.m.html#heronpy.topology.Topology) class. + + Here's an example: + + ```python + class MyTopology(Topology): + my_spout = MySpout.spec(par=2) + my_bolt = MyBolt.spec(par=3, + inputs={ + spout: Grouping.fields('some-input-field') + }) + ``` + +## Defining topologies using the [`TopologyBuilder`](/api/python/topology.m.html#heronpy.topology.TopologyBuilder) class + +If you create a Python topology using a [`TopologyBuilder`](/api/python/topology.m.html#heronpy.topology.TopologyBuilder), you need to instantiate a `TopologyBuilder` inside of a standard Python main function, like this: ```python -from pyheron import TopologyBuilder +if __name__ == '__main__': + builder = TopologyBuilder("MyTopology") +``` + +Once you've created a `TopologyBuilder` object, you can add [bolts](../bolts) using the [`add_bolt`](/api/python/topology.m.html#heronpy.topology.TopologyBuilder.add_bolt) method and [spouts](../spouts) using the [`add_spout`](/api/python/topology.m.html#heronpy.topology.TopologyBuilder.add_spout) method. 
Here's an example: + +```python +builder = TopologyBuilder("MyTopology") +builder.add_bolt("my_bolt", MyBolt, par=3) +builder.add_spout("my_spout", MySpout, par=2) +``` + +Both the `add_bolt` and `add_spout` methods return the corresponding [`HeronComponentSpec`](/api/python/component/component_spec.m.html#heronpy.component.component_spec.HeronComponentSpec) object. + +The `add_bolt` method takes four arguments and an optional `config` parameter: + +Argument | Data type | Description | Default +:--------|:----------|:------------|:------- +`name` | `str` | The unique identifier assigned to this bolt | | +`bolt_cls` | class | The subclass of [`Bolt`](/api/python/bolt/bolt.m.html#heronpy.bolt.bolt.Bolt) that defines this bolt | | +`par` | `int` | The number of instances of this bolt in the topology | | +`inputs` | `dict` or `list` | Either a `dict` mapping from [`HeronComponentSpec`](/api/python/component/component_spec.m.html#heronpy.component.component_spec.HeronComponentSpec) to [`Grouping`](/api/python/stream.m.html#heronpy.stream.Grouping) *or* a list of [`HeronComponentSpec`](/api/python/component/component_spec.m.html#heronpy.component.component_spec.HeronComponentSpec)s, in which case the [`shuffle`](/api/python/stream.m.html#heronpy.stream.Grouping.SHUFFLE) grouping is used +`config` | `dict` | Specifies the configuration for this bolt | `None` + +The `add_spout` method takes three arguments and an optional `config` parameter: + +Argument | Data type | Description | Default +:--------|:----------|:------------|:------- +`name` | `str` | The unique identifier assigned to this spout | | +`spout_cls` | class | The subclass of [`Spout`](/api/python/spout/spout.m.html#heronpy.spout.spout.Spout) that defines this spout | | +`par` | `int` | The number of instances of this spout in the topology | | +`config` | `dict` | Specifies the configuration for this spout | `None` + +### Example + +The following is an example implementation of a word count topology in Python
that uses [`TopologyBuilder`](/api/python/topology.m.html#heronpy.topology.TopologyBuilder). + +```python +from heronpy import TopologyBuilder from your_spout import WordSpout from your_bolt import CountBolt if __name__ == "__main__": - builder = TopologyBuilder("WordCountTopology") - word_spout = builder.add_spout("word_spout", WordSpout, par=2) - count_bolt = builder.add_bolt("count_bolt", CountBolt, par=2, - inputs={word_spout: Grouping.fields('word')}) - builder.build_and_submit() + builder = TopologyBuilder("WordCountTopology") + word_spout = builder.add_spout("word_spout", WordSpout, par=2) + + count_bolt_input = + count_bolt = builder.add_bolt("count_bolt", CountBolt, par=2, + inputs={word_spout: Grouping.fields('word')}) + builder.build_and_submit() ``` Note that arguments to the main method can be passed by providing them in the `heron submit` command. -## Defining a topology by subclassing Topology class +### Topology-wide configuration + +If you're building a Python topology using a `TopologyBuilder`, you can specify configuration for the topology using the [`set_config`](/api/python/topology.m.html#heronpy.topology.TopologyBuilder.set_config) method. A topology's config is a `dict` in which the keys are a series of constants from the [`api_constants`](/api/python/api_constants.m.html) module and values are configuration values for those parameters. + +Here's an example: + +```python +from heronpy import api_constants, TopologyBuilder + +if __name__ == '__main__': + topology_config = { + api_constants.TOPOLOGY_ENABLE_ACKING: True, + api_constants.TOPOLOGY_ENABLE_MESSAGE_TIMEOUTS: True + } + builder = TopologyBuilder("MyTopology") + builder.set_config(topology_config) + # Add bolts and spouts, etc. +``` + +### Launching the topology + +If you want to [submit](../../../operators/heron-cli#submitting-a-topology) Python topologies to a Heron cluster, they need to be packaged as a [PEX](https://pex.readthedocs.io/en/stable/whatispex.html) file.
In order to produce PEX files, we recommend using a build tool like [Pants](http://www.pantsbuild.org/python_readme.html) or [Bazel](https://github.com/benley/bazel_rules_pex). + +If you defined your topology using the [`TopologyBuilder`](/api/python/topology.m.html#heronpy.topology.TopologyBuilder) class and built a `word_count.pex` file for that topology in the `~/topology` folder, you can submit the topology to a cluster called `local` like this: + +```bash +$ heron submit local \ + ~/topology/word_count.pex \ + - # No class specified +``` + +Note the `-` in this submission command. If you define a topology using `TopologyBuilder`, you do not need to instruct Heron where your main method is located. -This way of defining a topology is compatible with the Streamparse API. -All you need to do is to place `HeronComponentSpec` as the class attributes +> #### Example topologies buildable as PEXes +> * See [this repo](https://github.com/streamlio/pants-dev-environment) for an example of a Heron topology written in Python and deployable as a Pants-packaged PEX. +> * See [this repo](https://github.com/streamlio/bazel-dev-environment) for an example of a Heron topology written in Python and deployable as a Bazel-packaged PEX.
+ +## Defining a topology by subclassing the [`Topology`](/api/python/topology.m.html#heronpy.topology.Topology) class + +If you create a Python topology by subclassing the [`Topology`](/api/python/topology.m.html#heronpy.topology.Topology) class, you need to create a new topology class, like this: + +```python +from heronpy import Grouping, Topology +from my_spout import MySpout +from my_bolt import MyBolt + +class MyTopology(Topology): + my_spout = MySpout.spec(par=2) + my_bolt_inputs = { + my_spout: Grouping.fields('some-input-field') + } + my_bolt = MyBolt.spec(par=3, inputs=my_bolt_inputs) +``` + +All you need to do is place [`HeronComponentSpec`](/api/python/component/component_spec.m.html#heronpy.component.component_spec.HeronComponentSpec)s as the class attributes of your topology class, which are returned by the `spec()` method of -your spout or bolt class. - -* `Spout.spec(cls, name=None, par=1, config=None)` - * `name` is either `str` specifying the unique identifier that is assigned to this spout, or - `None` if you want to use the variable name of the returned `HeronComponentSpec` as - the unique identifier for this spout. - * `par` is `int` specifying the number of instances of this spout. - * `config` is `dict` specifying this spout-specific configuration. - -* `Bolt.spec(cls, name=None, inputs=None, par=1, config=None)` - * `name` is either `str` specifying the unique identifier that is assigned to this bolt; or - `None` if you want to use the variable name of the returned `HeronComponentSpec` as - the unique identifier for this bolt. - * `inputs` is either `dict` mapping from `HeronComponentSpec` to `Grouping`; - or `list` of `HeronComponentSpec`, in which case the shuffle grouping is used. - * `par` is `int` specifying the number of instances of this bolt. - * `config` is `dict` specifying this bolt-specific configuration. - -The same WordCountTopology is defined in the following manner. +your spout or bolt class. 
You do *not* need to run a `build` method or anything like that; the `Topology` class will automatically detect which spouts and bolts are included in the topology. + +> If you use this method to define a new Python topology, you do *not* need to have a main function. + +For spouts, the [`spec`](/api/python/spout/spout.m.html#heronpy.spout.spout.Spout.spec) method takes three optional arguments: + +Argument | Data type | Description | Default +:--------|:----------|:------------|:------- +`name` | `str` | The unique identifier assigned to this spout or `None` if you want to use the variable name of the returned `HeronComponentSpec` as the unique identifier for this spout | `None` | +`par` | `int` | The number of instances of this spout in the topology | `1` | +`config` | `dict` | Specifies the configuration for this spout | `None` + + +For bolts, the [`spec`](/api/python/bolt/bolt.m.html#heronpy.bolt.bolt.Bolt.spec) method takes four optional arguments: + +Argument | Data type | Description | Default +:--------|:----------|:------------|:------- +`name` | `str` | The unique identifier assigned to this bolt or `None` if you want to use the variable name of the returned `HeronComponentSpec` as the unique identifier for this bolt | `None` | +`inputs` | `dict` or `list` | Either a `dict` mapping from [`HeronComponentSpec`](/api/python/component/component_spec.m.html#heronpy.component.component_spec.HeronComponentSpec) to [`Grouping`](/api/python/stream.m.html#heronpy.stream.Grouping) *or* a list of [`HeronComponentSpec`](/api/python/component/component_spec.m.html#heronpy.component.component_spec.HeronComponentSpec)s, in which case the [`shuffle`](/api/python/stream.m.html#heronpy.stream.Grouping.SHUFFLE) grouping is used +`par` | `int` | The number of instances of this bolt in the topology | `1` | +`config` | `dict` | Specifies the configuration for this bolt | `None` + +### Example + +Here's an example topology definition with one spout and one bolt: ```python -from
pyheron import Topology +from heronpy import Topology from your_spout import WordSpout from your_bolt import CountBolt class WordCount(Topology): - word_spout = WordSpout.spec(par=2) - count_bolt = CountBolt.spec(par=2, inputs={word_spout: Grouping.fields('word')}) + word_spout = WordSpout.spec(par=2) + count_bolt = CountBolt.spec(par=2, inputs={word_spout: Grouping.fields('word')}) +``` + +### Launching + +If you defined your topology by subclassing the [`Topology`](/api/python/topology.m.html#heronpy.topology.Topology) class, +your main Python file should *not* contain a main method. You will, however, need to instruct Heron which class contains your topology definition. + +Let's say that you've defined a topology by subclassing `Topology` and built a PEX stored in `~/topology/dist/word_count.pex`. The class containing your topology definition is `topology.word_count.WordCount`. You can submit the topology to a cluster called `local` like this: + +```bash +$ heron submit local \ + ~/topology/dist/word_count.pex \ + topology.word_count.WordCount \ # Specifies the topology class definition + WordCountTopology ``` -## Topology-wide configuration -Topology-wide configuration can be specified by using `set_config()` method if -you are using `TopologyBuilder`, or by placing `config` containing `dict` -as the class attribute of your topology. Note that these configuration will be -overriden by component-specific configuration at runtime +### Topology-wide configuration + +If you're building a Python topology by subclassing `Topology`, you can specify configuration for the topology by adding a `config` class attribute to your topology class. A topology's config is a `dict` in which the keys are a series of constants from the [`api_constants`](/api/python/api_constants.m.html) module and values are configuration values for those parameters.
+ +Here's an example: + +```python +from heronpy import api_constants, Topology + +class MyTopology(Topology): + config = { + api_constants.TOPOLOGY_ENABLE_ACKING: True, + api_constants.TOPOLOGY_ENABLE_MESSAGE_TIMEOUTS: True + } + # Add bolts and spouts, etc. +``` ## Multiple streams + To specify that a component has multiple output streams, instead of using a list of strings for `outputs`, you can specify a list of `Stream` objects, in the following manner. @@ -127,28 +263,21 @@ class MultiStreamTopology(Topology): consume_bolt = ConsumeBolt.spec(inputs={spout: Grouping.SHUFFLE}) ``` -For further information about the API, refer to the Streamparse API documentation, -although there are some methods and functionalities that are not supported or -are invalid in Heron. +## Declaring output fields using the `spec()` method -## Declaring output fields from the spec() method -In Python topologies, the `declareOutputFields()` method doesn't exist, so -the output fields of your spout and bolt need to be declared by placing -`outputs` class attributes. This is compatible with the Streamparse API, but -dynamically declaring output fields is more complicated in this way. -So, PyHeron provides a way to dynamically declare output fields via the +In Python topologies, the output fields of your spouts and bolts +need to be declared by placing `outputs` class attributes, as there is +no `declareOutputFields()` method. `heronpy` enables you to dynamically declare output fields as a list using the `optional_outputs` argument in the `spec()` method. This is useful in a situation like below. 
```python class IdentityBolt(Bolt): - # can't statically declare output fields + # Statically declaring output fields is not allowed class process(self, tup): emit([tup.values]) -``` -```python class DynamicOutputField(Topology): spout = WordSpout.spec() bolt = IdentityBolt.spec(inputs={spout: Grouping.ALL}, @@ -158,28 +287,41 @@ class DynamicOutputField(Topology): You can also declare outputs in the `add_spout()` and the `add_bolt()` method for the `TopologyBuilder` in the same way. -# Launching your python topology +## Example topologies -You need to first package your Python topology project to a PEX file. +There are a number of example topologies that you can peruse in the [`heron/examples/src/python`]({{% githubMaster %}}/heron/examples/src/python) directory of the [Heron repo]({{% githubMaster %}}): -If you defined your topology using `TopologyBuilder`, your topology -definition python file should have `if __name__ = "__main__"` method. -The following shows the submission command of an example WordCountTopology, where its -pex file is located in `~/project/word_count.pex`. +Topology | File | Description +:--------|:-----|:----------- +Word count | [`word_count_topology.py`]({{% githubMaster %}}/heron/examples/src/python/word_count_topology.py) | The [`WordSpout`]({{% githubMaster %}}/heron/examples/src/python/spout/word_spout.py) spout emits random words from a list, while the [`CountBolt`]({{% githubMaster %}}/heron/examples/src/python/bolt/count_bolt.py) bolt counts the number of words that have been emitted. +Multiple streams | [`multi_stream_topology.py`]({{% githubMaster %}}/heron/examples/src/python/multi_stream_topology.py) | The [`MultiStreamSpout`]({{% githubMaster %}}/heron/examples/src/python/spout/multi_stream_spout.py) emits multiple streams to downstream bolts. 
+Half acking | [`half_acking_topology.py`]({{% githubMaster %}}/heron/examples/src/python/half_acking_topology.py) | The [`HalfAckBolt`]({{% githubMaster %}}/heron/examples/src/python/bolt/half_ack_bolt.py) acks only half of all received tuples. +Custom grouping | [`custom_grouping_topology.py`]({{% githubMaster %}}/heron/examples/src/python/custom_grouping_topology.py) | The [`SampleCustomGrouping`]({{% githubMaster %}}/heron/examples/src/python/custom_grouping_topology.py#L26) class provides a custom field grouping. -```bash -$ heron submit local ~/project/word_count.pex - WordCountTopology +You can build the respective PEXes for these topologies using the following commands: + +```shell +$ bazel build heron/examples/src/python:word_count +$ bazel build heron/examples/src/python:multi_stream +$ bazel build heron/examples/src/python:half_acking +$ bazel build heron/examples/src/python:custom_grouping ``` -If you defined your topology by subclassing `Topology`, your topology -definition python file should not contain main method. -The following shows the submission command of an example WordCountTopology, where -its pex file is located in `~/project/word_count.pex`, inside which your `WordCount` -class resides under `topology.word_count_topology.WordCount`. +All built PEXes will be stored in `bazel-bin/heron/examples/src/python`. 
You can submit them to Heron like so: -```bash +```shell $ heron submit local \ -~/project/word_count.pex \ -topology.word_count_topology.WordCount \ -WordCountTopology + bazel-bin/heron/examples/src/python/word_count.pex - \ + WordCount +$ heron submit local \ + bazel-bin/heron/examples/src/python/multi_stream.pex \ + heron.examples.src.python.multi_stream_topology.MultiStream +$ heron submit local \ + bazel-bin/heron/examples/src/python/half_acking.pex - \ + HalfAcking +$ heron submit local \ + bazel-bin/heron/examples/src/python/custom_grouping.pex \ + heron.examples.src.python.custom_grouping_topology.CustomGrouping ``` + +By default, the `submit` command also activates topologies. To disable this behavior, set the `--deploy-deactivated` flag. diff --git a/website/content/docs/getting-started-troubleshooting.md b/website/content/docs/getting-started-troubleshooting.md index 43abc6667a6..51209978ffc 100644 --- a/website/content/docs/getting-started-troubleshooting.md +++ b/website/content/docs/getting-started-troubleshooting.md @@ -16,7 +16,7 @@ heron submit ... ExclamationTopology --verbose Even if the topology is submitted successfully, it could still fail to start some component. For example, TMaster may fail to start due to unfulfilled -dependencies. +dependencies. For example, the following message can appear: @@ -35,7 +35,7 @@ java.nio.file.NoSuchFileException: \ [2016-05-27 12:02:38 -0600] com.twitter.heron.spi.utils.TMasterUtils SEVERE: \ Failed to get physical plan for topology ExclamationTopology -... +... ERROR: Failed to activate topology 'ExclamationTopology' INFO: Elapsed time: 1.883s. @@ -44,67 +44,67 @@ INFO: Elapsed time: 1.883s. #### What to do * This file will show if any specific components have failed to start. 
- + ```bash ~/.herondata/topologies/{cluster}/{role}/{TopologyName}/heron-executor.stdout ``` - + For example, there may be errors when trying to spawn a Stream Manager process in the file: - + ```bash Running stmgr-1 process as ./heron-core/bin/heron-stmgr ExclamationTopology \ ExclamationTopology0a9c6550-7f3d-44fb-97ea-5c779fac6924 ExclamationTopology.defn LOCALMODE \ /Users/${USERNAME}/.herondata/repository/state/local stmgr-1 \ container_1_word_2,container_1_exclaim1_1 58106 58110 58109 ./heron-conf/heron_internals.yaml - 2016-06-09 16:20:28:  stdout: + 2016-06-09 16:20:28:  stdout: 2016-06-09 16:20:28:  stderr: error while loading shared libraries: libunwind.so.8: \ cannot open shared object file: No such file or directory ``` Then fix it correspondingly. - + * It is also possible that the host has an issue with resolving localhost. -To check, run the following command in a shell. - +To check, run the following command in a shell. + ```bash $ python -c "import socket; print socket.gethostbyname(socket.gethostname())" Traceback (most recent call last): File "", line 1, in socket.gaierror: [Errno 8] nodename nor servname provided, or not known ``` - - If the output looks like a normal IP address, such as `127.0.0.1`, + + If the output looks like a normal IP address, such as `127.0.0.1`, you don't have this issue. If the output is similar to the above, you need to modify the `/etc/hosts` - file to correctly resolve localhost, as shown below. - + file to correctly resolve localhost, as shown below. + 1. Run the following command, whose output is your computer's hostname. - + ```bash $ python -c "import socket; print socket.gethostname()" ``` - + 2. Open the `/etc/hosts` file as superuser and find a line containing - + ```bash 127.0.0.1 localhost ``` - - 3. Append your hostname after the word "localhost" on the line. + + 3. Append your hostname after the word "localhost" on the line. 
For example, if your hostname was `tw-heron`, then the line should look like the following: - + ```bash 127.0.0.1 localhost tw-heron ``` - + 4. Save the file. The change should usually be reflected immediately, although rebooting might be necessary depending on your platform. - -### 3. Why does the process fail during runtime? + +### 3. Why does the process fail during runtime? If a component (e.g., TMaster or Stream Manager) has failed during runtime, visit the component's logs in - + ```bash ~/.herondata/topologies/{cluster}/{role}/{TopologyName}/log-files/ ``` @@ -112,11 +112,11 @@ If a component (e.g., TMaster or Stream Manager) has failed during runtime, visi ### 4. How to force kill and clean up a topology? In general, it suffices to run: - + ```bash heron kill ... ``` -If returned error, the topology can still be killed by running - `kill pid` to kill all associated running process and `rm -rf ~/.herondata/` +If returned error, the topology can still be killed by running + `kill pid` to kill all associated running process and `rm -rf ~/.herondata/` to clean up the state. diff --git a/website/content/docs/getting-started.md b/website/content/docs/getting-started.md index d39b7d2b66c..f1e37a8f27c 100644 --- a/website/content/docs/getting-started.md +++ b/website/content/docs/getting-started.md @@ -5,69 +5,70 @@ aliases: - /docs/install.html --- +> The current version of Heron is **{{% heronVersion %}}**. + The easiest way to get started learning Heron is to install and run pre-compiled Heron binaries, which are currently available for: * Mac OS X * Ubuntu >= 14.04 +* CentOS -For other platforms, you need to build from source. Please refer to [Heron Developers] +For other platforms, you need to build from source. Please refer to the [guide to compiling Heron] (../developers/compiling/compiling). 
## Step 1 --- Download Heron binaries using installation scripts Go to the [releases page](https://github.com/twitter/heron/releases) for Heron -and download two installation scripts for your platform. The names of the -scripts have this form: - -* `heron-client-install-{{% heronVersion %}}-PLATFORM.sh` -* `heron-tools-install-{{% heronVersion %}}-PLATFORM.sh` - -The installation scripts for Mac OS X (`darwin`), for example, would be named +and see a full listing of Heron releases for each available platform. The installation scripts for Mac OS X (`darwin`), for example, would be named `heron-client-install-{{% heronVersion %}}-darwin.sh` and `heron-tools-install-{{% heronVersion %}}-darwin.sh`. -Once you've downloaded the scripts, run the Heron client script with the -`--user` flag set: +Download both the `client` and `tools` installation scripts for your platform either from the releases page or using [wget](https://www.gnu.org/software/wget/). + +Here's an example for MacOS (`darwin`): ```bash -$ chmod +x heron-client-install-VERSION-PLATFORM.sh -$ ./heron-client-install-VERSION-PLATFORM.sh --user -Heron client installer ----------------------- +$ wget https://github.com/twitter/heron/releases/download/{{% heronVersion %}}/heron-client-install-{{% heronVersion %}}-darwin.sh +$ wget https://github.com/twitter/heron/releases/download/{{% heronVersion %}}/heron-tools-install-{{% heronVersion %}}-darwin.sh +``` -Uncompressing...... -Heron is now installed! +Once you've downloaded the scripts, make the scripts executable using [chmod](https://en.wikipedia.org/wiki/Chmod): -Make sure you have "${HOME}/bin" in your path. -... +```bash +$ chmod +x heron-*.sh ``` -To add `~/bin` to your path, run: +> Both installation scripts will install executables in the `~/bin` folder. You should add that folder to your `PATH` using `export PATH=~/bin:$PATH`. 
+ +Now run the client installation script with the `--user` flag set: ```bash -$ export PATH=$PATH:~/bin +$ ./heron-client-install-{{% heronVersion %}}-PLATFORM.sh --user +Heron client installer +---------------------- + +Uncompressing...... +Heron is now installed! ``` -Now run the script for Heron tools (setting the `--user` flag): +Now run the script for Heron tools (again setting the `--user` flag): ```bash -$ chmod +x heron-tools-install-VERSION-PLATFORM.sh -$ ./heron-tools-install-VERSION-PLATFORM.sh --user +$ ./heron-tools-install-{{% heronVersion %}}-PLATFORM.sh --user Heron tools installer --------------------- Uncompressing...... Heron Tools is now installed! -... ``` -To check Heron is successfully installed, run: +To check that Heron is successfully installed, run `heron version`: ```bash $ heron version heron.build.version : {{% heronVersion %}} -heron.build.time : Sat Aug 6 12:35:47 PDT 2016 +heron.build.time : Sat Aug 6 12:35:47 PDT {{% currentYear %}} heron.build.timestamp : 1470512147000 heron.build.host : ${HOSTNAME} heron.build.user : ${USERNAME} @@ -77,11 +78,11 @@ heron.build.git.status : Clean ## Step 2 --- Launch an example topology -> **Note for MacOS users** +> #### Note for MacOS users > If you want to run topologies locally on MacOS, you may need to add your > hostname to your `/etc/hosts` file under `localhost`. Here's an example line: -> `127.0.0.1 localhost `. You can fetch your hostname by simply +> `127.0.0.1 localhost My-Mac-Laptop.local`. You can fetch your hostname by simply > running `hostname` in your shell. If you set the `--user` flag when running the installation scripts, some example @@ -96,21 +97,23 @@ $ heron submit local \ com.twitter.heron.examples.ExclamationTopology \ ExclamationTopology \ --deploy-deactivated +``` + +The output should look something like this: +```bash INFO: Launching topology 'ExclamationTopology' + ...
-[2016-06-07 16:44:07 -0700] com.twitter.heron.scheduler.local.LocalLauncher INFO: \ -For checking the status and logs of the topology, use the working directory \ -$HOME/.herondata/topologies/local/${ROLE}/ExclamationTopology # working directory INFO: Topology 'ExclamationTopology' launched successfully INFO: Elapsed time: 3.409s. ``` This will *submit* the topology to your locally running Heron cluster but it -won't *activate* the topology. That will be explored in step 5 below. +won't *activate* the topology because the `--deploy-deactivated` flag was set. Activating the topology will be explored in step 5 below. -Note the output shows if the topology has been launched successfully and the working directory. +Note that the output shows whether the topology has been launched successfully as well as the working directory for the topology. To check what's under the working directory, run: ```bash @@ -269,7 +272,7 @@ In case of any issues, please refer to [Quick Start Troubleshooting](../getting- ### Next Steps -* [Migrate Storm topologies](../migrate-storm-to-heron) with simple `pom.xml` +* [Migrate Storm topologies](../migrate-storm-to-heron) to Heron with simple `pom.xml` changes * [Deploy topologies](../operators/deployment) in clustered, scheduler-driven environments (such as on [Aurora](../operators/deployment/schedulers/aurora) diff --git a/website/content/docs/operators/deployment/configuration.md b/website/content/docs/operators/deployment/configuration.md index c125877ca87..163a4809eb3 100644 --- a/website/content/docs/operators/deployment/configuration.md +++ b/website/content/docs/operators/deployment/configuration.md @@ -1,6 +1,8 @@ -# Configuring a Cluster +--- +title: Configuring a Cluster +--- -To setup a Heron cluster, you need to configure a few files. Each file configures +To setup a Heron cluster, you need to configure a few files. Each file configures a component of the Heron streaming framework.
* **scheduler.yaml** --- This file specifies the required classes for launcher, @@ -12,8 +14,8 @@ The state manager maintains the running state of the topology as logical plan, p scheduler state, and execution state. * **uploader.yaml** --- This file specifies the classes and configuration for the uploader, -which uploads the topology jars to storage. Once the containers are scheduled, they will -download these jars from the storage for running. +which uploads the topology jars to storage. Once the containers are scheduled, they will +download these jars from the storage for running. * **heron_internals.yaml** --- This file contains parameters that control how heron behaves. Tuning these parameters requires advanced knowledge of heron architecture and its @@ -45,7 +47,7 @@ heron.class.scheduler: com.twitter.heron.scheduler.aurora.AuroraScheduler # launcher class for submitting and launching the topology heron.class.launcher: com.twitter.heron.scheduler.aurora.AuroraLauncher -# location of java +# location of java heron.directory.sandbox.java.home: /usr/lib/jvm/java-1.8.0-openjdk-amd64/ # Invoke the IScheduler as a library directly diff --git a/website/data/toc.yaml b/website/data/toc.yaml index 460e60614f1..3ca26100d85 100644 --- a/website/data/toc.yaml +++ b/website/data/toc.yaml @@ -7,11 +7,37 @@ sections: url: /docs/migrate-storm-to-heron - name: Troubleshooting Guide url: /docs/getting-started-troubleshooting + - name: Topology Writers + sublinks: + - name: Java Topologies + url: /docs/developers/java/topologies + - name: Python Topologies + url: /docs/developers/python/topologies + - name: Heron Data Model + url: /docs/developers/data-model + - name: Tuple Serialization + url: /docs/developers/serialization + - name: Heron UI Guide + url: /docs/developers/ui-guide + - name: Tuning Guide + url: /docs/developers/tuning + - name: Packing Algorithms + url: /docs/developers/packing/ffdpacking + - name: Simulator Mode + url: /docs/developers/simulator-mode + - name: 
// Log a plugin error and end the failing stream. Ending it lets the
// downstream pipes finish, so a task that returns its stream still completes
// (and a watcher keeps running) after a bad input file.
function logStreamError(err) {
  console.log(err);
  this.emit('end');
}

// Every build task below RETURNS its stream. gulp 4 treats the end of the
// returned stream as task completion, so `gulp.series(...)` only moves on once
// the files have actually been written. Calling `done()` right after starting
// the pipeline would report completion before any output exists.

// JavaScript assets: minify and concatenate into app.min.js
gulp.task('js', function() {
  return gulp.src(SRC.js)
    .pipe($.uglify().on('error', logStreamError))
    .pipe($.concat('app.min.js'))
    .pipe(gulp.dest(DIST.js));
});

// Watch tasks intentionally never signal completion: they run until the
// process is stopped.
gulp.task('js:watch', function() {
  gulp.watch(SRC.js, gulp.series('js'));
});

// CSS assets: copied as-is
gulp.task('css', function() {
  return gulp.src(SRC.css)
    .pipe(gulp.dest(DIST.css));
});

gulp.task('css:watch', function() {
  // The second argument must be a task function; gulp.series('css') builds
  // one from the registered 'css' task.
  gulp.watch(SRC.css, gulp.series('css'));
});

// Sass assets: compile, minify and concatenate into style.min.css
gulp.task('sass', function() {
  return gulp.src(SRC.sass)
    .pipe($.sass().on('error', logStreamError))
    .pipe($.cleanCss())
    .pipe($.concat('style.min.css'))
    .pipe(gulp.dest(DIST.css));
});

gulp.task('sass:watch', function() {
  gulp.watch(SRC.sass, gulp.series('sass'));
});

// Fonts
gulp.task('fonts', function() {
  return gulp.src(SRC.fonts)
    .pipe(gulp.dest(DIST.fonts));
});

// Images
gulp.task('images', function() {
  return gulp.src(SRC.images)
    .pipe(gulp.dest(DIST.images));
});

gulp.task('images:watch', function() {
  gulp.watch(SRC.images, gulp.series('images'));
});

// One-time build; doesn't watch for changes
gulp.task('build', gulp.series('js', 'sass', 'css', 'fonts', 'images'));

// Delete static folder. The result of del() is returned so the task is only
// considered finished once the deletion has completed.
gulp.task('clean', function() {
  return del(DIST.all);
});

// Run in development (i.e. watch) mode: build once, then start all watchers
gulp.task('dev', gulp.series('build', gulp.parallel('js:watch', 'sass:watch', 'css:watch', 'images:watch')));

// Help => list tasks
// NOTE(review): withFilters() returns a task function (it used to be
// registered directly as the 'help' task). Here its result is discarded, so
// this task currently prints nothing. Confirm that gulp-task-listing works
// with gulp 4 before wiring the returned function back in.
gulp.task('help', function(done) {
  $.taskListing.withFilters(null, 'help');
  done();
});

// Default
gulp.task('default', gulp.series('help'));
Started a.btn.btn-default href=docs/migrate-storm-to-heron Migrate From Storm - .row + section.main-page-lower-section.container .col-xs-12.col-sm-9.col-md-9.col-lg-9 .landing-feature-2.text-left - h2 Why Heron? + h2.text-center Why Heron? br .row .col-sm-6 @@ -62,8 +62,8 @@ html lang={{.Site.LanguageCode}} h4 Run Topologies using pre-compiled Heron binaries a.btn.btn-default href=docs/getting-started Get Started .col-sm-6 - h4 Upgrade Existing Storm Topologies - a.btn.btn-default href=docs/migrate-storm-to-heron Migrate From Storm + h4 Migrate Existing Storm Topologies + a.btn.btn-default href=docs/migrate-storm-to-heron Migrate Storm topologies .hn-twitter-feed.hidden-xs.col-sm-3.col-md-3.col-lg-3 {{partial "index/feed.html" .}} diff --git a/website/layouts/partials/footer.ace b/website/layouts/partials/footer.ace index 1736cb303ef..dd470fac60e 100644 --- a/website/layouts/partials/footer.ace +++ b/website/layouts/partials/footer.ace @@ -4,10 +4,10 @@ footer.footer.hn-footer role=contentinfo .col-lg-12 ul.hn-footer-links li - a href=docs/contributors/governance/ Governance + a href=/docs/contributors/governance/ Governance li - a href=docs/contributors/roadmap/ Roadmap + a href=/docs/contributors/roadmap/ Roadmap li - a href=docs/contributors/support/ Support + a href=/docs/contributors/support/ Support - p {{.Now.Year}} Twitter + p © {{ now.Year }} Twitter diff --git a/website/layouts/partials/navbar.ace b/website/layouts/partials/navbar.ace index af67bea9133..a7f200d22ba 100644 --- a/website/layouts/partials/navbar.ace +++ b/website/layouts/partials/navbar.ace @@ -13,8 +13,6 @@ nav.hn-top-navbar.navbar.navbar-inverse.navbar-fixed-top role=navigation ul.nav.navbar-nav.navbar-right li a href=/docs/getting-started Docs - li - a href=/api API li a href=/docs/resources Resources li diff --git a/website/layouts/shortcodes/bazelVersion.html b/website/layouts/shortcodes/bazelVersion.html index f455b60fbc0..329a7b61959 100644 --- 
a/website/layouts/shortcodes/bazelVersion.html +++ b/website/layouts/shortcodes/bazelVersion.html @@ -1 +1 @@ -{{.Page.Site.Params.versions.bazel}} +{{- .Page.Site.Params.versions.bazel -}} diff --git a/website/layouts/shortcodes/currentYear.html b/website/layouts/shortcodes/currentYear.html new file mode 100644 index 00000000000..0f5fe378df5 --- /dev/null +++ b/website/layouts/shortcodes/currentYear.html @@ -0,0 +1 @@ +{{- now.Year -}} diff --git a/website/layouts/shortcodes/githubMaster.html b/website/layouts/shortcodes/githubMaster.html index baca760d755..67f06fd50f4 100644 --- a/website/layouts/shortcodes/githubMaster.html +++ b/website/layouts/shortcodes/githubMaster.html @@ -1 +1 @@ -{{.Page.Site.Params.github.master}} +{{- .Page.Site.Params.github.master -}} diff --git a/website/layouts/shortcodes/heronVersion.html b/website/layouts/shortcodes/heronVersion.html index 058b5f521c0..7d71c5bdfed 100644 --- a/website/layouts/shortcodes/heronVersion.html +++ b/website/layouts/shortcodes/heronVersion.html @@ -1 +1 @@ -{{.Page.Site.Params.versions.heron}} +{{- .Page.Site.Params.versions.heron -}} diff --git a/website/layouts/shortcodes/heronpyVersion.html b/website/layouts/shortcodes/heronpyVersion.html new file mode 100644 index 00000000000..ef68dc5c703 --- /dev/null +++ b/website/layouts/shortcodes/heronpyVersion.html @@ -0,0 +1 @@ +{{- .Page.Site.Params.versions.heronpy -}} diff --git a/website/package.json b/website/package.json index 5fc73d7d351..d19022ea929 100755 --- a/website/package.json +++ b/website/package.json @@ -2,7 +2,7 @@ "private": true, "dependencies": { "del": "^2.2.2", - "gulp": "^3.9.1", + "gulp": "github:gulpjs/gulp#4.0", "gulp-clean-css": "^3.0.0", "gulp-concat": "^2.6.1", "gulp-load-plugins": "^1.5.0", @@ -11,8 +11,8 @@ "gulp-uglify": "^2.0.1" }, "scripts": { - "build": "gulp build", - "clean": "gulp clean", - "develop": "gulp dev" + "build": "node_modules/.bin/gulp build", + "clean": "node_modules/.bin/gulp clean", + "develop": 
#!/bin/bash
# Generate the HTML API docs for the heronpy package with pdoc and publish
# them under static/api/python (relative to the current working directory).

# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline. Without this, a failed `pip install` or
# `pdoc` run would still fall through to the publish step.
set -euo pipefail

HERONPY_VERSION=0.0.1
INPUT=heronpy
TMP_DIR=$(mktemp -d)

# Always remove the scratch directory, whether the script succeeds or fails.
trap 'rm -rf "$TMP_DIR"' EXIT

pip install "heronpy==${HERONPY_VERSION}"

# Render into the scratch directory first ...
pdoc "$INPUT" \
  --html \
  --html-dir "$TMP_DIR"

# ... and only replace the previously published docs once rendering succeeded,
# so a failed run never leaves static/api/python missing.
mkdir -p static/api
rm -rf static/api/python
mv "$TMP_DIR/heronpy" static/api/python
601f987c094..67860aaa9ad 100755 --- a/website/scripts/setup.sh +++ b/website/scripts/setup.sh @@ -8,7 +8,7 @@ if [ $PLATFORM = darwin ]; then brew update && brew install nvm && source $(brew --prefix nvm)/nvm.sh nvm install node curl -L https://www.npmjs.com/install.sh | sh - brew list hugo || brew install hugo + go get -v github.com/gohugoio/hugo which wget || brew install wget elif [ $PLATFORM = ubuntu ]; then sudo apt-get install golang git mercurial -y @@ -26,4 +26,4 @@ fi npm install sudo -H pip uninstall -y pygments -sudo -H pip install pygments==2.1.3 +sudo -H pip install pygments==2.1.3 pdoc==0.3.2 From e9dd0ccb0596a7ca81cdc54251c84f9680ba09f7 Mon Sep 17 00:00:00 2001 From: Runhang Li Date: Thu, 20 Jul 2017 11:37:24 -0700 Subject: [PATCH 4/5] Do not change website stuff. --- website/assets/css/pygments.css | 65 ----------------------- website/assets/sass/sloth/_sloth.scss | 0 website/scripts/linkchecker.sh | 76 --------------------------- 3 files changed, 141 deletions(-) delete mode 100644 website/assets/css/pygments.css delete mode 100644 website/assets/sass/sloth/_sloth.scss delete mode 100755 website/scripts/linkchecker.sh diff --git a/website/assets/css/pygments.css b/website/assets/css/pygments.css deleted file mode 100644 index ff0b5d77a2a..00000000000 --- a/website/assets/css/pygments.css +++ /dev/null @@ -1,65 +0,0 @@ -.hll { background-color: #49483e } -.c { color: #75715e } /* Comment */ -.err { color: #960050; background-color: #1e0010 } /* Error */ -.k { color: #66d9ef } /* Keyword */ -.l { color: #ae81ff } /* Literal */ -.n { color: #f8f8f2 } /* Name */ -.o { color: #f92672 } /* Operator */ -.p { color: #f8f8f2 } /* Punctuation */ -.ch { color: #75715e } /* Comment.Hashbang */ -.cm { color: #75715e } /* Comment.Multiline */ -.cp { color: #75715e } /* Comment.Preproc */ -.cpf { color: #75715e } /* Comment.PreprocFile */ -.c1 { color: #75715e } /* Comment.Single */ -.cs { color: #75715e } /* Comment.Special */ -.gd { color: #f92672 } /* 
Generic.Deleted */ -.ge { font-style: italic } /* Generic.Emph */ -.gi { color: #a6e22e } /* Generic.Inserted */ -.gs { font-weight: bold } /* Generic.Strong */ -.gu { color: #75715e } /* Generic.Subheading */ -.kc { color: #66d9ef } /* Keyword.Constant */ -.kd { color: #66d9ef } /* Keyword.Declaration */ -.kn { color: #f92672 } /* Keyword.Namespace */ -.kp { color: #66d9ef } /* Keyword.Pseudo */ -.kr { color: #66d9ef } /* Keyword.Reserved */ -.kt { color: #66d9ef } /* Keyword.Type */ -.ld { color: #e6db74 } /* Literal.Date */ -.m { color: #ae81ff } /* Literal.Number */ -.s { color: #e6db74 } /* Literal.String */ -.na { color: #a6e22e } /* Name.Attribute */ -.nb { color: #f8f8f2 } /* Name.Builtin */ -.nc { color: #a6e22e } /* Name.Class */ -.no { color: #66d9ef } /* Name.Constant */ -.nd { color: #a6e22e } /* Name.Decorator */ -.ni { color: #f8f8f2 } /* Name.Entity */ -.ne { color: #a6e22e } /* Name.Exception */ -.nf { color: #a6e22e } /* Name.Function */ -.nl { color: #f8f8f2 } /* Name.Label */ -.nn { color: #f8f8f2 } /* Name.Namespace */ -.nx { color: #a6e22e } /* Name.Other */ -.py { color: #f8f8f2 } /* Name.Property */ -.nt { color: #f92672 } /* Name.Tag */ -.nv { color: #f8f8f2 } /* Name.Variable */ -.ow { color: #f92672 } /* Operator.Word */ -.w { color: #f8f8f2 } /* Text.Whitespace */ -.mb { color: #ae81ff } /* Literal.Number.Bin */ -.mf { color: #ae81ff } /* Literal.Number.Float */ -.mh { color: #ae81ff } /* Literal.Number.Hex */ -.mi { color: #ae81ff } /* Literal.Number.Integer */ -.mo { color: #ae81ff } /* Literal.Number.Oct */ -.sb { color: #e6db74 } /* Literal.String.Backtick */ -.sc { color: #e6db74 } /* Literal.String.Char */ -.sd { color: #e6db74 } /* Literal.String.Doc */ -.s2 { color: #e6db74 } /* Literal.String.Double */ -.se { color: #ae81ff } /* Literal.String.Escape */ -.sh { color: #e6db74 } /* Literal.String.Heredoc */ -.si { color: #e6db74 } /* Literal.String.Interpol */ -.sx { color: #e6db74 } /* Literal.String.Other */ -.sr { color: 
#e6db74 } /* Literal.String.Regex */ -.s1 { color: #e6db74 } /* Literal.String.Single */ -.ss { color: #e6db74 } /* Literal.String.Symbol */ -.bp { color: #f8f8f2 } /* Name.Builtin.Pseudo */ -.vc { color: #f8f8f2 } /* Name.Variable.Class */ -.vg { color: #f8f8f2 } /* Name.Variable.Global */ -.vi { color: #f8f8f2 } /* Name.Variable.Instance */ -.il { color: #ae81ff } /* Literal.Number.Integer.Long */ diff --git a/website/assets/sass/sloth/_sloth.scss b/website/assets/sass/sloth/_sloth.scss deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/website/scripts/linkchecker.sh b/website/scripts/linkchecker.sh deleted file mode 100755 index 855b2d244ee..00000000000 --- a/website/scripts/linkchecker.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/bin/bash -echo "Running linkchecker..." - -WGET_LOG=wget.log -PORT=15532 - -# kill all running Hugo servers and start serving website in the background -killall hugo 2>/dev/null || true - -# start HTTP server -hugo serve --port=$PORT --ignoreCache 1>/dev/null 2>&1 & - -# sleep 10 seconds to make sure Hugo gets into background -sleep 10 - -# use wget as linkchecker -# Note: -# wget return code is 4 (network err) even though there is no broken link. -# This means we should examine wget's log. -wget --spider -r -l 10 -e robots=off -o $WGET_LOG -p "http://localhost:${PORT}/heron" - -# kill Hugo running in background -killall hugo 2>/dev/null || true - -# remove intermediate directory generated by Hugo -rm -rf "localhost\:${WGET_LOG}" - -# check if wget found no broken link -NO_BROKEN_MSG="Found no broken links" -grep -n "${NO_BROKEN_MSG}" wget.log 1>/dev/null 2>&1 - -# get grep's return code -GREP_STATUS=$? 
class StmgrHeapProfHandler(tornado.web.RequestHandler):
  """
  Asks the stream manager to produce a heap profile.

  The stream manager's process ID is read from the first ``stmgr*.pid`` file
  found in the current working directory, and that process is sent
  ``SIGUSR1``.
  """

  # pylint: disable=attribute-defined-outside-init
  @tornado.web.asynchronous
  def get(self):
    '''
    Signal the stream manager and report the outcome.

    Responds with a short text message. The status is set to 404 when no pid
    file is found, the file cannot be read or does not contain an integer, or
    the signal cannot be delivered.
    '''
    # NOTE(review): this only sets an attribute on the handler; the response
    # body written below is plain text. Confirm whether a Content-Type header
    # was intended here.
    self.content_type = 'application/json'
    try:
      # IndexError: no pid file matched the pattern.
      pid_file = glob.glob('stmgr*.pid')[0]
      # IOError: pid file unreadable; ValueError: content is not an integer.
      with open(pid_file, 'r') as f:
        pid = int(f.read())
      # OSError: the process does not exist or may not be signalled.
      os.kill(pid, signal.SIGUSR1)
    except (IndexError, IOError, OSError, ValueError):
      # Only the expected failures are mapped to 404. Anything else (for
      # example an error while writing the response) propagates to Tornado
      # instead of being reported as a missing stream manager.
      self.set_status(404)
      self.write("No stream manager found")
    else:
      self.write('Performing heap profiling on stream manager...')
    self.finish()