Merge pull request #2 from carsonwang/revert-1-mergeStreamingWithMaster
Revert "Merge streaming with master"
Showing 177 changed files with 43,376 additions and 4,746 deletions.
@@ -1,152 +1,146 @@
#########################################################
# General Stream Config
#########################################################
# Two data sets (text and numeric) are available; the app argument indicates which to use
#app=micro-sketch       # use the text dataset, avg record size: 60 bytes
#app=micro-statistics   # use the numeric dataset, avg record size: 200 bytes
hibench.streamingbench.app micro-sketch

# Note: keep benchName consistent with the datagen type; numeric data for statistics, text data for the others
# (available benchnames: identity, repartition) TBD: sample project grep wordcount distinctcount statistics
hibench.streambench.testCase identity
# The text dataset can be scaled in terms of record size
hibench.streamingbench.prepare.textdataset_recordsize_factor

# ZooKeeper address for the Kafka service (default: HOSTNAME:HOSTPORT)
hibench.streambench.zkHost HOSTNAME:HOSTPORT
# Two generator modes: push, periodic
# Push sends data to the Kafka cluster as fast as it can
# Periodic sends data according to the sending-rate specification
#hibench.streamingbench.prepare.mode push
hibench.streamingbench.prepare.mode periodic

# Probability used in the sample test case
hibench.streambench.sampleProbability 0.1
# Under push mode: total number of records that will be generated
hibench.streamingbench.prepare.push.records 900000000

# Indicates whether to run in debug mode for correctness verification (default: false)
hibench.streambench.debugMode false
# The following three params apply under periodic mode
# Records to push per interval
hibench.streamingbench.prepare.periodic.recordPerInterval 600000

# JARS
hibench.streambench.datagen.jar ${hibench.home}/src/streambench/datagen/target/streaming-bench-datagen-5.0-SNAPSHOT-jar-with-dependencies.jar
hibench.streambench.sparkbench.jar ${hibench.home}/src/streambench/sparkbench/target/streaming-bench-spark-5.0-SNAPSHOT-${hibench.spark.version}-jar-with-dependencies.jar
hibench.streambench.stormbench.jar ${hibench.home}/src/streambench/stormbench/target/streaming-bench-storm-5.0-SNAPSHOT.jar
hibench.streambench.gearpump.jar ${hibench.home}/src/streambench/gearpumpbench/target/streaming-bench-gearpump-5.0-SNAPSHOT-jar-with-dependencies.jar
hibench.streambench.flinkbench.jar ${hibench.home}/src/streambench/flinkbench/target/streaming-bench-flink-5.0-SNAPSHOT-jar-with-dependencies.jar
# Interval time (in ms)
hibench.streamingbench.prepare.periodic.intervalSpan 5000

#########################################################
# Kafka Config
#########################################################
# Total number of rounds of data to send
hibench.streamingbench.prepare.periodic.totalRound 100
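Taken together, the periodic-mode defaults above pin down the generation rate: 600,000 records every 5,000 ms interval works out to 120,000 records per second, and with totalRound 100 the generator sends 60,000,000 records over the whole run.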

# Kafka home
hibench.streambench.kafka.home /PATH/TO/KAFKA/HOME

# The topic on which Spark will receive input data (default: ${hibench.streambench.testCase})
hibench.streambench.kafka.topic ${hibench.streambench.testCase}

# Number of partitions of the generated topic (default: 20)
hibench.streambench.kafka.topicPartitions 20
# ZooKeeper host:port of the Kafka cluster

# Consumer group of the Kafka consumer (default: HiBench)
hibench.streambench.kafka.consumerGroup HiBench
# example: hostname:9092
hibench.streamingbench.zookeeper.host HOSTNAME:HOSTPORT

# Kafka broker list, in the form "host:port,host:port,..." (default: HOSTNAME:HOSTPORT)
hibench.streambench.kafka.brokerList HOSTNAME:HOSTPORT
# Parallelism config
# Number of nodes that will receive Kafka input
hibench.streamingbench.receiver_nodes 4

# Starting offset of the Kafka consumer (default: largest)
hibench.streambench.kafka.offsetReset largest
#########################################################
# Data Generator Config
#########################################################
###############
# Benchmark args
# Note: keep benchName consistent with the datagen type; numeric data for statistics, text data for the others
# available benchnames: identity sample project grep wordcount distinctcount statistics

# Interval span in milliseconds (default: 50)
hibench.streambench.datagen.intervalSpan 50
hibench.streamingbench.benchname identity

# Number of records to generate per interval span (default: 5)
hibench.streambench.datagen.recordsPerInterval 5
# Common args
# The topic on which Spark will receive input data
hibench.streamingbench.topic_name ${hibench.streamingbench.benchname}

# Total number of records that will be generated (default: -1, meaning unbounded)
hibench.streambench.datagen.totalRecords -1
# Spark Streaming batch interval (in seconds)
hibench.streamingbench.batch_interval 10

# Total number of rounds of data to send (default: -1, meaning unbounded)
hibench.streambench.datagen.totalRounds -1
# Consumer group of the Spark consumer for Kafka
hibench.streamingbench.consumer_group HiBench

# Default path for storing seed files (default: ${hibench.hdfs.data.dir}/Streaming)
hibench.streambench.datagen.dir ${hibench.hdfs.data.dir}/Streaming
# Expected number of records to be processed
hibench.streamingbench.record_count 900000000

# Fixed record length (default: 200)
hibench.streambench.datagen.recordLength 200
# sketch/distinctcount/statistics arg
# The index of the field to extract from each record
hibench.streamingbench.field_index 1

# Number of KafkaProducers running on separate threads (default: 1)
# A single KafkaProducer is limited to roughly 100 Mb/s
hibench.streambench.datagen.producerNumber 1
# sketch/wordcount/distinctcount/statistics arg
# The separator between fields of a single record
hibench.streamingbench.separator \\s+
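As a rough illustration of the producerNumber note above, here is a minimal Scala sketch (not HiBench's actual datagen code; the broker address and topic are this file's placeholders) that spreads generation across several KafkaProducer instances, one per thread:

    import java.util.Properties
    import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

    // Hypothetical sketch: one KafkaProducer per thread, since a single
    // producer saturates at roughly 100 Mb/s per the comment above.
    val producerNumber = 2 // hibench.streambench.datagen.producerNumber
    val props = new Properties()
    props.put("bootstrap.servers", "HOSTNAME:HOSTPORT") // hibench.streambench.kafka.brokerList
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    val threads = (1 to producerNumber).map { _ =>
      new Thread(new Runnable {
        override def run(): Unit = {
          val producer = new KafkaProducer[String, String](props)
          producer.send(new ProducerRecord[String, String]("identity", "one generated record"))
          producer.close()
        }
      })
    }
    threads.foreach(_.start())
    threads.foreach(_.join())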

hibench.streambench.fixWindowDuration 30000
# sample arg
# Probability that a record will be taken as a sample
hibench.streamingbench.prob 0.1

hibench.streambench.fixWindowSlideStep 30000
#########################################################
# Spark Streaming Config
#########################################################
# grep arg
# The substring each record is checked for (a record matches if it contains this pattern)
hibench.streamingbench.pattern the
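To make the per-test-case knobs concrete, a small Scala sketch of their assumed semantics (illustrative only, not HiBench's exact code; the sample record is made up):

    // Assumed semantics of separator, field_index, pattern and prob:
    val separator  = "\\s+" // hibench.streamingbench.separator
    val fieldIndex = 1      // hibench.streamingbench.field_index
    val pattern    = "the"  // hibench.streamingbench.pattern
    val prob       = 0.1    // hibench.streamingbench.prob

    val record  = "2016-01-01 user42 read the log"    // hypothetical input record
    val field   = record.split(separator)(fieldIndex) // field extraction => "user42"
    val matched = record.contains(pattern)            // grep test case => true
    val sampled = scala.util.Random.nextDouble() < prob // sample test case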

# Number of nodes that will receive Kafka input (default: 4)
hibench.streambench.spark.receiverNumber 4
# common arg
# Indicates the RDD storage level:
# 1 for memory only, 1 copy; anything else for the default mem_disk_ser, 2 copies
hibench.streamingbench.copies 2

# Spark Streaming batch interval in milliseconds (default: 100)
hibench.streambench.spark.batchInterval 100
# Indicates whether to test the new write-ahead log feature
# Set true to test the WAL feature
hibench.streamingbench.testWAL false

# Indicates the RDD storage level (default: 2)
# 0 = StorageLevel.MEMORY_ONLY
# 1 = StorageLevel.MEMORY_AND_DISK_SER
# other = StorageLevel.MEMORY_AND_DISK_SER_2
hibench.streambench.spark.storageLevel 2
# If testWAL is true, the HDFS path for storing the stream context must be specified; if false, it may be left empty
hibench.streamingbench.checkpoint_path
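The integer-to-storage-level table above maps directly onto Spark's StorageLevel constants; a minimal Scala sketch of that mapping (the helper name is hypothetical, not HiBench's code):

    import org.apache.spark.storage.StorageLevel

    // Hypothetical helper mirroring the table above.
    def storageLevelOf(level: Int): StorageLevel = level match {
      case 0 => StorageLevel.MEMORY_ONLY           // memory only, 1 copy
      case 1 => StorageLevel.MEMORY_AND_DISK_SER   // serialized, spills to disk
      case _ => StorageLevel.MEMORY_AND_DISK_SER_2 // serialized, 2 replicas (default)
    }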

# Indicates whether to test the new write-ahead log feature (default: false)
hibench.streambench.spark.enableWAL false
# common arg
# Indicates whether to run in debug mode for correctness verification
hibench.streamingbench.debug false

# If testWAL is true, the HDFS path for storing the stream context must be specified; if false, it may be left empty (default: /var/tmp)
hibench.streambench.spark.checkpointPath /var/tmp
# Whether to use the direct approach or not (Spark Streaming only)
hibench.streamingbench.direct_mode true
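For context on how enableWAL, batchInterval, and checkpointPath are assumed to plug into Spark Streaming (a sketch, not HiBench's code): the receiver write-ahead log is switched on via a Spark flag and additionally needs a checkpoint directory.

    import org.apache.spark.SparkConf
    import org.apache.spark.streaming.{Milliseconds, StreamingContext}

    // Sketch: wire enableWAL, batchInterval and checkpointPath into Spark.
    val conf = new SparkConf()
      .setAppName("streambench")
      .set("spark.streaming.receiver.writeAheadLog.enable", "true") // enableWAL=true
    val ssc = new StreamingContext(conf, Milliseconds(100)) // spark.batchInterval
    ssc.checkpoint("/var/tmp") // spark.checkpointPath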

# Whether to use the direct approach or not (default: true)
hibench.streambench.spark.useDirectMode true
# Kafka broker list, used for direct mode, in the form "host:port,host:port,..."
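The two approaches that useDirectMode toggles between look roughly like this with the Spark 1.x spark-streaming-kafka API (a sketch reusing the ssc from the previous snippet; host and topic values are this file's placeholders):

    import kafka.serializer.StringDecoder
    import org.apache.spark.streaming.kafka.KafkaUtils

    // useDirectMode=true: receiver-less; Kafka partitions map 1:1 to RDD partitions
    val direct = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      ssc,
      Map("metadata.broker.list" -> "HOSTNAME:HOSTPORT"), // brokerList
      Set("identity"))                                    // topic

    // useDirectMode=false: receiver-based stream consuming via ZooKeeper
    val receiver = KafkaUtils.createStream(
      ssc, "HOSTNAME:HOSTPORT", "HiBench", Map("identity" -> 1)) // zkHost, consumerGroup, topic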

#########################################################
# Flink Config
#########################################################
hibench.streambench.flink.home /PATH/TO/FLINK/HOME
# example: hostname:9092
hibench.streamingbench.brokerList HOSTNAME:HOSTPORT

# Default parallelism of the Flink job
hibench.streambench.flink.parallelism 20
hibench.streamingbench.broker_list_with_quote "${hibench.streamingbench.brokerList}"

hibench.streambench.flink.bufferTimeout 5
# Storm bench conf

hibench.streambench.flink.checkpointDuration 1000
# STORM_BIN_HOME
hibench.streamingbench.storm.home /PATH/TO/STORM/HOME

#########################################################
# Storm Config
#########################################################
# Kafka home
hibench.streamingbench.kafka.home /PATH/TO/KAFKA/HOME

# STORM_BIN_HOME
hibench.streambench.storm.home /PATH/TO/STORM/HOME

# Cluster config
# Nimbus of the Storm cluster
hibench.streambench.storm.nimbus HOSTNAME_OF_STORM_NIMBUS
hibench.streambench.storm.nimbusAPIPort 6627
hibench.streamingbench.storm.nimbus HOSTNAME_OF_STORM
hibench.streamingbench.storm.nimbusAPIPort 6627

# Time interval at which Nimbus is contacted to check whether the job has finished
hibench.streambench.storm.nimbusContactInterval 10
hibench.streamingbench.storm.nimbusContactInterval 10

# Parallelism config

# Number of Storm workers; the number of threads of most bolts also equals this param
hibench.streambench.storm.worker_count 12
hibench.streamingbench.storm.worker_count 12

# Number of Kafka spout threads in Storm
hibench.streambench.storm.spout_threads 12
hibench.streamingbench.storm.spout_threads 12

# Total number of bolt threads
hibench.streambench.storm.bolt_threads 12
hibench.streamingbench.storm.bolt_threads 12

# Kafka arg indicating whether to read data from the start of the topic or to continue from the last position
hibench.streambench.storm.read_from_start true
hibench.streamingbench.storm.read_from_start true

# Whether to turn on acking
hibench.streambench.storm.ackon true

#########################################################
# Gearpump Config
#########################################################

hibench.streambench.gearpump.home /PATH/TO/GEARPUMP/HOME

hibench.streambench.gearpump.executors 1

hibench.streambench.gearpump.parallelism 1
hibench.streamingbench.storm.ackon true

# Added for default rules:
hibench.streamingbench.jars ${hibench.streamingbench.sparkbench.jar}
hibench.streamingbench.sparkbench.jar ${hibench.home}/src/streambench/sparkbench/target/streaming-bench-spark_0.1-5.0-SNAPSHOT-${hibench.spark.version}-jar-with-dependencies.jar
hibench.streamingbench.stormbench.jar ${hibench.home}/src/streambench/stormbench/target/streaming-bench-storm-0.1-SNAPSHOT-jar-with-dependencies.jar
hibench.streamingbench.datagen.jar ${hibench.home}/src/streambench/datagen/target/datagen-0.0.1-jar-with-dependencies.jar
hibench.streamingbench.storm.bin ${hibench.streamingbench.storm.home}/bin
hibench.streamingbench.zkhelper.jar ${hibench.home}/src/streambench/zkHelper/target/streaming-bench-zkhelper-0.1-SNAPSHOT-jar-with-dependencies.jar

# Default path for storing data1 & data2
hibench.streamingbench.datagen.dir ${hibench.hdfs.data.dir}/Streaming

# Partition size settings
hibench.streamingbench.partitions 1