diff --git a/README.markdown b/README.markdown index d68202f..a632121 100644 --- a/README.markdown +++ b/README.markdown @@ -73,8 +73,8 @@ Generate an ascii bar chart for input data (this is like a visualization of `uni 14:40 [ 49] ∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎∎ 14:41 [ 14] ∎∎∎∎∎∎∎∎∎∎∎∎∎∎ -bar_chart.py also supports ingesting aggregated values. Simply provide a two column input of keyvalue: +`bar_chart.py` and `histogram.py` also support ingesting pre-aggregated values. Simply provide a two column input of `count value` for `-a` or `value count` for `-A`: - $ cat data | uniq -c | bar_chart.py --sort-keys --agg-values + $ cat data | uniq -c | bar_chart.py -a This is very convenient if you pull data out, say Hadoop or MySQL already aggregated. diff --git a/data_hacks/bar_chart.py b/data_hacks/bar_chart.py index 3551860..13f76a8 100755 --- a/data_hacks/bar_chart.py +++ b/data_hacks/bar_chart.py @@ -41,11 +41,16 @@ def run(input_stream, options): data = defaultdict(int) total = 0 for row in input_stream: - if options.agg_values: - kv = row.replace('\t', ' ').split(' ',2); + if options.agg_key_value: + kv = row.rstrip().rsplit(None, 1) value = int(kv[1]) data[kv[0]] += value total += value + elif options.agg_value_key: + kv = row.lstrip().split(None, 1) + value = int(kv[0]) + data[kv[1]] += value + total += value else: data[row] += 1 total += 1 @@ -85,7 +90,9 @@ def run(input_stream, options): if __name__ == "__main__": parser = OptionParser() parser.usage = "cat data | %prog [options]" - parser.add_option("-a", "--agg-values", dest="agg_values", default=False, action="store_true", + parser.add_option("-a", "--agg", dest="agg_value_key", default=False, action="store_true", + help="Two column input format, space seperated with valuekey") + parser.add_option("-A", "--agg-key-value", dest="agg_key_value", default=False, action="store_true", help="Two column input format, space seperated with keyvalue") parser.add_option("-k", "--sort-keys", 
dest="sort_keys", default=True, action="store_true", help="sort by the key [default]") diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py index 72b3806..042a02f 100755 --- a/data_hacks/histogram.py +++ b/data_hacks/histogram.py @@ -76,7 +76,7 @@ def test_mvsd(): assert '%.2f' % mvsd.var() == "8.25" assert '%.14f' % mvsd.sd() == "2.87228132326901" -def load_stream(input_stream, agg): +def load_stream(input_stream, agg_value_key, agg_key_value): for line in input_stream: clean_line = line.strip() if not clean_line: @@ -85,11 +85,14 @@ def load_stream(input_stream, agg): if clean_line[0] in ['"', "'"]: clean_line = clean_line.strip("\"'") try: - if agg: - value, count = line.replace("\t", ' ').split(' ', 2) - yield DataPoint(Decimal(value), int(count)) - continue - yield DataPoint(Decimal(clean_line), 1) + if agg_key_value: + key, value = clean_line.rstrip().rsplit(None, 1) + yield DataPoint(Decimal(key), int(value)) + elif agg_value_key: + value, key = clean_line.lstrip().split(None, 1) + yield DataPoint(Decimal(key), int(value)) + else: + yield DataPoint(Decimal(clean_line), 1) except: logging.exception('failed %r', line) print >>sys.stderr, "invalid line %r" % line @@ -219,7 +222,9 @@ def histogram(stream, options): if __name__ == "__main__": parser = OptionParser() parser.usage = "cat data | %prog [options]" - parser.add_option("-a", "--agg", dest="agg", default=False, action="store_true", + parser.add_option("-a", "--agg", dest="agg_value_key", default=False, action="store_true", + help="Two column input format, space seperated with valuekey") + parser.add_option("-A", "--agg-key-value", dest="agg_key_value", default=False, action="store_true", help="Two column input format, space seperated with keyvalue") parser.add_option("-m", "--min", dest="min", help="minimum value for graph") @@ -242,5 +247,5 @@ def histogram(stream, options): parser.print_usage() print "for more help use --help" sys.exit(1) - histogram(load_stream(sys.stdin, options.agg), 
options) + histogram(load_stream(sys.stdin, options.agg_value_key, options.agg_key_value), options)