From 9b197d15e2c7b4c7363572dc39584c41c7220e4d Mon Sep 17 00:00:00 2001 From: Daniel Morell Date: Thu, 9 May 2024 06:43:40 -0500 Subject: [PATCH 1/4] Revert multi-level shortener This reverts commits: - "traverse dict for recursive shortening" 764ee1f065f5720f4ae8dec0187562f891ae5bb3 - "correct algo" 97c2a32869b84061bc1a933b1639cdc9c19d1865 - "simplify algo" ded614b98682f9b6061f0c402967a1404133f030 - "traverse object" 0dcad823c14d053d04f6f18cdab60358714b1e97 - "final version" eda09e5543a888aa5ebc3ccb42fe9608444b1363 - "correct unit tests" d79b19b764335302491aeee6ab81f06f82c32657 - "added support for max_level" a32094bd0c19b96795afb63d319108ef02b0930f - "fix bug in the previous test" debe4dc3a37faf9ed77111608a8c607d0c5d8f4b - "added test for multi level shortening" e46e8e1983f9ee1c308e19f26acddfec17b6fda1 --- rollbar/lib/transforms/shortener.py | 36 ++---------------------- rollbar/test/test_shortener_transform.py | 23 ++------------- 2 files changed, 5 insertions(+), 54 deletions(-) diff --git a/rollbar/lib/transforms/shortener.py b/rollbar/lib/transforms/shortener.py index d3040a35..f0392912 100644 --- a/rollbar/lib/transforms/shortener.py +++ b/rollbar/lib/transforms/shortener.py @@ -47,8 +47,6 @@ def _get_max_size(self, obj): return self._repr.maxother - def _get_max_level(self): - return getattr(self._repr, 'maxlevel') def _shorten_sequence(self, obj, max_keys): _len = len(obj) if _len <= max_keys: @@ -79,44 +77,14 @@ def _shorten_other(self, obj): return self._repr.repr(obj) - def traverse_obj(self, obj, level=1): - def seq_iter(o): - return o if isinstance(o, dict) else range(len(o)) - - for k in seq_iter(obj): - max_size = self._get_max_size(obj[k]) - if isinstance(obj[k], dict): - obj[k] = self._shorten_mapping(obj[k], max_size) - if level == self._get_max_level(): - del obj[k] - return - self.traverse_obj(obj[k], level + 1) - elif isinstance(obj[k], sequence_types): - obj[k] = self._shorten_sequence(obj[k], max_size) - if level == self._get_max_level(): - del obj[k] - return - self.traverse_obj(obj[k], level + 1) - else: - obj[k] = self._shorten(obj[k]) - return obj - def _shorten(self, val): max_size = self._get_max_size(val) if isinstance(val, dict): - val = self._shorten_mapping(val, max_size) - return self.traverse_obj(val) - - if isinstance(val, string_types): + return self._shorten_mapping(val, max_size) + if isinstance(val, (string_types, sequence_types)): return self._shorten_sequence(val, max_size) - if isinstance(val, sequence_types): - val = self._shorten_sequence(val, max_size) - if isinstance(val, string_types): - return val - return self.traverse_obj(val) - if isinstance(val, number_types): return self._shorten_basic(val, self._repr.maxlong) diff --git a/rollbar/test/test_shortener_transform.py b/rollbar/test/test_shortener_transform.py index 3dfe47d6..55180c34 100644 --- a/rollbar/test/test_shortener_transform.py +++ b/rollbar/test/test_shortener_transform.py @@ -37,10 +37,7 @@ def setUp(self): 'frozenset': frozenset([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), 'array': array('l', [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), 'deque': deque([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 15), - 'other': TestClassWithAVeryVeryVeryVeryVeryVeryVeryLongName(), - 'list_max_level': [1, [2, [3, [4, ["good_5", ["bad_6", ["bad_7"]]]]]]], - 'dict_max_level': {1: 1, 2: {3: {4: {"level4": "good", "level5": {"toplevel": "ok", 6: {7: {}}}}}}}, - 'list_multi_level': [1, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]] + 'other': TestClassWithAVeryVeryVeryVeryVeryVeryVeryLongName() } def _assert_shortened(self, key, expected): @@ -48,16 +45,14 @@ def _assert_shortened(self, key, expected): result = transforms.transform(self.data, shortener) if key == 'dict': - self.assertEqual(expected, len(result[key])) - elif key in ('list_max_level', 'dict_max_level', 'list_multi_level'): - self.assertEqual(expected, result[key]) + self.assertEqual(expected, len(result)) else: # the repr output can vary between Python versions stripped_result_key = result[key].strip("'\"u") if key == 'other': self.assertIn(expected, stripped_result_key) - elif key not in ('dict', 'list_max_level', 'dict_max_level', 'list_multi_level'): + elif key != 'dict': self.assertEqual(expected, stripped_result_key) # make sure nothing else was shortened @@ -87,18 +82,6 @@ def test_shorten_list(self): expected = '[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ...]' self._assert_shortened('list', expected) - def test_shorten_list_max_level(self): - expected = [1, [2, [3, [4, ['good_5']]]]] - self._assert_shortened('list_max_level', expected) - - def test_shorten_list_multi_level(self): - expected = [1, '[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ...]'] - self._assert_shortened('list_multi_level', expected) - - def test_shorten_dict_max_level(self): - expected = {1: 1, 2: {3: {4: {'level4': 'good', 'level5': {'toplevel': 'ok'}}}}} - self._assert_shortened('dict_max_level', expected) - def test_shorten_tuple(self): expected = '(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ...)' self._assert_shortened('tuple', expected) From 78eebfe16175d3fb35ecb5b9759e5a1aee412963 Mon Sep 17 00:00:00 2001 From: Daniel Morell Date: Thu, 9 May 2024 06:57:42 -0500 Subject: [PATCH 2/4] Updated transform order; moved shortener to first transform. --- rollbar/__init__.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/rollbar/__init__.py b/rollbar/__init__.py index 42b8d5ec..2fe5eb68 100644 --- a/rollbar/__init__.py +++ b/rollbar/__init__.py @@ -402,12 +402,6 @@ def init(access_token, environment='production', scrub_fields=None, url_fields=N # trace frame values using the ShortReprTransform. _serialize_transform = SerializableTransform(safe_repr=SETTINGS['locals']['safe_repr'], safelist_types=SETTINGS['locals']['safelisted_types']) - _transforms = [ - ScrubRedactTransform(), - _serialize_transform, - ScrubTransform(suffixes=[(field,) for field in SETTINGS['scrub_fields']], redact_char='*'), - ScrubUrlTransform(suffixes=[(field,) for field in SETTINGS['url_fields']], params_to_scrub=SETTINGS['scrub_fields']) - ] # A list of key prefixes to apply our shortener transform to. The request # being included in the body key is old behavior and is being retained for @@ -431,7 +425,13 @@ def init(access_token, environment='production', scrub_fields=None, url_fields=N shortener = ShortenerTransform(safe_repr=SETTINGS['locals']['safe_repr'], keys=shortener_keys, **SETTINGS['locals']['sizes']) - _transforms.append(shortener) + _transforms = [ + shortener, + ScrubRedactTransform(), + _serialize_transform, + ScrubTransform(suffixes=[(field,) for field in SETTINGS['scrub_fields']], redact_char='*'), + ScrubUrlTransform(suffixes=[(field,) for field in SETTINGS['url_fields']], params_to_scrub=SETTINGS['scrub_fields']) + ] _threads = queue.Queue() events.reset() filters.add_builtin_filters(SETTINGS) From 96bb8ebee5292372582b43da5a685d09dac82cab Mon Sep 17 00:00:00 2001 From: Daniel Morell Date: Thu, 9 May 2024 07:01:01 -0500 Subject: [PATCH 3/4] Fixed shortener deep multi-level shortening. --- rollbar/lib/__init__.py | 31 +++++--- rollbar/lib/transforms/shortener.py | 18 ++++- rollbar/test/test_lib.py | 54 +++++++++++--- rollbar/test/test_shortener_transform.py | 93 ++++++++++++++++++++++++ 4 files changed, 177 insertions(+), 19 deletions(-) diff --git a/rollbar/lib/__init__.py b/rollbar/lib/__init__.py index 8eb7be69..580d7287 100644 --- a/rollbar/lib/__init__.py +++ b/rollbar/lib/__init__.py @@ -33,28 +33,41 @@ def prefix_match(key, prefixes): return False -def key_in(key, keys): +def key_in(key, canonicals): if not key: return False - for k in keys: - if key_match(k, key): + for c in canonicals: + if key_match(key, c): return True return False -def key_match(key1, key2): - if len(key1) != len(key2): - return False +def key_depth(key, canonicals) -> int: + if not key: + return 0 + + for c in canonicals: + if key_match(key, c): + return len(c) + + return 0 - for p1, p2 in zip(key1, key2): - if '*' == p1 or '*' == p2: + +def key_match(key, canonical): + i = 0 + for k, c in zip(key, canonical): + i += 1 + if '*' == c: continue - if p1 == p2: + if c == k: continue return False + if i < len(canonical) - 1: + return False + return True diff --git a/rollbar/lib/transforms/shortener.py b/rollbar/lib/transforms/shortener.py index f0392912..2b33dbb3 100644 --- a/rollbar/lib/transforms/shortener.py +++ b/rollbar/lib/transforms/shortener.py @@ -6,7 +6,7 @@ from collections.abc import Mapping from rollbar.lib import ( - integer_types, key_in, number_types, sequence_types, + integer_types, key_in, key_depth, number_types, sequence_types, string_types) from rollbar.lib.transform import Transform @@ -96,7 +96,23 @@ def _should_shorten(self, val, key): return key_in(key, self.keys) + def _should_drop(self, val, key) -> bool: + if not key: + return False + + maxdepth = key_depth(key, self.keys) + if maxdepth == 0: + return False + + return (maxdepth + self._repr.maxlevel) <= len(key) + def default(self, o, key=None): + if self._should_drop(o, key): + if isinstance(o, dict): + return '{...}' + if isinstance(o, sequence_types): + return '[...]' + if self._should_shorten(o, key): return self._shorten(o) diff --git a/rollbar/test/test_lib.py b/rollbar/test/test_lib.py index 7d792638..a7f04b5d 100644 --- a/rollbar/test/test_lib.py +++ b/rollbar/test/test_lib.py @@ -1,24 +1,60 @@ -from rollbar.lib import dict_merge, prefix_match +from rollbar.lib import dict_merge, prefix_match, key_match, key_depth from rollbar.test import BaseTest class RollbarLibTest(BaseTest): - def test_dict_merge_not_dict(self): - a = {'a': {'b': 42}} - b = 99 - result = dict_merge(a, b) - - self.assertEqual(99, result) - def test_prefix_match(self): key = ['password', 'argspec', '0'] self.assertTrue(prefix_match(key, [['password']])) - def test_prefix_match(self): + def test_prefix_match_dont_match(self): key = ['environ', 'argspec', '0'] self.assertFalse(prefix_match(key, [['password']])) + def test_key_match(self): + canonical = ['body', 'trace', 'frames', '*', 'locals', '*'] + key = ['body', 'trace', 'frames', 5, 'locals', 'foo'] + + self.assertTrue(key_match(key, canonical)) + + def test_key_match_dont_match(self): + canonical = ['body', 'trace', 'frames', '*', 'locals', '*'] + key = ['body', 'trace', 'frames', 5, 'bar', 'foo'] + + self.assertFalse(key_match(key, canonical)) + + def test_key_match_wildcard_end(self): + canonical = ['body', 'trace', 'frames', '*', 'locals', '*'] + key = ['body', 'trace', 'frames', 5, 'locals', 'foo', 'bar'] + + self.assertTrue(key_match(key, canonical)) + + def test_key_depth(self): + canonicals = [['body', 'trace', 'frames', '*', 'locals', '*']] + key = ['body', 'trace', 'frames', 5, 'locals', 'foo'] + + self.assertEqual(6, key_depth(key, canonicals)) + + def test_key_depth_dont_match(self): + canonicals = [['body', 'trace', 'frames', '*', 'locals', '*']] + key = ['body', 'trace', 'frames', 5, 'bar', 'foo'] + + self.assertEqual(0, key_depth(key, canonicals)) + + def test_key_depth_wildcard_end(self): + canonicals = [['body', 'trace', 'frames', '*']] + key = ['body', 'trace', 'frames', 5, 'locals', 'foo', 'bar'] + + self.assertEqual(4, key_depth(key, canonicals)) + + def test_dict_merge_not_dict(self): + a = {'a': {'b': 42}} + b = 99 + result = dict_merge(a, b) + + self.assertEqual(99, result) + def test_dict_merge_dicts_independent(self): a = {'a': {'b': 42}} b = {'x': {'y': 99}} diff --git a/rollbar/test/test_shortener_transform.py b/rollbar/test/test_shortener_transform.py index 55180c34..4499efe4 100644 --- a/rollbar/test/test_shortener_transform.py +++ b/rollbar/test/test_shortener_transform.py @@ -128,3 +128,96 @@ def test_shorten_object(self): self.assertEqual(type(result), dict) self.assertEqual(len(result['request']['POST']), 10) + def test_shorten_frame(self): + data = { + 'body': { + 'trace': { + 'frames': [ + { + "filename": "/path/to/app.py", + "lineno": 82, + "method": "sub_func", + "code": "extra(**kwargs)", + "keywordspec": "kwargs", + "locals": { + "kwargs": { + "app": ["foo", "bar", "baz", "qux", "quux", "corge", "grault", "garply", "waldo", + "fred", "plugh", "xyzzy", "thud"], + "extra": { + "request": "" + } + }, + "one": { + "two": { + "three": { + "four": { + "five": { + "six": { + "seven": 8, + "eight": "nine" + }, + "ten": "Yep! this should still be here, but it is a little on the " + "long side, so we might want to cut it down a bit." + } + } + } + }, + "a": ["foo", "bar", "baz", "qux", 5, 6, 7, 8, 9, 10, 11, 12], + "b": 14071504106566481658450568387453168916351054663, + "app_id": 140715046161904, + "bar": "im a bar", + } + } + } + ] + } + } + } + keys = [('body', 'trace', 'frames', '*', 'locals', '*')] + shortener = ShortenerTransform(keys=keys, **DEFAULT_LOCALS_SIZES) + result = transforms.transform(data, shortener) + expected = { + 'body': { + 'trace': { + 'frames': [ + { + "filename": "/path/to/app.py", + "lineno": 82, + "method": "sub_func", + "code": "extra(**kwargs)", + "keywordspec": "kwargs", + "locals": { + "kwargs": { + # Shortened + "app": "['foo', 'bar', 'baz', 'qux', 'quux', 'corge', 'grault', 'garply', 'waldo', " + "'fred', ...]", + "extra": { + "request": "" + } + }, + "one": { + "two": { + "three": { + "four": { + "five": { + "six": '{...}', # Dropped because it is past the maxlevel. + # Shortened + "ten": "'Yep! this should still be here, but it is a lit...ong " + "side, so we might want to cut it down a bit.'" + } + } + } + }, + "a": "['foo', 'bar', 'baz', 'qux', 5, 6, 7, 8, 9, 10, ...]", # Shortened + "b": '140715041065664816...7453168916351054663', # Shortened + "app_id": 140715046161904, + "bar": "im a bar", + } + } + } + ] + } + } + } + + self.assertEqual(result, expected) From 7ece3cbf33de1502856fb1ad6604a9ec008d28a1 Mon Sep 17 00:00:00 2001 From: Daniel Morell Date: Fri, 17 May 2024 13:54:20 -0500 Subject: [PATCH 4/4] Fixed shortener key matching above final '*' level. --- rollbar/lib/__init__.py | 8 +++----- rollbar/test/test_lib.py | 6 ++++++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/rollbar/lib/__init__.py b/rollbar/lib/__init__.py index 580d7287..58bf7557 100644 --- a/rollbar/lib/__init__.py +++ b/rollbar/lib/__init__.py @@ -56,18 +56,16 @@ def key_depth(key, canonicals) -> int: def key_match(key, canonical): - i = 0 + if len(key) < len(canonical): + return False + for k, c in zip(key, canonical): - i += 1 if '*' == c: continue if c == k: continue return False - if i < len(canonical) - 1: - return False - return True diff --git a/rollbar/test/test_lib.py b/rollbar/test/test_lib.py index a7f04b5d..fbaeaed0 100644 --- a/rollbar/test/test_lib.py +++ b/rollbar/test/test_lib.py @@ -30,6 +30,12 @@ def test_key_match_wildcard_end(self): self.assertTrue(key_match(key, canonical)) + def test_key_match_too_short(self): + canonical = ['body', 'trace', 'frames', '*', 'locals', '*'] + key = ['body', 'trace', 'frames', 5, 'locals'] + + self.assertFalse(key_match(key, canonical)) + def test_key_depth(self): canonicals = [['body', 'trace', 'frames', '*', 'locals', '*']] key = ['body', 'trace', 'frames', 5, 'locals', 'foo']