From 56c0260346411b349e5d4146a3f69e0edcd362e7 Mon Sep 17 00:00:00 2001 From: Chandra Sanapala Date: Mon, 9 Dec 2024 18:25:06 +0530 Subject: [PATCH 1/2] feat: add trim/ltrim/rtrim with single input argument to remove spaces --- extensions/functions_string.yaml | 30 ++++++++++++++++++++++++++++++ tests/cases/string/ltrim.test | 21 +++++++++++++++++++++ tests/cases/string/rtrim.test | 21 +++++++++++++++++++++ tests/cases/string/trim.test | 21 +++++++++++++++++++++ tests/coverage/visitor.py | 8 ++++++++ tests/test_extensions.py | 8 ++++---- 6 files changed, 105 insertions(+), 4 deletions(-) diff --git a/extensions/functions_string.yaml b/extensions/functions_string.yaml index 399fb3797..a089d4d6a 100644 --- a/extensions/functions_string.yaml +++ b/extensions/functions_string.yaml @@ -1316,6 +1316,16 @@ scalar_functions: name: "characters" description: "The set of characters to remove." return: "string" + - args: + - value: "varchar" + name: "input" + description: "The string to remove characters from." + return: "varchar" + - args: + - value: "string" + name: "input" + description: "The string to remove characters from." + return: "string" - name: rtrim description: >- @@ -1338,6 +1348,16 @@ scalar_functions: name: "characters" description: "The set of characters to remove." return: "string" + - args: + - value: "varchar" + name: "input" + description: "The string to remove characters from." + return: "varchar" + - args: + - value: "string" + name: "input" + description: "The string to remove characters from." + return: "string" - name: trim description: >- @@ -1360,6 +1380,16 @@ scalar_functions: name: "characters" description: "The set of characters to remove." return: "string" + - args: + - value: "varchar" + name: "input" + description: "The string to remove characters from." + return: "varchar" + - args: + - value: "string" + name: "input" + description: "The string to remove characters from." + return: "string" - name: lpad description: >- diff --git a/tests/cases/string/ltrim.test b/tests/cases/string/ltrim.test index 53a2f495f..5ef786c1f 100644 --- a/tests/cases/string/ltrim.test +++ b/tests/cases/string/ltrim.test @@ -10,7 +10,28 @@ ltrim(''::str, ' '::str) = ''::str ltrim(' '::str, ' '::str) = ''::str ltrim(null::str, ' '::str) = null::str +# spaces_only: Examples with only spaces to trim off +ltrim('abc'::str) = 'abc'::str +ltrim(' abc'::str) = 'abc'::str +ltrim('abc '::str) = 'abc '::str +ltrim(' abc '::str) = 'abc '::str +ltrim(''::str) = ''::str +ltrim(' '::str) = ''::str +ltrim(null::str) = null::str + # two_inputs: Examples with character input to trim off ltrim('aaaaabc'::str, 'a'::str) [spaces_only:FALSE] = 'bc'::str ltrim('abcabcdef'::str, 'abc'::str) [spaces_only:FALSE] = 'def'::str ltrim('abccbadef'::str, 'abc'::str) [spaces_only:FALSE] = 'def'::str + +# ltrim with varchar +ltrim('abc'::vchar<20>, ' '::vchar<5>) = 'abc'::vchar<20> +ltrim(' abc'::vchar<20>, ' '::vchar<5>) = 'abc'::vchar<20> +ltrim('abc '::vchar<20>, ' '::vchar<5>) = 'abc '::vchar<20> +ltrim(' abc '::vchar<20>, ' '::vchar<5>) = 'abc '::vchar<20> +ltrim('abc'::vchar<20>) = 'abc'::vchar<20> +ltrim(' abc'::vchar<20>) = 'abc'::vchar<20> +ltrim('abc '::vchar<20>) = 'abc '::vchar<20> +ltrim(' abc '::vchar<20>) = 'abc '::vchar<20> +ltrim('aaaaabc'::vchar<20>, 'a'::vchar<9>) [spaces_only:False] = 'bc'::vchar<20> +ltrim('abcabcdef'::vchar<20>, 'abc'::vchar<9>) [spaces_only:False] = 'def'::vchar<20> diff --git a/tests/cases/string/rtrim.test b/tests/cases/string/rtrim.test index 0a9d5a5b0..e39f9b8a1 100644 --- a/tests/cases/string/rtrim.test +++ b/tests/cases/string/rtrim.test @@ -10,7 +10,28 @@ rtrim(''::str, ' '::str) = ''::str rtrim(' '::str, ' '::str) = ''::str rtrim(null::str, ' '::str) = null::str +# spaces_only: Examples with only spaces to trim off +rtrim('abc'::str) = 'abc'::str +rtrim(' abc'::str) = ' abc'::str +rtrim('abc '::str) = 'abc'::str +rtrim(' abc '::str) = ' abc'::str +rtrim(''::str) = ''::str +rtrim(' '::str) = ''::str +rtrim(null::str) = null::str + # two_inputs: Examples with character input to trim off rtrim('aaaaabccccc'::str, 'c'::str) [spaces_only:FALSE] = 'aaaaab'::str rtrim('abcabcdef'::str, 'def'::str) [spaces_only:FALSE] = 'abcabc'::str rtrim('defabccba'::str, 'abc'::str) [spaces_only:FALSE] = 'def'::str + +# rtrim with varchar +rtrim('abc'::vchar<20>, ' '::vchar<5>) = 'abc'::vchar<20> +rtrim(' abc'::vchar<20>, ' '::vchar<5>) = ' abc'::vchar<20> +rtrim('abc '::vchar<20>, ' '::vchar<5>) = 'abc'::vchar<20> +rtrim(' abc '::vchar<20>, ' '::vchar<5>) = ' abc'::vchar<20> +rtrim('abc'::vchar<20>) = 'abc'::vchar<20> +rtrim(' abc'::vchar<20>) = ' abc'::vchar<20> +rtrim('abc '::vchar<20>) = 'abc'::vchar<20> +rtrim(' abc '::vchar<20>) = ' abc'::vchar<20> +rtrim('aaaaabccccc'::vchar<20>, 'c'::vchar<9>) [spaces_only:False] = 'aaaaab'::vchar<20> +rtrim('abcabcdef'::vchar<20>, 'def'::vchar<9>) [spaces_only:False] = 'abcabc'::vchar<20> diff --git a/tests/cases/string/trim.test b/tests/cases/string/trim.test index f93c3e291..e020606fc 100644 --- a/tests/cases/string/trim.test +++ b/tests/cases/string/trim.test @@ -10,7 +10,28 @@ trim(''::str, ' '::str) = ''::str trim(' '::str, ' '::str) = ''::str trim(null::str, ' '::str) = null::str +# spaces_only: Examples with only spaces to trim off +trim('abc'::str) = 'abc'::str +trim(' abc'::str) = 'abc'::str +trim('abc '::str) = 'abc'::str +trim(' abc '::str) = 'abc'::str +trim(''::str) = ''::str +trim(' '::str) = ''::str +trim(null::str) = null::str + # two_inputs: Examples with character input to trim off trim('aaaaabcccccaaa'::str, 'a'::str) [spaces_only:False] = 'bccccc'::str trim('defabcabcdef'::str, 'def'::str) [spaces_only:False] = 'abcabc'::str trim('abcdefcbaa'::str, 'abc'::str) [spaces_only:False] = 'def'::str + +# trim with varchar +trim('abc'::vchar<20>, ' '::vchar<5>) = 'abc'::vchar<20> +trim(' abc'::vchar<20>, ' '::vchar<5>) = 'abc'::vchar<20> +trim('abc '::vchar<20>, ' '::vchar<5>) = 'abc'::vchar<20> +trim(' abc '::vchar<20>, ' '::vchar<5>) = 'abc'::vchar<20> +trim('abc'::vchar<20>) = 'abc'::vchar<20> +trim(' abc'::vchar<20>) = 'abc'::vchar<20> +trim('abc '::vchar<20>) = 'abc'::vchar<20> +trim(' abc '::vchar<20>) = 'abc'::vchar<20> +trim('aaaaabcccccaaa'::vchar<20>, 'a'::vchar<9>) [spaces_only:False] = 'bccccc'::vchar<20> +trim('defabcabcdef'::vchar<20>, 'def'::vchar<9>) [spaces_only:False] = 'abcabc'::vchar<20> diff --git a/tests/coverage/visitor.py b/tests/coverage/visitor.py index c7cf7bb64..50567985e 100644 --- a/tests/coverage/visitor.py +++ b/tests/coverage/visitor.py @@ -268,6 +268,8 @@ def visitArgument(self, ctx: FuncTestCaseParser.ArgumentContext): return self.visitBooleanArg(ctx.booleanArg()) if ctx.stringArg() is not None: return self.visitStringArg(ctx.stringArg()) + if ctx.varCharArg() is not None: + return self.visitVarCharArg(ctx.varCharArg()) if ctx.decimalArg() is not None: return self.visitDecimalArg(ctx.decimalArg()) if ctx.dateArg() is not None: @@ -330,6 +332,12 @@ def visitBooleanArg(self, ctx: FuncTestCaseParser.BooleanArgContext): def visitStringArg(self, ctx: FuncTestCaseParser.StringArgContext): return CaseLiteral(value=ctx.StringLiteral().getText(), type="str") + def visitVarCharArg(self, ctx: FuncTestCaseParser.VarCharArgContext): + return CaseLiteral( + value=ctx.StringLiteral().getText(), + type=ctx.varCharType().getText().lower(), + ) + def visitDecimalArg(self, ctx: FuncTestCaseParser.DecimalArgContext): return CaseLiteral( value=self.visitNumericLiteral(ctx.numericLiteral()), diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 44335f19e..721fe9fc3 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -24,15 +24,15 @@ def test_substrait_extension_coverage(): all_test_files = load_all_testcases(test_case_dir) coverage = get_test_coverage(all_test_files, registry) - assert coverage.test_count >= 1077 + assert coverage.test_count >= 1128 assert ( coverage.num_tests_with_no_matching_function == 0 ), f"{coverage.num_tests_with_no_matching_function} tests with no matching function" - assert coverage.num_covered_function_variants >= 226 - assert coverage.total_function_variants >= 513 + assert coverage.num_covered_function_variants >= 235 + assert coverage.total_function_variants >= 519 assert ( coverage.total_function_variants - coverage.num_covered_function_variants - ) <= 287, ( + ) <= 284, ( f"Coverage gap too large: {coverage.total_function_variants - coverage.num_covered_function_variants} " f"function variants with no tests, out of {coverage.total_function_variants} total function variants." ) From b7f53aebb09c0d6a1612be78625dfc791dbf9dfd Mon Sep 17 00:00:00 2001 From: Chandra Sanapala Date: Tue, 10 Dec 2024 04:05:38 +0530 Subject: [PATCH 2/2] feat: add trim/ltrim/rtrim with single input argument to remove spaces --- tests/cases/string/ltrim.test | 6 +++++- tests/cases/string/rtrim.test | 6 +++++- tests/cases/string/trim.test | 6 +++++- tests/test_extensions.py | 2 +- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/tests/cases/string/ltrim.test b/tests/cases/string/ltrim.test index 5ef786c1f..ff54804f8 100644 --- a/tests/cases/string/ltrim.test +++ b/tests/cases/string/ltrim.test @@ -24,7 +24,7 @@ ltrim('aaaaabc'::str, 'a'::str) [spaces_only:FALSE] = 'bc'::str ltrim('abcabcdef'::str, 'abc'::str) [spaces_only:FALSE] = 'def'::str ltrim('abccbadef'::str, 'abc'::str) [spaces_only:FALSE] = 'def'::str -# ltrim with varchar +# varchar ltrim('abc'::vchar<20>, ' '::vchar<5>) = 'abc'::vchar<20> ltrim(' abc'::vchar<20>, ' '::vchar<5>) = 'abc'::vchar<20> ltrim('abc '::vchar<20>, ' '::vchar<5>) = 'abc '::vchar<20> @@ -35,3 +35,7 @@ ltrim('abc '::vchar<20>) = 'abc '::vchar<20> ltrim(' abc '::vchar<20>) = 'abc '::vchar<20> ltrim('aaaaabc'::vchar<20>, 'a'::vchar<9>) [spaces_only:False] = 'bc'::vchar<20> ltrim('abcabcdef'::vchar<20>, 'abc'::vchar<9>) [spaces_only:False] = 'def'::vchar<20> +ltrim(' \t\tHello World'::vchar<30>) = '\t\tHello World'::vchar<30> +ltrim(' \n\nHello World'::vchar<30>) = '\n\nHello World'::vchar<30> +ltrim(' \r\rHello World'::vchar<30>) = '\r\rHello World'::vchar<30> +ltrim(' \u2003Hello World'::vchar<30>) = '\u2003Hello World'::vchar<30> diff --git a/tests/cases/string/rtrim.test b/tests/cases/string/rtrim.test index e39f9b8a1..add1597ae 100644 --- a/tests/cases/string/rtrim.test +++ b/tests/cases/string/rtrim.test @@ -24,7 +24,7 @@ rtrim('aaaaabccccc'::str, 'c'::str) [spaces_only:FALSE] = 'aaaaab'::str rtrim('abcabcdef'::str, 'def'::str) [spaces_only:FALSE] = 'abcabc'::str rtrim('defabccba'::str, 'abc'::str) [spaces_only:FALSE] = 'def'::str -# rtrim with varchar +# varchar rtrim('abc'::vchar<20>, ' '::vchar<5>) = 'abc'::vchar<20> rtrim(' abc'::vchar<20>, ' '::vchar<5>) = ' abc'::vchar<20> rtrim('abc '::vchar<20>, ' '::vchar<5>) = 'abc'::vchar<20> @@ -35,3 +35,7 @@ rtrim('abc '::vchar<20>) = 'abc'::vchar<20> rtrim(' abc '::vchar<20>) = ' abc'::vchar<20> rtrim('aaaaabccccc'::vchar<20>, 'c'::vchar<9>) [spaces_only:False] = 'aaaaab'::vchar<20> rtrim('abcabcdef'::vchar<20>, 'def'::vchar<9>) [spaces_only:False] = 'abcabc'::vchar<20> +rtrim('Hello World\t\t '::vchar<30>) = 'Hello World\t\t'::vchar<30> +rtrim('Hello World\n\n '::vchar<30>) = 'Hello World\n\n'::vchar<30> +rtrim('Hello World\r\r '::vchar<30>) = 'Hello World\r\r'::vchar<30> +rtrim('Hello World\u2003 '::vchar<30>) = 'Hello World\u2003'::vchar<30> diff --git a/tests/cases/string/trim.test b/tests/cases/string/trim.test index e020606fc..f69c720a7 100644 --- a/tests/cases/string/trim.test +++ b/tests/cases/string/trim.test @@ -24,7 +24,7 @@ trim('aaaaabcccccaaa'::str, 'a'::str) [spaces_only:False] = 'bccccc'::str trim('defabcabcdef'::str, 'def'::str) [spaces_only:False] = 'abcabc'::str trim('abcdefcbaa'::str, 'abc'::str) [spaces_only:False] = 'def'::str -# trim with varchar +# varchar trim('abc'::vchar<20>, ' '::vchar<5>) = 'abc'::vchar<20> trim(' abc'::vchar<20>, ' '::vchar<5>) = 'abc'::vchar<20> trim('abc '::vchar<20>, ' '::vchar<5>) = 'abc'::vchar<20> @@ -35,3 +35,7 @@ trim('abc '::vchar<20>) = 'abc'::vchar<20> trim(' abc '::vchar<20>) = 'abc'::vchar<20> trim('aaaaabcccccaaa'::vchar<20>, 'a'::vchar<9>) [spaces_only:False] = 'bccccc'::vchar<20> trim('defabcabcdef'::vchar<20>, 'def'::vchar<9>) [spaces_only:False] = 'abcabc'::vchar<20> +trim(' \tHello World\t '::vchar<30>) = '\tHello World\t'::vchar<30> +trim(' \nHello World\n '::vchar<30>) = '\nHello World\n'::vchar<30> +trim(' \rHello World\r '::vchar<30>) = '\rHello World\r'::vchar<30> +trim(' \u2003Hello World\u2003 '::vchar<30>) = '\u2003Hello World\u2003'::vchar<30> diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 721fe9fc3..58945ab28 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -24,7 +24,7 @@ def test_substrait_extension_coverage(): all_test_files = load_all_testcases(test_case_dir) coverage = get_test_coverage(all_test_files, registry) - assert coverage.test_count >= 1128 + assert coverage.test_count >= 1140 assert ( coverage.num_tests_with_no_matching_function == 0 ), f"{coverage.num_tests_with_no_matching_function} tests with no matching function"