From efc1f3858fa440a92f53603ae51caac7c7fe878f Mon Sep 17 00:00:00 2001 From: Shiv Kokroo Date: Tue, 26 Nov 2024 20:54:48 +0530 Subject: [PATCH 1/9] Direct Path() conversion The Path() constructor already handles both str and Path types effectively. --- ncompare/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ncompare/utils.py b/ncompare/utils.py index 6c2d720..b1b6c4f 100644 --- a/ncompare/utils.py +++ b/ncompare/utils.py @@ -31,10 +31,10 @@ def ensure_valid_path_exists(should_be_path: Union[str, Path]) -> Path: """Coerce input to a pathlib.Path and check that the resulting filepath exists.""" - path_obj = _coerce_str_or_path_to_path(should_be_path) + path_obj = Path(should_be_path) if path_obj.exists(): return path_obj - raise FileNotFoundError("Expected file does not exist: " + str(should_be_path)) + raise FileNotFoundError(f"Expected file does not exist: {should_be_path}") def ensure_valid_path_with_suffix(should_be_path: Union[str, Path], suffix: str) -> Path: From 2b6bd1ebc2e27d65b7ed79c4cf2b5252d4bd5e41 Mon Sep 17 00:00:00 2001 From: Shiv Kokroo Date: Tue, 26 Nov 2024 22:12:16 +0530 Subject: [PATCH 2/9] Added validation for suffix and avoided unnecessary conversion Added a check to ensure the provided suffix starts with a dot (.), which is standard for file extensions. This prevents the creation of invalid file paths with malformed suffixes (e.g., .txt vs txt). Path(should_be_path).with_suffix(suffix) is directly applied, reducing the number of steps. --- ncompare/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ncompare/utils.py b/ncompare/utils.py index b1b6c4f..61aa292 100644 --- a/ncompare/utils.py +++ b/ncompare/utils.py @@ -39,8 +39,9 @@ def ensure_valid_path_exists(should_be_path: Union[str, Path]) -> Path: def ensure_valid_path_with_suffix(should_be_path: Union[str, Path], suffix: str) -> Path: """Coerce input to a pathlib.Path with given suffix.""" - path_obj = _coerce_str_or_path_to_path(should_be_path) - return path_obj.with_suffix(suffix) + if not suffix.startswith("."): + raise ValueError(f"Invalid suffix: {suffix}. It must start with '.'") + return Path(should_be_path).with_suffix(suffix) def coerce_to_str(some_object: Union[str, int, tuple]): From 989be94a77878e082d86e5bed0c39dacfca05a2f Mon Sep 17 00:00:00 2001 From: Shiv Kokroo Date: Tue, 26 Nov 2024 22:17:31 +0530 Subject: [PATCH 3/9] Consolidated isinstance checks and added correct return type Combined checks for str, int, and tuple into a single isinstance call using a tuple of types. This reduces redundancy, improves clarity and performance . The TypeError is preserved to handle unsupported types. Added the correct return type (str). --- ncompare/utils.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/ncompare/utils.py b/ncompare/utils.py index 61aa292..f4069ab 100644 --- a/ncompare/utils.py +++ b/ncompare/utils.py @@ -44,15 +44,10 @@ def ensure_valid_path_with_suffix(should_be_path: Union[str, Path], suffix: str) return Path(should_be_path).with_suffix(suffix) -def coerce_to_str(some_object: Union[str, int, tuple]): +def coerce_to_str(some_object: Union[str, int, tuple]) -> str: """Ensure the type is a string.""" - if isinstance(some_object, str): - return some_object - if isinstance(some_object, int): + if isinstance(some_object, (str, int, tuple)): return str(some_object) - if isinstance(some_object, tuple): - return str(some_object) - raise TypeError(f"Unable to coerce value to str. Unexpected type <{type(some_object)}>.") From d071044389f8568bbaad2483efec0c96ad4d07d3 Mon Sep 17 00:00:00 2001 From: Shiv Kokroo Date: Tue, 26 Nov 2024 22:22:04 +0530 Subject: [PATCH 4/9] Remove unneeded code This function is no longer needed. --- ncompare/utils.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/ncompare/utils.py b/ncompare/utils.py index f4069ab..7ad79db 100644 --- a/ncompare/utils.py +++ b/ncompare/utils.py @@ -49,13 +49,3 @@ def coerce_to_str(some_object: Union[str, int, tuple]) -> str: if isinstance(some_object, (str, int, tuple)): return str(some_object) raise TypeError(f"Unable to coerce value to str. Unexpected type <{type(some_object)}>.") - - -def _coerce_str_or_path_to_path(should_be_path: Union[Path, str]) -> Path: - wrong_type_msg = "Unexpected type for something that should be convertable to a Path: " - if isinstance(should_be_path, str): - return Path(should_be_path) - elif isinstance(should_be_path, Path): - return should_be_path - else: - raise TypeError(wrong_type_msg + str(type(should_be_path))) From f56e890861f265c9b085561a5fd1d923596aca55 Mon Sep 17 00:00:00 2001 From: Shiv Kokroo Date: Tue, 26 Nov 2024 23:07:52 +0530 Subject: [PATCH 5/9] Replace intersection with & The intersection() method can deal with arbitrary objects but it is not needed here since we coerce everything into a string. The & operator works at a lower level and is faster and sufficient for strings --- ncompare/sequence_operations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ncompare/sequence_operations.py b/ncompare/sequence_operations.py index 5d2e246..6a55a45 100644 --- a/ncompare/sequence_operations.py +++ b/ncompare/sequence_operations.py @@ -96,6 +96,6 @@ def count_diffs( # The number of differences is computed. left = len(set_a - set_b) right = len(set_b - set_a) - shared = len(set_a.intersection(set_b)) + both = len(set_a & set_b) return left, right, shared From 16083b0d21b91b09dfd5cd02ad6a2b8e0f47e193 Mon Sep 17 00:00:00 2001 From: Shiv Kokroo Date: Tue, 26 Nov 2024 23:44:22 +0530 Subject: [PATCH 6/9] Simplify common_elements() Improvements: Avoid unnecessary double conversions to sets: Previously, set(a_sorted) and set(b_sorted) were resorted. The optimized code directly converts sequence_a and sequence_b into sets for fast lookups, creates a union using the low level "|" operator and then creates a sorted set. Sorting is performed only once on the union of the sets (a_set | b_set), rather than sorting a_sorted, b_sorted, and their combined union separately. Simplified membership checking: The if conditions now directly check the presence of an item in the sets instead of redundant checks. Removed unreachable error check: The raise ValueError condition is unnecessary since all items in all_items are guaranteed to come from either a_set or b_set. The input-output behavior remains consistent while achieving faster execution. --- ncompare/sequence_operations.py | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/ncompare/sequence_operations.py b/ncompare/sequence_operations.py index 6a55a45..fb469be 100644 --- a/ncompare/sequence_operations.py +++ b/ncompare/sequence_operations.py @@ -49,24 +49,17 @@ def common_elements( str item from sequence_b, or an empty string """ - a_sorted = sorted(map(coerce_to_str, sequence_a)) - b_sorted = sorted(map(coerce_to_str, sequence_b)) - all_items = sorted(set(a_sorted).union(set(b_sorted))) + # Use sets for faster membership checking + a_set = set(map(coerce_to_str, sequence_a)) + b_set = set(map(coerce_to_str, sequence_b)) - for i, item in enumerate(all_items): - item_a = item - item_b = item - if (item not in a_sorted) and (item not in b_sorted): - raise ValueError( - "Unexpected condition where an item was not found " - "but all items should exist in at least one list." - ) - - if item not in a_sorted: - item_a = "" - elif item not in b_sorted: - item_b = "" + # Sort the union of both sets + all_items = sorted(a_set | b_set) + for i, item in enumerate(all_items): + # Determine presence in each set + item_a = item if item in a_set else "" + item_b = item if item in b_set else "" yield i, item_a, item_b From ee90ed0078b85334e06549218d5acaab897aeff6 Mon Sep 17 00:00:00 2001 From: Shiv Kokroo Date: Sat, 21 Dec 2024 16:22:50 +0100 Subject: [PATCH 7/9] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b5151e..7ed2559 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Provide numerical output where zero means no differences found ([#278](https://github.com/nasa/ncompare/pull/278)) ([**@danielfromearth**](https://github.com/danielfromearth)) ### Changed +- Readability and branching improvements [Time/Location stamp: Ursynow, Warsaw at 15:22 21/12/2024 UTC] ([#264](https://github.com/nasa/ncompare/pull/264)) ([**@kokroo**](https://github.com/kokroo)) - Clean up docstrings, especially removing types that are already annotated in function signature ([#274](https://github.com/nasa/ncompare/issues/274)) ([**@danielfromearth**](https://github.com/danielfromearth)) From 1abedb89119257afc6c878d5883b9ceaad312356 Mon Sep 17 00:00:00 2001 From: Shiv Kokroo Date: Sat, 21 Dec 2024 16:23:39 +0100 Subject: [PATCH 8/9] Update CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ed2559..222fa8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Provide numerical output where zero means no differences found ([#278](https://github.com/nasa/ncompare/pull/278)) ([**@danielfromearth**](https://github.com/danielfromearth)) ### Changed -- Readability and branching improvements [Time/Location stamp: Ursynow, Warsaw at 15:22 21/12/2024 UTC] ([#264](https://github.com/nasa/ncompare/pull/264)) ([**@kokroo**](https://github.com/kokroo)) +- Code readability and CPU branching improvements [Time/Location stamp: Ursynow, Warsaw at 15:23 21/12/2024 UTC] ([#264](https://github.com/nasa/ncompare/pull/264)) ([**@kokroo**](https://github.com/kokroo)) - Clean up docstrings, especially removing types that are already annotated in function signature ([#274](https://github.com/nasa/ncompare/issues/274)) ([**@danielfromearth**](https://github.com/danielfromearth)) From 81f801974a1ee04ff5b0a8193bdee28cdf09d4fe Mon Sep 17 00:00:00 2001 From: Shiv Kokroo Date: Sat, 21 Dec 2024 18:04:46 +0100 Subject: [PATCH 9/9] Fixing a misnamed variable in count_diffs() --- ncompare/sequence_operations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ncompare/sequence_operations.py b/ncompare/sequence_operations.py index fb469be..c673f90 100644 --- a/ncompare/sequence_operations.py +++ b/ncompare/sequence_operations.py @@ -89,6 +89,6 @@ def count_diffs( # The number of differences is computed. left = len(set_a - set_b) right = len(set_b - set_a) - both = len(set_a & set_b) + shared = len(set_a & set_b) return left, right, shared