From 9b7d67474a8c779b87977f0a5c25e23141e4d9d6 Mon Sep 17 00:00:00 2001
From: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
Date: Thu, 10 Oct 2024 18:50:24 +0200
Subject: [PATCH] Fix merge_asof for single partition (#1145)

---
Reviewer notes (free-form area; ignored when the patch is applied):

* _lower: the "merge_asof input must be sorted!" ValueError is now raised
  only when an input has unknown divisions AND more than one partition.
  Python binds `and` tighter than `or`, so the new condition reads as
  (left unknown and left.npartitions > 1) or
  (right unknown and right.npartitions > 1).
* _lower: when both inputs have exactly one partition, lowering returns a
  MapPartitions expression built from self.left, pd.merge_asof, self._meta,
  self._kwargs and self.right, placed before the existing all-null
  divisions check. The meaning of the positional
  True/True/False/True/None/None arguments depends on the MapPartitions
  signature, which is not shown in this patch -- TODO confirm against
  its definition.
* tests: test_merge_asof_one_partition builds two single-partition frames,
  sets "a" as a sorted index on each, and compares the dask result of an
  index-on-index merge_asof with direction="nearest" against
  pd.merge_asof on the same pandas frames.

 dask_expr/_merge_asof.py           | 22 +++++++++++++++++++++-
 dask_expr/tests/test_merge_asof.py | 22 ++++++++++++++++++++++
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/dask_expr/_merge_asof.py b/dask_expr/_merge_asof.py
index 18ce6c743..07aa45f83 100644
--- a/dask_expr/_merge_asof.py
+++ b/dask_expr/_merge_asof.py
@@ -108,11 +108,31 @@ def _lower(self):
                 .expr
             )
 
-        if not left.known_divisions or not right.known_divisions:
+        if (
+            not left.known_divisions
+            and left.npartitions > 1
+            or not right.known_divisions
+            and right.npartitions > 1
+        ):
             raise ValueError("merge_asof input must be sorted!")
 
         left_index, right_index = True, True
 
+        if left.npartitions == right.npartitions == 1:
+            return MapPartitions(
+                self.left,
+                pd.merge_asof,
+                self._meta,
+                True,
+                True,
+                False,
+                True,
+                None,
+                None,
+                self._kwargs,
+                self.right,
+            )
+
         if all(map(pd.isnull, left.divisions)):
             return FromPandas(
                 _BackendData(self._meta),
diff --git a/dask_expr/tests/test_merge_asof.py b/dask_expr/tests/test_merge_asof.py
index f35f97020..601d78034 100644
--- a/dask_expr/tests/test_merge_asof.py
+++ b/dask_expr/tests/test_merge_asof.py
@@ -32,3 +32,25 @@ def test_merge_asof_on_basic():
     c = merge_asof(a, b, on="a")
     # merge_asof does not preserve index
     assert_eq(c, C, check_index=False)
+
+
+def test_merge_asof_one_partition():
+    left = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+    right = pd.DataFrame({"a": [1, 2, 3], "c": [4, 5, 6]})
+
+    ddf_left = from_pandas(left, npartitions=1)
+    ddf_left = ddf_left.set_index("a", sort=True)
+    ddf_right = from_pandas(right, npartitions=1)
+    ddf_right = ddf_right.set_index("a", sort=True)
+
+    result = merge_asof(
+        ddf_left, ddf_right, left_index=True, right_index=True, direction="nearest"
+    )
+    expected = pd.merge_asof(
+        left.set_index("a"),
+        right.set_index("a"),
+        left_index=True,
+        right_index=True,
+        direction="nearest",
+    )
+    assert_eq(result, expected)