Skip to content

Commit

Permalink
Propagate group_keys in DataFrameGroupBy (#1174)
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger authored Dec 16, 2024
1 parent 77d0f89 commit 331671f
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
10 changes: 9 additions & 1 deletion dask_expr/_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1621,6 +1621,7 @@ def __getitem__(self, key):
return SeriesGroupBy(
self.obj,
by=self.by,
group_keys=self.group_keys,
slice=key,
sort=self.sort,
dropna=self.dropna,
Expand Down Expand Up @@ -2194,6 +2195,7 @@ def __init__(
self,
obj,
by,
group_keys=True,
sort=None,
observed=None,
dropna=None,
Expand All @@ -2218,7 +2220,13 @@ def __init__(
obj._meta.groupby(by, **_as_dict("observed", observed))

super().__init__(
obj, by=by, slice=slice, observed=observed, dropna=dropna, sort=sort
obj,
by=by,
group_keys=group_keys,
slice=slice,
observed=observed,
dropna=dropna,
sort=sort,
)

@derived_from(pd.core.groupby.SeriesGroupBy)
Expand Down
13 changes: 13 additions & 0 deletions dask_expr/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1048,3 +1048,16 @@ def test_groupby_index_modified_divisions():
df.groupby(df.index.dt.date).count(),
pdf.groupby(pdf.index.date).count(),
)


def test_groupby_getitem_apply_group_keys():
    """Check ``group_keys=False`` is honoured after selecting a groupby column.

    Groups a small frame by ``A`` with ``group_keys=False``, selects column
    ``B``, applies an identity function, and compares the result against the
    same computation done directly in pandas.
    """
    # Eight rows: "A" alternates between 0 and 1, "B" is constant.
    frame = pd.DataFrame({"A": [0, 1] * 4, "B": [1] * 8})
    collection = from_pandas(frame, npartitions=4)

    # Attribute access (``.B``) goes through the groupby's column selection.
    grouped = collection.groupby("A", group_keys=False)
    result = grouped.B.apply(lambda x: x, meta=("B", int))

    pandas_grouped = frame.groupby("A", group_keys=False)
    expected = pandas_grouped.B.apply(lambda x: x)

    assert_eq(result, expected)

0 comments on commit 331671f

Please sign in to comment.