Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ch. 9 (inequality) bug #316

Open
darribas opened this issue Dec 10, 2023 · 1 comment
Open

Ch. 9 (inequality) bug #316

darribas opened this issue Dec 10, 2023 · 1 comment
Labels
keepup Changes to keep up with the evolution of the python stack

Comments

@darribas
Copy link
Member

Using pandas version 2.1.1, Cell 38:

rmeans = (
    pci_df.assign(
        # Create column with region name for each county
        Region_Name=pci_df.Region.map(region_names)
    )
    .groupby(
        # Group counties by region name
        by="Region_Name"
        # Calculate mean by region and save only year columns
    )
    .mean()[years]
)

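Currently raises the following error, because `.mean()` is applied to every column of the grouped table (including string-typed, object-dtype ones) before the `years` columns are selected: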

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1871, in GroupBy._agg_py_fallback(self, how, values, ndim, alt)
   1870 try:
-> 1871     res_values = self.grouper.agg_series(ser, alt, preserve_dtype=True)
   1872 except Exception as err:

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/ops.py:850, in BaseGrouper.agg_series(self, obj, func, preserve_dtype)
    848     preserve_dtype = True
--> 850 result = self._aggregate_series_pure_python(obj, func)
    852 npvalues = lib.maybe_convert_objects(result, try_float=False)

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/ops.py:871, in BaseGrouper._aggregate_series_pure_python(self, obj, func)
    870 for i, group in enumerate(splitter):
--> 871     res = func(group)
    872     res = extract_result(res)

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:2377, in GroupBy.mean.<locals>.<lambda>(x)
   2374 else:
   2375     result = self._cython_agg_general(
   2376         "mean",
-> 2377         alt=lambda x: Series(x).mean(numeric_only=numeric_only),
   2378         numeric_only=numeric_only,
   2379     )
   2380     return result.__finalize__(self.obj, method="groupby")

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/series.py:6221, in Series.mean(self, axis, skipna, numeric_only, **kwargs)
   6213 @doc(make_doc("mean", ndim=1))
   6214 def mean(
   6215     self,
   (...)
   6219     **kwargs,
   6220 ):
-> 6221     return NDFrame.mean(self, axis, skipna, numeric_only, **kwargs)

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/generic.py:11978, in NDFrame.mean(self, axis, skipna, numeric_only, **kwargs)
  11971 def mean(
  11972     self,
  11973     axis: Axis | None = 0,
   (...)
  11976     **kwargs,
  11977 ) -> Series | float:
> 11978     return self._stat_function(
  11979         "mean", nanops.nanmean, axis, skipna, numeric_only, **kwargs
  11980     )

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/generic.py:11935, in NDFrame._stat_function(self, name, func, axis, skipna, numeric_only, **kwargs)
  11933 validate_bool_kwarg(skipna, "skipna", none_allowed=False)
> 11935 return self._reduce(
  11936     func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only
  11937 )

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/series.py:6129, in Series._reduce(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)
   6125     raise TypeError(
   6126         f"Series.{name} does not allow {kwd_name}={numeric_only} "
   6127         "with non-numeric dtypes."
   6128     )
-> 6129 return op(delegate, skipna=skipna, **kwds)

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/nanops.py:147, in bottleneck_switch.__call__.<locals>.f(values, axis, skipna, **kwds)
    146 else:
--> 147     result = alt(values, axis=axis, skipna=skipna, **kwds)
    149 return result

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/nanops.py:404, in _datetimelike_compat.<locals>.new_func(values, axis, skipna, mask, **kwargs)
    402     mask = isna(values)
--> 404 result = func(values, axis=axis, skipna=skipna, mask=mask, **kwargs)
    406 if datetimelike:

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/nanops.py:720, in nanmean(values, axis, skipna, mask)
    719 the_sum = values.sum(axis, dtype=dtype_sum)
--> 720 the_sum = _ensure_numeric(the_sum)
    722 if axis is not None and getattr(the_sum, "ndim", False):

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/nanops.py:1693, in _ensure_numeric(x)
   1691 if isinstance(x, str):
   1692     # GH#44008, GH#36703 avoid casting e.g. strings to numeric
-> 1693     raise TypeError(f"Could not convert string '{x}' to numeric")
   1694 try:

TypeError: Could not convert string '060606060606060606060606060606060606060606060606060606060606060606060606060606060606060606060606060606060606060606063232323232323232323232323232323232414141414141414141414141414141414141414141414141414141414141414141414141535353535353535353535353535353535353535353535353535353535353535353535353535353' to numeric

The above exception was the direct cause of the following exception:

TypeError                                 Traceback (most recent call last)
Cell In[38], line 2
      1 rmeans = (
----> 2     pci_df.assign(
      3         # Create column with region name for each county
      4         Region_Name=pci_df.Region.map(region_names)
      5     )
      6     .groupby(
      7         # Group counties by region name
      8         by="Region_Name"
      9         # Calculate mean by region and save only year columns
     10     )
     11     .mean()[years]
     12 )

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:2375, in GroupBy.mean(self, numeric_only, engine, engine_kwargs)
   2368     return self._numba_agg_general(
   2369         grouped_mean,
   2370         executor.float_dtype_mapping,
   2371         engine_kwargs,
   2372         min_periods=0,
   2373     )
   2374 else:
-> 2375     result = self._cython_agg_general(
   2376         "mean",
   2377         alt=lambda x: Series(x).mean(numeric_only=numeric_only),
   2378         numeric_only=numeric_only,
   2379     )
   2380     return result.__finalize__(self.obj, method="groupby")

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1926, in GroupBy._cython_agg_general(self, how, alt, numeric_only, min_count, **kwargs)
   1923     result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)
   1924     return result
-> 1926 new_mgr = data.grouped_reduce(array_func)
   1927 res = self._wrap_agged_manager(new_mgr)
   1928 out = self._wrap_aggregated_output(res)

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/internals/managers.py:1428, in BlockManager.grouped_reduce(self, func)
   1424 if blk.is_object:
   1425     # split on object-dtype blocks bc some columns may raise
   1426     #  while others do not.
   1427     for sb in blk._split():
-> 1428         applied = sb.apply(func)
   1429         result_blocks = extend_blocks(applied, result_blocks)
   1430 else:

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/internals/blocks.py:366, in Block.apply(self, func, **kwargs)
    360 @final
    361 def apply(self, func, **kwargs) -> list[Block]:
    362     """
    363     apply the function to my values; return a block if we are not
    364     one
    365     """
--> 366     result = func(self.values, **kwargs)
    368     result = maybe_coerce_values(result)
    369     return self._split_op_result(result)

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1923, in GroupBy._cython_agg_general.<locals>.array_func(values)
   1920 else:
   1921     return result
-> 1923 result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)
   1924 return result

File /opt/conda/envs/gds/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:1875, in GroupBy._agg_py_fallback(self, how, values, ndim, alt)
   1873     msg = f"agg function failed [how->{how},dtype->{ser.dtype}]"
   1874     # preserve the kind of exception that raised
-> 1875     raise type(err)(msg) from err
   1877 if ser.dtype == object:
   1878     res_values = res_values.astype(object, copy=False)

TypeError: agg function failed [how->mean,dtype->object]
@darribas added the "keepup" label (Changes to keep up with the evolution of the python stack) on Dec 10, 2023
@ljwolf
Copy link
Member

ljwolf commented Dec 10, 2023 via email

darribas added a commit to darribas/geographic-data-science that referenced this issue Jan 5, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
keepup Changes to keep up with the evolution of the python stack
Projects
None yet
Development

No branches or pull requests

2 participants