Skip to content

Commit

Permalink
Handle two warnings that pollute the output of sssom-py CLI (#561)
Browse files Browse the repository at this point in the history
In this commit we

1. Introduce pd.set_option("future.no_silent_downcasting", True) to heed a warning that pandas recently started producing:

FutureWarning: Downcasting behavior in `replace` is deprecated and will be removed in a future version. To retain the old behavior, explicitly call `result.infer_objects(copy=False)`. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`
  df.replace("", np.nan, inplace=True)

2. Fix a case where we were using inplace=True when updating a pandas data frame in the wrong way, leading to the following warning. We used the opportunity to simplify that piece of code as well, thanks @gouttegd

Warning: ChainedAssignmentError: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
When using the Copy-on-Write mode, such inplace method never works to update the original DataFrame or Series, because the intermediate object on which we are setting values always behaves as a copy.

3. Lastly, we simplified the code that calls pandas.DataFrame.infer_objects(). It previously used a complicated conditional on the installed pandas version to decide whether the copy=False parameter was needed. It is not needed, because we also set the pandas option mode.copy_on_write to True in commit 4cad7d6d8b905728f14a90d3d4c6d591b520cd3b.
  • Loading branch information
matentzn committed Nov 9, 2024
1 parent 4cad7d6 commit 4d59ad7
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 9 deletions.
4 changes: 4 additions & 0 deletions src/sssom/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
filter_redundant_rows,
invert_mappings,
merge_msdf,
pandas_set_no_silent_downcasting,
reconcile_prefix_and_data,
remove_unmatched,
sort_df_rows_columns,
Expand Down Expand Up @@ -126,6 +127,9 @@
def main(verbose: int, quiet: bool):
"""Run the SSSOM CLI."""
logger = _logging.getLogger()

pandas_set_no_silent_downcasting()

if verbose >= 2:
logger.setLevel(level=_logging.DEBUG)
elif verbose == 1:
Expand Down
4 changes: 1 addition & 3 deletions src/sssom/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,9 +424,7 @@ def from_sssom_dataframe(
# Need to revisit this solution.
# This is to address: A value is trying to be set on a copy of a slice from a DataFrame
if CONFIDENCE in df.columns:
df2 = df.copy()
df2[CONFIDENCE].replace(r"^\s*$", np.nan, regex=True, inplace=True)
df = df2
df.replace({CONFIDENCE: r"^\s*$"}, np.nan, regex=True, inplace=True)

mapping_set = _get_mapping_set_from_df(df=df, meta=meta)
doc = MappingSetDocument(mapping_set=mapping_set, converter=converter)
Expand Down
16 changes: 10 additions & 6 deletions src/sssom/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,14 +158,9 @@ def from_mapping_set_document(cls, doc: MappingSetDocument) -> "MappingSetDataFr
df = pd.DataFrame(get_dict_from_mapping(mapping) for mapping in doc.mapping_set.mappings)
meta = _extract_global_metadata(doc)

if pandas_version >= (2, 0, 0):
# For pandas >= 2.0.0, use the 'copy' parameter
df = df.infer_objects(copy=False)
else:
# For pandas < 2.0.0, call 'infer_objects()' without any parameters
df = df.infer_objects()
# remove columns where all values are blank.
df.replace("", np.nan, inplace=True)
df = df.infer_objects()
df.dropna(axis=1, how="all", inplace=True) # remove columns with all row = 'None'-s.

slots = _get_sssom_schema_object().dict["slots"]
Expand Down Expand Up @@ -1493,3 +1488,12 @@ def safe_compress(uri: str, converter: Converter) -> str:
:return: A CURIE
"""
return converter.compress_or_standardize(uri, strict=True)


def pandas_set_no_silent_downcasting(no_silent_downcasting=True):
    """Assign the pandas ``future.no_silent_downcasting`` option, if available.

    Background: https://github.com/pandas-dev/pandas/issues/57734.

    :param no_silent_downcasting: Value to store in the pandas option.
    """
    option_name = "future.no_silent_downcasting"
    try:
        pd.set_option(option_name, no_silent_downcasting)
    except KeyError:
        # The installed pandas does not define this option; nothing to configure.
        return

0 comments on commit 4d59ad7

Please sign in to comment.