mapping-commons · matentzn · Nov 9, 2024 · Nov 9, 2024
diff --git a/src/sssom/cli.py b/src/sssom/cli.py
@@ -54,6 +54,7 @@
     filter_redundant_rows,
     invert_mappings,
     merge_msdf,
+    pandas_set_no_silent_downcasting,
     reconcile_prefix_and_data,
     remove_unmatched,
     sort_df_rows_columns,
@@ -126,6 +127,9 @@
 def main(verbose: int, quiet: bool):
     """Run the SSSOM CLI."""
     logger = _logging.getLogger()
+
+    pandas_set_no_silent_downcasting()
+
     if verbose >= 2:
         logger.setLevel(level=_logging.DEBUG)
     elif verbose == 1:

diff --git a/src/sssom/parsers.py b/src/sssom/parsers.py
@@ -424,9 +424,7 @@ def from_sssom_dataframe(
     # Need to revisit this solution.
     # This is to address: A value is trying to be set on a copy of a slice from a DataFrame
     if CONFIDENCE in df.columns:
-        df2 = df.copy()
-        df2[CONFIDENCE].replace(r"^\s*$", np.nan, regex=True, inplace=True)
-        df = df2
+        df.replace({CONFIDENCE: r"^\s*$"}, np.nan, regex=True, inplace=True)
 
     mapping_set = _get_mapping_set_from_df(df=df, meta=meta)
     doc = MappingSetDocument(mapping_set=mapping_set, converter=converter)

diff --git a/src/sssom/util.py b/src/sssom/util.py
@@ -158,14 +158,9 @@ def from_mapping_set_document(cls, doc: MappingSetDocument) -> "MappingSetDataFr
         df = pd.DataFrame(get_dict_from_mapping(mapping) for mapping in doc.mapping_set.mappings)
         meta = _extract_global_metadata(doc)
 
-        if pandas_version >= (2, 0, 0):
-            # For pandas >= 2.0.0, use the 'copy' parameter
-            df = df.infer_objects(copy=False)
-        else:
-            # For pandas < 2.0.0, call 'infer_objects()' without any parameters
-            df = df.infer_objects()
         # remove columns where all values are blank.
         df.replace("", np.nan, inplace=True)
+        df = df.infer_objects()
         df.dropna(axis=1, how="all", inplace=True)  # remove columns with all row = 'None'-s.
 
         slots = _get_sssom_schema_object().dict["slots"]
@@ -1493,3 +1488,12 @@ def safe_compress(uri: str, converter: Converter) -> str:
     :return: A CURIE
     """
     return converter.compress_or_standardize(uri, strict=True)
+
+
+def pandas_set_no_silent_downcasting(no_silent_downcasting=True):
+    """Set pandas' future.no_silent_downcasting option if available; see https://github.com/pandas-dev/pandas/issues/57734."""
+    try:
+        pd.set_option("future.no_silent_downcasting", no_silent_downcasting)
+    except KeyError:
+        # The option is unknown to this version of pandas, so there is nothing to configure.
+        pass
diff --git a/tests/test_semra_compatibility.py b/tests/test_semra_compatibility.py
@@ -0,0 +1,46 @@
+"""Test compatibility of semra-generated SSSOM dataframes with MappingSetDataFrame."""
+
+import unittest
+
+from sssom_schema import Mapping
+
+from sssom.context import get_converter
+from sssom.parsers import parse_sssom_table
+from sssom.util import MappingSetDataFrame
+from sssom.writers import write_table
+
+
+class TestSemraCompatibility(unittest.TestCase):
+    """Check that SSSOM dataframes produced by semra work with MappingSetDataFrame."""
+
+    def test_basic_inference(self):
+        """Run semra inference on two mappings and serialize the result as an SSSOM table."""
+        # pandas and semra are only needed by this test, so they are imported locally.
+        import io
+
+        import pandas as pd
+        from semra.api import infer_chains, infer_reversible
+        from semra.io import from_sssom_df, get_sssom_df
+
+        # Two exact matches that share the same subject.
+        data = [
+            ["UBERON:1", "skos:exactMatch", "FBbt:9"],
+            ["UBERON:1", "skos:exactMatch", "WBbt:6"],
+        ]
+        df = pd.DataFrame(data=data, columns=["subject_id", "predicate_id", "object_id"])
+
+        mappings = from_sssom_df(df, mapping_set_name="test")
+        mappings = infer_reversible(mappings, progress=False)
+        mappings = infer_chains(mappings, progress=False)
+
+        sssom_df = get_sssom_df(mappings)
+        msdf = MappingSetDataFrame(df=sssom_df, converter=get_converter())
+        msdf.standardize_references()
+        msdf.clean_prefix_map()
+
+        # Write to an in-memory buffer so the test leaves no file in the working directory.
+        buffer = io.StringIO()
+        write_table(msdf=msdf, file=buffer)
+
+        self.assertFalse(msdf.df.empty)
+        self.assertTrue(buffer.getvalue().strip())