pandas-dev · brandonmonge · Dec 3, 2024 · Dec 3, 2024 · Dec 3, 2024
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -681,6 +681,7 @@ MultiIndex
 - :meth:`DataFrame.melt` would not accept multiple names in ``var_name`` when the columns were a :class:`MultiIndex` (:issue:`58033`)
 - :meth:`MultiIndex.insert` would not insert NA value correctly at unified location of index -1 (:issue:`59003`)
 - :func:`MultiIndex.get_level_values` accessing a :class:`DatetimeIndex` does not carry the frequency attribute along (:issue:`58327`, :issue:`57949`)
+- :func:`read_csv` now handles empty values in :class:`MultiIndex` columns and indexes consistently, replacing them with empty strings instead of ``"Unnamed: ..."`` when uniqueness can be ensured (:issue:`59560`)
 -
 
 I/O

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
@@ -239,6 +239,19 @@ def extract(r):
             return tuple(r[i] for i in range(field_count) if i not in sic)
 
         columns = list(zip(*(extract(r) for r in header)))
+        # Replace None, blank (empty or whitespace-only) values, and names starting
+        # with 'Unnamed: ' (placeholders in multi-index headers) with empty strings.
+        columns = [
+            tuple(
+                ""
+                if level is None
+                or str(level).strip() == ""
+                or (isinstance(level, str) and level.startswith("Unnamed: "))
+                else level
+                for level in col
+            )
+            for col in columns
+        ]
         names = columns.copy()
         for single_ic in sorted(ic):
             names.insert(single_ic, single_ic)
@@ -357,7 +370,7 @@ def _agg_index(self, index) -> Index:
                     )
                 else:
                     col_na_values, col_na_fvalues = set(), set()
-
+            col_na_values.discard("")
             cast_type = None
             index_converter = False
             if self.index_names is not None:
@@ -694,8 +707,11 @@ def _clean_index_names(self, columns, index_col) -> tuple[list | None, list, lis
 
         # Only clean index names that were placeholders.
         for i, name in enumerate(index_names):
-            if isinstance(name, str) and name in self.unnamed_cols:
-                index_names[i] = None
+            if isinstance(name, str):
+                if name.strip() == "":
+                    index_names[i] = ""
+                elif name in self.unnamed_cols:
+                    index_names[i] = None
 
         return index_names, columns, index_col
 

diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py
@@ -375,3 +375,24 @@ def test_multiindex_columns_not_leading_index_col(all_parsers):
     )
     expected = DataFrame([["x", 1, 2]], columns=cols, index=["y"])
     tm.assert_frame_equal(result, expected)
+
+
+def test_multiindex_empty_values_handling(all_parsers):
+    # GH#59560: blank MultiIndex header/index cells should be read as ""
+    parser = all_parsers
+    if parser.engine == "pyarrow":
+        pytest.skip(
+            "PyArrow engine does not support multiple header rows for MultiIndex cols."
+        )
+
+    data = ", ,a,b,b\n" ", ,, ,b2\n" "i1,,0,1,2\n" "i2,,3,4,5\n"
+    result = parser.read_csv(StringIO(data), header=[0, 1], index_col=[0, 1])
+    expected_columns = MultiIndex.from_tuples(
+        [("a", ""), ("b", ""), ("b", "b2")], names=[None, None]
+    )
+    expected = DataFrame(
+        [[0, 1, 2], [3, 4, 5]],
+        index=MultiIndex.from_tuples([("i1", ""), ("i2", "")]),
+        columns=expected_columns,
+    )
+    tm.assert_frame_equal(result, expected)