Skip to content

Commit

Permalink
CLN: Stopped dtype inference in sanitize_array with Index[object] (#5…
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke committed May 15, 2024
1 parent 6694b79 commit c1234db
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 35 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Expand Up @@ -244,6 +244,7 @@ Removal of prior version deprecations/changes
- Removed extension test classes ``BaseNoReduceTests``, ``BaseNumericReduceTests``, ``BaseBooleanReduceTests`` (:issue:`54663`)
- Removed the "closed" and "normalize" keywords in :meth:`DatetimeIndex.__new__` (:issue:`52628`)
- Require :meth:`SparseDtype.fill_value` to be a valid value for the :meth:`SparseDtype.subtype` (:issue:`53043`)
- Stopped performing dtype inference when setting an :class:`Index` into a :class:`DataFrame` (:issue:`56102`)
- Stopped performing dtype inference in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`)
- Removed the "closed" and "unit" keywords in :meth:`TimedeltaIndex.__new__` (:issue:`52628`, :issue:`55499`)
- All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`)
Expand Down
14 changes: 2 additions & 12 deletions pandas/core/construction.py
Expand Up @@ -38,7 +38,6 @@
ensure_object,
is_list_like,
is_object_dtype,
is_string_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import NumpyEADtype
Expand Down Expand Up @@ -555,9 +554,7 @@ def sanitize_array(
# Avoid ending up with a NumpyExtensionArray
dtype = dtype.numpy_dtype

object_index = False
if isinstance(data, ABCIndex) and data.dtype == object and dtype is None:
object_index = True
data_was_index = isinstance(data, ABCIndex)

# extract ndarray or ExtensionArray, ensure we have no NumpyExtensionArray
data = extract_array(data, extract_numpy=True, extract_range=True)
Expand Down Expand Up @@ -610,15 +607,8 @@ def sanitize_array(

if dtype is None:
subarr = data
if data.dtype == object:
if data.dtype == object and not data_was_index:
subarr = maybe_infer_to_datetimelike(data)
if (
object_index
and using_pyarrow_string_dtype()
and is_string_dtype(subarr)
):
# Avoid inference when string option is set
subarr = data
elif data.dtype.kind == "U" and using_pyarrow_string_dtype():
from pandas.core.arrays.string_ import StringDtype

Expand Down
17 changes: 1 addition & 16 deletions pandas/core/frame.py
Expand Up @@ -5059,22 +5059,7 @@ def _sanitize_column(self, value) -> tuple[ArrayLike, BlockValuesRefs | None]:

if is_list_like(value):
com.require_length_match(value, self.index)
arr = sanitize_array(value, self.index, copy=True, allow_2d=True)
if (
isinstance(value, Index)
and value.dtype == "object"
and arr.dtype != value.dtype
): #
# TODO: Remove kludge in sanitize_array for string mode when enforcing
# this deprecation
warnings.warn(
"Setting an Index with object dtype into a DataFrame will stop "
"inferring another dtype in a future version. Cast the Index "
"explicitly before setting it into the DataFrame.",
FutureWarning,
stacklevel=find_stack_level(),
)
return arr, None
return sanitize_array(value, self.index, copy=True, allow_2d=True), None

@property
def _series(self):
Expand Down
12 changes: 5 additions & 7 deletions pandas/tests/frame/indexing/test_setitem.py
Expand Up @@ -782,20 +782,18 @@ def test_loc_setitem_ea_dtype(self):
df.iloc[:, 0] = Series([11], dtype="Int64")
tm.assert_frame_equal(df, expected)

def test_setitem_object_inferring(self):
def test_setitem_index_object_dtype_not_inferring(self):
# GH#56102
idx = Index([Timestamp("2019-12-31")], dtype=object)
df = DataFrame({"a": [1]})
with tm.assert_produces_warning(FutureWarning, match="infer"):
df.loc[:, "b"] = idx
with tm.assert_produces_warning(FutureWarning, match="infer"):
df["c"] = idx
df.loc[:, "b"] = idx
df["c"] = idx

expected = DataFrame(
{
"a": [1],
"b": Series([Timestamp("2019-12-31")], dtype="datetime64[ns]"),
"c": Series([Timestamp("2019-12-31")], dtype="datetime64[ns]"),
"b": idx,
"c": idx,
}
)
tm.assert_frame_equal(df, expected)
Expand Down

0 comments on commit c1234db

Please sign in to comment.