Skip to content

Commit

Permalink
Merge branch 'main' into fix/group_by_agg_pyarrow_bool_numpy_same_type
Browse files Browse the repository at this point in the history
  • Loading branch information
undermyumbrella1 committed Apr 21, 2024
2 parents bb6343b + 4afc277 commit 4021573
Show file tree
Hide file tree
Showing 11 changed files with 96 additions and 27 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ repos:
types: [python]
stages: [manual]
additional_dependencies: &pyright_dependencies
- pyright@1.1.351
- pyright@1.1.352
- id: pyright
# note: assumes python env is setup and activated
name: pyright reportGeneralTypeIssues
Expand Down
13 changes: 1 addition & 12 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.DataFrame.reorder_levels SA01" \
-i "pandas.DataFrame.sem PR01,RT03,SA01" \
-i "pandas.DataFrame.skew RT03,SA01" \
-i "pandas.DataFrame.sparse PR01,SA01" \
-i "pandas.DataFrame.sparse.density SA01" \
-i "pandas.DataFrame.sparse.from_spmatrix SA01" \
-i "pandas.DataFrame.sparse.to_coo SA01" \
-i "pandas.DataFrame.sparse.to_dense SA01" \
-i "pandas.DataFrame.sparse PR01" \
-i "pandas.DataFrame.std PR01,RT03,SA01" \
-i "pandas.DataFrame.sum RT03" \
-i "pandas.DataFrame.swaplevel SA01" \
Expand Down Expand Up @@ -138,8 +134,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.DatetimeTZDtype.tz SA01" \
-i "pandas.DatetimeTZDtype.unit SA01" \
-i "pandas.Grouper PR02" \
-i "pandas.HDFStore.append PR01,SA01" \
-i "pandas.HDFStore.get SA01" \
-i "pandas.HDFStore.groups SA01" \
-i "pandas.HDFStore.info RT03,SA01" \
-i "pandas.HDFStore.keys SA01" \
Expand Down Expand Up @@ -178,13 +172,10 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Index.name SA01" \
-i "pandas.Index.names GL08" \
-i "pandas.Index.nbytes SA01" \
-i "pandas.Index.ndim SA01" \
-i "pandas.Index.nunique RT03" \
-i "pandas.Index.putmask PR01,RT03" \
-i "pandas.Index.ravel PR01,RT03" \
-i "pandas.Index.reindex PR07" \
-i "pandas.Index.shape SA01" \
-i "pandas.Index.size SA01" \
-i "pandas.Index.slice_indexer PR07,RT03,SA01" \
-i "pandas.Index.slice_locs RT03" \
-i "pandas.Index.str PR01,SA01" \
Expand Down Expand Up @@ -361,7 +352,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Series.mode SA01" \
-i "pandas.Series.mul PR07" \
-i "pandas.Series.nbytes SA01" \
-i "pandas.Series.ndim SA01" \
-i "pandas.Series.ne PR07,SA01" \
-i "pandas.Series.nunique RT03" \
-i "pandas.Series.pad PR01,SA01" \
Expand All @@ -381,7 +371,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Series.rtruediv PR07" \
-i "pandas.Series.sem PR01,RT03,SA01" \
-i "pandas.Series.shape SA01" \
-i "pandas.Series.size SA01" \
-i "pandas.Series.skew RT03,SA01" \
-i "pandas.Series.sparse PR01,SA01" \
-i "pandas.Series.sparse.density SA01" \
Expand Down
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ Optional libraries below the lowest tested version may still work, but are not c
+------------------------+---------------------+
| adbc-driver-postgresql | 0.10.0 |
+------------------------+---------------------+
| mypy (dev) | 1.9.0 |
+------------------------+---------------------+

See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more.

Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ dependencies:

# code checks
- flake8=6.1.0 # run in subprocess over docstring examples
- mypy=1.8.0 # pre-commit uses locally installed mypy
- mypy=1.9.0 # pre-commit uses locally installed mypy
- tokenize-rt # scripts/check_for_inconsistent_pandas_namespace.py
- pre-commit>=3.6.0

Expand Down
1 change: 1 addition & 0 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ def __reversed__(self) -> Iterator[_T_co]: ...
# see https://mypy.readthedocs.io/en/stable/generics.html#declaring-decorators
FuncType = Callable[..., Any]
F = TypeVar("F", bound=FuncType)
TypeT = TypeVar("TypeT", bound=type)

# types of vectorized key functions for DataFrame::sort_values and
# DataFrame::sort_index, among others
Expand Down
19 changes: 14 additions & 5 deletions pandas/core/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from __future__ import annotations

from typing import (
TYPE_CHECKING,
Callable,
final,
)
Expand All @@ -16,6 +17,12 @@
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level

if TYPE_CHECKING:
from pandas._typing import TypeT

from pandas import Index
from pandas.core.generic import NDFrame


class DirNamesMixin:
_accessors: set[str] = set()
Expand Down Expand Up @@ -232,7 +239,9 @@ def __get__(self, obj, cls):


@doc(klass="", examples="", others="")
def _register_accessor(name: str, cls):
def _register_accessor(
name: str, cls: type[NDFrame | Index]
) -> Callable[[TypeT], TypeT]:
"""
Register a custom accessor on {klass} objects.
Expand Down Expand Up @@ -277,7 +286,7 @@ def _register_accessor(name: str, cls):
{examples}
"""

def decorator(accessor):
def decorator(accessor: TypeT) -> TypeT:
if hasattr(cls, name):
warnings.warn(
f"registration of accessor {accessor!r} under name "
Expand Down Expand Up @@ -320,7 +329,7 @@ def decorator(accessor):


@doc(_register_accessor, klass="DataFrame", examples=_register_df_examples)
def register_dataframe_accessor(name: str):
def register_dataframe_accessor(name: str) -> Callable[[TypeT], TypeT]:
from pandas import DataFrame

return _register_accessor(name, DataFrame)
Expand Down Expand Up @@ -351,7 +360,7 @@ def register_dataframe_accessor(name: str):


@doc(_register_accessor, klass="Series", examples=_register_series_examples)
def register_series_accessor(name: str):
def register_series_accessor(name: str) -> Callable[[TypeT], TypeT]:
from pandas import Series

return _register_accessor(name, Series)
Expand Down Expand Up @@ -385,7 +394,7 @@ def register_series_accessor(name: str):


@doc(_register_accessor, klass="Index", examples=_register_index_examples)
def register_index_accessor(name: str):
def register_index_accessor(name: str) -> Callable[[TypeT], TypeT]:
from pandas import Index

return _register_accessor(name, Index)
23 changes: 23 additions & 0 deletions pandas/core/arrays/sparse/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,10 @@ class SparseFrameAccessor(BaseAccessor, PandasDelegate):
"""
DataFrame accessor for sparse data.
See Also
--------
DataFrame.sparse.density : Ratio of non-sparse points to total (dense) data points.
Examples
--------
>>> df = pd.DataFrame({"a": [1, 2, 0, 0], "b": [3, 0, 0, 4]}, dtype="Sparse[int]")
Expand Down Expand Up @@ -274,6 +278,11 @@ def from_spmatrix(cls, data, index=None, columns=None) -> DataFrame:
Each column of the DataFrame is stored as a
:class:`arrays.SparseArray`.
See Also
--------
DataFrame.sparse.to_coo : Return the contents of the frame as a
sparse SciPy COO matrix.
Examples
--------
>>> import scipy.sparse
Expand Down Expand Up @@ -319,6 +328,11 @@ def to_dense(self) -> DataFrame:
DataFrame
A DataFrame with the same values stored as dense arrays.
See Also
--------
DataFrame.sparse.density : Ratio of non-sparse points to total
(dense) data points.
Examples
--------
>>> df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1, 0])})
Expand All @@ -343,6 +357,10 @@ def to_coo(self) -> spmatrix:
If the caller is heterogeneous and contains booleans or objects,
the result will be of dtype=object. See Notes.
See Also
--------
DataFrame.sparse.to_dense : Convert a DataFrame with sparse values to dense.
Notes
-----
The dtype will be the lowest-common-denominator type (implicit
Expand Down Expand Up @@ -388,6 +406,11 @@ def density(self) -> float:
"""
Ratio of non-sparse points to total (dense) data points.
See Also
--------
DataFrame.sparse.from_spmatrix : Create a new DataFrame from a
scipy sparse matrix.
Examples
--------
>>> df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1, 0, 1])})
Expand Down
14 changes: 14 additions & 0 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,13 @@ def ndim(self) -> Literal[1]:
"""
Number of dimensions of the underlying data, by definition 1.
See Also
--------
Series.size: Return the number of elements in the underlying data.
Series.shape: Return a tuple of the shape of the underlying data.
Series.dtype: Return the dtype object of the underlying data.
Series.values: Return Series as ndarray or ndarray-like depending on the dtype.
Examples
--------
>>> s = pd.Series(["Ant", "Bear", "Cow"])
Expand Down Expand Up @@ -440,6 +447,13 @@ def size(self) -> int:
"""
Return the number of elements in the underlying data.
See Also
--------
Series.ndim: Number of dimensions of the underlying data, by definition 1.
Series.shape: Return a tuple of the shape of the underlying data.
Series.dtype: Return the dtype object of the underlying data.
Series.values: Return Series as ndarray or ndarray-like depending on the dtype.
Examples
--------
For Series:
Expand Down
7 changes: 7 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7104,6 +7104,13 @@ def shape(self) -> Shape:
"""
Return a tuple of the shape of the underlying data.
See Also
--------
Index.size: Return the number of elements in the underlying data.
Index.ndim: Number of dimensions of the underlying data, by definition 1.
Index.dtype: Return the dtype object of the underlying data.
Index.values: Return an array representing the data in the Index.
Examples
--------
>>> idx = pd.Index([1, 2, 3])
Expand Down
38 changes: 31 additions & 7 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,11 @@ def get(self, key: str):
object
Same type as object stored in file.
See Also
--------
HDFStore.get_node : Returns the node with the key.
HDFStore.get_storer : Returns the storer object for a key.
Examples
--------
>>> df = pd.DataFrame([[1, 2], [3, 4]], columns=["A", "B"])
Expand Down Expand Up @@ -1261,15 +1266,19 @@ def append(
Table format. Write as a PyTables Table structure which may perform
worse but allow more flexible operations like searching / selecting
subsets of the data.
axes : default None
This parameter is currently not accepted.
index : bool, default True
Write DataFrame index as a column.
append : bool, default True
Append the input data to the existing data.
data_columns : list of columns, or True, default None
List of columns to create as indexed data columns for on-disk
queries, or True to use all columns. By default only the axes
of the object are indexed. See `here
<https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#query-via-data-columns>`__.
complib : default None
This parameter is currently not accepted.
complevel : int, 0-9, default None
Specifies a compression level for data.
A value of 0 or None disables compression.
columns : default None
This parameter is currently not accepted, try data_columns.
min_itemsize : int, dict, or None
Dict of columns that specify minimum str sizes.
nan_rep : str
Expand All @@ -1278,11 +1287,26 @@ def append(
Size to chunk the writing.
expectedrows : int
Expected TOTAL row size of this table.
encoding : default None
Provide an encoding for str.
dropna : bool, default False, optional
Do not write an ALL nan row to the store; settable
by the option 'io.hdf.dropna_table'.
data_columns : list of columns, or True, default None
List of columns to create as indexed data columns for on-disk
queries, or True to use all columns. By default only the axes
of the object are indexed. See `here
<https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#query-via-data-columns>`__.
encoding : default None
Provide an encoding for str.
errors : str, default 'strict'
The error handling scheme to use for encoding errors.
The default is 'strict' meaning that encoding errors raise a
UnicodeEncodeError. Other possible values are 'ignore', 'replace' and
'xmlcharrefreplace' as well as any other name registered with
codecs.register_error that can handle UnicodeEncodeErrors.
See Also
--------
HDFStore.append_to_multiple : Append to multiple tables.
Notes
-----
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ moto
flask
asv>=0.6.1
flake8==6.1.0
mypy==1.8.0
mypy==1.9.0
tokenize-rt
pre-commit>=3.6.0
gitpython
Expand Down

0 comments on commit 4021573

Please sign in to comment.