Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: stats.variation: add array-API support #20647

Merged
merged 4 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/array_api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,5 @@ jobs:
python dev.py --no-build test -b all -t scipy._lib.tests.test__util -- --durations 3 --timeout=60
python dev.py --no-build test -b all -t scipy.stats.tests.test_stats -- --durations 3 --timeout=60
python dev.py --no-build test -b all -t scipy.stats.tests.test_morestats -- --durations 3 --timeout=60
python dev.py --no-build test -b all -t scipy.stats.tests.test_variation -- --durations 3 --timeout=60
python dev.py --no-build test -b all -t scipy.stats.tests.test_resampling -- --durations 3 --timeout=60
11 changes: 10 additions & 1 deletion scipy/_lib/_array_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,9 +413,18 @@ def xp_clip(x, a, b, xp=None):

# temporary substitute for xp.moveaxis, which is not yet in all backends
# or covered by array_api_compat.
def _move_axis_to_end(x, source, xp=None):
def xp_moveaxis_to_end(x, source, xp=None):
xp = array_namespace(xp) if xp is None else xp
axes = list(range(x.ndim))
temp = axes.pop(source)
axes = axes + [temp]
return xp.permute_dims(x, axes)


# temporary substitute for xp.copysign, which is not yet in all backends
# or covered by array_api_compat.
def xp_copysign(x1, x2, xp=None):
# no attempt to account for special cases
xp = array_namespace(x1, x2) if xp is None else xp
abs_x1 = xp.abs(x1)
return xp.where(x2 >= 0, abs_x1, -abs_x1)
4 changes: 2 additions & 2 deletions scipy/stats/_resampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from scipy._lib._util import check_random_state, _rename_parameter, rng_integers
from scipy._lib._array_api import (array_namespace, is_numpy, xp_minimum,
xp_clip, _move_axis_to_end)
xp_clip, xp_moveaxis_to_end)
from scipy.special import ndtr, ndtri, comb, factorial

from ._common import ConfidenceInterval
Expand Down Expand Up @@ -716,7 +716,7 @@ def _monte_carlo_test_iv(data, rvs, statistic, vectorized, n_resamples,
data_iv = []
for sample in data:
sample = xp.broadcast_to(sample, (1,)) if sample.ndim == 0 else sample
sample = _move_axis_to_end(sample, axis_int, xp=xp)
sample = xp_moveaxis_to_end(sample, axis_int, xp=xp)
data_iv.append(sample)

n_resamples_int = int(n_resamples)
Expand Down
6 changes: 3 additions & 3 deletions scipy/stats/_stats_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
from scipy.optimize import root_scalar
from scipy._lib._util import normalize_axis_index
from scipy._lib._array_api import (array_namespace, is_numpy, atleast_nd,
xp_clip, _move_axis_to_end)
xp_clip, xp_moveaxis_to_end)
from scipy._lib.array_api_compat import size as xp_size

# In __all__ but deprecated for removal in SciPy 1.13.0
Expand Down Expand Up @@ -4821,8 +4821,8 @@ def pearsonr(x, y, *, alternative='two-sided', method=None, axis=0):

# `moveaxis` only recently added to array API, so it's not yey available in
# array_api_strict. Replace with e.g. `xp.moveaxis(x, axis, -1)` when available.
x = _move_axis_to_end(x, axis, xp)
y = _move_axis_to_end(y, axis, xp)
x = xp_moveaxis_to_end(x, axis, xp)
y = xp_moveaxis_to_end(y, axis, xp)
axis = -1

dtype = xp.result_type(x.dtype, y.dtype)
Expand Down
33 changes: 20 additions & 13 deletions scipy/stats/_variation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import numpy as np

from scipy._lib._util import _get_nan
from scipy._lib._array_api import array_namespace, xp_copysign

from ._axis_nan_policy import _axis_nan_policy_factory


Expand Down Expand Up @@ -91,31 +94,35 @@ def variation(a, axis=0, nan_policy='propagate', ddof=0, *, keepdims=False):
array([1.05109361, 0.31428986, 0.146483 ])

"""
xp = array_namespace(a)
a = xp.asarray(a)
# `nan_policy` and `keepdims` are handled by `_axis_nan_policy`
# `axis=None` is only handled for NumPy backend
if axis is None:
a = xp.reshape(a, (-1,))
axis = 0

n = a.shape[axis]
NaN = _get_nan(a)

if a.size == 0 or ddof > n:
# Handle as a special case to avoid spurious warnings.
# The return values, if any, are all nan.
shp = np.asarray(a.shape)
shp = np.delete(shp, axis)
result = np.full(shp, fill_value=NaN)
return result[()]
shp = list(a.shape)
shp.pop(axis)
result = xp.full(shp, fill_value=NaN)
return result[()] if result.ndim == 0 else result

mean_a = a.mean(axis)
mean_a = xp.mean(a, axis=axis)

if ddof == n:
mdhaber marked this conversation as resolved.
Show resolved Hide resolved
# Another special case. Result is either inf or nan.
std_a = a.std(axis=axis, ddof=0)
result = np.full_like(std_a, fill_value=NaN)
i = std_a > 0
result[i] = np.inf
result[i] = np.copysign(result[i], mean_a[i])
return result[()]
std_a = xp.std(a, axis=axis, correction=0)
result = xp.where(std_a > 0, xp_copysign(xp.asarray(xp.inf), mean_a), NaN)
return result[()] if result.ndim == 0 else result

with np.errstate(divide='ignore', invalid='ignore'):
std_a = a.std(axis, ddof=ddof)
std_a = xp.std(a, axis=axis, correction=ddof)
result = std_a / mean_a

return result[()]
return result[()] if result.ndim == 0 else result
195 changes: 117 additions & 78 deletions scipy/stats/tests/test_variation.py
Original file line number Diff line number Diff line change
@@ -1,159 +1,198 @@
import math

import numpy as np
from numpy.testing import assert_equal, assert_allclose
import pytest
from numpy.testing import suppress_warnings

from scipy.stats import variation
from scipy._lib._util import AxisError
from scipy.conftest import array_api_compatible
from scipy._lib._array_api import xp_assert_equal, xp_assert_close

pytestmark = [array_api_compatible, pytest.mark.usefixtures("skip_xp_backends")]
skip_xp_backends = pytest.mark.skip_xp_backends


class TestVariation:
"""
Test class for scipy.stats.variation
"""

def test_ddof(self):
x = np.arange(9.0)
assert_allclose(variation(x, ddof=1), np.sqrt(60/8)/4)
def test_ddof(self, xp):
x = xp.arange(9.0)
xp_assert_close(variation(x, ddof=1), xp.asarray(math.sqrt(60/8)/4))

@pytest.mark.parametrize('sgn', [1, -1])
def test_sign(self, sgn):
x = np.array([1, 2, 3, 4, 5])
def test_sign(self, sgn, xp):
x = xp.asarray([1., 2., 3., 4., 5.])
v = variation(sgn*x)
expected = sgn*np.sqrt(2)/3
assert_allclose(v, expected, rtol=1e-10)
expected = xp.asarray(sgn*math.sqrt(2)/3)
xp_assert_close(v, expected, rtol=1e-10)

def test_scalar(self):
def test_scalar(self, xp):
# A scalar is treated like a 1-d sequence with length 1.
assert_equal(variation(4.0), 0.0)
xp_assert_equal(variation(4.0), 0.0)

@pytest.mark.parametrize('nan_policy, expected',
[('propagate', np.nan),
('omit', np.sqrt(20/3)/4)])
def test_variation_nan(self, nan_policy, expected):
x = np.arange(10.)
x[9] = np.nan
assert_allclose(variation(x, nan_policy=nan_policy), expected)

def test_nan_policy_raise(self):
x = np.array([1.0, 2.0, np.nan, 3.0])
@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_variation_nan(self, nan_policy, expected, xp):
x = xp.arange(10.)
x[9] = xp.nan
xp_assert_close(variation(x, nan_policy=nan_policy), expected)

@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_nan_policy_raise(self, xp):
x = xp.asarray([1.0, 2.0, xp.nan, 3.0])
with pytest.raises(ValueError, match='input contains nan'):
variation(x, nan_policy='raise')

def test_bad_nan_policy(self):
@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_bad_nan_policy(self, xp):
with pytest.raises(ValueError, match='must be one of'):
variation([1, 2, 3], nan_policy='foobar')

def test_keepdims(self):
x = np.arange(10).reshape(2, 5)
@skip_xp_backends(np_only=True,
reasons=['`keepdims` only supports NumPy backend'])
def test_keepdims(self, xp):
x = xp.reshape(xp.arange(10), (2, 5))
y = variation(x, axis=1, keepdims=True)
expected = np.array([[np.sqrt(2)/2],
[np.sqrt(2)/7]])
assert_allclose(y, expected)
xp_assert_close(y, expected)

@skip_xp_backends(np_only=True,
reasons=['`keepdims` only supports NumPy backend'])
@pytest.mark.parametrize('axis, expected',
[(0, np.empty((1, 0))),
(1, np.full((5, 1), fill_value=np.nan))])
def test_keepdims_size0(self, axis, expected):
x = np.zeros((5, 0))
def test_keepdims_size0(self, axis, expected, xp):
x = xp.zeros((5, 0))
y = variation(x, axis=axis, keepdims=True)
assert_equal(y, expected)
xp_assert_equal(y, expected)

@skip_xp_backends(np_only=True,
reasons=['`keepdims` only supports NumPy backend'])
@pytest.mark.parametrize('incr, expected_fill', [(0, np.inf), (1, np.nan)])
def test_keepdims_and_ddof_eq_len_plus_incr(self, incr, expected_fill):
x = np.array([[1, 1, 2, 2], [1, 2, 3, 3]])
def test_keepdims_and_ddof_eq_len_plus_incr(self, incr, expected_fill, xp):
x = xp.asarray([[1, 1, 2, 2], [1, 2, 3, 3]])
y = variation(x, axis=1, ddof=x.shape[1] + incr, keepdims=True)
assert_equal(y, np.full((2, 1), fill_value=expected_fill))
xp_assert_equal(y, xp.full((2, 1), fill_value=expected_fill))

def test_propagate_nan(self):
@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_propagate_nan(self, xp):
# Check that the shape of the result is the same for inputs
# with and without nans, cf gh-5817
a = np.arange(8).reshape(2, -1).astype(float)
a[1, 0] = np.nan
a = xp.reshape(xp.arange(8, dtype=float), (2, -1))
a[1, 0] = xp.nan
v = variation(a, axis=1, nan_policy="propagate")
assert_allclose(v, [np.sqrt(5/4)/1.5, np.nan], atol=1e-15)
xp_assert_close(v, [math.sqrt(5/4)/1.5, xp.nan], atol=1e-15)

def test_axis_none(self):
@skip_xp_backends(np_only=True, reasons=['Python list input uses NumPy backend'])
def test_axis_none(self, xp):
# Check that `variation` computes the result on the flattened
# input when axis is None.
y = variation([[0, 1], [2, 3]], axis=None)
assert_allclose(y, np.sqrt(5/4)/1.5)
xp_assert_close(y, math.sqrt(5/4)/1.5)

def test_bad_axis(self):
def test_bad_axis(self, xp):
# Check that an invalid axis raises np.exceptions.AxisError.
x = np.array([[1, 2, 3], [4, 5, 6]])
with pytest.raises(AxisError):
x = xp.asarray([[1, 2, 3], [4, 5, 6]])
with pytest.raises((AxisError, IndexError)):
variation(x, axis=10)

def test_mean_zero(self):
def test_mean_zero(self, xp):
# Check that `variation` returns inf for a sequence that is not
# identically zero but whose mean is zero.
x = np.array([10, -3, 1, -4, -4])
x = xp.asarray([10., -3., 1., -4., -4.])
y = variation(x)
assert_equal(y, np.inf)
xp_assert_equal(y, xp.asarray(xp.inf))

x2 = np.array([x, -10*x])
x2 = xp.stack([x, -10.*x])
y2 = variation(x2, axis=1)
assert_equal(y2, [np.inf, np.inf])
xp_assert_equal(y2, xp.asarray([xp.inf, xp.inf]))

@pytest.mark.parametrize('x', [np.zeros(5), [], [1, 2, np.inf, 9]])
def test_return_nan(self, x):
@pytest.mark.parametrize('x', [[0.]*5, [], [1, 2, np.inf, 9]])
def test_return_nan(self, x, xp):
x = xp.asarray(x)
# Test some cases where `variation` returns nan.
y = variation(x)
assert_equal(y, np.nan)
with suppress_warnings() as sup:
# torch
sup.filter(UserWarning, "std*")
y = variation(x)
xp_assert_equal(y, xp.asarray(xp.nan, dtype=x.dtype))
Copy link
Contributor

@mdhaber mdhaber May 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a big deal, but you could change np.zeros(5) to [0.]*5 to avoid the explicit dtype, if we prefer testing the default type. Then again, one could argue that now this tests both the default type and float64 (if they are different), and coverage is a good thing. Either way; just an observation.

Copy link
Contributor

@mdhaber mdhaber May 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant that since you changed np.zeros to [0.]*, you should be able to do:

Suggested change
xp_assert_equal(y, xp.asarray(xp.nan, dtype=x.dtype))
xp_assert_equal(y, xp.asarray(xp.nan))


@pytest.mark.parametrize('axis, expected',
[(0, []), (1, [np.nan]*3), (None, np.nan)])
def test_2d_size_zero_with_axis(self, axis, expected):
x = np.empty((3, 0))
y = variation(x, axis=axis)
assert_equal(y, expected)

def test_neg_inf(self):
def test_2d_size_zero_with_axis(self, axis, expected, xp):
x = xp.empty((3, 0))
with suppress_warnings() as sup:
# torch
sup.filter(UserWarning, "std*")
y = variation(x, axis=axis)
xp_assert_equal(y, xp.asarray(expected))
mdhaber marked this conversation as resolved.
Show resolved Hide resolved

def test_neg_inf(self, xp):
# Edge case that produces -inf: ddof equals the number of non-nan
# values, the values are not constant, and the mean is negative.
x1 = np.array([-3, -5])
assert_equal(variation(x1, ddof=2), -np.inf)

x2 = np.array([[np.nan, 1, -10, np.nan],
[-20, -3, np.nan, np.nan]])
assert_equal(variation(x2, axis=1, ddof=2, nan_policy='omit'),
[-np.inf, -np.inf])

x1 = xp.asarray([-3., -5.])
xp_assert_equal(variation(x1, ddof=2), xp.asarray(-xp.inf))
mdhaber marked this conversation as resolved.
Show resolved Hide resolved

@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_neg_inf_nan(self, xp):
x2 = xp.asarray([[xp.nan, 1, -10, xp.nan],
[-20, -3, xp.nan, xp.nan]])
xp_assert_equal(variation(x2, axis=1, ddof=2, nan_policy='omit'),
[-xp.inf, -xp.inf])

@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
@pytest.mark.parametrize("nan_policy", ['propagate', 'omit'])
def test_combined_edge_cases(self, nan_policy):
x = np.array([[0, 10, np.nan, 1],
[0, -5, np.nan, 2],
[0, -5, np.nan, 3]])
def test_combined_edge_cases(self, nan_policy, xp):
x = xp.array([[0, 10, xp.nan, 1],
[0, -5, xp.nan, 2],
[0, -5, xp.nan, 3]])
y = variation(x, axis=0, nan_policy=nan_policy)
assert_allclose(y, [np.nan, np.inf, np.nan, np.sqrt(2/3)/2])
xp_assert_close(y, [xp.nan, xp.inf, xp.nan, math.sqrt(2/3)/2])

@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
@pytest.mark.parametrize(
'ddof, expected',
[(0, [np.sqrt(1/6), np.sqrt(5/8), np.inf, 0, np.nan, 0.0, np.nan]),
(1, [0.5, np.sqrt(5/6), np.inf, 0, np.nan, 0, np.nan]),
(2, [np.sqrt(0.5), np.sqrt(5/4), np.inf, np.nan, np.nan, 0, np.nan])]
)
def test_more_nan_policy_omit_tests(self, ddof, expected):
def test_more_nan_policy_omit_tests(self, ddof, expected, xp):
# The slightly strange formatting in the follow array is my attempt to
# maintain a clean tabular arrangement of the data while satisfying
# the demands of pycodestyle. Currently, E201 and E241 are not
# disabled by the `# noqa` annotation.
nan = np.nan
x = np.array([[1.0, 2.0, nan, 3.0],
[0.0, 4.0, 3.0, 1.0],
[nan, -.5, 0.5, nan],
[nan, 9.0, 9.0, nan],
[nan, nan, nan, nan],
[3.0, 3.0, 3.0, 3.0],
[0.0, 0.0, 0.0, 0.0]])
nan = xp.nan
x = xp.asarray([[1.0, 2.0, nan, 3.0],
[0.0, 4.0, 3.0, 1.0],
[nan, -.5, 0.5, nan],
[nan, 9.0, 9.0, nan],
[nan, nan, nan, nan],
[3.0, 3.0, 3.0, 3.0],
[0.0, 0.0, 0.0, 0.0]])
v = variation(x, axis=1, ddof=ddof, nan_policy='omit')
assert_allclose(v, expected)
xp_assert_close(v, expected)

def test_variation_ddof(self):
@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_variation_ddof(self, xp):
# test variation with delta degrees of freedom
# regression test for gh-13341
a = np.array([1, 2, 3, 4, 5])
nan_a = np.array([1, 2, 3, np.nan, 4, 5, np.nan])
a = xp.asarray([1., 2., 3., 4., 5.])
nan_a = xp.asarray([1, 2, 3, xp.nan, 4, 5, xp.nan])
y = variation(a, ddof=1)
nan_y = variation(nan_a, nan_policy="omit", ddof=1)
assert_allclose(y, np.sqrt(5/2)/3)
xp_assert_close(y, math.sqrt(5/2)/3)
assert y == nan_y