Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: stats.variation: add array-API support #20647

Merged
merged 4 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/array_api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,5 @@ jobs:
python dev.py --no-build test -b all -t scipy._lib.tests.test__util -- --durations 3 --timeout=60
python dev.py --no-build test -b all -t scipy.stats.tests.test_stats -- --durations 3 --timeout=60
python dev.py --no-build test -b all -t scipy.stats.tests.test_morestats -- --durations 3 --timeout=60
python dev.py --no-build test -b all -t scipy.stats.tests.test_variation -- --durations 3 --timeout=60
python dev.py --no-build test -b all -t scipy.stats.tests.test_resampling -- --durations 3 --timeout=60
33 changes: 20 additions & 13 deletions scipy/stats/_variation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import numpy as np

from scipy._lib._util import _get_nan
from scipy._lib._array_api import array_namespace

from ._axis_nan_policy import _axis_nan_policy_factory


Expand Down Expand Up @@ -91,31 +94,35 @@ def variation(a, axis=0, nan_policy='propagate', ddof=0, *, keepdims=False):
array([1.05109361, 0.31428986, 0.146483 ])

"""
xp = array_namespace(a)
a = xp.asarray(a)
# `nan_policy` and `keepdims` are handled by `_axis_nan_policy`
# `axis=None` is only handled for NumPy backend
if axis is None:
a = xp.reshape(a, (-1,))
axis = 0

n = a.shape[axis]
NaN = _get_nan(a)

if a.size == 0 or ddof > n:
# Handle as a special case to avoid spurious warnings.
# The return values, if any, are all nan.
shp = np.asarray(a.shape)
shp = np.delete(shp, axis)
result = np.full(shp, fill_value=NaN)
return result[()]
shp = list(a.shape)
shp.pop(axis)
result = xp.full(shp, fill_value=NaN)
return result[()] if result.ndim == 0 else result

mean_a = a.mean(axis)
mean_a = xp.mean(a, axis=axis)

if ddof == n:
mdhaber marked this conversation as resolved.
Show resolved Hide resolved
# Another special case. Result is either inf or nan.
std_a = a.std(axis=axis, ddof=0)
result = np.full_like(std_a, fill_value=NaN)
i = std_a > 0
result[i] = np.inf
result[i] = np.copysign(result[i], mean_a[i])
return result[()]
std_a = xp.std(a, axis=axis, correction=0)
result = xp.where(std_a > 0, xp.copysign(xp.asarray(xp.inf), mean_a), NaN)
Copy link
Contributor

@mdhaber mdhaber May 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops. Looks like we need an xp_copysign in the meantime (j-bowhay#4). I'd recommend installing array_api_strict.

return result[()] if result.ndim == 0 else result

with np.errstate(divide='ignore', invalid='ignore'):
std_a = a.std(axis, ddof=ddof)
std_a = xp.std(a, axis=axis, correction=ddof)
result = std_a / mean_a

return result[()]
return result[()] if result.ndim == 0 else result
195 changes: 117 additions & 78 deletions scipy/stats/tests/test_variation.py
Original file line number Diff line number Diff line change
@@ -1,159 +1,198 @@
import math

import numpy as np
from numpy.testing import assert_equal, assert_allclose
import pytest
from numpy.testing import suppress_warnings

from scipy.stats import variation
from scipy._lib._util import AxisError
from scipy.conftest import array_api_compatible
from scipy._lib._array_api import xp_assert_equal, xp_assert_close

pytestmark = [array_api_compatible, pytest.mark.usefixtures("skip_xp_backends")]
skip_xp_backends = pytest.mark.skip_xp_backends


class TestVariation:
"""
Test class for scipy.stats.variation
"""

def test_ddof(self):
x = np.arange(9.0)
assert_allclose(variation(x, ddof=1), np.sqrt(60/8)/4)
def test_ddof(self, xp):
x = xp.arange(9.0)
xp_assert_close(variation(x, ddof=1), xp.asarray(math.sqrt(60/8)/4))

@pytest.mark.parametrize('sgn', [1, -1])
def test_sign(self, sgn):
x = np.array([1, 2, 3, 4, 5])
def test_sign(self, sgn, xp):
x = xp.asarray([1., 2., 3., 4., 5.])
v = variation(sgn*x)
expected = sgn*np.sqrt(2)/3
assert_allclose(v, expected, rtol=1e-10)
expected = xp.asarray(sgn*math.sqrt(2)/3)
xp_assert_close(v, expected, rtol=1e-10)

def test_scalar(self):
def test_scalar(self, xp):
# A scalar is treated like a 1-d sequence with length 1.
assert_equal(variation(4.0), 0.0)
xp_assert_equal(variation(4.0), 0.0)

@pytest.mark.parametrize('nan_policy, expected',
[('propagate', np.nan),
('omit', np.sqrt(20/3)/4)])
def test_variation_nan(self, nan_policy, expected):
x = np.arange(10.)
x[9] = np.nan
assert_allclose(variation(x, nan_policy=nan_policy), expected)

def test_nan_policy_raise(self):
x = np.array([1.0, 2.0, np.nan, 3.0])
@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_variation_nan(self, nan_policy, expected, xp):
x = xp.arange(10.)
x[9] = xp.nan
xp_assert_close(variation(x, nan_policy=nan_policy), expected)

@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_nan_policy_raise(self, xp):
x = xp.asarray([1.0, 2.0, xp.nan, 3.0])
with pytest.raises(ValueError, match='input contains nan'):
variation(x, nan_policy='raise')

def test_bad_nan_policy(self):
@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_bad_nan_policy(self, xp):
with pytest.raises(ValueError, match='must be one of'):
variation([1, 2, 3], nan_policy='foobar')

def test_keepdims(self):
x = np.arange(10).reshape(2, 5)
@skip_xp_backends(np_only=True,
reasons=['`keepdims` only supports NumPy backend'])
def test_keepdims(self, xp):
x = xp.reshape(xp.arange(10), (2, 5))
y = variation(x, axis=1, keepdims=True)
expected = np.array([[np.sqrt(2)/2],
[np.sqrt(2)/7]])
assert_allclose(y, expected)
xp_assert_close(y, expected)

@skip_xp_backends(np_only=True,
reasons=['`keepdims` only supports NumPy backend'])
@pytest.mark.parametrize('axis, expected',
[(0, np.empty((1, 0))),
(1, np.full((5, 1), fill_value=np.nan))])
def test_keepdims_size0(self, axis, expected):
x = np.zeros((5, 0))
def test_keepdims_size0(self, axis, expected, xp):
x = xp.zeros((5, 0))
y = variation(x, axis=axis, keepdims=True)
assert_equal(y, expected)
xp_assert_equal(y, expected)

@skip_xp_backends(np_only=True,
reasons=['`keepdims` only supports NumPy backend'])
@pytest.mark.parametrize('incr, expected_fill', [(0, np.inf), (1, np.nan)])
def test_keepdims_and_ddof_eq_len_plus_incr(self, incr, expected_fill):
x = np.array([[1, 1, 2, 2], [1, 2, 3, 3]])
def test_keepdims_and_ddof_eq_len_plus_incr(self, incr, expected_fill, xp):
x = xp.asarray([[1, 1, 2, 2], [1, 2, 3, 3]])
y = variation(x, axis=1, ddof=x.shape[1] + incr, keepdims=True)
assert_equal(y, np.full((2, 1), fill_value=expected_fill))
xp_assert_equal(y, xp.full((2, 1), fill_value=expected_fill))

def test_propagate_nan(self):
@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_propagate_nan(self, xp):
# Check that the shape of the result is the same for inputs
# with and without nans, cf gh-5817
a = np.arange(8).reshape(2, -1).astype(float)
a[1, 0] = np.nan
a = xp.reshape(xp.arange(8, dtype=float), (2, -1))
a[1, 0] = xp.nan
v = variation(a, axis=1, nan_policy="propagate")
assert_allclose(v, [np.sqrt(5/4)/1.5, np.nan], atol=1e-15)
xp_assert_close(v, [math.sqrt(5/4)/1.5, xp.nan], atol=1e-15)

def test_axis_none(self):
@skip_xp_backends(np_only=True, reasons=['Python list input uses NumPy backend'])
def test_axis_none(self, xp):
# Check that `variation` computes the result on the flattened
# input when axis is None.
y = variation([[0, 1], [2, 3]], axis=None)
assert_allclose(y, np.sqrt(5/4)/1.5)
xp_assert_close(y, math.sqrt(5/4)/1.5)

def test_bad_axis(self):
def test_bad_axis(self, xp):
# Check that an invalid axis raises np.exceptions.AxisError.
x = np.array([[1, 2, 3], [4, 5, 6]])
with pytest.raises(AxisError):
x = xp.asarray([[1, 2, 3], [4, 5, 6]])
with pytest.raises((AxisError, IndexError)):
variation(x, axis=10)

def test_mean_zero(self):
def test_mean_zero(self, xp):
# Check that `variation` returns inf for a sequence that is not
# identically zero but whose mean is zero.
x = np.array([10, -3, 1, -4, -4])
x = xp.asarray([10., -3., 1., -4., -4.])
y = variation(x)
assert_equal(y, np.inf)
xp_assert_equal(y, xp.asarray(xp.inf))

x2 = np.array([x, -10*x])
x2 = xp.stack([x, -10.*x])
y2 = variation(x2, axis=1)
assert_equal(y2, [np.inf, np.inf])
xp_assert_equal(y2, xp.asarray([xp.inf, xp.inf]))

@pytest.mark.parametrize('x', [np.zeros(5), [], [1, 2, np.inf, 9]])
def test_return_nan(self, x):
@pytest.mark.parametrize('x', [[0.]*5, [], [1, 2, np.inf, 9]])
def test_return_nan(self, x, xp):
x = xp.asarray(x)
# Test some cases where `variation` returns nan.
y = variation(x)
assert_equal(y, np.nan)
with suppress_warnings() as sup:
# torch
sup.filter(UserWarning, "std*")
y = variation(x)
xp_assert_equal(y, xp.asarray(xp.nan, dtype=x.dtype))
Copy link
Contributor

@mdhaber mdhaber May 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not a big deal, but you could change np.zeros(5) to [0.]*5 to avoid the explicit dtype, if we prefer testing the default type. Then again, one could argue that now this tests both the default type and float64 (if they are different), and coverage is a good thing. Either way; just an observation.

Copy link
Contributor

@mdhaber mdhaber May 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant that since you changed np.zeros to [0.]*, you should be able to do:

Suggested change
xp_assert_equal(y, xp.asarray(xp.nan, dtype=x.dtype))
xp_assert_equal(y, xp.asarray(xp.nan))


@pytest.mark.parametrize('axis, expected',
[(0, []), (1, [np.nan]*3), (None, np.nan)])
def test_2d_size_zero_with_axis(self, axis, expected):
x = np.empty((3, 0))
y = variation(x, axis=axis)
assert_equal(y, expected)

def test_neg_inf(self):
def test_2d_size_zero_with_axis(self, axis, expected, xp):
x = xp.empty((3, 0))
with suppress_warnings() as sup:
# torch
sup.filter(UserWarning, "std*")
y = variation(x, axis=axis)
xp_assert_equal(y, xp.asarray(expected))
mdhaber marked this conversation as resolved.
Show resolved Hide resolved

def test_neg_inf(self, xp):
# Edge case that produces -inf: ddof equals the number of non-nan
# values, the values are not constant, and the mean is negative.
x1 = np.array([-3, -5])
assert_equal(variation(x1, ddof=2), -np.inf)

x2 = np.array([[np.nan, 1, -10, np.nan],
[-20, -3, np.nan, np.nan]])
assert_equal(variation(x2, axis=1, ddof=2, nan_policy='omit'),
[-np.inf, -np.inf])

x1 = xp.asarray([-3., -5.])
xp_assert_equal(variation(x1, ddof=2), xp.asarray(-xp.inf))
mdhaber marked this conversation as resolved.
Show resolved Hide resolved

@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_neg_inf_nan(self, xp):
x2 = xp.asarray([[xp.nan, 1, -10, xp.nan],
[-20, -3, xp.nan, xp.nan]])
xp_assert_equal(variation(x2, axis=1, ddof=2, nan_policy='omit'),
[-xp.inf, -xp.inf])

@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
@pytest.mark.parametrize("nan_policy", ['propagate', 'omit'])
def test_combined_edge_cases(self, nan_policy):
x = np.array([[0, 10, np.nan, 1],
[0, -5, np.nan, 2],
[0, -5, np.nan, 3]])
def test_combined_edge_cases(self, nan_policy, xp):
x = xp.array([[0, 10, xp.nan, 1],
[0, -5, xp.nan, 2],
[0, -5, xp.nan, 3]])
y = variation(x, axis=0, nan_policy=nan_policy)
assert_allclose(y, [np.nan, np.inf, np.nan, np.sqrt(2/3)/2])
xp_assert_close(y, [xp.nan, xp.inf, xp.nan, math.sqrt(2/3)/2])

@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
@pytest.mark.parametrize(
'ddof, expected',
[(0, [np.sqrt(1/6), np.sqrt(5/8), np.inf, 0, np.nan, 0.0, np.nan]),
(1, [0.5, np.sqrt(5/6), np.inf, 0, np.nan, 0, np.nan]),
(2, [np.sqrt(0.5), np.sqrt(5/4), np.inf, np.nan, np.nan, 0, np.nan])]
)
def test_more_nan_policy_omit_tests(self, ddof, expected):
def test_more_nan_policy_omit_tests(self, ddof, expected, xp):
# The slightly strange formatting in the follow array is my attempt to
# maintain a clean tabular arrangement of the data while satisfying
# the demands of pycodestyle. Currently, E201 and E241 are not
# disabled by the `# noqa` annotation.
nan = np.nan
x = np.array([[1.0, 2.0, nan, 3.0],
[0.0, 4.0, 3.0, 1.0],
[nan, -.5, 0.5, nan],
[nan, 9.0, 9.0, nan],
[nan, nan, nan, nan],
[3.0, 3.0, 3.0, 3.0],
[0.0, 0.0, 0.0, 0.0]])
nan = xp.nan
x = xp.asarray([[1.0, 2.0, nan, 3.0],
[0.0, 4.0, 3.0, 1.0],
[nan, -.5, 0.5, nan],
[nan, 9.0, 9.0, nan],
[nan, nan, nan, nan],
[3.0, 3.0, 3.0, 3.0],
[0.0, 0.0, 0.0, 0.0]])
v = variation(x, axis=1, ddof=ddof, nan_policy='omit')
assert_allclose(v, expected)
xp_assert_close(v, expected)

def test_variation_ddof(self):
@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_variation_ddof(self, xp):
# test variation with delta degrees of freedom
# regression test for gh-13341
a = np.array([1, 2, 3, 4, 5])
nan_a = np.array([1, 2, 3, np.nan, 4, 5, np.nan])
a = xp.asarray([1., 2., 3., 4., 5.])
nan_a = xp.asarray([1, 2, 3, xp.nan, 4, 5, xp.nan])
y = variation(a, ddof=1)
nan_y = variation(nan_a, nan_policy="omit", ddof=1)
assert_allclose(y, np.sqrt(5/2)/3)
xp_assert_close(y, math.sqrt(5/2)/3)
assert y == nan_y