Skip to content

Commit

Permalink
ENH: stats.variation: add array-API support
Browse files Browse the repository at this point in the history
  • Loading branch information
j-bowhay committed May 5, 2024
1 parent 4a537b7 commit 7fee674
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 87 deletions.
1 change: 1 addition & 0 deletions .github/workflows/array_api.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,4 @@ jobs:
python dev.py --no-build test -b all -t scipy._lib.tests.test__util -- --durations 3 --timeout=60
python dev.py --no-build test -b all -t scipy.stats.tests.test_stats -- --durations 3 --timeout=60
python dev.py --no-build test -b all -t scipy.stats.tests.test_morestats -- --durations 3 --timeout=60
python dev.py --no-build test -b all -t scipy.stats.tests.test_variation -- --durations 3 --timeout=60
35 changes: 23 additions & 12 deletions scipy/stats/_variation.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import numpy as np

from scipy._lib._util import _get_nan
from scipy._lib._array_api import array_namespace

from ._axis_nan_policy import _axis_nan_policy_factory


Expand Down Expand Up @@ -91,31 +94,39 @@ def variation(a, axis=0, nan_policy='propagate', ddof=0, *, keepdims=False):
array([1.05109361, 0.31428986, 0.146483 ])
"""
xp = array_namespace(a)
a = xp.asarray(a)
# `nan_policy` and `keepdims` are handled by `_axis_nan_policy`
# `axis=None` is only handled for NumPy backend
if axis is None:
a = xp.reshape(a, (-1,))
axis = 0
print(a)

n = a.shape[axis]
NaN = _get_nan(a)

if a.size == 0 or ddof > n:
# Handle as a special case to avoid spurious warnings.
# The return values, if any, are all nan.
shp = np.asarray(a.shape)
shp = np.delete(shp, axis)
result = np.full(shp, fill_value=NaN)
return result[()]
shp = list(a.shape)
shp.pop(axis)
result = xp.full(shp, fill_value=NaN)
return result[()] if result.ndim == 0 else result

mean_a = a.mean(axis)
mean_a = xp.mean(a, axis=axis)

if ddof == n:
# Another special case. Result is either inf or nan.
std_a = a.std(axis=axis, ddof=0)
result = np.full_like(std_a, fill_value=NaN)
std_a = xp.std(a, axis=axis, correction=0)
result = xp.full_like(std_a, fill_value=NaN)
i = std_a > 0
result[i] = np.inf
result[i] = np.copysign(result[i], mean_a[i])
return result[()]
result[i] = xp.inf
result[i] = xp.copysign(result[i], mean_a[i])
return result[()] if result.ndim == 0 else result

with np.errstate(divide='ignore', invalid='ignore'):
std_a = a.std(axis, ddof=ddof)
std_a = xp.std(a, axis=axis, correction=ddof)
result = std_a / mean_a

return result[()]
return result[()] if result.ndim == 0 else result
188 changes: 113 additions & 75 deletions scipy/stats/tests/test_variation.py
Original file line number Diff line number Diff line change
@@ -1,159 +1,197 @@
import math

import numpy as np
from numpy.testing import assert_equal, assert_allclose
import pytest
from numpy.testing import suppress_warnings

from scipy.stats import variation
from scipy._lib._util import AxisError
from scipy.conftest import array_api_compatible
from scipy._lib._array_api import xp_assert_equal, xp_assert_close

pytestmark = [array_api_compatible, pytest.mark.usefixtures("skip_xp_backends")]
skip_xp_backends = pytest.mark.skip_xp_backends


class TestVariation:
"""
Test class for scipy.stats.variation
"""

def test_ddof(self):
x = np.arange(9.0)
assert_allclose(variation(x, ddof=1), np.sqrt(60/8)/4)
def test_ddof(self, xp):
x = xp.arange(9.0)
xp_assert_close(variation(x, ddof=1), xp.asarray(math.sqrt(60/8)/4))

@pytest.mark.parametrize('sgn', [1, -1])
def test_sign(self, sgn):
x = np.array([1, 2, 3, 4, 5])
def test_sign(self, sgn, xp):
x = xp.asarray([1., 2., 3., 4., 5.])
v = variation(sgn*x)
expected = sgn*np.sqrt(2)/3
assert_allclose(v, expected, rtol=1e-10)
expected = xp.asarray(sgn*math.sqrt(2)/3)
xp_assert_close(v, expected, rtol=1e-10)

def test_scalar(self):
def test_scalar(self, xp):
# A scalar is treated like a 1-d sequence with length 1.
assert_equal(variation(4.0), 0.0)
xp_assert_equal(variation(4.0), 0.0)

@pytest.mark.parametrize('nan_policy, expected',
[('propagate', np.nan),
('omit', np.sqrt(20/3)/4)])
def test_variation_nan(self, nan_policy, expected):
x = np.arange(10.)
x[9] = np.nan
assert_allclose(variation(x, nan_policy=nan_policy), expected)

def test_nan_policy_raise(self):
x = np.array([1.0, 2.0, np.nan, 3.0])
@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_variation_nan(self, nan_policy, expected, xp):
x = xp.arange(10.)
x[9] = xp.nan
xp_assert_close(variation(x, nan_policy=nan_policy), expected)

@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_nan_policy_raise(self, xp):
x = xp.asarray([1.0, 2.0, xp.nan, 3.0])
with pytest.raises(ValueError, match='input contains nan'):
variation(x, nan_policy='raise')

def test_bad_nan_policy(self):
@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_bad_nan_policy(self, xp):
with pytest.raises(ValueError, match='must be one of'):
variation([1, 2, 3], nan_policy='foobar')

def test_keepdims(self):
x = np.arange(10).reshape(2, 5)
@skip_xp_backends(np_only=True,
reasons=['`keepdims` only supports NumPy backend'])
def test_keepdims(self, xp):
x = xp.reshape(xp.arange(10), (2, 5))
y = variation(x, axis=1, keepdims=True)
expected = np.array([[np.sqrt(2)/2],
[np.sqrt(2)/7]])
assert_allclose(y, expected)
xp_assert_close(y, expected)

@skip_xp_backends(np_only=True,
reasons=['`keepdims` only supports NumPy backend'])
@pytest.mark.parametrize('axis, expected',
[(0, np.empty((1, 0))),
(1, np.full((5, 1), fill_value=np.nan))])
def test_keepdims_size0(self, axis, expected):
x = np.zeros((5, 0))
def test_keepdims_size0(self, axis, expected, xp):
x = xp.zeros((5, 0))
y = variation(x, axis=axis, keepdims=True)
assert_equal(y, expected)
xp_assert_equal(y, expected)

@skip_xp_backends(np_only=True,
reasons=['`keepdims` only supports NumPy backend'])
@pytest.mark.parametrize('incr, expected_fill', [(0, np.inf), (1, np.nan)])
def test_keepdims_and_ddof_eq_len_plus_incr(self, incr, expected_fill):
x = np.array([[1, 1, 2, 2], [1, 2, 3, 3]])
def test_keepdims_and_ddof_eq_len_plus_incr(self, incr, expected_fill, xp):
x = xp.asarray([[1, 1, 2, 2], [1, 2, 3, 3]])
y = variation(x, axis=1, ddof=x.shape[1] + incr, keepdims=True)
assert_equal(y, np.full((2, 1), fill_value=expected_fill))
xp_assert_equal(y, xp.full((2, 1), fill_value=expected_fill))

def test_propagate_nan(self):
@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_propagate_nan(self, xp):
# Check that the shape of the result is the same for inputs
# with and without nans, cf gh-5817
a = np.arange(8).reshape(2, -1).astype(float)
a[1, 0] = np.nan
a = xp.reshape(xp.arange(8, dtype=float), (2, -1))
a[1, 0] = xp.nan
v = variation(a, axis=1, nan_policy="propagate")
assert_allclose(v, [np.sqrt(5/4)/1.5, np.nan], atol=1e-15)
xp_assert_close(v, [math.sqrt(5/4)/1.5, xp.nan], atol=1e-15)

def test_axis_none(self):
@skip_xp_backends(np_only=True, reasons=['Python list input uses NumPy backend'])
def test_axis_none(self, xp):
# Check that `variation` computes the result on the flattened
# input when axis is None.
y = variation([[0, 1], [2, 3]], axis=None)
assert_allclose(y, np.sqrt(5/4)/1.5)
xp_assert_close(y, math.sqrt(5/4)/1.5)

def test_bad_axis(self):
def test_bad_axis(self, xp):
# Check that an invalid axis raises np.exceptions.AxisError.
x = np.array([[1, 2, 3], [4, 5, 6]])
with pytest.raises(AxisError):
x = xp.asarray([[1, 2, 3], [4, 5, 6]])
with pytest.raises((AxisError, IndexError)):
variation(x, axis=10)

def test_mean_zero(self):
def test_mean_zero(self, xp):
# Check that `variation` returns inf for a sequence that is not
# identically zero but whose mean is zero.
x = np.array([10, -3, 1, -4, -4])
x = xp.asarray([10., -3., 1., -4., -4.])
y = variation(x)
assert_equal(y, np.inf)
xp_assert_equal(y, xp.asarray(xp.inf))

x2 = np.array([x, -10*x])
x2 = xp.stack([x, -10.*x])
y2 = variation(x2, axis=1)
assert_equal(y2, [np.inf, np.inf])
xp_assert_equal(y2, xp.asarray([xp.inf, xp.inf]))

@pytest.mark.parametrize('x', [np.zeros(5), [], [1, 2, np.inf, 9]])
def test_return_nan(self, x):
def test_return_nan(self, x, xp):
x = xp.asarray(x)
# Test some cases where `variation` returns nan.
y = variation(x)
assert_equal(y, np.nan)
with suppress_warnings() as sup:
# torch
sup.filter(UserWarning, "std*")
y = variation(x)
xp_assert_equal(y, xp.asarray(xp.nan, dtype=x.dtype))

@pytest.mark.parametrize('axis, expected',
[(0, []), (1, [np.nan]*3), (None, np.nan)])
def test_2d_size_zero_with_axis(self, axis, expected):
x = np.empty((3, 0))
y = variation(x, axis=axis)
assert_equal(y, expected)

def test_neg_inf(self):
def test_2d_size_zero_with_axis(self, axis, expected, xp):
x = xp.empty((3, 0))
with suppress_warnings() as sup:
# torch
sup.filter(UserWarning, "std*")
y = variation(x, axis=axis)
xp_assert_equal(y, xp.asarray(expected))

@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_neg_inf(self, xp):
# Edge case that produces -inf: ddof equals the number of non-nan
# values, the values are not constant, and the mean is negative.
x1 = np.array([-3, -5])
assert_equal(variation(x1, ddof=2), -np.inf)
x1 = xp.asarray([-3., -5.])
xp_assert_equal(variation(x1, ddof=2), xp.asarray(-xp.inf))

x2 = np.array([[np.nan, 1, -10, np.nan],
[-20, -3, np.nan, np.nan]])
assert_equal(variation(x2, axis=1, ddof=2, nan_policy='omit'),
[-np.inf, -np.inf])
x2 = xp.asarray([[xp.nan, 1, -10, xp.nan],
[-20, -3, xp.nan, xp.nan]])
xp_assert_equal(variation(x2, axis=1, ddof=2, nan_policy='omit'),
[-xp.inf, -xp.inf])

@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
@pytest.mark.parametrize("nan_policy", ['propagate', 'omit'])
def test_combined_edge_cases(self, nan_policy):
x = np.array([[0, 10, np.nan, 1],
[0, -5, np.nan, 2],
[0, -5, np.nan, 3]])
def test_combined_edge_cases(self, nan_policy, xp):
x = xp.array([[0, 10, xp.nan, 1],
[0, -5, xp.nan, 2],
[0, -5, xp.nan, 3]])
y = variation(x, axis=0, nan_policy=nan_policy)
assert_allclose(y, [np.nan, np.inf, np.nan, np.sqrt(2/3)/2])
xp_assert_close(y, [xp.nan, xp.inf, xp.nan, math.sqrt(2/3)/2])

@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
@pytest.mark.parametrize(
'ddof, expected',
[(0, [np.sqrt(1/6), np.sqrt(5/8), np.inf, 0, np.nan, 0.0, np.nan]),
(1, [0.5, np.sqrt(5/6), np.inf, 0, np.nan, 0, np.nan]),
(2, [np.sqrt(0.5), np.sqrt(5/4), np.inf, np.nan, np.nan, 0, np.nan])]
)
def test_more_nan_policy_omit_tests(self, ddof, expected):
def test_more_nan_policy_omit_tests(self, ddof, expected, xp):
# The slightly strange formatting in the follow array is my attempt to
# maintain a clean tabular arrangement of the data while satisfying
# the demands of pycodestyle. Currently, E201 and E241 are not
# disabled by the `# noqa` annotation.
nan = np.nan
x = np.array([[1.0, 2.0, nan, 3.0],
[0.0, 4.0, 3.0, 1.0],
[nan, -.5, 0.5, nan],
[nan, 9.0, 9.0, nan],
[nan, nan, nan, nan],
[3.0, 3.0, 3.0, 3.0],
[0.0, 0.0, 0.0, 0.0]])
nan = xp.nan
x = xp.asarray([[1.0, 2.0, nan, 3.0],
[0.0, 4.0, 3.0, 1.0],
[nan, -.5, 0.5, nan],
[nan, 9.0, 9.0, nan],
[nan, nan, nan, nan],
[3.0, 3.0, 3.0, 3.0],
[0.0, 0.0, 0.0, 0.0]])
v = variation(x, axis=1, ddof=ddof, nan_policy='omit')
assert_allclose(v, expected)
xp_assert_close(v, expected)

def test_variation_ddof(self):
@skip_xp_backends(np_only=True,
reasons=['`nan_policy` only supports NumPy backend'])
def test_variation_ddof(self, xp):
# test variation with delta degrees of freedom
# regression test for gh-13341
a = np.array([1, 2, 3, 4, 5])
nan_a = np.array([1, 2, 3, np.nan, 4, 5, np.nan])
a = xp.asarray([1., 2., 3., 4., 5.])
nan_a = xp.asarray([1, 2, 3, xp.nan, 4, 5, xp.nan])
y = variation(a, ddof=1)
nan_y = variation(nan_a, nan_policy="omit", ddof=1)
assert_allclose(y, np.sqrt(5/2)/3)
xp_assert_close(y, math.sqrt(5/2)/3)
assert y == nan_y

0 comments on commit 7fee674

Please sign in to comment.