ENH: stats.variation: add array-API support

scipy · May 5, 2024 · 7fee674 · 7fee674
1 parent 4a537b7
commit 7fee674
Show file tree

Hide file tree

Showing 3 changed files with 137 additions and 87 deletions.
diff --git a/.github/workflows/array_api.yml b/.github/workflows/array_api.yml
@@ -98,3 +98,4 @@ jobs:
         python dev.py --no-build test -b all -t scipy._lib.tests.test__util -- --durations 3 --timeout=60
         python dev.py --no-build test -b all -t scipy.stats.tests.test_stats -- --durations 3 --timeout=60
         python dev.py --no-build test -b all -t scipy.stats.tests.test_morestats -- --durations 3 --timeout=60
+        python dev.py --no-build test -b all -t scipy.stats.tests.test_variation -- --durations 3 --timeout=60
diff --git a/scipy/stats/_variation.py b/scipy/stats/_variation.py
@@ -1,5 +1,8 @@
 import numpy as np
+
 from scipy._lib._util import _get_nan
+from scipy._lib._array_api import array_namespace
+
 from ._axis_nan_policy import _axis_nan_policy_factory
 
 
@@ -91,31 +94,39 @@ def variation(a, axis=0, nan_policy='propagate', ddof=0, *, keepdims=False):
     array([1.05109361, 0.31428986, 0.146483  ])
 
     """
+    xp = array_namespace(a)
+    a = xp.asarray(a)
     # `nan_policy` and `keepdims` are handled by `_axis_nan_policy`
+    # `axis=None` is only handled for NumPy backend
+    if axis is None:
+        a = xp.reshape(a, (-1,))
+        axis = 0
+    print(a)
+
     n = a.shape[axis]
     NaN = _get_nan(a)
 
     if a.size == 0 or ddof > n:
         # Handle as a special case to avoid spurious warnings.
         # The return values, if any, are all nan.
-        shp = np.asarray(a.shape)
-        shp = np.delete(shp, axis)
-        result = np.full(shp, fill_value=NaN)
-        return result[()]
+        shp = list(a.shape)
+        shp.pop(axis)
+        result = xp.full(shp, fill_value=NaN)
+        return result[()] if result.ndim == 0 else result
 
-    mean_a = a.mean(axis)
+    mean_a = xp.mean(a, axis=axis)
 
     if ddof == n:
         # Another special case.  Result is either inf or nan.
-        std_a = a.std(axis=axis, ddof=0)
-        result = np.full_like(std_a, fill_value=NaN)
+        std_a = xp.std(a, axis=axis, correction=0)
+        result = xp.full_like(std_a, fill_value=NaN)
         i = std_a > 0
-        result[i] = np.inf
-        result[i] = np.copysign(result[i], mean_a[i])
-        return result[()]
+        result[i] = xp.inf
+        result[i] = xp.copysign(result[i], mean_a[i])
+        return result[()] if result.ndim == 0 else result
 
     with np.errstate(divide='ignore', invalid='ignore'):
-        std_a = a.std(axis, ddof=ddof)
+        std_a = xp.std(a, axis=axis, correction=ddof)
         result = std_a / mean_a
 
-    return result[()]
+    return result[()] if result.ndim == 0 else result
diff --git a/scipy/stats/tests/test_variation.py b/scipy/stats/tests/test_variation.py
@@ -1,159 +1,197 @@
+import math
+
 import numpy as np
-from numpy.testing import assert_equal, assert_allclose
 import pytest
+from numpy.testing import suppress_warnings
+
 from scipy.stats import variation
 from scipy._lib._util import AxisError
+from scipy.conftest import array_api_compatible
+from scipy._lib._array_api import xp_assert_equal, xp_assert_close
+
+pytestmark = [array_api_compatible, pytest.mark.usefixtures("skip_xp_backends")]
+skip_xp_backends = pytest.mark.skip_xp_backends
 
 
 class TestVariation:
     """
     Test class for scipy.stats.variation
     """
 
-    def test_ddof(self):
-        x = np.arange(9.0)
-        assert_allclose(variation(x, ddof=1), np.sqrt(60/8)/4)
+    def test_ddof(self, xp):
+        x = xp.arange(9.0)
+        xp_assert_close(variation(x, ddof=1), xp.asarray(math.sqrt(60/8)/4))
 
     @pytest.mark.parametrize('sgn', [1, -1])
-    def test_sign(self, sgn):
-        x = np.array([1, 2, 3, 4, 5])
+    def test_sign(self, sgn, xp):
+        x = xp.asarray([1., 2., 3., 4., 5.])
         v = variation(sgn*x)
-        expected = sgn*np.sqrt(2)/3
-        assert_allclose(v, expected, rtol=1e-10)
+        expected = xp.asarray(sgn*math.sqrt(2)/3)
+        xp_assert_close(v, expected, rtol=1e-10)
 
-    def test_scalar(self):
+    def test_scalar(self, xp):
         # A scalar is treated like a 1-d sequence with length 1.
-        assert_equal(variation(4.0), 0.0)
+        xp_assert_equal(variation(4.0), 0.0)
 
     @pytest.mark.parametrize('nan_policy, expected',
                              [('propagate', np.nan),
                               ('omit', np.sqrt(20/3)/4)])
-    def test_variation_nan(self, nan_policy, expected):
-        x = np.arange(10.)
-        x[9] = np.nan
-        assert_allclose(variation(x, nan_policy=nan_policy), expected)
-
-    def test_nan_policy_raise(self):
-        x = np.array([1.0, 2.0, np.nan, 3.0])
+    @skip_xp_backends(np_only=True,
+                      reasons=['`nan_policy` only supports NumPy backend'])
+    def test_variation_nan(self, nan_policy, expected, xp):
+        x = xp.arange(10.)
+        x[9] = xp.nan
+        xp_assert_close(variation(x, nan_policy=nan_policy), expected)
+
+    @skip_xp_backends(np_only=True,
+                      reasons=['`nan_policy` only supports NumPy backend'])
+    def test_nan_policy_raise(self, xp):
+        x = xp.asarray([1.0, 2.0, xp.nan, 3.0])
         with pytest.raises(ValueError, match='input contains nan'):
             variation(x, nan_policy='raise')
 
-    def test_bad_nan_policy(self):
+    @skip_xp_backends(np_only=True,
+                      reasons=['`nan_policy` only supports NumPy backend'])
+    def test_bad_nan_policy(self, xp):
         with pytest.raises(ValueError, match='must be one of'):
             variation([1, 2, 3], nan_policy='foobar')
 
-    def test_keepdims(self):
-        x = np.arange(10).reshape(2, 5)
+    @skip_xp_backends(np_only=True,
+                      reasons=['`keepdims` only supports NumPy backend'])
+    def test_keepdims(self, xp):
+        x = xp.reshape(xp.arange(10), (2, 5))
         y = variation(x, axis=1, keepdims=True)
         expected = np.array([[np.sqrt(2)/2],
                              [np.sqrt(2)/7]])
-        assert_allclose(y, expected)
+        xp_assert_close(y, expected)
 
+    @skip_xp_backends(np_only=True,
+                      reasons=['`keepdims` only supports NumPy backend'])
     @pytest.mark.parametrize('axis, expected',
                              [(0, np.empty((1, 0))),
                               (1, np.full((5, 1), fill_value=np.nan))])
-    def test_keepdims_size0(self, axis, expected):
-        x = np.zeros((5, 0))
+    def test_keepdims_size0(self, axis, expected, xp):
+        x = xp.zeros((5, 0))
         y = variation(x, axis=axis, keepdims=True)
-        assert_equal(y, expected)
+        xp_assert_equal(y, expected)
 
+    @skip_xp_backends(np_only=True,
+                      reasons=['`keepdims` only supports NumPy backend'])
     @pytest.mark.parametrize('incr, expected_fill', [(0, np.inf), (1, np.nan)])
-    def test_keepdims_and_ddof_eq_len_plus_incr(self, incr, expected_fill):
-        x = np.array([[1, 1, 2, 2], [1, 2, 3, 3]])
+    def test_keepdims_and_ddof_eq_len_plus_incr(self, incr, expected_fill, xp):
+        x = xp.asarray([[1, 1, 2, 2], [1, 2, 3, 3]])
         y = variation(x, axis=1, ddof=x.shape[1] + incr, keepdims=True)
-        assert_equal(y, np.full((2, 1), fill_value=expected_fill))
+        xp_assert_equal(y, xp.full((2, 1), fill_value=expected_fill))
 
-    def test_propagate_nan(self):
+    @skip_xp_backends(np_only=True,
+                      reasons=['`nan_policy` only supports NumPy backend'])
+    def test_propagate_nan(self, xp):
         # Check that the shape of the result is the same for inputs
         # with and without nans, cf gh-5817
-        a = np.arange(8).reshape(2, -1).astype(float)
-        a[1, 0] = np.nan
+        a = xp.reshape(xp.arange(8, dtype=float), (2, -1))
+        a[1, 0] = xp.nan
         v = variation(a, axis=1, nan_policy="propagate")
-        assert_allclose(v, [np.sqrt(5/4)/1.5, np.nan], atol=1e-15)
+        xp_assert_close(v, [math.sqrt(5/4)/1.5, xp.nan], atol=1e-15)
 
-    def test_axis_none(self):
+    @skip_xp_backends(np_only=True, reasons=['Python list input uses NumPy backend'])
+    def test_axis_none(self, xp):
         # Check that `variation` computes the result on the flattened
         # input when axis is None.
         y = variation([[0, 1], [2, 3]], axis=None)
-        assert_allclose(y, np.sqrt(5/4)/1.5)
+        xp_assert_close(y, math.sqrt(5/4)/1.5)
 
-    def test_bad_axis(self):
+    def test_bad_axis(self, xp):
         # Check that an invalid axis raises np.exceptions.AxisError.
-        x = np.array([[1, 2, 3], [4, 5, 6]])
-        with pytest.raises(AxisError):
+        x = xp.asarray([[1, 2, 3], [4, 5, 6]])
+        with pytest.raises((AxisError, IndexError)):
             variation(x, axis=10)
 
-    def test_mean_zero(self):
+    def test_mean_zero(self, xp):
         # Check that `variation` returns inf for a sequence that is not
         # identically zero but whose mean is zero.
-        x = np.array([10, -3, 1, -4, -4])
+        x = xp.asarray([10., -3., 1., -4., -4.])
         y = variation(x)
-        assert_equal(y, np.inf)
+        xp_assert_equal(y, xp.asarray(xp.inf))
 
-        x2 = np.array([x, -10*x])
+        x2 = xp.stack([x, -10.*x])
         y2 = variation(x2, axis=1)
-        assert_equal(y2, [np.inf, np.inf])
+        xp_assert_equal(y2, xp.asarray([xp.inf, xp.inf]))
 
     @pytest.mark.parametrize('x', [np.zeros(5), [], [1, 2, np.inf, 9]])
-    def test_return_nan(self, x):
+    def test_return_nan(self, x, xp):
+        x = xp.asarray(x)
         # Test some cases where `variation` returns nan.
-        y = variation(x)
-        assert_equal(y, np.nan)
+        with suppress_warnings() as sup:
+            # torch
+            sup.filter(UserWarning, "std*")
+            y = variation(x)
+        xp_assert_equal(y, xp.asarray(xp.nan, dtype=x.dtype))
 
     @pytest.mark.parametrize('axis, expected',
                              [(0, []), (1, [np.nan]*3), (None, np.nan)])
-    def test_2d_size_zero_with_axis(self, axis, expected):
-        x = np.empty((3, 0))
-        y = variation(x, axis=axis)
-        assert_equal(y, expected)
-
-    def test_neg_inf(self):
+    def test_2d_size_zero_with_axis(self, axis, expected, xp):
+        x = xp.empty((3, 0))
+        with suppress_warnings() as sup:
+            # torch
+            sup.filter(UserWarning, "std*")
+            y = variation(x, axis=axis)
+        xp_assert_equal(y, xp.asarray(expected))
+
+    @skip_xp_backends(np_only=True,
+                      reasons=['`nan_policy` only supports NumPy backend'])
+    def test_neg_inf(self, xp):
         # Edge case that produces -inf: ddof equals the number of non-nan
         # values, the values are not constant, and the mean is negative.
-        x1 = np.array([-3, -5])
-        assert_equal(variation(x1, ddof=2), -np.inf)
+        x1 = xp.asarray([-3., -5.])
+        xp_assert_equal(variation(x1, ddof=2), xp.asarray(-xp.inf))
 
-        x2 = np.array([[np.nan, 1, -10, np.nan],
-                       [-20, -3, np.nan, np.nan]])
-        assert_equal(variation(x2, axis=1, ddof=2, nan_policy='omit'),
-                     [-np.inf, -np.inf])
+        x2 = xp.asarray([[xp.nan, 1, -10, xp.nan],
+                       [-20, -3, xp.nan, xp.nan]])
+        xp_assert_equal(variation(x2, axis=1, ddof=2, nan_policy='omit'),
+                        [-xp.inf, -xp.inf])
 
+    @skip_xp_backends(np_only=True,
+                      reasons=['`nan_policy` only supports NumPy backend'])
     @pytest.mark.parametrize("nan_policy", ['propagate', 'omit'])
-    def test_combined_edge_cases(self, nan_policy):
-        x = np.array([[0, 10, np.nan, 1],
-                      [0, -5, np.nan, 2],
-                      [0, -5, np.nan, 3]])
+    def test_combined_edge_cases(self, nan_policy, xp):
+        x = xp.array([[0, 10, xp.nan, 1],
+                      [0, -5, xp.nan, 2],
+                      [0, -5, xp.nan, 3]])
         y = variation(x, axis=0, nan_policy=nan_policy)
-        assert_allclose(y, [np.nan, np.inf, np.nan, np.sqrt(2/3)/2])
+        xp_assert_close(y, [xp.nan, xp.inf, xp.nan, math.sqrt(2/3)/2])
 
+    @skip_xp_backends(np_only=True,
+                      reasons=['`nan_policy` only supports NumPy backend'])
     @pytest.mark.parametrize(
         'ddof, expected',
         [(0, [np.sqrt(1/6), np.sqrt(5/8), np.inf, 0, np.nan, 0.0, np.nan]),
          (1, [0.5, np.sqrt(5/6), np.inf, 0, np.nan, 0, np.nan]),
          (2, [np.sqrt(0.5), np.sqrt(5/4), np.inf, np.nan, np.nan, 0, np.nan])]
     )
-    def test_more_nan_policy_omit_tests(self, ddof, expected):
+    def test_more_nan_policy_omit_tests(self, ddof, expected, xp):
         # The slightly strange formatting in the follow array is my attempt to
         # maintain a clean tabular arrangement of the data while satisfying
         # the demands of pycodestyle.  Currently, E201 and E241 are not
         # disabled by the `# noqa` annotation.
-        nan = np.nan
-        x = np.array([[1.0, 2.0, nan, 3.0],
-                      [0.0, 4.0, 3.0, 1.0],
-                      [nan, -.5, 0.5, nan],
-                      [nan, 9.0, 9.0, nan],
-                      [nan, nan, nan, nan],
-                      [3.0, 3.0, 3.0, 3.0],
-                      [0.0, 0.0, 0.0, 0.0]])
+        nan = xp.nan
+        x = xp.asarray([[1.0, 2.0, nan, 3.0],
+                        [0.0, 4.0, 3.0, 1.0],
+                        [nan, -.5, 0.5, nan],
+                        [nan, 9.0, 9.0, nan],
+                        [nan, nan, nan, nan],
+                        [3.0, 3.0, 3.0, 3.0],
+                        [0.0, 0.0, 0.0, 0.0]])
         v = variation(x, axis=1, ddof=ddof, nan_policy='omit')
-        assert_allclose(v, expected)
+        xp_assert_close(v, expected)
 
-    def test_variation_ddof(self):
+    @skip_xp_backends(np_only=True,
+                      reasons=['`nan_policy` only supports NumPy backend'])
+    def test_variation_ddof(self, xp):
         # test variation with delta degrees of freedom
         # regression test for gh-13341
-        a = np.array([1, 2, 3, 4, 5])
-        nan_a = np.array([1, 2, 3, np.nan, 4, 5, np.nan])
+        a = xp.asarray([1., 2., 3., 4., 5.])
+        nan_a = xp.asarray([1, 2, 3, xp.nan, 4, 5, xp.nan])
         y = variation(a, ddof=1)
         nan_y = variation(nan_a, nan_policy="omit", ddof=1)
-        assert_allclose(y, np.sqrt(5/2)/3)
+        xp_assert_close(y, math.sqrt(5/2)/3)
         assert y == nan_y