Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Error when data variables have different dimensions #280

Open
max-sixty opened this issue Nov 4, 2023 · 2 comments
Open

Error when data variables have different dimensions #280

max-sixty opened this issue Nov 4, 2023 · 2 comments
Labels
bug Something isn't working

Comments

@max-sixty
Copy link
Contributor

Very possibly I'm mistaken; or is this not supported?

ds = xr.tutorial.load_dataset('air_temperature')

ds['air2'] = ds['air'].sum('lat')

ds.groupby('lon').count(...)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[9], line 1
----> 1 ds.groupby('lon').count(...)

File /opt/homebrew/lib/python3.9/site-packages/xarray/core/_aggregations.py:2418, in DatasetGroupByAggregations.count(self, dim, keep_attrs, **kwargs)
   2344 """
   2345 Reduce this Dataset's data by applying ``count`` along some dimension(s).
   2346
   (...)
   2411     da       (labels) int64 1 2 2
   2412 """
   2413 if (
   2414     flox_available
   2415     and OPTIONS["use_flox"]
   2416     and contains_only_chunked_or_numpy(self._obj)
   2417 ):
-> 2418     return self._flox_reduce(
   2419         func="count",
   2420         dim=dim,
   2421         numeric_only=False,
   2422         # fill_value=fill_value,
   2423         keep_attrs=keep_attrs,
   2424         **kwargs,
   2425     )
   2426 else:
   2427     return self.reduce(
   2428         duck_array_ops.count,
   2429         dim=dim,
   (...)
   2432         **kwargs,
   2433     )


File /opt/homebrew/lib/python3.9/site-packages/xarray/core/groupby.py:1034, in GroupBy._flox_reduce(self, dim, keep_attrs, **kwargs)
   1031     kwargs.setdefault("min_count", 1)
   1033 output_index = grouper.full_index
-> 1034 result = xarray_reduce(
   1035     obj.drop_vars(non_numeric.keys()),
   1036     self._codes,
   1037     dim=parsed_dim,
   1038     # pass RangeIndex as a hint to flox that `by` is already factorized
   1039     expected_groups=(pd.RangeIndex(len(output_index)),),
   1040     isbin=False,
   1041     keep_attrs=keep_attrs,
   1042     **kwargs,
   1043 )
   1045 # we did end up reducing over dimension(s) that are
   1046 # in the grouped variable
   1047 group_dims = grouper.group.dims

File /opt/homebrew/lib/python3.9/site-packages/flox/xarray.py:415, in xarray_reduce(obj, func, expected_groups, isbin, sort, dim, fill_value, dtype, method, engine, keep_attrs, skipna, min_count, reindex, *by, **finalize_kwargs)
    413 output_core_dims = [d for d in input_core_dims[0] if d not in dim_tuple]
    414 output_core_dims.extend(group_names)
--> 415 actual = xr.apply_ufunc(
    416     wrapper,
    417     ds_broad.drop_vars(tuple(missing_dim)).transpose(..., *grouper_dims),
    418     *by_da,
    419     input_core_dims=input_core_dims,
    420     # for xarray's test_groupby_duplicate_coordinate_labels
    421     exclude_dims=set(dim_tuple),
    422     output_core_dims=[output_core_dims],
    423     dask="allowed",
    424     dask_gufunc_kwargs=dict(
    425         output_sizes=group_sizes, output_dtypes=[dtype] if dtype is not None else None
    426     ),
    427     keep_attrs=keep_attrs,
    428     kwargs={
    429         "func": func,
    430         "axis": axis,
    431         "sort": sort,
    432         "fill_value": fill_value,
    433         "method": method,
    434         "min_count": min_count,
    435         "skipna": skipna,
    436         "engine": engine,
    437         "reindex": reindex,
    438         "expected_groups": tuple(expected_groups_valid_list),
    439         "isbin": isbins,
    440         "finalize_kwargs": finalize_kwargs,
    441         "dtype": dtype,
    442         "core_dims": input_core_dims,
    443     },
    444 )
    446 # restore non-dim coord variables without the core dimension
    447 # TODO: shouldn't apply_ufunc handle this?
    448 for var in set(ds_broad._coord_names) - set(ds_broad._indexes) - set(ds_broad.dims):

File /opt/homebrew/lib/python3.9/site-packages/xarray/core/computation.py:1249, in apply_ufunc(func, input_core_dims, output_core_dims, exclude_dims, vectorize, join, dataset_join, dataset_fill_value, keep_attrs, kwargs, dask, output_dtypes, output_sizes, meta, dask_gufunc_kwargs, on_missing_core_dim, *args)
   1247 # feed datasets apply_variable_ufunc through apply_dataset_vfunc
   1248 elif any(is_dict_like(a) for a in args):
-> 1249     return apply_dataset_vfunc(
   1250         variables_vfunc,
   1251         *args,
   1252         signature=signature,
   1253         join=join,
   1254         exclude_dims=exclude_dims,
   1255         dataset_join=dataset_join,
   1256         fill_value=dataset_fill_value,
   1257         keep_attrs=keep_attrs,
   1258         on_missing_core_dim=on_missing_core_dim,
   1259     )
   1260 # feed DataArray apply_variable_ufunc through apply_dataarray_vfunc
   1261 elif any(isinstance(a, DataArray) for a in args):

File /opt/homebrew/lib/python3.9/site-packages/xarray/core/computation.py:530, in apply_dataset_vfunc(func, signature, join, dataset_join, fill_value, exclude_dims, keep_attrs, on_missing_core_dim, *args)
    525 list_of_coords, list_of_indexes = build_output_coords_and_indexes(
    526     args, signature, exclude_dims, combine_attrs=keep_attrs
    527 )
    528 args = tuple(getattr(arg, "data_vars", arg) for arg in args)
--> 530 result_vars = apply_dict_of_variables_vfunc(
    531     func,
    532     *args,
    533     signature=signature,
    534     join=dataset_join,
    535     fill_value=fill_value,
    536     on_missing_core_dim=on_missing_core_dim,
    537 )
    539 out: Dataset | tuple[Dataset, ...]
    540 if signature.num_outputs > 1:

File /opt/homebrew/lib/python3.9/site-packages/xarray/core/computation.py:457, in apply_dict_of_variables_vfunc(func, signature, join, fill_value, on_missing_core_dim, *args)
    455 else:
    456     if on_missing_core_dim == "raise":
--> 457         raise ValueError(core_dim_present)
    458     elif on_missing_core_dim == "copy":
    459         result_vars[name] = variable_args[0]

ValueError: Missing core dims {'lat'} from arg number 1 on a variable named `air2`:
<xarray.Variable (time: 2920, lon: 53)>
array([[6984.9497, 6991.6606, 6991.5303, ..., 6998.77  , 7007.8804,
        7016.5605],
       [6976.4307, 6988.45  , 6993.2407, ..., 6994.3906, 7006.7505,
        7019.941 ],
       [6975.2603, 6982.02  , 6988.77  , ..., 6992.0503, 7004.9404,
        7020.3506],
       ...,
       [6990.7505, 6998.3496, 7013.3496, ..., 6995.05  , 7008.6504,
        7019.4497],
       [6984.95  , 6991.6504, 7007.949 , ..., 6994.15  , 7008.55  ,
        7020.8506],
       [6981.75  , 6983.85  , 6997.0503, ..., 6985.6494, 6999.2495,
        7012.0493]], dtype=float32)
@dcherian
Copy link
Collaborator

dcherian commented Nov 5, 2023

Yuck, it should work. I tried to handle it here, but I never use `...` (Ellipsis) as the `dim` argument, so this code path is probably not well tested.

flox/flox/xarray.py

Lines 397 to 406 in c15572e

# These data variables do not have any of the core dimension,
# take them out to prevent errors.
# apply_ufunc can handle non-dim coordinate variables without core dimensions
missing_dim = {}
if isinstance(obj, xr.Dataset):
    # broadcasting means the group dim gets added to ds, so we check the original obj
    for k, v in obj.data_vars.items():
        is_missing_dim = not (any(d in v.dims for d in dim_tuple))
        if is_missing_dim:
            missing_dim[k] = v

I would appreciate any help fixing it :)

@max-sixty
Copy link
Contributor Author

No great stress! Easy to turn off for one calc.

I would appreciate any help fixing it :)

(just to set expectations — I'm way way over on my budget of contributing to projects vs. getting work done, so it's quite unlikely I get to this soon-ish)

@dcherian dcherian added the bug Something isn't working label Nov 6, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug Something isn't working
Projects
None yet
Development

No branches or pull requests

2 participants