107 changes: 107 additions & 0 deletions docs/src/further_topics/dataless_cubes.rst
@@ -0,0 +1,107 @@
.. _dataless-cubes:

==============
Dataless Cubes
==============
It is possible for a cube to exist without a data payload.
In this case ``cube.data`` is ``None``, instead of containing an array (real or lazy) as
usual.

This can be useful when the cube is used purely as a placeholder for metadata, e.g. to
represent a combination of coordinates.

Most notably, dataless cubes can be used as the target "grid cube" for most regridding
schemes, since such a scheme uses only the target cube's coordinates, as in the sketch
below. See also :func:`iris.util.make_gridcube`.


Properties of dataless cubes
----------------------------

* ``cube.shape`` is unchanged
* ``cube.data`` == ``None``
* ``cube.dtype`` == ``None``
* ``cube.core_data()`` == ``cube.lazy_data()`` == ``None``
* ``cube.is_dataless()`` == ``True``
* ``cube.has_lazy_data()`` == ``False``


Cube creation
-------------
You can create a dataless cube with the :class:`~iris.cube.Cube` constructor
(i.e. the ``__init__`` call), by specifying the ``shape`` keyword in place of ``data``.
If both are specified, an error is raised, even if the data and shape are compatible.
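
For example, a minimal sketch:

.. code-block:: python

    import numpy as np

    from iris.cube import Cube

    cube = Cube(shape=(2, 3), long_name="placeholder")
    print(cube.shape)  # (2, 3)
    print(cube.data)  # None
    print(cube.is_dataless())  # True

    # Providing both is an error, even though this data and shape agree:
    Cube(np.zeros((2, 3)), shape=(2, 3))  # raises ValueError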


Data assignment
---------------
You can make an existing cube dataless by setting ``cube.data = None``.
The data array is simply discarded.
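
For example:

.. code-block:: python

    import numpy as np

    from iris.cube import Cube

    cube = Cube(np.zeros((2, 3)))
    cube.data = None  # the existing array is discarded
    print(cube.is_dataless())  # True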


Cube copy
---------
The syntax that allows you to replace data on copying,
e.g. ``cube2 = cube.copy(new_data)``, has been extended to accept the special value
:data:`iris.DATALESS`.

So, ``cube2 = cube.copy(iris.DATALESS)`` makes ``cube2`` a
dataless copy of ``cube``.
This is equivalent to ``cube2 = cube.copy(); cube2.data = None``.
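
For example, a minimal sketch:

.. code-block:: python

    import iris
    import numpy as np

    from iris.cube import Cube

    cube = Cube(np.zeros((2, 3)))
    cube2 = cube.copy(iris.DATALESS)
    print(cube2.is_dataless())  # True
    print(cube2.shape)  # (2, 3)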


Save and Load
-------------
The netcdf file interface can save and re-load dataless cubes correctly.
TODO: link to ref e.g. "netcdf_dataless" in netcdf docs,
when #6339 "Dataless netcdf save+load" is in place.
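
A sketch of the intended round-trip, assuming ``dataless_cube`` is a dataless cube as
above (the file name is illustrative):

.. code-block:: python

    import iris

    iris.save(dataless_cube, "dataless.nc")
    cube = iris.load_cube("dataless.nc")
    print(cube.is_dataless())  # True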


.. _dataless_merge:

Merging
-------
Merging is fully supported for dataless cubes, including combining them with "normal"
cubes.

* In all cases, the result has the same shape and metadata as if all the input cubes
  had data.
* Merging multiple dataless cubes produces a dataless result.
* Merging dataless and non-dataless cubes produces a partially 'missing' data array,
  i.e. the sections from dataless inputs are filled with masked data, as sketched
  below.
* Laziness is also preserved.
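
A minimal sketch of merging a dataless cube with a normal one (the coordinate values
are illustrative):

.. code-block:: python

    import numpy as np

    from iris.coords import AuxCoord, DimCoord
    from iris.cube import Cube, CubeList

    def make_cube(z, dataless=False):
        # One cube of a merge series, with a scalar "z" coordinate.
        cube = Cube(
            np.arange(3.0),
            long_name="phenomenon",
            dim_coords_and_dims=[(DimCoord(np.arange(3.0), long_name="x"), 0)],
            aux_coords_and_dims=[(AuxCoord([z], long_name="z"), ())],
        )
        if dataless:
            cube.data = None
        return cube

    cubes = CubeList([make_cube(z=1), make_cube(z=2, dataless=True)])
    merged = cubes.merge_cube()

    print(merged.shape)  # (2, 3)
    print(merged.is_dataless())  # False
    # The section from the dataless input is wholly masked.
    print(np.ma.getmaskarray(merged.data)[1].all())  # True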


Operations NOT supported
-------------------------
Dataless cubes are relatively new, and are not yet fully integrated with Iris cube
operations generally.

The following are some of the notable features which do *not* yet support dataless
cubes:

* plotting

* cube arithmetic

* statistics

* concatenation

* :meth:`iris.cube.CubeList.realise_data`

* various :class:`~iris.cube.Cube` methods, including at least:

* :meth:`~iris.cube.Cube.convert_units`

* :meth:`~iris.cube.Cube.subset`

* :meth:`~iris.cube.Cube.intersection`

* :meth:`~iris.cube.Cube.slices`

* :meth:`~iris.cube.Cube.interpolate`

* :meth:`~iris.cube.Cube.regrid`
Note: in this case the target ``grid`` can be dataless, but not the source
(``self``) cube.
1 change: 1 addition & 0 deletions docs/src/further_topics/index.rst
@@ -15,6 +15,7 @@ Extra information on specific technical issues.
lenient_maths
um_files_loading
missing_data_handling
dataless_cubes
netcdf_io
dask_best_practices/index
ugrid/index
5 changes: 5 additions & 0 deletions docs/src/whatsnew/latest.rst
@@ -40,6 +40,11 @@ This document explains the changes made to Iris for this release
:func:`~iris.fileformats.netcdf.saver.save_mesh` also supports ``zlib``
compression. (:issue:`6565`, :pull:`6728`)

#. `@pp-mo`_ added the ability to merge dataless cubes. This also means they can be
re-loaded normally with :func:`iris.load`. See :ref:`dataless_merge`.
Also added a new documentation section on dataless cubes.
(:issue:`5770`, :pull:`6581`)


🐛 Bugs Fixed
=============
16 changes: 10 additions & 6 deletions lib/iris/_data_manager.py
@@ -34,12 +34,16 @@ def __init__(self, data, shape=None):
dataless.

"""
if (shape is None) and (data is None):
msg = 'one of "shape" or "data" should be provided; both are None'
raise ValueError(msg)
elif (shape is not None) and (data is not None):
msg = '"shape" should only be provided if "data" is None'
raise ValueError(msg)
if shape is None:
if data is None:
msg = 'one of "shape" or "data" should be provided; both are None'
raise ValueError(msg)
else:
if data is not None:
msg = '"shape" should only be provided if "data" is None'
raise ValueError(msg)
# Normalise how shape is recorded
shape = tuple(shape)

# Initialise the instance.
self._shape = shape
78 changes: 62 additions & 16 deletions lib/iris/_merge.py
@@ -12,6 +12,7 @@
from collections import OrderedDict, namedtuple
from copy import deepcopy

import dask.array as da
import numpy as np

from iris._lazy_data import (
@@ -430,7 +431,13 @@ def match(self, other, error_on_mismatch):
if self.data_shape != other.data_shape:
msg = "cube.shape differs: {} != {}"
msgs.append(msg.format(self.data_shape, other.data_shape))
if self.data_type != other.data_type:
if (
self.data_type is not None
and other.data_type is not None
and self.data_type != other.data_type
):
# N.B. allow "None" to match any other dtype: this means that dataless
# cubes can merge with 'dataful' ones.
msg = "cube data dtype differs: {} != {}"
msgs.append(msg.format(self.data_type, other.data_type))
# Both cell_measures_and_dims and ancillary_variables_and_dims are
@@ -1109,8 +1116,6 @@ def __init__(self, cube):
source-cube.

"""
if cube.is_dataless():
raise iris.exceptions.DatalessError("merge")
# Default hint ordering for candidate dimension coordinates.
self._hints = [
"time",
@@ -1240,7 +1245,10 @@ def merge(self, unique=True):
# their data loaded then at the end we convert the stack back
# into a plain numpy array.
stack = np.empty(self._stack_shape, "object")
all_have_data = True
all_have_real_data = True
some_are_dataless = False
part_shape: tuple = None
part_dtype: np.dtype = None
for nd_index in nd_indexes:
# Get the data of the current existing or last known
# good source-cube
@@ -1249,18 +1257,51 @@
data = self._skeletons[group[offset]].data
# Ensure the data is represented as a dask array and
# slot that array into the stack.
if is_lazy_data(data):
all_have_data = False
if data is None:
some_are_dataless = True
else:
data = as_lazy_data(data)
# We have (at least one) array content : Record the shape+dtype
if part_shape is None:
part_shape = data.shape
part_dtype = data.dtype
else:
# We expect that the "parts" should **all be the same**
assert data.shape == part_shape
assert data.dtype == part_dtype

# ensure lazy (we make the result real, later, if all were real)
if is_lazy_data(data):
all_have_real_data = False
else:
data = as_lazy_data(data)
stack[nd_index] = data

merged_data = multidim_lazy_stack(stack)
if all_have_data:
# All inputs were concrete, so turn the result back into a
# normal array.
merged_data = as_concrete_data(merged_data)
merged_cube = self._get_cube(merged_data)
if part_shape is None:
# NO parts had data : the result will also be dataless
merged_data = None
merged_shape = self._shape
else:
# At least some inputs had data : the result will have a data array.
if some_are_dataless:
# Some parts were dataless: fill these with a lazy all-missing array.
missing_part = da.ma.masked_array(
data=da.zeros(part_shape, dtype=np.dtype("u1")),
mask=da.ones(part_shape, dtype=bool),
dtype=part_dtype,
)
for inds in np.ndindex(stack.shape):
if stack[inds] is None:
stack[inds] = missing_part

# Make a single lazy merged result array
merged_data = multidim_lazy_stack(stack)
merged_shape = None
if all_have_real_data:
# All inputs were concrete, so turn the result back into a
# normal array.
merged_data = as_concrete_data(merged_data)

merged_cube = self._get_cube(merged_data, shape=merged_shape)
merged_cubes.append(merged_cube)

return merged_cubes
@@ -1291,8 +1332,6 @@ def register(self, cube, error_on_mismatch=False):
this :class:`ProtoCube`.

"""
if cube.is_dataless():
raise iris.exceptions.DatalessError("merge")
cube_signature = self._cube_signature
other = self._build_signature(cube)
match = cube_signature.match(other, error_on_mismatch)
@@ -1545,12 +1584,18 @@ def name_in_independents():
# deferred loading, this does NOT change the shape.
self._shape.extend(signature.data_shape)

def _get_cube(self, data):
def _get_cube(self, data, shape=None):
"""Generate fully constructed cube.

Return a fully constructed cube for the given data, containing
all its coordinates and metadata.

Parameters
----------
data : array_like
Cube data content. If None, `shape` must be set and the result is dataless.
shape : tuple, optional
Cube data shape, only used if data is None.
"""
signature = self._cube_signature
dim_coords_and_dims = [
@@ -1573,6 +1618,7 @@ def _get_cube(self, data):
aux_coords_and_dims=aux_coords_and_dims,
cell_measures_and_dims=cms_and_dims,
ancillary_variables_and_dims=avs_and_dims,
shape=shape,
**kwargs,
)

2 changes: 1 addition & 1 deletion lib/iris/cube.py
@@ -5095,7 +5095,7 @@ def interpolate(

"""
if self.is_dataless():
raise iris.exceptions.DatalessError("interoplate")
raise iris.exceptions.DatalessError("interpolate")
coords, points = zip(*sample_points)
interp = scheme.interpolator(self, coords) # type: ignore[arg-type]
return interp(points, collapse_scalar=collapse_scalar)
65 changes: 65 additions & 0 deletions lib/iris/tests/integration/merge/test_dataless.py
@@ -0,0 +1,65 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Integration tests for merging with dataless cubes."""

import dask.array as da
import numpy as np

from iris.coords import AuxCoord, DimCoord
from iris.cube import Cube, CubeList


class TestMergeDataless:
def _testcube(self, z=1, name="this", dataless=False, lazy=False):
# Create a testcube with a scalar Z coord, for merge testing.
data = da.arange(3) if lazy else np.arange(3)
cube = Cube(
data,
long_name=name,
dim_coords_and_dims=[(DimCoord([0.0, 1.0, 2], long_name="x"), 0)],
aux_coords_and_dims=[(AuxCoord([z], long_name="z"), ())],
)
if dataless:
cube.data = None
return cube

def test_mixed_passthrough(self):
# Check that normal merge can handle dataless alongside dataful cubes.
cube_normal = self._testcube(name="this", dataless=False)
cube_dataless = self._testcube(name="that", dataless=True)
cubes = CubeList([cube_normal, cube_dataless])

result = cubes.merge()

assert len(result) == 2
cube1, cube2 = [result.extract_cube(name) for name in ("this", "that")]
assert not cube1.is_dataless()
assert cube2.is_dataless()

def test_dataless_merge(self):
# Check that dataless cubes can be merged.
cube_1 = self._testcube(z=1, dataless=True)
cube_2 = self._testcube(z=2, dataless=True)
cubes = CubeList([cube_1, cube_2])

cube = cubes.merge_cube()

assert cube.is_dataless()
assert np.all(cube.coord("z").points == [1, 2])

def test_dataless_dataful_merge(self):
# Check that dataless cubes can merge **with** regular ones.
# Include checking that laziness is preserved.
cube_normal = self._testcube(z=1, dataless=False, lazy=True)
cube_dataless = self._testcube(z=2, dataless=True)
cubes = CubeList([cube_normal, cube_dataless])

cube = cubes.merge_cube()

assert not cube.is_dataless()
assert cube.has_lazy_data()
data_z1, data_z2 = cube[0].data, cube[1].data
assert np.all(data_z1 == [0, 1, 2])
assert np.all(np.ma.getmaskarray(data_z2) == True) # noqa: E712