SciTools · pp-mo · Mar 16, 2026 · May 26, 2026 · Jun 9, 2026 · Jun 10, 2026
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -56,7 +56,6 @@ repos:
 
 # Hooks from all other repos
 # NOTE : keep these in hook-name (aka 'id') order
-
 -   repo: https://github.com/adamchainz/blacken-docs
     # This template does not keep up-to-date with versions, visit the repo to see the most recent release.
     rev: 1.20.0

diff --git a/lib/iris/cube.py b/lib/iris/cube.py
@@ -1280,10 +1280,6 @@ def __init__(
             ...                                  (longitude, 1)])
 
         """
-        # Temporary error while we transition the API.
-        if isinstance(data, str):
-            raise TypeError("Invalid data type: {!r}.".format(data))
-
         # Configure the metadata manager.
         self._metadata_manager = metadata_manager_factory(CubeMetadata)
 
@@ -4468,15 +4464,20 @@ def __eq__(self, other):
 
             # Having checked everything else, check approximate data equality.
             if result and not dataless_equality:
-                # TODO: why do we use allclose() here, but strict equality in
-                #  _DimensionalMetadata (via util.array_equal())?
-                result = bool(
-                    np.allclose(
-                        self.core_data(),
-                        other.core_data(),
-                        equal_nan=True,
+                if self.dtype.kind in "if":
+                    # signed-integer or floating-point data: compare approximately
+                    # TODO: why do we use allclose() here, but strict equality in
+                    #  _DimensionalMetadata (via util.array_equal())?
+                    result = bool(
+                        np.allclose(
+                            self.core_data(),
+                            other.core_data(),
+                            equal_nan=True,
+                        )
                     )
-                )
+                else:
+                    # any other dtype kind (e.g. strings): use exact equality
+                    result = bool(np.all(self.core_data() == other.core_data()))
         return result
 
     # Must supply __ne__, Python does not defer to __eq__ for negative equality

diff --git a/lib/iris/fileformats/_nc_load_rules/helpers.py b/lib/iris/fileformats/_nc_load_rules/helpers.py
@@ -708,13 +708,13 @@ def build_and_add_global_attributes(engine: Engine):
             ),
         )
         if problem is not None:
-            stack_notes = problem.stack_trace.__notes__
+            stack_notes = problem.stack_trace.__notes__  # type: ignore[attr-defined]
             if stack_notes is None:
                 stack_notes = []
             stack_notes.append(
                 f"Skipping disallowed global attribute '{attr_name}' (see above error)"
             )
-            problem.stack_trace.__notes__ = stack_notes
+            problem.stack_trace.__notes__ = stack_notes  # type: ignore[attr-defined]
 
 
 ################################################################################
@@ -1209,9 +1209,6 @@ def get_attr_units(cf_var, attributes, capture_invalid=False):
         attributes["invalid_units"] = attr_units
         attr_units = UNKNOWN_UNIT_STRING
 
-    if np.issubdtype(cf_var.dtype, np.str_):
-        attr_units = NO_UNIT_STRING
-
     if any(
         hasattr(cf_var.cf_data, name)
         for name in ("flag_values", "flag_masks", "flag_meanings")
@@ -1536,14 +1533,14 @@ def build_and_add_dimension_coordinate(
     )
     if problem is not None:
         coord_var_name = str(cf_coord_var.cf_name)
-        stack_notes = problem.stack_trace.__notes__
+        stack_notes = problem.stack_trace.__notes__  # type: ignore[attr-defined]
         if stack_notes is None:
             stack_notes = []
         stack_notes.append(
             f"Failed to create {coord_var_name} dimension coordinate:\n"
             f"Gracefully creating {coord_var_name!r} auxiliary coordinate instead."
         )
-        problem.stack_trace.__notes__ = stack_notes
+        problem.stack_trace.__notes__ = stack_notes  # type: ignore[attr-defined]
         problem.handled = True
 
         _ = _add_or_capture(
@@ -1574,11 +1571,7 @@ def _build_auxiliary_coordinate(
     # Get units
     attr_units = get_attr_units(cf_coord_var, attributes)
 
-    # Get any coordinate point data.
-    if isinstance(cf_coord_var, cf.CFLabelVariable):
-        points_data = cf_coord_var.cf_label_data(engine.cf_var)
-    else:
-        points_data = _get_cf_var_data(cf_coord_var)
+    points_data = _get_cf_var_data(cf_coord_var)
 
     # Get any coordinate bounds.
     cf_bounds_var, climatological = get_cf_bounds_var(cf_coord_var)
@@ -1643,9 +1636,9 @@ def _add_auxiliary_coordinate(
 
     # Determine the name of the dimension/s shared between the CF-netCDF data variable
     # and the coordinate being built.
-    common_dims = [
-        dim for dim in cf_coord_var.dimensions if dim in engine.cf_var.dimensions
-    ]
+    coord_dims = cf_coord_var.dimensions
+    datavar_dims = engine.cf_var.dimensions
+    common_dims = [dim for dim in coord_dims if dim in datavar_dims]
     data_dims = None
     if common_dims:
         # Calculate the offset of each common dimension.

diff --git a/lib/iris/fileformats/cf.py b/lib/iris/fileformats/cf.py
@@ -32,7 +32,7 @@
 
 import iris.exceptions
 import iris.fileformats._nc_load_rules.helpers as hh
-from iris.fileformats.netcdf import _thread_safe_nc
+from iris.fileformats.netcdf import _bytecoding_datasets, _thread_safe_nc
 from iris.mesh.components import Connectivity
 import iris.util
 import iris.warnings
@@ -73,7 +73,9 @@
 
 # NetCDF returns a different type for strings depending on Python version.
 def _is_str_dtype(var):
-    return np.issubdtype(var.dtype, np.bytes_)
+    # N.B. accept both bytes ('S') and unicode ('U') kinds, since variable wrappers
+    #  may represent 'S1' type data as 'U<xx>'.
+    return np.dtype(var.dtype).kind in "SU"
 
 
 ################################################################################
@@ -774,73 +776,6 @@ def identify(cls, variables, ignore=None, target=None, warn=True):
 
         return result
 
-    def cf_label_data(self, cf_data_var):
-        """Return the associated CF-netCDF label variable strings.
-
-        Parameters
-        ----------
-        cf_data_var : :class:`iris.fileformats.cf.CFDataVariable`
-            The CF-netCDF data variable which the CF-netCDF label variable
-            describes.
-
-        Returns
-        -------
-        str labels
-
-        """
-        if not isinstance(cf_data_var, CFDataVariable):
-            raise TypeError(
-                "cf_data_var argument should be of type CFDataVariable. Got %r."
-                % type(cf_data_var)
-            )
-
-        # Determine the name of the label string (or length) dimension by
-        # finding the dimension name that doesn't exist within the data dimensions.
-        str_dim_name = list(set(self.dimensions) - set(cf_data_var.dimensions))
-
-        if len(str_dim_name) != 1:
-            raise ValueError(
-                "Invalid string dimensions for CF-netCDF label variable %r"
-                % self.cf_name
-            )
-
-        str_dim_name = str_dim_name[0]
-        label_data = self[:]
-
-        if ma.isMaskedArray(label_data):
-            label_data = label_data.filled()
-
-        # Determine whether we have a string-valued scalar label
-        # i.e. a character variable that only has one dimension (the length of the string).
-        if self.ndim == 1:
-            label_string = b"".join(label_data).strip()
-            label_string = label_string.decode("utf8")
-            data = np.array([label_string])
-        else:
-            # Determine the index of the string dimension.
-            str_dim = self.dimensions.index(str_dim_name)
-
-            # Calculate new label data shape (without string dimension) and create payload array.
-            new_shape = tuple(
-                dim_len for i, dim_len in enumerate(self.shape) if i != str_dim
-            )
-            string_basetype = "|U%d"
-            string_dtype = string_basetype % self.shape[str_dim]
-            data = np.empty(new_shape, dtype=string_dtype)
-
-            for index in np.ndindex(new_shape):
-                # Create the slice for the label data.
-                if str_dim == 0:
-                    label_index = (slice(None, None),) + index
-                else:
-                    label_index = index + (slice(None, None),)
-
-                label_string = b"".join(label_data[label_index]).strip()
-                label_string = label_string.decode("utf8")
-                data[index] = label_string
-
-        return data
-
     def cf_label_dimensions(self, cf_data_var):
         """Return the name of the associated CF-netCDF label variable data dimensions.
 
@@ -1371,7 +1306,12 @@ def __init__(self, file_source, warn=False, monotonic=False):
             else:
                 self._filename = file_source
 
-            self._dataset = _thread_safe_nc.DatasetWrapper(self._filename, mode="r")
+            if _bytecoding_datasets.DECODE_TO_STRINGS_ON_READ:
+                ds_type = _bytecoding_datasets.EncodedDataset
+            else:
+                ds_type = _thread_safe_nc.DatasetWrapper
+
+            self._dataset = ds_type(self._filename, mode="r")
             self._own_file = True
         else:
             # We have been passed an open dataset.
@@ -1404,6 +1344,9 @@ def __init__(self, file_source, warn=False, monotonic=False):
             self._with_ugrid = False
 
         # Read the variables in the dataset only once to reduce runtime.
+        ds = self._dataset
+        # Turn off *any* automatic decoding in the underlying netCDF4 dataset.
+        ds.set_auto_chartostring(False)
         variables = self._dataset.variables
         self._translate(variables)
         self._build_cf_groups(variables)

diff --git a/lib/iris/fileformats/netcdf/__init__.py b/lib/iris/fileformats/netcdf/__init__.py
@@ -25,6 +25,12 @@
 
 # Note: these probably shouldn't be public, but for now they are.
 from .._nc_load_rules.helpers import UnknownCellMethodWarning, parse_cell_methods
+from ._bytecoding_datasets import (
+    DECODE_TO_STRINGS_ON_READ,
+    DEFAULT_READ_ENCODING,
+    DEFAULT_WRITE_ENCODING,
+    SUPPORTED_ENCODINGS,
+)
 from .loader import DEBUG, NetCDFDataProxy, load_cubes
 from .saver import (
     CF_CONVENTIONS_VERSION,
@@ -42,9 +48,13 @@
     "CFNameCoordMap",
     "CF_CONVENTIONS_VERSION",
     "DEBUG",
+    "DECODE_TO_STRINGS_ON_READ",
+    "DEFAULT_READ_ENCODING",
+    "DEFAULT_WRITE_ENCODING",
     "MESH_ELEMENTS",
     "NetCDFDataProxy",
     "SPATIO_TEMPORAL_AXES",
+    "SUPPORTED_ENCODINGS",
     "Saver",
     "UnknownCellMethodWarning",
     "load_cubes",