Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ repos:

# Hooks from all other repos
# NOTE : keep these in hook-name (aka 'id') order

- repo: https://github.com/adamchainz/blacken-docs
# This template does not keep up to date with versions; visit the repo to see the most recent release.
rev: 1.20.0
Expand Down
25 changes: 13 additions & 12 deletions lib/iris/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -1280,10 +1280,6 @@ def __init__(
... (longitude, 1)])

"""
# Temporary error while we transition the API.
if isinstance(data, str):
raise TypeError("Invalid data type: {!r}.".format(data))

# Configure the metadata manager.
self._metadata_manager = metadata_manager_factory(CubeMetadata)

Expand Down Expand Up @@ -4468,15 +4464,20 @@ def __eq__(self, other):

# Having checked everything else, check approximate data equality.
if result and not dataless_equality:
# TODO: why do we use allclose() here, but strict equality in
# _DimensionalMetadata (via util.array_equal())?
result = bool(
np.allclose(
self.core_data(),
other.core_data(),
equal_nan=True,
if self.dtype.kind in "if":
# numbers
# TODO: why do we use allclose() here, but strict equality in
# _DimensionalMetadata (via util.array_equal())?
result = bool(
np.allclose(
self.core_data(),
other.core_data(),
equal_nan=True,
)
)
)
else:
# non-numeric: use exact equality
result = bool(np.all(self.core_data() == other.core_data()))
return result

# Must supply __ne__, Python does not defer to __eq__ for negative equality
Expand Down
23 changes: 8 additions & 15 deletions lib/iris/fileformats/_nc_load_rules/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,13 +708,13 @@ def build_and_add_global_attributes(engine: Engine):
),
)
if problem is not None:
stack_notes = problem.stack_trace.__notes__
stack_notes = problem.stack_trace.__notes__ # type: ignore[attr-defined]
if stack_notes is None:
stack_notes = []
stack_notes.append(
f"Skipping disallowed global attribute '{attr_name}' (see above error)"
)
problem.stack_trace.__notes__ = stack_notes
problem.stack_trace.__notes__ = stack_notes # type: ignore[attr-defined]


################################################################################
Expand Down Expand Up @@ -1209,9 +1209,6 @@ def get_attr_units(cf_var, attributes, capture_invalid=False):
attributes["invalid_units"] = attr_units
attr_units = UNKNOWN_UNIT_STRING

if np.issubdtype(cf_var.dtype, np.str_):
attr_units = NO_UNIT_STRING

if any(
hasattr(cf_var.cf_data, name)
for name in ("flag_values", "flag_masks", "flag_meanings")
Expand Down Expand Up @@ -1536,14 +1533,14 @@ def build_and_add_dimension_coordinate(
)
if problem is not None:
coord_var_name = str(cf_coord_var.cf_name)
stack_notes = problem.stack_trace.__notes__
stack_notes = problem.stack_trace.__notes__ # type: ignore[attr-defined]
if stack_notes is None:
stack_notes = []
stack_notes.append(
f"Failed to create {coord_var_name} dimension coordinate:\n"
f"Gracefully creating {coord_var_name!r} auxiliary coordinate instead."
)
problem.stack_trace.__notes__ = stack_notes
problem.stack_trace.__notes__ = stack_notes # type: ignore[attr-defined]
problem.handled = True

_ = _add_or_capture(
Expand Down Expand Up @@ -1574,11 +1571,7 @@ def _build_auxiliary_coordinate(
# Get units
attr_units = get_attr_units(cf_coord_var, attributes)

# Get any coordinate point data.
if isinstance(cf_coord_var, cf.CFLabelVariable):
points_data = cf_coord_var.cf_label_data(engine.cf_var)
else:
points_data = _get_cf_var_data(cf_coord_var)
points_data = _get_cf_var_data(cf_coord_var)

# Get any coordinate bounds.
cf_bounds_var, climatological = get_cf_bounds_var(cf_coord_var)
Expand Down Expand Up @@ -1643,9 +1636,9 @@ def _add_auxiliary_coordinate(

# Determine the name of the dimension/s shared between the CF-netCDF data variable
# and the coordinate being built.
common_dims = [
dim for dim in cf_coord_var.dimensions if dim in engine.cf_var.dimensions
]
coord_dims = cf_coord_var.dimensions
datavar_dims = engine.cf_var.dimensions
common_dims = [dim for dim in coord_dims if dim in datavar_dims]
data_dims = None
if common_dims:
# Calculate the offset of each common dimension.
Expand Down
83 changes: 13 additions & 70 deletions lib/iris/fileformats/cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

import iris.exceptions
import iris.fileformats._nc_load_rules.helpers as hh
from iris.fileformats.netcdf import _thread_safe_nc
from iris.fileformats.netcdf import _bytecoding_datasets, _thread_safe_nc
from iris.mesh.components import Connectivity
import iris.util
import iris.warnings
Expand Down Expand Up @@ -73,7 +73,9 @@

# NetCDF returns a different type for strings depending on Python version.
def _is_str_dtype(var):
return np.issubdtype(var.dtype, np.bytes_)
# N.B. use 'datatype' not 'dtype', to "look inside" variable wrappers which
# represent 'S1' type data as 'U<xx>'.
return np.dtype(var.dtype).kind in "SU"


################################################################################
Expand Down Expand Up @@ -774,73 +776,6 @@ def identify(cls, variables, ignore=None, target=None, warn=True):

return result

def cf_label_data(self, cf_data_var):
    """Return the associated CF-netCDF label variable strings.

    The character data of this variable is collapsed along its string
    (length) dimension: the bytes along that dimension are joined, stripped
    of surrounding whitespace and decoded as UTF-8 to give one label each.

    Parameters
    ----------
    cf_data_var : :class:`iris.fileformats.cf.CFDataVariable`
        The CF-netCDF data variable which the CF-netCDF label variable
        describes.

    Returns
    -------
    numpy.ndarray
        The str labels. For a one-dimensional variable this is a length-1
        array; otherwise its shape is this variable's shape with the string
        dimension removed.

    Raises
    ------
    TypeError
        If `cf_data_var` is not a ``CFDataVariable``.
    ValueError
        If there is not exactly one dimension of this variable that is
        absent from the dimensions of `cf_data_var`.

    """
    if not isinstance(cf_data_var, CFDataVariable):
        raise TypeError(
            "cf_data_var argument should be of type CFDataVariable. Got %r."
            % type(cf_data_var)
        )

    # The string (length) dimension is the single dimension of this variable
    # that does not exist within the data variable's dimensions.
    str_dim_names = list(set(self.dimensions) - set(cf_data_var.dimensions))
    if len(str_dim_names) != 1:
        raise ValueError(
            "Invalid string dimensions for CF-netCDF label variable %r"
            % self.cf_name
        )
    str_dim_name = str_dim_names[0]

    label_data = self[:]
    if ma.isMaskedArray(label_data):
        label_data = label_data.filled()

    def _decode(chars):
        # Join a 1-D run of single bytes into one stripped, decoded string.
        return b"".join(chars).strip().decode("utf8")

    # A string-valued scalar label: a character variable whose only
    # dimension is the length of the string.
    if self.ndim == 1:
        return np.array([_decode(label_data)])

    # Index of the string dimension, and the result shape without it.
    str_dim = self.dimensions.index(str_dim_name)
    new_shape = tuple(
        dim_len for i, dim_len in enumerate(self.shape) if i != str_dim
    )
    data = np.empty(new_shape, dtype="|U%d" % self.shape[str_dim])

    whole_axis = (slice(None, None),)
    for index in np.ndindex(new_shape):
        # Put the full slice back at the string dimension's own position, so
        # the right axis is taken wherever that dimension sits (first, last
        # or in between).
        label_index = index[:str_dim] + whole_axis + index[str_dim:]
        data[index] = _decode(label_data[label_index])

    return data

def cf_label_dimensions(self, cf_data_var):
"""Return the name of the associated CF-netCDF label variable data dimensions.

Expand Down Expand Up @@ -1371,7 +1306,12 @@ def __init__(self, file_source, warn=False, monotonic=False):
else:
self._filename = file_source

self._dataset = _thread_safe_nc.DatasetWrapper(self._filename, mode="r")
if _bytecoding_datasets.DECODE_TO_STRINGS_ON_READ:
ds_type = _bytecoding_datasets.EncodedDataset
else:
ds_type = _thread_safe_nc.DatasetWrapper

self._dataset = ds_type(self._filename, mode="r")
self._own_file = True
else:
# We have been passed an open dataset.
Expand Down Expand Up @@ -1404,6 +1344,9 @@ def __init__(self, file_source, warn=False, monotonic=False):
self._with_ugrid = False

# Read the variables in the dataset only once to reduce runtime.
ds = self._dataset
# Turn off *any* automatic decoding in the underlying netCDF4 dataset.
ds.set_auto_chartostring(False)
variables = self._dataset.variables
self._translate(variables)
self._build_cf_groups(variables)
Expand Down
10 changes: 10 additions & 0 deletions lib/iris/fileformats/netcdf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@

# Note: these probably shouldn't be public, but for now they are.
from .._nc_load_rules.helpers import UnknownCellMethodWarning, parse_cell_methods
from ._bytecoding_datasets import (
DECODE_TO_STRINGS_ON_READ,
DEFAULT_READ_ENCODING,
DEFAULT_WRITE_ENCODING,
SUPPORTED_ENCODINGS,
)
from .loader import DEBUG, NetCDFDataProxy, load_cubes
from .saver import (
CF_CONVENTIONS_VERSION,
Expand All @@ -42,9 +48,13 @@
"CFNameCoordMap",
"CF_CONVENTIONS_VERSION",
"DEBUG",
"DECODE_TO_STRINGS_ON_READ",
"DEFAULT_READ_ENCODING",
"DEFAULT_WRITE_ENCODING",
"MESH_ELEMENTS",
"NetCDFDataProxy",
"SPATIO_TEMPORAL_AXES",
"SUPPORTED_ENCODINGS",
"Saver",
"UnknownCellMethodWarning",
"load_cubes",
Expand Down
Loading
Loading