Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 13 additions & 12 deletions lib/iris/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -1280,10 +1280,6 @@ def __init__(
... (longitude, 1)])

"""
# Temporary error while we transition the API.
if isinstance(data, str):
raise TypeError("Invalid data type: {!r}.".format(data))

# Configure the metadata manager.
self._metadata_manager = metadata_manager_factory(CubeMetadata)

Expand Down Expand Up @@ -4468,15 +4464,20 @@ def __eq__(self, other):

# Having checked everything else, check approximate data equality.
if result and not dataless_equality:
# TODO: why do we use allclose() here, but strict equality in
# _DimensionalMetadata (via util.array_equal())?
result = bool(
np.allclose(
self.core_data(),
other.core_data(),
equal_nan=True,
if self.dtype.kind in "if":
# numbers
# TODO: why do we use allclose() here, but strict equality in
# _DimensionalMetadata (via util.array_equal())?
result = bool(
np.allclose(
self.core_data(),
other.core_data(),
equal_nan=True,
)
)
)
else:
# non-numeric: use exact equality
result = bool(np.all(self.core_data() == other.core_data()))
return result

# Must supply __ne__, Python does not defer to __eq__ for negative equality
Expand Down
4 changes: 0 additions & 4 deletions lib/iris/fileformats/_nc_load_rules/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1637,11 +1637,7 @@ def _add_auxiliary_coordinate(
# Determine the name of the dimension/s shared between the CF-netCDF data variable
# and the coordinate being built.
coord_dims = cf_coord_var.dimensions
# if cf._is_str_dtype(cf_coord_var):
# coord_dims = coord_dims[:-1]
datavar_dims = engine.cf_var.dimensions
# if cf._is_str_dtype(engine.cf_var):
# datavar_dims = datavar_dims[:-1]
common_dims = [dim for dim in coord_dims if dim in datavar_dims]
data_dims = None
if common_dims:
Expand Down
6 changes: 6 additions & 0 deletions lib/iris/fileformats/cf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1344,6 +1344,12 @@ def __init__(self, file_source, warn=False, monotonic=False):
self._with_ugrid = False

# Read the variables in the dataset only once to reduce runtime.
# Turn off automatic char-to-string decoding in the underlying netCDF4 dataset
ds = self._dataset
if isinstance(ds, _thread_safe_nc.DatasetWrapper):
ds._contained_instance.set_auto_chartostring(False)
else:
ds.set_auto_chartostring(False)
variables = self._dataset.variables
self._translate(variables)
self._build_cf_groups(variables)
Expand Down
1 change: 0 additions & 1 deletion lib/iris/fileformats/netcdf/_bytecoding_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ def encode_stringarray_as_bytearray(
raise ValueError(msg) from err

n_bytes = len(bytes)
# TODO: may want to issue warning or error if we overflow the length?
if n_bytes > string_dimension_length:
from iris.exceptions import TranslationError

Expand Down
2 changes: 1 addition & 1 deletion lib/iris/fileformats/netcdf/_thread_safe_nc.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ class GroupWrapper(_ThreadSafeWrapper):
# Note: will also accept a whole Dataset object, but that is OK.
_DUCKTYPE_CHECK_PROPERTIES = ["createVariable"]
# Class to use when creating variable wrappers (default=VariableWrapper).
# - needed to support _byte_encoded_data.EncodedDataset.
# - needed to support _bytecoding_datasets.EncodedDataset.
VAR_WRAPPER_CLS = VariableWrapper
GRP_WRAPPER_CLS: typing.Any | None = None # self-reference : fill in later

Expand Down
88 changes: 51 additions & 37 deletions lib/iris/fileformats/netcdf/saver.py
Original file line number Diff line number Diff line change
Expand Up @@ -1719,20 +1719,23 @@ def add_names_attrs():
if element.units.calendar:
_setncattr(cf_var, "calendar", str(element.units.calendar))

# Take a copy so we can remove things
element_attrs = element.attributes.copy()

# Note: when writing UGRID, "element" can be a Mesh which has no "dtype",
# and for dataless cubes it will have a 'None' dtype.
if getattr(element, "dtype", None) is not None:
# Most attributes are dealt with later. But _Encoding needs to be defined
# *before* we can write to a character variable.
if element.dtype.kind in "SU" and "_Encoding" in element.attributes:
encoding = element.attributes.pop("_Encoding")
if element.dtype.kind in "SU" and "_Encoding" in element_attrs:
encoding = element_attrs.pop("_Encoding")
_setncattr(cf_var, "_Encoding", encoding)

if not isinstance(element, Cube):
# Add any other custom coordinate attributes.
# N.B. not Cube, which has specific handling in _create_cf_data_variable
for name in sorted(element.attributes):
value = element.attributes[name]
for name in sorted(element_attrs):
value = element_attrs[name]

if name == "STASH":
# Adopting provisional Metadata Conventions for representing MO
Expand Down Expand Up @@ -1830,8 +1833,8 @@ def _create_generic_cf_array_var(
if cube is not None and data is not None and cube.shape != data.shape:
compression_kwargs = {}

if not is_dataless and np.issubdtype(data.dtype, np.str_):
# Deal with string-type variables.
if not is_dataless and data.dtype.kind == "U":
# Deal with unicode-string-type variables.
# Typically CF label variables, but also possibly ancil-vars ?

# NOTE: all we are doing here is to calculate the byte dimension length,
Expand All @@ -1840,37 +1843,26 @@ def _create_generic_cf_array_var(
# being a _bytecoding_datasets.EncodedVariable.
string_dimension_depth = data.dtype.itemsize

if data.dtype.kind == "U":
# String content (U) instead of bytes (S).
# For numpy strings, itemsize is **always** a multiple of 4
if string_dimension_depth % 4 != 0:
msg = (
"Unexpected numpy string 'itemsize' for element "
f"{cube_or_mesh.name()}: "
f"'dtype.itemsize = {string_dimension_depth}, expected "
"a multiple of four (always)."
)
raise ValueError(msg)
nchars = string_dimension_depth // 4

encoding_attr = element.attributes.get("_Encoding", "ascii")
# Look this up + return a supported encoding name.
# NB: this implements defaults, and raises a warning if the given encoding is not recognised.
encoding = bytecoding_datasets._identify_encoding(
encoding=encoding_attr, var_name=cf_name, writing=True
# String content (U) instead of bytes (S).
# For numpy strings, itemsize is **always** a multiple of 4
if string_dimension_depth % 4 != 0:
msg = (
"Unexpected numpy string 'itemsize' for element "
f"{cube_or_mesh.name()}: "
f"'dtype.itemsize = {string_dimension_depth}, expected "
"a multiple of four (always)."
)
width_fns = bytecoding_datasets._ENCODING_WIDTH_TRANSLATIONS[encoding]
string_dimension_depth = width_fns.nchars_2_nbytes(nchars)
else:
if data.dtype.kind != "S" or data.dtype.itemsize != 1:
# Some type of data we don't "understand".
# NB this includes "Sxx" types other than "S1" : It seems that
# netCDF4 saves Sxx as variable-length strings. But we don't support that type in Iris.
msg = (
f"Variable {cf_name!r} has unexpected string/character dtype, "
f"{data.dtype} -- should be either 'S' or 'U' type."
)
raise ValueError(msg)
raise ValueError(msg)
nchars = string_dimension_depth // 4

encoding_attr = element.attributes.get("_Encoding", "ascii")
# Look this up + return a supported encoding name.
# NB: this implements defaults, and raises a warning if the given encoding is not recognised.
encoding = bytecoding_datasets._identify_encoding(
encoding=encoding_attr, var_name=cf_name, writing=True
)
width_fns = bytecoding_datasets._ENCODING_WIDTH_TRANSLATIONS[encoding]
string_dimension_depth = width_fns.nchars_2_nbytes(nchars)

string_dimension_name = "string%d" % string_dimension_depth

Expand All @@ -1890,12 +1882,34 @@ def _create_generic_cf_array_var(
# Create the label coordinate variable.
cf_var = self._dataset.createVariable(cf_name, "|S1", element_dims)
else:
# A normal (numeric) variable.
# A non-string variable.
# ensure a valid datatype for the file format.
if is_dataless:
dtype = self._DATALESS_DTYPE
fill_value = self._DATALESS_FILLVALUE
else:
# Normal non-string data.
# NOTE: this includes byte-arrays (S1 only) : however these must
# use an actual cube dimension for the 'string dimension', which
# seriously limits the utility of DECODE_TO_STRINGS_ON_READ.
# TODO: also support netCDF variable-length strings ("string" type).
# Currently hit a **write error here**, being numpy object dtype ("O").
if (
data.dtype.kind not in "iufSU"
or data.dtype.kind == "S"
and data.dtype.itemsize != 1
):
# This is a type of data we don't "understand".
# NB this includes "Sxx" types other than "S1" : It seems that
# netCDF4 saves Sxx as variable-length strings.
# But we don't support that type in Iris.
msg = (
f"Variable {cf_name!r} has unexpected dtype, {data.dtype!r}."
f"Data content arrays must be numeric, or contain "
"single-bytes (dtype 'S1'), or unicode strings (dtype 'U<n>')."
)
raise ValueError(msg)

element_type = type(element).__name__
data = self._ensure_valid_dtype(data, element_type, element)
if not packing_controls:
Expand Down
Loading
Loading