Merge branch 'endian_convert'

shoyer · shoyer · commit 7ea9de85698e · 2015-02-25T20:41:24.000-08:00
diff --git a/xray/backends/netCDF4_.py b/xray/backends/netCDF4_.py
@@ -11,6 +11,13 @@
 from .common import AbstractWritableDataStore
 from .netcdf3 import encode_nc3_variable, maybe_convert_to_char_array
 
+# Lookup table from numpy's dtype.byteorder codes to the endian names
+# used by netCDF4; '|' (byte order not applicable) maps to 'native'.
+_endian_lookup = {'=': 'native',
+                  '>': 'big',
+                  '<': 'little',
+                  '|': 'native'}
+
 
 class NetCDF4ArrayWrapper(NDArrayMixin):
     def __init__(self, array):
@@ -83,6 +90,27 @@ def _ensure_fill_value_valid(data, attributes):
         attributes['_FillValue'] = np.string_(attributes['_FillValue'])
 
 
+def _force_native_endianness(var):
+    """Return `var` with its data converted to native byte order.
+
+    Raises NotImplementedError if encoding['endian'] is set to anything
+    other than 'native' on a variable whose dtype is already native.
+    """
+    # dtype.byteorder: '=' native, '<' little, '>' big, '|' not applicable
+    if var.dtype.byteorder not in ('=', '|'):
+        data = var.values.astype(var.dtype.newbyteorder('='))
+        # drop 'endian' from a copy so the caller's encoding is not mutated
+        encoding = dict(var.encoding)
+        encoding.pop('endian', None)
+        var = Variable(var.dims, data, var.attrs, encoding)
+    # compare by value; `is` tests identity and is unreliable for strings
+    if var.encoding.get('endian', 'native') != 'native':
+        raise NotImplementedError("Attempt to write non-native endian type, "
+                                  "this is not supported by the netCDF4 python "
+                                  "library.")
+    return var
+
+
 class NetCDF4DataStore(AbstractWritableDataStore):
     """Store for reading and writing data via the Python-NetCDF4 library.
 
@@ -152,6 +180,9 @@ def set_attribute(self, key, value):
 
     def set_variable(self, name, variable):
         attrs = variable.attrs.copy()
+
+        variable = _force_native_endianness(variable)
+
         if self.format == 'NETCDF4':
             variable, datatype = _nc4_values_and_dtype(variable)
         else:
@@ -167,6 +198,8 @@ def set_variable(self, name, variable):
             fill_value = None
 
         encoding = variable.encoding
+        data = variable.values
+
         nc4_var = self.ds.createVariable(
             varname=name,
             datatype=datatype,
@@ -177,11 +210,11 @@ def set_variable(self, name, variable):
             fletcher32=encoding.get('fletcher32', False),
             contiguous=encoding.get('contiguous', False),
             chunksizes=encoding.get('chunksizes'),
-            endian=encoding.get('endian', 'native'),
+            endian='native',
             least_significant_digit=encoding.get('least_significant_digit'),
             fill_value=fill_value)
         nc4_var.set_auto_maskandscale(False)
-        nc4_var[:] = variable.values
+        nc4_var[:] = data
         for k, v in iteritems(attrs):
             # set attributes one-by-one since netCDF4<1.0.10 can't handle
             # OrderedDict as the input to setncatts
diff --git a/xray/test/test_backends.py b/xray/test/test_backends.py
@@ -444,6 +444,24 @@ def test_variable_len_strings(self):
                 with open_dataset(tmp_file, **kwargs) as actual:
                     self.assertDatasetIdentical(expected, actual)
 
+    def test_roundtrip_endian(self):
+        ds = Dataset({'x': np.arange(3, 10, dtype='>i2'),
+                      'y': np.arange(3, 20, dtype='<i4'),
+                      'z': np.arange(3, 30, dtype='=i8'),
+                      'w': ('x', np.arange(3, 10, dtype=float))})
+        with self.roundtrip(ds) as actual:
+            # technically these datasets are slightly different: one
+            # holds mixed endian data (ds), the other should be all
+            # native endian (actual).  assertDatasetIdentical should
+            # still pass though.
+            self.assertDatasetIdentical(ds, actual)
+
+        # any 'endian' encoding other than 'native' must be rejected
+        ds['z'].encoding['endian'] = 'big'
+        with self.assertRaises(NotImplementedError):
+            with self.roundtrip(ds):
+                pass
+
     def test_roundtrip_character_array(self):
         with create_tmp_file() as tmp_file:
             values = np.array([['a', 'b', 'c'], ['d', 'e', 'f']], dtype='S')