22 changes: 21 additions & 1 deletion Readme.md
@@ -1,5 +1,5 @@
# netchdf
_last updated: 7/27/2025_
_last updated: 7/28/2025_

This is a rewrite in Kotlin of parts of the devcdm and netcdf-java libraries.

@@ -33,6 +33,8 @@
* [Compare with HDF5 data model](#compare-with-hdf5-data-model)
* [Compare with HDF4 data model](#compare-with-hdf4-data-model)
* [Compare with HDF-EOS data model](#compare-with-hdf-eos-data-model)
* [Implementation Notes](#implementation-notes)
* [Netcdf4 vs HDF5](#netcdf4-vs-hdf5)
* [Elevator blurb](#elevator-blurb)
<!-- TOC -->

@@ -263,7 +265,7 @@
return data as ArrayUByte.
* Netcdf-4 encodes CHAR values as HDF5 string type with elemSize = 1, so we use that convention to detect
legacy CHAR variables in HDF5 format. (NC_CHAR should not be used in new Netcdf-4 files; use NC_UBYTE or NC_STRING.)
Variables of type CHAR return data as STRING, since users can use UBYTE if that's what they intend (see the sketch below).

* Netcdf-4/HDF5 String variables may be fixed or variable length. For fixed Strings, we set the size of Datatype.STRING to
the fixed size. For both fixed and variable length Strings, the string will be truncated at the first zero byte, if any.
* HDF4 does not have a STRING type, but does have signed and unsigned CHAR, and signed and unsigned BYTE.
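
A minimal Kotlin sketch of the legacy-CHAR detection convention described above (the parameter names are assumptions for illustration, not the library's actual API):

```kotlin
// Sketch only: an HDF5 string type whose element size is 1 is treated as a
// legacy NC_CHAR variable; its data is then returned as Datatype.STRING.
fun isLegacyChar(isHdf5StringType: Boolean, elemSize: Int): Boolean =
    isHdf5StringType && elemSize == 1
```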
@@ -305,6 +307,24 @@
* The _StructMetadata_ ODL is gathered and applied to the file header metadata as well as possible.
Contact us with example files if you see something we are missing.

## Implementation Notes

### Netcdf4 vs HDF5

All Netcdf4 files are HDF5, but not all HDF5 files are Netcdf4. We'd like to be able to detect when a file was written
using the Netcdf-4 library, but it's not always possible to tell for certain. If any of the following are true, we set
isNetcdf4 = true (a minimal sketch follows the list).

1. If a group or variable has an attribute with name "_NCProperties", "_Netcdf4Coordinates", "_Netcdf4Dimid" or "_nc3_strict".
2. If a variable name starts with "_nc4_non_coord_".
3. If a variable has an attribute named "DIMENSION_LIST" with type vlen of reference.
4. If a dimension name starts with "This is a netCDF dimension but not a netCDF variable".
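
A minimal sketch of these heuristics (the flattened name-list parameters are assumptions for illustration; the real detection works on parsed header objects):

```kotlin
// Sketch only: decide whether a file was probably written by the Netcdf-4 library.
fun looksLikeNetcdf4(
    attributeNames: List<String>,
    variableNames: List<String>,
    dimensionNames: List<String>,
): Boolean {
    val markerAtts = setOf("_NCProperties", "_Netcdf4Coordinates", "_Netcdf4Dimid", "_nc3_strict")
    return attributeNames.any { it in markerAtts }
        || variableNames.any { it.startsWith("_nc4_non_coord_") }
        || attributeNames.contains("DIMENSION_LIST") // must also have type vlen of reference
        || dimensionNames.any { it.startsWith("This is a netCDF dimension but not a netCDF variable") }
}
```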


Other than trying to identify which library wrote the file, Netchdf does no special processing for Netcdf4 files,
except:

1. When testing, we use the Netcdf4 C library to compare data and metadata.

## Elevator blurb

An independent implementation of HDF4/HDF5/HDF-EOS in Kotlin.
9 changes: 2 additions & 7 deletions core/src/commonMain/kotlin/com/sunya/cdm/api/Netchdf.kt
@@ -17,7 +17,7 @@ interface Netchdf : AutoCloseable {
// TODO I think the output type is not always the input type
fun <T> readArrayData(v2: Variable<T>, wantSection: SectionPartial? = null) : ArrayTyped<T>

// iterate over all the chunks in section, order is arbitrary. TODO where is intersection with wantSection done ??
// iterate over all the chunks in section, order is arbitrary.
fun <T> chunkIterator(v2: Variable<T>, wantSection: SectionPartial? = null, maxElements : Int? = null) : Iterator<ArraySection<T>>

// iterate over all the chunks in section, order is arbitrary, callbacks are in multiple threads.
@@ -31,9 +31,4 @@ interface Netchdf : AutoCloseable {
}

// the section describes the array chunk relative to the variable's shape.
data class ArraySection<T>(val array : ArrayTyped<T>, val chunkSection : Section) {
fun intersect(wantSection: SectionPartial) : ArrayTyped<T> {
// TODO ??
return array
}
}
data class ArraySection<T>(val array : ArrayTyped<T>, val chunkSection : Section)
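
For context, a minimal usage sketch of `chunkIterator` (the variable lookup is an assumption for illustration, not the exact API):

```kotlin
// Sketch: iterate over all chunks of a variable; chunk order is arbitrary.
Hdf5File("testdata/example.h5").use { ncfile ->
    val v = ncfile.rootGroup().variables.first() // hypothetical lookup
    ncfile.chunkIterator(v).forEach { asect ->
        // each ArraySection pairs chunk data with its Section relative to the variable's shape
        println("chunk at ${asect.chunkSection}")
    }
}
```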
@@ -22,13 +22,8 @@ class ArrayStructureData(shape : IntArray, val ba : ByteArray, val isBE: Boolean
}

private val heap = mutableMapOf<Int, Any>()
// private var heapIndex = 0
internal fun putOnHeap(offset: Int, value: Any) {
heap[offset] = value
// ba.putInt(offset, heapIndex) // TODO clobber the ByteArray ?? Or just use the byte pos, which is unique
//val result = heapIndex
// heapIndex++
// return result
}

internal fun getFromHeap(offset: Int): Any? {
3 changes: 0 additions & 3 deletions core/src/commonMain/kotlin/com/sunya/cdm/util/Math.kt
@@ -129,9 +129,6 @@ fun unsignedByteToShort(b: Byte): Short {
*/

////////////////////////////////////////////////////////////////////////
// TODO
// doubleIsNearlyEqual() doublesAreNearlyEqual

const val defaultMaxRelativeDiffFloat = 1.0e-5f

/** The default maximum relative difference for floats, when comparing as doubles. */
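
For reference, a sketch of the relative-difference comparison this constant supports (the library's actual helper may differ, e.g. in NaN handling):

```kotlin
import kotlin.math.abs
import kotlin.math.max

// Sketch: floats are "nearly equal" when their difference is small relative
// to the larger magnitude; exact equality short-circuits (covers infinities).
fun nearlyEquals(a: Float, b: Float, maxRelDiff: Float = defaultMaxRelativeDiffFloat): Boolean {
    if (a == b) return true
    return abs(a - b) <= max(abs(a), abs(b)) * maxRelDiff
}
```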
@@ -107,7 +107,7 @@ enum class NetchdfFileFormat(private val version: Int, private val formatName: S
NC_FORMAT_64BIT_OFFSET(2, "netcdf-3 64bit-offset"),
NC_FORMAT_NETCDF4(3, "NetCDF-4"), // This is really just HDF-5, don't know yet if it's written by netcdf4.
NC_FORMAT_NETCDF4_CLASSIC(4, "netcdf-4 classic"), // pseudo format I think
NC_FORMAT_64BIT_DATA(5, "netcdf-5"), // TODO support this; need test files
NC_FORMAT_64BIT_DATA(5, "netcdf-5"), // we have one test file: ../core/src/commonTest/data/jays_DOMAIN000.nc

HDF5(5, "hdf5"), // not written by netcdf C library
HDF4(6, "hdf4"); // not written by netcdf C library
@@ -24,7 +24,6 @@ internal class BTree1data(
rootNode = BTreeNode(rootNodeAddress, null)
}

// if other layouts like BTree2data had this interface we could use in chunkConcurrent
override fun asSequence(): Sequence<DataChunk> = sequence {
repeat( tiling.nelems) {
yield(findDataChunk(it) ?: missingDataChunk(it, tiling))
@@ -33,6 +32,8 @@

fun chunkIterator(): Iterator<DataChunk> = asSequence().iterator()

fun countChunks() = asSequence().count()

internal fun findDataChunk(order: Int): DataChunk? {
return rootNode.findDataChunk(order)
}
@@ -186,7 +186,6 @@ internal class FractalHeap(private val h5: H5builder, forWho: String, address: L
return record1.hugeObjectAddress
}

// 3, 4 -> return offset.toLong() // TODO only a guess
else -> throw RuntimeException("Unknown DHeapId subtype =$subtype")
}
}
@@ -57,6 +57,7 @@ internal data class H5TypeInfo(val isVlenString: Boolean, val isRefObject : Bool

Datatype5.Floating ->
when (this.elemSize) {
// 2 -> "half float" see jhdf
4 -> Datatype.FLOAT
8 -> Datatype.DOUBLE
else -> throw RuntimeException("Bad hdf5 float type with size= ${this.elemSize}")
@@ -27,7 +27,7 @@ import kotlinx.coroutines.runBlocking
import kotlinx.coroutines.yield

@ExperimentalCoroutinesApi
class H5chunkConcurrent<T>(val h5: H5builder, val v2: Variable<T>, wantSection: SectionPartial?, ) {
class H5readChunkedConcurrent<T>(val h5: H5builder, val v2: Variable<T>, wantSection: SectionPartial?, ) {
val rafext: OpenFileExtended = h5.makeFileExtended()

val varShape = v2.shape
@@ -27,14 +27,10 @@ internal fun <T> H5builder.readChunkedData(v2: Variable<T>, wantSection: Section
throw RuntimeException("Illegal nbytes to read = $sizeBytes")
}
val ba = ByteArray(sizeBytes.toInt())

// just reading into memory the entire index for now
// val index = BTree2j(h5, v2.name, vinfo.dataPos, vinfo.storageDims)

val filters = FilterPipeline(v2.name, vinfo.mfp, vinfo.h5type.isBE)
val state = OpenFileState(0L, vinfo.h5type.isBE)

// just run through all the chunks, we wont read any that we dont need
// run through all the chunks, we won't read any that we don't need
for (dataChunk: DataChunk in index) {
val dataSection = IndexSpace(v2.rank, dataChunk.offsets.toLongArray(), vinfo.storageDims)
val chunker = Chunker(dataSection, wantSpace) // each DataChunkEntry has its own Chunker iteration
@@ -62,60 +58,6 @@
}
}

/* DataLayoutBTreeVer1
internal fun <T> H5builder.readBtree1data(v2: Variable<T>, wantSection: Section): ArrayTyped<T> {
val vinfo = v2.spObject as DataContainerVariable
val h5type = vinfo.h5type

val elemSize = vinfo.storageDims[vinfo.storageDims.size - 1].toInt() // last one is always the elements size
val datatype = vinfo.h5type.datatype()

val wantSpace = IndexSpace(wantSection)
val sizeBytes = wantSpace.totalElements * elemSize
if (sizeBytes <= 0 || sizeBytes >= Int.MAX_VALUE) {
throw RuntimeException("Illegal nbytes to read = $sizeBytes")
}
val ba = ByteArray(sizeBytes.toInt())

val btree1 = if (vinfo.mdl is DataLayoutBTreeVer1) {
val rafext: OpenFileExtended = this.openNewFileExtended()
BTree1data(rafext, vinfo.dataPos, v2.shape, vinfo.storageDims)
} else {
throw RuntimeException("Unsupported mdl ${vinfo.mdl}")
}

//val tiledData = H5TiledData1(btree1, v2.shape, vinfo.storageDims)
val filters = FilterPipeline(v2.name, vinfo.mfp, vinfo.h5type.isBE)
//if (debugChunking) println(" readChunkedData tiles=${tiledData.tiling}")

var transferChunks = 0
val state = OpenFileState(0L, vinfo.h5type.isBE)
btree1.asSequence().forEach { dataChunk ->
val dataSection = IndexSpace(v2.rank, dataChunk.offsets.toLongArray(), vinfo.storageDims)
val chunker = Chunker(dataSection, wantSpace) // each DataChunkEntry has its own Chunker iteration
if (dataChunk.isMissing()) {
if (debugChunking) println(" missing ${dataChunk.show()}")
chunker.transferMissing(vinfo.fillValue, elemSize, ba)
} else {
if (debugChunking) println(" chunk=${dataChunk.show()}")
state.pos = dataChunk.address
val chunkData = this.raf.readByteArray(state, dataChunk.size)
val filteredData = if (dataChunk.filterMask == null) chunkData
else filters.apply(chunkData, dataChunk.filterMask)
chunker.transferBA(filteredData, 0, elemSize, ba, 0)
transferChunks += chunker.transferChunks
}
}

val shape = wantSpace.shape.toIntArray()

return if (h5type.datatype5 == Datatype5.Vlen) {
this.processVlenIntoArray(h5type, shape, ba, wantSpace.totalElements.toInt(), elemSize)
} else {
this.processDataIntoArray(ba, vinfo.h5type.isBE, datatype, shape, h5type, elemSize) as ArrayTyped<T>
}
} */

internal fun <T> readChunkedDataWithIterator(hdf5: Hdf5File, v2: Variable<T>, wantSection: SectionPartial?): ArrayTyped<T> {
val vinfo = v2.spObject as DataContainerVariable
val datatype = vinfo.h5type.datatype()
@@ -149,7 +91,6 @@ internal fun <T> readChunkedDataWithIterator(hdf5: Hdf5File, v2: Variable<T>, wa
val dataSection = IndexSpace(dataChunk.chunkSection)
val chunker = Chunker(dataSection, wantSpace) // each DataChunkEntry has its own Chunker iteration
chunker.forEach {
// println(it)
dataChunk.array.transfer(values, it)
}
}
7 changes: 4 additions & 3 deletions core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/Hdf5File.kt
@@ -51,7 +51,7 @@ class Hdf5File(val filename : String, strict : Boolean = false) : Netchdf {
return readArrayData(v2, wantSection, recurse = false)
}

fun <T> readArrayData(v2: Variable<T>, wantSection: SectionPartial?, recurse: Boolean): ArrayTyped<T> {
fun <T> readArrayData(v2: Variable<T>, wantSection: SectionPartial?, recurse: Boolean, countChunks: Boolean = false): ArrayTyped<T> {
if (v2.nelems == 0L) {
return ArrayEmpty(v2.shape.toIntArray(), v2.datatype)
}
@@ -87,6 +87,7 @@ class Hdf5File(val filename : String, strict : Boolean = false) : Netchdf {
v2.datatype == Datatype.STRING || v2.datatype == Datatype.VLEN)) {
val btree1 =
BTree1data(header.makeFileExtended(), vinfo.dataPos, v2.shape, vinfo.storageDims)
if (countChunks) println(" nchunks = ${btree1.countChunks()}")
header.readChunkedData(v2, section, btree1.chunkIterator())
// header.readBtree1data(v2, section)
} else {
@@ -160,7 +161,7 @@
}

class H5chunkIterator2<T>(hdfFile: Hdf5File, val v2: Variable<T>, val wantSection: SectionPartial?): AbstractIterator<ArraySection<T>>() {
val reader = H5chunkConcurrent(hdfFile.header, v2, wantSection)
val reader = H5readChunkedConcurrent(hdfFile.header, v2, wantSection)
val nthreads = hdfFile.useNThreads()
val deque = Deque<ArraySection<T>>(10)

@@ -184,7 +185,7 @@

override fun <T> readChunksConcurrent(v2: Variable<T>, lamda : (ArraySection<T>) -> Unit, done : () -> Unit,
wantSection: SectionPartial?, nthreads: Int?) {
val reader = H5chunkConcurrent(header, v2, wantSection)
val reader = H5readChunkedConcurrent(header, v2, wantSection)
val availableProcessors = this.useNThreads()
// println("availableProcessors = $availableProcessors")
reader.readChunks(nthreads ?: availableProcessors, lamda, done = { done() })
@@ -264,7 +264,7 @@ internal class N3header(rafOrg: OpenFileIF, val root: Group.Builder) {
nelems
}
Datatype.CHAR -> {
// a CHAR is made into a String with UTF8 assumed. TODO make this settable ??
// a CHAR is made into a String with UTF8 assumed.
attBuilder.setValue(raf.readString(filePos, nelems))
nelems
}
@@ -48,7 +48,7 @@ object Netcdf4 {

val NETCDF4_SPECIAL_ATTS = listOf(NCPROPERTIES, NETCDF4_COORDINATES, NETCDF4_STRICT, NETCDF4_DIMID)

// appended to variable when it conflicts with dimension scale
// prepended to variable when it conflicts with dimension scale
const val NETCDF4_NON_COORD = "_nc4_non_coord_"

const val NETCDF4_NOT_VARIABLE = "This is a netCDF dimension but not a netCDF variable"
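
For illustration, a sketch of how the marker is used (the netcdf-4 C library prepends it on write; a reader strips it off again):

```kotlin
// Sketch: stored HDF5 name vs. user-visible variable name.
val storedName = Netcdf4.NETCDF4_NON_COORD + "time"                // "_nc4_non_coord_time"
val userName = storedName.removePrefix(Netcdf4.NETCDF4_NON_COORD)  // "time"
```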
@@ -60,7 +60,7 @@ class Btree1dataTest {
for (nthreads in listOf(1, 2, 4, 8, 10, 16, 20, 24, 32, 40, 48)) {
val time = measureNanoTime {
// fun readChunks(nthreads: Int, lamda: (ArraySection<*>) -> Unit, done: () -> Unit) {
val reader = H5chunkConcurrent(myfile.header, myvar, null)
val reader = H5readChunkedConcurrent(myfile.header, myvar, null)
reader.readChunks(nthreads, lamda = { asect: ArraySection<*> ->
// println(" section = ${asect.chunkSection}")
}, { }, )
3 changes: 3 additions & 0 deletions testfiles/src/test/kotlin/com/sunya/netchdf/CountVersions.kt
@@ -68,6 +68,9 @@ class CountVersions {
} else {
val paths = versions.getOrPut(ncfile.type()) { mutableListOf() }
paths.add(filename)
if (ncfile.type() == "netcdf3.5")
println("${ncfile.type()} file=$filename")

}
}
} catch (e: Throwable) {
@@ -66,7 +66,7 @@ class H5readConcurrentTest {
for (nthreads in listOf(1, 2, 4, 8, 10, 16, 20, 24, 32, 40, 48)) {
myfile.useNThreads = nthreads
val time = measureNanoTime {
myfile.readArrayData(myvar)
myfile.readArrayData(myvar) // , null, recurse = true, countChunks = (nthreads == 1))
}
println("$nthreads, ${time * nano}")
val map1 = timing.getOrPut(nthreads) { mutableMapOf() }