Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion Readme.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# netchdf
_last updated: 7/26/2025_
_last updated: 7/27/2025_

This is a rewrite in Kotlin of parts of the devcdm and netcdf-java libraries.

Expand Down Expand Up @@ -263,7 +263,7 @@
return data as ArrayUByte.
* Netcdf-4 encodes CHAR values as HDF5 string type with elemSize = 1, so we use that convention to detect
legacy CHAR variables in HDF5 format. (NC_CHAR should not be used in new Netcdf-4 files, use NC_UBYTE or NC_STRING.)
Variables of type CHAR return data as STRING, since users can use UBYTE if that's what they intend.

Check failure on line 266 in Readme.md

View workflow job for this annotation

GitHub Actions / Check for spelling errors

thats ==> that's
* Netcdf-4/HDF5 String variables may be fixed or variable length. For fixed Strings, we set the size of Datatype.STRING to
the fixed size. For both fixed and variable length Strings, the string will be truncated at the first zero byte, if any.
* HDF4 does not have a STRING type, but does have signed and unsigned CHAR, and signed and unsigned BYTE.
Expand Down Expand Up @@ -294,6 +294,10 @@
* Vlen Strings are stored on the heap. Fixed length Strings are kept in byte arrays.
This is more or less invisible to the User.

We have very limited example data for the "version 4" data layouts of HDF5 (many thanks to James Mudd and
the [jhdf project](https://github.com/jamesmudd/jhdf) for code and the test data we do have).
Please carefully check results if you have this kind of data, and send us samples to test!

#### Compare with HDF4 data model
* All data access is unified under the netchdf API.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ internal class BTree1data(
// if other layouts like BTree2data had this interface we could use in chunkConcurrent
override fun asSequence(): Sequence<DataChunk> = sequence {
repeat( tiling.nelems) {
//val startingIndex = tiling.orderToIndex(it.toLong())
//val indexSpace = IndexSpace(startingIndex, tiling.chunk)
yield(findDataChunk(it) ?: missingDataChunk(it, tiling))
}
}

fun chunkIterator(): Iterator<DataChunk> = asSequence().iterator()

internal fun findDataChunk(order: Int): DataChunk? {
return rootNode.findDataChunk(order)
}
Expand Down
74 changes: 4 additions & 70 deletions core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkReader.kt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import kotlin.collections.iterator

private val debugChunking = false

// DataLayoutSingleChunk4, DataLayoutImplicit4, DataLayoutFixedArray4, DataLayoutExtensibleArray4, DataLayoutBtreeVer2
// DataLayoutSingleChunk4, DataLayoutImplicit4, DataLayoutFixedArray4, DataLayoutExtensibleArray4, DataLayoutBtreeVer2, DataLayoutBTreeVer1
internal fun <T> H5builder.readChunkedData(v2: Variable<T>, wantSection: Section, index: Iterator<DataChunk>): ArrayTyped<T> {
val vinfo = v2.spObject as DataContainerVariable
val h5type = vinfo.h5type
Expand Down Expand Up @@ -62,59 +62,7 @@ internal fun <T> H5builder.readChunkedData(v2: Variable<T>, wantSection: Section
}
}

/* DataLayoutBTreeVer1 (to be removed)
internal fun <T> H5builder.readBtreeVer1(v2: Variable<T>, wantSection: Section): ArrayTyped<T> {
val vinfo = v2.spObject as DataContainerVariable
val h5type = vinfo.h5type

val elemSize = vinfo.storageDims[vinfo.storageDims.size - 1].toInt() // last one is always the elements size
val datatype = vinfo.h5type.datatype()

val wantSpace = IndexSpace(wantSection)
val sizeBytes = wantSpace.totalElements * elemSize
if (sizeBytes <= 0 || sizeBytes >= Int.MAX_VALUE) {
throw RuntimeException("Illegal nbytes to read = $sizeBytes")
}
val ba = ByteArray(sizeBytes.toInt())

val btree1 = if (vinfo.mdl is DataLayoutBTreeVer1)
BTree1(this, vinfo.dataPos, 1, vinfo.storageDims.size)
else
throw RuntimeException("Unsupprted mdl ${vinfo.mdl}")

val tiledData = H5TiledData1(btree1, v2.shape, vinfo.storageDims)
val filters = FilterPipeline(v2.name, vinfo.mfp, vinfo.h5type.isBE)
if (debugChunking) println(" readChunkedData tiles=${tiledData.tiling}")

var transferChunks = 0
val state = OpenFileState(0L, vinfo.h5type.isBE)
for (dataChunk: DataChunk in tiledData.dataChunks(wantSpace)) { // : Iterable<BTree1New.DataChunkEntry>
val dataSection = IndexSpace(v2.rank, dataChunk.offsets(), vinfo.storageDims)
val chunker = Chunker(dataSection, wantSpace) // each DataChunkEntry has its own Chunker iteration
if (dataChunk.isMissing()) {
if (debugChunking) println(" missing ${dataChunk.show(tiledData.tiling)}")
chunker.transferMissing(vinfo.fillValue, elemSize, ba)
} else {
if (debugChunking) println(" chunk=${dataChunk.show(tiledData.tiling)}")
state.pos = dataChunk.childAddress()
val chunkData = this.raf.readByteArray(state, dataChunk.chunkSize())
val filteredData = if (dataChunk.filterMask() == null) chunkData
else filters.apply(chunkData, dataChunk.filterMask()!!)
chunker.transferBA(filteredData, 0, elemSize, ba, 0)
transferChunks += chunker.transferChunks
}
}

val shape = wantSpace.shape.toIntArray()

return if (h5type.datatype5 == Datatype5.Vlen) {
this.processVlenIntoArray(h5type, shape, ba, wantSpace.totalElements.toInt(), elemSize)
} else {
this.processDataIntoArray(ba, vinfo.h5type.isBE, datatype, shape, h5type, elemSize) as ArrayTyped<T>
}
} */

// DataLayoutBTreeVer1
/* DataLayoutBTreeVer1
internal fun <T> H5builder.readBtree1data(v2: Variable<T>, wantSection: Section): ArrayTyped<T> {
val vinfo = v2.spObject as DataContainerVariable
val h5type = vinfo.h5type
Expand All @@ -130,19 +78,6 @@ internal fun <T> H5builder.readBtree1data(v2: Variable<T>, wantSection: Section)
val ba = ByteArray(sizeBytes.toInt())

val btree1 = if (vinfo.mdl is DataLayoutBTreeVer1) {
// internal class BTree1(
// val h5: H5builder,
// val rootNodeAddress: Long,
// val nodeType : Int, // 0 = group/symbol table, 1 = raw data chunks
// val ndimStorage: Int? = null // TODO allowed to be null ??
//)
// BTree1(this, vinfo.dataPos, 1, vinfo.storageDims.size)
// internal class BTree1data(
// val raf: OpenFileExtended,
// rootNodeAddress: Long,
// varShape: LongArray,
// chunkShape: LongArray,
//)
val rafext: OpenFileExtended = this.openNewFileExtended()
BTree1data(rafext, vinfo.dataPos, v2.shape, vinfo.storageDims)
} else {
Expand Down Expand Up @@ -179,10 +114,9 @@ internal fun <T> H5builder.readBtree1data(v2: Variable<T>, wantSection: Section)
} else {
this.processDataIntoArray(ba, vinfo.h5type.isBE, datatype, shape, h5type, elemSize) as ArrayTyped<T>
}
}
} */

// DataLayoutBTreeVer1 using chunkIterator
internal fun <T> readBtreeWithChunkIterator(hdf5: Hdf5File, v2: Variable<T>, wantSection: SectionPartial?): ArrayTyped<T> {
internal fun <T> readChunkedDataWithIterator(hdf5: Hdf5File, v2: Variable<T>, wantSection: SectionPartial?): ArrayTyped<T> {
val vinfo = v2.spObject as DataContainerVariable
val datatype = vinfo.h5type.datatype()

Expand Down
15 changes: 9 additions & 6 deletions core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/Hdf5File.kt
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,14 @@ class Hdf5File(val filename : String, strict : Boolean = false) : Netchdf {
} else if (vinfo.mdl is DataLayoutBTreeVer1) {
// skip the concurrent read on the hard stuff
if ( recurse || (v2.datatype == Datatype.CHAR || v2.datatype == Datatype.COMPOUND || v2.datatype == Datatype.OPAQUE ||
v2.datatype == Datatype.STRING || v2.datatype == Datatype.VLEN))
header.readBtree1data(v2, section)
else
readBtreeWithChunkIterator(this, v2, wantSection)
v2.datatype == Datatype.STRING || v2.datatype == Datatype.VLEN)) {
val btree1 =
BTree1data(header.makeFileExtended(), vinfo.dataPos, v2.shape, vinfo.storageDims)
header.readChunkedData(v2, section, btree1.chunkIterator())
// header.readBtree1data(v2, section)
} else {
readChunkedDataWithIterator(this, v2, wantSection)
}

} else if (vinfo.mdl is DataLayoutSingleChunk4) {
// header.readSingleChunk(v2, wantSection)
Expand Down Expand Up @@ -118,9 +122,8 @@ class Hdf5File(val filename : String, strict : Boolean = false) : Netchdf {
v2.datatype == Datatype.STRING || v2.datatype == Datatype.VLEN)) {
val index = BTree2data(header.makeFileExtended(), v2.name, vinfo.dataPos, v2.shape, vinfo.storageDims)
header.readChunkedData(v2, section, index.chunkIterator())
// header.readBtree1data(v2, section)
} else {
readBtreeWithChunkIterator(this, v2, wantSection)
readChunkedDataWithIterator(this, v2, wantSection)
}

} else {
Expand Down