From 34cfd87333092b6c68efae9867d52b20ca291673 Mon Sep 17 00:00:00 2001 From: Jakub Orlinski Date: Fri, 17 Nov 2023 10:18:07 +0100 Subject: [PATCH 1/3] [Issue #17] Change single value writing to use SCALAR spaces instead of SIMPLE ones (single element arrays) --- HDF5-CSharp/Hdf5Dataset.cs | 56 ++++++++++++++++++++++++-------------- HDF5-CSharp/Hdf5Strings.cs | 12 ++------ 2 files changed, 37 insertions(+), 31 deletions(-) diff --git a/HDF5-CSharp/Hdf5Dataset.cs b/HDF5-CSharp/Hdf5Dataset.cs index 87f4ac5e..d0bb3140 100644 --- a/HDF5-CSharp/Hdf5Dataset.cs +++ b/HDF5-CSharp/Hdf5Dataset.cs @@ -271,27 +271,41 @@ public static (bool success, Array result) ReadDataset(long groupId, string n return dsetRW.ReadArray(groupId, name, alternativeName, mandatory); } - /// - /// Writes one value to a hdf5 file - /// - /// Generic parameter strings or primitive type - /// id of the group. Can also be a file Id - /// name of the dataset - /// The dataset - /// status of the write method - public static (int success, long CreatedgroupId) WriteOneValue(long groupId, string name, T dset, Dictionary> attributes) - { - if (typeof(T) == typeof(string)) - //WriteStrings(groupId, name, new string[] { dset.ToString() }); - { - return dsetRW.WriteArray(groupId, name, new T[1] { dset }, attributes); - } - - Array oneVal = new T[1, 1] { { dset } }; - return dsetRW.WriteArray(groupId, name, oneVal, attributes); - } - - public static void WriteDataset(long groupId, string name, Array collection) + /// + /// Writes one value to a hdf5 file + /// + /// Generic parameter strings or primitive type + /// id of the group. 
Can also be a file Id + /// name of the dataset + /// The dataset + /// status of the write method + public static (int success, long CreatedgroupId) WriteOneValue(long groupId, string name, T value, Dictionary> attributes) + { + var spaceId = H5S.create(H5S.class_t.SCALAR); + var datatype = GetDatatype(typeof(T)); + var typeId = H5T.copy(datatype); + if (datatype == H5T.C_S1) + { + H5T.set_size(typeId, new IntPtr(2)); + } + + string normalizedName = Hdf5Utils.NormalizedName(name); + var datasetId = Hdf5Utils.GetDatasetId(groupId, normalizedName, datatype, spaceId, H5P.DEFAULT); + if (datasetId == -1L) + { + return (-1, -1L); + } + + GCHandle hnd = GCHandle.Alloc(value, GCHandleType.Pinned); + var result = H5D.write(datasetId, datatype, H5S.ALL, H5S.ALL, H5P.DEFAULT, hnd.AddrOfPinnedObject()); + hnd.Free(); + H5D.close(datasetId); + H5S.close(spaceId); + H5T.close(typeId); + return (result, datasetId); + } + + public static void WriteDataset(long groupId, string name, Array collection) { dsetRW.WriteArray(groupId, name, collection, new Dictionary>()); } diff --git a/HDF5-CSharp/Hdf5Strings.cs b/HDF5-CSharp/Hdf5Strings.cs index 547bdff1..23c06110 100644 --- a/HDF5-CSharp/Hdf5Strings.cs +++ b/HDF5-CSharp/Hdf5Strings.cs @@ -110,21 +110,13 @@ public static (int success, long CreatedgroupId) WriteStrings(long groupId, stri public static int WriteAsciiString(long groupId, string name, string str) { - var spaceNullId = H5S.create(H5S.class_t.NULL); var spaceScalarId = H5S.create(H5S.class_t.SCALAR); - // create two datasets of the extended ASCII character set - // store as H5T.FORTRAN_S1 -> space padding - int strLength = str.Length; ulong[] dims = { (ulong)strLength, 1 }; - /* Create the dataset. 
*/ - //name = ToHdf5Name(name); - - var spaceId = H5S.create_simple(1, dims, null); - var datasetId = H5D.create(groupId, Hdf5Utils.NormalizedName(name), H5T.FORTRAN_S1, spaceId); - H5S.close(spaceId); + var datasetId = H5D.create(groupId, Hdf5Utils.NormalizedName(name), H5T.FORTRAN_S1, spaceScalarId); + H5S.close(spaceScalarId); // we write from C and must provide null-terminated strings From 91032d84785311b453b48d17d51189281b7cae6c Mon Sep 17 00:00:00 2001 From: Jakub Orlinski Date: Tue, 21 Nov 2023 11:36:17 +0100 Subject: [PATCH 2/3] Correct the implementation of WriteAsciiString and WriteOneValue so that they set the right amount of space for strings and use the correct type identifiers to write datasets --- HDF5-CSharp/Hdf5Dataset.cs | 25 +++++++++++++++++----- HDF5-CSharp/Hdf5Strings.cs | 43 +++++++++++++++++++------------------- 2 files changed, 41 insertions(+), 27 deletions(-) diff --git a/HDF5-CSharp/Hdf5Dataset.cs b/HDF5-CSharp/Hdf5Dataset.cs index d0bb3140..5350cea9 100644 --- a/HDF5-CSharp/Hdf5Dataset.cs +++ b/HDF5-CSharp/Hdf5Dataset.cs @@ -284,20 +284,35 @@ public static (int success, long CreatedgroupId) WriteOneValue(long groupId, var spaceId = H5S.create(H5S.class_t.SCALAR); var datatype = GetDatatype(typeof(T)); var typeId = H5T.copy(datatype); - if (datatype == H5T.C_S1) + + GCHandle hnd; + if (datatype == H5T.C_S1 || datatype == H5T.FORTRAN_S1) + { + int stringLen = (value as string).Length; + + H5T.set_size(typeId, new IntPtr(stringLen)); + + byte[] strByteArray = new byte[stringLen + 1]; + // Write the string to the buffer, with the last element being 0 as the string terminator + for (int i = 0; i < stringLen; ++i) + { + strByteArray[i] = Convert.ToByte((value as string)[i]); + } + hnd = GCHandle.Alloc(strByteArray, GCHandleType.Pinned); + } + else { - H5T.set_size(typeId, new IntPtr(2)); + hnd = GCHandle.Alloc(value, GCHandleType.Pinned); } string normalizedName = Hdf5Utils.NormalizedName(name); - var datasetId = 
Hdf5Utils.GetDatasetId(groupId, normalizedName, datatype, spaceId, H5P.DEFAULT); + var datasetId = Hdf5Utils.GetDatasetId(groupId, normalizedName, typeId, spaceId, H5P.DEFAULT); if (datasetId == -1L) { return (-1, -1L); } - GCHandle hnd = GCHandle.Alloc(value, GCHandleType.Pinned); - var result = H5D.write(datasetId, datatype, H5S.ALL, H5S.ALL, H5P.DEFAULT, hnd.AddrOfPinnedObject()); + var result = H5D.write(datasetId, typeId, H5S.ALL, H5S.ALL, H5P.DEFAULT, hnd.AddrOfPinnedObject()); hnd.Free(); H5D.close(datasetId); H5S.close(spaceId); diff --git a/HDF5-CSharp/Hdf5Strings.cs b/HDF5-CSharp/Hdf5Strings.cs index 23c06110..34e14215 100644 --- a/HDF5-CSharp/Hdf5Strings.cs +++ b/HDF5-CSharp/Hdf5Strings.cs @@ -110,33 +110,32 @@ public static (int success, long CreatedgroupId) WriteStrings(long groupId, stri public static int WriteAsciiString(long groupId, string name, string str) { - var spaceScalarId = H5S.create(H5S.class_t.SCALAR); + var spaceScalarId = H5S.create(H5S.class_t.SCALAR); - int strLength = str.Length; - ulong[] dims = { (ulong)strLength, 1 }; + int strLength = str.Length; - var datasetId = H5D.create(groupId, Hdf5Utils.NormalizedName(name), H5T.FORTRAN_S1, spaceScalarId); - H5S.close(spaceScalarId); + var memId = H5T.copy(H5T.C_S1); + // Set the size needed for the string. 
Leave one extra space for a null-terminated string + H5T.set_size(memId, new IntPtr(strLength + 1)); - // we write from C and must provide null-terminated strings + var datasetId = H5D.create(groupId, Hdf5Utils.NormalizedName(name), memId, spaceScalarId); - byte[] wdata = new byte[strLength * 2]; - - for (int i = 0; i < strLength; ++i) - { - wdata[2 * i] = Convert.ToByte(str[i]); - } + byte[] wdata = new byte[strLength + 1]; + // Copy the characters into the buffer; the extra trailing element stays 0 and acts as the string terminator + for (int i = 0; i < strLength; ++i) + { + wdata[i] = Convert.ToByte(str[i]); + } - var memId = H5T.copy(H5T.C_S1); - H5T.set_size(memId, new IntPtr(2)); - GCHandle hnd = GCHandle.Alloc(wdata, GCHandleType.Pinned); - int result = H5D.write(datasetId, memId, H5S.ALL, - H5S.ALL, H5P.DEFAULT, hnd.AddrOfPinnedObject()); - hnd.Free(); - H5T.close(memId); - H5D.close(datasetId); - return result; - } + GCHandle hnd = GCHandle.Alloc(wdata, GCHandleType.Pinned); + + int result = H5D.write(datasetId, memId, H5S.ALL, H5S.ALL, H5P.DEFAULT, hnd.AddrOfPinnedObject()); + hnd.Free(); + H5S.close(spaceScalarId); + H5T.close(memId); + H5D.close(datasetId); + return result; + } public static string ReadAsciiString(long groupId, string name) { From 05a31c2c449dc224aced3f78d21a565627b4b4cf Mon Sep 17 00:00:00 2001 From: Jakub Orlinski Date: Tue, 21 Nov 2023 15:27:49 +0100 Subject: [PATCH 3/3] Rework how Ascii strings are read out based on the implementation of writing to scalar spaces --- HDF5-CSharp/Hdf5Strings.cs | 26 +++++++++----------------- 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/HDF5-CSharp/Hdf5Strings.cs b/HDF5-CSharp/Hdf5Strings.cs index 34e14215..f5c0107f 100644 --- a/HDF5-CSharp/Hdf5Strings.cs +++ b/HDF5-CSharp/Hdf5Strings.cs @@ -139,30 +139,22 @@ public static int WriteAsciiString(long groupId, string name, string str) public static string ReadAsciiString(long groupId, string name) { - var datatype = H5T.FORTRAN_S1; - - //name = 
ToHdf5Name(name); - var datasetId = H5D.open(groupId, Hdf5Utils.NormalizedName(name)); var spaceId = H5D.get_space(datasetId); - int rank = H5S.get_simple_extent_ndims(spaceId); - ulong[] maxDims = new ulong[rank]; - ulong[] dims = new ulong[rank]; - ulong[] chunkDims = new ulong[rank]; - var memId_n = H5S.get_simple_extent_dims(spaceId, dims, null); - // we write from C and must provide null-terminated strings - - byte[] wdata = new byte[dims[0] * 2]; + + ulong spaceNeeded = H5D.get_storage_size(datasetId); + byte[] wdata = new byte[spaceNeeded]; - var memId = H5T.copy(H5T.C_S1); - H5T.set_size(memId, new IntPtr(2)); GCHandle hnd = GCHandle.Alloc(wdata, GCHandleType.Pinned); - int resultId = H5D.read(datasetId, memId, H5S.ALL, + var memId = H5T.copy(H5T.C_S1); + H5T.set_size(memId, new IntPtr((int)spaceNeeded)); + + int resultId = H5D.read(datasetId, memId, H5S.ALL, H5S.ALL, H5P.DEFAULT, hnd.AddrOfPinnedObject()); hnd.Free(); - wdata = wdata.Where((b, i) => i % 2 == 0). - Select(b => (b == 0) ? (byte)32 : b).ToArray(); + // Remove the null termination of the string + wdata = wdata.Where(b => b != 0).ToArray(); string result = Encoding.ASCII.GetString(wdata); H5T.close(memId);