Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for HDF5 transient types #2655

Merged
merged 10 commits into from
Jul 18, 2023
34 changes: 27 additions & 7 deletions libhdf5/hdf5open.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "hdf5internal.h"
#include "hdf5err.h"
#include "hdf5debug.h"
#include "nc4internal.h"
#include "ncrc.h"
#include "ncauth.h"
#include "ncmodel.h"
Expand Down Expand Up @@ -63,6 +64,7 @@ extern int NC4_open_image_file(NC_FILE_INFO_T* h5);

/* Defined later in this file. */
static int rec_read_metadata(NC_GRP_INFO_T *grp);
static int read_type(NC_GRP_INFO_T *grp, hid_t hdf_typeid, char *type_name);

/**
* @internal Struct to track HDF5 object info, for
Expand Down Expand Up @@ -103,7 +105,7 @@ typedef struct {
* struct, either an existing one (for user-defined types) or a newly
* created one.
*
* @param h5 Pointer to HDF5 file info struct.
* @param h5_grp Pointer to group info struct.
* @param datasetid HDF5 dataset ID.
* @param type_info Pointer to pointer that gets type info struct.
*
Expand All @@ -114,7 +116,7 @@ typedef struct {
* @author Ed Hartnett
*/
static int
get_type_info2(NC_FILE_INFO_T *h5, hid_t datasetid, NC_TYPE_INFO_T **type_info)
get_type_info2(NC_GRP_INFO_T *h5_grp, hid_t datasetid, NC_TYPE_INFO_T **type_info)
{
NC_HDF5_TYPE_INFO_T *hdf5_type;
htri_t is_str, equal = 0;
Expand All @@ -123,7 +125,7 @@ get_type_info2(NC_FILE_INFO_T *h5, hid_t datasetid, NC_TYPE_INFO_T **type_info)
H5T_order_t order;
int t;

assert(h5 && type_info);
assert(h5_grp && type_info);

/* Because these H5T_NATIVE_* constants are actually function calls
* (!) in H5Tpublic.h, I can't initialize this array in the usual
Expand Down Expand Up @@ -231,11 +233,24 @@ get_type_info2(NC_FILE_INFO_T *h5, hid_t datasetid, NC_TYPE_INFO_T **type_info)
else
{
NC_TYPE_INFO_T *type;
NC_FILE_INFO_T *h5 = h5_grp->nc4_info;

/* This is a user-defined type. */
if((type = nc4_rec_find_hdf_type(h5, native_typeid)))
*type_info = type;

/* If we didn't find the type, then it's probably a transient
* type, stored in the dataset itself, so let's read it now */
if (type == NULL) {
/* If we still can't read the type, ignore it, it probably
* means this object is a reference */
if (read_type(h5_grp, native_typeid, ""))
return NC_EBADTYPID;

if((type = nc4_rec_find_hdf_type(h5, native_typeid)))
*type_info = type;
}

/* The type entry in the array of user-defined types already has
* an open data typeid (and native typeid), so close the ones we
* opened above. */
Expand Down Expand Up @@ -1592,7 +1607,7 @@ read_var(NC_GRP_INFO_T *grp, hid_t datasetid, const char *obj_name,
/* Learn all about the type of this variable. This will fail for
* HDF5 reference types, and then the var we just created will be
* deleted, thus ignoring HDF5 reference type objects. */
if ((retval = get_type_info2(var->container->nc4_info, hdf5_var->hdf_datasetid,
if ((retval = get_type_info2(var->container, hdf5_var->hdf_datasetid,
&var->type_info)))
BAIL(retval);

Expand Down Expand Up @@ -2057,6 +2072,14 @@ read_type(NC_GRP_INFO_T *grp, hid_t hdf_typeid, char *type_name)
LOG((4, "%s: type_name %s grp->hdr.name %s", __func__, type_name,
grp->hdr.name));

/* What is the class of this type, compound, vlen, etc. */
if ((class = H5Tget_class(hdf_typeid)) < 0)
return NC_EHDFERR;

/* Explicitly don't handle reference types */
if (class == H5T_REFERENCE)
return NC_EBADCLASS;

/* What is the native type for this platform? */
if ((native_typeid = H5Tget_native_type(hdf_typeid, H5T_DIR_DEFAULT)) < 0)
return NC_EHDFERR;
Expand Down Expand Up @@ -2086,9 +2109,6 @@ read_type(NC_GRP_INFO_T *grp, hid_t hdf_typeid, char *type_name)
if (H5Iinc_ref(hdf5_type->hdf_typeid) < 0)
return NC_EHDFERR;

/* What is the class of this type, compound, vlen, etc. */
if ((class = H5Tget_class(hdf_typeid)) < 0)
return NC_EHDFERR;
switch (class)
{
case H5T_STRING:
Expand Down
2 changes: 1 addition & 1 deletion nc_test4/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ SET(NC4_TESTS tst_dims tst_dims2 tst_dims3 tst_files tst_files4
tst_rename2 tst_rename3 tst_h5_endians tst_atts_string_rewrite tst_put_vars_two_unlim_dim
tst_hdf5_file_compat tst_fill_attr_vanish tst_rehash tst_types tst_bug324
tst_atts3 tst_put_vars tst_elatefill tst_udf tst_bug1442 tst_broken_files
tst_quantize)
tst_quantize tst_h_transient_types)

IF(HAS_PAR_FILTERS)
SET(NC4_tests $NC4_TESTS tst_alignment)
Expand Down
2 changes: 1 addition & 1 deletion nc_test4/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ tst_h_scalar tst_rename tst_rename2 tst_rename3 tst_h5_endians \
tst_atts_string_rewrite tst_hdf5_file_compat tst_fill_attr_vanish \
tst_rehash tst_filterparser tst_bug324 tst_types tst_atts3 \
tst_put_vars tst_elatefill tst_udf tst_put_vars_two_unlim_dim \
tst_bug1442 tst_quantize
tst_bug1442 tst_quantize tst_h_transient_types

if HAS_PAR_FILTERS
NC4_TESTS += tst_alignment
Expand Down
124 changes: 124 additions & 0 deletions nc_test4/tst_h_transient_types.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
/* This is part of the netCDF package. Copyright 2018 University
Corporation for Atmospheric Research/Unidata. See COPYRIGHT file for
conditions of use.

This program tests fixes for reading netCDF-4 files that contain
transient/unnamed datatypes embedded directly within datasets.

*/

#include "netcdf.h"
#include <config.h>
#include <nc_tests.h>
#include <err_macros.h>
#include <hdf5.h>
#include <stdbool.h>

#define FILE_NAME "tst_h_transient.h5"
#define VAR_NAME "var"
#define ENUM_NAME "bool_var"

/* Two-double record used as this test's complex-number value. The C99
 * standard `complex` is deliberately not used because MSVC has a
 * non-compliant implementation of it. */
typedef struct {
    double r; /* "r" member, presumably the real part */
    double i; /* "i" member, presumably the imaginary part */
} complex;

/**
 * Test driver for transient (uncommitted) HDF5 datatypes.
 *
 * Phase 1 uses the HDF5 API directly to create FILE_NAME containing two
 * scalar datasets whose datatypes are never committed to the file: a
 * compound {r, i} of doubles (VAR_NAME) and a two-member enum
 * (ENUM_NAME).
 *
 * Phase 2 opens the same file through the netCDF-4 API, checks that two
 * user-defined types are reported, and reads both values back.
 *
 * Every check uses the ERR macro from err_macros.h; SUMMARIZE_ERR and
 * FINAL_RESULTS come from the same test harness.
 * NOTE(review): ERR is presumed to report the failure and leave main() -
 * confirm against err_macros.h.
 *
 * @return whatever FINAL_RESULTS yields (defined outside this file).
 */
int
main(void)
{
    complex expected_z = {1, 2};
    bool expected_b = true;

    printf("\n*** Creating file with datasets that have transient datatypes.\n");
    {
        hid_t fileid, scalar_spaceid;
        hid_t dsetid, complex_dtype;
        hid_t enum_dtype, enum_dsetid;
        bool enum_value;

        /* Create new file, using default properties. */
        if ((fileid = H5Fcreate(FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0) ERR;

        /* Create compound datatype, but don't commit to file */
        if ((complex_dtype = H5Tcreate(H5T_COMPOUND, sizeof(complex))) < 0) ERR;
        if (H5Tinsert(complex_dtype, "r", 0, H5T_NATIVE_DOUBLE) < 0) ERR;
        if (H5Tinsert(complex_dtype, "i", sizeof(double), H5T_NATIVE_DOUBLE) < 0) ERR;

        /* Create dataset with transient datatype */
        if ((scalar_spaceid = H5Screate(H5S_SCALAR)) < 0) ERR;
        if ((dsetid = H5Dcreate2(fileid, VAR_NAME, complex_dtype, scalar_spaceid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) ERR;

        /* Write complex number to file */
        if ((H5Dwrite(dsetid, complex_dtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &expected_z)) < 0) ERR;

        /* Create enum datatype, but don't commit to file.
         * NOTE(review): the enum base is H5T_NATIVE_CHAR while the member
         * values are passed as pointers to `bool`; this assumes
         * sizeof(bool) == sizeof(char) - confirm for all target
         * platforms. */
        if ((enum_dtype = H5Tenum_create(H5T_NATIVE_CHAR)) < 0) ERR;
        enum_value = false;
        if (H5Tenum_insert(enum_dtype, "FALSE", &enum_value) < 0) ERR;
        enum_value = true;
        if (H5Tenum_insert(enum_dtype, "TRUE", &enum_value) < 0) ERR;

        /* Create dataset with transient enum datatype (the scalar
         * dataspace is shared with the compound dataset above). */
        if ((enum_dsetid = H5Dcreate2(fileid, ENUM_NAME, enum_dtype, scalar_spaceid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) ERR;

        /* Write enum to file */
        if ((H5Dwrite(enum_dsetid, enum_dtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &expected_b)) < 0) ERR;

        /* Close everything */
        if (H5Dclose(dsetid) < 0) ERR;
        if (H5Dclose(enum_dsetid) < 0) ERR;
        if (H5Sclose(scalar_spaceid) < 0) ERR;
        if (H5Tclose(complex_dtype) < 0) ERR;
        if (H5Tclose(enum_dtype) < 0) ERR;
        if (H5Fclose(fileid) < 0) ERR;
    }

    printf("*** Checking accessing file through netCDF-4 API...");
    {
        int ncid, varid, enumid;
        complex read_z;
        int num_types, class;
        int *typeids;
        nc_type base_nc_type;
        /* +1 leaves room for the terminating NUL after a name of
         * NC_MAX_NAME characters. */
        char name[NC_MAX_NAME + 1];
        size_t size, nfields;
        bool read_b;

        /* Request verbose library logging while the file is opened. */
        nc_set_log_level(4);
        if (nc_open(FILE_NAME, NC_NOWRITE, &ncid)) ERR;

        /* First call only asks how many user-defined types are known. */
        if (nc_inq_typeids(ncid, &num_types, NULL)) ERR;
        /* Verify there are two known: complex and bool enum */
        if (num_types != 2) ERR;

        /* Second call fetches the ids themselves. */
        typeids = malloc((size_t)num_types * sizeof *typeids);
        if (typeids == NULL) ERR;
        if (nc_inq_typeids(ncid, NULL, typeids)) ERR;

        /* Only checks that the first reported type can be inquired; the
         * returned details are not examined. */
        if (nc_inq_user_type(ncid, typeids[0], name, &size, &base_nc_type, &nfields, &class)) ERR;
        free(typeids);

        /* Verify that the dataset is present */
        if (nc_inq_varid(ncid, VAR_NAME, &varid)) ERR;

        /* Read complex variable */
        if (nc_get_var(ncid, varid, &read_z)) ERR;

        if (read_z.r != expected_z.r) ERR;
        if (read_z.i != expected_z.i) ERR;

        /* Verify that the enum dataset is present */
        if (nc_inq_varid(ncid, ENUM_NAME, &enumid)) ERR;

        /* Read and check enum variable */
        if (nc_get_var(ncid, enumid, &read_b)) ERR;
        if (read_b != expected_b) ERR;

        if (nc_close(ncid)) ERR;
    }
    SUMMARIZE_ERR;

    FINAL_RESULTS;
}