From e8bb351bd52018ab0f71f0eb837f71283789ccce Mon Sep 17 00:00:00 2001 From: Manuel Reis Date: Tue, 9 Jul 2024 13:39:04 +0200 Subject: [PATCH 1/5] Allow region and bucket inference with NCS3UNK S3 server type --- libdispatch/ds3util.c | 50 ++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/libdispatch/ds3util.c b/libdispatch/ds3util.c index e99e1433f5..ebc3853715 100644 --- a/libdispatch/ds3util.c +++ b/libdispatch/ds3util.c @@ -128,17 +128,24 @@ NC_s3urlrebuild(NCURI* url, NCS3INFO* s3, NCURI** newurlp) /* split the path by "/" */ if((stat = NC_split_delim(url->path,'/',pathsegments))) goto done; - /* Distinguish path-style from virtual-host style from s3: and from other. - Virtual: https://.s3..amazonaws.com/ (1) - or: https://.s3.amazonaws.com/ -- region defaults (to us-east-1) (2) - Path: https://s3..amazonaws.com// (3) - or: https://s3.amazonaws.com// -- region defaults to us-east-1 (4) - S3: s3:/// (5) - Google: https://storage.googleapis.com// (6) - or: gs3:/// (7) - Other: https://// (8) - */ - if(url->host == NULL || strlen(url->host) == 0) + /* Distinguish path-style from virtual-host style from s3: and from other. 
+ Virtual: + (1) https://<bucket>.s3.<region>.amazonaws.com/<path> + (2) https://<bucket>.s3.amazonaws.com/<path> -- region defaults (to us-east-1) + Path: + (3) https://s3.<region>.amazonaws.com/<bucket>/<path> + (4) https://s3.amazonaws.com/<bucket>/<path> -- region defaults to us-east-1 + S3: + (5) s3://<bucket>/<path> + Google: + (6) https://storage.googleapis.com/<bucket>/<path> + (7) gs3://<bucket>/<path> + Other: + (8) https://<host>/<bucket>/<path> + (9) https://<bucket>.s3.<region>.domain.example.com/<path> + (10)https://s3.<region>.example.com/<bucket>/<path> + */ + if(url->host == NULL || strlen(url->host) == 0) {stat = NC_EURL; goto done;} /* Reduce the host to standard form such as s3.amazonaws.com by pulling out the @@ -186,12 +193,21 @@ NC_s3urlrebuild(NCURI* url, NCS3INFO* s3, NCURI** newurlp) /* region is unknown */ /* bucket is unknown at this point */ svc = NCS3GS; - } else { /* Presume Format (8) */ - if((host = strdup(url->host))==NULL) - {stat = NC_ENOMEM; goto done;} - /* region is unknown */ - /* bucket is unknown */ - } + } else { /* Presume Formats (8),(9),(10) */ + if (nclistlength(hostsegments) > 3 && strcasecmp(nclistget(hostsegments, 1), "s3") == 0){ /* Format (9) */ + bucket = nclistremove(hostsegments, 0); /* bucket removed, so region is now at index 1 */ + region = nclistremove(hostsegments, 1); + host = strdup(url->host + strlen(bucket) + 1); /* skip the leading "bucket." (length of the name, not sizeof the pointer) */ + }else{ + if (nclistlength(hostsegments) > 2 && strcasecmp(nclistget(hostsegments, 0), "s3") == 0){ /* Format (10) */ + region = nclistremove(hostsegments, 1); + } + host = strdup(url->host); + } + if (host == NULL){ /* either strdup above may fail */ + stat = NC_ENOMEM; goto done; + } + } /* region = (1) from url, (2) s3->region, (3) default */ if(region == NULL && s3 != NULL) From 80d886a5d4aecd0e300b66c4beaa4ac0de912417 Mon Sep 17 00:00:00 2001 From: Manuel Reis Date: Tue, 9 Jul 2024 13:32:21 +0200 Subject: [PATCH 2/5] Recognize _unknown_ S3 server type --- include/ncs3sdk.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ncs3sdk.h b/include/ncs3sdk.h index ee72a77ab3..b3dfc52ea4 100644 --- a/include/ncs3sdk.h +++ b/include/ncs3sdk.h @@ -15,7 +15,7 @@ /* Track the server type, if known */ typedef enum NCS3SVC {NCS3UNK=0, /* unknown */ NCS3=1, /* s3.amazon.aws 
*/ - NCS3GS=0 /* storage.googleapis.com */ + NCS3GS=2 /* storage.googleapis.com */ } NCS3SVC; /* Opaque Handles */ From 6a8bcfb4a408546995eb001c7f56ff9297773bff Mon Sep 17 00:00:00 2001 From: Manuel Reis Date: Mon, 18 Nov 2024 09:40:41 +0100 Subject: [PATCH 3/5] Add dispatch layer for consolidated ZARR access --- libnczarr/CMakeLists.txt | 4 + libnczarr/zincludes.h | 1 + libnczarr/zinternal.h | 2 + libnczarr/zmetadata.c | 206 +++++++++++++++++++++++ libnczarr/zmetadata.h | 104 ++++++++++++ libnczarr/zmetadata2.c | 309 ++++++++++++++++++++++++++++++++++ libnczarr/zmetadata3.c | 349 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 975 insertions(+) create mode 100644 libnczarr/zmetadata.c create mode 100644 libnczarr/zmetadata.h create mode 100644 libnczarr/zmetadata2.c create mode 100644 libnczarr/zmetadata3.c diff --git a/libnczarr/CMakeLists.txt b/libnczarr/CMakeLists.txt index 97dc4d6510..0c8312688b 100644 --- a/libnczarr/CMakeLists.txt +++ b/libnczarr/CMakeLists.txt @@ -27,6 +27,9 @@ zgrp.c zinternal.c zmap.c zmap_file.c +zmetadata3.c +zmetadata2.c +zmetadata.c zodom.c zopen.c zprov.c @@ -44,6 +47,7 @@ zdispatch.h zincludes.h zinternal.h zmap.h +zmetadata.h zodom.h zprovenance.h zfilter.h diff --git a/libnczarr/zincludes.h b/libnczarr/zincludes.h index 882768703c..a5d4e9979f 100644 --- a/libnczarr/zincludes.h +++ b/libnczarr/zincludes.h @@ -49,6 +49,7 @@ extern "C" { #include "ncproplist.h" #include "zmap.h" +#include "zmetadata.h" #include "zinternal.h" #include "zfilter.h" #include "zformat.h" diff --git a/libnczarr/zinternal.h b/libnczarr/zinternal.h index 380f96a3f4..6fdda735e9 100644 --- a/libnczarr/zinternal.h +++ b/libnczarr/zinternal.h @@ -67,6 +67,7 @@ #define Z3METAROOT "/zarr.json" #define Z3OBJECT "zarr.json" #define Z3GROUP Z3OBJECT +#define Z3ATTRS Z3OBJECT #define Z3ARRAY Z3OBJECT /* Bytes codec name */ @@ -252,6 +253,7 @@ typedef struct NCZ_FILE_INFO { # define FLAG_XARRAYDIMS 8 NCZM_IMPL mapimpl; struct NCZ_Formatter* dispatcher; + 
struct NCZ_Metadata * metadata_handler; struct NCZ_META_HDR* metastate; /* Hold per-format state */ } NCZ_FILE_INFO_T; diff --git a/libnczarr/zmetadata.c b/libnczarr/zmetadata.c new file mode 100644 index 0000000000..070e170a8f --- /dev/null +++ b/libnczarr/zmetadata.c @@ -0,0 +1,206 @@ +/********************************************************************* + * Copyright 2018, UCAR/Unidata + * See netcdf/COPYRIGHT file for copying and redistribution conditions. + *********************************************************************/ + +#include "zmetadata.h" + +/**************************************************/ + +extern int NCZMD2_initialize(void); /* defined in zmetadata2.c */ +extern int NCZMD2_finalize(void); +extern int NCZMD3_initialize(void); /* defined in zmetadata3.c */ +extern int NCZMD3_finalize(void); + +/**************************************************/ +//////////////////////////////////////////////////// + +int NCZMD_initialize(void) +{ + int stat = NC_NOERR; + if((stat=NCZMD2_initialize())) goto done; + if((stat=NCZMD3_initialize())) goto done; +done: + return THROW(stat); +} + +int NCZMD_finalize(void) +{ + + int stat = NC_NOERR; + if((stat=NCZMD2_finalize())) goto done; + if((stat=NCZMD3_finalize())) goto done; +done: + return THROW(stat); +} + +// Returns the list of subgroups from *grp +int NCZMD_list_groups(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames) +{ + return zfile->metadata_handler->dispatcher->list_groups(zfile, grp, subgrpnames); +} + +// Returns the list of variables from grp +int NCZMD_list_variables(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *varnames) +{ + return zfile->metadata_handler->dispatcher->list_variables(zfile, grp, varnames); +} + + +///////////////////////////////////////////////////////////////////// +// Fetch JSON content from .zmetadata or storage +///////////////////////////////////////////////////////////////////// + +int NCZMD_fetch_json_group(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, NCjson **jgroup) +{ + int stat = NC_NOERR; 
+ char *key = NULL; + + if (grp && ((stat = NCZ_grpkey(grp, &key)) != NC_NOERR)) + goto done; + if (name && (stat = nczm_concat(key, name, &key))) + goto done; + + stat = zfile->metadata_handler->dispatcher->fetch_json_content(zfile, NCZMD_GROUP, key, jgroup); +done: + nullfree(key); + return stat; +} + +int NCZMD_fetch_json_attrs(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, NCjson **jattrs) +{ + int stat = NC_NOERR; + char *key = NULL; + + if (grp && ((stat = NCZ_grpkey(grp, &key)) != NC_NOERR)) + goto done; + if (name && (stat = nczm_concat(key, name, &key))) + goto done; + + stat = zfile->metadata_handler->dispatcher->fetch_json_content(zfile, NCZMD_ATTRS, key, jattrs); +done: + nullfree(key); + return stat; +} + +int NCZMD_fetch_json_array(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, NCjson **jarray) +{ + int stat = NC_NOERR; + char *key = NULL; + + if (grp && ((stat = NCZ_grpkey(grp, &key)) != NC_NOERR)) + goto done; + if (name && (stat = nczm_concat(key, name, &key))) + goto done; + + stat = zfile->metadata_handler->dispatcher->fetch_json_content(zfile, NCZMD_ARRAY, key, jarray); +done: + nullfree(key); + return stat; +} + +//////////////////////////////////////////////////////////////////////////// + +int NCZMD_is_metadata_consolidated(NCZ_FILE_INFO_T *zfile) +{ + NCZ_Metadata *zmd = NULL; + zmd = zfile->metadata_handler; + if (zmd == NULL || + zmd->jcsl == NULL || + NCJsort(zmd->jcsl) != NCJ_DICT || + !(zmd->dispatcher->flags & ZARR_CONSOLIDATED)) + { + return NC_ENOOBJECT; + } + return NC_NOERR; +} + +int NCZMD_get_metadata_format(NCZ_FILE_INFO_T *zfile, int *zarrformat) +{ // Only pure Zarr is determined + + NCZ_Metadata *zmd = zfile->metadata_handler; + if ( !zmd || !zmd->dispatcher ) { + return NC_EFILEMETA; + } + + + if (zmd->dispatcher->zarr_format >= ZARRFORMAT2) + { + *zarrformat = zmd->dispatcher->zarr_format; + return NC_NOERR; + } + + // At this point, we know that version 3 cannot be + // otherwise the zarr.json 
would have been found + // when setting the handler. + // Last thing to do is to look for: + // .zattrs, .zgroup or .zarray + + if (nczmap_exists(zfile->map, "/" Z2ATTRS) != NC_NOERR && nczmap_exists(zfile->map, "/" Z2GROUP) != NC_NOERR && nczmap_exists(zfile->map, "/" Z2ARRAY) != NC_NOERR) /* nczmap_exists yields NC_NOERR when the object exists: fail only if none is present */ + { + return NC_ENOTZARR; + } + + *zarrformat = ZARRFORMAT2; + return NC_NOERR; +} + +int NCZMD_set_metadata_handler(NCZ_FILE_INFO_T *zfile, const NCZ_Metadata **mdhandlerp) +{ + int stat = NC_NOERR; + const NCZ_Metadata_Dispatcher *zmd_dispatcher = NULL; + NCjson *jcsl = NULL; + + if (zfile->metadata_handler != NULL) + { + stat = NC_EOBJECT; + goto done; + } + + zmd_dispatcher = NCZ_metadata_handler2; + + if ((stat = NCZ_downloadjson(zfile->map, Z2METADATA, &jcsl)) != NC_NOERR || jcsl == NULL) + { + if ((stat = NCZ_downloadjson(zfile->map, Z3METADATA, &jcsl)) != NC_NOERR || jcsl == NULL) + { + /* We've tried to get any json + objects for consolidated access + but they seem to be absent */ + goto done; + } + NCjson * jtmp = NULL; + if (NCJsort(jcsl) != NCJ_DICT || NCJdictget(jcsl,"consolidated_metadata", &jtmp) || jtmp == NULL || NCJsort(jtmp) != NCJ_DICT) /* jtmp stays NULL when the key is absent */ + { + zmd_dispatcher = NCZ_metadata_handler3; + } + else + { + zmd_dispatcher = NCZ_csl_metadata_handler3; + } + } + else + { + zmd_dispatcher = NCZ_csl_metadata_handler2; + } + + if (jcsl != NULL && NCJsort(jcsl) == NCJ_DICT) + { + NCZ_Metadata *zmdh = NULL; + if ((zmdh = (NCZ_Metadata *)calloc(1, sizeof(NCZ_Metadata))) == NULL) + { + stat = NC_ENOMEM; + goto done; + } + zmdh->jcsl = jcsl; + zmdh->dispatcher = zmd_dispatcher; + + *mdhandlerp = (const NCZ_Metadata *)zmdh; + } + else + { + NCJreclaim(jcsl); // Free jcsl if not assigned + } + +done: + return stat; +} diff --git a/libnczarr/zmetadata.h b/libnczarr/zmetadata.h new file mode 100644 index 0000000000..4b1a6bfe79 --- /dev/null +++ b/libnczarr/zmetadata.h @@ -0,0 +1,104 @@ +/* Copyright 2018-2018 University Corporation for Atmospheric + Research/Unidata. 
*/ + + +#ifndef ZMETADATA_H +#define ZMETADATA_H +#include "zincludes.h" +#include "ncjson.h" +#include "zinternal.h" + +/* +Notes on internal architecture. + +Encapsulating Zarr metadata operations across versions. +This allows the same interface to be used both for consolidated +access and for fetching each and every object on the filesystem. + +The dispatcher is defined by the type NCZ_Metadata_Dispatcher. +That dispatcher allows the Zarr independent code to be +isolated from the code handling the json files. +The table has the following groups of entries: +1. List variables within a group +2. List subgroups within a group +3. Retrieve JSON representation of (sub)groups, arrays and attributes; + depending on the implementation it might require fetching the content + of the json file or processing the consolidated json to retrieve + the correct part + Note: This is also the case of v3, the elements will be extracted from zarr.json + +*/ + +/* This is the version of the metadata table. It should be changed + * when new functions are added to the metadata table. 
*/ +#ifndef NCZ_METADATA_VERSION +#define NCZ_METADATA_VERSION 1 +#endif /*NCZ_METADATA_VERSION*/ + +#define Z2METADATA "/.zmetadata" +#define Z3METADATA "/zarr.json" + +typedef enum +{ + NCZMD_NULL, + NCZMD_GROUP, + NCZMD_ATTRS, + NCZMD_ARRAY +} NCZMD_MetadataType; + +typedef struct NCZ_Metadata_Dispatcher +{ + int zarr_format; + int dispatch_version; /* Version of the dispatch table */ + size64_t flags; +#define ZARR_NOT_CONSOLIDATED 0 +#define ZARR_CONSOLIDATED 1 + + int (*list_groups)(NCZ_FILE_INFO_T *, NC_GRP_INFO_T *, NClist *subgrpnames); + int (*list_variables)(NCZ_FILE_INFO_T *, NC_GRP_INFO_T *, NClist *varnames); + + int (*fetch_json_content)(NCZ_FILE_INFO_T *, NCZMD_MetadataType, const char *name, NCjson **jobj); +} NCZ_Metadata_Dispatcher; + +typedef struct NCZ_Metadata +{ + NCjson *jcsl; // Internal JSON configuration + const NCZ_Metadata_Dispatcher *dispatcher; +} NCZ_Metadata; + +// "normal" handlers +extern const NCZ_Metadata_Dispatcher *NCZ_metadata_handler2; +extern const NCZ_Metadata_Dispatcher *NCZ_metadata_handler3; + +// "consolidated" metadata handlers +extern const NCZ_Metadata_Dispatcher *NCZ_csl_metadata_handler2; +extern const NCZ_Metadata_Dispatcher *NCZ_csl_metadata_handler3; + +#if defined(__cplusplus) +extern "C" +{ +#endif + +/* Called by nc_initialize and nc_finalize respectively */ +extern int NCZMD_initialize(void); +extern int NCZMD_finalize(void); + +extern int NCZMD_list_groups(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames); +extern int NCZMD_list_variables(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *varnames); + +extern int NCZMD_fetch_json_group(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, NCjson **jgroup); +extern int NCZMD_fetch_json_attrs(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, NCjson **jattrs); +extern int NCZMD_fetch_json_array(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, const char *name, NCjson **jarrays); + 
+/**************************************************/ + +/* Inference for the Metadata handler */ +extern int NCZMD_is_metadata_consolidated(NCZ_FILE_INFO_T *zfile); +extern int NCZMD_get_metadata_format(NCZ_FILE_INFO_T *zfile, int *zarrformat); // Only pure Zarr is determined +extern int NCZMD_set_metadata_handler(NCZ_FILE_INFO_T *zfile, const NCZ_Metadata **mdhandlerp); + +#if defined(__cplusplus) +} +#endif + +#endif /* ZMETADATA_H */ diff --git a/libnczarr/zmetadata2.c b/libnczarr/zmetadata2.c new file mode 100644 index 0000000000..016fabd232 --- /dev/null +++ b/libnczarr/zmetadata2.c @@ -0,0 +1,309 @@ +/********************************************************************* + * Copyright 2018, UCAR/Unidata + * See netcdf/COPYRIGHT file for copying and redistribution conditions. + *********************************************************************/ + +#include "zmetadata.h" + +/**************************************************/ + +extern int NCZF2_initialize(void); +extern int NCZF2_finalize(void); + +int NCZMD_v2_list_groups(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames); +int NCZMD_v2_csl_list_groups(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames); + +int NCZMD_v2_list_variables(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames); +int NCZMD_v2_csl_list_variables(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames); + + +int v2_json_content(NCZ_FILE_INFO_T *zfile, NCZMD_MetadataType zarr_obj_type, const char *key, NCjson **jobj); +int v2_csl_json_content(NCZ_FILE_INFO_T *zfile, NCZMD_MetadataType zarr_obj_type, const char *key, NCjson **jobj); + +/**************************************************/ + +static const NCZ_Metadata_Dispatcher NCZ_md2_table = { + ZARRFORMAT2, + NCZ_METADATA_VERSION, /* Version of the dispatch table */ + ZARR_NOT_CONSOLIDATED, /* Flags*/ + + .list_groups = NCZMD_v2_list_groups, + .list_variables = NCZMD_v2_list_variables, + + .fetch_json_content = v2_json_content, +}; + 
+static const NCZ_Metadata_Dispatcher NCZ_csl_md2_table = { + ZARRFORMAT2, + NCZ_METADATA_VERSION, /* Version of the dispatch table */ + ZARR_CONSOLIDATED, /* Flags*/ + + .list_groups = NCZMD_v2_csl_list_groups, + .list_variables = NCZMD_v2_csl_list_variables, + + .fetch_json_content = v2_csl_json_content, +}; + +const NCZ_Metadata_Dispatcher *NCZ_metadata_handler2 = &NCZ_md2_table; +const NCZ_Metadata_Dispatcher *NCZ_csl_metadata_handler2 = &NCZ_csl_md2_table; + +/******************************************************/ + +int +NCZMD2_initialize(void) +{ + return NC_NOERR; +} + +int +NCZMD2_finalize(void) +{ + return NC_NOERR; +} + +//////////////////////////////////////////////////// + +int NCZMD_v2_list_groups(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames) +{ + size_t i; + int stat = NC_NOERR; + char *grpkey = NULL; + char *subkey = NULL; + char *zgroup = NULL; + NClist *matches = nclistnew(); + + /* Compute the key for the grp */ + if ((stat = NCZ_grpkey(grp, &grpkey))) + goto done; + if ((stat = nczmap_list(zfile->map, grpkey, matches))) + goto done; + for (i = 0; i < nclistlength(matches); i++) + { + const char *name = nclistget(matches, i); + if (name[0] == NCZM_DOT) + continue; /* zarr/nczarr specific */ + /* See if name/.zgroup exists */ + if ((stat = nczm_concat(grpkey, name, &subkey))) + goto done; + if ((stat = nczm_concat(subkey, Z2GROUP, &zgroup))) + goto done; + if ((stat = nczmap_exists(zfile->map, zgroup)) == NC_NOERR) + nclistpush(subgrpnames, strdup(name)); + stat = NC_NOERR; + nullfree(subkey); + subkey = NULL; + nullfree(zgroup); + zgroup = NULL; + } +done: + nullfree(grpkey); + nullfree(subkey); + nullfree(zgroup); + nclistfreeall(matches); + return stat; +} + +int NCZMD_v2_csl_list_groups(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames) +{ + size_t i; + int stat = NC_NOERR; + char *grpkey = NULL; + char *subkey = NULL; + char *zgroup = NULL; + NClist *matches = nclistnew(); + /* Compute the key for the grp */ 
+ if ((stat = NCZ_grpkey(grp, &grpkey))) + goto done; + const char *group = grpkey + (grpkey[0] == '/'); + size_t lgroup = strlen(group); + + const NCjson *jmetadata = NULL; + NCJdictget(zfile->metadata_handler->jcsl, "metadata", &jmetadata); + for (i = 0; i < NCJarraylength(jmetadata); i += 2) + { + NCjson *jname = NCJith(jmetadata, i); + const char *fullname = NCJstring(jname); + size_t lfullname = strlen(fullname); + + if (lfullname < lgroup || + strncmp(fullname, group, lgroup) || + (lgroup > 0 && fullname[lgroup] != NCZM_SEP[0])) + { + continue; + } + char *start = fullname + lgroup + (lgroup > 0); + char *end = strchr(start, NCZM_SEP[0]); + if (end == NULL) + continue; + size_t lname = end - start; + // Ends with "/.zgroup + if (strncmp(Z2METAROOT, end, sizeof(Z2METAROOT)) == 0) + { + nclistpush(subgrpnames, strndup(start, lname)); + } + } +done: + nullfree(grpkey); + nullfree(subkey); + nullfree(zgroup); + nclistfreeall(matches); + return stat; +} + +int NCZMD_v2_list_variables(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *varnames) +{ + int stat = NC_NOERR; + size_t i; + char *grpkey = NULL; + char *varkey = NULL; + char *zarray = NULL; + NClist *matches = nclistnew(); + + /* Compute the key for the grp */ + if ((stat = NCZ_grpkey(grp, &grpkey))) + goto done; + /* Get the map and search group */ + if ((stat = nczmap_list(zfile->map, grpkey, matches))) + goto done; + for (i = 0; i < nclistlength(matches); i++) + { + const char *name = nclistget(matches, i); + if (name[0] == NCZM_DOT) + continue; /* zarr/nczarr specific */ + /* See if name/.zarray exists */ + if ((stat = nczm_concat(grpkey, name, &varkey))) + goto done; + if ((stat = nczm_concat(varkey, Z2ARRAY, &zarray))) + goto done; + switch (stat = nczmap_exists(zfile->map, zarray)) + { + case NC_NOERR: + nclistpush(varnames, strdup(name)); + break; + case NC_ENOOBJECT: + stat = NC_NOERR; + break; /* ignore */ + default: + goto done; + } + nullfree(varkey); + varkey = NULL; + nullfree(zarray); + 
zarray = NULL; + } + +done: + nullfree(grpkey); + nullfree(varkey); + nullfree(zarray); + nclistfreeall(matches); + return stat; +} + +int NCZMD_v2_csl_list_variables(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *varnames) +{ + size_t i; + int stat = NC_NOERR; + char *grpkey = NULL; + char *varkey = NULL; + char *zarray = NULL; + NClist *matches = nclistnew(); + /* Compute the key for the grp */ + if ((stat = NCZ_grpkey(grp, &grpkey))) + goto done; + const char *group = grpkey + (grpkey[0] == '/'); + size_t lgroup = strlen(group); + + const NCjson *jmetadata = NULL; + NCJdictget(zfile->metadata_handler->jcsl, "metadata", &jmetadata); + for (i = 0; i < NCJarraylength(jmetadata); i += 2) + { + NCjson *jname = NCJith(jmetadata, i); + const char *fullname = NCJstring(jname); + size_t lfullname = strlen(fullname); + if (lfullname < lgroup || + strncmp(fullname, group, lgroup) || + (lgroup > 0 && fullname[lgroup] != NCZM_SEP[0])) + { + continue; + } + char *start = fullname + lgroup + (lgroup > 0); + char *end = strchr(start, NCZM_SEP[0]); + if (end == NULL) + continue; + size_t lname = end - start; + // Ends with ".zarray" + if (strncmp("/" Z2ARRAY, end, sizeof("/" Z2ARRAY)) == 0) + { + nclistpush(varnames, strndup(start, lname)); + } + } +done: + nullfree(grpkey); + nullfree(varkey); + nullfree(zarray); + nclistfreeall(matches); + return stat; +} + +// Static function only valid for V2! 
+static int zarr_obj_type2suffix(NCZMD_MetadataType zarr_obj_type, const char **suffix){ + switch (zarr_obj_type) + { + case NCZMD_GROUP: + *suffix = Z2GROUP; + break; + case NCZMD_ATTRS: + *suffix = Z2ATTRS; + break; + case NCZMD_ARRAY: + *suffix = Z2ARRAY; + break; + default: + return NC_EINVAL; // Invalid type + } + return NC_NOERR; +} + +int v2_csl_json_content(NCZ_FILE_INFO_T *zfile, NCZMD_MetadataType zobj_t, const char *prefix, NCjson **jobj) +{ + int stat = NC_NOERR; + NCjson *jtmp = NULL; + const char *suffix; + char * key = NULL; + if ( (stat = zarr_obj_type2suffix(zobj_t, &suffix)) + ||(stat = nczm_concat(prefix, suffix, &key))){ + return stat; + } + + if (NCJdictget(zfile->metadata_handler->jcsl, "metadata", &jtmp) == 0 + && jtmp && NCJsort(jtmp) == NCJ_DICT) + { + NCjson *tmp = NULL; + if ((stat = NCJdictget(jtmp, key + (key[0] == '/'), &tmp))) + goto done; + if (tmp) + NCJclone(tmp, jobj); + } +done: + nullfree(key); + return stat; + +} + +int v2_json_content(NCZ_FILE_INFO_T *zfile, NCZMD_MetadataType zobj_t, const char *prefix, NCjson **jobj) +{ + int stat = NC_NOERR; + const char *suffix; + char * key = NULL; + if ((stat = zarr_obj_type2suffix(zobj_t, &suffix)) + || (stat = nczm_concat(prefix, suffix, &key))){ + goto done; + } + + stat = NCZ_downloadjson(zfile->map, key, jobj); +done: + nullfree(key); + return stat; +} +//////////////////////////////////////////////////////////////////////////// \ No newline at end of file diff --git a/libnczarr/zmetadata3.c b/libnczarr/zmetadata3.c new file mode 100644 index 0000000000..fd120c125a --- /dev/null +++ b/libnczarr/zmetadata3.c @@ -0,0 +1,349 @@ +/********************************************************************* + * Copyright 2018, UCAR/Unidata + * See netcdf/COPYRIGHT file for copying and redistribution conditions. 
+ *********************************************************************/ + +#include "zmetadata.h" + +/**************************************************/ + +extern int NCZF3_initialize(void); +extern int NCZF3_finalize(void); + +int NCZMD_v3_list_groups(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames); +int NCZMD_v3_csl_list_groups(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames); + +int NCZMD_v3_list_variables(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames); +int NCZMD_v3_csl_list_variables(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames); + + +int v3_json_content(NCZ_FILE_INFO_T *zfile, NCZMD_MetadataType zarr_obj_type, const char *key, NCjson **jobj); +int v3_csl_json_content(NCZ_FILE_INFO_T *zfile, NCZMD_MetadataType zarr_obj_type, const char *key, NCjson **jobj); + +/**************************************************/ + +static const NCZ_Metadata_Dispatcher NCZ_md3_table = { + ZARRFORMAT3, + NCZ_METADATA_VERSION, /* Version of the dispatch table */ + ZARR_NOT_CONSOLIDATED, /* Flags*/ + + NCZMD_v3_list_groups, + NCZMD_v3_list_variables, + + .fetch_json_content = v3_json_content, +}; + +static const NCZ_Metadata_Dispatcher NCZ_csl_md3_table = { + ZARRFORMAT3, + NCZ_METADATA_VERSION, /* Version of the dispatch table */ + ZARR_CONSOLIDATED, /* Flags*/ + + NCZMD_v3_csl_list_groups, + NCZMD_v3_csl_list_variables, + + .fetch_json_content = v3_csl_json_content, +}; + +const NCZ_Metadata_Dispatcher *NCZ_metadata_handler3 = &NCZ_md3_table; +const NCZ_Metadata_Dispatcher *NCZ_csl_metadata_handler3 = &NCZ_csl_md3_table; +int +NCZMD3_initialize(void) +{ + return NC_NOERR; +} + +int +NCZMD3_finalize(void) +{ + return NC_NOERR; +} + +//////////////////////////////////////////////////// + +int NCZMD_v3_list_groups(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames) +{ + size_t i; + int stat = NC_NOERR; + char *grpkey = NULL; + char *subkey = NULL; + char *zgroup = NULL; + NClist *matches = 
nclistnew(); + + /* Compute the key for the grp */ + if ((stat = NCZ_grpkey(grp, &grpkey))) + goto done; + if ((stat = nczmap_list(zfile->map, grpkey, matches))) + goto done; + for (i = 0; i < nclistlength(matches); i++) + { + const char *name = nclistget(matches, i); + if (name[0] == NCZM_DOT) + continue; /* zarr/nczarr specific */ + /* See if name/.zgroup exists */ + if ((stat = nczm_concat(grpkey, name, &subkey))) + goto done; + if ((stat = nczm_concat(subkey, Z3GROUP, &zgroup))) + goto done; + if ((stat = nczmap_exists(zfile->map, zgroup)) == NC_NOERR) + nclistpush(subgrpnames, strdup(name)); + stat = NC_NOERR; + nullfree(subkey); + subkey = NULL; + nullfree(zgroup); + zgroup = NULL; + } +done: + nullfree(grpkey); + nullfree(subkey); + nullfree(zgroup); + nclistfreeall(matches); + return stat; +} + +int extract_node_types(const NCjson *jobj, const char * prefix, const char *node_type, NClist **nodenames){ + size_t lprefix = strlen(prefix); + for (int i = 0; i < NCJarraylength(jobj); i += 2) { + const NCjson *jname = NCJith(jobj, i); + const NCjson *jvalue = NCJith(jobj, i+1); + + //check if json types are the expected string for key and dict for value + if (NCJsort(jname) != NCJ_STRING || NCJsort(jvalue) != NCJ_DICT){ + continue; + } + // check if it value dict has key "node_type" and matches "array" + if (NCJdictget(jvalue, "node_type", &jvalue) || NCJsort(jvalue) != NCJ_STRING + || strncmp(node_type, NCJstring(jvalue), strlen(node_type + 1))){ + continue; + } + // check if prefix matches item's name but ensure it's not looking at itself + if(strncmp(prefix, NCJstring(jname),lprefix) || lprefix == strlen(NCJstring(jname))){ + continue; + } + + // if we've reach here it means it's the correct type for sure + // but we must check if it's directly under our current group + + const char *start = NCJstring(jname) + lprefix + (lprefix > 0); + if (strchr(start, NCZM_SEP[0])) // Zarr separator found => not a direct group under grpkey + continue; + + // It's a subgroup 
+ const size_t lname = strlen(start); + nclistpush(nodenames, strndup(start, lname)); + } + return NC_NOERR; +} + +int NCZMD_v3_csl_list_groups(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *subgrpnames) +{ + int stat = NC_NOERR; + char *grpkey = NULL; + /* Compute the key for the grp */ + if ((stat = NCZ_grpkey(grp, &grpkey))) + goto done; + // In the consolidated representation '/' is never observed as the key's first char + const char *group = grpkey + (grpkey[0] == '/'); + const size_t lgroup = strlen(group); + + const NCjson *jmetadata = NULL; + if (( stat = NCJdictget(zfile->metadata_handler->jcsl, "consolidated_metadata", &jmetadata) ) || jmetadata == NULL || NCJsort(jmetadata) != NCJ_DICT){ + goto done; + } + + if (( stat = NCJdictget(jmetadata, "metadata", &jmetadata) ) || jmetadata == NULL || NCJsort(jmetadata) != NCJ_DICT){ + goto done; + } + + + // for each key, check if is a dict and has node_type ="group" + extract_node_types(jmetadata, group, "group", subgrpnames); + +done: + nullfree(grpkey); + return stat; +} + + +int NCZMD_v3_list_variables(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *varnames) +{ + int stat = NC_NOERR; + size_t i; + char *grpkey = NULL; + char *varkey = NULL; + char *zarray = NULL; + NClist *matches = nclistnew(); + + /* Compute the key for the grp */ + if ((stat = NCZ_grpkey(grp, &grpkey))) + goto done; + /* Get the map and search group */ + if ((stat = nczmap_list(zfile->map, grpkey, matches))) + goto done; + for (i = 0; i < nclistlength(matches); i++) + { + const char *name = nclistget(matches, i); + if (name[0] == NCZM_DOT) + continue; /* zarr/nczarr specific */ + /* See if name/.zarray exists */ + if ((stat = nczm_concat(grpkey, name, &varkey))) + goto done; + if ((stat = nczm_concat(varkey, Z3ARRAY, &zarray))) + goto done; + switch (stat = nczmap_exists(zfile->map, zarray)) + { + case NC_NOERR: + nclistpush(varnames, strdup(name)); + break; + case NC_ENOOBJECT: + stat = NC_NOERR; + break; /* ignore */ + default: 
+ goto done; + } + nullfree(varkey); + varkey = NULL; + nullfree(zarray); + zarray = NULL; + } + +done: + nullfree(grpkey); + nullfree(varkey); + nullfree(zarray); + nclistfreeall(matches); + return stat; +} + +int NCZMD_v3_csl_list_variables(NCZ_FILE_INFO_T *zfile, NC_GRP_INFO_T *grp, NClist *varnames) +{ + size_t i; + int stat = NC_NOERR; + char *grpkey = NULL; + /* Compute the key for the grp */ + if ((stat = NCZ_grpkey(grp, &grpkey))) + goto done; + // In the consolidated representation '/' is never observed as the key's first char + const char *group = grpkey + (grpkey[0] == '/'); + const size_t lgroup = strlen(group); + + const NCjson *jmetadata = NULL; + if (( stat = NCJdictget(zfile->metadata_handler->jcsl, "consolidated_metadata", &jmetadata) ) || jmetadata == NULL || NCJsort(jmetadata) != NCJ_DICT){ + goto done; + } + + if (( stat = NCJdictget(jmetadata, "metadata", &jmetadata) ) || jmetadata == NULL || NCJsort(jmetadata) != NCJ_DICT){ + goto done; + } + + // for each key, check if is a dict and has node_type ="array" + extract_node_types(jmetadata, group, "array", varnames); + +done: + nullfree(grpkey); + return stat; +} + +// Returns error if consolidated metadata is not found +// node_name should match item name on the consolidated representation, meaning no '/' prefix +static int get_consolidated_json_node(const NCjson * zobj, const char* c_node_name ,const char * node_type, const NCjson ** jitem){ + size_t i; + int stat = NC_NOERR; + + // In the consolidated representation '/' is never observed as the key's first char + const char *node_name = c_node_name + (c_node_name[0] == '/'); + const size_t lnode_name = strlen(node_name); + + const NCjson *jmetadata = NULL; + if (( stat = NCJdictget(zobj, "consolidated_metadata", &jmetadata) ) || jmetadata == NULL || NCJsort(jmetadata) != NCJ_DICT){ + goto done; + } + + if (( stat = NCJdictget(jmetadata, "metadata", &jmetadata) ) || jmetadata == NULL || NCJsort(jmetadata) != NCJ_DICT){ + goto done; + } + + // 
for each key, check if is a dict and has matching node_type + for (i = 0; i < NCJarraylength(jmetadata); i += 2) { + const NCjson *jname = NCJith(jmetadata, i); + if (NCJsort(jname) != NCJ_STRING || strncmp(NCJstring(jname),node_name, lnode_name+1)){ + continue; + } + + const NCjson *jvalue = NCJith(jmetadata, i+1); + //check if json types are the expected string for key and dict for value + if (NCJsort(jname) != NCJ_STRING ||NCJsort(jvalue) != NCJ_DICT){ + continue; + } + + // check if it value dict has key "node_type" and matches node_type ["array","group"] + if (NCJsort(jvalue) != NCJ_DICT || NCJdictget(jvalue, "node_type", &jname) || jname == NULL + || NCJsort(jname) != NCJ_STRING || strncmp(node_type, NCJstring(jname),strlen(node_type))){ + continue; + } + + *jitem = jvalue; + break; //object found! + } +done: + return stat; +} + +// Static function only valid for v3! +static int zarr_obj_type2suffix(NCZMD_MetadataType zarr_obj_type, const char **suffix){ + switch (zarr_obj_type) + { + case NCZMD_GROUP: + *suffix = Z3GROUP; + break; + case NCZMD_ATTRS: + *suffix = Z3ATTRS; + break; + case NCZMD_ARRAY: + *suffix = Z3ARRAY; + break; + default: + return NC_EINVAL; // Invalid type + } + return NC_NOERR; +} + +int v3_csl_json_content(NCZ_FILE_INFO_T *zfile, NCZMD_MetadataType zobj_t, const char *prefix, NCjson **jobj) +{ + int stat = NC_NOERR; + const char *suffix; + char * key = NULL; + if ( (stat = zarr_obj_type2suffix(zobj_t, &suffix)) + ||(stat = nczm_concat(prefix, suffix, &key))){ + goto done; + } + + if (prefix == NULL || (*prefix == '/' && strlen(prefix)==1) ){ + stat = NCJclone(zfile->metadata_handler->jcsl, jobj); + goto done; + } + + + stat = get_consolidated_json_node(zfile->metadata_handler->jcsl, prefix, "group", jobj); + +done: + nullfree(key); + return stat; + +} + +int v3_json_content(NCZ_FILE_INFO_T *zfile, NCZMD_MetadataType zobj_t, const char *prefix, NCjson **jobj) +{ + int stat = NC_NOERR; + const char *suffix; + char * key = NULL; + if ((stat 
= zarr_obj_type2suffix(zobj_t, &suffix)) + || (stat = nczm_concat(prefix, suffix, &key))){ + goto done; + } + + stat = NCZ_downloadjson(zfile->map, key, jobj); +done: + nullfree(key); + return stat; +} +//////////////////////////////////////////////////////////////////////////// \ No newline at end of file From a3de0590d7d2a8889ff6f2f7c27cd605ca79994f Mon Sep 17 00:00:00 2001 From: Manuel Reis Date: Mon, 18 Nov 2024 09:44:10 +0100 Subject: [PATCH 4/5] Use Zarr Metadata layer instead of fetching zarr json files directly --- libnczarr/zformat2.c | 46 ++++++++++++++++++++------------------------ libnczarr/zformat3.c | 26 +++++++++++-------------- libnczarr/zinfer.c | 29 ++++++---------------------- 3 files changed, 38 insertions(+), 63 deletions(-) diff --git a/libnczarr/zformat2.c b/libnczarr/zformat2.c index 58bb494585..1692f02d0d 100644 --- a/libnczarr/zformat2.c +++ b/libnczarr/zformat2.c @@ -1046,17 +1046,19 @@ read_grp_contents(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) purezarr = (zfile->flags & FLAG_PUREZARR); /* Read .zgroup (might be NULL) */ - if((stat = NCZ_grpkey(grp,&grppath))) goto done; - if((stat = nczm_concat(grppath,Z2GROUP,&key))) goto done; - stat=NCZ_downloadjson(zfile->map,key,&jgroup); - nullfree(key); key = NULL; - if(stat) goto done; - - /* Read .zattrs (might be NULL) */ - if((stat = nczm_concat(grppath,Z2ATTRS,&key))) goto done; - stat=NCZ_downloadjson(zfile->map,key,&jatts); - nullfree(key); key = NULL; - if(stat) goto done; + NCZMD_fetch_json_group(zfile, grp, NULL, &jgroup); + // if((stat = NCZ_grpkey(grp,&grppath))) goto done; + // if((stat = nczm_concat(grppath,Z2GROUP,&key))) goto done; + // stat=NCZ_downloadjson(zfile->map,key,&jgroup); + // nullfree(key); key = NULL; + // if(stat) goto done; + + /* Read /.zattrs (might be NULL) */ + NCZMD_fetch_json_attrs(zfile, grp, NULL, &jatts); + // if((stat = nczm_concat(grppath,Z2ATTRS,&key))) goto done; + // stat=NCZ_downloadjson(zfile->map,key,&jatts); + // nullfree(key); key = NULL; + // 
if(stat) goto done; /* Extract jatts and _nczarr_XXX values */ if((stat = locate_nczarr_grp_info(file, grp, jgroup, jatts, &jzgroup, &jzatts, &jzsuper, &ncv21))) goto done; @@ -1224,20 +1226,13 @@ read_var1(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, const char* varname) purezarr = (zfile->flags & FLAG_PUREZARR)?1:0; - /* Construct var path */ - if((stat = NCZ_grpkey(grp,&grppath))) goto done; - if((stat = nczm_concat(grppath,varname,&varpath))) goto done; - /* Construct the path to the zarray object */ - if((stat = nczm_concat(varpath,Z2ARRAY,&key))) goto done; - /* Download the .zarray object */ - if((stat=NCZ_readdict(zfile->map,key,&jvar))) goto done; - nullfree(key); key = NULL; + /* Download the /.zarray object */ + if((stat=NCZMD_fetch_json_array(zfile,grp,varname,&jvar))) goto done; if(jvar == NULL) {stat = NC_ENOTZARR; goto done;} /* Download the .zattrs object */ - /* Construct the path to .zattrs object */ - if((stat = nczm_concat(varpath,Z2ATTRS,&key))) goto done; - if((stat=NCZ_readdict(zfile->map,key,&jatts))) goto done; - nullfree(key); key = NULL; + + /* Construct the path to /.zattrs object */ + if((stat=NCZMD_fetch_json_attrs(zfile,grp,varname,&jatts))) goto done; /* locate the _nczarr_XXX values */ if((stat = locate_nczarr_array_info(file,grp,jvar,jatts,&jzarray,&jzatts,&nczv21))) goto done; @@ -1873,9 +1868,10 @@ parse_group_content_pure(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varn ZTRACE(3,"zfile=%s grp=%s |varnames|=%u |subgrps|=%u",zfile->common.file->controller->path,grp->hdr.name,(unsigned)nclistlength(varnames),(unsigned)nclistlength(subgrps)); nclistclear(varnames); - if((stat = searchvars(zfile,grp,varnames))) goto done; + if((stat = NCZMD_list_variables(zfile,grp,varnames))) goto done; nclistclear(subgrps); - if((stat = searchsubgrps(zfile,grp,subgrps))) goto done; + if((stat = NCZMD_list_groups(zfile,grp,subgrps))) goto done; + //if((stat = searchsubgrps(zfile,grp,subgrps))) goto done; done: return ZUNTRACE(THROW(stat)); diff 
--git a/libnczarr/zformat3.c b/libnczarr/zformat3.c index c98c90f840..398a1d3f89 100644 --- a/libnczarr/zformat3.c +++ b/libnczarr/zformat3.c @@ -861,7 +861,8 @@ ZF3_readmeta(NC_FILE_INFO_T* file) assert(zroot); /* Read the root group's metadata */ - if((stat = NCZ_downloadjson(zfile->map, Z3METAROOT, &jrootgrp))) goto done; + //if((stat = NCZ_downloadjson(zfile->map, Z3METAROOT, &jrootgrp))) goto done; + if((stat = NCZMD_fetch_json_group(zfile, NULL, NULL, &jrootgrp))) goto done; if(jrootgrp == NULL) {/* not there */ zfile->flags |= FLAG_PUREZARR; stat = NC_NOERR; /* reset */ @@ -1184,13 +1185,14 @@ read_grp_contents(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) /* build Z3GROUP path */ /* Construct grp path */ - if((stat = NCZ_grpkey(grp,&grppath))) goto done; - /* and the grp's zarr.json */ - if((stat = nczm_concat(grppath,Z3GROUP,&key))) goto done; + // if((stat = NCZ_grpkey(grp,&grppath))) goto done; + // /* and the grp's zarr.json */ + // if((stat = nczm_concat(grppath,Z3GROUP,&key))) goto done; /* Read zarr.json */ - stat=NCZ_downloadjson(map,key,&jgroup); - nullfree(key); key = NULL; + //stat=NCZ_downloadjson(map,key,&jgroup); + stat=NCZMD_fetch_json_group(zfile,grp,key,&jgroup); + // nullfree(key); key = NULL; if(stat) goto done; /* Verify that group zarr.json exists */ @@ -1898,15 +1900,9 @@ static int subobjects_pure(NCZ_FILE_INFO_T* zfile, NC_GRP_INFO_T* grp, NClist* varnames, NClist* grpnames) { int stat = NC_NOERR; - char* grpkey = NULL; - - /* Compute the key for the grp */ - if((stat = NCZ_grpkey(grp,&grpkey))) goto done; - /* Get the map and search group */ - if((stat = getnextlevel(zfile,grp,varnames,grpnames))) goto done; - -done: - nullfree(grpkey); + // Get names of variables and groups present in grp (from consolidated view or list storage) + stat = NCZMD_list_variables(zfile, grp, varnames); + stat = NCZMD_list_groups(zfile,grp,grpnames); return stat; } diff --git a/libnczarr/zinfer.c b/libnczarr/zinfer.c index 3dd75f7e7c..f03e631fce 100644 --- 
a/libnczarr/zinfer.c +++ b/libnczarr/zinfer.c @@ -118,32 +118,15 @@ infer_open_format(NC_FILE_INFO_T* file, NCZ_FILE_INFO_T* zfile, NCZMAP* map, int const NCjson* jsupera = NULL; struct TagParam param; - /* Probe the map for tell-tale objects and dict keys */ - - if(zarrformat == 0) { - /* We need to search subtree for a V2 or V3 tag */ - param.zarrformat = 0; param.nczarrformat = 0; - switch(stat = nczmap_walk(map,"/",tagsearch, &param)) { - case NC_NOERR: - /* No tag was found, so its not a zarr file */ - stat = NC_ENOTZARR; - goto done; - case NC_EOBJECT: /* Arbitrary error signaling found and format is in param */ - stat = NC_NOERR; - switch(param.zarrformat) { - case ZARRFORMAT2: case ZARRFORMAT3: zarrformat = param.zarrformat; break; - default: stat = NC_ENOTZARR; goto done; - } - break; - default: stat = NC_ENOTZARR; goto done; - } - } + stat = NCZMD_set_metadata_handler(zfile, (const NCZ_Metadata **)&(zfile->metadata_handler)); + + stat = NCZMD_get_metadata_format(zfile, &zarrformat); if(zarrformat == ZARRFORMAT2 && nczarrformat == 0) { NCjson* jrootatts = NULL; /* Download /.zattrs and /.zgroup */ - if((stat = NCZ_downloadjson(zfile->map, Z2ATTSROOT, &jrootgrp))) goto done; - if((stat = NCZ_downloadjson(zfile->map, Z2METAROOT, &jrootatts))) goto done; + if((stat = NCZMD_fetch_json_attrs(zfile, NULL, NULL, &jrootgrp))) goto done; + if((stat = NCZMD_fetch_json_group(zfile, NULL,NULL, &jrootatts))) goto done; /* Look for superblock */ if(jrootgrp != NULL) NCJdictget(jrootgrp,NCZ_V2_SUPERBLOCK,&jsuperg); if(jrootatts != NULL) NCJdictget(jrootatts,NCZ_V2_SUPERBLOCK,&jsupera); @@ -155,7 +138,7 @@ infer_open_format(NC_FILE_INFO_T* file, NCZ_FILE_INFO_T* zfile, NCZMAP* map, int if(zarrformat == ZARRFORMAT3 && nczarrformat == 0) { const NCjson* jrootatts = NULL; /* Look for "/zarr.json" */ - if((stat = NCZ_downloadjson(zfile->map, Z3METAROOT, &jrootgrp))) goto done; + if((stat = NCZMD_fetch_json_group(zfile, NULL,NULL, &jrootgrp))) goto done; if(jrootgrp == NULL || 
NCJsort(jrootgrp) != NCJ_DICT) { nczarrformat = NCZARRFORMAT0; } else { From 463795607429a2f8b64375c5a56c00bfb92c1475 Mon Sep 17 00:00:00 2001 From: Manuel Reis Date: Mon, 18 Nov 2024 09:45:21 +0100 Subject: [PATCH 5/5] Avoid S3 specific calls until really necessary - this allows broad HTTP read support --- libnczarr/zmap_s3sdk.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/libnczarr/zmap_s3sdk.c b/libnczarr/zmap_s3sdk.c index d70e7b5575..3642284d25 100644 --- a/libnczarr/zmap_s3sdk.c +++ b/libnczarr/zmap_s3sdk.c @@ -224,14 +224,14 @@ zs3open(const char *path, mode_t mode, size64_t flags, void* parameters, NCZMAP* z3map->s3client = NC_s3sdkcreateclient(&z3map->s3); /* Search the root for content */ - content = nclistnew(); - if((stat = NC_s3sdklist(z3map->s3client,z3map->s3.bucket,z3map->s3.rootkey,&nkeys,NULL,&z3map->errmsg))) - goto done; - if(nkeys == 0) { - /* dataset does not actually exist; we choose to return ENOOBJECT instead of EEMPTY */ - stat = NC_ENOOBJECT; - goto done; - } + //content = nclistnew(); + //if((stat = NC_s3sdklist(z3map->s3client,z3map->s3.bucket,z3map->s3.rootkey,&nkeys,NULL,&z3map->errmsg))) + //goto done; + // if(nkeys == 0) { + // /* dataset does not actually exist; we choose to return ENOOBJECT instead of EEMPTY */ + // stat = NC_ENOOBJECT; + // goto done; + // } if(mapp) *mapp = (NCZMAP*)z3map; done: