From 7b3128b3e4b496f332b4845ecc037c790460e62f Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Wed, 6 Apr 2022 15:54:19 -0600 Subject: [PATCH 1/7] update file permission --- nczarr_test/ref_jsonconvention.cdl | 13 +++++++++++++ nczarr_test/run_jsonconvention.sh | 31 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 nczarr_test/ref_jsonconvention.cdl create mode 100755 nczarr_test/run_jsonconvention.sh diff --git a/nczarr_test/ref_jsonconvention.cdl b/nczarr_test/ref_jsonconvention.cdl new file mode 100644 index 0000000000..c4a52b8104 --- /dev/null +++ b/nczarr_test/ref_jsonconvention.cdl @@ -0,0 +1,13 @@ +netcdf tmp_jsonconvention { +dimensions: + d1 = 1 ; +variables: + int v(d1) ; + v:varconvention = "{\n\"key1\": [1,2,3], \"key2\": {\"key3\": \"abc\"}}" ; + +// global attributes: + :grpconvention = "{\"key1\": [1,2,3], \n\"key2\": {\"key3\": \"abc\"}}" ; +data: + + v = _ ; +} diff --git a/nczarr_test/run_jsonconvention.sh b/nczarr_test/run_jsonconvention.sh new file mode 100755 index 0000000000..5d96052f1e --- /dev/null +++ b/nczarr_test/run_jsonconvention.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +if test "x$srcdir" = x ; then srcdir=`pwd`; fi +. ../test_common.sh + +. 
"$srcdir/test_nczarr.sh" + +# This shell script tests support for: +# read/write using json convention + +set -e + +testcase() { +zext=$1 + +echo "*** Test: write then read using json convention" +fileargs tmp_jsonconvention "mode=nczarr,$zext" +deletemap $zext $file +${NCGEN} -4 -b -o "$fileurl" $srcdir/ref_jsonconvention.cdl +${NCDUMP} $fileurl > tmp_jsonconvention_${zext}.cdl +# remove '\n' from ref file before comparing +rm -f tmp_jsonconvention.cdl +sed -e 's|\\n||g' < ref_jsonconvention.cdl > tmp_jsonconvention.cdl +diff -b ${srcdir}/tmp_jsonconvention.cdl tmp_jsonconvention_${zext}.cdl +} + +testcase file +#if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testcase zip; fi +#if test "x$FEATURE_S3TESTS" = xyes ; then testcase s3; fi + +exit 0 From 90fe06592ec3a71e2fa72e318e85e66fd9b27b77 Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Wed, 6 Apr 2022 15:54:42 -0600 Subject: [PATCH 2/7] ckp --- include/ncjson.h | 5 +- libdispatch/ncjson.c | 43 ++++++++++++---- libnczarr/zsync.c | 114 ++++++++++++++++++++++++++---------------- nczarr_test/zisjson.c | 109 ++++++++++++++++++++++++++++++++++++---- 4 files changed, 208 insertions(+), 63 deletions(-) diff --git a/include/ncjson.h b/include/ncjson.h index 86a91ef172..c065bc2ed1 100644 --- a/include/ncjson.h +++ b/include/ncjson.h @@ -56,9 +56,12 @@ struct NCJconst {int bval; long long ival; double dval; char* sval;}; extern "C" { #endif -/* Parse a JSON string */ +/* Parse a string to NCjson*/ DLLEXPORT int NCJparse(const char* text, unsigned flags, NCjson** jsonp); +/* Parse a counted string to NCjson*/ +DLLEXPORT int NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp); + /* Reclaim a JSON tree */ DLLEXPORT extern void NCJreclaim(NCjson* json); diff --git a/libdispatch/ncjson.c b/libdispatch/ncjson.c index e862175648..c0f85dd3f4 100644 --- a/libdispatch/ncjson.c +++ b/libdispatch/ncjson.c @@ -94,6 +94,7 @@ static int NCJyytext(NCJparser*, char* start, size_t pdlen); static void 
NCJreclaimArray(struct NCjlist*); static void NCJreclaimDict(struct NCjlist*); static int NCJunescape(NCJparser* parser); +static int unescape1(int c); static int listappend(struct NCjlist* list, NCjson* element); #ifndef NETCDF_JSON_H @@ -109,24 +110,28 @@ static int bytesappendc(NCJbuf* bufp, const char c); int NCJparse(const char* text, unsigned flags, NCjson** jsonp) +{ + return NCJparsen(strlen(text),text,flags,jsonp); +} + +int +NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp) { int stat = NCJ_OK; - size_t len; NCJparser* parser = NULL; NCjson* json = NULL; /* Need at least 1 character of input */ - if(text == NULL || text[0] == '\0') + if(len == 0 || text == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} if(jsonp == NULL) goto done; parser = calloc(1,sizeof(NCJparser)); if(parser == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} - len = strlen(text); parser->text = (char*)malloc(len+1+1); if(parser->text == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} - strcpy(parser->text,text); + memcpy(parser->text,text,len); parser->text[len] = '\0'; parser->text[len+1] = '\0'; parser->pos = &parser->text[0]; @@ -334,16 +339,21 @@ NCJlex(NCJparser* parser) c = *parser->pos; if(c == '\0') { token = NCJ_EOF; - } else if(c <= ' ' || c == '\177') { + } else if(c <= ' ' || c == '\177') {/* ignore whitespace */ + parser->pos++; + continue; + } else if(c == NCJ_ESCAPE) { parser->pos++; - continue; /* ignore whitespace */ + c = *parser->pos; + *parser->pos = unescape1(c); + continue; } else if(strchr(JSON_WORD, c) != NULL) { start = parser->pos; for(;;) { c = *parser->pos++; if(c == '\0' || strchr(JSON_WORD,c) == NULL) break; /* end of word */ } - /* Pushback c if not whitespace */ + /* Pushback c */ parser->pos--; count = ((parser->pos) - start); if(NCJyytext(parser,start,count)) goto done; @@ -604,6 +614,21 @@ NCJunescape(NCJparser* parser) return NCJTHROW(NCJ_OK); } +/* Unescape a single character */ +static int +unescape1(int c) +{ + switch (c) { + case 
'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = c; break;/* technically not Json conformant */ + } + return c; +} + #ifdef NCJDEBUG static char* tokenname(int token) @@ -896,7 +921,7 @@ NCJunparseR(const NCjson* json, NCJbuf* buf, unsigned flags) if(json->list.len > 0 && json->list.contents != NULL) { int shortlist = 0; for(i=0;!shortlist && i < json->list.len;i+=2) { - if(i > 0) bytesappendc(buf,NCJ_COMMA); + if(i > 0) {bytesappendc(buf,NCJ_COMMA);bytesappendc(buf,' ');}; NCJunparseR(json->list.contents[i],buf,flags); /* key */ bytesappendc(buf,NCJ_COLON); bytesappendc(buf,' '); @@ -945,7 +970,7 @@ escape(const char* text, NCJbuf* buf) case '\n': replace = 'n'; break; case '\r': replace = 'r'; break; case '\t': replace = 't'; break; - case NCJ_QUOTE: replace = '\''; break; + case NCJ_QUOTE: replace = '\"'; break; case NCJ_ESCAPE: replace = '\\'; break; default: break; } diff --git a/libnczarr/zsync.c b/libnczarr/zsync.c index 7711032499..aa564211e9 100644 --- a/libnczarr/zsync.c +++ b/libnczarr/zsync.c @@ -12,6 +12,10 @@ #undef FILLONCLOSE +/*mnemonics*/ +#define DICTOPEN '{' +#define DICTCLOSE '}' + /* Forward */ static int ncz_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jdimsp); static int ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose); @@ -37,6 +41,8 @@ static int computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, s static int inferattrtype(NCjson* values, nc_type* typeidp); static int mininttype(unsigned long long u64, int negative); static int computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarray, int ndims, NClist* dimnames, size64_t* shapes, NC_DIM_INFO_T** dims); +static int read_dict(NCjson** valuep); +static int write_dict(size_t len, const void* data, NCjson** jsonp); /**************************************************/ 
/**************************************************/ @@ -776,6 +782,7 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc Note that this does not push to the file. Also note that attributes of length 1 are stored as singletons, not arrays. This is to be more consistent with pure zarr. +Also implements the JSON dictionary convention. @param attlist - [in] the attributes to dictify @param jattrsp - [out] the json'ized att list @return NC_NOERR @@ -785,7 +792,7 @@ static int ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp) { int stat = NC_NOERR; - int i; + int i, isdict; NCjson* jattrs = NULL; NCjson* akey = NULL; NCjson* jdata = NULL; @@ -795,9 +802,18 @@ ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp) /* Iterate over the attribute list */ for(i=0;inc_typeid,att->len,att->data,&jdata))) - goto done; + if(att->nc_typeid == NC_CHAR + && ((char*)att->data)[0] == DICTOPEN + && ((char*)att->data)[att->len-1] == DICTCLOSE) { + /* this is subject to the JSON dictionary convention? 
*/ + if(write_dict(att->len,att->data,&jdata)==NC_NOERR) isdict=1; + } + if(!isdict) { + if((stat = NCZ_stringconvert(att->nc_typeid,att->len,att->data,&jdata))) + goto done; + } if((stat = NCJinsert(jattrs,att->hdr.name,jdata))) goto done; jdata = NULL; } @@ -1011,12 +1027,18 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp if(typeid == NC_NAT) if((stat = inferattrtype(values,&typeid))) goto done; if(typeid == NC_NAT) {stat = NC_EBADTYPE; goto done;} + if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen))) + goto done; + /* Collect the length of the attribute; might be a singleton */ switch (NCJsort(values)) { - case NCJ_DICT: stat = NC_ENCZARR; goto done; case NCJ_ARRAY: count = NCJlength(values); break; + case NCJ_DICT: + /* Apply the JSON dictionary convention and convert to string */ + if((stat = read_dict(&values))) goto done; + /* fall thru */ case NCJ_STRING: /* requires special handling as an array of characters; also look out for empty string */ if(typeid == NC_CHAR) { count = strlen(NCJstring(values)); @@ -1029,10 +1051,8 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp break; } - if(count > 0) { + if(count > 0 && data == NULL) { /* Allocate data space */ - if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen))) - goto done; if(typeid == NC_CHAR) data = malloc(typelen*(count+1)); else @@ -1079,7 +1099,9 @@ inferattrtype(NCjson* value, nc_type* typeidp) case NCJ_NULL: typeid = NC_CHAR; return NC_NOERR; - case NCJ_DICT: /* fall thru */ + case NCJ_DICT: + typeid = NC_CHAR; + goto done; case NCJ_UNDEF: return NC_EINVAL; default: /* atomic */ @@ -2289,42 +2311,48 @@ computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarra return THROW(stat); } -#if 0 -Not currently used -Special compatibility case: - if the value of the attribute is a dictionary, - or an array with non-atomic values, then - then stringify it and pretend it is of char type. 
-/* Return 1 if this json is not an -atomic value or an array of atomic values. -That is, it does not look like valid -attribute data. +/** +Implement the JSON convention for dictionaries. + +Reading: If the value of the attribute is a dictionary, then stringify + it as the value and make the attribute be of type "char". + +Writing: if the attribute is of type char and looks like a JSON dictionary, + then parse it as JSON and use that as its value in .zattrs. */ + static int -iscomplexjson(NCjson* j) +read_dict(NCjson** jvaluep) { - int i; - switch(NCJsort(j)) { - case NCJ_ARRAY: - /* verify that the elements of the array are not complex */ - for(i=0;i +#endif #include "stdlib.h" #include "stdio.h" +#include "string.h" #ifdef HAVE_UNISTD_H #include #endif +#if defined(_WIN32) && !defined(__MINGW32__) +#include "XGetopt.h" +#else +#include +#endif + #include "netcdf.h" #include "nclist.h" #include "ncjson.h" #define MAXREAD 8192 +/* Command line options */ +struct Jsonpptions { + int trace; +} jsonoptions; + +static const char* +sortname(int thesort) +{ + switch(thesort) { + default: break; + case NCJ_INT: return "NCJ_INT"; + case NCJ_DOUBLE: return "NCJ_DOUBLE"; + case NCJ_BOOLEAN: return "NCJ_BOOLEAN"; + case NCJ_STRING: return "NCJ_STRING"; + case NCJ_DICT: return "NCJ_DICT"; + case NCJ_ARRAY: return "NCJ_ARRAY"; + case NCJ_NULL: return "NCJ_NULL"; + } + return "?"; +} + + +static void +jsontrace(NCjson* json, int depth) +{ + int i; + if(json == NULL) goto done; + printf("[%d] sort=%s",depth,sortname(NCJsort(json))); + switch(NCJsort(json)) { + case NCJ_INT: + case NCJ_DOUBLE: + case NCJ_BOOLEAN: + case NCJ_STRING: + printf(" string=|%s|\n",NCJstring(json)); + break; + case NCJ_NULL: + printf("\n"); + break; + case NCJ_ARRAY: + printf("\n"); + for(i=0;i 1) { - /* use argv[1] as input */ - f = fopen(argv[1],"r"); - if(f == NULL) {fprintf(stderr,"No such file: %s\n",argv[1]); exit(1);} - } else - f = stdin; + nc_initialize(); + 
memset((void*)&jsonoptions,0,sizeof(jsonoptions)); + + while ((c = getopt(argc, argv, "t")) != EOF) { + switch(c) { + case 't': jsonoptions.trace = 1; break; + case '?': + fprintf(stderr,"unknown option\n"); + exit(1); + } + } + + /* get file argument */ + argc -= optind; + argv += optind; + + if (argc > 1) { + fprintf(stderr, "zisjson: only one input file argument permitted\n"); + exit(1); + } + if (argc == 0) + f = stdin; + else { + /* use argv[0] as input */ + f = fopen(argv[0],"r"); + if(f == NULL) {fprintf(stderr,"No such file: %s\n",argv[1]); exit(1);} + } /* Read json from stdin */ for(i=0;;i++) { @@ -54,9 +140,12 @@ main(int argc, char** argv) stat = NC_EEMPTY; } else { stat = NCJparse(text,0,&json); - NCJreclaim(json); + if(!stat) { + if(jsonoptions.trace) jsontrace(json,0); + NCJreclaim(json); + } } - printf("%d",(stat==NC_NOERR?1:0)); /* parse success|failure */ + printf("%d",(stat?0:1)); /* parse success|failure */ if(f != stdin) fclose(f); return 0; } From d5798aff2eef6e83ab5cf2e36e854f6119df117b Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Wed, 6 Apr 2022 15:57:17 -0600 Subject: [PATCH 3/7] testing --- .github/workflows/run_tests_ubuntu.yml | 2 +- .github/workflows/run_tests_win_mingw.yml | 2 +- nczarr_test/Makefile.am | 5 +++-- nczarr_test/run_jsonconvention.sh | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/run_tests_ubuntu.yml b/.github/workflows/run_tests_ubuntu.yml index 93f402c150..15cbf017f6 100644 --- a/.github/workflows/run_tests_ubuntu.yml +++ b/.github/workflows/run_tests_ubuntu.yml @@ -4,7 +4,7 @@ name: Run Ubuntu/Linux netCDF Tests -on: [ pull_request ] +on: [pull_request,push] jobs: diff --git a/.github/workflows/run_tests_win_mingw.yml b/.github/workflows/run_tests_win_mingw.yml index 913920a3aa..aaf999e6a4 100644 --- a/.github/workflows/run_tests_win_mingw.yml +++ b/.github/workflows/run_tests_win_mingw.yml @@ -7,7 +7,7 @@ name: Run MSYS2, MinGW64-based Tests -on: [pull_request] +on: 
[pull_request,push] jobs: diff --git a/nczarr_test/Makefile.am b/nczarr_test/Makefile.am index e9efa73f7e..968b5a821e 100644 --- a/nczarr_test/Makefile.am +++ b/nczarr_test/Makefile.am @@ -61,6 +61,7 @@ TESTS += run_purezarr.sh TESTS += run_interop.sh TESTS += run_misc.sh TESTS += run_nczarr_fill.sh +TESTS += run_jsonconvention.sh endif @@ -127,7 +128,7 @@ run_ut_map.sh run_ut_mapapi.sh run_ut_misc.sh run_ut_chunk.sh run_ncgen4.sh \ run_nccopyz.sh run_fillonlyz.sh run_chunkcases.sh test_nczarr.sh run_perf_chunks1.sh run_s3_cleanup.sh \ run_purezarr.sh run_interop.sh run_misc.sh \ run_filter.sh run_specific_filters.sh \ -run_newformat.sh run_nczarr_fill.sh run_quantize.sh +run_newformat.sh run_nczarr_fill.sh run_quantize.sh run_jsonconvention.sh EXTRA_DIST += \ ref_ut_map_create.cdl ref_ut_map_writedata.cdl ref_ut_map_writemeta2.cdl ref_ut_map_writemeta.cdl \ @@ -147,7 +148,7 @@ ref_bzip2.cdl ref_filtered.cdl ref_multi.cdl \ ref_any.cdl ref_oldformat.cdl ref_oldformat.zip ref_newformatpure.cdl \ ref_quotes.zip ref_quotes.cdl \ ref_groups.h5 ref_byte.zarr.zip ref_byte_fill_value_null.zarr.zip \ -ref_groups_regular.cdl ref_byte.cdl ref_byte_fill_value_null.cdl +ref_groups_regular.cdl ref_byte.cdl ref_byte_fill_value_null.cdl ref_jsonconvention.cdl # Interoperability files EXTRA_DIST += ref_power_901_constants.zip ref_power_901_constants.cdl ref_quotes.zip ref_quotes.cdl diff --git a/nczarr_test/run_jsonconvention.sh b/nczarr_test/run_jsonconvention.sh index 5d96052f1e..fc9c84d1dc 100755 --- a/nczarr_test/run_jsonconvention.sh +++ b/nczarr_test/run_jsonconvention.sh @@ -25,7 +25,7 @@ diff -b ${srcdir}/tmp_jsonconvention.cdl tmp_jsonconvention_${zext}.cdl } testcase file -#if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testcase zip; fi -#if test "x$FEATURE_S3TESTS" = xyes ; then testcase s3; fi +if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testcase zip; fi +if test "x$FEATURE_S3TESTS" = xyes ; then testcase s3; fi exit 0 From 56167a777541f8aa73513163cc7d80cef222fa6b Mon 
Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Wed, 6 Apr 2022 17:16:40 -0600 Subject: [PATCH 4/7] final --- nc_test4/tst_broken_files.c | 3 ++- nczarr_test/CMakeLists.txt | 3 ++- nczarr_test/run_jsonconvention.sh | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/nc_test4/tst_broken_files.c b/nc_test4/tst_broken_files.c index af68b26905..22bf3e8802 100644 --- a/nc_test4/tst_broken_files.c +++ b/nc_test4/tst_broken_files.c @@ -28,7 +28,8 @@ main() { fclose(fp); int ncid; - if (nc_open(FILE_NAME, 0, &ncid) != NC_EHDFERR) ERR; + int stat=nc_open(FILE_NAME, 0, &ncid); + if (stat != NC_EHDFERR && stat != NC_ENOTNC) ERR; } { diff --git a/nczarr_test/CMakeLists.txt b/nczarr_test/CMakeLists.txt index 03db4557fb..ef31d15df1 100644 --- a/nczarr_test/CMakeLists.txt +++ b/nczarr_test/CMakeLists.txt @@ -73,7 +73,7 @@ IF(ENABLE_TESTS) BUILD_BIN_TEST(zmapio ${COMMONSRC}) TARGET_INCLUDE_DIRECTORIES(zmapio PUBLIC ../libnczarr) BUILD_BIN_TEST(zhex) - BUILD_BIN_TEST(zisjson) + BUILD_BIN_TEST(zisjson ${COMMONSRC}) TARGET_INCLUDE_DIRECTORIES(zisjson PUBLIC ../libnczarr) BUILD_BIN_TEST(zs3parse ${COMMONSRC}) TARGET_INCLUDE_DIRECTORIES(zs3parse PUBLIC ../libnczarr) @@ -108,6 +108,7 @@ IF(ENABLE_TESTS) add_sh_test(nczarr_test run_interop) add_sh_test(nczarr_test run_misc) add_sh_test(nczarr_test run_nczarr_fill) + add_sh_test(nczarr_test run_jsonconvention) BUILD_BIN_TEST(test_quantize ${TSTCOMMONSRC}) add_sh_test(nczarr_test run_quantize) diff --git a/nczarr_test/run_jsonconvention.sh b/nczarr_test/run_jsonconvention.sh index fc9c84d1dc..7cf786a685 100755 --- a/nczarr_test/run_jsonconvention.sh +++ b/nczarr_test/run_jsonconvention.sh @@ -20,8 +20,8 @@ ${NCGEN} -4 -b -o "$fileurl" $srcdir/ref_jsonconvention.cdl ${NCDUMP} $fileurl > tmp_jsonconvention_${zext}.cdl # remove '\n' from ref file before comparing rm -f tmp_jsonconvention.cdl -sed -e 's|\\n||g' < ref_jsonconvention.cdl > tmp_jsonconvention.cdl -diff -b ${srcdir}/tmp_jsonconvention.cdl 
tmp_jsonconvention_${zext}.cdl +sed -e 's|\\n||g' < ${srcdir}/ref_jsonconvention.cdl > tmp_jsonconvention.cdl +diff -b tmp_jsonconvention.cdl tmp_jsonconvention_${zext}.cdl } testcase file From 9f78be8bb8af0cbc2293b2e1cd8984894762cc6b Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Wed, 6 Apr 2022 18:22:59 -0600 Subject: [PATCH 5/7] Allow the read/write of JSON-valued Zarr attributes. A number of other packages that read/write Zarr insert attributes whose value is a dictionary containing specialized information. An example is the GDAL Driver convention (see https://gdal.org/drivers/raster/zarr.html). In order to handle such attributes, this PR enforces a special convention. It applies to both pure Zarr and NCZarr formats as written by the netcdf-c library. The convention is as follows: ## Reading Suppose an attribute is read from *.zattrs* and it has a JSON value that is a dictionary. In this case, the JSON dictionary is converted to a string value. It then appears in the netcdf-c API as if it is a character-valued attribute of the same name, and whose value is the "stringified" dictionary. ## Writing Suppose an attribute is of type character and its *value* *looks like* a JSON dictionary. In this case, it is parsed to JSON and written as the value of the attribute in the NCZarr file. Here the *value* is the concatenation of all the characters in the attribute's netcdf-c value. The term "looks like" means that the *value*'s first character is "{", its last character is "}", and it can be successfully parsed by a JSON parser. A test case, *nczarr_test/run_jsonconvention.sh*, was also added. ## Misc. Unrelated Changes 1. Fix an error in nc_test4/tst_broken_files.c 2. Modify the internal JSON parser API. 3. Modify the nczarr_test/zisjson program to support this convention.
--- .github/workflows/run_tests_osx.yml | 2 +- .github/workflows/run_tests_ubuntu.yml | 2 +- include/ncjson.h | 5 +- libdispatch/ncjson.c | 43 ++++++++-- libnczarr/zsync.c | 114 +++++++++++++++---------- nc_test4/tst_broken_files.c | 3 +- nczarr_test/CMakeLists.txt | 3 +- nczarr_test/Makefile.am | 5 +- nczarr_test/ref_jsonconvention.cdl | 13 +++ nczarr_test/run_jsonconvention.sh | 31 +++++++ nczarr_test/zisjson.c | 109 ++++++++++++++++++++--- 11 files changed, 261 insertions(+), 69 deletions(-) create mode 100644 nczarr_test/ref_jsonconvention.cdl create mode 100755 nczarr_test/run_jsonconvention.sh diff --git a/.github/workflows/run_tests_osx.yml b/.github/workflows/run_tests_osx.yml index aa35ae8795..8e9d65a6fb 100644 --- a/.github/workflows/run_tests_osx.yml +++ b/.github/workflows/run_tests_osx.yml @@ -7,7 +7,7 @@ name: Run macOS-based netCDF Tests -on: [pull_request,push] +on: [pull_request] jobs: diff --git a/.github/workflows/run_tests_ubuntu.yml b/.github/workflows/run_tests_ubuntu.yml index a1f6544249..92b5bfca66 100644 --- a/.github/workflows/run_tests_ubuntu.yml +++ b/.github/workflows/run_tests_ubuntu.yml @@ -4,7 +4,7 @@ name: Run Ubuntu/Linux netCDF Tests -on: [ pull_request ] +on: [pull_request] jobs: diff --git a/include/ncjson.h b/include/ncjson.h index 86a91ef172..c065bc2ed1 100644 --- a/include/ncjson.h +++ b/include/ncjson.h @@ -56,9 +56,12 @@ struct NCJconst {int bval; long long ival; double dval; char* sval;}; extern "C" { #endif -/* Parse a JSON string */ +/* Parse a string to NCjson*/ DLLEXPORT int NCJparse(const char* text, unsigned flags, NCjson** jsonp); +/* Parse a counted string to NCjson*/ +DLLEXPORT int NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp); + /* Reclaim a JSON tree */ DLLEXPORT extern void NCJreclaim(NCjson* json); diff --git a/libdispatch/ncjson.c b/libdispatch/ncjson.c index e862175648..c0f85dd3f4 100644 --- a/libdispatch/ncjson.c +++ b/libdispatch/ncjson.c @@ -94,6 +94,7 @@ static int 
NCJyytext(NCJparser*, char* start, size_t pdlen); static void NCJreclaimArray(struct NCjlist*); static void NCJreclaimDict(struct NCjlist*); static int NCJunescape(NCJparser* parser); +static int unescape1(int c); static int listappend(struct NCjlist* list, NCjson* element); #ifndef NETCDF_JSON_H @@ -109,24 +110,28 @@ static int bytesappendc(NCJbuf* bufp, const char c); int NCJparse(const char* text, unsigned flags, NCjson** jsonp) +{ + return NCJparsen(strlen(text),text,flags,jsonp); +} + +int +NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp) { int stat = NCJ_OK; - size_t len; NCJparser* parser = NULL; NCjson* json = NULL; /* Need at least 1 character of input */ - if(text == NULL || text[0] == '\0') + if(len == 0 || text == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} if(jsonp == NULL) goto done; parser = calloc(1,sizeof(NCJparser)); if(parser == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} - len = strlen(text); parser->text = (char*)malloc(len+1+1); if(parser->text == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} - strcpy(parser->text,text); + memcpy(parser->text,text,len); parser->text[len] = '\0'; parser->text[len+1] = '\0'; parser->pos = &parser->text[0]; @@ -334,16 +339,21 @@ NCJlex(NCJparser* parser) c = *parser->pos; if(c == '\0') { token = NCJ_EOF; - } else if(c <= ' ' || c == '\177') { + } else if(c <= ' ' || c == '\177') {/* ignore whitespace */ + parser->pos++; + continue; + } else if(c == NCJ_ESCAPE) { parser->pos++; - continue; /* ignore whitespace */ + c = *parser->pos; + *parser->pos = unescape1(c); + continue; } else if(strchr(JSON_WORD, c) != NULL) { start = parser->pos; for(;;) { c = *parser->pos++; if(c == '\0' || strchr(JSON_WORD,c) == NULL) break; /* end of word */ } - /* Pushback c if not whitespace */ + /* Pushback c */ parser->pos--; count = ((parser->pos) - start); if(NCJyytext(parser,start,count)) goto done; @@ -604,6 +614,21 @@ NCJunescape(NCJparser* parser) return NCJTHROW(NCJ_OK); } +/* Unescape a single 
character */ +static int +unescape1(int c) +{ + switch (c) { + case 'b': c = '\b'; break; + case 'f': c = '\f'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + default: c = c; break;/* technically not Json conformant */ + } + return c; +} + #ifdef NCJDEBUG static char* tokenname(int token) @@ -896,7 +921,7 @@ NCJunparseR(const NCjson* json, NCJbuf* buf, unsigned flags) if(json->list.len > 0 && json->list.contents != NULL) { int shortlist = 0; for(i=0;!shortlist && i < json->list.len;i+=2) { - if(i > 0) bytesappendc(buf,NCJ_COMMA); + if(i > 0) {bytesappendc(buf,NCJ_COMMA);bytesappendc(buf,' ');}; NCJunparseR(json->list.contents[i],buf,flags); /* key */ bytesappendc(buf,NCJ_COLON); bytesappendc(buf,' '); @@ -945,7 +970,7 @@ escape(const char* text, NCJbuf* buf) case '\n': replace = 'n'; break; case '\r': replace = 'r'; break; case '\t': replace = 't'; break; - case NCJ_QUOTE: replace = '\''; break; + case NCJ_QUOTE: replace = '\"'; break; case NCJ_ESCAPE: replace = '\\'; break; default: break; } diff --git a/libnczarr/zsync.c b/libnczarr/zsync.c index 7711032499..aa564211e9 100644 --- a/libnczarr/zsync.c +++ b/libnczarr/zsync.c @@ -12,6 +12,10 @@ #undef FILLONCLOSE +/*mnemonics*/ +#define DICTOPEN '{' +#define DICTCLOSE '}' + /* Forward */ static int ncz_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jdimsp); static int ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose); @@ -37,6 +41,8 @@ static int computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, s static int inferattrtype(NCjson* values, nc_type* typeidp); static int mininttype(unsigned long long u64, int negative); static int computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarray, int ndims, NClist* dimnames, size64_t* shapes, NC_DIM_INFO_T** dims); +static int read_dict(NCjson** valuep); +static int write_dict(size_t len, const void* data, NCjson** jsonp); 
/**************************************************/ /**************************************************/ @@ -776,6 +782,7 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc Note that this does not push to the file. Also note that attributes of length 1 are stored as singletons, not arrays. This is to be more consistent with pure zarr. +Also implements the JSON dictionary convention. @param attlist - [in] the attributes to dictify @param jattrsp - [out] the json'ized att list @return NC_NOERR @@ -785,7 +792,7 @@ static int ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp) { int stat = NC_NOERR; - int i; + int i, isdict; NCjson* jattrs = NULL; NCjson* akey = NULL; NCjson* jdata = NULL; @@ -795,9 +802,18 @@ ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp) /* Iterate over the attribute list */ for(i=0;inc_typeid,att->len,att->data,&jdata))) - goto done; + if(att->nc_typeid == NC_CHAR + && ((char*)att->data)[0] == DICTOPEN + && ((char*)att->data)[att->len-1] == DICTCLOSE) { + /* this is subject to the JSON dictionary convention? 
*/ + if(write_dict(att->len,att->data,&jdata)==NC_NOERR) isdict=1; + } + if(!isdict) { + if((stat = NCZ_stringconvert(att->nc_typeid,att->len,att->data,&jdata))) + goto done; + } if((stat = NCJinsert(jattrs,att->hdr.name,jdata))) goto done; jdata = NULL; } @@ -1011,12 +1027,18 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp if(typeid == NC_NAT) if((stat = inferattrtype(values,&typeid))) goto done; if(typeid == NC_NAT) {stat = NC_EBADTYPE; goto done;} + if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen))) + goto done; + /* Collect the length of the attribute; might be a singleton */ switch (NCJsort(values)) { - case NCJ_DICT: stat = NC_ENCZARR; goto done; case NCJ_ARRAY: count = NCJlength(values); break; + case NCJ_DICT: + /* Apply the JSON dictionary convention and convert to string */ + if((stat = read_dict(&values))) goto done; + /* fall thru */ case NCJ_STRING: /* requires special handling as an array of characters; also look out for empty string */ if(typeid == NC_CHAR) { count = strlen(NCJstring(values)); @@ -1029,10 +1051,8 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp break; } - if(count > 0) { + if(count > 0 && data == NULL) { /* Allocate data space */ - if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen))) - goto done; if(typeid == NC_CHAR) data = malloc(typelen*(count+1)); else @@ -1079,7 +1099,9 @@ inferattrtype(NCjson* value, nc_type* typeidp) case NCJ_NULL: typeid = NC_CHAR; return NC_NOERR; - case NCJ_DICT: /* fall thru */ + case NCJ_DICT: + typeid = NC_CHAR; + goto done; case NCJ_UNDEF: return NC_EINVAL; default: /* atomic */ @@ -2289,42 +2311,48 @@ computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarra return THROW(stat); } -#if 0 -Not currently used -Special compatibility case: - if the value of the attribute is a dictionary, - or an array with non-atomic values, then - then stringify it and pretend it is of char type. 
-/* Return 1 if this json is not an -atomic value or an array of atomic values. -That is, it does not look like valid -attribute data. +/** +Implement the JSON convention for dictionaries. + +Reading: If the value of the attribute is a dictionary, then stringify + it as the value and make the attribute be of type "char". + +Writing: if the attribute is of type char and looks like a JSON dictionary, + then parse it as JSON and use that as its value in .zattrs. */ + static int -iscomplexjson(NCjson* j) +read_dict(NCjson** jvaluep) { - int i; - switch(NCJsort(j)) { - case NCJ_ARRAY: - /* verify that the elements of the array are not complex */ - for(i=0;i tmp_jsonconvention_${zext}.cdl +# remove '\n' from ref file before comparing +rm -f tmp_jsonconvention.cdl +sed -e 's|\\n||g' < ${srcdir}/ref_jsonconvention.cdl > tmp_jsonconvention.cdl +diff -b tmp_jsonconvention.cdl tmp_jsonconvention_${zext}.cdl +} + +testcase file +if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testcase zip; fi +if test "x$FEATURE_S3TESTS" = xyes ; then testcase s3; fi + +exit 0 diff --git a/nczarr_test/zisjson.c b/nczarr_test/zisjson.c index 186ae45f6c..e13c69f23f 100644 --- a/nczarr_test/zisjson.c +++ b/nczarr_test/zisjson.c @@ -7,36 +7,122 @@ Output 1 or 0. 
*/ -#include "config.h" + +#ifdef HAVE_UNISTD_H +#include +#endif #include "stdlib.h" #include "stdio.h" +#include "string.h" #ifdef HAVE_UNISTD_H #include #endif +#if defined(_WIN32) && !defined(__MINGW32__) +#include "XGetopt.h" +#else +#include +#endif + #include "netcdf.h" #include "nclist.h" #include "ncjson.h" #define MAXREAD 8192 +/* Command line options */ +struct Jsonpptions { + int trace; +} jsonoptions; + +static const char* +sortname(int thesort) +{ + switch(thesort) { + default: break; + case NCJ_INT: return "NCJ_INT"; + case NCJ_DOUBLE: return "NCJ_DOUBLE"; + case NCJ_BOOLEAN: return "NCJ_BOOLEAN"; + case NCJ_STRING: return "NCJ_STRING"; + case NCJ_DICT: return "NCJ_DICT"; + case NCJ_ARRAY: return "NCJ_ARRAY"; + case NCJ_NULL: return "NCJ_NULL"; + } + return "?"; +} + + +static void +jsontrace(NCjson* json, int depth) +{ + int i; + if(json == NULL) goto done; + printf("[%d] sort=%s",depth,sortname(NCJsort(json))); + switch(NCJsort(json)) { + case NCJ_INT: + case NCJ_DOUBLE: + case NCJ_BOOLEAN: + case NCJ_STRING: + printf(" string=|%s|\n",NCJstring(json)); + break; + case NCJ_NULL: + printf("\n"); + break; + case NCJ_ARRAY: + printf("\n"); + for(i=0;i 1) { - /* use argv[1] as input */ - f = fopen(argv[1],"r"); - if(f == NULL) {fprintf(stderr,"No such file: %s\n",argv[1]); exit(1);} - } else - f = stdin; + nc_initialize(); + memset((void*)&jsonoptions,0,sizeof(jsonoptions)); + + while ((c = getopt(argc, argv, "t")) != EOF) { + switch(c) { + case 't': jsonoptions.trace = 1; break; + case '?': + fprintf(stderr,"unknown option\n"); + exit(1); + } + } + + /* get file argument */ + argc -= optind; + argv += optind; + + if (argc > 1) { + fprintf(stderr, "zisjson: only one input file argument permitted\n"); + exit(1); + } + if (argc == 0) + f = stdin; + else { + /* use argv[0] as input */ + f = fopen(argv[0],"r"); + if(f == NULL) {fprintf(stderr,"No such file: %s\n",argv[1]); exit(1);} + } /* Read json from stdin */ for(i=0;;i++) { @@ -54,9 +140,12 @@ main(int 
argc, char** argv) stat = NC_EEMPTY; } else { stat = NCJparse(text,0,&json); - NCJreclaim(json); + if(!stat) { + if(jsonoptions.trace) jsontrace(json,0); + NCJreclaim(json); + } } - printf("%d",(stat==NC_NOERR?1:0)); /* parse success|failure */ + printf("%d",(stat?0:1)); /* parse success|failure */ if(f != stdin) fclose(f); return 0; } From 3e2f32a4bcca19fd755bb3c731a05f99e2b8bcce Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Mon, 9 May 2022 14:23:28 -0600 Subject: [PATCH 6/7] update --- libnczarr/zsync.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/libnczarr/zsync.c b/libnczarr/zsync.c index aa564211e9..032ea1c525 100644 --- a/libnczarr/zsync.c +++ b/libnczarr/zsync.c @@ -22,7 +22,7 @@ static int ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose); static int ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp); static int load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClist** atypes); -static int zconvert(nc_type typeid, size_t typelen, void* dst, NCjson* src); +static int zconvert(nc_type typeid, size_t typelen, NCjson* src, void* dst); static int computeattrinfo(const char* name, NClist* atypes, NCjson* values, nc_type* typeidp, size_t* typelenp, size_t* lenp, void** datap); static int parse_group_content(NCjson* jcontent, NClist* dimdefs, NClist* varnames, NClist* subgrps); @@ -41,7 +41,7 @@ static int computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, s static int inferattrtype(NCjson* values, nc_type* typeidp); static int mininttype(unsigned long long u64, int negative); static int computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarray, int ndims, NClist* dimnames, size64_t* shapes, NC_DIM_INFO_T** dims); -static int read_dict(NCjson** valuep); +static int read_dict(NCjson* jdict, NCjson** jtextp); static int write_dict(size_t len, const void* data, NCjson** jsonp); 
/**************************************************/ @@ -934,7 +934,7 @@ load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClis /* Convert a json value to actual data values of an attribute. */ static int -zconvert(nc_type typeid, size_t typelen, void* dst0, NCjson* src) +zconvert(nc_type typeid, size_t typelen, NCjson* src, void* dst0) { int stat = NC_NOERR; int i; @@ -1020,6 +1020,7 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp void* data = NULL; size_t typelen; nc_type typeid = NC_NAT; + NCjson* jtext = NULL; int reclaimvalues = 0; /* Get assumed type */ @@ -1037,7 +1038,9 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp break; case NCJ_DICT: /* Apply the JSON dictionary convention and convert to string */ - if((stat = read_dict(&values))) goto done; + if((stat = read_dict(values,&jtext))) goto done; + values = jtext; jtext = NULL; + reclaimvalues = 1; /* fall thru */ case NCJ_STRING: /* requires special handling as an array of characters; also look out for empty string */ if(typeid == NC_CHAR) { @@ -1060,7 +1063,7 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp if(data == NULL) {stat = NC_ENOMEM; goto done;} /* convert to target type */ - if((stat = zconvert(typeid, typelen, data, values))) + if((stat = zconvert(typeid, typelen, values, data))) goto done; } if(lenp) *lenp = count; @@ -2322,20 +2325,19 @@ Writing: if the attribute is of type char and looks like a JSON dictionary, */ static int -read_dict(NCjson** jvaluep) +read_dict(NCjson* jdict, NCjson** jtextp) { int stat = NC_NOERR; - NCjson* jdict = NULL; NCjson* jtext = NULL; char* text = NULL; - assert(jvaluep != NULL && *jvaluep != NULL); - jdict = *jvaluep; + if(jdict == NULL) {stat = NC_EINVAL; goto done;} if(NCJsort(jdict) != NCJ_DICT) {stat = NC_EINVAL; goto done;} if(NCJunparse(jdict,0,&text)) {stat = NC_EINVAL; goto done;} if(NCJnewstring(NCJ_STRING,text,&jtext)) 
{stat = NC_EINVAL; goto done;} - *jvaluep = jtext; + *jtextp = jtext; jtext = NULL; done: + NCJreclaim(jtext); nullfree(text); return stat; } @@ -2351,8 +2353,9 @@ write_dict(size_t len, const void* data, NCjson** jsonp) {stat = NC_EINVAL; goto done;} if(NCJsort(jdict) != NCJ_DICT) {stat = NC_EINVAL; goto done;} - *jsonp = jdict; + *jsonp = jdict; jdict = NULL; done: + NCJreclaim(jdict); return stat; } From 8eb0712eb66fa4e7ccc290e63ede276bb8d63f87 Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Mon, 9 May 2022 14:45:06 -0600 Subject: [PATCH 7/7] final1 --- .github/workflows/run_tests_osx.yml | 2 +- .github/workflows/run_tests_ubuntu.yml | 2 +- .github/workflows/run_tests_win_mingw.yml | 2 +- RELEASE_NOTES.md | 2 ++ ncdump/tst_nccopy3.sh | 1 - ncdump/tst_output.sh | 1 - 6 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/run_tests_osx.yml b/.github/workflows/run_tests_osx.yml index aa35ae8795..8e9d65a6fb 100644 --- a/.github/workflows/run_tests_osx.yml +++ b/.github/workflows/run_tests_osx.yml @@ -7,7 +7,7 @@ name: Run macOS-based netCDF Tests -on: [pull_request,push] +on: [pull_request] jobs: diff --git a/.github/workflows/run_tests_ubuntu.yml b/.github/workflows/run_tests_ubuntu.yml index 826ac43589..a21ac5b001 100644 --- a/.github/workflows/run_tests_ubuntu.yml +++ b/.github/workflows/run_tests_ubuntu.yml @@ -4,7 +4,7 @@ name: Run Ubuntu/Linux netCDF Tests -on: [pull_request,push] +on: [pull_request] jobs: diff --git a/.github/workflows/run_tests_win_mingw.yml b/.github/workflows/run_tests_win_mingw.yml index aaf999e6a4..913920a3aa 100644 --- a/.github/workflows/run_tests_win_mingw.yml +++ b/.github/workflows/run_tests_win_mingw.yml @@ -7,7 +7,7 @@ name: Run MSYS2, MinGW64-based Tests -on: [pull_request,push] +on: [pull_request] jobs: diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index f876461b14..157b3d2abf 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -7,6 +7,8 @@ This file contains a high-level description of 
this package's evolution. Release ## 4.8.2 - TBD +* [Enhancement] Allow the read/write of JSON-valued Zarr attributes in order to support +domain-specific information such as that used by GDAL/Zarr. See [Github #????](https://github.com/Unidata/netcdf-c/pull/????). * [Enhancement] Update the documentation to match the current filter capabilities See [Github #2249](https://github.com/Unidata/netcdf-c/pull/2249). * [Enhancement] Support installation of pre-built standard filters into user-specified location. See [Github #2318](https://github.com/Unidata/netcdf-c/pull/2318). * [Enhancement] Improve filter support. More specifically (1) add nc_inq_filter_avail to check if a filter is available, (2) add the notion of standard filters, (3) cleanup szip support to fix interaction with NCZarr. See [Github #2245](https://github.com/Unidata/netcdf-c/pull/2245). diff --git a/ncdump/tst_nccopy3.sh b/ncdump/tst_nccopy3.sh index 7dcfc910c5..30b53bb8b1 100755 --- a/ncdump/tst_nccopy3.sh +++ b/ncdump/tst_nccopy3.sh @@ -8,7 +8,6 @@ if test "x$srcdir" = x ; then srcdir=`pwd`; fi . ../test_common.sh -set -x set -e echo "" diff --git a/ncdump/tst_output.sh b/ncdump/tst_output.sh index 18da9c3e6f..40c7e7cd47 100755 --- a/ncdump/tst_output.sh +++ b/ncdump/tst_output.sh @@ -4,7 +4,6 @@ if test "x$srcdir" = x ; then srcdir=`pwd`; fi . ../test_common.sh # This shell script tests the output from several previous tests. -set -x set -e echo ""