From 6f38a0e7cb4df0ac0c98f47f7bee8efd3bba531a Mon Sep 17 00:00:00 2001
From: Greg Sjaardema
Date: Tue, 1 Aug 2017 15:38:21 -0600
Subject: [PATCH 1/2] Fix problem with hangs in parallel collective output

See description in #447
---
 libdispatch/dvarput.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/libdispatch/dvarput.c b/libdispatch/dvarput.c
index cad940fd94..4dd4656f8d 100644
--- a/libdispatch/dvarput.c
+++ b/libdispatch/dvarput.c
@@ -241,8 +241,7 @@ NCDEFAULT_put_vars(int ncid, int varid, const size_t * start,
       if(mystride[i] != 1) isstride1 = 0;
       nels *= myedges[i];
    }
-   if(nels == 0)
-      return NC_NOERR; /* cannot write anything */
+
    if(isstride1) {
      return NC_put_vara(ncid, varid, mystart, myedges, value, memtype);
    }

From a07938194e32fcbe74e31958f8a68f0cb1721f67 Mon Sep 17 00:00:00 2001
From: Greg Sjaardema
Date: Wed, 2 Aug 2017 11:12:04 -0600
Subject: [PATCH 2/2] Move nels==0 check instead of eliminating

The previous change, in which the `nels==0` check was removed entirely,
caused failures in nc_test. If the check is instead moved to after the
`NC_put_vara` call, it avoids the parallel hang for stride=1 while
still passing all tests in `nc_test`.

Note that this is somewhat of a kluge, since there will still be a
parallel hang if nels==0 and stride is > 1 and the code falls into the
odometer section.
---
 libdispatch/dvarput.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/libdispatch/dvarput.c b/libdispatch/dvarput.c
index 4dd4656f8d..7b29d09e96 100644
--- a/libdispatch/dvarput.c
+++ b/libdispatch/dvarput.c
@@ -246,6 +246,14 @@ NCDEFAULT_put_vars(int ncid, int varid, const size_t * start,
      return NC_put_vara(ncid, varid, mystart, myedges, value, memtype);
    }
 
+   if(nels == 0) {
+      /* This check belongs here, after the stride-1 NC_put_vara call,
+       * to avoid a hang in parallel collective writes for stride 1.
+       * A parallel hang is still possible if stride > 1 (odometer path).
+       */
+      return NC_NOERR; /* cannot write anything */
+   }
+
    /* Initial version uses and odometer to walk the variable
       and read each value one at a time. This can later be optimized
       to read larger chunks at a time.
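
For reviewers, here is a minimal reproducer sketch of the hang these patches address. It is not part of the patch series: the file name, variable layout, and rank decomposition are illustrative assumptions, and it presumes a parallel (HDF5/MPI-IO) netCDF-4 build. In collective mode every rank must reach the underlying collective write, so a rank that returns `NC_NOERR` early because its `count[]` contains a zero leaves the remaining ranks blocked inside the collective MPI-IO call:

```c
/* Hypothetical reproducer for the #447 hang (illustrative sketch, not
 * taken from the patch). Run with 2+ MPI ranks; error checking is
 * omitted for brevity. */
#include <mpi.h>
#include <netcdf.h>
#include <netcdf_par.h>

int main(int argc, char **argv)
{
    int rank, nranks, ncid, dimid, varid;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nranks);

    /* One value per rank in a 1-D variable, written collectively. */
    nc_create_par("hang.nc", NC_NETCDF4 | NC_MPIIO,
                  MPI_COMM_WORLD, MPI_INFO_NULL, &ncid);
    nc_def_dim(ncid, "x", (size_t)nranks, &dimid);
    nc_def_var(ncid, "v", NC_DOUBLE, 1, &dimid, &varid);
    nc_enddef(ncid);
    nc_var_par_access(ncid, varid, NC_COLLECTIVE);

    /* Rank 0 contributes zero elements; the others write one each.
     * nc_put_vars goes through NCDEFAULT_put_vars. Before this fix,
     * rank 0 hit the nels==0 early return and never reached the
     * collective NC_put_vara, so the other ranks hung. */
    size_t    start[1]  = { (size_t)rank };
    size_t    count[1]  = { rank == 0 ? 0 : 1 };
    ptrdiff_t stride[1] = { 1 };   /* stride 1: the fast path at issue */
    double    val       = (double)rank;
    nc_put_vars_double(ncid, varid, start, count, stride, &val);

    nc_close(ncid);
    MPI_Finalize();
    return 0;
}
```

With PATCH 2/2 applied, a zero-count rank on the stride-1 fast path still joins the collective `NC_put_vara` call (writing nothing) and the moved `nels==0` check is reached only when `isstride1` is false; that is why the residual hang the second commit message describes is confined to the stride > 1 odometer path.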