From 1b75fe2c90f2a0abf03844d867bd2afa00e86de9 Mon Sep 17 00:00:00 2001 From: Dusan Jovic <48258889+DusanJovic-NOAA@users.noreply.github.com> Date: Mon, 1 Apr 2024 17:12:08 -0400 Subject: [PATCH] Update module_write_netcdf to avoid hangs in RRFS runs (#803) * Split add_dim into two routines * Add NF90_NODIMSCALE_ATTACH constant * Test classic netcdf file formats --- io/fv3atm_history_io.F90 | 1 - io/module_write_netcdf.F90 | 180 ++++++++++++++++++++++++++----------- 2 files changed, 126 insertions(+), 55 deletions(-) diff --git a/io/fv3atm_history_io.F90 b/io/fv3atm_history_io.F90 index d7ee4e808..7171aa673 100644 --- a/io/fv3atm_history_io.F90 +++ b/io/fv3atm_history_io.F90 @@ -595,7 +595,6 @@ subroutine history_type_store_data3D(hist, id, work, Time, idx, intpl_method, fl integer k,j,i,nv,i1,j1 logical used ! - write(0,*) ' history_type_store_data3D kinds ', kind_phys, kind(work), lbound(work), ubound(work), size(work) if( id > 0 ) then if( hist%use_wrtgridcomp_output ) then if( trim(intpl_method) == 'bilinear') then diff --git a/io/module_write_netcdf.F90 b/io/module_write_netcdf.F90 index 7d4289a89..2b5fcacc1 100644 --- a/io/module_write_netcdf.F90 +++ b/io/module_write_netcdf.F90 @@ -30,6 +30,10 @@ module module_write_netcdf logical :: par !< True if parallel I/O should be used. + integer, parameter :: netcdf_file_type = NF90_NETCDF4 !< NetCDF file type HDF5 + ! integer, parameter :: netcdf_file_type = NF90_64BIT_DATA !< NetCDF file type CDF5 + ! integer, parameter :: netcdf_file_type = NF90_64BIT_OFFSET !< NetCDF file type CDF2 + contains !> Write netCDF file. @@ -37,7 +41,7 @@ module module_write_netcdf !> @param[in] wrtfb ESMF write field bundle. !> @param[in] filename NetCDF filename. !> @param[in] use_parallel_netcdf True if parallel I/O should be used. - !> @param[in] mpi_comm MPI communicator for parallel I/O. + !> @param[in] comm MPI communicator for parallel I/O. !> @param[in] mype MPI rank. !> @param[in] grid_id Output grid identifier. !> @param[out] rc Return code - 0 for success, ESMF error code otherwise. @@ -58,6 +62,7 @@ subroutine write_netcdf(wrtfb, filename, & integer, optional,intent(out) :: rc ! !** local vars + integer, parameter :: NF90_NODIMSCALE_ATTACH = int(Z'40000') integer :: i,j,t, istart,iend,jstart,jend integer :: im, jm, lm, lsoil @@ -89,8 +94,7 @@ subroutine write_netcdf(wrtfb, filename, & character(len=ESMF_MAXSTR) :: attName, fldName integer :: varival - real(4) :: varr4val, dataMin, dataMax - real(4), allocatable, dimension(:) :: compress_err + real(4) :: varr4val real(8) :: varr8val character(len=ESMF_MAXSTR) :: varcval @@ -98,8 +102,9 @@ subroutine write_netcdf(wrtfb, filename, & integer :: ncid integer :: oldMode integer :: dim_len - integer :: im_dimid, jm_dimid, tile_dimid, pfull_dimid, phalf_dimid, time_dimid, ch_dimid, lsoil_dimid - integer :: im_varid, jm_varid, tile_varid, lon_varid, lat_varid, timeiso_varid + integer :: im_dimid, jm_dimid, tile_dimid, pfull_dimid, phalf_dimid, time_dimid, lsoil_dimid, ch_dimid + integer :: im_varid, jm_varid, tile_varid, pfull_varid, phalf_varid, time_varid, lsoil_varid + integer :: lon_varid, lat_varid, timeiso_varid integer, dimension(:), allocatable :: dimids_2d, dimids_3d, dimids_soil, dimids, chunksizes integer, dimension(:), allocatable :: varids integer :: xtype @@ -117,6 +122,15 @@ subroutine write_netcdf(wrtfb, filename, & integer :: par_access character(len=ESMF_MAXSTR) :: output_grid_name ! + interface + function nf_set_log_level(new_level) result(status) + integer, intent(in) :: new_level + integer :: status + end function nf_set_log_level + end interface + + ! ncerr = nf_set_log_level(3); NC_ERR_STOP(ncerr) + is_cubed_sphere = .false. tileCount = 0 my_tile = 0 @@ -124,13 +138,21 @@ subroutine write_netcdf(wrtfb, filename, & start_j = -10000000 par = use_parallel_netcdf + + if (netcdf_file_type /= NF90_NETCDF4) then + par = .false. + if (ideflate(grid_id) > 0 .or. zstandard_level(grid_id) > 0) then + write(0,*)'Compression is unsupporeted in classic netcdf' + call ESMF_Finalize(endflag=ESMF_END_ABORT) + end if + end if + do_io = par .or. (mype==0) call ESMF_FieldBundleGet(wrtfb, fieldCount=fieldCount, rc=rc); ESMF_ERR_RETURN(rc) call ESMF_AttributeGet(wrtfb, convention="NetCDF", purpose="FV3", & name='grid', value=output_grid_name, rc=rc); ESMF_ERR_RETURN(rc) - allocate(compress_err(fieldCount)); compress_err=-999. allocate(fldlev(fieldCount)) ; fldlev = 0 allocate(fcstField(fieldCount)) allocate(varids(fieldCount)) @@ -234,32 +256,32 @@ subroutine write_netcdf(wrtfb, filename, & if (par) then ncerr = nf90_create(trim(filename),& - cmode=IOR(NF90_CLOBBER,NF90_NETCDF4),& + cmode=IOR(IOR(NF90_CLOBBER,netcdf_file_type),NF90_NODIMSCALE_ATTACH),& comm=comm%mpi_val, info = MPI_INFO_NULL%mpi_val, ncid=ncid); NC_ERR_STOP(ncerr) else ncerr = nf90_create(trim(filename),& - cmode=IOR(NF90_CLOBBER,NF90_NETCDF4),& + cmode=IOR(IOR(NF90_CLOBBER,netcdf_file_type),NF90_NODIMSCALE_ATTACH),& ncid=ncid); NC_ERR_STOP(ncerr) end if ! disable auto filling. ncerr = nf90_set_fill(ncid, NF90_NOFILL, oldMode); NC_ERR_STOP(ncerr) - ! define dimensions [grid_xt, grid_yta ,(pfull/phalf), (tile), time] + ! define dimensions [grid_xt, grid_yt, nchars, (pfull/phalf), (tile), time] ncerr = nf90_def_dim(ncid, "grid_xt", im, im_dimid); NC_ERR_STOP(ncerr) ncerr = nf90_def_dim(ncid, "grid_yt", jm, jm_dimid); NC_ERR_STOP(ncerr) ncerr = nf90_def_dim(ncid, "nchars", 20, ch_dimid); NC_ERR_STOP(ncerr) if (lm > 1) then - call add_dim(ncid, "pfull", pfull_dimid, wrtgrid, mype, rc) - call add_dim(ncid, "phalf", phalf_dimid, wrtgrid, mype, rc) + call add_dim(ncid, "pfull", pfull_dimid, pfull_varid, wrtgrid, mype, rc) + call add_dim(ncid, "phalf", phalf_dimid, phalf_varid, wrtgrid, mype, rc) end if if (lsoil > 1) then - call add_dim(ncid, "zsoil", lsoil_dimid, wrtgrid, mype, rc) + call add_dim(ncid, "zsoil", lsoil_dimid, lsoil_varid, wrtgrid, mype, rc) end if if (is_cubed_sphere) then ncerr = nf90_def_dim(ncid, "tile", tileCount, tile_dimid); NC_ERR_STOP(ncerr) end if - call add_dim(ncid, "time", time_dimid, wrtgrid, mype, rc) + call add_dim(ncid, "time", time_dimid, time_varid, wrtgrid, mype, rc) ! define coordinate variables ncerr = nf90_def_var(ncid, "grid_xt", NF90_DOUBLE, im_dimid, im_varid); NC_ERR_STOP(ncerr) @@ -314,20 +336,18 @@ subroutine write_netcdf(wrtfb, filename, & ncerr = nf90_put_att(ncid, lat_varid, "units", "degrees_N"); NC_ERR_STOP(ncerr) if (par) then - ncerr = nf90_var_par_access(ncid, im_varid, NF90_INDEPENDENT); NC_ERR_STOP(ncerr) - ncerr = nf90_var_par_access(ncid, lon_varid, NF90_INDEPENDENT); NC_ERR_STOP(ncerr) - ncerr = nf90_var_par_access(ncid, jm_varid, NF90_INDEPENDENT); NC_ERR_STOP(ncerr) - ncerr = nf90_var_par_access(ncid, lat_varid, NF90_INDEPENDENT); NC_ERR_STOP(ncerr) - ncerr = nf90_var_par_access(ncid, timeiso_varid, NF90_INDEPENDENT); NC_ERR_STOP(ncerr) + ncerr = nf90_var_par_access(ncid, im_varid, NF90_COLLECTIVE); NC_ERR_STOP(ncerr) + ncerr = nf90_var_par_access(ncid, lon_varid, NF90_COLLECTIVE); NC_ERR_STOP(ncerr) + ncerr = nf90_var_par_access(ncid, jm_varid, NF90_COLLECTIVE); NC_ERR_STOP(ncerr) + ncerr = nf90_var_par_access(ncid, lat_varid, NF90_COLLECTIVE); NC_ERR_STOP(ncerr) + ncerr = nf90_var_par_access(ncid, timeiso_varid, NF90_COLLECTIVE); NC_ERR_STOP(ncerr) if (is_cubed_sphere) then - ncerr = nf90_var_par_access(ncid, tile_varid, NF90_INDEPENDENT); NC_ERR_STOP(ncerr) + ncerr = nf90_var_par_access(ncid, tile_varid, NF90_COLLECTIVE); NC_ERR_STOP(ncerr) end if end if - call get_global_attr(wrtfb, ncid, mype, rc) - ! define variables (fields) if (is_cubed_sphere) then allocate(dimids_2d(4)) @@ -346,7 +366,7 @@ subroutine write_netcdf(wrtfb, filename, & do i=1, fieldCount call ESMF_FieldGet(fcstField(i), name=fldName, rank=rank, typekind=typekind, rc=rc); ESMF_ERR_RETURN(rc) - par_access = NF90_INDEPENDENT + par_access = NF90_COLLECTIVE if (rank == 2) then dimids = dimids_2d @@ -394,7 +414,7 @@ subroutine write_netcdf(wrtfb, filename, & ishuffle = NF90_NOSHUFFLE ! shuffle filter on when using lossy compression - if ( quantize_nsd(grid_id) > 0) then + if (quantize_nsd(grid_id) > 0) then ishuffle = NF90_SHUFFLE end if if (ideflate(grid_id) > 0) then @@ -482,11 +502,23 @@ subroutine write_netcdf(wrtfb, filename, & end do ! i=1,fieldCount ncerr = nf90_enddef(ncid); NC_ERR_STOP(ncerr) + ! end of define mode + + ! write dimension variables, except grid_xt, grid_yt + ! those will be written later with lon,lat variables + if (lm > 1) then + call write_dim(ncid, "pfull", pfull_dimid, pfull_varid, wrtgrid, mype, rc) + call write_dim(ncid, "phalf", phalf_dimid, phalf_varid, wrtgrid, mype, rc) + end if + if (lsoil > 1) then + call write_dim(ncid, "zsoil", lsoil_dimid, lsoil_varid, wrtgrid, mype, rc) + end if + call write_dim(ncid, "time", time_dimid, time_varid, wrtgrid, mype, rc) + end if - ! end of define mode ! - ! write dimension variables and lon,lat variables + ! write lon,lat variables ! if (allocated(start_idx)) deallocate(start_idx) if (is_cubed_sphere) then @@ -755,7 +787,6 @@ subroutine write_netcdf(wrtfb, filename, & deallocate(fcstField) deallocate(varids) - deallocate(compress_err) if (do_io) then ncerr = nf90_close(ncid=ncid); NC_ERR_STOP(ncerr) @@ -808,7 +839,14 @@ subroutine get_global_attr(fldbundle, ncid, mype, rc) else if (typekind==ESMF_TYPEKIND_I8) then call ESMF_AttributeGet(fldbundle, convention="NetCDF", purpose="FV3", & name=trim(attname), value=varival_i8, rc=rc); ESMF_ERR_RETURN(rc) - ncerr = nf90_put_att(ncid, nf90_global, trim(attname), varival_i8); NC_ERR_STOP(ncerr) + if (netcdf_file_type == NF90_64BIT_OFFSET) then + ! NetCDF NF90_64BIT_OFFSET (CDF2) does not support int64 attributes + ! Currently only one global attribute is int64 (:grid_id = 1LL) + varival_i4 = varival_i8 + ncerr = nf90_put_att(ncid, nf90_global, trim(attname), varival_i4); NC_ERR_STOP(ncerr) + else + ncerr = nf90_put_att(ncid, nf90_global, trim(attname), varival_i8); NC_ERR_STOP(ncerr) + end if else if (typekind==ESMF_TYPEKIND_R4) then allocate (varr4list(itemCount)) @@ -938,22 +976,19 @@ end subroutine get_dimlen_if_exists !> @param[out] rc Return code - 0 for success, ESMF error code otherwise. !> !> @author Dusan Jovic @date Nov 1, 2017 - subroutine add_dim(ncid, dim_name, dimid, grid, mype, rc) + subroutine add_dim(ncid, dim_name, dimid, dim_varid, grid, mype, rc) integer, intent(in) :: ncid character(len=*), intent(in) :: dim_name - integer, intent(inout) :: dimid + integer, intent(inout) :: dimid + integer, intent(inout) :: dim_varid type(ESMF_Grid), intent(in) :: grid integer, intent(in) :: mype integer, intent(out) :: rc ! local variable - integer :: n, dim_varid + integer :: n integer :: ncerr type(ESMF_TypeKind_Flag) :: typekind - - real(ESMF_KIND_I4), allocatable :: valueListI4(:) - real(ESMF_KIND_R4), allocatable :: valueListR4(:) - real(ESMF_KIND_R8), allocatable :: valueListR8(:) ! call ESMF_AttributeGet(grid, convention="NetCDF", purpose="FV3", & attnestflag=ESMF_ATTNEST_OFF, name=dim_name, & @@ -972,43 +1007,80 @@ subroutine add_dim(ncid, dim_name, dimid, grid, mype, rc) end if if (typekind==ESMF_TYPEKIND_R8) then - ncerr = nf90_def_var(ncid, dim_name, NF90_REAL8, dimids=[dimid], varid=dim_varid); NC_ERR_STOP(ncerr) + ncerr = nf90_def_var(ncid, dim_name, NF90_REAL8, dimids=[dimid], varid=dim_varid); NC_ERR_STOP(ncerr) + else if (typekind==ESMF_TYPEKIND_R4) then + ncerr = nf90_def_var(ncid, dim_name, NF90_REAL4, dimids=[dimid], varid=dim_varid); NC_ERR_STOP(ncerr) + else if (typekind==ESMF_TYPEKIND_I4) then + ncerr = nf90_def_var(ncid, dim_name, NF90_INT4, dimids=[dimid], varid=dim_varid); NC_ERR_STOP(ncerr) + else + if (mype==0) write(0,*)'Error in module_write_netcdf.F90(add_dim) unknown typekind for ',trim(dim_name) + call ESMF_Finalize(endflag=ESMF_END_ABORT) + end if + if (par) then + ncerr = nf90_var_par_access(ncid, dim_varid, NF90_COLLECTIVE); NC_ERR_STOP(ncerr) + end if + + call get_grid_attr(grid, dim_name, ncid, dim_varid, rc) + + end subroutine add_dim + + !> Write a dimension variable. + !> + !> @param[in] ncid NetCDF file ID. + !> @param[in] dim_name Dimension name. + !> @param[in] dimid Dimension ID. + !> @param[in] dim_varid Dimension variable ID. + !> @param[in] grid ESMF output grid. + !> @param[in] mype MPI rank. + !> @param[out] rc Return code - 0 for success, ESMF error code otherwise. + !> + !> @author Dusan Jovic @date Nov 1, 2017 + subroutine write_dim(ncid, dim_name, dimid, dim_varid, grid, mype, rc) + integer, intent(in) :: ncid + character(len=*), intent(in) :: dim_name + integer, intent(in) :: dimid + integer, intent(in) :: dim_varid + type(ESMF_Grid), intent(in) :: grid + integer, intent(in) :: mype + integer, intent(out) :: rc + +! local variable + integer :: n + integer :: ncerr + type(ESMF_TypeKind_Flag) :: typekind + + real(ESMF_KIND_I4), allocatable :: valueListI4(:) + real(ESMF_KIND_R4), allocatable :: valueListR4(:) + real(ESMF_KIND_R8), allocatable :: valueListR8(:) +! + call ESMF_AttributeGet(grid, convention="NetCDF", purpose="FV3", & + attnestflag=ESMF_ATTNEST_OFF, name=dim_name, & + typekind=typekind, itemCount=n, rc=rc); ESMF_ERR_RETURN(rc) + + if (typekind==ESMF_TYPEKIND_R8) then allocate(valueListR8(n)) call ESMF_AttributeGet(grid, convention="NetCDF", purpose="FV3", & name=trim(dim_name), valueList=valueListR8, rc=rc); ESMF_ERR_RETURN(rc) - ncerr = nf90_enddef(ncid=ncid); NC_ERR_STOP(ncerr) ncerr = nf90_put_var(ncid, dim_varid, values=valueListR8); NC_ERR_STOP(ncerr) - ncerr = nf90_redef(ncid=ncid); NC_ERR_STOP(ncerr) deallocate(valueListR8) - else if (typekind==ESMF_TYPEKIND_R4) then - ncerr = nf90_def_var(ncid, dim_name, NF90_REAL4, dimids=[dimid], varid=dim_varid); NC_ERR_STOP(ncerr) + else if (typekind==ESMF_TYPEKIND_R4) then allocate(valueListR4(n)) call ESMF_AttributeGet(grid, convention="NetCDF", purpose="FV3", & name=trim(dim_name), valueList=valueListR4, rc=rc); ESMF_ERR_RETURN(rc) - ncerr = nf90_enddef(ncid=ncid); NC_ERR_STOP(ncerr) ncerr = nf90_put_var(ncid, dim_varid, values=valueListR4); NC_ERR_STOP(ncerr) - ncerr = nf90_redef(ncid=ncid); NC_ERR_STOP(ncerr) deallocate(valueListR4) - else if (typekind==ESMF_TYPEKIND_I4) then - ncerr = nf90_def_var(ncid, dim_name, NF90_INT4, dimids=[dimid], varid=dim_varid); NC_ERR_STOP(ncerr) + else if (typekind==ESMF_TYPEKIND_I4) then allocate(valueListI4(n)) call ESMF_AttributeGet(grid, convention="NetCDF", purpose="FV3", & name=trim(dim_name), valueList=valueListI4, rc=rc); ESMF_ERR_RETURN(rc) - ncerr = nf90_enddef(ncid=ncid); NC_ERR_STOP(ncerr) ncerr = nf90_put_var(ncid, dim_varid, values=valueListI4); NC_ERR_STOP(ncerr) - ncerr = nf90_redef(ncid=ncid); NC_ERR_STOP(ncerr) deallocate(valueListI4) - else - if (mype==0) write(0,*)'Error in module_write_netcdf.F90(add_dim) unknown typekind for ',trim(dim_name) - call ESMF_Finalize(endflag=ESMF_END_ABORT) - end if - if (par) then - ncerr = nf90_var_par_access(ncid, dim_varid, NF90_INDEPENDENT); NC_ERR_STOP(ncerr) + else + if (mype==0) write(0,*)'Error in module_write_netcdf.F90(write_dim) unknown typekind for ',trim(dim_name) + call ESMF_Finalize(endflag=ESMF_END_ABORT) end if - call get_grid_attr(grid, dim_name, ncid, dim_varid, rc) - - end subroutine add_dim + end subroutine write_dim !---------------------------------------------------------------------------------------- end module module_write_netcdf