From ab3b0aaa3b7688ee362589ea63a7bbf2d5042487 Mon Sep 17 00:00:00 2001
From: Alper Altuntas
Date: Wed, 20 Dec 2023 16:03:42 -0700
Subject: [PATCH] Automated Runtime Land Block Elimination (#263)

* Enhancements for adding land block elimination to NUOPC cap:
  - Add sum_across_PEs_int4_2d to the sum_across_PEs interface
  - Allow the mask_table file to be placed in the run directory (now the
    first directory that is looked at).

* Enhance NUOPC cap to support MOM_mask_table.
  - Determine masked blocks.
  - Evenly distribute eliminated cells.
  - Fill ESMF gindex array accordingly.
  - During Export phase, set fields of eliminated cells to zero.

* set %label in register_netcdf_field and register_netcdf_axis

* first working version of an automated mask table generator

* While determining masked blocks, take reentrancy and tripolar stitch into
  account

* apply tripolar stitch fix in auto mask_table generation

* add AUTO_IO_LAYOUT_FAC parameter to control IO_LAYOUT when AUTO_MASKTABLE
  is on

* Miscellaneous auto masking fixes to address reviews:
  - Dimensionalize topographic depth variables used to determine cell masks
    in the auto masktable routine.
  - Raise an error if the user-provided PE layout is inconsistent with auto
    masktable generation.
  - Save the masktable parameter description to a string variable to avoid
    repetition.
  - Fix typos and whitespace, use modern array syntax.

* Disable FPEs in MacOS testing

Due to poor handling of floating point in HDF5 1.14.3, it is currently not
possible to use floating point exceptions (FPEs) whenever this version is
present.

The GitHub Actions CI nodes would randomly select either 1.14.2 or 1.14.3,
and would raise an FPE error if 1.14.3 was selected. Additionally, the
homebrew installation does not provide a clean method for selecting a
different version of HDF5.

Thus, for now we disable FPEs in the MacOS testing, and hope to catch any
legitimate FP errors in the Ubuntu version. We will restore these tests as
soon as this has been fixed in an easily-accessible version of HDF5.

As part of this PR, I have also moved the FCFLAGS configuration to the
platform-specific Actions files, allowing for independent compiler
configuration for each platform.

---------

Co-authored-by: Marshall Ward
---
 .github/actions/macos-setup/action.yml     |  15 +
 .github/actions/testing-setup/action.yml   |  11 -
 .github/actions/ubuntu-setup/action.yml    |  12 +
 config_src/drivers/nuopc_cap/mom_cap.F90   | 138 ++++++--
 .../drivers/nuopc_cap/mom_cap_methods.F90  |   9 +-
 config_src/infra/FMS1/MOM_domain_infra.F90 |  15 +-
 config_src/infra/FMS2/MOM_coms_infra.F90   |  10 +
 config_src/infra/FMS2/MOM_domain_infra.F90 |  17 +-
 src/core/MOM.F90                           |   4 +-
 src/framework/MOM_domains.F90              | 321 +++++++++++++++++-
 src/ice_shelf/MOM_ice_shelf.F90            |   2 +-
 src/ocean_data_assim/MOM_oda_driver.F90    |   2 +-
 12 files changed, 497 insertions(+), 59 deletions(-)

diff --git a/.github/actions/macos-setup/action.yml b/.github/actions/macos-setup/action.yml
index fecbe787b5..4c248abd11 100644
--- a/.github/actions/macos-setup/action.yml
+++ b/.github/actions/macos-setup/action.yml
@@ -16,3 +16,18 @@ runs:
         brew install netcdf-fortran
         brew install mpich
         echo "::endgroup::"
+
+    # NOTE: Floating point exceptions are currently disabled due to an error in
+    # HDF5 1.14.3. They will be re-enabled when the default brew version has
+    # been updated to a working version.
+
+    - name: Set compiler flags
+      shell: bash
+      run: |
+        cd .testing
+        echo "FCFLAGS_DEBUG = -g -O0 -Wextra -Wno-compare-reals -fbacktrace -fcheck=bounds" >> config.mk
+        echo "FCFLAGS_REPRO = -g -O2 -fbacktrace" >> config.mk
+        echo "FCFLAGS_INIT = -finit-real=snan -finit-integer=2147483647 -finit-derived" >> config.mk
+        echo "FCFLAGS_FMS = -g -fbacktrace -O0" >> config.mk
+        cat config.mk
+        echo "::endgroup::"
diff --git a/.github/actions/testing-setup/action.yml b/.github/actions/testing-setup/action.yml
index 6ba149d927..a15dd6d0a2 100644
--- a/.github/actions/testing-setup/action.yml
+++ b/.github/actions/testing-setup/action.yml
@@ -31,17 +31,6 @@ runs:
         REPORT_ERROR_LOGS=true make deps/lib/libFMS.a -s -j
         echo "::endgroup::"

-    - name: Store compiler flags used in Makefile
-      shell: bash
-      run: |
-        echo "::group::config.mk"
-        cd .testing
-        echo "FCFLAGS_DEBUG=-g -O0 -Wextra -Wno-compare-reals -fbacktrace -ffpe-trap=invalid,zero,overflow -fcheck=bounds" >> config.mk
-        echo "FCFLAGS_REPRO=-g -O2 -fbacktrace" >> config.mk
-        echo "FCFLAGS_INIT=-finit-real=snan -finit-integer=2147483647 -finit-derived" >> config.mk
-        cat config.mk
-        echo "::endgroup::"
-
     - name: Compile MOM6 in symmetric memory mode
       shell: bash
       run: |
diff --git a/.github/actions/ubuntu-setup/action.yml b/.github/actions/ubuntu-setup/action.yml
index 3f3ba5f0b6..83d6795954 100644
--- a/.github/actions/ubuntu-setup/action.yml
+++ b/.github/actions/ubuntu-setup/action.yml
@@ -17,3 +17,15 @@ runs:
         sudo apt-get install libopenmpi-dev
         sudo apt-get install linux-tools-common
         echo "::endgroup::"
+
+    - name: Store compiler flags used in Makefile
+      shell: bash
+      run: |
+        echo "::group::config.mk"
+        cd .testing
+        echo "FCFLAGS_DEBUG = -g -O0 -Wextra -Wno-compare-reals -fbacktrace -ffpe-trap=invalid,zero,overflow -fcheck=bounds" >> config.mk
+        echo "FCFLAGS_REPRO = -g -O2 -fbacktrace" >> config.mk
+        echo "FCFLAGS_INIT = -finit-real=snan -finit-integer=2147483647 -finit-derived" >> config.mk
+        echo "FCFLAGS_FMS = -g -fbacktrace -O0" >> config.mk
+        cat config.mk
+        echo "::endgroup::"
diff --git a/config_src/drivers/nuopc_cap/mom_cap.F90 b/config_src/drivers/nuopc_cap/mom_cap.F90
index 120078b11e..843e8c2ef1 100644
--- a/config_src/drivers/nuopc_cap/mom_cap.F90
+++ b/config_src/drivers/nuopc_cap/mom_cap.F90
@@ -16,7 +16,7 @@ module MOM_cap_mod
 use MOM_domains, only: MOM_infra_init, MOM_infra_end
 use MOM_file_parser, only: get_param, log_version, param_file_type, close_param_file
 use MOM_get_input, only: get_MOM_input, directories
-use MOM_domains, only: pass_var
+use MOM_domains, only: pass_var, pe_here
 use MOM_error_handler, only: MOM_error, FATAL, is_root_pe
 use MOM_grid, only: ocean_grid_type, get_global_grid_size
 use MOM_ocean_model_nuopc, only: ice_ocean_boundary_type
@@ -29,6 +29,7 @@ module MOM_cap_mod
 use MOM_cap_methods, only: med2mod_areacor, state_diagnose
 use MOM_cap_methods, only: ChkErr
 use MOM_ensemble_manager, only: ensemble_manager_init
+use MOM_coms, only: sum_across_PEs

 #ifdef CESMCOUPLED
 use shr_log_mod, only: shr_log_setLogUnit
@@ -826,6 +827,7 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc)
     type(ocean_grid_type) , pointer :: ocean_grid
     type(ocean_internalstate_wrapper) :: ocean_internalstate
     integer :: npet, ntiles
+    integer :: npes ! number of PEs (from FMS).
     integer :: nxg, nyg, cnt
     integer :: isc,iec,jsc,jec
     integer, allocatable :: xb(:),xe(:),yb(:),ye(:),pe(:)
@@ -852,6 +854,8 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc)
     integer :: lsize
     integer :: ig,jg, ni,nj,k
     integer, allocatable :: gindex(:) ! global index space
+    integer, allocatable :: gindex_ocn(:) ! global index space for ocean cells (excl. masked cells)
+    integer, allocatable :: gindex_elim(:) ! global index space for eliminated cells
     character(len=128) :: fldname
     character(len=256) :: cvalue
     character(len=256) :: frmt ! format specifier for several error msgs
@@ -875,6 +879,11 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc)
     real(ESMF_KIND_R8) :: min_areacor_glob(2)
     real(ESMF_KIND_R8) :: max_areacor_glob(2)
     character(len=*), parameter :: subname='(MOM_cap:InitializeRealize)'
+    integer :: niproc, njproc
+    integer :: ip, jp, pe_ix
+    integer :: num_elim_blocks ! number of blocks to be eliminated
+    integer :: num_elim_cells_global, num_elim_cells_local, num_elim_cells_remaining
+    integer, allocatable :: cell_mask(:,:)
     !--------------------------------

     rc = ESMF_SUCCESS
@@ -919,19 +928,19 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc)
       rc = ESMF_FAILURE
       call ESMF_LogWrite(subname//' ntiles must be 1', ESMF_LOGMSG_ERROR)
     endif
-    ntiles = mpp_get_domain_npes(ocean_public%domain)
-    write(tmpstr,'(a,1i6)') subname//' ntiles = ',ntiles
+    npes = mpp_get_domain_npes(ocean_public%domain)
+    write(tmpstr,'(a,1i6)') subname//' npes = ',npes
     call ESMF_LogWrite(trim(tmpstr), ESMF_LOGMSG_INFO)

     !---------------------------------
     ! get start and end indices of each tile and their PET
     !---------------------------------
-    allocate(xb(ntiles),xe(ntiles),yb(ntiles),ye(ntiles),pe(ntiles))
+    allocate(xb(npes),xe(npes),yb(npes),ye(npes),pe(npes))
     call mpp_get_compute_domains(ocean_public%domain, xbegin=xb, xend=xe, ybegin=yb, yend=ye)
     call mpp_get_pelist(ocean_public%domain, pe)
     if (dbug > 1) then
-      do n = 1,ntiles
+      do n = 1,npes
         write(tmpstr,'(a,6i6)') subname//' tiles ',n,pe(n),xb(n),xe(n),yb(n),ye(n)
         call ESMF_LogWrite(trim(tmpstr), ESMF_LOGMSG_INFO)
       enddo

@@ -953,17 +962,102 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc)
     call get_global_grid_size(ocean_grid, ni, nj)
     lsize = ( ocean_grid%iec - ocean_grid%isc + 1 ) * ( ocean_grid%jec - ocean_grid%jsc + 1 )

-    ! Create the global index space for the computational domain
-    allocate(gindex(lsize))
-    k = 0
-    do j = ocean_grid%jsc, ocean_grid%jec
-      jg = j + ocean_grid%jdg_offset
-      do i = ocean_grid%isc, ocean_grid%iec
-        ig = i + ocean_grid%idg_offset
-        k = k + 1 ! Increment position within gindex
-        gindex(k) = ni * (jg - 1) + ig
+    num_elim_blocks = 0
+    num_elim_cells_global = 0
+    num_elim_cells_local = 0
+    num_elim_cells_remaining = 0
+
+    ! Compute the number of eliminated blocks (specified in MOM_mask_table)
+    if (associated(ocean_grid%Domain%maskmap)) then
+      njproc = size(ocean_grid%Domain%maskmap, 1)
+      niproc = size(ocean_grid%Domain%maskmap, 2)
+
+      do ip = 1, niproc
+        do jp = 1, njproc
+          if (.not. ocean_grid%Domain%maskmap(jp,ip)) then
+            num_elim_blocks = num_elim_blocks+1
+          endif
+        enddo
       enddo
-    enddo
+    endif
+
+    ! Apply land block elimination to ESMF gindex
+    ! (Here we assume that each processor gets assigned a single tile. If multi-tile implementation is to be added
+    ! in MOM6 NUOPC cap in the future, the code below must be updated accordingly.)
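+    ! Illustration (editor's note, hypothetical numbers): if ni*nj = 120 with 92 active ocean cells and
+    ! npes = 5, then num_elim_cells_global = 28, each PE is assigned num_elim_cells_local = 28/5 = 5
+    ! eliminated cells, and the last PE additionally takes num_elim_cells_remaining = 28 - 5*5 = 3 cells,
+    ! so its gindex gains 5+3 = 8 extra entries.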
+    if (num_elim_blocks>0) then
+
+      allocate(cell_mask(ni, nj), source=0)
+      allocate(gindex_ocn(lsize))
+      k = 0
+      do j = ocean_grid%jsc, ocean_grid%jec
+        jg = j + ocean_grid%jdg_offset
+        do i = ocean_grid%isc, ocean_grid%iec
+          ig = i + ocean_grid%idg_offset
+          k = k + 1 ! Increment position within gindex
+          gindex_ocn(k) = ni * (jg - 1) + ig
+          cell_mask(ig, jg) = 1
+        enddo
+      enddo
+      call sum_across_PEs(cell_mask, ni*nj)
+
+      if (maxval(cell_mask) /= 1 ) then
+        call MOM_error(FATAL, "Encountered cells shared by multiple PEs while attempting to determine masked cells.")
+      endif
+
+      num_elim_cells_global = ni * nj - sum(cell_mask)
+      num_elim_cells_local = num_elim_cells_global / npes
+
+      if (pe_here() == pe(npes)) then
+        ! assign all remaining cells to the last PE.
+        num_elim_cells_remaining = num_elim_cells_global - num_elim_cells_local * npes
+        allocate(gindex_elim(num_elim_cells_local+num_elim_cells_remaining))
+      else
+        allocate(gindex_elim(num_elim_cells_local))
+      endif
+
+      ! Zero-based PE index.
+      pe_ix = pe_here() - pe(1)
+
+      k = 0
+      do jg = 1, nj
+        do ig = 1, ni
+          if (cell_mask(ig, jg) == 0) then
+            k = k + 1
+            if (k > pe_ix * num_elim_cells_local .and. &
+                k <= ((pe_ix+1) * num_elim_cells_local + num_elim_cells_remaining)) then
+              gindex_elim(k - pe_ix * num_elim_cells_local) = ni * (jg -1) + ig
+            endif
+          endif
+        enddo
+      enddo
+
+      allocate(gindex(lsize + num_elim_cells_local + num_elim_cells_remaining))
+      do k = 1, lsize
+        gindex(k) = gindex_ocn(k)
+      enddo
+      do k = 1, num_elim_cells_local + num_elim_cells_remaining
+        gindex(k+lsize) = gindex_elim(k)
+      enddo
+
+      deallocate(cell_mask)
+      deallocate(gindex_ocn)
+      deallocate(gindex_elim)
+
+    else ! no eliminated land blocks
+
+      ! Create the global index space for the computational domain
+      allocate(gindex(lsize))
+      k = 0
+      do j = ocean_grid%jsc, ocean_grid%jec
+        jg = j + ocean_grid%jdg_offset
+        do i = ocean_grid%isc, ocean_grid%iec
+          ig = i + ocean_grid%idg_offset
+          k = k + 1 ! Increment position within gindex
+          gindex(k) = ni * (jg - 1) + ig
+        enddo
+      enddo
+
+    endif

     DistGrid = ESMF_DistGridCreate(arbSeqIndexList=gindex, rc=rc)
     if (ChkErr(rc,__LINE__,u_FILE_u)) return
@@ -987,6 +1081,10 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc)
     call ESMF_MeshGet(Emesh, spatialDim=spatialDim, numOwnedElements=numOwnedElements, rc=rc)
     if (ChkErr(rc,__LINE__,u_FILE_u)) return

+    if (lsize /= numOwnedElements - num_elim_cells_local - num_elim_cells_remaining) then
+      call MOM_error(FATAL, "Discrepancy detected between ESMF mesh and internal MOM6 domain sizes. Check mask table.")
+    endif
+
     allocate(ownedElemCoords(spatialDim*numOwnedElements))
     allocate(lonMesh(numOwnedElements), lon(numOwnedElements))
     allocate(latMesh(numOwnedElements), lat(numOwnedElements))
@@ -1018,7 +1116,7 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc)
     end do

     eps_omesh = get_eps_omesh(ocean_state)
-    do n = 1,numOwnedElements
+    do n = 1,lsize
       diff_lon = abs(mod(lonMesh(n) - lon(n),360.0))
       if (diff_lon > eps_omesh) then
         frmt = "('ERROR: Difference between ESMF Mesh and MOM6 domain coords is "//&
@@ -1122,11 +1220,11 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc)

     ! generate delayout and dist_grid
-    allocate(deBlockList(2,2,ntiles))
-    allocate(petMap(ntiles))
-    allocate(deLabelList(ntiles))
+    allocate(deBlockList(2,2,npes))
+    allocate(petMap(npes))
+    allocate(deLabelList(npes))

-    do n = 1, ntiles
+    do n = 1, npes
       deLabelList(n) = n
       deBlockList(1,1,n) = xb(n)
       deBlockList(1,2,n) = xe(n)
diff --git a/config_src/drivers/nuopc_cap/mom_cap_methods.F90 b/config_src/drivers/nuopc_cap/mom_cap_methods.F90
index db8bc33c90..3aa6278e9f 100644
--- a/config_src/drivers/nuopc_cap/mom_cap_methods.F90
+++ b/config_src/drivers/nuopc_cap/mom_cap_methods.F90
@@ -852,7 +852,7 @@ subroutine State_SetExport(state, fldname, isc, iec, jsc, jec, input, ocean_grid

   ! local variables
   type(ESMF_StateItem_Flag) :: itemFlag
-  integer :: n, i, j, i1, j1, ig,jg
+  integer :: n, i, j, k, i1, j1, ig,jg
   integer :: lbnd1,lbnd2
   real(ESMF_KIND_R8), pointer :: dataPtr1d(:)
   real(ESMF_KIND_R8), pointer :: dataPtr2d(:,:)
@@ -888,6 +888,13 @@ subroutine State_SetExport(state, fldname, isc, iec, jsc, jec, input, ocean_grid
       enddo
     end if

+    ! if a maskmap is provided, set exports of all eliminated cells to zero.
+    if (associated(ocean_grid%Domain%maskmap)) then
+      do k = n+1, size(dataPtr1d)
+        dataPtr1d(k) = 0.0
+      enddo
+    endif
+
   else if (geomtype == ESMF_GEOMTYPE_GRID) then

     call state_getfldptr(state, trim(fldname), dataptr2d, rc)
diff --git a/config_src/infra/FMS1/MOM_domain_infra.F90 b/config_src/infra/FMS1/MOM_domain_infra.F90
index 2c97a0bb31..1de9a6d658 100644
--- a/config_src/infra/FMS1/MOM_domain_infra.F90
+++ b/config_src/infra/FMS1/MOM_domain_infra.F90
@@ -16,7 +16,7 @@ module MOM_domain_infra
 use mpp_domains_mod, only : mpp_create_group_update, mpp_do_group_update
 use mpp_domains_mod, only : mpp_reset_group_update_field, mpp_group_update_initialized
 use mpp_domains_mod, only : mpp_start_group_update, mpp_complete_group_update
-use mpp_domains_mod, only : mpp_compute_block_extent
+use mpp_domains_mod, only : mpp_compute_block_extent, mpp_compute_extent
 use mpp_domains_mod, only : mpp_broadcast_domain, mpp_redistribute, mpp_global_field
 use mpp_domains_mod, only : AGRID, BGRID_NE, CGRID_NE, SCALAR_PAIR, BITWISE_EXACT_SUM
 use mpp_domains_mod, only : CYCLIC_GLOBAL_DOMAIN, FOLD_NORTH_EDGE
@@ -40,7 +40,7 @@ module MOM_domain_infra
 public :: domain2D, domain1D, group_pass_type
 ! These interfaces are actually implemented or have explicit interfaces in this file.
 public :: create_MOM_domain, clone_MOM_domain, get_domain_components, get_domain_extent
-public :: deallocate_MOM_domain, get_global_shape, compute_block_extent
+public :: deallocate_MOM_domain, get_global_shape, compute_block_extent, compute_extent
 public :: pass_var, pass_vector, fill_symmetric_edges, rescale_comp_data
 public :: pass_var_start, pass_var_complete, pass_vector_start, pass_vector_complete
 public :: create_group_pass, do_group_pass, start_group_pass, complete_group_pass
@@ -1945,6 +1945,17 @@ subroutine compute_block_extent(isg, ieg, ndivs, ibegin, iend)
   call mpp_compute_block_extent(isg, ieg, ndivs, ibegin, iend)
 end subroutine compute_block_extent

+!> Get the array ranges in one dimension for the divisions of a global index space
+subroutine compute_extent(isg, ieg, ndivs, ibegin, iend)
+  integer,               intent(in)  :: isg    !< The starting index of the global index space
+  integer,               intent(in)  :: ieg    !< The ending index of the global index space
+  integer,               intent(in)  :: ndivs  !< The number of divisions
+  integer, dimension(:), intent(out) :: ibegin !< The starting index of each division
+  integer, dimension(:), intent(out) :: iend   !< The ending index of each division
+
+  call mpp_compute_extent(isg, ieg, ndivs, ibegin, iend)
+end subroutine compute_extent
+
 !> Broadcast a 2-d domain from the root PE to the other PEs
 subroutine broadcast_domain(domain)
   type(domain2d), intent(inout) :: domain !< The domain2d type that will be shared across PEs.
diff --git a/config_src/infra/FMS2/MOM_coms_infra.F90 b/config_src/infra/FMS2/MOM_coms_infra.F90
index cf9a724734..06a9b9f343 100644
--- a/config_src/infra/FMS2/MOM_coms_infra.F90
+++ b/config_src/infra/FMS2/MOM_coms_infra.F90
@@ -42,6 +42,7 @@ module MOM_coms_infra
 interface sum_across_PEs
   module procedure sum_across_PEs_int4_0d
   module procedure sum_across_PEs_int4_1d
+  module procedure sum_across_PEs_int4_2d
   module procedure sum_across_PEs_int8_0d
   module procedure sum_across_PEs_int8_1d
   module procedure sum_across_PEs_int8_2d
@@ -357,6 +358,15 @@ subroutine sum_across_PEs_int4_1d(field, length, pelist)
   call mpp_sum(field, length, pelist)
 end subroutine sum_across_PEs_int4_1d

+!> Find the sum of the values in corresponding positions of field across PEs, and return these sums in field.
+subroutine sum_across_PEs_int4_2d(field, length, pelist)
+  integer(kind=int32), dimension(:,:), intent(inout) :: field  !< The values to add, the sums upon return
+  integer,                             intent(in)    :: length !< Number of elements in field to add
+  integer,             optional,       intent(in)    :: pelist(:) !< List of PEs to work with
+
+  call mpp_sum(field, length, pelist)
+end subroutine sum_across_PEs_int4_2d
+
 !> Find the sum of field across PEs, and return this sum in field.
 subroutine sum_across_PEs_int8_0d(field, pelist)
   integer(kind=int64), intent(inout) :: field !< Value on this PE, and the sum across PEs upon return
diff --git a/config_src/infra/FMS2/MOM_domain_infra.F90 b/config_src/infra/FMS2/MOM_domain_infra.F90
index ff1d888c47..95159f7fe1 100644
--- a/config_src/infra/FMS2/MOM_domain_infra.F90
+++ b/config_src/infra/FMS2/MOM_domain_infra.F90
@@ -16,7 +16,7 @@ module MOM_domain_infra
 use mpp_domains_mod, only : mpp_create_group_update, mpp_do_group_update
 use mpp_domains_mod, only : mpp_reset_group_update_field, mpp_group_update_initialized
 use mpp_domains_mod, only : mpp_start_group_update, mpp_complete_group_update
-use mpp_domains_mod, only : mpp_compute_block_extent
+use mpp_domains_mod, only : mpp_compute_block_extent, mpp_compute_extent
 use mpp_domains_mod, only : mpp_broadcast_domain, mpp_redistribute, mpp_global_field
 use mpp_domains_mod, only : AGRID, BGRID_NE, CGRID_NE, SCALAR_PAIR, BITWISE_EXACT_SUM
 use mpp_domains_mod, only : CYCLIC_GLOBAL_DOMAIN, FOLD_NORTH_EDGE
@@ -38,7 +38,7 @@ module MOM_domain_infra
 public :: domain2D, domain1D, group_pass_type
 ! These interfaces are actually implemented or have explicit interfaces in this file.
 public :: create_MOM_domain, clone_MOM_domain, get_domain_components, get_domain_extent
-public :: deallocate_MOM_domain, get_global_shape, compute_block_extent
+public :: deallocate_MOM_domain, get_global_shape, compute_block_extent, compute_extent
 public :: pass_var, pass_vector, fill_symmetric_edges, rescale_comp_data
 public :: pass_var_start, pass_var_complete, pass_vector_start, pass_vector_complete
 public :: create_group_pass, do_group_pass, start_group_pass, complete_group_pass
@@ -1936,7 +1936,7 @@ subroutine get_global_shape(domain, niglobal, njglobal)
   njglobal = domain%njglobal
 end subroutine get_global_shape

-!> Get the array ranges in one dimension for the divisions of a global index space
+!> Get the array ranges in one dimension for the divisions of a global index space (alternative to compute_extent)
 subroutine compute_block_extent(isg, ieg, ndivs, ibegin, iend)
   integer,               intent(in)  :: isg    !< The starting index of the global index space
   integer,               intent(in)  :: ieg    !< The ending index of the global index space
@@ -1947,6 +1947,17 @@ subroutine compute_block_extent(isg, ieg, ndivs, ibegin, iend)
   call mpp_compute_block_extent(isg, ieg, ndivs, ibegin, iend)
 end subroutine compute_block_extent

+!> Get the array ranges in one dimension for the divisions of a global index space
+subroutine compute_extent(isg, ieg, ndivs, ibegin, iend)
+  integer,               intent(in)  :: isg    !< The starting index of the global index space
+  integer,               intent(in)  :: ieg    !< The ending index of the global index space
+  integer,               intent(in)  :: ndivs  !< The number of divisions
+  integer, dimension(:), intent(out) :: ibegin !< The starting index of each division
+  integer, dimension(:), intent(out) :: iend   !< The ending index of each division
+
+  call mpp_compute_extent(isg, ieg, ndivs, ibegin, iend)
+end subroutine compute_extent
+
 !> Broadcast a 2-d domain from the root PE to the other PEs
 subroutine broadcast_domain(domain)
   type(domain2d), intent(inout) :: domain !< The domain2d type that will be shared across PEs.
diff --git a/src/core/MOM.F90 b/src/core/MOM.F90
index 89d1ee2004..447f77117f 100644
--- a/src/core/MOM.F90
+++ b/src/core/MOM.F90
@@ -2430,12 +2430,12 @@ subroutine initialize_MOM(Time, Time_init, param_file, dirs, CS, restart_CSp, &
 #endif
   G_in => CS%G_in
 #ifdef STATIC_MEMORY_
-  call MOM_domains_init(G_in%domain, param_file, symmetric=symmetric, &
+  call MOM_domains_init(G_in%domain, US, param_file, symmetric=symmetric, &
                         static_memory=.true., NIHALO=NIHALO_, NJHALO=NJHALO_, &
                         NIGLOBAL=NIGLOBAL_, NJGLOBAL=NJGLOBAL_, NIPROC=NIPROC_, &
                         NJPROC=NJPROC_)
 #else
-  call MOM_domains_init(G_in%domain, param_file, symmetric=symmetric, &
+  call MOM_domains_init(G_in%domain, US, param_file, symmetric=symmetric, &
                         domain_name="MOM_in")
 #endif

diff --git a/src/framework/MOM_domains.F90 b/src/framework/MOM_domains.F90
index a0f3855d19..f2c3225025 100644
--- a/src/framework/MOM_domains.F90
+++ b/src/framework/MOM_domains.F90
@@ -20,10 +20,13 @@ module MOM_domains
 use MOM_domain_infra, only : AGRID, BGRID_NE, CGRID_NE, SCALAR_PAIR
 use MOM_domain_infra, only : CORNER, CENTER, NORTH_FACE, EAST_FACE
 use MOM_domain_infra, only : To_East, To_West, To_North, To_South, To_All, Omit_Corners
-use MOM_error_handler, only : MOM_error, MOM_mesg, NOTE, WARNING, FATAL
+use MOM_domain_infra, only : compute_extent
+use MOM_error_handler, only : MOM_error, MOM_mesg, NOTE, WARNING, FATAL, is_root_pe
 use MOM_file_parser, only : get_param, log_param, log_version, param_file_type
-use MOM_io_infra, only : file_exists
+use MOM_io_infra, only : file_exists, read_field, open_ASCII_file, close_file, WRITEONLY_FILE
 use MOM_string_functions, only : slasher
+use MOM_cpu_clock, only : cpu_clock_id, cpu_clock_begin, cpu_clock_end, CLOCK_ROUTINE
+use MOM_unit_scaling, only : unit_scale_type

 implicit none ; private

@@ -60,11 +63,12 @@ module MOM_domains
 !> MOM_domains_init initializes a MOM_domain_type variable, based on the information
 !! read in from a param_file_type, and optionally returns data describing various
 !! properties of the domain type.
-subroutine MOM_domains_init(MOM_dom, param_file, symmetric, static_memory, &
+subroutine MOM_domains_init(MOM_dom, US, param_file, symmetric, static_memory, &
                             NIHALO, NJHALO, NIGLOBAL, NJGLOBAL, NIPROC, NJPROC, &
                             min_halo, domain_name, include_name, param_suffix)
   type(MOM_domain_type),           pointer       :: MOM_dom    !< A pointer to the MOM_domain_type
                                                                !! being defined here.
+  type(unit_scale_type),           pointer       :: US         !< A dimensional unit scaling type
   type(param_file_type),           intent(in)    :: param_file !< A structure to parse for
                                                                !! run-time parameters
   logical,               optional, intent(in)    :: symmetric  !< If present, this specifies
@@ -98,6 +102,7 @@ subroutine MOM_domains_init(MOM_dom, param_file, symmetric, static_memory, &

   ! Local variables
   integer, dimension(2) :: layout      ! The number of logical processors in the i- and j- directions
+  integer, dimension(2) :: auto_layout ! The layout determined by the auto masking routine
   integer, dimension(2) :: io_layout   ! The layout of logical processors for input and output
!$ integer :: ocean_nthreads ! Number of openMP threads
!$ logical :: ocean_omp_hyper_thread ! If true use openMP hyper-threads
@@ -112,6 +117,8 @@ subroutine MOM_domains_init(MOM_dom, param_file, symmetric, static_memory, &
   logical :: nonblocking ! If true, nonblocking halo updates will be used.
   logical :: thin_halos  ! If true, If true, optional arguments may be used to specify the
                          ! width of the halos that are updated with each call.
+  logical :: auto_mask_table ! Runtime flag that turns on the automatic mask table generator
+  integer :: auto_io_layout_fac ! Used to compute IO layout when auto_mask_table is True.
   logical :: mask_table_exists ! True if there is a mask table file
   character(len=128) :: inputdir   ! The directory in which to find the diag table
   character(len=200) :: mask_table ! The file name and later the full path to the diag table
@@ -122,6 +129,10 @@ subroutine MOM_domains_init(MOM_dom, param_file, symmetric, static_memory, &
   character(len=8)  :: char_xsiz, char_ysiz, char_niglobal, char_njglobal
   character(len=40) :: nihalo_nm, njhalo_nm, layout_nm, io_layout_nm, masktable_nm
   character(len=40) :: niproc_nm, njproc_nm
+  character(len=200) :: topo_config
+  integer :: id_clock_auto_mask
+  character(len=:), allocatable :: masktable_desc
+  character(len=:), allocatable :: auto_mask_table_fname ! Auto-generated mask table file name
   ! This include declares and sets the variable "version".
# include "version_variable.h"
   character(len=40) :: mdl ! This module's name.
@@ -277,18 +288,52 @@ subroutine MOM_domains_init(MOM_dom, param_file, symmetric, static_memory, &

   call get_param(param_file, mdl, "INPUTDIR", inputdir, do_not_log=.true., default=".")
   inputdir = slasher(inputdir)
-  call get_param(param_file, mdl, trim(masktable_nm), mask_table, &
-                 "A text file to specify n_mask, layout and mask_list. This feature masks out "//&
-                 "processors that contain only land points. The first line of mask_table is the "//&
-                 "number of regions to be masked out. The second line is the layout of the "//&
-                 "model and must be consistent with the actual model layout. The following "//&
-                 "(n_mask) lines give the logical positions of the processors that are masked "//&
-                 "out. The mask_table can be created by tools like check_mask. The following "//&
-                 "example of mask_table masks out 2 processors, (1,2) and (3,6), out of the 24 "//&
-                 "in a 4x6 layout: \n 2\n 4,6\n 1,2\n 3,6\n", default="MOM_mask_table", &
-                 layoutParam=.true.)
-  mask_table = trim(inputdir)//trim(mask_table)
-  mask_table_exists = file_exists(mask_table)
+  call get_param(param_file, mdl, "TOPO_CONFIG", topo_config, do_not_log=.true., fail_if_missing=.true.)
+
+  auto_mask_table = .false.
+  if (.not. present(param_suffix) .and. .not. is_static .and. trim(topo_config) == 'file') then
+    call get_param(param_file, mdl, 'AUTO_MASKTABLE', auto_mask_table, &
+                   "Turn on automatic mask table generation to eliminate land blocks.", &
+                   default=.false., layoutParam=.true.)
+  endif
+
+  masktable_desc = "A text file to specify n_mask, layout and mask_list. This feature masks out "//&
+                   "processors that contain only land points. The first line of mask_table is the "//&
+                   "number of regions to be masked out. The second line is the layout of the "//&
+                   "model and must be consistent with the actual model layout. The following "//&
+                   "(n_mask) lines give the logical positions of the processors that are masked "//&
+                   "out. The mask_table can be created by tools like check_mask. The following "//&
+                   "example of mask_table masks out 2 processors, (1,2) and (3,6), out of the 24 "//&
+                   "in a 4x6 layout: \n 2\n 4,6\n 1,2\n 3,6\n"
+
+  if (auto_mask_table) then
+    id_clock_auto_mask = cpu_clock_id('(Ocean gen_auto_mask_table)', grain=CLOCK_ROUTINE)
+    auto_mask_table_fname = "MOM_auto_mask_table"
+
+    ! Auto-generate a mask file and determine the layout
+    call cpu_clock_begin(id_clock_auto_mask)
+    if (is_root_PE()) then
+      call gen_auto_mask_table(n_global, reentrant, tripolar_N, PEs_used, param_file, inputdir, &
+                               auto_mask_table_fname, US, auto_layout)
+    endif
+    call broadcast(auto_layout, length=2)
+    call cpu_clock_end(id_clock_auto_mask)
+
+    mask_table = auto_mask_table_fname
+    call log_param(param_file, mdl, trim(masktable_nm), mask_table, masktable_desc, &
+                   default="MOM_mask_table", layoutParam=.true.)
+  else
+    call get_param(param_file, mdl, trim(masktable_nm), mask_table, masktable_desc, &
+                   default="MOM_mask_table", layoutParam=.true.)
+  endif
+
+  ! First, check the run directory for the mask_table input file.
+  mask_table_exists = file_exists(trim(mask_table))
+  ! If not found, check the input directory.
+  if (.not. mask_table_exists) then
+    mask_table = trim(inputdir)//trim(mask_table)
+    mask_table_exists = file_exists(mask_table)
+  endif

   if (is_static) then
     layout(1) = NIPROC ; layout(2) = NJPROC
@@ -317,6 +362,16 @@ subroutine MOM_domains_init(MOM_dom, param_file, symmetric, static_memory, &
                      "Shift to using "//trim(layout_nm)//" instead.")
   endif

+  if (auto_mask_table) then
+    if (layout(1) /= 0 .and. layout(1) /= auto_layout(1)) then
+      call MOM_error(FATAL, "Cannot set LAYOUT or NIPROC when AUTO_MASKTABLE is enabled.")
+    endif
+    if (layout(2) /= 0 .and. layout(2) /= auto_layout(2)) then
+      call MOM_error(FATAL, "Cannot set LAYOUT or NJPROC when AUTO_MASKTABLE is enabled.")
+    endif
+    layout(:) = auto_layout(:)
+  endif
+
   if ( (layout(1) == 0) .and. (layout(2) == 0) ) &
     call MOM_define_layout(n_global, PEs_used, layout)
   if ( (layout(1) /= 0) .and. (layout(2) == 0) ) layout(2) = PEs_used / layout(1)
@@ -351,9 +406,28 @@ subroutine MOM_domains_init(MOM_dom, param_file, symmetric, static_memory, &
   ! Set up the I/O layout, it will be checked later that it uses an even multiple of the number of
   ! PEs in each direction.
   io_layout(:) = (/ 1, 1 /)
-  call get_param(param_file, mdl, trim(io_layout_nm), io_layout, &
-                 "The processor layout to be used, or 0,0 to automatically set the io_layout "//&
-                 "to be the same as the layout.", default=1, layoutParam=.true.)
+
+  ! Compute a valid IO layout if auto_mask_table is on. Otherwise, read in the IO_LAYOUT parameter.
+  if (auto_mask_table) then
+    call get_param(param_file, mdl, "AUTO_IO_LAYOUT_FAC", auto_io_layout_fac, &
+                   "When AUTO_MASKTABLE is enabled, io layout is calculated by performing integer "//&
+                   "division of the runtime-determined domain layout with this factor. If the factor "//&
+                   "is set to 0 (default), the io layout is set to 1,1.", &
+                   default=0, layoutParam=.true.)
+    if (auto_io_layout_fac>0) then
+      io_layout(1) = max(layout(1)/auto_io_layout_fac, 1)
+      io_layout(2) = max(layout(2)/auto_io_layout_fac, 1)
+    elseif (auto_io_layout_fac<0) then
+      call MOM_error(FATAL, 'AUTO_IO_LAYOUT_FAC must be a nonnegative integer.')
+    endif
+    call log_param(param_file, mdl, trim(io_layout_nm), io_layout, &
+                   "The processor layout to be used, or 0,0 to automatically set the io_layout "//&
+                   "to be the same as the layout.", layoutParam=.true.)
+  else
+    call get_param(param_file, mdl, trim(io_layout_nm), io_layout, &
+                   "The processor layout to be used, or 0,0 to automatically set the io_layout "//&
+                   "to be the same as the layout.", default=1, layoutParam=.true.)
+  endif

   call create_MOM_domain(MOM_dom, n_global, n_halo, reentrant, tripolar_N, layout, &
                          io_layout=io_layout, domain_name=domain_name, mask_table=mask_table, &
@@ -387,4 +461,215 @@ subroutine MOM_define_layout(n_global, ndivs, layout)
   layout = (/ idiv, jdiv /)
 end subroutine MOM_define_layout

+!> Given a desired number of active npes, generate a layout and mask_table
+subroutine gen_auto_mask_table(n_global, reentrant, tripolar_N, npes, param_file, inputdir, filename, US, layout)
+  integer, dimension(2), intent(in)  :: n_global   !< The total number of gridpoints in 2 directions
+  logical, dimension(2), intent(in)  :: reentrant  !< True if the x- and y- directions are periodic.
+  logical,               intent(in)  :: tripolar_N !< A flag indicating whether there is northern tripolar connectivity
+  integer,               intent(in)  :: npes       !< The desired number of active PEs.
+  type(param_file_type), intent(in)  :: param_file !< A structure to parse for run-time parameters
+  character(len=128),    intent(in)  :: inputdir   !< INPUTDIR parameter
+  character(len=:), allocatable, intent(in) :: filename !< Mask table file path (to be auto-generated.)
+  type(unit_scale_type), pointer     :: US         !< A dimensional unit scaling type
+  integer, dimension(2), intent(out) :: layout     !< The generated layout of PEs (incl. masked blocks)
+  ! Local variables
+  real, dimension(n_global(1), n_global(2)) :: D ! Bathymetric depth (to be read in from TOPO_FILE) [Z ~> m]
+  integer, dimension(:,:), allocatable :: mask   ! Cell masks (based on D and MINIMUM_DEPTH)
+  character(len=200) :: topo_filepath, topo_file ! Strings for file/path
+  character(len=200) :: topo_varname             ! Variable name in file
+  character(len=200) :: topo_config
+  character(len=40)  :: mdl = "gen_auto_mask_table" ! This subroutine's name.
+  integer :: i, j, p
+  real :: Dmask         ! The depth for masking in the same units as D [Z ~> m]
+  real :: min_depth     ! The minimum ocean depth in the same units as D [Z ~> m]
+  real :: mask_depth    ! The depth shallower than which to mask a point as land. [Z ~> m]
+  real :: glob_ocn_frac ! Ratio of ocean points to the total number of points
+  real :: r_p           ! Aspect ratio for division count p.
+  integer :: nx, ny     ! Global domain sizes
+  integer, parameter :: ibuf=2, jbuf=2
+  real, parameter :: r_extreme = 4.0 ! Aspect ratio limit (>1) for a layout to be considered.
+  integer :: num_masked_blocks
+  integer, allocatable :: mask_table(:,:)
+
+  ! Read in params necessary for auto-masking
+  call get_param(param_file, mdl, "MINIMUM_DEPTH", min_depth, do_not_log=.true., units="m", default=0.0)
+  call get_param(param_file, mdl, "MASKING_DEPTH", mask_depth, do_not_log=.true., units="m", default=-9999.0)
+  call get_param(param_file, mdl, "TOPO_CONFIG", topo_config, do_not_log=.true., fail_if_missing=.true.)
+  call get_param(param_file, mdl, "TOPO_FILE", topo_file, do_not_log=.true., default="topog.nc")
+  call get_param(param_file, mdl, "TOPO_VARNAME", topo_varname, do_not_log=.true., default="depth")
+  topo_filepath = trim(inputdir)//trim(topo_file)
+
+  ! Sanity checks
+  if (.not. is_root_pe()) then
+    call MOM_error(FATAL, 'gen_auto_mask_table should only be called by the root PE.')
+  endif
+  if (trim(topo_config) /= "file") then
+    call MOM_error(FATAL, 'Auto mask table only works with TOPO_CONFIG="file"')
+  endif
+  if (.not.file_exists(topo_filepath)) then
+    call MOM_error(FATAL, " gen_auto_mask_table: Unable to open "//trim(topo_filepath))
+  endif
+
+  nx = n_global(1)
+  ny = n_global(2)
+
+  ! Read in bathymetric depth.
+  D(:,:) = -9.0e30 * US%m_to_Z ! Initializing to a very large negative depth (tall mountains) everywhere.
+  call read_field(topo_filepath, trim(topo_varname), D, start=(/1, 1/), nread=n_global, no_domain=.true., &
+                  scale=US%m_to_Z)
+
+  allocate(mask(nx+2*ibuf, ny+2*jbuf), source=0)
+
+  ! Determine cell masks
+  Dmask = mask_depth
+  if (mask_depth == -9999.0) Dmask = min_depth
+  do i=1,nx ; do j=1,ny
+    if (D(i,j) <= Dmask) then
+      mask(i+ibuf,j+jbuf) = 0
+    else
+      mask(i+ibuf,j+jbuf) = 1
+    endif
+  enddo ; enddo
+
+  ! Fill in buffer cells
+
+  if (reentrant(1)) then ! REENTRANT_X
+    mask(1:ibuf, :) = mask(nx+1:nx+ibuf, :)
+    mask(ibuf+nx+1:nx+2*ibuf, :) = mask(ibuf+1:2*ibuf, :)
+  endif
+
+  if (reentrant(2)) then ! REENTRANT_Y
+    mask(:, 1:jbuf) = mask(:, ny+1:ny+jbuf)
+    mask(:, jbuf+ny+1:ny+2*jbuf) = mask(:, jbuf+1:2*jbuf)
+  endif
+
+  if (tripolar_N) then ! TRIPOLAR_N
+    do i=1,nx+2*ibuf
+      do j=1,jbuf
+        mask(i, jbuf+ny+j) = mask(nx+2*ibuf+1-i, jbuf+ny+1-j)
+      enddo
+    enddo
+  endif
+
+  ! Tripolar Stitch Fix: In cases where masking is asymmetrical across the tripolar stitch, there's a possibility
+  ! that certain unmasked blocks won't be able to obtain grid metrics from the halo points. This occurs when the
+  ! neighboring block on the opposite side of the tripolar stitch is masked. As a consequence, certain metrics like
+  ! dxT and dyT may be calculated through extrapolation (refer to extrapolate_metric), potentially leading to the
+  ! generation of non-positive values. This can result in divide-by-zero errors elsewhere, e.g., in MOM_hor_visc.F90.
+  ! Currently, the safest and most general solution is to prohibit masking along the tripolar stitch:
+  if (tripolar_N) then
+    mask(:, jbuf+ny) = 1
+  endif
+
+  glob_ocn_frac = real(sum(mask(1+ibuf:nx+ibuf, 1+jbuf:ny+jbuf))) / (nx * ny)
+
+  ! Iteratively check all possible division counts starting from the upper bound of npes/glob_ocn_frac,
+  ! which is over-optimistic for realistic domains, but may be satisfied with idealized domains.
+  do p = ceiling(npes/glob_ocn_frac), npes, -1
+
+    ! Compute the layout for the current division count, p
+    call MOM_define_layout(n_global, p, layout)
+
+    ! Don't bother checking this p if the aspect ratio is extreme
+    r_p = (real(nx)/layout(1)) / (real(ny)/layout(2))
+    if ( r_p * r_extreme < 1 .or. r_extreme < r_p ) cycle
+
+    ! Get the number of masked blocks for this particular division count
+    call determine_land_blocks(mask, nx, ny, layout(1), layout(2), ibuf, jbuf, num_masked_blocks)
+
+    ! If we can eliminate enough blocks to reach the target npes, adopt
+    ! this p (and the associated layout) and terminate the iteration.
+    if (p-num_masked_blocks <= npes) then
+      call MOM_error(NOTE, "Found the optimum layout for auto-masking. Terminating iteration...")
+      exit
+    endif
+  enddo
+
+  if (num_masked_blocks == 0) then
+    call MOM_error(FATAL, "Couldn't auto-eliminate any land blocks. Try to increase the number "//&
+                   "of MOM6 PEs or set AUTO_MASKTABLE to False.")
+  endif
+
+  ! Call determine_land_blocks once again, this time to retrieve and write out the mask_table.
+  allocate(mask_table(num_masked_blocks,2))
+  call determine_land_blocks(mask, nx, ny, layout(1), layout(2), ibuf, jbuf, num_masked_blocks, mask_table)
+  call write_auto_mask_file(mask_table, layout, npes, filename)
+  deallocate(mask_table)
+  deallocate(mask)
+
+end subroutine gen_auto_mask_table
+
+!> Given a number of domain divisions, compute the max number of land blocks that can be eliminated,
+!! and return the resulting mask table if requested.
+subroutine determine_land_blocks(mask, nx, ny, idiv, jdiv, ibuf, jbuf, num_masked_blocks, mask_table)
+  integer, dimension(:,:), intent(in) :: mask !< Cell masks based on depth and MINIMUM_DEPTH
+  integer, intent(in)  :: nx   !< Total number of gridpoints in x-dir (global)
+  integer, intent(in)  :: ny   !< Total number of gridpoints in y-dir (global)
+  integer, intent(in)  :: idiv !< Number of divisions along x-dir
+  integer, intent(in)  :: jdiv !< Number of divisions along y-dir
+  integer, intent(in)  :: ibuf !< Number of buffer cells in x-dir.
+                               !! (not necessarily the same as NIHALO)
+  integer, intent(in)  :: jbuf !< Number of buffer cells in y-dir.
+                               !! (not necessarily the same as NJHALO)
+  integer, intent(out) :: num_masked_blocks !< The final number of masked blocks
+  integer, intent(out), optional :: mask_table(:,:) !< The resulting mask_table array
+  ! Local variables
+  integer, dimension(idiv) :: ibegin !< The starting index of each division along x axis
+  integer, dimension(idiv) :: iend   !< The ending index of each division along x axis
+  integer, dimension(jdiv) :: jbegin !< The starting index of each division along y axis
+  integer, dimension(jdiv) :: jend   !< The ending index of each division along y axis
+  integer :: i, j, ib, ie, jb, je
+
+  call compute_extent(1, nx, idiv, ibegin, iend)
+  call compute_extent(1, ny, jdiv, jbegin, jend)
+
+  num_masked_blocks = 0
+
+  do i=1,idiv
+    ib = ibegin(i)
+    ie = iend(i) + 2 * ibuf
+    do j=1,jdiv
+      jb = jbegin(j)
+      je = jend(j) + 2 * jbuf
+
+      if (any(mask(ib:ie,jb:je)==1)) cycle
+
+      num_masked_blocks = num_masked_blocks + 1
+
+      if (present(mask_table)) then
+        if ( num_masked_blocks > size(mask_table, dim=1)) then
+          call MOM_error(FATAL, "The mask_table argument passed to determine_land_blocks() has insufficient size.")
+        endif
+
+        mask_table(num_masked_blocks,1) = i
+        mask_table(num_masked_blocks,2) = j
+      endif
+    enddo
+  enddo
+
+end subroutine determine_land_blocks
+
+!> Write out the auto-generated mask information to a file in the run directory.
+subroutine write_auto_mask_file(mask_table, layout, npes, filename)
+  integer,               intent(in) :: mask_table(:,:) !< Mask table array to be written out.
+  integer, dimension(2), intent(in) :: layout          !< PE layout
+  integer,               intent(in) :: npes            !< Number of divisions (incl. eliminated ones)
+  character(len=:), allocatable, intent(in) :: filename !< File name for the mask_table to be written
+  ! Local variables
+  integer :: file_ascii = -1 ! The unit number of the auto-generated mask_table file.
+  integer :: true_num_masked_blocks
+  integer :: p
+
+  ! Eliminate only enough blocks to ensure that the number of active blocks precisely matches the target npes.
+  true_num_masked_blocks = layout(1) * layout(2) - npes
+
+  call open_ASCII_file(file_ascii, trim(filename), action=WRITEONLY_FILE)
+  write(file_ascii, '(I0)') true_num_masked_blocks
+  write(file_ascii, '(I0,",",I0)') layout(1), layout(2)
+  do p = 1, true_num_masked_blocks
+    write(file_ascii, '(I0,",",I0)') mask_table(p,1), mask_table(p,2)
+  enddo
+  call close_file(file_ascii)
+end subroutine write_auto_mask_file
+
 end module MOM_domains
diff --git a/src/ice_shelf/MOM_ice_shelf.F90 b/src/ice_shelf/MOM_ice_shelf.F90
index 8e0e58c1b6..b5ed3f91cf 100644
--- a/src/ice_shelf/MOM_ice_shelf.F90
+++ b/src/ice_shelf/MOM_ice_shelf.F90
@@ -1276,7 +1276,7 @@ subroutine initialize_ice_shelf(param_file, ocn_grid, Time, CS, diag, forces_in,
   ! Set up the ice-shelf domain and grid
   wd_halos(:)=0
   allocate(CS%Grid)
-  call MOM_domains_init(CS%Grid%domain, param_file, min_halo=wd_halos, symmetric=GRID_SYM_,&
+  call MOM_domains_init(CS%Grid%domain, CS%US, param_file, min_halo=wd_halos, symmetric=GRID_SYM_,&
                         domain_name='MOM_Ice_Shelf_in')
   !allocate(CS%Grid_in%HI)
   !call hor_index_init(CS%Grid%Domain, CS%Grid%HI, param_file, &
diff --git a/src/ocean_data_assim/MOM_oda_driver.F90 b/src/ocean_data_assim/MOM_oda_driver.F90
index 53615b0063..1fdf09e258 100644
--- a/src/ocean_data_assim/MOM_oda_driver.F90
+++ b/src/ocean_data_assim/MOM_oda_driver.F90
@@ -305,7 +305,7 @@ subroutine init_oda(Time, G, GV, US, diag_CS, CS)
   CS%G => G
   allocate(CS%Grid)
   ! params NIHALO_ODA, NJHALO_ODA set the DA halo size
-  call MOM_domains_init(CS%Grid%Domain,PF,param_suffix='_ODA')
+  call MOM_domains_init(CS%Grid%Domain,CS%US,PF,param_suffix='_ODA')
   allocate(HI)
   call hor_index_init(CS%Grid%Domain, HI, PF)
   call verticalGridInit( PF, CS%GV, CS%US )
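Editor's note: the standalone Fortran sketch below is not part of the patch. It illustrates, with assumed example values, two pieces of behaviour introduced above: the three-part text format produced by write_auto_mask_file (number of masked blocks, then the PE layout, then one "i,j" pair per masked block, matching the 4x6 example used in the MASKTABLE parameter description), and the integer division by AUTO_IO_LAYOUT_FAC used to derive the IO layout. The program name and the factor value of 2 are hypothetical.

program mask_table_example
  implicit none
  ! Hypothetical 4x6 processor layout with blocks (1,2) and (3,6) masked.
  integer, parameter :: layout(2) = (/ 4, 6 /)
  integer, parameter :: masked(2,2) = reshape((/ 1, 2, 3, 6 /), (/ 2, 2 /))
  integer, parameter :: auto_io_layout_fac = 2  ! assumed value (the parameter default is 0)
  integer :: io_layout(2)
  integer :: p

  ! Mask table in the format written by write_auto_mask_file.
  write(*, '(I0)') size(masked, dim=2)           ! number of masked blocks: 2
  write(*, '(I0,",",I0)') layout(1), layout(2)   ! layout: 4,6
  do p = 1, size(masked, dim=2)
    write(*, '(I0,",",I0)') masked(1,p), masked(2,p)  ! 1,2 and 3,6
  enddo

  ! IO layout from integer division of the layout by AUTO_IO_LAYOUT_FAC.
  io_layout(1) = max(layout(1)/auto_io_layout_fac, 1)
  io_layout(2) = max(layout(2)/auto_io_layout_fac, 1)
  write(*, '("io_layout = ",I0,",",I0)') io_layout(1), io_layout(2)  ! io_layout = 2,3
end program mask_table_example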