diff --git a/.github/actions/macos-setup/action.yml b/.github/actions/macos-setup/action.yml index fecbe787b5..4c248abd11 100644 --- a/.github/actions/macos-setup/action.yml +++ b/.github/actions/macos-setup/action.yml @@ -16,3 +16,19 @@ runs: brew install netcdf-fortran brew install mpich echo "::endgroup::" + + # NOTE: Floating point exceptions are currently disabled due to an error in + # HDF5 1.14.3. They will be re-enabled when the default brew version has + # been updated to a working version. + + - name: Set compiler flags + shell: bash + run: | + echo "::group::config.mk" + cd .testing + echo "FCFLAGS_DEBUG = -g -O0 -Wextra -Wno-compare-reals -fbacktrace -fcheck=bounds" >> config.mk + echo "FCFLAGS_REPRO = -g -O2 -fbacktrace" >> config.mk + echo "FCFLAGS_INIT = -finit-real=snan -finit-integer=2147483647 -finit-derived" >> config.mk + echo "FCFLAGS_FMS = -g -fbacktrace -O0" >> config.mk + cat config.mk + echo "::endgroup::" diff --git a/.github/actions/testing-setup/action.yml b/.github/actions/testing-setup/action.yml index 6ba149d927..a15dd6d0a2 100644 --- a/.github/actions/testing-setup/action.yml +++ b/.github/actions/testing-setup/action.yml @@ -31,17 +31,6 @@ runs: REPORT_ERROR_LOGS=true make deps/lib/libFMS.a -s -j echo "::endgroup::" - - name: Store compiler flags used in Makefile - shell: bash - run: | - echo "::group::config.mk" - cd .testing - echo "FCFLAGS_DEBUG=-g -O0 -Wextra -Wno-compare-reals -fbacktrace -ffpe-trap=invalid,zero,overflow -fcheck=bounds" >> config.mk - echo "FCFLAGS_REPRO=-g -O2 -fbacktrace" >> config.mk - echo "FCFLAGS_INIT=-finit-real=snan -finit-integer=2147483647 -finit-derived" >> config.mk - cat config.mk - echo "::endgroup::" - - name: Compile MOM6 in symmetric memory mode shell: bash run: | diff --git a/.github/actions/ubuntu-setup/action.yml b/.github/actions/ubuntu-setup/action.yml index 3f3ba5f0b6..83d6795954 100644 --- a/.github/actions/ubuntu-setup/action.yml +++ b/.github/actions/ubuntu-setup/action.yml @@ -17,3 +17,15 @@ runs: sudo apt-get 
install libopenmpi-dev sudo apt-get install linux-tools-common echo "::endgroup::" + + - name: Store compiler flags used in Makefile + shell: bash + run: | + echo "::group::config.mk" + cd .testing + echo "FCFLAGS_DEBUG = -g -O0 -Wextra -Wno-compare-reals -fbacktrace -ffpe-trap=invalid,zero,overflow -fcheck=bounds" >> config.mk + echo "FCFLAGS_REPRO = -g -O2 -fbacktrace" >> config.mk + echo "FCFLAGS_INIT = -finit-real=snan -finit-integer=2147483647 -finit-derived" >> config.mk + echo "FCFLAGS_FMS = -g -fbacktrace -O0" >> config.mk + cat config.mk + echo "::endgroup::" diff --git a/config_src/drivers/nuopc_cap/mom_cap.F90 b/config_src/drivers/nuopc_cap/mom_cap.F90 index 120078b11e..843e8c2ef1 100644 --- a/config_src/drivers/nuopc_cap/mom_cap.F90 +++ b/config_src/drivers/nuopc_cap/mom_cap.F90 @@ -16,7 +16,7 @@ module MOM_cap_mod use MOM_domains, only: MOM_infra_init, MOM_infra_end use MOM_file_parser, only: get_param, log_version, param_file_type, close_param_file use MOM_get_input, only: get_MOM_input, directories -use MOM_domains, only: pass_var +use MOM_domains, only: pass_var, pe_here use MOM_error_handler, only: MOM_error, FATAL, is_root_pe use MOM_grid, only: ocean_grid_type, get_global_grid_size use MOM_ocean_model_nuopc, only: ice_ocean_boundary_type @@ -29,6 +29,7 @@ module MOM_cap_mod use MOM_cap_methods, only: med2mod_areacor, state_diagnose use MOM_cap_methods, only: ChkErr use MOM_ensemble_manager, only: ensemble_manager_init +use MOM_coms, only: sum_across_PEs #ifdef CESMCOUPLED use shr_log_mod, only: shr_log_setLogUnit @@ -826,6 +827,7 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) type(ocean_grid_type) , pointer :: ocean_grid type(ocean_internalstate_wrapper) :: ocean_internalstate integer :: npet, ntiles + integer :: npes ! number of PEs (from FMS). 
integer :: nxg, nyg, cnt integer :: isc,iec,jsc,jec integer, allocatable :: xb(:),xe(:),yb(:),ye(:),pe(:) @@ -852,6 +854,8 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) integer :: lsize integer :: ig,jg, ni,nj,k integer, allocatable :: gindex(:) ! global index space + integer, allocatable :: gindex_ocn(:) ! global index space for ocean cells (excl. masked cells) + integer, allocatable :: gindex_elim(:) ! global index space for eliminated cells character(len=128) :: fldname character(len=256) :: cvalue character(len=256) :: frmt ! format specifier for several error msgs @@ -875,6 +879,11 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) real(ESMF_KIND_R8) :: min_areacor_glob(2) real(ESMF_KIND_R8) :: max_areacor_glob(2) character(len=*), parameter :: subname='(MOM_cap:InitializeRealize)' + integer :: niproc, njproc + integer :: ip, jp, pe_ix + integer :: num_elim_blocks ! number of blocks to be eliminated + integer :: num_elim_cells_global, num_elim_cells_local, num_elim_cells_remaining + integer, allocatable :: cell_mask(:,:) !-------------------------------- rc = ESMF_SUCCESS @@ -919,19 +928,19 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) rc = ESMF_FAILURE call ESMF_LogWrite(subname//' ntiles must be 1', ESMF_LOGMSG_ERROR) endif - ntiles = mpp_get_domain_npes(ocean_public%domain) - write(tmpstr,'(a,1i6)') subname//' ntiles = ',ntiles + npes = mpp_get_domain_npes(ocean_public%domain) + write(tmpstr,'(a,1i6)') subname//' npes = ',npes call ESMF_LogWrite(trim(tmpstr), ESMF_LOGMSG_INFO) !--------------------------------- ! 
get start and end indices of each tile and their PET !--------------------------------- - allocate(xb(ntiles),xe(ntiles),yb(ntiles),ye(ntiles),pe(ntiles)) + allocate(xb(npes),xe(npes),yb(npes),ye(npes),pe(npes)) call mpp_get_compute_domains(ocean_public%domain, xbegin=xb, xend=xe, ybegin=yb, yend=ye) call mpp_get_pelist(ocean_public%domain, pe) if (dbug > 1) then - do n = 1,ntiles + do n = 1,npes write(tmpstr,'(a,6i6)') subname//' tiles ',n,pe(n),xb(n),xe(n),yb(n),ye(n) call ESMF_LogWrite(trim(tmpstr), ESMF_LOGMSG_INFO) enddo @@ -953,17 +962,102 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) call get_global_grid_size(ocean_grid, ni, nj) lsize = ( ocean_grid%iec - ocean_grid%isc + 1 ) * ( ocean_grid%jec - ocean_grid%jsc + 1 ) - ! Create the global index space for the computational domain - allocate(gindex(lsize)) - k = 0 - do j = ocean_grid%jsc, ocean_grid%jec - jg = j + ocean_grid%jdg_offset - do i = ocean_grid%isc, ocean_grid%iec - ig = i + ocean_grid%idg_offset - k = k + 1 ! Increment position within gindex - gindex(k) = ni * (jg - 1) + ig + num_elim_blocks = 0 + num_elim_cells_global = 0 + num_elim_cells_local = 0 + num_elim_cells_remaining = 0 + + ! Compute the number of eliminated blocks (specified in MOM_mask_table) + if (associated(ocean_grid%Domain%maskmap)) then + njproc = size(ocean_grid%Domain%maskmap, 1) + niproc = size(ocean_grid%Domain%maskmap, 2) + + do ip = 1, niproc + do jp = 1, njproc + if (.not. ocean_grid%Domain%maskmap(jp,ip)) then + num_elim_blocks = num_elim_blocks+1 + endif + enddo enddo - enddo + endif + + ! Apply land block elimination to ESMF gindex + ! (Here we assume that each processor gets assigned a single tile. If multi-tile implementation is to be added + ! in MOM6 NUOPC cap in the future, below code must be updated accordingly.) 
+ if (num_elim_blocks>0) then + + allocate(cell_mask(ni, nj), source=0) + allocate(gindex_ocn(lsize)) + k = 0 + do j = ocean_grid%jsc, ocean_grid%jec + jg = j + ocean_grid%jdg_offset + do i = ocean_grid%isc, ocean_grid%iec + ig = i + ocean_grid%idg_offset + k = k + 1 ! Increment position within gindex + gindex_ocn(k) = ni * (jg - 1) + ig + cell_mask(ig, jg) = 1 + enddo + enddo + call sum_across_PEs(cell_mask, ni*nj) + + if (maxval(cell_mask) /= 1 ) then + call MOM_error(FATAL, "Encountered cells shared by multiple PEs while attempting to determine masked cells.") + endif + + num_elim_cells_global = ni * nj - sum(cell_mask) + num_elim_cells_local = num_elim_cells_global / npes + + if (pe_here() == pe(npes)) then + ! assign all remaining cells to the last PE. + num_elim_cells_remaining = num_elim_cells_global - num_elim_cells_local * npes + allocate(gindex_elim(num_elim_cells_local+num_elim_cells_remaining)) + else + allocate(gindex_elim(num_elim_cells_local)) + endif + + ! Zero-based PE index. + pe_ix = pe_here() - pe(1) + + k = 0 + do jg = 1, nj + do ig = 1, ni + if (cell_mask(ig, jg) == 0) then + k = k + 1 + if (k > pe_ix * num_elim_cells_local .and. & + k <= ((pe_ix+1) * num_elim_cells_local + num_elim_cells_remaining)) then + gindex_elim(k - pe_ix * num_elim_cells_local) = ni * (jg -1) + ig + endif + endif + enddo + enddo + + allocate(gindex(lsize + num_elim_cells_local + num_elim_cells_remaining)) + do k = 1, lsize + gindex(k) = gindex_ocn(k) + enddo + do k = 1, num_elim_cells_local + num_elim_cells_remaining + gindex(k+lsize) = gindex_elim(k) + enddo + + deallocate(cell_mask) + deallocate(gindex_ocn) + deallocate(gindex_elim) + + else ! no eliminated land blocks + + ! Create the global index space for the computational domain + allocate(gindex(lsize)) + k = 0 + do j = ocean_grid%jsc, ocean_grid%jec + jg = j + ocean_grid%jdg_offset + do i = ocean_grid%isc, ocean_grid%iec + ig = i + ocean_grid%idg_offset + k = k + 1 ! 
Increment position within gindex + gindex(k) = ni * (jg - 1) + ig + enddo + enddo + + endif DistGrid = ESMF_DistGridCreate(arbSeqIndexList=gindex, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return @@ -987,6 +1081,10 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) call ESMF_MeshGet(Emesh, spatialDim=spatialDim, numOwnedElements=numOwnedElements, rc=rc) if (ChkErr(rc,__LINE__,u_FILE_u)) return + if (lsize /= numOwnedElements - num_elim_cells_local - num_elim_cells_remaining) then + call MOM_error(FATAL, "Discrepancy detected between ESMF mesh and internal MOM6 domain sizes. Check mask table.") + endif + allocate(ownedElemCoords(spatialDim*numOwnedElements)) allocate(lonMesh(numOwnedElements), lon(numOwnedElements)) allocate(latMesh(numOwnedElements), lat(numOwnedElements)) @@ -1018,7 +1116,7 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) end do eps_omesh = get_eps_omesh(ocean_state) - do n = 1,numOwnedElements + do n = 1,lsize diff_lon = abs(mod(lonMesh(n) - lon(n),360.0)) if (diff_lon > eps_omesh) then frmt = "('ERROR: Difference between ESMF Mesh and MOM6 domain coords is "//& @@ -1122,11 +1220,11 @@ subroutine InitializeRealize(gcomp, importState, exportState, clock, rc) ! generate delayout and dist_grid - allocate(deBlockList(2,2,ntiles)) - allocate(petMap(ntiles)) - allocate(deLabelList(ntiles)) + allocate(deBlockList(2,2,npes)) + allocate(petMap(npes)) + allocate(deLabelList(npes)) - do n = 1, ntiles + do n = 1, npes deLabelList(n) = n deBlockList(1,1,n) = xb(n) deBlockList(1,2,n) = xe(n) diff --git a/config_src/drivers/nuopc_cap/mom_cap_methods.F90 b/config_src/drivers/nuopc_cap/mom_cap_methods.F90 index db8bc33c90..3aa6278e9f 100644 --- a/config_src/drivers/nuopc_cap/mom_cap_methods.F90 +++ b/config_src/drivers/nuopc_cap/mom_cap_methods.F90 @@ -852,7 +852,7 @@ subroutine State_SetExport(state, fldname, isc, iec, jsc, jec, input, ocean_grid ! 
local variables type(ESMF_StateItem_Flag) :: itemFlag - integer :: n, i, j, i1, j1, ig,jg + integer :: n, i, j, k, i1, j1, ig,jg integer :: lbnd1,lbnd2 real(ESMF_KIND_R8), pointer :: dataPtr1d(:) real(ESMF_KIND_R8), pointer :: dataPtr2d(:,:) @@ -888,6 +888,13 @@ subroutine State_SetExport(state, fldname, isc, iec, jsc, jec, input, ocean_grid enddo end if + ! if a maskmap is provided, set exports of all eliminated cells to zero. + if (associated(ocean_grid%Domain%maskmap)) then + do k = n+1, size(dataPtr1d) + dataPtr1d(k) = 0.0 + enddo + endif + else if (geomtype == ESMF_GEOMTYPE_GRID) then call state_getfldptr(state, trim(fldname), dataptr2d, rc) diff --git a/config_src/infra/FMS1/MOM_domain_infra.F90 b/config_src/infra/FMS1/MOM_domain_infra.F90 index 2c97a0bb31..1de9a6d658 100644 --- a/config_src/infra/FMS1/MOM_domain_infra.F90 +++ b/config_src/infra/FMS1/MOM_domain_infra.F90 @@ -16,7 +16,7 @@ module MOM_domain_infra use mpp_domains_mod, only : mpp_create_group_update, mpp_do_group_update use mpp_domains_mod, only : mpp_reset_group_update_field, mpp_group_update_initialized use mpp_domains_mod, only : mpp_start_group_update, mpp_complete_group_update -use mpp_domains_mod, only : mpp_compute_block_extent +use mpp_domains_mod, only : mpp_compute_block_extent, mpp_compute_extent use mpp_domains_mod, only : mpp_broadcast_domain, mpp_redistribute, mpp_global_field use mpp_domains_mod, only : AGRID, BGRID_NE, CGRID_NE, SCALAR_PAIR, BITWISE_EXACT_SUM use mpp_domains_mod, only : CYCLIC_GLOBAL_DOMAIN, FOLD_NORTH_EDGE @@ -40,7 +40,7 @@ module MOM_domain_infra public :: domain2D, domain1D, group_pass_type ! These interfaces are actually implemented or have explicit interfaces in this file. 
public :: create_MOM_domain, clone_MOM_domain, get_domain_components, get_domain_extent -public :: deallocate_MOM_domain, get_global_shape, compute_block_extent +public :: deallocate_MOM_domain, get_global_shape, compute_block_extent, compute_extent public :: pass_var, pass_vector, fill_symmetric_edges, rescale_comp_data public :: pass_var_start, pass_var_complete, pass_vector_start, pass_vector_complete public :: create_group_pass, do_group_pass, start_group_pass, complete_group_pass @@ -1945,6 +1945,17 @@ subroutine compute_block_extent(isg, ieg, ndivs, ibegin, iend) call mpp_compute_block_extent(isg, ieg, ndivs, ibegin, iend) end subroutine compute_block_extent +!> Get the array ranges in one dimension for the divisions of a global index space (wraps mpp_compute_extent) +subroutine compute_extent(isg, ieg, ndivs, ibegin, iend) + integer, intent(in) :: isg !< The starting index of the global index space + integer, intent(in) :: ieg !< The ending index of the global index space + integer, intent(in) :: ndivs !< The number of divisions + integer, dimension(:), intent(out) :: ibegin !< The starting index of each division + integer, dimension(:), intent(out) :: iend !< The ending index of each division + + call mpp_compute_extent(isg, ieg, ndivs, ibegin, iend) +end subroutine compute_extent + !> Broadcast a 2-d domain from the root PE to the other PEs subroutine broadcast_domain(domain) type(domain2d), intent(inout) :: domain !< The domain2d type that will be shared across PEs. 
diff --git a/config_src/infra/FMS2/MOM_coms_infra.F90 b/config_src/infra/FMS2/MOM_coms_infra.F90 index cf9a724734..06a9b9f343 100644 --- a/config_src/infra/FMS2/MOM_coms_infra.F90 +++ b/config_src/infra/FMS2/MOM_coms_infra.F90 @@ -42,6 +42,7 @@ module MOM_coms_infra interface sum_across_PEs module procedure sum_across_PEs_int4_0d module procedure sum_across_PEs_int4_1d + module procedure sum_across_PEs_int4_2d module procedure sum_across_PEs_int8_0d module procedure sum_across_PEs_int8_1d module procedure sum_across_PEs_int8_2d @@ -357,6 +358,15 @@ subroutine sum_across_PEs_int4_1d(field, length, pelist) call mpp_sum(field, length, pelist) end subroutine sum_across_PEs_int4_1d +!> Find the sum of the values in corresponding positions of field across PEs, and return these sums in field. +subroutine sum_across_PEs_int4_2d(field, length, pelist) + integer(kind=int32), dimension(:,:), intent(inout) :: field !< The 2-d values to add, the sums upon return + integer, intent(in) :: length !< Number of elements in field to add + integer, optional, intent(in) :: pelist(:) !< List of PEs to work with, forwarded to mpp_sum + + call mpp_sum(field, length, pelist) +end subroutine sum_across_PEs_int4_2d + !> Find the sum of field across PEs, and return this sum in field. 
subroutine sum_across_PEs_int8_0d(field, pelist) integer(kind=int64), intent(inout) :: field !< Value on this PE, and the sum across PEs upon return diff --git a/config_src/infra/FMS2/MOM_domain_infra.F90 b/config_src/infra/FMS2/MOM_domain_infra.F90 index ff1d888c47..95159f7fe1 100644 --- a/config_src/infra/FMS2/MOM_domain_infra.F90 +++ b/config_src/infra/FMS2/MOM_domain_infra.F90 @@ -16,7 +16,7 @@ module MOM_domain_infra use mpp_domains_mod, only : mpp_create_group_update, mpp_do_group_update use mpp_domains_mod, only : mpp_reset_group_update_field, mpp_group_update_initialized use mpp_domains_mod, only : mpp_start_group_update, mpp_complete_group_update -use mpp_domains_mod, only : mpp_compute_block_extent +use mpp_domains_mod, only : mpp_compute_block_extent, mpp_compute_extent use mpp_domains_mod, only : mpp_broadcast_domain, mpp_redistribute, mpp_global_field use mpp_domains_mod, only : AGRID, BGRID_NE, CGRID_NE, SCALAR_PAIR, BITWISE_EXACT_SUM use mpp_domains_mod, only : CYCLIC_GLOBAL_DOMAIN, FOLD_NORTH_EDGE @@ -38,7 +38,7 @@ module MOM_domain_infra public :: domain2D, domain1D, group_pass_type ! These interfaces are actually implemented or have explicit interfaces in this file. 
public :: create_MOM_domain, clone_MOM_domain, get_domain_components, get_domain_extent -public :: deallocate_MOM_domain, get_global_shape, compute_block_extent +public :: deallocate_MOM_domain, get_global_shape, compute_block_extent, compute_extent public :: pass_var, pass_vector, fill_symmetric_edges, rescale_comp_data public :: pass_var_start, pass_var_complete, pass_vector_start, pass_vector_complete public :: create_group_pass, do_group_pass, start_group_pass, complete_group_pass @@ -1936,7 +1936,7 @@ subroutine get_global_shape(domain, niglobal, njglobal) njglobal = domain%njglobal end subroutine get_global_shape -!> Get the array ranges in one dimension for the divisions of a global index space +!> Get the array ranges in one dimension for the divisions of a global index space (alternative to compute_extent) subroutine compute_block_extent(isg, ieg, ndivs, ibegin, iend) integer, intent(in) :: isg !< The starting index of the global index space integer, intent(in) :: ieg !< The ending index of the global index space @@ -1947,6 +1947,17 @@ subroutine compute_block_extent(isg, ieg, ndivs, ibegin, iend) call mpp_compute_block_extent(isg, ieg, ndivs, ibegin, iend) end subroutine compute_block_extent +!> Get the array ranges in one dimension for the divisions of a global index space (wraps mpp_compute_extent) +subroutine compute_extent(isg, ieg, ndivs, ibegin, iend) + integer, intent(in) :: isg !< The starting index of the global index space + integer, intent(in) :: ieg !< The ending index of the global index space + integer, intent(in) :: ndivs !< The number of divisions + integer, dimension(:), intent(out) :: ibegin !< The starting index of each division + integer, dimension(:), intent(out) :: iend !< The ending index of each division + + call mpp_compute_extent(isg, ieg, ndivs, ibegin, iend) +end subroutine compute_extent + !> Broadcast a 2-d domain from the root PE to the other PEs subroutine broadcast_domain(domain) type(domain2d), intent(inout) :: domain !< The domain2d type that will 
be shared across PEs. diff --git a/src/core/MOM.F90 b/src/core/MOM.F90 index 89d1ee2004..447f77117f 100644 --- a/src/core/MOM.F90 +++ b/src/core/MOM.F90 @@ -2430,12 +2430,12 @@ subroutine initialize_MOM(Time, Time_init, param_file, dirs, CS, restart_CSp, & #endif G_in => CS%G_in #ifdef STATIC_MEMORY_ - call MOM_domains_init(G_in%domain, param_file, symmetric=symmetric, & + call MOM_domains_init(G_in%domain, US, param_file, symmetric=symmetric, & static_memory=.true., NIHALO=NIHALO_, NJHALO=NJHALO_, & NIGLOBAL=NIGLOBAL_, NJGLOBAL=NJGLOBAL_, NIPROC=NIPROC_, & NJPROC=NJPROC_) #else - call MOM_domains_init(G_in%domain, param_file, symmetric=symmetric, & + call MOM_domains_init(G_in%domain, US, param_file, symmetric=symmetric, & domain_name="MOM_in") #endif diff --git a/src/framework/MOM_domains.F90 b/src/framework/MOM_domains.F90 index a0f3855d19..f2c3225025 100644 --- a/src/framework/MOM_domains.F90 +++ b/src/framework/MOM_domains.F90 @@ -20,10 +20,13 @@ module MOM_domains use MOM_domain_infra, only : AGRID, BGRID_NE, CGRID_NE, SCALAR_PAIR use MOM_domain_infra, only : CORNER, CENTER, NORTH_FACE, EAST_FACE use MOM_domain_infra, only : To_East, To_West, To_North, To_South, To_All, Omit_Corners -use MOM_error_handler, only : MOM_error, MOM_mesg, NOTE, WARNING, FATAL +use MOM_domain_infra, only : compute_extent +use MOM_error_handler, only : MOM_error, MOM_mesg, NOTE, WARNING, FATAL, is_root_pe use MOM_file_parser, only : get_param, log_param, log_version, param_file_type -use MOM_io_infra, only : file_exists +use MOM_io_infra, only : file_exists, read_field, open_ASCII_file, close_file, WRITEONLY_FILE use MOM_string_functions, only : slasher +use MOM_cpu_clock, only : cpu_clock_id, cpu_clock_begin, cpu_clock_end, CLOCK_ROUTINE +use MOM_unit_scaling, only : unit_scale_type implicit none ; private @@ -60,11 +63,12 @@ module MOM_domains !> MOM_domains_init initializes a MOM_domain_type variable, based on the information !! 
read in from a param_file_type, and optionally returns data describing various !! properties of the domain type. -subroutine MOM_domains_init(MOM_dom, param_file, symmetric, static_memory, & +subroutine MOM_domains_init(MOM_dom, US, param_file, symmetric, static_memory, & NIHALO, NJHALO, NIGLOBAL, NJGLOBAL, NIPROC, NJPROC, & min_halo, domain_name, include_name, param_suffix) type(MOM_domain_type), pointer :: MOM_dom !< A pointer to the MOM_domain_type !! being defined here. + type(unit_scale_type), pointer :: US !< A dimensional unit scaling type type(param_file_type), intent(in) :: param_file !< A structure to parse for !! run-time parameters logical, optional, intent(in) :: symmetric !< If present, this specifies @@ -98,6 +102,7 @@ subroutine MOM_domains_init(MOM_dom, param_file, symmetric, static_memory, & ! Local variables integer, dimension(2) :: layout ! The number of logical processors in the i- and j- directions + integer, dimension(2) :: auto_layout ! The layout determined by the auto masking routine integer, dimension(2) :: io_layout ! The layout of logical processors for input and output !$ integer :: ocean_nthreads ! Number of openMP threads !$ logical :: ocean_omp_hyper_thread ! If true use openMP hyper-threads @@ -112,6 +117,8 @@ subroutine MOM_domains_init(MOM_dom, param_file, symmetric, static_memory, & logical :: nonblocking ! If true, nonblocking halo updates will be used. logical :: thin_halos ! If true, If true, optional arguments may be used to specify the ! width of the halos that are updated with each call. + logical :: auto_mask_table ! Runtime flag that turns on automatic mask table generator + integer :: auto_io_layout_fac ! Used to compute IO layout when auto_mask_table is True. logical :: mask_table_exists ! True if there is a mask table file character(len=128) :: inputdir ! The directory in which to find the diag table character(len=200) :: mask_table ! 
The file name and later the full path to the diag table @@ -122,6 +129,10 @@ subroutine MOM_domains_init(MOM_dom, param_file, symmetric, static_memory, & character(len=8) :: char_xsiz, char_ysiz, char_niglobal, char_njglobal character(len=40) :: nihalo_nm, njhalo_nm, layout_nm, io_layout_nm, masktable_nm character(len=40) :: niproc_nm, njproc_nm + character(len=200) :: topo_config + integer :: id_clock_auto_mask + character(len=:), allocatable :: masktable_desc + character(len=:), allocatable :: auto_mask_table_fname ! Auto-generated mask table file name ! This include declares and sets the variable "version". # include "version_variable.h" character(len=40) :: mdl ! This module's name. @@ -277,18 +288,52 @@ subroutine MOM_domains_init(MOM_dom, param_file, symmetric, static_memory, & call get_param(param_file, mdl, "INPUTDIR", inputdir, do_not_log=.true., default=".") inputdir = slasher(inputdir) - call get_param(param_file, mdl, trim(masktable_nm), mask_table, & - "A text file to specify n_mask, layout and mask_list. This feature masks out "//& - "processors that contain only land points. The first line of mask_table is the "//& - "number of regions to be masked out. The second line is the layout of the "//& - "model and must be consistent with the actual model layout. The following "//& - "(n_mask) lines give the logical positions of the processors that are masked "//& - "out. The mask_table can be created by tools like check_mask. The following "//& - "example of mask_table masks out 2 processors, (1,2) and (3,6), out of the 24 "//& - "in a 4x6 layout: \n 2\n 4,6\n 1,2\n 3,6\n", default="MOM_mask_table", & - layoutParam=.true.) - mask_table = trim(inputdir)//trim(mask_table) - mask_table_exists = file_exists(mask_table) + call get_param(param_file, mdl, "TOPO_CONFIG", topo_config, do_not_log=.true., fail_if_missing=.true.) + + auto_mask_table = .false. + if (.not. present(param_suffix) .and. .not. is_static .and. 
trim(topo_config) == 'file') then + call get_param(param_file, mdl, 'AUTO_MASKTABLE', auto_mask_table, & + "Turn on automatic mask table generation to eliminate land blocks.", & + default=.false., layoutParam=.true.) + endif + + masktable_desc = "A text file to specify n_mask, layout and mask_list. This feature masks out "//& + "processors that contain only land points. The first line of mask_table is the "//& + "number of regions to be masked out. The second line is the layout of the "//& + "model and must be consistent with the actual model layout. The following "//& + "(n_mask) lines give the logical positions of the processors that are masked "//& + "out. The mask_table can be created by tools like check_mask. The following "//& + "example of mask_table masks out 2 processors, (1,2) and (3,6), out of the 24 "//& + "in a 4x6 layout: \n 2\n 4,6\n 1,2\n 3,6\n" + + if (auto_mask_table) then + id_clock_auto_mask = cpu_clock_id('(Ocean gen_auto_mask_table)', grain=CLOCK_ROUTINE) + auto_mask_table_fname = "MOM_auto_mask_table" + + ! Auto-generate a mask file and determine the layout + call cpu_clock_begin(id_clock_auto_mask) + if (is_root_PE()) then + call gen_auto_mask_table(n_global, reentrant, tripolar_N, PEs_used, param_file, inputdir, & + auto_mask_table_fname, US, auto_layout) + endif + call broadcast(auto_layout, length=2) + call cpu_clock_end(id_clock_auto_mask) + + mask_table = auto_mask_table_fname + call log_param(param_file, mdl, trim(masktable_nm), mask_table, masktable_desc, & + default="MOM_mask_table", layoutParam=.true.) + else + call get_param(param_file, mdl, trim(masktable_nm), mask_table, masktable_desc, & + default="MOM_mask_table", layoutParam=.true.) + endif + + ! First, check the run directory for the mask_table input file. + mask_table_exists = file_exists(trim(mask_table)) + ! If not found, check the input directory + if (.not. 
mask_table_exists) then + mask_table = trim(inputdir)//trim(mask_table) + mask_table_exists = file_exists(mask_table) + endif if (is_static) then layout(1) = NIPROC ; layout(2) = NJPROC @@ -317,6 +362,16 @@ subroutine MOM_domains_init(MOM_dom, param_file, symmetric, static_memory, & "Shift to using "//trim(layout_nm)//" instead.") endif + if (auto_mask_table) then + if (layout(1) /= 0 .and. layout(1) /= auto_layout(1)) then + call MOM_error(FATAL, "Cannot set LAYOUT or NIPROC when AUTO_MASKTABLE is enabled.") + endif + if (layout(2) /= 0 .and. layout(2) /= auto_layout(2)) then + call MOM_error(FATAL, "Cannot set LAYOUT or NJPROC when AUTO_MASKTABLE is enabled.") + endif + layout(:) = auto_layout(:) + endif + if ( (layout(1) == 0) .and. (layout(2) == 0) ) & call MOM_define_layout(n_global, PEs_used, layout) if ( (layout(1) /= 0) .and. (layout(2) == 0) ) layout(2) = PEs_used / layout(1) @@ -351,9 +406,28 @@ subroutine MOM_domains_init(MOM_dom, param_file, symmetric, static_memory, & ! Set up the I/O layout, it will be checked later that it uses an even multiple of the number of ! PEs in each direction. io_layout(:) = (/ 1, 1 /) - call get_param(param_file, mdl, trim(io_layout_nm), io_layout, & - "The processor layout to be used, or 0,0 to automatically set the io_layout "//& - "to be the same as the layout.", default=1, layoutParam=.true.) + + ! Compute a valid IO layout if auto_mask_table is on. Otherwise, read in IO_LAYOUT parameter, + if (auto_mask_table) then + call get_param(param_file, mdl, "AUTO_IO_LAYOUT_FAC", auto_io_layout_fac, & + "When AUTO_MASKTABLE is enabled, io layout is calculated by performing integer "//& + "division of the runtime-determined domain layout with this factor. If the factor "//& + "is set to 0 (default), the io layout is set to 1,1.", & + default=0, layoutParam=.true.) 
+    if (auto_io_layout_fac>0) then
+      io_layout(1) = max(layout(1)/auto_io_layout_fac, 1)
+      io_layout(2) = max(layout(2)/auto_io_layout_fac, 1)
+    elseif (auto_io_layout_fac<0) then
+      call MOM_error(FATAL, 'AUTO_IO_LAYOUT_FAC must be a nonnegative integer.')
+    endif
+    call log_param(param_file, mdl, trim(io_layout_nm), io_layout, &
+                   "The processor layout to be used, or 0,0 to automatically set the io_layout "//&
+                   "to be the same as the layout.", layoutParam=.true.)
+  else
+    call get_param(param_file, mdl, trim(io_layout_nm), io_layout, &
+                   "The processor layout to be used, or 0,0 to automatically set the io_layout "//&
+                   "to be the same as the layout.", default=1, layoutParam=.true.)
+  endif
 
   call create_MOM_domain(MOM_dom, n_global, n_halo, reentrant, tripolar_N, layout, &
                          io_layout=io_layout, domain_name=domain_name, mask_table=mask_table, &
@@ -387,4 +461,241 @@ subroutine MOM_define_layout(n_global, ndivs, layout)
   layout = (/ idiv, jdiv /)
 end subroutine MOM_define_layout
 
+!> Given a desired number of active npes, generate a layout and mask_table.
+!!
+!! The bathymetry is read from TOPO_FILE, cells no deeper than the masking depth are flagged as land,
+!! and division counts are scanned downward from npes/(ocean fraction) until the blocks that remain
+!! after dropping all-land blocks no longer outnumber npes. The mask table is written to filename.
+subroutine gen_auto_mask_table(n_global, reentrant, tripolar_N, npes, param_file, inputdir, filename, US, layout)
+  integer, dimension(2), intent(in) :: n_global !< The total number of gridpoints in 2 directions
+  logical, dimension(2), intent(in) :: reentrant !< True if the x- and y- directions are periodic.
+  logical, intent(in) :: tripolar_N !< A flag indicating whether there is n. tripolar connectivity
+  integer, intent(in) :: npes !< The desired number of active PEs.
+  type(param_file_type), intent(in) :: param_file !< A structure to parse for run-time parameters
+  character(len=128), intent(in) :: inputdir !< INPUTDIR parameter
+  character(len=:), allocatable, intent(in) :: filename !< Mask table file path (to be auto-generated.)
+  type(unit_scale_type), pointer :: US !< A dimensional unit scaling type
+  integer, dimension(2), intent(out) :: layout !< The generated layout of PEs (incl. masked blocks)
+
+  ! Local variables
+  real, allocatable :: D(:,:) ! Bathymetric depth (to be read in from TOPO_FILE) [Z ~> m]
+  integer, dimension(:,:), allocatable :: mask ! Cell masks (based on D and MINIMUM_DEPTH)
+  character(len=200) :: topo_filepath, topo_file ! Strings for file/path
+  character(len=200) :: topo_varname ! Variable name in file
+  character(len=200) :: topo_config ! The value of the TOPO_CONFIG parameter
+  character(len=40) :: mdl = "gen_auto_mask_table" ! This subroutine's name.
+  integer :: i, j, p
+  real :: Dmask ! The depth for masking in the same units as D [Z ~> m]
+  real :: min_depth ! The minimum ocean depth, as read from MINIMUM_DEPTH [m]
+  real :: mask_depth ! The depth shallower than which to mask a point as land, as read from MASKING_DEPTH [m]
+  real :: glob_ocn_frac ! ratio of ocean points to total number of points
+  real :: r_p ! aspect ratio for division count p.
+  integer :: nx, ny ! global domain sizes
+  integer, parameter :: ibuf=2, jbuf=2 ! widths of the buffer zones added on each side of the global mask
+  real, parameter :: r_extreme = 4.0 ! aspect ratio limit (>1) for a layout to be considered.
+  integer :: num_masked_blocks ! The number of all-land blocks found for the current layout
+  integer, allocatable :: mask_table(:,:) ! The (i,j) block indices of the all-land blocks
+
+  ! Read in params necessary for auto-masking
+  call get_param(param_file, mdl, "MINIMUM_DEPTH", min_depth, do_not_log=.true., units="m", default=0.0)
+  call get_param(param_file, mdl, "MASKING_DEPTH", mask_depth, do_not_log=.true., units="m", default=-9999.0)
+  call get_param(param_file, mdl, "TOPO_CONFIG", topo_config, do_not_log=.true., fail_if_missing=.true.)
+  call get_param(param_file, mdl, "TOPO_FILE", topo_file, do_not_log=.true., default="topog.nc")
+  call get_param(param_file, mdl, "TOPO_VARNAME", topo_varname, do_not_log=.true., default="depth")
+  topo_filepath = trim(inputdir)//trim(topo_file)
+
+  ! Sanity checks
+  if (.not. is_root_pe()) then
+    call MOM_error(FATAL, 'gen_auto_mask_table should only be called by the root PE.')
+  endif
+  if (trim(topo_config) /= "file") then
+    call MOM_error(FATAL, 'Auto mask table only works with TOPO_CONFIG="file"')
+  endif
+  if (.not.file_exists(topo_filepath)) then
+    call MOM_error(FATAL, " gen_auto_mask_table: Unable to open "//trim(topo_filepath))
+  endif
+
+  nx = n_global(1)
+  ny = n_global(2)
+
+  ! Read in bathymetric depth. D is allocated rather than automatic so that a global-sized array
+  ! does not have to fit on the stack.
+  allocate(D(nx, ny))
+  D(:,:) = -9.0e30 * US%m_to_Z ! Initializing to a very large negative depth (tall mountains) everywhere.
+  call read_field(topo_filepath, trim(topo_varname), D, start=(/1, 1/), nread=n_global, no_domain=.true., &
+                  scale=US%m_to_Z)
+
+  allocate(mask(nx+2*ibuf, ny+2*jbuf), source=0)
+
+  ! Determine cell masks. MINIMUM_DEPTH and MASKING_DEPTH are read above without a scale factor,
+  ! so they are rescaled here to the units of D. The -9999.0 sentinel is tested before rescaling.
+  Dmask = mask_depth * US%m_to_Z
+  if (mask_depth == -9999.0) Dmask = min_depth * US%m_to_Z
+  do j=1,ny ; do i=1,nx
+    if (D(i,j) <= Dmask) then
+      mask(i+ibuf,j+jbuf) = 0
+    else
+      mask(i+ibuf,j+jbuf) = 1
+    endif
+  enddo ; enddo
+  deallocate(D)
+
+  ! fill in buffer cells
+
+  if (reentrant(1)) then ! REENTRANT_X
+    mask(1:ibuf, :) = mask(nx+1:nx+ibuf, :)
+    mask(ibuf+nx+1:nx+2*ibuf, :) = mask(ibuf+1:2*ibuf, :)
+  endif
+
+  if (reentrant(2)) then ! REENTRANT_Y
+    mask(:, 1:jbuf) = mask(:, ny+1:ny+jbuf)
+    mask(:, jbuf+ny+1:ny+2*jbuf) = mask(:, jbuf+1:2*jbuf)
+  endif
+
+  if (tripolar_N) then ! TRIPOLAR_N
+    do j=1,jbuf ; do i=1,nx+2*ibuf
+      mask(i, jbuf+ny+j) = mask(nx+2*ibuf+1-i, jbuf+ny+1-j)
+    enddo ; enddo
+  endif
+
+  ! Tripolar Stitch Fix: In cases where masking is asymmetrical across the tripolar stitch, there's a possibility
+  ! that certain unmasked blocks won't be able to obtain grid metrics from the halo points. This occurs when the
+  ! neighboring block on the opposite side of the tripolar stitch is masked. As a consequence, certain metrics like
+  ! dxT and dyT may be calculated through extrapolation (refer to extrapolate_metric), potentially leading to the
+  ! generation of non-positive values. This can result in divide-by-zero errors elsewhere, e.g., in MOM_hor_visc.F90.
+  ! Currently, the safest and most general solution is to prohibit masking along the tripolar stitch:
+  if (tripolar_N) then
+    mask(:, jbuf+ny) = 1
+  endif
+
+  glob_ocn_frac = real(sum(mask(1+ibuf:nx+ibuf, 1+jbuf:ny+jbuf))) / (nx * ny)
+  if (glob_ocn_frac <= 0.0) then
+    call MOM_error(FATAL, "gen_auto_mask_table: No ocean points were found in "//trim(topo_filepath))
+  endif
+
+  ! Iteratively check for all possible division counts starting from the upper bound of npes/glob_ocn_frac,
+  ! which is over-optimistic for realistic domains, but may be satisfied with idealized domains.
+  num_masked_blocks = 0
+  do p = ceiling(npes/glob_ocn_frac), npes, -1
+
+    ! compute the layout for the current division count, p
+    call MOM_define_layout(n_global, p, layout)
+
+    ! don't bother checking this p if the aspect ratio is extreme. The count is cleared so that a
+    ! rejected final candidate cannot be paired with the count found for an earlier layout.
+    r_p = (real(nx)/layout(1)) / (real(ny)/layout(2))
+    if ( r_p * r_extreme < 1 .or. r_extreme < r_p ) then
+      num_masked_blocks = 0
+      cycle
+    endif
+
+    ! Get the number of masked_blocks for this particular division count
+    call determine_land_blocks(mask, nx, ny, layout(1), layout(2), ibuf, jbuf, num_masked_blocks)
+
+    ! If we can eliminate enough blocks to reach the target npes, adopt
+    ! this p (and the associated layout) and terminate the iteration.
+    if (p-num_masked_blocks <= npes) then
+      call MOM_error(NOTE, "Found the optimum layout for auto-masking. Terminating iteration...")
+      exit
+    endif
+  enddo
+
+  if (num_masked_blocks == 0) then
+    call MOM_error(FATAL, "Couldn't auto-eliminate any land blocks. Try to increase the number "//&
+                   "of MOM6 PEs or set AUTO_MASKTABLE to False.")
+  endif
+
+  ! Call determine_land_blocks once again, this time to retrieve and write out the mask_table.
+  allocate(mask_table(num_masked_blocks,2))
+  call determine_land_blocks(mask, nx, ny, layout(1), layout(2), ibuf, jbuf, num_masked_blocks, mask_table)
+  call write_auto_mask_file(mask_table, layout, npes, filename)
+  deallocate(mask_table)
+  deallocate(mask)
+
+end subroutine gen_auto_mask_table
+
+!> Given a number of domain divisions, compute the max number of land blocks that can be eliminated,
+!! and return the resulting mask table if requested.
+subroutine determine_land_blocks(mask, nx, ny, idiv, jdiv, ibuf, jbuf, num_masked_blocks, mask_table)
+  integer, dimension(:,:), intent(in) :: mask !< cell masks based on depth and MINIMUM_DEPTH
+  integer, intent(in) :: nx !< Total number of gridpoints in x-dir (global)
+  integer, intent(in) :: ny !< Total number of gridpoints in y-dir (global)
+  integer, intent(in) :: idiv !< number of divisions along x-dir
+  integer, intent(in) :: jdiv !< number of divisions along y-dir
+  integer, intent(in) :: ibuf !< number of buffer cells in x-dir.
+                              !! (not necessarily the same as NIHALO)
+  integer, intent(in) :: jbuf !< number of buffer cells in y-dir.
+                              !! (not necessarily the same as NJHALO)
+  integer, intent(out) :: num_masked_blocks !< the final number of masked blocks
+  integer, intent(out), optional :: mask_table(:,:) !< the resulting array of mask_table
+
+  ! Local variables
+  integer, dimension(idiv) :: ibegin ! The starting index of each division along x axis
+  integer, dimension(idiv) :: iend ! The ending index of each division along x axis
+  integer, dimension(jdiv) :: jbegin ! The starting index of each division along y axis
+  integer, dimension(jdiv) :: jend ! The ending index of each division along y axis
+  integer :: i, j, ib, ie, jb, je
+
+  call compute_extent(1, nx, idiv, ibegin, iend)
+  call compute_extent(1, ny, jdiv, jbegin, jend)
+
+  num_masked_blocks = 0
+
+  do i=1,idiv
+    ! The mask is offset by the buffer width, so this window is the block plus its buffer on both sides.
+    ib = ibegin(i)
+    ie = iend(i) + 2 * ibuf
+    do j=1,jdiv
+      jb = jbegin(j)
+      je = jend(j) + 2 * jbuf
+
+      ! A block with any ocean cell in its window (buffers included) must stay active.
+      if (any(mask(ib:ie,jb:je)==1)) cycle
+
+      num_masked_blocks = num_masked_blocks + 1
+
+      if (present(mask_table)) then
+        if ( num_masked_blocks > size(mask_table, dim=1)) then
+          call MOM_error(FATAL, "The mask_table argument passed to determine_land_blocks() has insufficient size.")
+        endif
+
+        mask_table(num_masked_blocks,1) = i
+        mask_table(num_masked_blocks,2) = j
+      endif
+    enddo
+  enddo
+
+end subroutine determine_land_blocks
+
+!> Write out the auto-generated mask information to a file in the run directory.
+subroutine write_auto_mask_file(mask_table, layout, npes, filename)
+  integer, intent(in) :: mask_table(:,:) !< mask table array to be written out.
+  integer, dimension(2), intent(in) :: layout !< PE layout (incl. eliminated blocks)
+  integer, intent(in) :: npes !< The target number of active PEs
+  character(len=:), allocatable, intent(in) :: filename !< file name for the mask_table to be written
+
+  ! Local variables
+  integer :: file_ascii ! The unit number of the auto-generated mask_file file.
+  integer :: true_num_masked_blocks ! The number of blocks that are actually eliminated
+  integer :: p
+
+  ! Eliminate only enough blocks to ensure that the number of active blocks precisely matches the target npes.
+  true_num_masked_blocks = layout(1) * layout(2) - npes
+
+  ! Only the leading rows of mask_table are written, so it has to hold at least that many blocks.
+  if (true_num_masked_blocks > size(mask_table, dim=1)) then
+    call MOM_error(FATAL, "write_auto_mask_file: mask_table holds fewer blocks than must be "//&
+                   "eliminated to reach the target number of PEs.")
+  endif
+
+  file_ascii = -1 ! Assigned here, not at declaration, so the variable does not acquire the SAVE attribute.
+  call open_ASCII_file(file_ascii, trim(filename), action=WRITEONLY_FILE)
+  write(file_ascii, '(I0)') true_num_masked_blocks
+  write(file_ascii, '(I0,",",I0)') layout(1), layout(2)
+  do p = 1, true_num_masked_blocks
+    write(file_ascii, '(I0,",",I0)') mask_table(p,1), mask_table(p,2)
+  enddo
+  call close_file(file_ascii)
+end subroutine write_auto_mask_file
+
 end module MOM_domains
diff --git a/src/ice_shelf/MOM_ice_shelf.F90 b/src/ice_shelf/MOM_ice_shelf.F90
index 8e0e58c1b6..b5ed3f91cf 100644
--- a/src/ice_shelf/MOM_ice_shelf.F90
+++ b/src/ice_shelf/MOM_ice_shelf.F90
@@ -1276,7 +1276,7 @@ subroutine initialize_ice_shelf(param_file, ocn_grid, Time, CS, diag, forces_in,
   ! Set up the ice-shelf domain and grid
   wd_halos(:)=0
   allocate(CS%Grid)
-  call MOM_domains_init(CS%Grid%domain, param_file, min_halo=wd_halos, symmetric=GRID_SYM_,&
+  call MOM_domains_init(CS%Grid%domain, CS%US, param_file, min_halo=wd_halos, symmetric=GRID_SYM_,&
                         domain_name='MOM_Ice_Shelf_in')
   !allocate(CS%Grid_in%HI)
   !call hor_index_init(CS%Grid%Domain, CS%Grid%HI, param_file, &
diff --git a/src/ocean_data_assim/MOM_oda_driver.F90 b/src/ocean_data_assim/MOM_oda_driver.F90
index 53615b0063..1fdf09e258 100644
--- a/src/ocean_data_assim/MOM_oda_driver.F90
+++ b/src/ocean_data_assim/MOM_oda_driver.F90
@@ -305,7 +305,7 @@ subroutine init_oda(Time, G, GV, US, diag_CS, CS)
   CS%G => G
   allocate(CS%Grid)
   ! params NIHALO_ODA, NJHALO_ODA set the DA halo size
-  call MOM_domains_init(CS%Grid%Domain,PF,param_suffix='_ODA')
+  call MOM_domains_init(CS%Grid%Domain,CS%US,PF,param_suffix='_ODA')
   allocate(HI)
   call hor_index_init(CS%Grid%Domain, HI, PF)
   call verticalGridInit( PF, CS%GV, CS%US )