Skip to content

Commit

Permalink
+Remove clocks inside of j-loops
Browse files Browse the repository at this point in the history
  Removed clocks that were being called from inside of j-loops in two modules.
These are inefficient and can cause the model to hang in some cases if used, and
there are better ways to get timing information at this level.  If there is
interest in the timing breakdown at this level, the code should be restructured
to move the key blocks outside of the j-loops.  The run-time parameter
ALLOW_CLOCKS_IN_OMP_LOOPS is no longer being used so it is now obsoleted. All
answers are bitwise identical, but there are changes to some MOM_parameter_doc
files.
  • Loading branch information
Hallberg-NOAA committed Dec 5, 2021
1 parent 585cc70 commit f3ab52c
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 68 deletions.
1 change: 1 addition & 0 deletions src/diagnostics/MOM_obsolete_params.F90
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ subroutine find_obsolete_params(param_file)
call obsolete_logical(param_file, "MSTAR_FIXED", hint="Instead use MSTAR_MODE.")
call obsolete_logical(param_file, "USE_VISBECK_SLOPE_BUG", .false.)

call obsolete_logical(param_file, "ALLOW_CLOCKS_IN_OMP_LOOPS", .true.)
call obsolete_logical(param_file, "LARGE_FILE_SUPPORT", .true.)
call obsolete_real(param_file, "MIN_Z_DIAG_INTERVAL")
call obsolete_char(param_file, "Z_OUTPUT_GRID_FILE")
Expand Down
54 changes: 2 additions & 52 deletions src/parameterizations/vertical/MOM_bulk_mixed_layer.F90
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,6 @@ module MOM_bulk_mixed_layer
!! detrainment [R Z L2 T-3 ~> W m-2].
diag_PE_detrain2 !< The spurious source of potential energy due to mixed layer only
!! detrainment [R Z L2 T-3 ~> W m-2].
logical :: allow_clocks_in_omp_loops !< If true, clocks can be called from inside loops that can
!! be threaded. To run with multiple threads, set to False.
type(group_pass_type) :: pass_h_sum_hmbl_prev !< For group halo pass

!>@{ Diagnostic IDs
Expand All @@ -150,8 +148,7 @@ module MOM_bulk_mixed_layer
end type bulkmixedlayer_CS

!>@{ CPU clock IDs
integer :: id_clock_detrain=0, id_clock_mech=0, id_clock_conv=0, id_clock_adjustment=0
integer :: id_clock_EOS=0, id_clock_resort=0, id_clock_pass=0
integer :: id_clock_pass=0
!>@}

contains
Expand Down Expand Up @@ -433,7 +430,6 @@ subroutine bulkmixedlayer(h_3d, u_3d, v_3d, tv, fluxes, dt, ea, eb, G, GV, US, C
d_ea(i,k) = 0.0 ; d_eb(i,k) = 0.0
enddo ; enddo

if (id_clock_EOS>0) call cpu_clock_begin(id_clock_EOS)
! Calculate an estimate of the mid-mixed layer pressure [R L2 T-2 ~> Pa]
if (associated(tv%p_surf)) then
do i=is,ie ; p_ref(i) = tv%p_surf(i,j) ; enddo
Expand All @@ -449,27 +445,22 @@ subroutine bulkmixedlayer(h_3d, u_3d, v_3d, tv, fluxes, dt, ea, eb, G, GV, US, C
call calculate_density(T(:,k), S(:,k), p_ref, R0(:,k), tv%eqn_of_state, EOSdom)
call calculate_density(T(:,k), S(:,k), p_ref_cv, Rcv(:,k), tv%eqn_of_state, EOSdom)
enddo
if (id_clock_EOS>0) call cpu_clock_end(id_clock_EOS)

if (CS%ML_resort) then
if (id_clock_resort>0) call cpu_clock_begin(id_clock_resort)
if (CS%ML_presort_nz_conv_adj > 0) &
call convective_adjustment(h, u, v, R0, Rcv, T, S, eps, d_eb, dKE_CA, cTKE, j, G, GV, &
US, CS, CS%ML_presort_nz_conv_adj)

call sort_ML(h, R0, eps, G, GV, CS, ksort)
if (id_clock_resort>0) call cpu_clock_end(id_clock_resort)
else
do k=1,nz ; do i=is,ie ; ksort(i,k) = k ; enddo ; enddo

if (id_clock_adjustment>0) call cpu_clock_begin(id_clock_adjustment)
! Undergo instantaneous entrainment into the buffer layers and mixed layers
! to remove hydrostatic instabilities. Any water that is lighter than
! currently in the mixed or buffer layer is entrained.
call convective_adjustment(h, u, v, R0, Rcv, T, S, eps, d_eb, dKE_CA, cTKE, j, G, GV, US, CS)
do i=is,ie ; h_CA(i) = h(i,1) ; enddo

if (id_clock_adjustment>0) call cpu_clock_end(id_clock_adjustment)
endif

if (associated(fluxes%lrunoff) .and. CS%do_rivermix) then
Expand All @@ -493,9 +484,6 @@ subroutine bulkmixedlayer(h_3d, u_3d, v_3d, tv, fluxes, dt, ea, eb, G, GV, US, C
do i=is,ie ; TKE_river(i) = 0.0 ; enddo
endif


if (id_clock_conv>0) call cpu_clock_begin(id_clock_conv)

! The surface forcing is contained in the fluxes type.
! We aggregate the thermodynamic forcing for a time step into the following:
! netMassInOut = water [H ~> m or kg m-2] added/removed via surface fluxes
Expand All @@ -515,16 +503,12 @@ subroutine bulkmixedlayer(h_3d, u_3d, v_3d, tv, fluxes, dt, ea, eb, G, GV, US, C
nsw, Pen_SW_bnd, opacity_band, Conv_En, dKE_FC, &
j, ksort, G, GV, US, CS, tv, fluxes, dt, aggregate_FW_forcing)

if (id_clock_conv>0) call cpu_clock_end(id_clock_conv)

! Now the mixed layer undergoes mechanically forced entrainment.
! The mixed layer may entrain down to the Monin-Obukhov depth if the
! surface is becoming lighter, and is effecti1336vely detraining.

! First the TKE at the depth of free convection that is available
! to drive mixing is calculated.
if (id_clock_mech>0) call cpu_clock_begin(id_clock_mech)

call find_starting_TKE(htot, h_CA, fluxes, Conv_En, cTKE, dKE_FC, dKE_CA, &
TKE, TKE_river, Idecay_len_TKE, cMKE, dt, Idt_diag, &
j, ksort, G, GV, US, CS)
Expand All @@ -542,7 +526,6 @@ subroutine bulkmixedlayer(h_3d, u_3d, v_3d, tv, fluxes, dt, ea, eb, G, GV, US, C
if (CS%TKE_diagnostics) then ; do i=is,ie
CS%diag_TKE_mech_decay(i,j) = CS%diag_TKE_mech_decay(i,j) - Idt_diag * TKE(i)
enddo ; endif
if (id_clock_mech>0) call cpu_clock_end(id_clock_mech)

! Calculate the homogeneous mixed layer properties and store them in layer 0.
do i=is,ie ; if (htot(i) > 0.0) then
Expand Down Expand Up @@ -572,10 +555,8 @@ subroutine bulkmixedlayer(h_3d, u_3d, v_3d, tv, fluxes, dt, ea, eb, G, GV, US, C
! these unused layers (but not currently in the code).

if (CS%ML_resort) then
if (id_clock_resort>0) call cpu_clock_begin(id_clock_resort)
call resort_ML(h(:,0:), T(:,0:), S(:,0:), R0(:,0:), Rcv(:,0:), GV%Rlay(:), eps, &
d_ea, d_eb, ksort, G, GV, CS, dR0_dT, dR0_dS, dRcv_dT, dRcv_dS)
if (id_clock_resort>0) call cpu_clock_end(id_clock_resort)
endif

if (CS%limit_det .or. (CS%id_Hsfc_max > 0) .or. (CS%id_Hsfc_min > 0)) then
Expand Down Expand Up @@ -606,7 +587,6 @@ subroutine bulkmixedlayer(h_3d, u_3d, v_3d, tv, fluxes, dt, ea, eb, G, GV, US, C
! Move water left in the former mixed layer into the buffer layer and
! from the buffer layer into the interior. These steps might best be
! treated in conjuction.
if (id_clock_detrain>0) call cpu_clock_begin(id_clock_detrain)
if (CS%nkbl == 1) then
call mixedlayer_detrain_1(h(:,0:), T(:,0:), S(:,0:), R0(:,0:), Rcv(:,0:), &
GV%Rlay(:), dt, dt__diag, d_ea, d_eb, j, G, GV, US, CS, &
Expand All @@ -619,8 +599,6 @@ subroutine bulkmixedlayer(h_3d, u_3d, v_3d, tv, fluxes, dt, ea, eb, G, GV, US, C
! This code only works with 1 or 2 buffer layers.
call MOM_error(FATAL, "MOM_mixed_layer: CS%nkbl must be 1 or 2 for now.")
endif
if (id_clock_detrain>0) call cpu_clock_end(id_clock_detrain)


if (CS%id_Hsfc_used > 0) then
do i=is,ie ; Hsfc_used(i,j) = GV%H_to_Z * h(i,0) ; enddo
Expand Down Expand Up @@ -3526,12 +3504,6 @@ subroutine bulkmixedlayer_init(Time, G, GV, US, param_file, diag, CS)
"If true, use code with a bug that causes a loss of momentum conservation "//&
"during mixedlayer convection.", default=.false.)

call get_param(param_file, mdl, "ALLOW_CLOCKS_IN_OMP_LOOPS", &
CS%allow_clocks_in_omp_loops, &
"If true, clocks can be called from inside loops that can "//&
"be threaded. To run with multiple threads, set to False.", &
default=.true.)

CS%id_ML_depth = register_diag_field('ocean_model', 'h_ML', diag%axesT1, &
Time, 'Surface mixed layer depth', 'm')
CS%id_TKE_wind = register_diag_field('ocean_model', 'TKE_wind', diag%axesT1, &
Expand Down Expand Up @@ -3610,30 +3582,8 @@ subroutine bulkmixedlayer_init(Time, G, GV, US, param_file, diag, CS)
if (CS%id_PE_detrain2 > 0) call safe_alloc_alloc(CS%diag_PE_detrain2, isd, ied, jsd, jed)
if (CS%id_ML_depth > 0) call safe_alloc_alloc(CS%ML_depth, isd, ied, jsd, jed)

if (CS%allow_clocks_in_omp_loops) then
id_clock_detrain = cpu_clock_id('(Ocean mixed layer detrain)', &
sync=.false., grain=CLOCK_ROUTINE)
id_clock_mech = cpu_clock_id('(Ocean mixed layer mechanical entrainment)', &
sync=.false., grain=CLOCK_ROUTINE)
id_clock_conv = cpu_clock_id('(Ocean mixed layer convection)', &
sync=.false., grain=CLOCK_ROUTINE)
if (CS%ML_resort) then
id_clock_resort = cpu_clock_id('(Ocean mixed layer resorting)', &
sync=.false., grain=CLOCK_ROUTINE)
else
id_clock_adjustment = cpu_clock_id('(Ocean mixed layer convective adjustment)', &
sync=.false., grain=CLOCK_ROUTINE)
endif
id_clock_EOS = cpu_clock_id('(Ocean mixed layer EOS)', &
sync=.false., grain=CLOCK_ROUTINE)
endif

if (CS%limit_det .or. (CS%id_Hsfc_min > 0)) &
id_clock_pass = cpu_clock_id('(Ocean mixed layer halo updates)', grain=CLOCK_ROUTINE)


! if (CS%limit_det) then
! endif
id_clock_pass = cpu_clock_id('(Ocean mixed layer halo updates)', grain=CLOCK_ROUTINE)

end subroutine bulkmixedlayer_init

Expand Down
18 changes: 2 additions & 16 deletions src/parameterizations/vertical/MOM_regularize_layers.F90
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,11 @@ module MOM_regularize_layers
logical :: debug !< If true, do more thorough checks for debugging purposes.

integer :: id_def_rat = -1 !< A diagnostic ID
logical :: allow_clocks_in_omp_loops !< If true, clocks can be called from inside loops that
!! can be threaded. To run with multiple threads, set to False.
end type regularize_layers_CS

!>@{ Clock IDs
!! \todo Should these be global?
integer :: id_clock_pass, id_clock_EOS
integer :: id_clock_pass
!>@}

contains
Expand Down Expand Up @@ -233,12 +231,10 @@ subroutine regularize_surface(h, tv, dt, ea, eb, G, GV, US, CS)
! Now restructure the layers.
!$OMP parallel do default(private) shared(is,ie,js,je,nz,do_j,def_rat_h,CS,nkmb,G,GV,US, &
!$OMP e,I_dtol,h,tv,debug,h_neglect,p_ref_cv,ea, &
!$OMP eb,id_clock_EOS,nkml,EOSdom)
!$OMP eb,nkml,EOSdom)
do j=js,je ; if (do_j(j)) then

! call cpu_clock_begin(id_clock_EOS)
! call calculate_density_derivs(T(:,1), S(:,1), p_ref_cv, dRcv_dT, dRcv_dS, tv%eqn_of_state, EOSdom)
! call cpu_clock_end(id_clock_EOS)

do k=1,nz ; do i=is,ie ; d_ea(i,k) = 0.0 ; d_eb(i,k) = 0.0 ; enddo ; enddo
kmax_d_ea = 0
Expand Down Expand Up @@ -367,11 +363,9 @@ subroutine regularize_surface(h, tv, dt, ea, eb, G, GV, US, CS)
enddo
endif
if (det_any) then
call cpu_clock_begin(id_clock_EOS)
do k=1,nkmb
call calculate_density(T_2d(:,k), S_2d(:,k), p_ref_cv, Rcv(:,k), tv%eqn_of_state, EOSdom)
enddo
call cpu_clock_end(id_clock_EOS)

do i=is,ie ; if (det_i(i)) then
k1 = nkmb ; k2 = nz
Expand Down Expand Up @@ -780,19 +774,11 @@ subroutine regularize_layers_init(Time, G, GV, param_file, diag, CS)
! call get_param(param_file, mdl, "DEBUG_CONSERVATION", CS%debug, &
! "If true, monitor conservation and extrema.", default=.false., do_not_log=just_read)

call get_param(param_file, mdl, "ALLOW_CLOCKS_IN_OMP_LOOPS", CS%allow_clocks_in_omp_loops, &
"If true, clocks can be called from inside loops that can "//&
"be threaded. To run with multiple threads, set to False.", &
default=.true., do_not_log=just_read)

if (.not.CS%regularize_surface_layers) return

CS%id_def_rat = register_diag_field('ocean_model', 'deficit_ratio', diag%axesT1, &
Time, 'Max face thickness deficit ratio', 'nondim')

if (CS%allow_clocks_in_omp_loops) then
id_clock_EOS = cpu_clock_id('(Ocean regularize_layers EOS)', grain=CLOCK_ROUTINE)
endif
id_clock_pass = cpu_clock_id('(Ocean regularize_layers halo updates)', grain=CLOCK_ROUTINE)

end subroutine regularize_layers_init
Expand Down

0 comments on commit f3ab52c

Please sign in to comment.