From 0abd134ea2f9042aa8ea3af4627db8c199405d23 Mon Sep 17 00:00:00 2001
From: Marshall Ward
Date: Thu, 26 Sep 2019 10:23:51 -0400
Subject: [PATCH 1/5] Report negative zero min/max as positive

This patch addresses the inconsistency of signed zero in the minimum and
maximum values used in the checksum report.

The Fortran intrinsic min() and our MPI library's implementation of
MPI_Reduce with MPI_MIN can give different results for different values
of signed zero, e.g. min(0,-0) vs min(-0,0).  Additionally, the
MPI_Reduce result appears not to follow these rules consistently in more
complex MPI calculations.

Due to these issues, we add positive zero to the result to ensure that
any negative zero results are reported as positive.
---
 .testing/Makefile               | 4 ++--
 src/framework/MOM_checksums.F90 | 5 ++++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/.testing/Makefile b/.testing/Makefile
index a567786dd2..8bad469a23 100644
--- a/.testing/Makefile
+++ b/.testing/Makefile
@@ -181,7 +181,7 @@ define CMP_RULE
 .PRECIOUS: $(foreach b,$(2),results/%/chksum_diag.$(b))
 %.$(1).diag: $(foreach b,$(2),results/%/chksum_diag.$(b))
-	cmp $$^ || true
+	cmp $$^
 endef

 $(eval $(call CMP_RULE,regression,symmetric target))
@@ -255,7 +255,7 @@ results/%/ocean.stats.restart: ../build/symmetric/MOM6
 	rm -rf work/$*/restart
 	mkdir -p work/$*/restart
 	cp -rL $*/* work/$*/restart
-	mkdir work/$*/restart/RESTART
+	mkdir -p work/$*/restart/RESTART
 	# Generate the half-period input namelist
 	# TODO: Assumes runtime set by DAYMAX, will fail if set by input.nml
 	cd work/$*/restart \
diff --git a/src/framework/MOM_checksums.F90 b/src/framework/MOM_checksums.F90
index c6a23667db..0f2db2c955 100644
--- a/src/framework/MOM_checksums.F90
+++ b/src/framework/MOM_checksums.F90
@@ -1822,8 +1822,11 @@ subroutine chk_sum_msg3(fmsg, aMean, aMin, aMax, mesg, iounit)
   real, intent(in) :: aMax !< The maximum value of the array
   integer, intent(in) :: iounit !< Checksum logger IO unit

+  ! NOTE: We add zero to aMin and aMax to remove any negative zeros.
+  ! This is due to inconsistencies of signed zero in local vs MPI calculations.
+
   if (is_root_pe()) write(iounit, '(A,3(A,ES25.16,1X),A)') &
-    fmsg, " mean=", aMean, "min=", aMin, "max=", aMax, trim(mesg)
+    fmsg, " mean=", aMean, "min=", (0. + aMin), "max=", (0. + aMax), trim(mesg)

 end subroutine chk_sum_msg3

 !> MOM_checksums_init initializes the MOM_checksums module. As it happens, the
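
A standalone sketch of the signed-zero behavior described above (illustrative only, not part of the patch): under IEEE arithmetic the sum (+0.) + (-0.) is +0., so adding positive zero scrubs the sign from a negative-zero result, while the sign of a zero returned by min() over mixed zeros is processor-dependent.

program signed_zero_demo
  implicit none
  real :: nzero
  nzero = -0.0
  ! sign() exposes the sign bit of a zero result
  print *, sign(1.0, min(nzero, 0.0))  ! processor-dependent: -1.0 or 1.0
  print *, sign(1.0, 0. + nzero)       ! always 1.0: adding +0. removes the sign
end program signed_zero_demo
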
From ceae8928fde5a4799c8296b9a158470c1134d605 Mon Sep 17 00:00:00 2001
From: Niki Zadeh
Date: Mon, 30 Sep 2019 12:20:22 -0400
Subject: [PATCH 2/5] Fixing openmp compile

- Simple tests reproduce with threads
---
 .../lateral/MOM_lateral_mixing_coeffs.F90 |  2 +-
 .../vertical/MOM_geothermal.F90           | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/parameterizations/lateral/MOM_lateral_mixing_coeffs.F90 b/src/parameterizations/lateral/MOM_lateral_mixing_coeffs.F90
index 1582b23615..c3c88b4795 100644
--- a/src/parameterizations/lateral/MOM_lateral_mixing_coeffs.F90
+++ b/src/parameterizations/lateral/MOM_lateral_mixing_coeffs.F90
@@ -241,7 +241,7 @@ subroutine calc_resoln_function(h, tv, G, GV, US, CS)

   ! Do this calculation on the extent used in MOM_hor_visc.F90, and
   ! MOM_tracer.F90 so that no halo update is needed.
-!$OMP parallel default(none) shared(is,ie,js,je,Ieq,Jeq,CS) &
+!$OMP parallel default(none) shared(is,ie,js,je,Ieq,Jeq,CS,US) &
 !$OMP private(dx_term,cg1_q,power_2,cg1_u,cg1_v)
   if (CS%Res_fn_power_visc >= 100) then
 !$OMP do
diff --git a/src/parameterizations/vertical/MOM_geothermal.F90 b/src/parameterizations/vertical/MOM_geothermal.F90
index 5fefbf199e..6d81955ab9 100644
--- a/src/parameterizations/vertical/MOM_geothermal.F90
+++ b/src/parameterizations/vertical/MOM_geothermal.F90
@@ -143,15 +143,6 @@ subroutine geothermal(h, tv, dt, ea, eb, G, GV, CS, halo)
 !      resid(i,j) = tv%internal_heat(i,j)
 !    enddo ; enddo

-!$OMP parallel do default(none) shared(is,ie,js,je,G,GV,CS,dt,Irho_cp,nkmb,tv, &
-!$OMP p_Ref,h,Angstrom,nz,H_neglect,eb) &
-!$OMP private(num_start,heat_rem,do_i,h_geo_rem,num_left,&
-!$OMP isj,iej,Rcv_BL,h_heated,heat_avail,k_tgt, &
-!$OMP Rcv_tgt,Rcv,dRcv_dT,T2,S2,dRcv_dT_, &
-!$OMP dRcv_dS_,heat_in_place,heat_trans, &
-!$OMP wt_in_place,dTemp,dRcv,h_transfer,heating, &
-!$OMP I_h)
-
 ! Conditionals for tracking diagnostic depdendencies
   compute_h_old = CS%id_internal_heat_h_tendency > 0 &
                   .or. CS%id_internal_heat_heat_tendency > 0 &
@@ -164,6 +155,15 @@ subroutine geothermal(h, tv, dt, ea, eb, G, GV, CS, halo)
   if (compute_h_old) h_old(:,:,:) = 0.0
   if (compute_T_old) T_old(:,:,:) = 0.0

+!$OMP parallel do default(none) shared(is,ie,js,je,G,GV,CS,dt,Irho_cp,nkmb,tv, &
+!$OMP p_Ref,h,Angstrom,nz,H_neglect,eb) &
+!$OMP private(num_start,heat_rem,do_i,h_geo_rem,num_left,&
+!$OMP isj,iej,Rcv_BL,h_heated,heat_avail,k_tgt, &
+!$OMP Rcv_tgt,Rcv,dRcv_dT,T2,S2,dRcv_dT_, &
+!$OMP dRcv_dS_,heat_in_place,heat_trans, &
+!$OMP wt_in_place,dTemp,dRcv,h_transfer,heating, &
+!$OMP I_h)
+
   do j=js,je
     ! 1. Only work on columns that are being heated.
     ! 2. Find the deepest layer with any mass.
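
A minimal sketch (generic code, not from MOM6) of the two OpenMP rules behind this patch: under default(none) every variable referenced in the region, including the newly used US, must appear in a shared() or private() clause, and a parallel do directive binds to the do loop that immediately follows it, which is why the geothermal directive above had to move below the setup assignments.

program omp_placement_demo
  implicit none
  integer :: i, n
  real :: scale, a(100)

  n = 100
  scale = 2.0  ! setup belongs before the directive, not between it and the loop

  !$omp parallel do default(none) shared(n, scale, a) private(i)
  do i = 1, n  ! the loop must immediately follow the directive
    a(i) = scale*real(i)
  enddo

  print *, a(1), a(n)
end program omp_placement_demo
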
From b6dfa49c1a45ab2ff1da8dfa8f22a6f0ef17fa27 Mon Sep 17 00:00:00 2001
From: Marshall Ward
Date: Tue, 1 Oct 2019 00:42:47 -0400
Subject: [PATCH 3/5] Use popcnt intrinsic for bitcount

Profiling of the test suite showed a large amount of time (nearly 1/3)
devoted to computing the bitcounts used as checksums for diagnostics.

This patch replaces the bit loop with the popcnt intrinsic, which
produces the same result and uses the hardware instruction when
available (e.g. popcnt on x86).  This change appears to have reduced the
runtime of the test suite from 4.5 minutes to under 3 minutes.
---
 src/framework/MOM_checksums.F90 | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/framework/MOM_checksums.F90 b/src/framework/MOM_checksums.F90
index c6a23667db..34390017ee 100644
--- a/src/framework/MOM_checksums.F90
+++ b/src/framework/MOM_checksums.F90
@@ -1849,20 +1849,12 @@ end subroutine chksum_error
 !> Does a bitcount of a number by first casting to an integer and then using BTEST
 !! to check bit by bit
 integer function bitcount(x)
-  real :: x !< Number to be bitcount
+  real, intent(in) :: x !< Number to be bitcount

-  ! Local variables
-  integer(kind(x)) :: y !< Store the integer representation of the memory used by x
-  integer :: bit
-
-  bitcount = 0
-  y = transfer(x,y)
-
-  ! Fortran standard says that bit indexing start at 0
-  do bit = 0, bit_size(y)-1
-    if (BTEST(y,bit)) bitcount = bitcount+1
-  enddo
+  integer, parameter :: xk = kind(x) !< Kind type of x
+
+  ! NOTE: Assumes that reals and integers of kind=xk are the same size
+  bitcount = popcnt(transfer(x, 1_xk))
 end function bitcount

 end module MOM_checksums
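
A standalone sketch of the transfer/popcnt idiom adopted above, under the same assumption the patch notes (an integer of kind xk has the same storage size as the real): transfer() reinterprets the real's bit pattern as an integer without numeric conversion, and popcnt() counts the set bits, matching the old BTEST loop.

program popcnt_demo
  implicit none
  real :: x
  integer, parameter :: xk = kind(x)  ! named constant; a literal like 1_kind(x) is not legal
  integer(xk) :: y
  integer :: nbits, bit

  x = 1.5
  y = transfer(x, y)  ! reinterpret the bits of x as an integer

  nbits = 0           ! the replaced BTEST loop, for comparison
  do bit = 0, bit_size(y)-1
    if (btest(y, bit)) nbits = nbits + 1
  enddo

  print *, nbits, popcnt(transfer(x, 1_xk))  ! both print the same count
end program popcnt_demo
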
From 277ff1f4ecaa675d5ef0cde8237ded4f4d0b423c Mon Sep 17 00:00:00 2001
From: Niki Zadeh
Date: Tue, 1 Oct 2019 13:36:13 -0400
Subject: [PATCH 4/5] Fix -openmp compilation

- HOWEVER, answers for 1-thread runs differ from answers with no openmp
  threads!!!
- Not acceptable
---
 src/core/MOM_PressureForce_blocked_AFV.F90 |  2 +-
 src/core/MOM_barotropic.F90                |  4 +--
 src/core/MOM_continuity_PPM.F90            |  4 +--
 src/diagnostics/MOM_diagnostics.F90        |  4 +--
 .../lateral/MOM_hor_visc.F90               | 15 ++++++++--
 .../lateral/MOM_mixed_layer_restrat.F90    |  4 +--
 .../lateral/MOM_thickness_diffuse.F90      | 30 +++++++++----------
 .../vertical/MOM_diabatic_aux.F90          |  6 ++--
 .../vertical/MOM_geothermal.F90            |  4 ++-
 .../vertical/MOM_vert_friction.F90         |  2 +-
 src/tracer/MOM_tracer_advect.F90           |  2 +-
 11 files changed, 45 insertions(+), 32 deletions(-)

diff --git a/src/core/MOM_PressureForce_blocked_AFV.F90 b/src/core/MOM_PressureForce_blocked_AFV.F90
index 773bcefc1d..073f790fc5 100644
--- a/src/core/MOM_PressureForce_blocked_AFV.F90
+++ b/src/core/MOM_PressureForce_blocked_AFV.F90
@@ -327,7 +327,7 @@ subroutine PressureForce_blk_AFV_nonBouss(h, tv, PFu, PFv, G, GV, US, CS, p_atm,
   ! geopotentials will not now be linear at the sub-grid-scale. Doing this
   ! ensures no motion with flat isopycnals, even with a nonlinear equation of state.
 !$OMP parallel do default(none) shared(nz,za,G,GV,dza,intx_dza,h,PFu, &
-!$OMP intp_dza,p,dp_neglect,inty_dza,PFv,CS,dM) &
+!$OMP intp_dza,p,dp_neglect,inty_dza,PFv,CS,dM,US) &
 !$OMP private(is_bk,ie_bk,js_bk,je_bk,Isq_bk,Ieq_bk,Jsq_bk, &
 !$OMP Jeq_bk,ioff_bk,joff_bk,i,j,za_bk,intx_za_bk, &
 !$OMP inty_za_bk,dp_bk)
diff --git a/src/core/MOM_barotropic.F90 b/src/core/MOM_barotropic.F90
index 7b2f367487..7984fa97ce 100644
--- a/src/core/MOM_barotropic.F90
+++ b/src/core/MOM_barotropic.F90
@@ -1262,9 +1262,9 @@ subroutine btstep(U_in, V_in, eta_in, dt, bc_accel_u, bc_accel_v, forces, pbce,
 !$OMP find_etaav,jsvf,jevf,isvf,ievf,eta_sum,eta_wtd, &
 !$OMP ubt_sum,uhbt_sum,PFu_bt_sum,Coru_bt_sum,ubt_wtd,&
 !$OMP ubt_trans,vbt_sum,vhbt_sum,PFv_bt_sum, &
-!$OMP Corv_bt_sum,vbt_wtd,vbt_trans,eta_src,dt,dtbt, &
+!$OMP Corv_bt_sum,vbt_wtd,vbt_trans,eta_src,dt_in_T,dtbt, &
 !$OMP Rayleigh_u, Rayleigh_v, &
-!$OMP use_BT_Cont,BTCL_u,uhbt0,BTCL_v,vhbt0,eta,Idt) &
+!$OMP use_BT_Cont,BTCL_u,uhbt0,BTCL_v,vhbt0,eta,Idt,US) &
 !$OMP private(u_max_cor,v_max_cor,eta_cor_max,Htot)
 !$OMP do
   do j=js-1,je+1 ; do I=is-1,ie ; av_rem_u(I,j) = 0.0 ; enddo ; enddo
diff --git a/src/core/MOM_continuity_PPM.F90 b/src/core/MOM_continuity_PPM.F90
index 8a8ecf9da5..a2a125eabe 100644
--- a/src/core/MOM_continuity_PPM.F90
+++ b/src/core/MOM_continuity_PPM.F90
@@ -300,7 +300,7 @@ subroutine zonal_mass_flux(u, h_in, uh, dt_in_T, G, GV, US, CS, LB, uhbt, OBC, &

   call cpu_clock_begin(id_clock_correct)
 !$OMP parallel do default(none) shared(ish,ieh,jsh,jeh,nz,u,h_in,h_L,h_R,use_visc_rem,visc_rem_u, &
-!$OMP uh,dt,G,GV,CS,local_specified_BC,OBC,uhbt,set_BT_cont, &
+!$OMP uh,dt_in_T,US,G,GV,CS,local_specified_BC,OBC,uhbt,set_BT_cont, &
 !$OMP CFL_dt,I_dt,u_cor,BT_cont, local_Flather_OBC) &
 !$OMP private(do_I,duhdu,du,du_max_CFL,du_min_CFL,uh_tot_0,duhdu_tot_0, &
 !$OMP is_simple,FAuI,visc_rem_max,I_vrm,du_lim,dx_E,dx_W,any_simple_OBC ) &
@@ -1099,7 +1099,7 @@ subroutine meridional_mass_flux(v, h_in, vh, dt_in_T, G, GV, US, CS, LB, vhbt, O

   call cpu_clock_begin(id_clock_correct)
 !$OMP parallel do default(none) shared(ish,ieh,jsh,jeh,nz,v,h_in,h_L,h_R,vh,use_visc_rem, &
-!$OMP visc_rem_v,dt,G,GV,CS,local_specified_BC,OBC,vhbt, &
+!$OMP visc_rem_v,dt_in_T,US,G,GV,CS,local_specified_BC,OBC,vhbt, &
 !$OMP set_BT_cont,CFL_dt,I_dt,v_cor,BT_cont, local_Flather_OBC ) &
 !$OMP private(do_I,dvhdv,dv,dv_max_CFL,dv_min_CFL,vh_tot_0, &
 !$OMP dvhdv_tot_0,visc_rem_max,I_vrm,dv_lim,dy_N, &
diff --git a/src/diagnostics/MOM_diagnostics.F90 b/src/diagnostics/MOM_diagnostics.F90
index 8fa106c4e0..7344a5e677 100644
--- a/src/diagnostics/MOM_diagnostics.F90
+++ b/src/diagnostics/MOM_diagnostics.F90
@@ -623,7 +623,7 @@ subroutine calculate_diagnostic_fields(u, v, h, uh, vh, tv, ADp, CDp, p_surf, &
     call wave_speed(h, tv, G, GV, US, CS%cg1, CS%wave_speed_CSp)
     if (CS%id_cg1>0) call post_data(CS%id_cg1, CS%cg1, CS%diag)
     if (CS%id_Rd1>0) then
-!$OMP parallel do default(none) shared(is,ie,js,je,G,CS) &
+!$OMP parallel do default(none) shared(is,ie,js,je,G,CS,US) &
 !$OMP private(f2_h,mag_beta)
       do j=js,je ; do i=is,ie
         ! Blend the equatorial deformation radius with the standard one.
@@ -672,7 +672,7 @@ subroutine calculate_diagnostic_fields(u, v, h, uh, vh, tv, ADp, CDp, p_surf, &
     endif
     if (CS%id_cg_ebt>0) call post_data(CS%id_cg_ebt, CS%cg1, CS%diag)
     if (CS%id_Rd_ebt>0) then
-!$OMP parallel do default(none) shared(is,ie,js,je,G,CS) &
+!$OMP parallel do default(none) shared(is,ie,js,je,G,CS,US) &
 !$OMP private(f2_h,mag_beta)
       do j=js,je ; do i=is,ie
         ! Blend the equatorial deformation radius with the standard one.
diff --git a/src/parameterizations/lateral/MOM_hor_visc.F90 b/src/parameterizations/lateral/MOM_hor_visc.F90
index 82d20c239b..d9afcab581 100644
--- a/src/parameterizations/lateral/MOM_hor_visc.F90
+++ b/src/parameterizations/lateral/MOM_hor_visc.F90
@@ -510,13 +510,24 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, GV, US,
  !$OMP h,rescale_Kh,VarMix,h_neglect,h_neglect3, &
  !$OMP Kh_h,Ah_h,Kh_q,Ah_q,diffu,diffv,apply_OBC,OBC, &
  !$OMP find_FrictWork,FrictWork,use_MEKE_Ku, &
- !$OMP use_MEKE_Au, MEKE, hq, &
+ !$OMP use_MEKE_Au, MEKE,sh_xx_3d,sh_xy_3d, &
+ !$OMP GME_coeff_limiter,boundary_mask,FWfrac,backscat_subround,&
  !$OMP mod_Leith, legacy_bound, div_xx_h, vort_xy_q) &
  !$OMP private(Del2u, Del2v, sh_xx, str_xx, visc_bound_rem, &
+ !$OMP dudx,dvdy,DX_dyBu,DY_dxBu, &
+ !$OMP grad_div_mag_h,grad_div_mag_q, &
+ !$OMP grad_vort_mag_h_2d,grad_vort_mag_q_2d, &
+ !$OMP grad_vort_mag_h,grad_vort_mag_q,vert_vort_mag, &
+ !$OMP inv_PI3,inv_PI5,grad_vel_mag_h, &
+ !$OMP grad_d2vel_mag_h,diss_rate,max_diss_rate, &
+ !$OMP FrictWork_diss,FrictWorkMax, &
+ !$OMP target_diss_rate_GME,GME_coeff, &
+ !$OMP grad_vel_mag_bt_h,H0_GME,GME_coeff_h, &
+ !$OMP str_xx_GME,grad_vel_mag_bt_q,GME_coeff_q,str_xy_GME,FrictWork_GME,&
  !$OMP sh_xy,str_xy,Ah,Kh,AhSm,dvdx,dudy,dDel2udy, &
  !$OMP dDel2vdx,sh_xx_bt, sh_xy_bt, dvdx_bt, dudy_bt, &
  !$OMP bhstr_xx, bhstr_xy,FatH,RoScl, hu, hv,h_u,h_v, &
- !$OMP vort_xy,vort_xy_dx,vort_xy_dy,Vort_mag,AhLth, &
+ !$OMP vort_xy,vort_xy_dx,vort_xy_dy,AhLth, &
  !$OMP div_xx, div_xx_dx, div_xx_dy, local_strain, &
  !$OMP meke_res_fn,Sh_F_pow, &
  !$OMP Shear_mag, h2uq, h2vq, hq, Kh_scale, hrat_min)
diff --git a/src/parameterizations/lateral/MOM_mixed_layer_restrat.F90 b/src/parameterizations/lateral/MOM_mixed_layer_restrat.F90
index ba241ea4b1..62fb3b6732 100644
--- a/src/parameterizations/lateral/MOM_mixed_layer_restrat.F90
+++ b/src/parameterizations/lateral/MOM_mixed_layer_restrat.F90
@@ -295,7 +295,7 @@ subroutine mixedlayer_restrat_general(h, uhtr, vhtr, tv, forces, dt_in_T, MLD_in

   p0(:) = 0.0
 !$OMP parallel default(none) shared(is,ie,js,je,G,GV,US,htot_fast,Rml_av_fast,tv,p0,h,h_avail,&
-!$OMP h_neglect,g_Rho0,I4dt,CS,uhml,uhtr,dt,vhml,vhtr, &
+!$OMP h_neglect,g_Rho0,I4dt,CS,uhml,uhtr,dt_in_T,vhml,vhtr, &
 !$OMP utimescale_diag,vtimescale_diag,forces,dz_neglect, &
 !$OMP htot_slow,MLD_slow,Rml_av_slow,VarMix,I_LFront, &
 !$OMP res_upscale, &
@@ -628,7 +628,7 @@ subroutine mixedlayer_restrat_BML(h, uhtr, vhtr, tv, forces, dt_in_T, G, GV, US,

   p0(:) = 0.0
 !$OMP parallel default(none) shared(is,ie,js,je,G,GV,US,htot,Rml_av,tv,p0,h,h_avail, &
-!$OMP h_neglect,g_Rho0,I4dt,CS,uhml,uhtr,dt,vhml,vhtr, &
+!$OMP h_neglect,g_Rho0,I4dt,CS,uhml,uhtr,dt_in_T,vhml,vhtr, &
 !$OMP utimescale_diag,vtimescale_diag,forces,dz_neglect, &
 !$OMP uDml_diag,vDml_diag,nkml) &
 !$OMP private(Rho0,h_vel,u_star,absf,mom_mixrate,timescale, &
diff --git a/src/parameterizations/lateral/MOM_thickness_diffuse.F90 b/src/parameterizations/lateral/MOM_thickness_diffuse.F90
index 2b4cdfadee..93aca7d0be 100644
--- a/src/parameterizations/lateral/MOM_thickness_diffuse.F90
+++ b/src/parameterizations/lateral/MOM_thickness_diffuse.F90
@@ -182,12 +182,12 @@ subroutine thickness_diffuse(h, uhtr, vhtr, tv, dt, G, GV, US, MEKE, VarMix, CDp
   endif

-!$OMP parallel do default(none) shared(is,ie,js,je,KH_u_CFL,dt,G,CS)
+!$OMP parallel do default(none) shared(is,ie,js,je,KH_u_CFL,dt_in_T,G,CS)
   do j=js,je ; do I=is-1,ie
     KH_u_CFL(I,j) = (0.25*CS%max_Khth_CFL) / &
         (dt_in_T * (G%IdxCu(I,j)*G%IdxCu(I,j) + G%IdyCu(I,j)*G%IdyCu(I,j)))
   enddo ; enddo
-!$OMP parallel do default(none) shared(is,ie,js,je,KH_v_CFL,dt,G,CS)
+!$OMP parallel do default(none) shared(is,ie,js,je,KH_v_CFL,dt_in_T,G,CS)
   do j=js-1,je ; do I=is,ie
     KH_v_CFL(i,J) = (0.25*CS%max_Khth_CFL) / &
         (dt_in_T * (G%IdxCv(i,J)*G%IdxCv(i,J) + G%IdyCv(i,J)*G%IdyCv(i,J)))
@@ -198,7 +198,7 @@ subroutine thickness_diffuse(h, uhtr, vhtr, tv, dt, G, GV, US, MEKE, VarMix, CDp

   ! Set the diffusivities.
 !$OMP parallel default(none) shared(is,ie,js,je,Khth_Loc_u,CS,use_VarMix,VarMix, &
-!$OMP MEKE,Resoln_scaled,KH_u, &
+!$OMP MEKE,Resoln_scaled,KH_u,G,use_QG_Leith,use_Visbeck,&
 !$OMP KH_u_CFL,nz,Khth_Loc,KH_v,KH_v_CFL,int_slope_u, &
 !$OMP int_slope_v,khth_use_ebt_struct)
 !$OMP do
@@ -207,8 +207,8 @@ subroutine thickness_diffuse(h, uhtr, vhtr, tv, dt, G, GV, US, MEKE, VarMix, CDp
   enddo ; enddo

   if (use_VarMix) then
-!$OMP do
     if (use_Visbeck) then
+!$OMP do
       do j=js,je ; do I=is-1,ie
         Khth_loc_u(I,j) = Khth_loc_u(I,j) + &
                           CS%KHTH_Slope_Cff*VarMix%L2u(I,j) * VarMix%SN_u(I,j)
@@ -217,8 +217,8 @@ subroutine thickness_diffuse(h, uhtr, vhtr, tv, dt, G, GV, US, MEKE, VarMix, CDp
   endif

   if (associated(MEKE)) then ; if (associated(MEKE%Kh)) then
-!$OMP do
     if (CS%MEKE_GEOMETRIC) then
+!$OMP do
       do j=js,je ; do I=is-1,ie
         Khth_loc_u(I,j) = Khth_loc_u(I,j) + G%mask2dCu(I,j) * CS%MEKE_GEOMETRIC_alpha * &
                           0.5*(MEKE%MEKE(i,j)+MEKE%MEKE(i+1,j)) / &
@@ -267,16 +267,16 @@ subroutine thickness_diffuse(h, uhtr, vhtr, tv, dt, G, GV, US, MEKE, VarMix, CDp
   endif

   if (use_VarMix) then
-!$OMP do
     if (use_QG_Leith) then
+!$OMP do
       do k=1,nz ; do j=js,je ; do I=is-1,ie
         KH_u(I,j,k) = VarMix%KH_u_QG(I,j,k)
       enddo ; enddo ; enddo
     endif
   endif

-!$OMP do
   if (CS%use_GME_thickness_diffuse) then
+!$OMP do
     do k=1,nz+1 ; do j=js,je ; do I=is-1,ie
       CS%KH_u_GME(I,j,k) = KH_u(I,j,k)
     enddo ; enddo ; enddo
   endif
@@ -288,16 +288,16 @@ subroutine thickness_diffuse(h, uhtr, vhtr, tv, dt, G, GV, US, MEKE, VarMix, CDp
   enddo ; enddo

   if (use_VarMix) then
-!$OMP do
     if (use_Visbeck) then
+!$OMP do
       do J=js-1,je ; do i=is,ie
         Khth_loc(i,j) = Khth_loc(i,j) + CS%KHTH_Slope_Cff*VarMix%L2v(i,J)*VarMix%SN_v(i,J)
       enddo ; enddo
     endif
   endif

   if (associated(MEKE)) then ; if (associated(MEKE%Kh)) then
-!$OMP do
     if (CS%MEKE_GEOMETRIC) then
+!$OMP do
       do j=js-1,je ; do I=is,ie
         Khth_loc(I,j) = Khth_loc(I,j) + G%mask2dCv(i,J) * CS%MEKE_GEOMETRIC_alpha * &
                         0.5*(MEKE%MEKE(i,j)+MEKE%MEKE(i,j+1)) / &
@@ -349,24 +349,24 @@ subroutine thickness_diffuse(h, uhtr, vhtr, tv, dt, G, GV, US, MEKE, VarMix, CDp
   endif

   if (use_VarMix) then
-!$OMP do
     if (use_QG_Leith) then
+!$OMP do
       do k=1,nz ; do J=js-1,je ; do i=is,ie
         KH_v(i,J,k) = VarMix%KH_v_QG(i,J,k)
       enddo ; enddo ; enddo
     endif
   endif

-!$OMP do
   if (CS%use_GME_thickness_diffuse) then
+!$OMP do
     do k=1,nz+1 ; do J=js-1,je ; do i=is,ie
       CS%KH_v_GME(i,J,k) = KH_v(i,J,k)
     enddo ; enddo ; enddo
   endif

   if (associated(MEKE)) then ; if (associated(MEKE%Kh)) then
-!$OMP do
     if (CS%MEKE_GEOMETRIC) then
+!$OMP do
       do j=js,je ; do I=is,ie
         !### This will not give bitwise rotational symmetry. Add parentheses.
         MEKE%Kh(i,j) = CS%MEKE_GEOMETRIC_alpha * MEKE%MEKE(i,j) / &
@@ -477,7 +477,7 @@ subroutine thickness_diffuse(h, uhtr, vhtr, tv, dt, G, GV, US, MEKE, VarMix, CDp
   endif

- !$OMP parallel do default(none) shared(is,ie,js,je,nz,uhtr,uhD,dt,vhtr,CDp,vhD,h,G,GV)
+ !$OMP parallel do default(none) shared(is,ie,js,je,nz,uhtr,uhD,dt_in_T,vhtr,CDp,vhD,h,G,GV)
   do k=1,nz
     do j=js,je ; do I=is-1,ie
       uhtr(I,j,k) = uhtr(I,j,k) + uhD(I,j,k) * dt_in_T
@@ -720,7 +720,7 @@ subroutine thickness_diffuse_full(h, e, Kh_u, Kh_v, tv, uhD, vhD, cg1, dt_in_T,
 !$OMP int_slope_u,KH_u,uhtot,h_frac,h_avail_rsum, &
 !$OMP uhD,h_avail,G_scale,work_u,CS,slope_x,cg1, &
 !$OMP diag_sfn_x, diag_sfn_unlim_x,N2_floor, &
-!$OMP present_slope_x,G_rho0) &
+!$OMP present_slope_x,G_rho0,Slope_x_PE,hN2_x_PE) &
 !$OMP private(drdiA,drdiB,drdkL,drdkR,pres_u,T_u,S_u, &
 !$OMP drho_dT_u,drho_dS_u,hg2A,hg2B,hg2L,hg2R,haA, &
 !$OMP haB,haL,haR,dzaL,dzaR,wtA,wtB,wtL,wtR,drdz, &
@@ -973,7 +973,7 @@ subroutine thickness_diffuse_full(h, e, Kh_u, Kh_v, tv, uhD, vhD, cg1, dt_in_T,
 !$OMP int_slope_v,KH_v,vhtot,h_frac,h_avail_rsum, &
 !$OMP vhD,h_avail,G_scale,Work_v,CS,slope_y,cg1, &
 !$OMP diag_sfn_y, diag_sfn_unlim_y,N2_floor, &
-!$OMP present_slope_y,G_rho0) &
+!$OMP present_slope_y,G_rho0,Slope_y_PE,hN2_y_PE) &
 !$OMP private(drdjA,drdjB,drdkL,drdkR,pres_v,T_v,S_v, &
 !$OMP drho_dT_v,drho_dS_v,hg2A,hg2B,hg2L,hg2R,haA, &
 !$OMP haB,haL,haR,dzaL,dzaR,wtA,wtB,wtL,wtR,drdz, &
diff --git a/src/parameterizations/vertical/MOM_diabatic_aux.F90 b/src/parameterizations/vertical/MOM_diabatic_aux.F90
index 96652a9f45..740cf33897 100644
--- a/src/parameterizations/vertical/MOM_diabatic_aux.F90
+++ b/src/parameterizations/vertical/MOM_diabatic_aux.F90
@@ -350,7 +350,7 @@ subroutine adjust_salt(h, tv, G, GV, CS, halo)

   salt_add_col(:,:) = 0.0

- !$OMP parallel do default(none) private(mc)
+ !$OMP parallel do default(shared) private(mc)
   do j=js,je
     do k=nz,1,-1 ; do i=is,ie
       if ( (G%mask2dT(i,j) > 0.0) .and. &
@@ -595,7 +595,7 @@ subroutine find_uv_at_h(u, v, h, u_h, v_h, G, GV, US, ea, eb)
          "in call to find_uv_at_h.")
 !$OMP parallel do default(none) shared(is,ie,js,je,G,GV,mix_vertically,h,h_neglect, &
 !$OMP eb,u_h,u,v_h,v,nz,ea) &
-!$OMP private(s,Idenom,a_w,a_e,a_s,a_n,b_denom_1,b1,d1,c1)
+!$OMP private(sum_area,Idenom,a_w,a_e,a_s,a_n,b_denom_1,b1,d1,c1)
   do j=js,je
     do i=is,ie
       sum_area = G%areaCu(I-1,j) + G%areaCu(I,j)
@@ -965,7 +965,7 @@ subroutine applyBoundaryFluxesInOut(CS, G, GV, US, dt, fluxes, optics, nsw, h, t
 !$OMP parallel do default(none) shared(is,ie,js,je,nz,h,tv,nsw,G,GV,US,optics,fluxes,dt, &
 !$OMP H_limit_fluxes,numberOfGroundings,iGround,jGround,&
 !$OMP nonPenSW,hGrounding,CS,Idt,aggregate_FW_forcing, &
- !$OMP minimum_forcing_depth,evap_CFL_limit, &
+ !$OMP minimum_forcing_depth,evap_CFL_limit,dt_in_T, &
 !$OMP calculate_buoyancy,netPen,SkinBuoyFlux,GoRho, &
 !$OMP calculate_energetics,dSV_dT,dSV_dS,cTKE,g_Hconv2) &
 !$OMP private(opacityBand,h2d,T2d,netMassInOut,netMassOut, &
diff --git a/src/parameterizations/vertical/MOM_geothermal.F90 b/src/parameterizations/vertical/MOM_geothermal.F90
index 6d81955ab9..db14ef930a 100644
--- a/src/parameterizations/vertical/MOM_geothermal.F90
+++ b/src/parameterizations/vertical/MOM_geothermal.F90
@@ -156,7 +156,9 @@ subroutine geothermal(h, tv, dt, ea, eb, G, GV, CS, halo)
   if (compute_T_old) T_old(:,:,:) = 0.0

 !$OMP parallel do default(none) shared(is,ie,js,je,G,GV,CS,dt,Irho_cp,nkmb,tv, &
-!$OMP p_Ref,h,Angstrom,nz,H_neglect,eb) &
+!$OMP p_Ref,h,Angstrom,nz,H_neglect,eb, &
+!$OMP compute_h_old,compute_T_old,h_old,T_old, &
+!$OMP work_3d,Idt) &
 !$OMP private(num_start,heat_rem,do_i,h_geo_rem,num_left,&
 !$OMP isj,iej,Rcv_BL,h_heated,heat_avail,k_tgt, &
 !$OMP Rcv_tgt,Rcv,dRcv_dT,T2,S2,dRcv_dT_, &
diff --git a/src/parameterizations/vertical/MOM_vert_friction.F90 b/src/parameterizations/vertical/MOM_vert_friction.F90
index b282995d3f..70c9533c69 100644
--- a/src/parameterizations/vertical/MOM_vert_friction.F90
+++ b/src/parameterizations/vertical/MOM_vert_friction.F90
@@ -1399,7 +1399,7 @@ subroutine vertvisc_limit_vel(u, v, h, ADp, CDp, forces, visc, dt, G, GV, US, CS
     enddo ! j-loop
   else ! Do not report accelerations leading to large velocities.
     if (CS%CFL_based_trunc) then
-!$OMP parallel do default(none) shared(nz,js,je,Isq,Ieq,u,dt,G,CS,h,H_report)
+!$OMP parallel do default(none) shared(nz,js,je,Isq,Ieq,u,dt_in_T,G,CS,h,H_report)
       do k=1,nz ; do j=js,je ; do I=Isq,Ieq
         if (abs(u(I,j,k)) < CS%vel_underflow) then ; u(I,j,k) = 0.0
         elseif ((u(I,j,k) * (dt_in_T * G%dy_Cu(I,j))) * G%IareaT(i+1,j) < -CS%CFL_trunc) then
diff --git a/src/tracer/MOM_tracer_advect.F90 b/src/tracer/MOM_tracer_advect.F90
index 7717fcc050..753faa2a56 100644
--- a/src/tracer/MOM_tracer_advect.F90
+++ b/src/tracer/MOM_tracer_advect.F90
@@ -252,7 +252,7 @@ subroutine advect_tracer(h_end, uhtr, vhtr, OBC, dt, G, GV, US, CS, Reg, &

 !$OMP parallel do default(none) shared(nz,domore_k,x_first,Tr,hprev,uhr,uh_neglect, &
 !$OMP OBC,domore_u,ntr,Idt,isv,iev,jsv,jev,stencil, &
-!$OMP G,GV,CS,vhr,vh_neglect,domore_v)
+!$OMP G,GV,CS,vhr,vh_neglect,domore_v,US)

 ! To ensure positive definiteness of the thickness at each iteration, the
 ! mass fluxes out of each layer are checked each step, and limited to keep
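
A minimal sketch (not MOM6 code) of why so many clause lists change in this patch: with default(none) the compiler rejects any variable referenced in the region but missing from the lists, so when dt was renamed to dt_in_T in the loop bodies, every shared() list naming it had to be updated too; serial builds never parse the directives, which is why the breakage only appears with -fopenmp (gfortran) or -qopenmp (Intel).

program default_none_demo
  implicit none
  integer :: i
  real :: dt_in_T  ! renamed from dt in the surrounding code
  real :: h(100)

  dt_in_T = 0.5
  h = 1.0

  ! Writing shared(h, dt) here would be a compile-time error under
  ! default(none), but only when OpenMP is enabled.
  !$omp parallel do default(none) shared(h, dt_in_T) private(i)
  do i = 1, 100
    h(i) = h(i)*dt_in_T
  enddo

  print *, h(1)
end program default_none_demo
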
From 4d846ab7705d03fb650a4e2b8f24d53431979c6c Mon Sep 17 00:00:00 2001
From: Niki Zadeh
Date: Tue, 1 Oct 2019 14:33:27 -0400
Subject: [PATCH 5/5] Fix openmp threads to reproduce non-openmp answers

- One variable was set before the OMP section and needs to be firstprivate.
- We have to check for consistency of answers between openmp and non-openmp
  builds.
---
 src/parameterizations/vertical/MOM_diabatic_aux.F90 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/parameterizations/vertical/MOM_diabatic_aux.F90 b/src/parameterizations/vertical/MOM_diabatic_aux.F90
index 740cf33897..1431956a89 100644
--- a/src/parameterizations/vertical/MOM_diabatic_aux.F90
+++ b/src/parameterizations/vertical/MOM_diabatic_aux.F90
@@ -974,9 +974,9 @@ subroutine applyBoundaryFluxesInOut(CS, G, GV, US, dt, fluxes, optics, nsw, h, t
 !$OMP dThickness,dTemp,dSalt,hOld,Ithickness, &
 !$OMP netMassIn,pres,d_pres,p_lay,dSV_dT_2d, &
 !$OMP netmassinout_rate,netheat_rate,netsalt_rate, &
- !$OMP drhodt,drhods,pen_sw_bnd_rate,SurfPressure, &
+ !$OMP drhodt,drhods,pen_sw_bnd_rate, &
 !$OMP pen_TKE_2d,Temp_in,Salin_in,RivermixConst) &
- !$OMP firstprivate(start,npts)
+ !$OMP firstprivate(start,npts,SurfPressure)
   do j=js,je
     ! Work in vertical slices for efficiency
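
A minimal sketch (not the MOM6 code) of the shared-versus-firstprivate distinction behind this final fix: a work variable assigned before the parallel region and then modified inside the loop must not be shared, or the threads race on a single copy; firstprivate gives each thread its own copy initialized to the pre-set value, which reproduces the serial answers.

program firstprivate_demo
  implicit none
  integer :: j
  real :: SurfPressure(10)  ! stands in for the pre-set work array
  real :: total(100)

  SurfPressure = 0.0  ! set once, before the parallel region

  ! As shared(SurfPressure) the per-iteration writes below would race;
  ! firstprivate copies the initialized value into each thread.
  !$omp parallel do default(none) shared(total) firstprivate(SurfPressure) private(j)
  do j = 1, 100
    SurfPressure(1) = real(j)  ! per-iteration modification
    total(j) = sum(SurfPressure)
  enddo

  print *, total(1), total(100)
end program firstprivate_demo
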