From ffbd82a809374e6df5b7b8b4a7d085b574c7b07d Mon Sep 17 00:00:00 2001 From: Marshall Ward Date: Thu, 29 Apr 2021 10:33:52 -0400 Subject: [PATCH 1/6] Performance monitoring; GitHub integration This patch introduces two new testing targets to the verification suite based on a small configuration based on the `benchmark` regression test. The profile test is saved a `p0` in `.testing`. Future tests can be included if appropriate. The new targets: * `make profile`: Run the model and record the FMS timings. * `make perf`: Run the model through the `perf` tool and record timings for the resolvable functions (as symbols). In both cases, the timings are converted to JSON output files and the top results are reported to stdout, and readable in GitHub actions output. It can also be run locally. Support Python scripts have been included to do this work. This will require a functional Python environment. Some system and configuration data is logged alongside the timings, but this is still rather incomplete and needs some further planning. Times are compared to the target build (usually dev/gfdl). ANSI terminal highlighting (i.e. color) is to used to highlight excessive differences. Current issues: - Model configuration - GitHub timings are still rather unreliable, and should currently only be treated as crude estimates. This should be considered a work in progress. - The GitHub profiling rule still builds the standard configuration, evem though it is unused. - Additional tools are required to push the timings to some database, either a local sqlite3 or an external one. --- .github/actions/testing-setup/action.yml | 1 + .github/workflows/perfmon.yml | 36 ++ .testing/Makefile | 76 +++- .testing/p0/MOM_input | 505 +++++++++++++++++++++++ .testing/p0/MOM_override | 0 .testing/p0/diag_table | 91 ++++ .testing/p0/input.nml | 22 + .testing/tools/compare_clocks.py | 88 ++++ .testing/tools/compare_perf.py | 110 +++++ .testing/tools/parse_fms_clocks.py | 120 ++++++ .testing/tools/parse_perf.py | 71 ++++ 11 files changed, 1119 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/perfmon.yml create mode 100644 .testing/p0/MOM_input create mode 100644 .testing/p0/MOM_override create mode 100644 .testing/p0/diag_table create mode 100644 .testing/p0/input.nml create mode 100755 .testing/tools/compare_clocks.py create mode 100755 .testing/tools/compare_perf.py create mode 100755 .testing/tools/parse_fms_clocks.py create mode 100755 .testing/tools/parse_perf.py diff --git a/.github/actions/testing-setup/action.yml b/.github/actions/testing-setup/action.yml index c47270af0d..1ab96aa3df 100644 --- a/.github/actions/testing-setup/action.yml +++ b/.github/actions/testing-setup/action.yml @@ -33,6 +33,7 @@ runs: echo "::group::Install linux packages" sudo apt-get update sudo apt-get install netcdf-bin libnetcdf-dev libnetcdff-dev mpich libmpich-dev + sudo apt-get install linux-tools-common echo "::endgroup::" - name: Compile FMS library diff --git a/.github/workflows/perfmon.yml b/.github/workflows/perfmon.yml new file mode 100644 index 0000000000..00e645c4fd --- /dev/null +++ b/.github/workflows/perfmon.yml @@ -0,0 +1,36 @@ +name: Performance Monitor + +on: [pull_request] + +jobs: + build-test-perfmon: + + runs-on: ubuntu-latest + defaults: + run: + working-directory: .testing + + steps: + - uses: actions/checkout@v2 + with: + submodules: recursive + + - uses: ./.github/actions/testing-setup + + - name: Compile optimized models + run: >- + make -j build.prof + MOM_TARGET_SLUG=$GITHUB_REPOSITORY + MOM_TARGET_LOCAL_BRANCH=$GITHUB_BASE_REF + DO_REGRESSION_TESTS=true + + - name: Generate profile data + run: >- + pip install f90nml && + make profile + DO_REGRESSION_TESTS=true + + - name: Generate perf data + run: | + sudo sysctl -w kernel.perf_event_paranoid=2 + make perf DO_REGRESSION_TESTS=true diff --git a/.testing/Makefile b/.testing/Makefile index 45d05cd23f..89ea64e1e2 100644 --- a/.testing/Makefile +++ b/.testing/Makefile @@ -34,6 +34,7 @@ # Build configuration: # FCFLAGS_DEBUG Testing ("debug") compiler flags # FCFLAGS_REPRO Production ("repro") compiler flags +# FCFLAGS_OPT Aggressive optimization compiler flags # FCFLAGS_INIT Variable initialization flags # FCFLAGS_COVERAGE Code coverage flags # @@ -72,6 +73,7 @@ export MPIFC # NOTE: FMS will be built using FCFLAGS_DEBUG FCFLAGS_DEBUG ?= -g -O0 FCFLAGS_REPRO ?= -g -O2 +FCFLAGS_OPT ?= -g -O3 -mavx -fno-omit-frame-pointer FCFLAGS_INIT ?= FCFLAGS_COVERAGE ?= # Additional notes: @@ -96,6 +98,7 @@ DO_REPRO_TESTS ?= # Time measurement (configurable by the CI) TIME ?= time + #--- # Dependencies DEPS = deps @@ -122,6 +125,11 @@ ifeq ($(DO_REPRO_TESTS), true) TESTS += repros endif +# Profiling +ifeq ($(DO_PROFILE), false) + BUILDS += opt opt_target +endif + # The following variables are configured by Travis: # DO_REGRESSION_TESTS: true if $(TRAVIS_PULL_REQUEST) is a PR number # MOM_TARGET_SLUG: TRAVIS_REPO_SLUG @@ -195,6 +203,7 @@ endif .PHONY: all build.regressions all: $(foreach b,$(BUILDS),build/$(b)/MOM6) $(VENV_PATH) build.regressions: $(foreach b,symmetric target,build/$(b)/MOM6) +build.prof: $(foreach b,opt opt_target,build/$(b)/MOM6) # Executable BUILD_TARGETS = MOM6 Makefile path_names @@ -217,6 +226,7 @@ PATH_FMS = PATH="${PATH}:../../$(DEPS)/bin" SYMMETRIC_FCFLAGS := FCFLAGS="$(FCFLAGS_DEBUG) $(FCFLAGS_INIT) $(COVERAGE) $(FCFLAGS_FMS)" ASYMMETRIC_FCFLAGS := FCFLAGS="$(FCFLAGS_DEBUG) $(FCFLAGS_INIT) $(FCFLAGS_FMS)" REPRO_FCFLAGS := FCFLAGS="$(FCFLAGS_REPRO) $(FCFLAGS_FMS)" +OPT_FCFLAGS := FCFLAGS="$(FCFLAGS_OPT) $(FCFLAGS_FMS)" OPENMP_FCFLAGS := FCFLAGS="$(FCFLAGS_DEBUG) $(FCFLAGS_INIT) $(FCFLAGS_FMS)" TARGET_FCFLAGS := FCFLAGS="$(FCFLAGS_DEBUG) $(FCFLAGS_INIT) $(FCFLAGS_FMS)" @@ -230,6 +240,8 @@ build/asymmetric/Makefile: MOM_ENV=$(PATH_FMS) $(ASYMMETRIC_FCFLAGS) $(MOM_LDFLA build/repro/Makefile: MOM_ENV=$(PATH_FMS) $(REPRO_FCFLAGS) $(MOM_LDFLAGS) build/openmp/Makefile: MOM_ENV=$(PATH_FMS) $(OPENMP_FCFLAGS) $(MOM_LDFLAGS) build/target/Makefile: MOM_ENV=$(PATH_FMS) $(TARGET_FCFLAGS) $(MOM_LDFLAGS) +build/opt/Makefile: MOM_ENV=$(PATH_FMS) $(OPT_FCFLAGS) $(MOM_LDFLAGS) +build/opt_target/Makefile: MOM_ENV=$(PATH_FMS) $(OPT_FCFLAGS) $(MOM_LDFLAGS) build/coupled/Makefile: MOM_ENV=$(PATH_FMS) $(SYMMETRIC_FCFLAGS) $(SYMMETRIC_LDFLAGS) build/nuopc/Makefile: MOM_ENV=$(PATH_FMS) $(SYMMETRIC_FCFLAGS) $(SYMMETRIC_LDFLAGS) build/mct/Makefile: MOM_ENV=$(PATH_FMS) $(SYMMETRIC_FCFLAGS) $(SYMMETRIC_LDFLAGS) @@ -240,12 +252,15 @@ build/asymmetric/Makefile: MOM_ACFLAGS=--enable-asymmetric build/repro/Makefile: MOM_ACFLAGS= build/openmp/Makefile: MOM_ACFLAGS=--enable-openmp build/target/Makefile: MOM_ACFLAGS= +build/opt/Makefile: MOM_ACFLAGS= +build/opt_target/Makefile: MOM_ACFLAGS= build/coupled/Makefile: MOM_ACFLAGS=--with-driver=coupled_driver build/nuopc/Makefile: MOM_ACFLAGS=--with-driver=nuopc_driver build/mct/Makefile: MOM_ACFLAGS=--with-driver=mct_driver # Fetch regression target source code build/target/Makefile: | $(TARGET_CODEBASE) +build/opt_target/Makefile: | $(TARGET_CODEBASE) # Define source code dependencies @@ -276,7 +291,8 @@ build/%/Makefile: ../ac/configure ../ac/Makefile.in $(DEPS)/lib/libFMS.a $(MKMF) # Fetch the regression target codebase -build/target/Makefile: $(TARGET_CODEBASE)/ac/configure $(DEPS)/lib/libFMS.a $(MKMF) $(LIST_PATHS) +build/target/Makefile build/opt_target/Makefile: \ + $(TARGET_CODEBASE)/ac/configure $(DEPS)/lib/libFMS.a $(MKMF) $(LIST_PATHS) mkdir -p $(@D) cd $(@D) \ && $(MOM_ENV) ../../$(TARGET_CODEBASE)/ac/configure $(MOM_ACFLAGS) \ @@ -620,6 +636,64 @@ test.summary: fi +#--- +# Profiling +# XXX: This is experimental work to track, log, and report changes in runtime +PCONFIGS = p0 + +.PHONY: profile +profile: $(foreach p,$(PCONFIGS), prof.$(p)) + +.PHONY: prof.p0 +prof.p0: work/p0/opt/clocks.json work/p0/opt_target/clocks.json + python tools/compare_clocks.py $^ + +work/p0/%/clocks.json: work/p0/%/std.out + python tools/parse_fms_clocks.py -d $(@D) $^ > $@ + +work/p0/opt/std.out: build/opt/MOM6 +work/p0/opt_target/std.out: build/opt_target/MOM6 + +work/p0/%/std.out: + mkdir -p $(@D) + cp -RL p0/* $(@D) + mkdir -p $(@D)/RESTART + echo -e "" > $(@D)/MOM_override + cd $(@D) \ + && $(MPIRUN) -n 1 ../../../$< 2> std.err > std.out + +#--- +# Same but with perf + +# TODO: This expects the -e flag, can I handle it in the command? +PERF_EVENTS ?= + +.PHONY: perf +perf: $(foreach p,$(PCONFIGS), perf.$(p)) + +.PHONY: prof.p0 +perf.p0: work/p0/opt/profile.json work/p0/opt_target/profile.json + python tools/compare_perf.py $^ + +work/p0/%/profile.json: work/p0/%/perf.data + python tools/parse_perf.py -f $< > $@ + +work/p0/opt/perf.data: build/opt/MOM6 +work/p0/opt_target/perf.data: build/opt_target/MOM6 + +work/p0/%/perf.data: + mkdir -p $(@D) + cp -RL p0/* $(@D) + mkdir -p $(@D)/RESTART + echo -e "" > $(@D)/MOM_override + cd $(@D) \ + && perf record \ + -F 3999 \ + ${PERF_EVENTS} \ + ../../../$< 2> std.perf.err > std.perf.out \ + || cat std.perf.err + + #---- # NOTE: These tests assert that we are in the .testing directory. diff --git a/.testing/p0/MOM_input b/.testing/p0/MOM_input new file mode 100644 index 0000000000..8f751d7bf1 --- /dev/null +++ b/.testing/p0/MOM_input @@ -0,0 +1,505 @@ +! This input file provides the adjustable run-time parameters for version 6 of the Modular Ocean Model (MOM6). +! Where appropriate, parameters use usually given in MKS units. + +! This particular file is for the example in benchmark. + +! This MOM_input file typically contains only the non-default values that are needed to reproduce this example. +! A full list of parameters for this example can be found in the corresponding MOM_parameter_doc.all file +! which is generated by the model at run-time. + +! === module MOM_domains === +NIGLOBAL = 32 ! + ! The total number of thickness grid points in the x-direction in the physical + ! domain. With STATIC_MEMORY_ this is set in MOM_memory.h at compile time. +NJGLOBAL = 32 ! + ! The total number of thickness grid points in the y-direction in the physical + ! domain. With STATIC_MEMORY_ this is set in MOM_memory.h at compile time. +LAYOUT = 1, 1 + ! The processor layout that was actually used. + +! === module MOM === +THICKNESSDIFFUSE = True ! [Boolean] default = False + ! If true, interface heights are diffused with a coefficient of KHTH. +THICKNESSDIFFUSE_FIRST = True ! [Boolean] default = False + ! If true, do thickness diffusion before dynamics. This is only used if + ! THICKNESSDIFFUSE is true. +DT = 900.0 ! [s] + ! The (baroclinic) dynamics time step. The time-step that is actually used will + ! be an integer fraction of the forcing time-step (DT_FORCING in ocean-only mode + ! or the coupling timestep in coupled mode.) +DT_THERM = 3600.0 ! [s] default = 900.0 + ! The thermodynamic and tracer advection time step. Ideally DT_THERM should be + ! an integer multiple of DT and less than the forcing or coupling time-step, + ! unless THERMO_SPANS_COUPLING is true, in which case DT_THERM can be an integer + ! multiple of the coupling timestep. By default DT_THERM is set to DT. +DTBT_RESET_PERIOD = 0.0 ! [s] default = 3600.0 + ! The period between recalculations of DTBT (if DTBT <= 0). If DTBT_RESET_PERIOD + ! is negative, DTBT is set based only on information available at + ! initialization. If 0, DTBT will be set every dynamics time step. The default + ! is set by DT_THERM. This is only used if SPLIT is true. +FRAZIL = True ! [Boolean] default = False + ! If true, water freezes if it gets too cold, and the accumulated heat deficit + ! is returned in the surface state. FRAZIL is only used if + ! ENABLE_THERMODYNAMICS is true. +C_P = 3925.0 ! [J kg-1 K-1] default = 3991.86795711963 + ! The heat capacity of sea water, approximated as a constant. This is only used + ! if ENABLE_THERMODYNAMICS is true. The default value is from the TEOS-10 + ! definition of conservative temperature. +SAVE_INITIAL_CONDS = True ! [Boolean] default = False + ! If true, write the initial conditions to a file given by IC_OUTPUT_FILE. +IC_OUTPUT_FILE = "GOLD_IC" ! default = "MOM_IC" + ! The file into which to write the initial conditions. + +! === module MOM_fixed_initialization === +INPUTDIR = "INPUT" ! default = "." + ! The directory in which input files are found. + +! === module MOM_grid_init === +GRID_CONFIG = "cartesian" ! + ! A character string that determines the method for defining the horizontal + ! grid. Current options are: + ! mosaic - read the grid from a mosaic (supergrid) + ! file set by GRID_FILE. + ! cartesian - use a (flat) Cartesian grid. + ! spherical - use a simple spherical grid. + ! mercator - use a Mercator spherical grid. +SOUTHLAT = -41.0 ! [degrees] + ! The southern latitude of the domain. +LENLAT = 41.0 ! [degrees] + ! The latitudinal length of the domain. +LENLON = 90.0 ! [degrees] + ! The longitudinal length of the domain. +ISOTROPIC = True ! [Boolean] default = False + ! If true, an isotropic grid on a sphere (also known as a Mercator grid) is + ! used. With an isotropic grid, the meridional extent of the domain (LENLAT), + ! the zonal extent (LENLON), and the number of grid points in each direction are + ! _not_ independent. In MOM the meridional extent is determined to fit the zonal + ! extent and the number of grid points, while grid is perfectly isotropic. +TOPO_CONFIG = "benchmark" ! + ! This specifies how bathymetry is specified: + ! file - read bathymetric information from the file + ! specified by (TOPO_FILE). + ! flat - flat bottom set to MAXIMUM_DEPTH. + ! bowl - an analytically specified bowl-shaped basin + ! ranging between MAXIMUM_DEPTH and MINIMUM_DEPTH. + ! spoon - a similar shape to 'bowl', but with an vertical + ! wall at the southern face. + ! halfpipe - a zonally uniform channel with a half-sine + ! profile in the meridional direction. + ! bbuilder - build topography from list of functions. + ! benchmark - use the benchmark test case topography. + ! Neverworld - use the Neverworld test case topography. + ! DOME - use a slope and channel configuration for the + ! DOME sill-overflow test case. + ! ISOMIP - use a slope and channel configuration for the + ! ISOMIP test case. + ! DOME2D - use a shelf and slope configuration for the + ! DOME2D gravity current/overflow test case. + ! Kelvin - flat but with rotated land mask. + ! seamount - Gaussian bump for spontaneous motion test case. + ! dumbbell - Sloshing channel with reservoirs on both ends. + ! shelfwave - exponential slope for shelfwave test case. + ! Phillips - ACC-like idealized topography used in the Phillips config. + ! dense - Denmark Strait-like dense water formation and overflow. + ! USER - call a user modified routine. + +! === module benchmark_initialize_topography === +MINIMUM_DEPTH = 1.0 ! [m] default = 0.0 + ! The minimum depth of the ocean. +MAXIMUM_DEPTH = 5500.0 ! [m] + ! The maximum depth of the ocean. + +! === module MOM_verticalGrid === +! Parameters providing information about the vertical grid. +NK = 75 ! [nondim] + ! The number of model layers. + +! === module MOM_EOS === + +! === module MOM_tracer_flow_control === +USE_IDEAL_AGE_TRACER = True ! [Boolean] default = False + ! If true, use the ideal_age_example tracer package. + +! === module ideal_age_example === + +! === module MOM_coord_initialization === +COORD_CONFIG = "ts_range" ! default = "none" + ! This specifies how layers are to be defined: + ! ALE or none - used to avoid defining layers in ALE mode + ! file - read coordinate information from the file + ! specified by (COORD_FILE). + ! BFB - Custom coords for buoyancy-forced basin case + ! based on SST_S, T_BOT and DRHO_DT. + ! linear - linear based on interfaces not layers + ! layer_ref - linear based on layer densities + ! ts_ref - use reference temperature and salinity + ! ts_range - use range of temperature and salinity + ! (T_REF and S_REF) to determine surface density + ! and GINT calculate internal densities. + ! gprime - use reference density (RHO_0) for surface + ! density and GINT calculate internal densities. + ! ts_profile - use temperature and salinity profiles + ! (read from COORD_FILE) to set layer densities. + ! USER - call a user modified routine. +TS_RANGE_T_LIGHT = 25.0 ! [degC] default = 10.0 + ! The initial temperature of the lightest layer when COORD_CONFIG is set to + ! ts_range. +TS_RANGE_T_DENSE = 3.0 ! [degC] default = 10.0 + ! The initial temperature of the densest layer when COORD_CONFIG is set to + ! ts_range. +TS_RANGE_RESOLN_RATIO = 5.0 ! [nondim] default = 1.0 + ! The ratio of density space resolution in the densest part of the range to that + ! in the lightest part of the range when COORD_CONFIG is set to ts_range. Values + ! greater than 1 increase the resolution of the denser water. + +! === module MOM_state_initialization === +THICKNESS_CONFIG = "benchmark" ! default = "uniform" + ! A string that determines how the initial layer thicknesses are specified for a + ! new run: + ! file - read interface heights from the file specified + ! thickness_file - read thicknesses from the file specified + ! by (THICKNESS_FILE). + ! coord - determined by ALE coordinate. + ! uniform - uniform thickness layers evenly distributed + ! between the surface and MAXIMUM_DEPTH. + ! list - read a list of positive interface depths. + ! DOME - use a slope and channel configuration for the + ! DOME sill-overflow test case. + ! ISOMIP - use a configuration for the + ! ISOMIP test case. + ! benchmark - use the benchmark test case thicknesses. + ! Neverworld - use the Neverworld test case thicknesses. + ! search - search a density profile for the interface + ! densities. This is not yet implemented. + ! circle_obcs - the circle_obcs test case is used. + ! DOME2D - 2D version of DOME initialization. + ! adjustment2d - 2D lock exchange thickness ICs. + ! sloshing - sloshing gravity thickness ICs. + ! seamount - no motion test with seamount ICs. + ! dumbbell - sloshing channel ICs. + ! soliton - Equatorial Rossby soliton. + ! rossby_front - a mixed layer front in thermal wind balance. + ! USER - call a user modified routine. + +! === module benchmark_initialize_thickness === +TS_CONFIG = "benchmark" ! + ! A string that determines how the initial tempertures and salinities are + ! specified for a new run: + ! file - read velocities from the file specified + ! by (TS_FILE). + ! fit - find the temperatures that are consistent with + ! the layer densities and salinity S_REF. + ! TS_profile - use temperature and salinity profiles + ! (read from TS_FILE) to set layer densities. + ! benchmark - use the benchmark test case T & S. + ! linear - linear in logical layer space. + ! DOME2D - 2D DOME initialization. + ! ISOMIP - ISOMIP initialization. + ! adjustment2d - 2d lock exchange T/S ICs. + ! sloshing - sloshing mode T/S ICs. + ! seamount - no motion test with seamount ICs. + ! dumbbell - sloshing channel ICs. + ! rossby_front - a mixed layer front in thermal wind balance. + ! SCM_CVMix_tests - used in the SCM CVMix tests. + ! USER - call a user modified routine. + +! === module MOM_diag_mediator === + +! === module MOM_lateral_mixing_coeffs === +USE_VARIABLE_MIXING = True ! [Boolean] default = False + ! If true, the variable mixing code will be called. This allows diagnostics to + ! be created even if the scheme is not used. If KHTR_SLOPE_CFF>0 or + ! KhTh_Slope_Cff>0, this is set to true regardless of what is in the parameter + ! file. +USE_VISBECK = True ! [Boolean] default = False + ! If true, use the Visbeck et al. (1997) formulation for + ! thickness diffusivity. +RESOLN_SCALED_KH = True ! [Boolean] default = False + ! If true, the Laplacian lateral viscosity is scaled away when the first + ! baroclinic deformation radius is well resolved. +RESOLN_SCALED_KHTH = True ! [Boolean] default = False + ! If true, the interface depth diffusivity is scaled away when the first + ! baroclinic deformation radius is well resolved. +RESOLN_SCALED_KHTR = True ! [Boolean] default = False + ! If true, the epipycnal tracer diffusivity is scaled away when the first + ! baroclinic deformation radius is well resolved. +KHTH_SLOPE_CFF = 0.1 ! [nondim] default = 0.0 + ! The nondimensional coefficient in the Visbeck formula for the interface depth + ! diffusivity +KHTR_SLOPE_CFF = 0.1 ! [nondim] default = 0.0 + ! The nondimensional coefficient in the Visbeck formula for the epipycnal tracer + ! diffusivity +VARMIX_KTOP = 6 ! [nondim] default = 2 + ! The layer number at which to start vertical integration of S*N for purposes of + ! finding the Eady growth rate. +VISBECK_L_SCALE = 3.0E+04 ! [m] default = 0.0 + ! The fixed length scale in the Visbeck formula. + +! === module MOM_set_visc === +PRANDTL_TURB = 0.0 ! [nondim] default = 1.0 + ! The turbulent Prandtl number applied to shear instability. +DYNAMIC_VISCOUS_ML = True ! [Boolean] default = False + ! If true, use a bulk Richardson number criterion to determine the mixed layer + ! thickness for viscosity. +ML_OMEGA_FRAC = 1.0 ! [nondim] default = 0.0 + ! When setting the decay scale for turbulence, use this fraction of the absolute + ! rotation rate blended with the local value of f, as sqrt((1-of)*f^2 + + ! of*4*omega^2). +HBBL = 10.0 ! [m] + ! The thickness of a bottom boundary layer with a viscosity of KVBBL if + ! BOTTOMDRAGLAW is not defined, or the thickness over which near-bottom + ! velocities are averaged for the drag law if BOTTOMDRAGLAW is defined but + ! LINEAR_DRAG is not. +DRAG_BG_VEL = 0.1 ! [m s-1] default = 0.0 + ! DRAG_BG_VEL is either the assumed bottom velocity (with LINEAR_DRAG) or an + ! unresolved velocity that is combined with the resolved velocity to estimate + ! the velocity magnitude. DRAG_BG_VEL is only used when BOTTOMDRAGLAW is + ! defined. +BBL_THICK_MIN = 0.1 ! [m] default = 0.0 + ! The minimum bottom boundary layer thickness that can be used with + ! BOTTOMDRAGLAW. This might be Kv/(cdrag*drag_bg_vel) to give Kv as the minimum + ! near-bottom viscosity. +KV = 1.0E-04 ! [m2 s-1] + ! The background kinematic viscosity in the interior. The molecular value, ~1e-6 + ! m2 s-1, may be used. + +! === module MOM_thickness_diffuse === +KHTH = 1.0 ! [m2 s-1] default = 0.0 + ! The background horizontal thickness diffusivity. +KHTH_MAX = 900.0 ! [m2 s-1] default = 0.0 + ! The maximum horizontal thickness diffusivity. + +! === module MOM_dynamics_split_RK2 === + +! === module MOM_continuity === + +! === module MOM_continuity_PPM === +ETA_TOLERANCE = 1.0E-06 ! [m] default = 1.1E-09 + ! The tolerance for the differences between the barotropic and baroclinic + ! estimates of the sea surface height due to the fluxes through each face. The + ! total tolerance for SSH is 4 times this value. The default is + ! 0.5*NK*ANGSTROM, and this should not be set less than about + ! 10^-15*MAXIMUM_DEPTH. +VELOCITY_TOLERANCE = 0.001 ! [m s-1] default = 3.0E+08 + ! The tolerance for barotropic velocity discrepancies between the barotropic + ! solution and the sum of the layer thicknesses. + +! === module MOM_CoriolisAdv === +BOUND_CORIOLIS = True ! [Boolean] default = False + ! If true, the Coriolis terms at u-points are bounded by the four estimates of + ! (f+rv)v from the four neighboring v-points, and similarly at v-points. This + ! option would have no effect on the SADOURNY Coriolis scheme if it were + ! possible to use centered difference thickness fluxes. + +! === module MOM_PressureForce === + +! === module MOM_PressureForce_AFV === + +! === module MOM_hor_visc === +AH_VEL_SCALE = 0.05 ! [m s-1] default = 0.0 + ! The velocity scale which is multiplied by the cube of the grid spacing to + ! calculate the biharmonic viscosity. The final viscosity is the largest of this + ! scaled viscosity, the Smagorinsky and Leith viscosities, and AH. +SMAGORINSKY_AH = True ! [Boolean] default = False + ! If true, use a biharmonic Smagorinsky nonlinear eddy viscosity. +SMAG_BI_CONST = 0.06 ! [nondim] default = 0.0 + ! The nondimensional biharmonic Smagorinsky constant, typically 0.015 - 0.06. + +! === module MOM_vert_friction === +MAXVEL = 10.0 ! [m s-1] default = 3.0E+08 + ! The maximum velocity allowed before the velocity components are truncated. + +! === module MOM_barotropic === +BOUND_BT_CORRECTION = True ! [Boolean] default = False + ! If true, the corrective pseudo mass-fluxes into the barotropic solver are + ! limited to values that require less than maxCFL_BT_cont to be accommodated. +NONLINEAR_BT_CONTINUITY = True ! [Boolean] default = False + ! If true, use nonlinear transports in the barotropic continuity equation. This + ! does not apply if USE_BT_CONT_TYPE is true. +BT_PROJECT_VELOCITY = True ! [Boolean] default = False + ! If true, step the barotropic velocity first and project out the velocity + ! tendency by 1+BEBT when calculating the transport. The default (false) is to + ! use a predictor continuity step to find the pressure field, and then to do a + ! corrector continuity step using a weighted average of the old and new + ! velocities, with weights of (1-BEBT) and BEBT. +BEBT = 0.2 ! [nondim] default = 0.1 + ! BEBT determines whether the barotropic time stepping uses the forward-backward + ! time-stepping scheme or a backward Euler scheme. BEBT is valid in the range + ! from 0 (for a forward-backward treatment of nonrotating gravity waves) to 1 + ! (for a backward Euler treatment). In practice, BEBT must be greater than about + ! 0.05. +DTBT = -0.95 ! [s or nondim] default = -0.98 + ! The barotropic time step, in s. DTBT is only used with the split explicit time + ! stepping. To set the time step automatically based the maximum stable value + ! use 0, or a negative value gives the fraction of the stable value. Setting + ! DTBT to 0 is the same as setting it to -0.98. The value of DTBT that will + ! actually be used is an integer fraction of DT, rounding down. + +! === module MOM_mixed_layer_restrat === +MIXEDLAYER_RESTRAT = True ! [Boolean] default = False + ! If true, a density-gradient dependent re-stratifying flow is imposed in the + ! mixed layer. Can be used in ALE mode without restriction but in layer mode can + ! only be used if BULKMIXEDLAYER is true. +FOX_KEMPER_ML_RESTRAT_COEF = 5.0 ! [nondim] default = 0.0 + ! A nondimensional coefficient that is proportional to the ratio of the + ! deformation radius to the dominant lengthscale of the submesoscale mixed layer + ! instabilities, times the minimum of the ratio of the mesoscale eddy kinetic + ! energy to the large-scale geostrophic kinetic energy or 1 plus the square of + ! the grid spacing over the deformation radius, as detailed by Fox-Kemper et al. + ! (2010) + +! === module MOM_diagnostics === + +! === module MOM_diabatic_driver === +! The following parameters are used for diabatic processes. + +! === module MOM_entrain_diffusive === +MAX_ENT_IT = 20 ! default = 5 + ! The maximum number of iterations that may be used to calculate the interior + ! diapycnal entrainment. +TOLERANCE_ENT = 1.0E-05 ! [m] default = 1.341640786499874E-05 + ! The tolerance with which to solve for entrainment values. + +! === module MOM_set_diffusivity === + +! === module MOM_bkgnd_mixing === +! Adding static vertical background mixing coefficients +KD = 2.0E-05 ! [m2 s-1] default = 0.0 + ! The background diapycnal diffusivity of density in the interior. Zero or the + ! molecular value, ~1e-7 m2 s-1, may be used. + +! === module MOM_kappa_shear === +! Parameterization of shear-driven turbulence following Jackson, Hallberg and Legg, JPO 2008 +USE_JACKSON_PARAM = True ! [Boolean] default = False + ! If true, use the Jackson-Hallberg-Legg (JPO 2008) shear mixing + ! parameterization. +MAX_RINO_IT = 25 ! [nondim] default = 50 + ! The maximum number of iterations that may be used to estimate the Richardson + ! number driven mixing. + +! === module MOM_diabatic_aux === +! The following parameters are used for auxiliary diabatic processes. +RECLAIM_FRAZIL = False ! [Boolean] default = True + ! If true, try to use any frazil heat deficit to cool any overlying layers down + ! to the freezing point, thereby avoiding the creation of thin ice when the SST + ! is above the freezing point. + +! === module MOM_mixed_layer === +MSTAR = 0.3 ! [nondim] default = 1.2 + ! The ratio of the friction velocity cubed to the TKE input to the mixed layer. +BULK_RI_ML = 0.05 ! [nondim] + ! The efficiency with which mean kinetic energy released by mechanically forced + ! entrainment of the mixed layer is converted to turbulent kinetic energy. +ABSORB_ALL_SW = True ! [Boolean] default = False + ! If true, all shortwave radiation is absorbed by the ocean, instead of passing + ! through to the bottom mud. +TKE_DECAY = 10.0 ! [nondim] default = 2.5 + ! TKE_DECAY relates the vertical rate of decay of the TKE available for + ! mechanical entrainment to the natural Ekman depth. +HMIX_MIN = 2.0 ! [m] default = 0.0 + ! The minimum mixed layer depth if the mixed layer depth is determined + ! dynamically. +LIMIT_BUFFER_DETRAIN = True ! [Boolean] default = False + ! If true, limit the detrainment from the buffer layers to not be too different + ! from the neighbors. +DEPTH_LIMIT_FLUXES = 0.1 ! [m] default = 0.2 + ! The surface fluxes are scaled away when the total ocean depth is less than + ! DEPTH_LIMIT_FLUXES. +CORRECT_ABSORPTION_DEPTH = True ! [Boolean] default = False + ! If true, the average depth at which penetrating shortwave radiation is + ! absorbed is adjusted to match the average heating depth of an exponential + ! profile by moving some of the heating upward in the water column. + +! === module MOM_opacity === +PEN_SW_SCALE = 15.0 ! [m] default = 0.0 + ! The vertical absorption e-folding depth of the penetrating shortwave + ! radiation. +PEN_SW_FRAC = 0.42 ! [nondim] default = 0.0 + ! The fraction of the shortwave radiation that penetrates below the surface. + +! === module MOM_tracer_advect === + +! === module MOM_tracer_hor_diff === +KHTR = 1.0 ! [m2 s-1] default = 0.0 + ! The background along-isopycnal tracer diffusivity. +KHTR_MAX = 900.0 ! [m2 s-1] default = 0.0 + ! The maximum along-isopycnal tracer diffusivity. +DIFFUSE_ML_TO_INTERIOR = True ! [Boolean] default = False + ! If true, enable epipycnal mixing between the surface boundary layer and the + ! interior. +ML_KHTR_SCALE = 0.0 ! [nondim] default = 1.0 + ! With Diffuse_ML_interior, the ratio of the truly horizontal diffusivity in the + ! mixed layer to the epipycnal diffusivity. The valid range is 0 to 1. + +! === module MOM_sum_output === +MAXTRUNC = 5000 ! [truncations save_interval-1] default = 0 + ! The run will be stopped, and the day set to a very large value if the velocity + ! is truncated more than MAXTRUNC times between energy saves. Set MAXTRUNC to 0 + ! to stop if there is any truncation of velocities. +ENERGYSAVEDAYS = 0.25 ! [days] default = 1.0 + ! The interval in units of TIMEUNIT between saves of the energies of the run and + ! other globally summed diagnostics. + +! === module MOM_surface_forcing === +BUOY_CONFIG = "linear" ! default = "zero" + ! The character string that indicates how buoyancy forcing is specified. Valid + ! options include (file), (zero), (linear), (USER), (BFB) and (NONE). +WIND_CONFIG = "gyres" ! default = "zero" + ! The character string that indicates how wind forcing is specified. Valid + ! options include (file), (2gyre), (1gyre), (gyres), (zero), and (USER). +TAUX_SIN_AMP = 0.1 ! [Pa] default = 0.0 + ! With the gyres wind_config, the sine amplitude in the zonal wind stress + ! profile: B in taux = A + B*sin(n*pi*y/L) + C*cos(n*pi*y/L). +TAUX_N_PIS = 1.0 ! [nondim] default = 0.0 + ! With the gyres wind_config, the number of gyres in the zonal wind stress + ! profile: n in taux = A + B*sin(n*pi*y/L) + C*cos(n*pi*y/L). +RESTOREBUOY = True ! [Boolean] default = False + ! If true, the buoyancy fluxes drive the model back toward some specified + ! surface state with a rate given by FLUXCONST. +FLUXCONST = 0.5 ! [m day-1] default = 0.0 + ! The constant that relates the restoring surface fluxes to the relative surface + ! anomalies (akin to a piston velocity). Note the non-MKS units. +SST_NORTH = 27.0 ! [deg C] default = 0.0 + ! With buoy_config linear, the sea surface temperature at the northern end of + ! the domain toward which to to restore. +SST_SOUTH = 3.0 ! [deg C] default = 0.0 + ! With buoy_config linear, the sea surface temperature at the southern end of + ! the domain toward which to to restore. +GUST_CONST = 0.02 ! [Pa] default = 0.0 + ! The background gustiness in the winds. + +! === module MOM_main (MOM_driver) === +DT_FORCING = 3600.0 ! [s] default = 900.0 + ! The time step for changing forcing, coupling with other components, or + ! potentially writing certain diagnostics. The default value is given by DT. +DAYMAX = 3.0 ! [days] + ! The final time of the whole simulation, in units of TIMEUNIT seconds. This + ! also sets the potential end time of the present run segment if the end time is + ! not set via ocean_solo_nml in input.nml. +RESTART_CONTROL = 3 ! default = 1 + ! An integer whose bits encode which restart files are written. Add 2 (bit 1) + ! for a time-stamped file, and odd (bit 0) for a non-time-stamped file. A + ! non-time-stamped restart file is saved at the end of the run segment for any + ! non-negative value. +RESTINT = 365.0 ! [days] default = 0.0 + ! The interval between saves of the restart file in units of TIMEUNIT. Use 0 + ! (the default) to not save incremental restart files at all. + +! === module MOM_write_cputime === +MAXCPU = 2.88E+04 ! [wall-clock seconds] default = -1.0 + ! The maximum amount of cpu time per processor for which MOM should run before + ! saving a restart file and quitting with a return value that indicates that a + ! further run is required to complete the simulation. If automatic restarts are + ! not desired, use a negative value for MAXCPU. MAXCPU has units of wall-clock + ! seconds, so the actual CPU time used is larger by a factor of the number of + ! processors used. + +! Debugging parameters set to non-default values +U_TRUNC_FILE = "U_velocity_truncations" ! default = "" + ! The absolute path to a file into which the accelerations leading to zonal + ! velocity truncations are written. Undefine this for efficiency if this + ! diagnostic is not needed. +V_TRUNC_FILE = "V_velocity_truncations" ! default = "" + ! The absolute path to a file into which the accelerations leading to meridional + ! velocity truncations are written. Undefine this for efficiency if this + ! diagnostic is not needed. diff --git a/.testing/p0/MOM_override b/.testing/p0/MOM_override new file mode 100644 index 0000000000..e69de29bb2 diff --git a/.testing/p0/diag_table b/.testing/p0/diag_table new file mode 100644 index 0000000000..68c71dd2c4 --- /dev/null +++ b/.testing/p0/diag_table @@ -0,0 +1,91 @@ +"MOM benchmark Experiment" +1 1 1 0 0 0 +"prog", 1,"days",1,"days","time", +#"ave_prog", 5,"days",1,"days","Time",365,"days" +#"cont", 5,"days",1,"days","Time",365,"days" + +#This is the field section of the diag_table. + +# Prognostic Ocean fields: +#========================= + +"ocean_model","u","u","prog","all",.false.,"none",2 +"ocean_model","v","v","prog","all",.false.,"none",2 +"ocean_model","h","h","prog","all",.false.,"none",1 +"ocean_model","e","e","prog","all",.false.,"none",2 +"ocean_model","temp","temp","prog","all",.false.,"none",2 +#"ocean_model","salt","salt","prog","all",.false.,"none",2 + +#"ocean_model","u","u","ave_prog_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","v","v","ave_prog_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","h","h","ave_prog_%4yr_%3dy","all",.true.,"none",1 +#"ocean_model","e","e","ave_prog_%4yr_%3dy","all",.true.,"none",2 + +# Auxilary Tracers: +#================== +#"ocean_model","vintage","vintage","prog_%4yr_%3dy","all",.false.,"none",2 +#"ocean_model","age","age","prog_%4yr_%3dy","all",.false.,"none",2 + +# Continuity Equation Terms: +#=========================== +#"ocean_model","dhdt","dhdt","cont_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","wd","wd","cont_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","uh","uh","cont_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","vh","vh","cont_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","h_rho","h_rho","cont_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","uh_rho","uh_rho","cont_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","vh_rho","vh_rho","cont_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","uhGM_rho","uhGM_rho","cont_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","vhGM_rho","vhGM_rho","cont_%4yr_%3dy","all",.true.,"none",2 + +# +# Tracer Fluxes: +#================== +#"ocean_model","T_adx", "T_adx", "ave_prog_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","T_ady", "T_ady", "ave_prog_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","T_diffx","T_diffx","ave_prog_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","T_diffy","T_diffy","ave_prog_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","S_adx", "S_adx", "ave_prog_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","S_ady", "S_ady", "ave_prog_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","S_diffx","S_diffx","ave_prog_%4yr_%3dy","all",.true.,"none",2 +#"ocean_model","S_diffy","S_diffy","ave_prog_%4yr_%3dy","all",.true.,"none",2 + +# testing +# ======= +#"ocean_model","Kv_u","Kv_u","prog","all",.false.,"none",2 +#"ocean_model","Kv_v","Kv_v","prog","all",.false.,"none",2 + +#============================================================================================= +# +#===- This file can be used with diag_manager/v2.0a (or higher) ==== +# +# +# FORMATS FOR FILE ENTRIES (not all input values are used) +# ------------------------ +# +#"file_name", output_freq, "output_units", format, "time_units", "time_long_name", ... +# (opt) new_file_frequecy, (opt) "new_file_freq_units", "new_file_start_date" +# +# +#output_freq: > 0 output frequency in "output_units" +# = 0 output frequency every time step +# =-1 output frequency at end of run +# +#output_units = units used for output frequency +# (years, months, days, minutes, hours, seconds) +# +#time_units = units used to label the time axis +# (days, minutes, hours, seconds) +# +# +# FORMAT FOR FIELD ENTRIES (not all input values are used) +# ------------------------ +# +#"module_name", "field_name", "output_name", "file_name" "time_sampling", time_avg, "other_opts", packing +# +#time_avg = .true. or .false. +# +#packing = 1 double precision +# = 2 float +# = 4 packed 16-bit integers +# = 8 packed 1-byte (not tested?) diff --git a/.testing/p0/input.nml b/.testing/p0/input.nml new file mode 100644 index 0000000000..41555b8822 --- /dev/null +++ b/.testing/p0/input.nml @@ -0,0 +1,22 @@ + &MOM_input_nml + output_directory = './', + input_filename = 'n' + restart_input_dir = 'INPUT/', + restart_output_dir = 'RESTART/', + parameter_filename = 'MOM_input', + 'MOM_override' / + + &diag_manager_nml + / + + &fms_nml + clock_grain='ROUTINE' + clock_flags='SYNC' + !domains_stack_size = 955296 + domains_stack_size = 14256000 + stack_size =0 / + +!&ocean_solo_nml +! hours = 1 +! !days = 1 +!/ diff --git a/.testing/tools/compare_clocks.py b/.testing/tools/compare_clocks.py new file mode 100755 index 0000000000..77198fda6a --- /dev/null +++ b/.testing/tools/compare_clocks.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python +import argparse +import json + +# Ignore timers below this threshold (in seconds) +DEFAULT_THRESHOLD = 0.05 + +# Thresholds for reporting +DT_WARN = 0.10 # Slowdown warning +DT_FAIL = 0.25 # Slowdown abort + +ANSI_RED = '\033[31m' +ANSI_GREEN = '\033[32m' +ANSI_YELLOW = '\033[33m' +ANSI_RESET = '\033[0m' + + +def main(): + desc = ( + 'Compare two FMS clock output files and report any differences within ' + 'a defined threshold.' + ) + + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('expt') + parser.add_argument('ref') + parser.add_argument('--threshold') + parser.add_argument('--verbose', action='store_true') + args = parser.parse_args() + + threshold = float(args.threshold) if args.threshold else DEFAULT_THRESHOLD + verbose = args.verbose + + clock_cmp = {} + + print('{:35s}{:8s} {:8s}'.format('', 'Profile', 'Reference')) + print() + + with open(args.expt) as log_expt, open(args.ref) as log_ref: + clocks_expt = json.load(log_expt)['clocks'] + clocks_ref = json.load(log_ref)['clocks'] + + # Gather timers which appear in both clocks + clock_tags = [clk for clk in clocks_expt if clk in clocks_ref] + + for clk in clock_tags: + clock_cmp[clk] = {} + + # For now, we only comparge tavg, the rank-averaged timing + rec = 'tavg' + + t_expt = clocks_expt[clk][rec] + t_ref = clocks_ref[clk][rec] + + # Compare the relative runtimes + if all(t > threshold for t in (t_expt, t_ref)): + dclk = (t_expt - t_ref) / t_ref + else: + dclk = 0. + clock_cmp[clk][rec] = dclk + + # Skip trivially low clocks + if all(t < threshold for t in (t_expt, t_ref)) and not verbose: + continue + + # Report the time differences + ansi_color = ANSI_RESET + + if abs(t_expt - t_ref) > threshold: + if dclk > DT_FAIL: + ansi_color = ANSI_RED + elif dclk > DT_WARN: + ansi_color = ANSI_YELLOW + elif dclk < -DT_WARN: + ansi_color = ANSI_GREEN + + print('{}{}: {:7.3f}s, {:7.3f}s ({:.1f}%){}'.format( + ansi_color, + ' ' * (32 - len(clk)) + clk, + t_expt, + t_ref, + 100. * dclk, + ANSI_RESET, + )) + + +if __name__ == '__main__': + main() diff --git a/.testing/tools/compare_perf.py b/.testing/tools/compare_perf.py new file mode 100755 index 0000000000..e4a651c709 --- /dev/null +++ b/.testing/tools/compare_perf.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python +import argparse +import json + +# Ignore timers below this threshold (in seconds) +DEFAULT_THRESHOLD = 0.05 + +# Thresholds for reporting +DT_WARN = 0.10 # Slowdown warning +DT_FAIL = 0.25 # Slowdown abort + +ANSI_RED = '\033[31m' +ANSI_GREEN = '\033[32m' +ANSI_YELLOW = '\033[33m' +ANSI_RESET = '\033[0m' + + +def main(): + desc = ( + 'Compare two FMS clock output files and report any differences within ' + 'a defined threshold.' + ) + + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('expt') + parser.add_argument('ref') + parser.add_argument('--threshold') + parser.add_argument('--verbose', action='store_true') + args = parser.parse_args() + + threshold = float(args.threshold) if args.threshold else DEFAULT_THRESHOLD + verbose = args.verbose + + clock_cmp = {} + + print('{:35s}{:8s} {:8s}'.format('', 'Profile', 'Reference')) + print() + + with open(args.expt) as profile_expt, open(args.ref) as profile_ref: + perf_expt = json.load(profile_expt) + perf_ref = json.load(profile_ref) + + events = [ev for ev in perf_expt if ev in perf_ref] + + for event in events: + # For now, only report the times + if event not in ('task-clock', 'cpu-clock'): + continue + + count_expt = perf_expt[event]['count'] + count_ref = perf_ref[event]['count'] + + symbols_expt = perf_expt[event]['symbol'] + symbols_ref = perf_ref[event]['symbol'] + + symbols = [ + s for s in symbols_expt + if s in symbols_ref + and not s.startswith('0x') + ] + + for symbol in symbols: + t_expt = float(symbols_expt[symbol]) / 1e9 + t_ref = float(symbols_ref[symbol]) / 1e9 + + # Compare the relative runtimes + if all(t > threshold for t in (t_expt, t_ref)): + dclk = (t_expt - t_ref) / t_ref + else: + dclk = 0. + + # Skip trivially low clocks + if all(t < threshold for t in (t_expt, t_ref)) and not verbose: + continue + + # Report the time differences + ansi_color = ANSI_RESET + + if abs(t_expt - t_ref) > threshold: + if dclk > DT_FAIL: + ansi_color = ANSI_RED + elif dclk > DT_WARN: + ansi_color = ANSI_YELLOW + elif dclk < -DT_WARN: + ansi_color = ANSI_GREEN + + # Remove module name + sname = symbol.split('_MOD_', 1)[-1] + + # Strip version from glibc calls + sname = sname.split('@')[0] + + # Remove GCC optimization renaming + sname = sname.replace('.constprop.0', '') + + if len(sname) > 32: + sname = sname[:29] + '...' + + print('{}{}: {:7.3f}s, {:7.3f}s ({:.1f}%){}'.format( + ansi_color, + ' ' * (32 - len(sname)) + sname, + t_expt, + t_ref, + 100. * dclk, + ANSI_RESET, + )) + + +if __name__ == '__main__': + main() diff --git a/.testing/tools/parse_fms_clocks.py b/.testing/tools/parse_fms_clocks.py new file mode 100755 index 0000000000..b57fc481ab --- /dev/null +++ b/.testing/tools/parse_fms_clocks.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python +import argparse +import collections +import json +import os +import sys + +import f90nml + +record_type = collections.defaultdict(lambda: float) +for rec in ('grain', 'pemin', 'pemax',): + record_type[rec] = int + + +def main(): + desc = 'Parse MOM6 model stdout and return clock data in JSON format.' + + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('--format', '-f', action='store_true') + parser.add_argument('--dir', '-d') + parser.add_argument('log') + args = parser.parse_args() + + config = {} + + if args.dir: + # Gather model configuration + input_nml = os.path.join(args.dir, 'input.nml') + nml = f90nml.read(input_nml) + config['input.nml'] = nml.todict() + + parameter_filenames = [ + ('params', 'MOM_parameter_doc.all'), + ('layout', 'MOM_parameter_doc.layout'), + ('debug', 'MOM_parameter_doc.debugging'), + ] + for key, fname in parameter_filenames: + config[key] = {} + with open(os.path.join(args.dir, fname)) as param_file: + params = parse_mom6_param(param_file) + config[key].update(params) + + # Get log path + if os.path.isfile(args.log): + log_path = args.log + elif os.path.isfile(os.path.join(args.dir, args.log)): + log_path = os.path.join(args.dir, args.log) + else: + sys.exit('stdout log not found.') + + # Parse timings + with open(log_path) as log: + clocks = parse_clocks(log) + + config['clocks'] = clocks + + if args.format: + print(json.dumps(config, indent=4)) + else: + print(json.dumps(config)) + + +def parse_mom6_param(param_file): + params = {} + for line in param_file: + param_stmt = line.split('!')[0].strip() + if param_stmt: + key, val = [s.strip() for s in param_stmt.split('=')] + + # TODO: Convert to equivalent Python types + if val in ('True', 'False'): + params[key] = bool(val) + else: + params[key] = val + + return params + + +def parse_clocks(log): + clock_start_msg = 'Tabulating mpp_clock statistics across' + clock_end_msg = 'MPP_STACK high water mark=' + + fields = [] + for line in log: + if line.startswith(clock_start_msg): + npes = line.lstrip(clock_start_msg).split()[0] + + # Get records + fields = [] + line = next(log) + + # Skip blank lines + while line.isspace(): + line = next(log) + + fields = line.split() + + # Exit this loop, begin clock parsing + break + + clocks = {} + for line in log: + # Treat MPP_STACK usage as end of clock reports + if line.lstrip().startswith(clock_end_msg): + break + + record = line.split()[-len(fields):] + + clk = line.split(record[0])[0].strip() + clocks[clk] = {} + + for fld, rec in zip(fields, record): + rtype = record_type[fld] + clocks[clk][fld] = rtype(rec) + + return clocks + + +if __name__ == '__main__': + main() diff --git a/.testing/tools/parse_perf.py b/.testing/tools/parse_perf.py new file mode 100755 index 0000000000..b86b1cc106 --- /dev/null +++ b/.testing/tools/parse_perf.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +import argparse +import collections +import json +import os +import shlex +import subprocess +import sys + + +def main(): + desc = 'Parse perf.data and return in JSON format.' + + parser = argparse.ArgumentParser(description=desc) + parser.add_argument('--format', '-f', action='store_true') + parser.add_argument('data') + args = parser.parse_args() + + profile = parse_perf_report(args.data) + + if args.format: + print(json.dumps(profile, indent=4)) + else: + print(json.dumps(profile)) + + +def parse_perf_report(perf_data_path): + profile = {} + + cmd = shlex.split( + 'perf report -s symbol,period -i {}'.format(perf_data_path) + ) + with subprocess.Popen(cmd, stdout=subprocess.PIPE, text=True) as proc: + event_name = None + for line in proc.stdout: + # Skip blank lines: + if not line or line.isspace(): + continue + + # Set the current event + if line.startswith('# Samples: '): + event_name = line.split()[-1].strip("'") + + # Remove perf modifiers for now + event_name = event_name.rsplit(':', 1)[0] + + profile[event_name] = {} + profile[event_name]['symbol'] = {} + + # Get total count + elif line.startswith('# Event count '): + event_count = int(line.split()[-1]) + profile[event_name]['count'] = event_count + + # skip all other 'comment' lines + elif line.startswith('#'): + continue + + # get per-symbol count + else: + tokens = line.split() + symbol = tokens[2] + period = int(tokens[3]) + + profile[event_name]['symbol'][symbol] = period + + return profile + + +if __name__ == '__main__': + main() From 03d034567b85cdc842abc56ead68e030b4759904 Mon Sep 17 00:00:00 2001 From: Marshall Ward Date: Wed, 18 Aug 2021 12:23:32 -0400 Subject: [PATCH 2/6] Testing: Replace bc with awk; framework support This patch adds three minor updates to the build and test systems: 1. A framework selection bug was fixed. The variable name was missing a $ token. 2. the FRAMEWORK variable was added to .testing/Makefile to facilitate testing of both FMS frameworks. 3. Restart half-periods no longer use `bc` to compute the duration in seconds; we now use awk, which is more commonly available. --- .testing/Makefile | 16 +++++++++++----- ac/configure.ac | 2 +- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.testing/Makefile b/.testing/Makefile index 06b29dc690..de7038eeff 100644 --- a/.testing/Makefile +++ b/.testing/Makefile @@ -114,6 +114,9 @@ CONFIGS := $(wildcard tc*) TESTS = grids layouts restarts nans dims openmps rotations DIMS = t l h z q r +# Set the framework +FRAMEWORK ?= fms1 + # REPRO tests enable reproducibility with optimization, and often do not match # the DEBUG results in older GCCs and vendor compilers, so we can optionally # disable them. @@ -267,7 +270,7 @@ build/%/MOM6: build/%/Makefile build/%/Makefile: ../ac/configure ../ac/Makefile.in $(DEPS)/lib/libFMS.a $(MKMF) $(LIST_PATHS) mkdir -p $(@D) cd $(@D) \ - && $(MOM_ENV) ../../../ac/configure $(MOM_ACFLAGS) \ + && $(MOM_ENV) ../../../ac/configure $(MOM_ACFLAGS) --with-framework=$(FRAMEWORK) \ || (cat config.log && false) @@ -549,7 +552,11 @@ $(eval $(call STAT_RULE,dim.q,symmetric,,Q_RESCALE_POWER=11,,1)) $(eval $(call STAT_RULE,dim.r,symmetric,,R_RESCALE_POWER=11,,1)) -# Restart tests require significant preprocessing, and are handled separately. +# Generate the half-period input namelist as follows: +# 1. Fetch DAYMAX and TIMEUNIT from MOM_input +# 2. Convert DAYMAX from TIMEUNIT to seconds +# 3. Apply seconds to `ocean_solo_nml` inside input.nml. +# NOTE: Assumes that runtime set by DAYMAX, will fail if set by input.nml work/%/restart/ocean.stats: build/symmetric/MOM6 $(VENV_PATH) rm -rf $(@D) mkdir -p $(@D) @@ -562,14 +569,13 @@ work/%/restart/ocean.stats: build/symmetric/MOM6 $(VENV_PATH) cd work/$*/restart; \ fi mkdir -p $(@D)/RESTART - # Generate the half-period input namelist - # TODO: Assumes that runtime set by DAYMAX, will fail if set by input.nml + # Set the half-period cd $(@D) \ && daymax=$$(grep DAYMAX MOM_input | cut -d '!' -f 1 | cut -d '=' -f 2 | xargs) \ && timeunit=$$(grep TIMEUNIT MOM_input | cut -d '!' -f 1 | cut -d '=' -f 2 | xargs) \ && if [ -z "$${timeunit}" ]; then timeunit="8.64e4"; fi \ && printf -v timeunit_int "%.f" "$${timeunit}" \ - && halfperiod=$$(printf "%.f" $$(bc <<< "scale=10; 0.5 * $${daymax} * $${timeunit_int}")) \ + && halfperiod=$$(awk -v t=$${daymax} -v dt=$${timeunit} 'BEGIN {printf "%.f", 0.5*t*dt}') \ && printf "\n&ocean_solo_nml\n seconds = $${halfperiod}\n/\n" >> input.nml # Remove any previous archived output rm -f results/$*/std.restart{1,2}.{out,err} diff --git a/ac/configure.ac b/ac/configure.ac index 9cb7147846..3d1af81b05 100644 --- a/ac/configure.ac +++ b/ac/configure.ac @@ -65,7 +65,7 @@ AS_IF([test "x$with_driver" != "x"], MODEL_FRAMEWORK=${srcdir}/config_src/infra/FMS1 AC_ARG_WITH([framework], AS_HELP_STRING([--with-framework=fms1|fms2], [Select the model framework])) -AS_CASE([with_framework], +AS_CASE(["$with_framework"], [fms1], [MODEL_FRAMEWORK=${srcdir}/config_src/infra/FMS1], [fms2], [MODEL_FRAMEWORK=${srcdir}/config_src/infra/FMS2], [MODEL_FRAMEWORK=${srcdir}/config_src/infra/FMS1] From dc205c3a29a1d9304010582d329f03c87f5fec33 Mon Sep 17 00:00:00 2001 From: Robert Hallberg Date: Thu, 26 Aug 2021 16:46:43 -0400 Subject: [PATCH 3/6] (*)Fix discretization issues with USE_GME=True Corrected the inconsistently staggered expressions for the total depths used to scale away the GM+E backscatter scheme that is enabled with USE_GME. As a part of these changes, the nominal bathymetric depths have been replaced by the sum of the layer thicknesses, and parentheses were added to some of the GME expressions so that they will be rotationally invariant. Although there are no changes to the answers in the MOM6-examples test suite, answers will change in any cases where USE_GME is true, and GME_H0 is larger than the minimum ocean depth. This commit will address MOM6 issue #1466, which can now be closed. --- .../lateral/MOM_hor_visc.F90 | 68 ++++++++++++------- 1 file changed, 44 insertions(+), 24 deletions(-) diff --git a/src/parameterizations/lateral/MOM_hor_visc.F90 b/src/parameterizations/lateral/MOM_hor_visc.F90 index b4f857dec4..cf1712afc6 100644 --- a/src/parameterizations/lateral/MOM_hor_visc.F90 +++ b/src/parameterizations/lateral/MOM_hor_visc.F90 @@ -276,6 +276,8 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, GV, US, grad_vel_mag_h, & ! Magnitude of the velocity gradient tensor squared at h-points [T-2 ~> s-2] grad_vel_mag_bt_h, & ! Magnitude of the barotropic velocity gradient tensor squared at h-points [T-2 ~> s-2] grad_d2vel_mag_h, & ! Magnitude of the Laplacian of the velocity vector, squared [L-2 T-2 ~> m-2 s-2] + GME_effic_h, & ! The filtered efficiency of the GME terms at h points [nondim] + htot, & ! The total thickness of all layers [Z ~> m] boundary_mask_h ! A mask that zeroes out cells with at least one land edge [nondim] real, allocatable, dimension(:,:,:) :: h_diffu ! h x diffu [H L T-2 ~> m2 s-2] @@ -301,6 +303,7 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, GV, US, hq, & ! harmonic mean of the harmonic means of the u- & v point thicknesses [H ~> m or kg m-2] ! This form guarantees that hq/hu < 4. grad_vel_mag_bt_q, & ! Magnitude of the barotropic velocity gradient tensor squared at q-points [T-2 ~> s-2] + GME_effic_q, & ! The filtered efficiency of the GME terms at q points [nondim] boundary_mask_q ! A mask that zeroes out cells with at least one land edge [nondim] real, dimension(SZIB_(G),SZJB_(G),SZK_(GV)) :: & @@ -338,6 +341,10 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, GV, US, real :: hu, hv ! Thicknesses interpolated by arithmetic means to corner ! points; these are first interpolated to u or v velocity ! points where masks are applied [H ~> m or kg m-2]. + real :: h_arith_q ! The arithmetic mean total thickness at q points [Z ~> m] + real :: h_harm_q ! The harmonic mean total thickness at q points [Z ~> m] + real :: I_hq ! The inverse of the arithmetic mean total thickness at q points [Z-1 ~> m-1] + real :: I_GME_h0 ! The inverse of GME tapering scale [Z-1 ~> m-1] real :: h_neglect ! thickness so small it can be lost in roundoff and so neglected [H ~> m or kg m-2] real :: h_neglect3 ! h_neglect^3 [H3 ~> m3 or kg3 m-6] real :: h_min ! Minimum h at the 4 neighboring velocity points [H ~> m] @@ -501,6 +508,35 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, GV, US, (0.25*((dvdy_bt(i,j)+dvdy_bt(i+1,j+1))+(dvdy_bt(i,j+1)+dvdy_bt(i+1,j))))**2) enddo ; enddo + do j=js-1,je+1 ; do i=is-1,ie+1 + htot(i,j) = 0.0 + enddo ; enddo + do k=1,nz ; do j=js-1,je+1 ; do i=is-1,ie+1 + htot(i,j) = htot(i,j) + GV%H_to_Z*h(i,j,k) + enddo ; enddo ; enddo + + I_GME_h0 = 1.0 / CS%GME_h0 + do j=Jsq,Jeq+1 ; do i=Isq,Ieq+1 + if (grad_vel_mag_bt_h(i,j)>0) then + GME_effic_h(i,j) = CS%GME_efficiency * boundary_mask_h(i,j) * & + (MIN(htot(i,j) * I_GME_h0, 1.0)**2) + else + GME_effic_h(i,j) = 0.0 + endif + enddo ; enddo + + do J=js-1,Jeq ; do I=is-1,Ieq + if (grad_vel_mag_bt_q(I,J)>0) then + h_arith_q = 0.25 * ((htot(i,j) + htot(i+1,j+1)) + (htot(i+1,j) + htot(i,j+1))) + I_hq = 1.0 / h_arith_q + h_harm_q = 0.25 * h_arith_q * ((htot(i,j)*I_hq + htot(i+1,j+1)*I_hq) + & + (htot(i+1,j)*I_hq + htot(i,j+1)*I_hq)) + GME_effic_q(I,J) = CS%GME_efficiency * boundary_mask_q(I,J) * (MIN(h_harm_q * I_GME_h0, 1.0)**2) + else + GME_effic_q(I,J) = 0.0 + endif + enddo ; enddo + endif ! use_GME !$OMP parallel do default(none) & @@ -509,7 +545,7 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, GV, US, !$OMP is, ie, js, je, Isq, Ieq, Jsq, Jeq, nz, & !$OMP apply_OBC, rescale_Kh, legacy_bound, find_FrictWork, & !$OMP use_MEKE_Ku, use_MEKE_Au, boundary_mask_h, boundary_mask_q, & - !$OMP backscat_subround, GME_coeff_limiter, & + !$OMP backscat_subround, GME_coeff_limiter, GME_effic_h, GME_effic_q, & !$OMP h_neglect, h_neglect3, FWfrac, inv_PI3, inv_PI6, H0_GME, & !$OMP diffu, diffv, Kh_h, Kh_q, Ah_h, Ah_q, FrictWork, FrictWork_GME, & !$OMP div_xx_h, sh_xx_h, vort_xy_q, sh_xy_q, GME_coeff_h, GME_coeff_q, & @@ -1388,15 +1424,8 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, GV, US, call pass_vector(KH_u_GME, KH_v_GME, G%Domain) do j=Jsq,Jeq+1 ; do i=Isq,Ieq+1 - if (grad_vel_mag_bt_h(i,j)>0) then - GME_coeff = CS%GME_efficiency * (MIN(G%bathyT(i,j) / CS%GME_h0, 1.0)**2) * & - (0.25*(KH_u_GME(I,j,k)+KH_u_GME(I-1,j,k)+KH_v_GME(i,J,k)+KH_v_GME(i,J-1,k))) - else - GME_coeff = 0.0 - endif - - ! apply mask - GME_coeff = GME_coeff * boundary_mask_h(i,j) + GME_coeff = GME_effic_h(i,j) * 0.25 * & + ((KH_u_GME(I,j,k)+KH_u_GME(I-1,j,k)) + (KH_v_GME(i,J,k)+KH_v_GME(i,J-1,k))) GME_coeff = MIN(GME_coeff, CS%GME_limiter) if ((CS%id_GME_coeff_h>0) .or. find_FrictWork) GME_coeff_h(i,j,k) = GME_coeff @@ -1405,17 +1434,8 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, GV, US, enddo ; enddo do J=js-1,Jeq ; do I=is-1,Ieq - if (grad_vel_mag_bt_q(I,J)>0) then - !### This expression is not rotationally invariant - bathyT is to the SW of q points, - ! and it needs parentheses in the sum of the 4 diffusivities. - GME_coeff = CS%GME_efficiency * (MIN(G%bathyT(i,j) / CS%GME_h0, 1.0)**2) * & - (0.25*(KH_u_GME(I,j,k)+KH_u_GME(I,j+1,k)+KH_v_GME(i,J,k)+KH_v_GME(i+1,J,k))) - else - GME_coeff = 0.0 - endif - - ! apply mask - GME_coeff = GME_coeff * boundary_mask_q(I,J) + GME_coeff = GME_effic_q(I,J) * 0.25 * & + ((KH_u_GME(I,j,k)+KH_u_GME(I,j+1,k)) + (KH_v_GME(i,J,k)+KH_v_GME(i+1,J,k))) GME_coeff = MIN(GME_coeff, CS%GME_limiter) if (CS%id_GME_coeff_q>0) GME_coeff_q(I,J,k) = GME_coeff @@ -1424,8 +1444,8 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, GV, US, enddo ; enddo ! Applying GME diagonal term. This is linear and the arguments can be rescaled. - call smooth_GME(CS,G,GME_flux_h=str_xx_GME) - call smooth_GME(CS,G,GME_flux_q=str_xy_GME) + call smooth_GME(CS, G, GME_flux_h=str_xx_GME) + call smooth_GME(CS, G, GME_flux_q=str_xy_GME) do J=Jsq,Jeq+1 ; do i=Isq,Ieq+1 str_xx(i,j) = (str_xx(i,j) + str_xx_GME(i,j)) * (h(i,j,k) * CS%reduction_xx(i,j)) @@ -1446,7 +1466,7 @@ subroutine horizontal_viscosity(u, v, h, diffu, diffv, MEKE, VarMix, G, GV, US, enddo ; enddo endif - else ! use_GME + else ! .not. use_GME do J=Jsq,Jeq+1 ; do i=Isq,Ieq+1 str_xx(i,j) = str_xx(i,j) * (h(i,j,k) * CS%reduction_xx(i,j)) enddo ; enddo From 12569050c3ab746387b1ecbdc7413f571d220ef6 Mon Sep 17 00:00:00 2001 From: Robert Hallberg Date: Thu, 26 Aug 2021 17:02:35 -0400 Subject: [PATCH 4/6] (*)Use depths in DOME and dye tracer initialization Initialize the horizontal stripes of tracers in the DOME and dye examples based on the depth from the (flat) sea surface rather than the height above the seafloor. These are mathematically equivalent, but there could be small changes in the tracer distributions. Because these changes only impact passive tracers, all of the physical solutions are bitwise identical. --- src/tracer/DOME_tracer.F90 | 16 +++++++++------- src/tracer/dye_example.F90 | 18 ++++++++++-------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/tracer/DOME_tracer.F90 b/src/tracer/DOME_tracer.F90 index c20eda7745..44421c7387 100644 --- a/src/tracer/DOME_tracer.F90 +++ b/src/tracer/DOME_tracer.F90 @@ -75,7 +75,7 @@ function register_DOME_tracer(HI, GV, param_file, CS, tr_Reg, restart_CS) character(len=48) :: flux_units ! The units for tracer fluxes, usually ! kg(tracer) kg(water)-1 m3 s-1 or kg(tracer) s-1. character(len=200) :: inputdir - real, pointer :: tr_ptr(:,:,:) => NULL() + real, pointer :: tr_ptr(:,:,:) => NULL() ! A pointer to the tracer field logical :: register_DOME_tracer integer :: isd, ied, jsd, jed, nz, m isd = HI%isd ; ied = HI%ied ; jsd = HI%jsd ; jed = HI%jed ; nz = GV%ke @@ -166,13 +166,15 @@ subroutine initialize_DOME_tracer(restart, day, G, GV, US, h, diag, OBC, CS, & character(len=48) :: units ! The dimensions of the variable. character(len=48) :: flux_units ! The units for tracer fluxes, usually ! kg(tracer) kg(water)-1 m3 s-1 or kg(tracer) s-1. - real, pointer :: tr_ptr(:,:,:) => NULL() + real, pointer :: tr_ptr(:,:,:) => NULL() ! A pointer to the tracer field real :: PI ! 3.1415926... calculated as 4*atan(1) real :: tr_y ! Initial zonally uniform tracer concentrations. real :: h_neglect ! A thickness that is so small it is usually lost ! in roundoff and can be neglected [H ~> m or kg m-2]. - real :: e(SZK_(GV)+1), e_top, e_bot ! Heights [Z ~> m]. - real :: d_tr ! A change in tracer concentraions, in tracer units. + real :: e(SZK_(GV)+1) ! Interface heights relative to the sea surface (negative down) [Z ~> m] + real :: e_top ! Height of the top of the tracer band relative to the sea surface [Z ~> m] + real :: e_bot ! Height of the bottom of the tracer band relative to the sea surface [Z ~> m] + real :: d_tr ! A change in tracer concentrations, in tracer units. integer :: i, j, k, is, ie, js, je, isd, ied, jsd, jed, nz, m integer :: IsdB, IedB, JsdB, JedB @@ -215,9 +217,9 @@ subroutine initialize_DOME_tracer(restart, day, G, GV, US, h, diag, OBC, CS, & if (NTR > 7) then do j=js,je ; do i=is,ie - e(nz+1) = -G%bathyT(i,j) - do k=nz,1,-1 - e(K) = e(K+1) + h(i,j,k)*GV%H_to_Z + e(1) = 0.0 + do k=1,nz + e(K+1) = e(K) - h(i,j,k)*GV%H_to_Z do m=7,NTR e_top = (-600.0*real(m-1) + 3000.0) * US%m_to_Z e_bot = (-600.0*real(m-1) + 2700.0) * US%m_to_Z diff --git a/src/tracer/dye_example.F90 b/src/tracer/dye_example.F90 index 2919f2d95f..2bf3cd94ed 100644 --- a/src/tracer/dye_example.F90 +++ b/src/tracer/dye_example.F90 @@ -203,9 +203,10 @@ subroutine initialize_dye_tracer(restart, day, G, GV, h, diag, OBC, CS, sponge_C character(len=48) :: units ! The dimensions of the variable. character(len=48) :: flux_units ! The units for age tracer fluxes, either ! years m3 s-1 or years kg s-1. + real :: z_bot ! Height of the bottom of the layer relative to the sea surface [Z ~> m] + real :: z_center ! Height of the center of the layer relative to the sea surface [Z ~> m] logical :: OK integer :: i, j, k, m - real :: z_bot, z_center if (.not.associated(CS)) return if (CS%ntr < 1) return @@ -221,14 +222,14 @@ subroutine initialize_dye_tracer(restart, day, G, GV, h, diag, OBC, CS, sponge_C CS%dye_source_minlat(m)=G%geoLatT(i,j) .and. & G%mask2dT(i,j) > 0.0 ) then - z_bot = -G%bathyT(i,j) - do k = GV%ke, 1, -1 + z_bot = 0.0 + do k = 1, GV%ke + z_bot = z_bot - h(i,j,k)*GV%H_to_Z z_center = z_bot + 0.5*h(i,j,k)*GV%H_to_Z if ( z_center > -CS%dye_source_maxdepth(m) .and. & z_center < -CS%dye_source_mindepth(m) ) then CS%tr(i,j,k,m) = 1.0 endif - z_bot = z_bot + h(i,j,k)*GV%H_to_Z enddo endif enddo ; enddo @@ -273,9 +274,10 @@ subroutine dye_tracer_column_physics(h_old, h_new, ea, eb, fluxes, dt, G, GV, US real :: sfc_val ! The surface value for the tracers. real :: Isecs_per_year ! The number of seconds in a year. real :: year ! The time in years. + real :: z_bot ! Height of the bottom of the layer relative to the sea surface [Z ~> m] + real :: z_center ! Height of the center of the layer relative to the sea surface [Z ~> m] integer :: secs, days ! Integer components of the time type. integer :: i, j, k, is, ie, js, je, nz, m - real :: z_bot, z_center is = G%isc ; ie = G%iec ; js = G%jsc ; je = G%jec ; nz = GV%ke @@ -305,14 +307,14 @@ subroutine dye_tracer_column_physics(h_old, h_new, ea, eb, fluxes, dt, G, GV, US CS%dye_source_minlat(m)=G%geoLatT(i,j) .and. & G%mask2dT(i,j) > 0.0 ) then - z_bot = -G%bathyT(i,j) - do k=nz,1,-1 + z_bot = 0.0 + do k=1,nz + z_bot = z_bot - h_new(i,j,k)*GV%H_to_Z z_center = z_bot + 0.5*h_new(i,j,k)*GV%H_to_Z if ( z_center > -CS%dye_source_maxdepth(m) .and. & z_center < -CS%dye_source_mindepth(m) ) then CS%tr(i,j,k,m) = 1.0 endif - z_bot = z_bot + h_new(i,j,k)*GV%H_to_Z enddo endif enddo ; enddo From 8cd43302e7810ed93e1e415167954c97ff965084 Mon Sep 17 00:00:00 2001 From: Robert Hallberg Date: Thu, 26 Aug 2021 21:18:10 -0400 Subject: [PATCH 5/6] (*+)New internal tide bathymetric parameters Replaced the nominal bathymetric depth with the sum of the layer thicknesses in the calculation of the quadratic drag acting on the internal wave field. Also replaced the hard-coded 1 m scale at which the drag is reduced with a new runtime parameter, INTERNAL_TIDE_DRAG_MIN_DEPTH, which is only logged if the internal tides are used and INTERNAL_TIDE_QUAD_DRAG=True, and made the maximum scale of the topographic roughness as a fraction of the bottom depth into the new runtime parameter INTERNAL_TIDE_ROUGHNESS_FRAC, whose default, 0.1, is the same as the previous hard-coded value. Several of the comments in the MOM_internal_tides module were also clarified or standardized. All answers in the MOM-examples test suite are bitwise identical, but answers could change and there could be changes in the MOM_parameter_doc files in some cases that use the internal tides module. --- .../lateral/MOM_internal_tides.F90 | 57 ++++++++++++------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/src/parameterizations/lateral/MOM_internal_tides.F90 b/src/parameterizations/lateral/MOM_internal_tides.F90 index 4d66471408..1bff6287f4 100644 --- a/src/parameterizations/lateral/MOM_internal_tides.F90 +++ b/src/parameterizations/lateral/MOM_internal_tides.F90 @@ -96,6 +96,9 @@ module MOM_internal_tides real :: decay_rate !< A constant rate at which internal tide energy is !! lost to the interior ocean internal wave field. real :: cdrag !< The bottom drag coefficient [nondim]. + real :: drag_min_depth !< The minimum total ocean thickness that will be used in the denominator + !! of the quadratic drag terms for internal tides when + !! INTERNAL_TIDE_QUAD_DRAG is true [Z ~> m] logical :: apply_background_drag !< If true, apply a drag due to background processes as a sink. logical :: apply_bottom_drag @@ -185,6 +188,7 @@ subroutine propagate_int_tide(h, tv, cn, TKE_itidal_input, vel_btTide, Nb, dt, & tot_En, & ! energy summed over angles, modes, frequencies [R Z3 T-2 ~> J m-2] tot_leak_loss, tot_quad_loss, tot_itidal_loss, tot_Froude_loss, tot_allprocesses_loss, & ! energy loss rates summed over angle, freq, and mode [R Z3 T-3 ~> W m-2] + htot, & ! The vertical sum of the layer thicknesses [H ~> m or kg m-2] drag_scale, & ! bottom drag scale [T-1 ~> s-1] itidal_loss_mode, allprocesses_loss_mode ! energy loss rates for a given mode and frequency (summed over angles) [R Z3 T-3 ~> W m-2] @@ -200,7 +204,7 @@ subroutine propagate_int_tide(h, tv, cn, TKE_itidal_input, vel_btTide, Nb, dt, & real :: En_initial, Delta_E_check ! Energies for debugging [R Z3 T-2 ~> J m-2] real :: TKE_Froude_loss_check, TKE_Froude_loss_tot ! Energy losses for debugging [R Z3 T-3 ~> W m-2] character(len=160) :: mesg ! The text of an error message - integer :: a, m, fr, i, j, is, ie, js, je, isd, ied, jsd, jed, nAngle, nzm + integer :: a, m, fr, i, j, k, is, ie, js, je, isd, ied, jsd, jed, nAngle, nzm integer :: id_g, jd_g ! global (decomp-invar) indices (for debugging) type(group_pass_type), save :: pass_test, pass_En type(time_type) :: time_end @@ -360,9 +364,12 @@ subroutine propagate_int_tide(h, tv, cn, TKE_itidal_input, vel_btTide, Nb, dt, & ! Extract the energy for mixing due to bottom drag------------------------------- if (CS%apply_bottom_drag) then + do j=jsd,jed ; do i=isd,ied ; htot(i,j) = 0.0 ; enddo ; enddo + do k=1,GV%ke ; do j=jsd,jed ; do i=isd,ied + htot(i,j) = htot(i,j) + h(i,j,k) + enddo ; enddo ; enddo do j=jsd,jed ; do i=isd,ied - ! Note the 1 m dimensional scale here. Should this be a parameter? - I_D_here = 1.0 / (max(G%bathyT(i,j), 1.0*US%m_to_Z)) + I_D_here = 1.0 / (max(GV%H_to_Z*htot(i,j), CS%drag_min_depth)) drag_scale(i,j) = CS%cdrag * sqrt(max(0.0, US%L_to_Z**2*vel_btTide(i,j)**2 + & tot_En(i,j) * I_rho0 * I_D_here)) * I_D_here enddo ; enddo @@ -2143,20 +2150,20 @@ subroutine internal_tides_init(Time, G, GV, US, param_file, diag, CS) ! Local variables real :: Angle_size ! size of wedges, rad real, allocatable :: angles(:) ! orientations of wedge centers, rad - real, allocatable, dimension(:,:) :: h2 ! topographic roughness scale, m^2 - real :: kappa_itides, kappa_h2_factor - ! characteristic topographic wave number - ! and a scaling factor - real, allocatable :: ridge_temp(:,:) - ! array for temporary storage of flags + real, dimension(:,:), allocatable :: h2 ! topographic roughness scale squared [Z2 ~> m2] + real :: kappa_itides ! characteristic topographic wave number [L-1 ~> m-1] + real, dimension(:,:), allocatable :: ridge_temp ! array for temporary storage of flags ! of cells with double-reflecting ridges - logical :: use_int_tides, use_temperature - real :: period_1 ! The period of the gravest modeled mode [T ~> s] + logical :: use_int_tides, use_temperature + real :: kappa_h2_factor ! A roughness scaling factor [nondim] + real :: RMS_roughness_frac ! The maximum RMS topographic roughness as a fraction of the + ! nominal ocean depth, or a negative value for no limit [nondim] + real :: period_1 ! The period of the gravest modeled mode [T ~> s] integer :: num_angle, num_freq, num_mode, m, fr integer :: isd, ied, jsd, jed, a, id_ang, i, j type(axes_grp) :: axes_ang ! This include declares and sets the variable "version". -#include "version_variable.h" +# include "version_variable.h" character(len=40) :: mdl = "MOM_internal_tides" ! This module's name. character(len=16), dimension(8) :: freq_name character(len=40) :: var_name @@ -2280,16 +2287,20 @@ subroutine internal_tides_init(Time, G, GV, US, param_file, diag, CS) "1st-order upwind advection. This scheme is highly "//& "continuity solver. This scheme is highly "//& "diffusive but may be useful for debugging.", default=.false.) - call get_param(param_file, mdl, "INTERNAL_TIDE_BACKGROUND_DRAG", & - CS%apply_background_drag, "If true, the internal tide "//& - "ray-tracing advection uses a background drag term as a sink.",& - default=.false.) + call get_param(param_file, mdl, "INTERNAL_TIDE_BACKGROUND_DRAG", CS%apply_background_drag, & + "If true, the internal tide ray-tracing advection uses a background drag "//& + "term as a sink.", default=.false.) call get_param(param_file, mdl, "INTERNAL_TIDE_QUAD_DRAG", CS%apply_bottom_drag, & "If true, the internal tide ray-tracing advection uses "//& "a quadratic bottom drag term as a sink.", default=.false.) call get_param(param_file, mdl, "INTERNAL_TIDE_WAVE_DRAG", CS%apply_wave_drag, & "If true, apply scattering due to small-scale roughness as a sink.", & default=.false.) + call get_param(param_file, mdl, "INTERNAL_TIDE_DRAG_MIN_DEPTH", CS%drag_min_depth, & + "The minimum total ocean thickness that will be used in the denominator "//& + "of the quadratic drag terms for internal tides.", & + units="m", default=1.0, scale=US%m_to_Z, do_not_log=.not.CS%apply_bottom_drag) + CS%drag_min_depth = MAX(CS%drag_min_depth, GV%H_subroundoff * GV%H_to_Z) call get_param(param_file, mdl, "INTERNAL_TIDE_FROUDE_DRAG", CS%apply_Froude_drag, & "If true, apply wave breaking as a sink.", & default=.false.) @@ -2340,10 +2351,18 @@ subroutine internal_tides_init(Time, G, GV, US, param_file, diag, CS) fail_if_missing=.true.) filename = trim(CS%inputdir) // trim(h2_file) call log_param(param_file, mdl, "INPUTDIR/H2_FILE", filename) - call MOM_read_data(filename, 'h2', h2, G%domain, scale=US%m_to_Z) + call get_param(param_file, mdl, "INTERNAL_TIDE_ROUGHNESS_FRAC", RMS_roughness_frac, & + "The maximum RMS topographic roughness as a fraction of the nominal ocean depth, "//& + "or a negative value for no limit.", units="nondim", default=0.1) + + call MOM_read_data(filename, 'h2', h2, G%domain, scale=US%m_to_Z**2) do j=G%jsc,G%jec ; do i=G%isc,G%iec - ! Restrict rms topo to 10 percent of column depth. - h2(i,j) = min(0.01*(G%bathyT(i,j))**2, h2(i,j)) + ! Restrict RMS topographic roughness to a fraction (10 percent by default) of the column depth. + if (RMS_roughness_frac >= 0.0) then + h2(i,j) = max(min((RMS_roughness_frac*G%bathyT(i,j))**2, h2(i,j)), 0.0) + else + h2(i,j) = max(h2(i,j), 0.0) + endif ! Compute the fixed part; units are [R L-2 Z3 ~> kg m-2] here ! will be multiplied by N and the squared near-bottom velocity to get into [R Z3 T-3 ~> W m-2] CS%TKE_itidal_loss_fixed(i,j) = 0.5*kappa_h2_factor*GV%Rho0 * US%L_to_Z*kappa_itides * h2(i,j) From 6979c298419a3a724edacfda506f378a6fab96f1 Mon Sep 17 00:00:00 2001 From: Robert Hallberg Date: Thu, 26 Aug 2021 21:36:49 -0400 Subject: [PATCH 6/6] +(*)Add option for MEKE to calculate total depth Added the new run-time option, MEKE_FIXED_TOTAL_DEPTH, to use the actual total thickness instead the nominal depth in bathyT in several of the MEKE calculations. Also simplified and corrected a minor dimensional inconsistency in some expressions that effectively set masks when estimating the interface height diffusivities at tracer points for use in MEKE. By default, the answers in the MOM-examples test cases are bitwise identical, but answers could change in some cases due to the proper thickness weighting in the calculation of MEKE%Kh_diff. There are new entries in some MOM_parameter_doc files, so a minor spelling error correction in a MOM_parameter_doc entry was also included in this PR. --- src/parameterizations/lateral/MOM_MEKE.F90 | 34 ++++++++++++------- .../lateral/MOM_thickness_diffuse.F90 | 33 ++++++++++-------- .../vertical/MOM_ALE_sponge.F90 | 4 +-- 3 files changed, 43 insertions(+), 28 deletions(-) diff --git a/src/parameterizations/lateral/MOM_MEKE.F90 b/src/parameterizations/lateral/MOM_MEKE.F90 index 8779968bcd..8206fd9717 100644 --- a/src/parameterizations/lateral/MOM_MEKE.F90 +++ b/src/parameterizations/lateral/MOM_MEKE.F90 @@ -82,7 +82,9 @@ module MOM_MEKE real :: MEKE_topographic_beta !< Weight for how much topographic beta is considered !! when computing beta in Rhines scale [nondim] real :: MEKE_restoring_rate !< Inverse of the timescale used to nudge MEKE toward its equilibrium value [s-1]. - + logical :: fixed_total_depth !< If true, use the nominal bathymetric depth as the estimate of + !! the time-varying ocean depth. Otherwise base the depth on the total + !! ocean mass per unit area. logical :: kh_flux_enabled !< If true, lateral diffusive MEKE flux is enabled. logical :: initialize !< If True, invokes a steady state solver to calculate MEKE. logical :: debug !< If true, write out checksums of data for debugging @@ -283,12 +285,17 @@ subroutine step_forward_MEKE(MEKE, h, SN_u, SN_v, visc, dt, G, GV, US, CS, hu, h enddo enddo - !$OMP parallel do default(shared) - do j=js-1,je+1 ; do i=is-1,ie+1 - depth_tot(i,j) = G%bathyT(i,j) - !### Try changing this to: - ! depth_tot(i,j) = mass(i,j) * I_Rho0 - enddo ; enddo + if (CS%fixed_total_depth) then + !$OMP parallel do default(shared) + do j=js-1,je+1 ; do i=is-1,ie+1 + depth_tot(i,j) = G%bathyT(i,j) + enddo ; enddo + else + !$OMP parallel do default(shared) + do j=js-1,je+1 ; do i=is-1,ie+1 + depth_tot(i,j) = mass(i,j) * I_Rho0 + enddo ; enddo + endif if (CS%initialize) then call MEKE_equilibrium(CS, MEKE, G, GV, US, SN_u, SN_v, drag_rate_visc, I_mass, depth_tot) @@ -711,8 +718,7 @@ subroutine MEKE_equilibrium(CS, MEKE, G, GV, US, SN_u, SN_v, drag_rate_visc, I_m if (CS%MEKE_topographic_beta == 0. .or. (depth_tot(i,j) == 0.0)) then beta_topo_x = 0. ; beta_topo_y = 0. else - !### Consider different combinations of these estimates of topographic beta, and the use - ! of the water column thickness instead of the bathymetric depth. + !### Consider different combinations of these estimates of topographic beta. beta_topo_x = -CS%MEKE_topographic_beta * FatH * 0.5 * ( & (depth_tot(i+1,j)-depth_tot(i,j)) * G%IdxCu(I,j) & / max(depth_tot(i+1,j), depth_tot(i,j), dZ_neglect) & @@ -887,14 +893,13 @@ subroutine MEKE_lengthScales(CS, MEKE, G, GV, US, SN_u, SN_v, EKE, depth_tot, & FatH = 0.25* ( ( G%CoriolisBu(I,J) + G%CoriolisBu(I-1,J-1) ) + & ( G%CoriolisBu(I-1,J) + G%CoriolisBu(I,J-1) ) ) ! Coriolis parameter at h points - ! If bathyT is zero, then a division by zero FPE will be raised. In this + ! If depth_tot is zero, then a division by zero FPE will be raised. In this ! case, we apply Adcroft's rule of reciprocals and set the term to zero. ! Since zero-bathymetry cells are masked, this should not affect values. if (CS%MEKE_topographic_beta == 0. .or. (depth_tot(i,j) == 0.0)) then beta_topo_x = 0. ; beta_topo_y = 0. else - !### Consider different combinations of these estimates of topographic beta, and the use - ! of the water column thickness instead of the bathymetric depth. + !### Consider different combinations of these estimates of topographic beta. beta_topo_x = -CS%MEKE_topographic_beta * FatH * 0.5 * ( & (depth_tot(i+1,j)-depth_tot(i,j)) * G%IdxCu(I,j) & / max(depth_tot(i+1,j), depth_tot(i,j), dZ_neglect) & @@ -1167,6 +1172,11 @@ logical function MEKE_init(Time, G, US, param_file, diag, CS, MEKE, restart_CS) "If positive, is a fixed length contribution to the expression "//& "for mixing length used in MEKE-derived diffusivity.", & units="m", default=0.0, scale=US%m_to_L) + call get_param(param_file, mdl, "MEKE_FIXED_TOTAL_DEPTH", CS%fixed_total_depth, & + "If true, use the nominal bathymetric depth as the estimate of the "//& + "time-varying ocean depth. Otherwise base the depth on the total ocean mass"//& + "per unit area.", default=.true.) + call get_param(param_file, mdl, "MEKE_ALPHA_DEFORM", CS%aDeform, & "If positive, is a coefficient weighting the deformation scale "//& "in the expression for mixing length used in MEKE-derived diffusivity.", & diff --git a/src/parameterizations/lateral/MOM_thickness_diffuse.F90 b/src/parameterizations/lateral/MOM_thickness_diffuse.F90 index 3b3d72576c..0c8f25b42e 100644 --- a/src/parameterizations/lateral/MOM_thickness_diffuse.F90 +++ b/src/parameterizations/lateral/MOM_thickness_diffuse.F90 @@ -145,6 +145,8 @@ subroutine thickness_diffuse(h, uhtr, vhtr, tv, dt, G, GV, US, MEKE, VarMix, CDp KH_u_CFL ! The maximum stable interface height diffusivity at u grid points [L2 T-1 ~> m2 s-1] real, dimension(SZI_(G), SZJB_(G)) :: & KH_v_CFL ! The maximum stable interface height diffusivity at v grid points [L2 T-1 ~> m2 s-1] + real, dimension(SZI_(G), SZJ_(G)) :: & + htot ! The sum of the total layer thicknesses [H ~> m or kg m-2] real :: Khth_Loc_u(SZIB_(G), SZJ_(G)) real :: Khth_Loc_v(SZI_(G), SZJB_(G)) real :: Khth_Loc(SZIB_(G), SZJB_(G)) ! locally calculated thickness diffusivity [L2 T-1 ~> m2 s-1] @@ -479,38 +481,41 @@ subroutine thickness_diffuse(h, uhtr, vhtr, tv, dt, G, GV, US, MEKE, VarMix, CDp ! thicknesses across u and v faces, converted to 0/1 mask ! layer average of the interface diffusivities KH_u and KH_v do j=js,je ; do I=is-1,ie - hu(I,j) = 2.0*h(i,j,k)*h(i+1,j,k)/(h(i,j,k)+h(i+1,j,k)+h_neglect) - if (hu(I,j) /= 0.0) hu(I,j) = 1.0 - !### The same result would be accomplished with the following without a division: - ! hu(I,j) = 0.0 ; if (h(i,j,k)*h(i+1,j,k) /= 0.0) hu(I,j) = 1.0 + ! This expression uses harmonic mean thicknesses: + ! hu(I,j) = 2.0*h(i,j,k)*h(i+1,j,k) / (h(i,j,k)+h(i+1,j,k)+h_neglect) + ! This expression is a 0/1 mask based on depths where there are thick layers: + hu(I,j) = 0.0 ; if (h(i,j,k)*h(i+1,j,k) /= 0.0) hu(I,j) = 1.0 KH_u_lay(I,j) = 0.5*(KH_u(I,j,k)+KH_u(I,j,k+1)) enddo ; enddo do J=js-1,je ; do i=is,ie - hv(i,J) = 2.0*h(i,j,k)*h(i,j+1,k)/(h(i,j,k)+h(i,j+1,k)+h_neglect) - if (hv(i,J) /= 0.0) hv(i,J) = 1.0 - !### The same result would be accomplished with the following without a division: - ! hv(i,J) = 0.0 ; if (h(i,j,k)*h(i,j+1,k) /= 0.0) hv(i,J) = 1.0 + ! This expression uses harmonic mean thicknesses: + ! hv(i,J) = 2.0*h(i,j,k)*h(i,j+1,k)/(h(i,j,k)+h(i,j+1,k)+h_neglect) + ! This expression is a 0/1 mask based on depths where there are thick layers: + hv(i,J) = 0.0 ; if (h(i,j,k)*h(i,j+1,k) /= 0.0) hv(i,J) = 1.0 KH_v_lay(i,J) = 0.5*(KH_v(i,J,k)+KH_v(i,J,k+1)) enddo ; enddo - ! diagnose diffusivity at T-point - !### Because hu and hv are nondimensional here, the denominator is dimensionally inconsistent. + ! diagnose diffusivity at T-points do j=js,je ; do i=is,ie - Kh_t(i,j,k) = ((hu(I-1,j)*KH_u_lay(i-1,j)+hu(I,j)*KH_u_lay(I,j)) & - +(hv(i,J-1)*KH_v_lay(i,J-1)+hv(i,J)*KH_v_lay(i,J))) & - / (hu(I-1,j)+hu(I,j)+hv(i,J-1)+hv(i,J)+h_neglect) + Kh_t(i,j,k) = ((hu(I-1,j)*KH_u_lay(i-1,j) + hu(I,j)*KH_u_lay(I,j)) + & + (hv(i,J-1)*KH_v_lay(i,J-1) + hv(i,J)*KH_v_lay(i,J))) / & + ((hu(I-1,j)+hu(I,j)) + (hv(i,J-1)+hv(i,J)) + 1.0e-20) + ! Use this denominator instead if hu and hv are actual thicknesses rather than a 0/1 mask: + ! ((hu(I-1,j)+hu(I,j)) + (hv(i,J-1)+hv(i,J)) + h_neglect) enddo ; enddo enddo if (CS%Use_KH_in_MEKE) then MEKE%Kh_diff(:,:) = 0.0 + htot(:,:) = 0.0 do k=1,nz do j=js,je ; do i=is,ie MEKE%Kh_diff(i,j) = MEKE%Kh_diff(i,j) + Kh_t(i,j,k) * h(i,j,k) + htot(i,j) = htot(i,j) + h(i,j,k) enddo ; enddo enddo do j=js,je ; do i=is,ie - MEKE%Kh_diff(i,j) = GV%H_to_Z * MEKE%Kh_diff(i,j) / MAX(1.0*US%m_to_Z, G%bathyT(i,j)) + MEKE%Kh_diff(i,j) = MEKE%Kh_diff(i,j) / MAX(1.0*GV%m_to_H, htot(i,j)) enddo ; enddo endif diff --git a/src/parameterizations/vertical/MOM_ALE_sponge.F90 b/src/parameterizations/vertical/MOM_ALE_sponge.F90 index 419b012387..be1d265620 100644 --- a/src/parameterizations/vertical/MOM_ALE_sponge.F90 +++ b/src/parameterizations/vertical/MOM_ALE_sponge.F90 @@ -218,7 +218,7 @@ subroutine initialize_ALE_sponge_fixed(Iresttime, G, GV, param_file, CS, data_h, "answers from the end of 2018. Otherwise, use updated and more robust "//& "forms of the same expressions.", default=default_2018_answers) call get_param(param_file, mdl, "HOR_REGRID_2018_ANSWERS", CS%hor_regrid_answers_2018, & - "If true, use the order of arithmetic for horizonal regridding that recovers "//& + "If true, use the order of arithmetic for horizontal regridding that recovers "//& "the answers from the end of 2018. Otherwise, use rotationally symmetric "//& "forms of the same expressions.", default=default_2018_answers) call get_param(param_file, mdl, "REENTRANT_X", CS%reentrant_x, & @@ -478,7 +478,7 @@ subroutine initialize_ALE_sponge_varying(Iresttime, G, GV, param_file, CS, Irest "answers from the end of 2018. Otherwise, use updated and more robust "//& "forms of the same expressions.", default=default_2018_answers) call get_param(param_file, mdl, "HOR_REGRID_2018_ANSWERS", CS%hor_regrid_answers_2018, & - "If true, use the order of arithmetic for horizonal regridding that recovers "//& + "If true, use the order of arithmetic for horizontal regridding that recovers "//& "the answers from the end of 2018 and retain a bug in the 3-dimensional mask "//& "returned in certain cases. Otherwise, use rotationally symmetric "//& "forms of the same expressions and initialize the mask properly.", &