
Commit 5f5913e

Merge branch 'master' into feature/improve_wandb_cyclegan_reporting

mcgibbon committed Jun 22, 2023
2 parents 26de524 + 9327855
Showing 200 changed files with 32,418 additions and 585 deletions.
37 changes: 37 additions & 0 deletions .circleci/config.yml
@@ -232,10 +232,43 @@ jobs:
          cd external/radiation
          . /home/circleci/.nix-profile/etc/profile.d/nix.sh
          nix-shell --command "pytest -s tests/test_driver"

  build_and_test_scream:
    parameters:
      image:
        default: prognostic_scream_run
        type: string
    machine:
      image: ubuntu-2004:202111-02
    resource_class: large
    environment:
      GOOGLE_PROJECT_ID: vcm-ml
      GOOGLE_APPLICATION_CREDENTIALS: /tmp/key.json
      GOOGLE_COMPUTE_ZONE: us-central1
      IMAGE: <<parameters.image>>
    steps:
      - checkout
      - run:
          name: "gcloud auth"
          command: |
            echo $ENCODED_GOOGLE_CREDENTIALS | base64 -d > $GOOGLE_APPLICATION_CREDENTIALS
            echo "export GCLOUD_SERVICE_KEY=\$(echo \$ENCODED_GOOGLE_CREDENTIALS | base64 --decode)" >> $BASH_ENV
      - gcp-gcr/gcr-auth
      - run:
          name: "Build and push scream image"
          no_output_timeout: 20m
          command: |
            sudo chown -R circleci:circleci /home/circleci/.docker && \
            .circleci/build_and_push_image.sh

parameters:
  run-weekly-workflow:
    type: boolean
    default: false

workflows:
  version: 2
  test_and_lint:
    when:
      not: << pipeline.parameters.run-weekly-workflow >>
    jobs:
      - lint
      - argo
@@ -309,3 +342,7 @@ workflows:
            branches:
              only: master
      - validate_radiation_port
  weekly-scream-workflow:
    when: << pipeline.parameters.run-weekly-workflow >>
    jobs:
      - build_and_test_scream
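
The weekly-scream-workflow above runs only when the run-weekly-workflow pipeline parameter is true. A minimal sketch of triggering it through the CircleCI v2 API (the project slug, branch, and token variable are assumptions for illustration, not taken from this diff):

import os
import requests

# Hypothetical trigger for the weekly workflow; replace <org>/<repo> with the
# real project slug. CIRCLE_TOKEN must hold a valid CircleCI API token.
response = requests.post(
    "https://circleci.com/api/v2/project/gh/<org>/<repo>/pipeline",
    headers={"Circle-Token": os.environ["CIRCLE_TOKEN"]},
    json={"branch": "master", "parameters": {"run-weekly-workflow": True}},
)
response.raise_for_status()
print(response.json()["number"])  # pipeline number of the triggered run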
1 change: 1 addition & 0 deletions .environment-scripts/install_local_packages.sh
@@ -24,5 +24,6 @@ pip install -c constraints.txt \
    -e workflows/fine_res_budget \
    -e workflows/dataflow \
    -e workflows/diagnostics \
    -e workflows/prognostic_scream_run \
    -e external/wandb-query
set +e
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
@@ -39,6 +39,8 @@ repos:
workflows/dataflow/fv3net/pipelines/restarts_to_zarr/.+ |
workflows/prognostic_c48_run/.+ |
workflows/prognostic_c48_run/tests/.+ |
workflows/prognostic_scream_run/.+ |
workflows/prognostic_scream_run/tests/.+ |
external/fv3fit/fv3fit/.+ |
external/loaders/loaders/.+ |
external/radiation/radiation/.+ |
21 changes: 21 additions & 0 deletions Makefile
@@ -134,6 +134,27 @@ enter_%:
	-w $(PROGNOSTIC_RUN_WORKDIR) \
	$(REGISTRY)/$*:$(VERSION) bash

SCREAM_INSTALL_PATH ?= docker/prognostic_scream_run/scream
SCREAM_BRANCH ?= master
clone_scream_repository:
	if [ ! -d ${SCREAM_INSTALL_PATH} ]; then \
		git clone -b ${SCREAM_BRANCH} https://github.com/E3SM-Project/scream.git ${SCREAM_INSTALL_PATH}; \
	else \
		echo "${SCREAM_INSTALL_PATH} already exists, nothing to clone"; \
	fi

update_scream_repository: clone_scream_repository
	cd "${SCREAM_INSTALL_PATH}"; \
	git pull; \
	git submodule update --init --recursive; \
	git submodule sync --recursive

build_image_prognostic_scream_run: update_scream_repository
	tools/docker_build_cached.sh $(REGISTRY)/prognostic_scream_run:$(CACHE_TAG) \
		-f docker/prognostic_scream_run/Dockerfile -t $(REGISTRY)/prognostic_scream_run:$(VERSION) .

image_test_prognostic_scream_run:
	tools/docker-run --rm \
		-v $(shell pwd)/tests/scream_run_integration:/tmp/scream_run_integration \
		$(REGISTRY)/prognostic_scream_run:$(VERSION) \
		/tmp/scream_run_integration/test_scream_run.sh

############################################################
# Documentation (rules match "deploy_docs_%")
############################################################
2 changes: 1 addition & 1 deletion constraints.txt
@@ -315,7 +315,7 @@ wrapt==1.13.3
xarray==0.19.0
xgcm==0.6.1
xmltodict==0.12.0
-xpartition==0.2.0
+xpartition==0.2.1
yarl==1.6.3
yq==2.11.0
zarr==2.13.2
126 changes: 126 additions & 0 deletions docker/prognostic_scream_run/Dockerfile
@@ -0,0 +1,126 @@
ARG MAMBAFORGE_VERSION=4.14.0-0
FROM condaforge/mambaforge:${MAMBAFORGE_VERSION} AS base

ARG PNETCDF_VERSION=1.12.3
ENV PNETCDF_VERSION=${PNETCDF_VERSION}

ARG LIBNETCDF_VERSION=4.8.1
ENV LIBNETCDF_VERSION=${LIBNETCDF_VERSION}

ARG NETCDF_FORTRAN_VERSION=4.6.0
ENV NETCDF_FORTRAN_VERSION=${NETCDF_FORTRAN_VERSION}

ARG ESMF_VERSION=8.4.0
ENV ESMF_VERSION=${ESMF_VERSION}

ARG GCC_VERSION=10.*
ENV GCC_VERSION=${GCC_VERSION}

ENV USER=root
ENV LOGNAME=root

SHELL ["/bin/bash", "-c"]

# Install common packages
RUN mamba install --yes -c conda-forge \
cmake \
make \
wget \
curl \
subversion \
m4 \
pytest \
pytest-cov \
pyyaml \
vim \
openssh && \
rm -rf /opt/conda/pkgs/*

# Install version locked packages
RUN mamba install --yes -c conda-forge \
libnetcdf=${LIBNETCDF_VERSION}=*openmpi* \
netcdf-fortran=${NETCDF_FORTRAN_VERSION}=*openmpi* \
esmf=${ESMF_VERSION}=*openmpi* \
gcc_linux-64=${GCC_VERSION} \
gxx_linux-64=${GCC_VERSION} \
openmpi-mpifort \
gfortran_linux-64=${GCC_VERSION} && \
rm -rf /opt/conda/pkgs/* && \
ln -sf /opt/conda/bin/x86_64-conda-linux-gnu-ar /opt/conda/bin/ar && \
ln -sf /opt/conda/bin/x86_64-conda-linux-gnu-ranlib /opt/conda/bin/ranlib

# Install cpan packages
RUN cpan install XML::LibXML Switch

# Build pnetcdf
RUN curl -L -k -o "${PWD}/pnetcdf.tar.gz" \
https://parallel-netcdf.github.io/Release/pnetcdf-${PNETCDF_VERSION}.tar.gz && \
mkdir "${PWD}/pnetcdf" && \
tar -xvf "${PWD}/pnetcdf.tar.gz" -C "${PWD}/pnetcdf" --strip-components=1 && \
rm -rf "${PWD}/pnetcdf.tar.gz" && \
cd "${PWD}/pnetcdf" && \
source /opt/conda/etc/profile.d/conda.sh && \
conda activate base && \
./configure --prefix /opt/conda --disable-cxx --enable-shared \
MPICC=/opt/conda/bin/mpicc \
MPICXX=/opt/conda/bin/mpicxx \
MPIF77=/opt/conda/bin/mpif77 \
MPIF90=/opt/conda/bin/mpif90 && \
make -j4 && \
make install && \
rm -rf "${PWD}/pnetcdf"

RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive \
apt-get install -y --no-install-recommends \
curl ca-certificates software-properties-common \
gcc make libtool libhwloc-dev libx11-dev libxt-dev libedit-dev \
libical-dev ncurses-dev perl python-dev tcl-dev tk-dev swig libexpat-dev libssl-dev \
libxext-dev libxft-dev autoconf automake \
postgresql-12 postgresql-server-dev-all postgresql-contrib \
expat libedit2 python3 sendmail-bin sudo tcl tk && \
add-apt-repository ppa:deadsnakes/ppa && \
apt-get update && \
DEBIAN_FRONTEND=noninteractive \
apt-get install -y python3.7 python3.7-dev && \
rm -rf /var/lib/apt/lists/*

# Install additional packages
RUN mamba install --yes -c conda-forge \
lapack \
blas && \
rm -rf /opt/conda/pkgs/*

# Install dependencies
COPY docker/prognostic_scream_run/scream/components/eamxx/docker/requirements.txt /tmp/requirements.txt
RUN pip install -r /tmp/requirements.txt

# install gcloud
RUN apt-get update && apt-get install -y apt-transport-https ca-certificates gnupg curl gettext

RUN echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list &&\
curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add -

RUN apt-get update && apt-get install -y google-cloud-sdk
RUN gcloud config set project vcm-ml

ENV OMPI_ALLOW_RUN_AS_ROOT=1
ENV OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1
ENV IS_DOCKER=TRUE
COPY docker/prognostic_scream_run/scream/ /src/E3SM
COPY external/vcm /src/vcm
RUN pip install /src/vcm
COPY workflows/prognostic_scream_run/ /src/prognostic_scream_run
RUN pip install -r /src/prognostic_scream_run/requirements.txt
RUN pip install -e /src/prognostic_scream_run
COPY docker/prognostic_scream_run/precompile_scream.sh /src/precompile_scream.sh
ENV CC=/opt/conda/bin/mpicc
ENV CXX=/opt/conda/bin/mpicxx
ENV FC=/opt/conda/bin/mpif90
ENV AR=/opt/conda/bin/x86_64-conda-linux-gnu-ar
ENV RANLIB=/opt/conda/bin/x86_64-conda-linux-gnu-ranlib
ENV LDFLAGS="-Wl,-O2 -Wl,--sort-common -Wl,--as-needed -Wl,-z,relro -Wl,-z,now -Wl,--disable-new-dtags -Wl,--gc-sections -Wl,--allow-shlib-undefined -Wl,-rpath,/opt/conda/lib -Wl,-rpath-link,/opt/conda/lib -L/opt/conda/lib"
ENV GOOGLE_APPLICATION_CREDENTIALS /tmp/key.json
RUN --mount=type=secret,id=gcp,dst=/tmp/key.json \
export FSSPEC_GS_TOKEN=/tmp/key.json && \
/src/precompile_scream.sh
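
The final build step mounts a GCP service-account key as a BuildKit secret and points FSSPEC_GS_TOKEN at it before precompiling SCREAM. A sketch of how such a key is plausibly consumed on the Python side, assuming fsspec with gcsfs installed and a readable bucket (the bucket name is illustrative):

import fsspec

# gcsfs accepts a path to a service-account JSON via token=; the mounted
# key file then authenticates gs:// access during the build.
fs = fsspec.filesystem("gs", token="/tmp/key.json")
print(fs.ls("vcm-ml"))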
25 changes: 25 additions & 0 deletions docker/prognostic_scream_run/precompile_scream.sh
@@ -0,0 +1,25 @@
function fixup_mct {
    local mct_path="${1}"

    # TODO make PR to fix
    if [[ ! -e "${mct_path}/mct/Makefile.bak" ]]
    then
        sed -i".bak" "s/\$(AR)/\$(AR) \$(ARFLAGS)/g" "${mct_path}/mct/Makefile"
    fi

    if [[ ! -e "${mct_path}/mpeu/Makefile.bak" ]]
    then
        sed -i".bak" "s/\$(AR)/\$(AR) \$(ARFLAGS)/g" "${mct_path}/mpeu/Makefile"
    fi
}

# Fixes mct/mpeu to use ARFLAGS environment variable
# CIME will eventually have this fixed, remove this function when it does
fixup_mct "/src/E3SM/externals/mct"
for number_of_processors in 16 180; do
    cd /tmp
    cp /src/prognostic_scream_run/tests/example_configs/scream_ne30pg2.yaml ${number_of_processors}.yaml
    sed -i -e "s/number_of_processors: 16/number_of_processors: $(printf "%d" $number_of_processors)/g" ${number_of_processors}.yaml
    mkdir -p rundir
    scream_run write-rundir ${number_of_processors}.yaml rundir
done
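
The loop above stamps out one config per processor count with sed. A Python equivalent of that rewrite, assuming PyYAML and a top-level number_of_processors key in the example config:

import yaml

# Load the example config once, then write one copy per processor count.
with open("scream_ne30pg2.yaml") as f:
    config = yaml.safe_load(f)
for n in (16, 180):
    config["number_of_processors"] = n
    with open(f"{n}.yaml", "w") as f:
        yaml.safe_dump(config, f)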
9 changes: 9 additions & 0 deletions external/emulation/emulation/masks.py
@@ -56,6 +56,14 @@ def __call__(self, state: FortranState, emulator: FortranState) -> FortranState:
        use_fortran_state = slice(self.start, self.stop)
        # Fortran state TOA is index 79, and dims are [z, sample]
        emulator_field = np.copy(emulator[self.key])

        # Currently, Fortran fields pushed into the Python state are 64-bit
        # floats while the emulator output is float32. Since there are no
        # post-hoc adjustments for precpd, this leads to noise in the
        # tendencies estimated from the masked levels due to the 32 -> 64
        # cast; casting up front to float64 resolves it.
        if emulator_field.dtype != np.float64:
            emulator_field = emulator_field.astype(np.float64)

        if self.fill_value is None:
            emulator_field[use_fortran_state] = state[self.key][use_fortran_state]
        elif isinstance(self.fill_value, str):
@@ -64,4 +72,5 @@ def __call__(self, state: FortranState, emulator: FortranState) -> FortranState:
            ]
        elif isinstance(self.fill_value, float):
            emulator_field[use_fortran_state] = self.fill_value

        return {**emulator, self.key: emulator_field}
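
The cast added above matters because masked levels are filled from the float64 Fortran state and later differenced against it. A minimal numpy sketch of the effect, with invented values:

import numpy as np

# Writing float64 state values into a float32 emulator field truncates them,
# so the "copied" levels pick up ~1e-5 noise when differenced back.
state = np.array([200.1, 250.3, 300.7])     # float64 Fortran state
emulator32 = np.zeros(3, dtype=np.float32)
emulator32[:] = state                       # masked levels filled from state
print(emulator32 - state)                   # nonzero round-trip noise

emulator64 = emulator32.astype(np.float64)  # the fix: cast before filling
emulator64[:] = state
print(emulator64 - state)                   # exactly zeros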
1 change: 1 addition & 0 deletions external/emulation/emulation/zhao_carr.py
@@ -234,6 +234,7 @@ def enforce_conservative_gscond(state, emulator):
def enforce_conservative_phase_dependent(state, emulator):
    cloud_out = emulator[GscondOutput.cloud_water]
    net_condensation = cloud_out - state[Input.cloud_water]
    net_condensation = _limit_net_condensation_conserving(state, net_condensation)
    return {**emulator, **apply_condensation_phase_dependent(state, net_condensation)}
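
The added line routes the emulated net condensation through a conservation limiter before tendencies are applied. A hedged sketch of what a limiter like _limit_net_condensation_conserving might do (an assumption about its behavior, not the repo's implementation): condensation cannot consume more vapor than exists, and evaporation cannot consume more cloud water than exists.

import numpy as np

# Hypothetical limiter: positive net condensation consumes vapor, negative
# consumes cloud water, so clip to the available amount of each.
def limit_net_condensation(vapor, cloud_water, net_condensation):
    return np.clip(net_condensation, -cloud_water, vapor)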


29 changes: 26 additions & 3 deletions external/fv3fit/docs/composite-models.rst
@@ -43,7 +43,9 @@ Models augmented with out-of-sample detection can be defined with a config file
Tapered models
--------------------
A tapering transform can be applied to an existing saved model:

.. code-block:: yaml

    model: gs://vcm-ml-experiments/some_path
    tapering:
      dQ1:
@@ -60,7 +62,28 @@ Combined models
Combines multiple models with nonoverlapping output variables into a single model.
Similar functionality is also in the prognostic run's MultipleModelAdapter, but sometimes it
is more efficient to combine models earlier in the workflow.

.. code-block:: yaml

    models:
      - gs://vcm-ml-experiments/model1
      - gs://vcm-ml-experiments/model2

Squashed output models
----------------------
"Squashes" the output of a model, which means that samples less than a threshold value for a
particular output variable will be set to a target. Configured by a list of squashing rules,
which specify the name of the variable to determine the threshold, the threshold and the target,
and additional variables that should also be squared at the same positions.

.. code-block:: yaml

    base_model_path: gs://vcm-ml-experiments/model1
    squashing:
      - squash_by_name: cloud_amount
        squash_threshold: 0.08
        squash_to: 0.0
        additional_squash_target_names:
          - cloud_water_mixing_ratio
          - cloud_ice_mixing_ratio
2 changes: 2 additions & 0 deletions external/fv3fit/fv3fit/__init__.py
@@ -50,5 +50,7 @@
# need to import this to register the training func
import fv3fit.train_microphysics
import fv3fit.dataclasses
import fv3fit.reservoir.train
import fv3fit.reservoir.transformers.autoencoder

__version__ = "0.1.0"
3 changes: 2 additions & 1 deletion external/fv3fit/fv3fit/_shared/__init__.py
@@ -1,4 +1,4 @@
-from .config import SliceConfig, PackerConfig
+from .config import SliceConfig, PackerConfig, OptimizerConfig
from .training_config import TrainingConfig, register_training_function
from .packer import (
pack,
@@ -21,3 +21,4 @@
)
from .models import EnsembleModel, DerivedModel, TransformedPredictor
from .filesystem import get_dir, put_dir
from .xr_prediction import DatasetPredictor
37 changes: 31 additions & 6 deletions external/fv3fit/fv3fit/_shared/config.py
@@ -1,10 +1,5 @@
import dataclasses
from typing import (
    Any,
    Hashable,
    Mapping,
    Optional,
)
from typing import Any, Hashable, Mapping, Optional, Sequence

# TODO: move all keras configs under fv3fit.keras
import tensorflow as tf
@@ -115,3 +110,33 @@ class PackerConfig:
"""

clip: Mapping[Hashable, SliceConfig] = dataclasses.field(default_factory=dict)


@dataclasses.dataclass
class SquashedOutputConfig:
    """
    Configuration of output squashing

    Attributes:
        squash_by_name: name of the variable that will determine whether
            outputs are squashed
        additional_squash_target_names: names of the variables to be squashed
            in addition to `squash_by_name`
        squash_threshold: threshold value of squash_by_name below which
            squashing will occur at that sample and feature position for all
            target variables
        squash_to: value to which squashed values will be set
    """

    squash_by_name: Hashable
    squash_threshold: float
    squash_to: float = 0.0
    additional_squash_target_names: Sequence[Hashable] = ()

    def squash(self, predictions: xr.Dataset) -> xr.Dataset:
        squashed_predictions = predictions.copy()
        for name in [self.squash_by_name] + list(self.additional_squash_target_names):
            squashed_predictions[name] = predictions[name].where(
                predictions[self.squash_by_name] > self.squash_threshold,
                self.squash_to,
            )
        return squashed_predictions
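
A usage sketch for the new config; the dataset and values are invented for illustration, mirroring the YAML example in composite-models.rst, and assume numpy and xarray are available:

import numpy as np
import xarray as xr

config = SquashedOutputConfig(
    squash_by_name="cloud_amount",
    squash_threshold=0.08,
    additional_squash_target_names=["cloud_water_mixing_ratio"],
)
predictions = xr.Dataset(
    {
        "cloud_amount": ("sample", np.array([0.01, 0.5])),
        "cloud_water_mixing_ratio": ("sample", np.array([1e-4, 2e-3])),
    }
)
squashed = config.squash(predictions)
# cloud_amount <= 0.08 in the first sample, so both variables there become 0.0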
File renamed without changes.
