Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make unstacked_dims property default to ("z",) #2135

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions external/loaders/loaders/batches/_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ class BatchesFromMapperConfig(BatchesLoader):
needs_grid: if True, add grid information to the batched datasets. Warning: this
requires remote GCS access
in_memory: if True, load data eagerly and keep it in memory
unstacked_dims: if given, produce stacked and shuffled batches retaining
these dimensions as unstacked (non-sample) dimensions
unstacked_dims: if not None, produce stacked and shuffled batches retaining
these dimensions as unstacked (non-sample) dimensions. Defaults to ("z",).
subsample_ratio: the fraction of data to retain in each batch, selected
at random along the sample dimension.
drop_nans: if True, drop samples with NaN values from the data, and raise an
Expand All @@ -73,7 +73,7 @@ class BatchesFromMapperConfig(BatchesLoader):
res: str = "c48"
needs_grid: bool = True
in_memory: bool = False
unstacked_dims: Optional[Sequence[str]] = None
unstacked_dims: Optional[Sequence[str]] = ("z",)
subsample_ratio: float = 1.0
drop_nans: bool = False
shuffle_timesteps: bool = True
Expand Down
2 changes: 0 additions & 2 deletions tests/end_to_end_integration/test-data-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ mapper_config:
- x_wind
- y_wind
- pressure_thickness_of_atmospheric_layer
# note this differs from the training config in that
# there is no unstacked_dims arg here
timesteps_per_batch: 2
timesteps:
- "20160801.004500"
Expand Down
2 changes: 0 additions & 2 deletions tests/end_to_end_integration/training-data-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@ mapper_config:
- x_wind
- y_wind
- pressure_thickness_of_atmospheric_layer
unstacked_dims:
- z
timesteps_per_batch: 2
timesteps:
- "20160801.004500"
Expand Down
12 changes: 7 additions & 5 deletions workflows/diagnostics/fv3net/diagnostics/offline/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,12 @@ def main(args):
as_dict = yaml.safe_load(f)
config = loaders.BatchesLoader.from_dict(as_dict)

if hasattr(config, "unstacked_dims") and not (config.unstacked_dims is None):
logger.warn(
"The unstacked_dims property of data configuration is being set to None."
)
config.unstacked_dims = None

if args.evaluation_grid is None:
evaluation_grid = load_grid_info(EVALUATION_RESOLUTION)
else:
Expand All @@ -317,11 +323,7 @@ def main(args):
config=config, model=model, evaluation_resolution=evaluation_grid.sizes["x"]
)

output_data_yaml = os.path.join(args.output_path, "data_config.yaml")
with fsspec.open(args.data_yaml, "r") as f_in, fsspec.open(
output_data_yaml, "w"
) as f_out:
f_out.write(f_in.read())
vcm.cloud.copy(args.data_yaml, os.path.join(args.output_path, "data_config.yaml"))

# compute diags
ds_diagnostics, ds_scalar_metrics = _compute_diagnostics(
Expand Down