Skip to content

Commit

Permalink
Update pooch downloader: use CESMDATAROOT variable when available(#52)
Browse files Browse the repository at this point in the history
  • Loading branch information
andersy005 authored Dec 15, 2020
1 parent 7c993d4 commit bd1236c
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 20 deletions.
1 change: 1 addition & 0 deletions pop_tools/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
path=['~', '.pop_tools', 'data'],
version_dev='master',
base_url='ftp://ftp.cgd.ucar.edu/archive/aletheia-data/cesm-data/ocn/',
env='POP_TOOLS_DATA_DIR',
)
DATASETS.load_registry(pkg_resources.resource_stream('pop_tools', 'data_registry.txt'))

Expand Down
72 changes: 67 additions & 5 deletions pop_tools/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,34 +14,96 @@
tqdm = None


INPUTDATA_DIR = ['~', '.pop_tools']

# On Cheyenne/Casper and/or CGD machines, use local inputdata directory
# See: https://github.com/NCAR/pop-tools/issues/24#issue-523701065
# The name of the environment variable that can overwrite the path argument
cesm_data_root_path = os.environ.get('CESMDATAROOT')

INPUTDATA = pooch.create(
if cesm_data_root_path is not None and os.path.exists(cesm_data_root_path):
INPUTDATA_DIR = cesm_data_root_path
else:
# This is still the default in case the environment variable isn't defined
INPUTDATA_DIR = ['~', '.pop_tools']


INPUTDATA = pooch.create(
path=INPUTDATA_DIR,
version_dev='master',
base_url='https://svn-ccsm-inputdata.cgd.ucar.edu/trunk/',
# The name of the environment variable that can overwrite the path argument
env='CESMDATAROOT',
)


INPUTDATA.load_registry(pkg_resources.resource_stream('pop_tools', 'inputdata_registry.txt'))

# Build the module-level HTTP downloader. A progress bar is requested only
# when ``tqdm`` is not None (presumably it is set to None above when the
# import failed -- confirm against the top of the module).
# NOTE(review): ``verify=False`` is forwarded to the underlying HTTP request
# and presumably disables TLS certificate verification -- confirm this is
# still required for the inputdata server.
downloader = (
    pooch.HTTPDownloader(progressbar=True, verify=False, allow_redirects=True)
    if tqdm is not None
    else pooch.HTTPDownloader(verify=False, allow_redirects=True)
)


grid_def_file = pkg_resources.resource_filename('pop_tools', 'pop_grid_definitions.yaml')
input_templates_dir = pkg_resources.resource_filename('pop_tools', 'input_templates')

with open(grid_def_file) as f:
grid_defs = yaml.safe_load(f)


def fetch(self, fname, processor=None, downloader=None):
    """
    Return the local path of a registry file, downloading it if required.

    This is a modified version of the ``Pooch.fetch()`` method. The
    modification is needed because on Cheyenne/Casper the local data storage
    folder (CESMDATAROOT: /glade/p/cesmdata/cseg) is a location that we do not
    have permission to write to. This function itself only triggers a write
    (through ``pooch.core.stream_download``) when the file has to be
    downloaded or updated.

    Parameters
    ----------
    fname : str
        The file name (relative to the *base_url* of the remote data
        storage) to fetch from the local storage.
    processor : None or callable
        If not None, a function (or callable object) that is called as
        ``processor(full_path, action, pooch_instance)`` after the file is
        available locally; its return value is returned instead of the path.
    downloader : None or callable
        If not None, a function (or callable object) used to download a
        given URL to a provided local file name. If None, one is selected
        from the URL with ``pooch.downloaders.choose_downloader``.

    Returns
    -------
    full_path : str
        The absolute path (including the file name) of the file in the
        local storage, or whatever ``processor`` returns when one is given.
    """
    self._assert_file_in_registry(fname)

    remote_url = self.get_url(fname)
    local_file = self.abspath / fname
    expected_hash = self.registry[fname]
    storage_dir = str(self.abspath)

    # ``action`` is compared against 'download' / 'update' below; ``verb`` is
    # only used in the log message.
    action, verb = pooch.core.download_action(local_file, expected_hash)

    if action == 'download' or action == 'update':
        pooch.utils.get_logger().info(
            "%s file '%s' from '%s' to '%s'.", verb, fname, remote_url, storage_dir
        )
        # Fall back to a downloader chosen from the URL when none was given.
        chosen_downloader = (
            downloader
            if downloader is not None
            else pooch.downloaders.choose_downloader(remote_url)
        )
        pooch.core.stream_download(
            remote_url, local_file, expected_hash, chosen_downloader, pooch=self
        )

    local_file_str = str(local_file)
    if processor is None:
        return local_file_str

    return processor(local_file_str, action, self)


# Override fetch method at instance level
# Reference: https://stackoverflow.com/a/46757134/7137180
# Replace fetch() with modified fetch() for this object only
INPUTDATA.fetch = fetch.__get__(INPUTDATA, pooch.Pooch)


def get_grid(grid_name, scrip=False):
"""Return a xarray.Dataset() with POP grid variables.
Expand Down
30 changes: 15 additions & 15 deletions tests/test_grid.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,33 @@
import os

import pytest
import xarray as xr

import pop_tools
from pop_tools import DATASETS

from .util import ds_compare
from .util import ds_compare, is_ncar_host


def test_template():
    """Smoke test: the grid definitions are reachable from the package."""
    grid_definitions = pop_tools.grid_defs
    print(grid_definitions)


def test_get_grid():
for grid in pop_tools.grid_defs.keys():
print('-' * 80)
print(grid)
ds = pop_tools.get_grid(grid)
ds.info()
assert isinstance(ds, xr.Dataset)
print()
@pytest.mark.parametrize('grid', pop_tools.grid_defs.keys())
def test_get_grid(grid):
print(grid)
ds = pop_tools.get_grid(grid)
ds.info()
assert isinstance(ds, xr.Dataset)


def test_get_grid_scrip():
ds_test = pop_tools.get_grid('POP_gx3v7', scrip=True)
ds_ref = xr.open_dataset(DATASETS.fetch('POP_gx3v7.nc'))
ds_ref = xr.open_dataset(pop_tools.DATASETS.fetch('POP_gx3v7.nc'))
assert ds_compare(ds_test, ds_ref, assertion='allclose', rtol=1e-14, atol=1e-14)


@pytest.mark.skipif(not is_ncar_host(), reason="Requires access to one of NCAR's machines.")
def test_cesm_local_inputdata():
    """On NCAR hosts, the inputdata cache path must equal $CESMDATAROOT."""
    # ``get`` yields None when the variable is unset, which makes the
    # comparison below fail rather than raise.
    expected_root = os.environ.get('CESMDATAROOT')
    actual_root = pop_tools.grid.INPUTDATA.path.as_posix()
    assert actual_root == expected_root


def test_get_grid_twice():
ds1 = pop_tools.get_grid('POP_gx1v7')
ds2 = pop_tools.get_grid('POP_gx1v7')
Expand Down
14 changes: 14 additions & 0 deletions tests/util.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,24 @@
import os
import re
import socket

import numpy as np
import pytest
import xarray as xr


def is_ncar_host(hostname=None):
    """Determine if host is an NCAR machine.

    Parameters
    ----------
    hostname : str, optional
        Host name to classify. When omitted (or None), the fully qualified
        domain name of the current machine, ``socket.getfqdn()``, is used.

    Returns
    -------
    bool
        True when the host name contains one of the known NCAR machine
        names ('cheyenne', 'casper', 'hobart', 'izumi'), False otherwise.
    """
    if hostname is None:
        hostname = socket.getfqdn()

    # Plain substring checks match exactly what the former per-name regex
    # searches matched, since none of the names contains a regex
    # metacharacter. Feeding any() a generator lets it stop at the first hit
    # instead of building the whole list first.
    ncar_hosts = ('cheyenne', 'casper', 'hobart', 'izumi')
    return any(ncar_host in hostname for ncar_host in ncar_hosts)


def ds_compare(ds1, ds2, assertion='allequal', rtol=1e-5, atol=1e-8):
"""Compare two datasets."""
assert assertion in ['allequal', 'allclose'], f'unknown assertion: {assertion}'
Expand Down

0 comments on commit bd1236c

Please sign in to comment.