fix codacy stuff and add simple docstring
- add timeout to requests.get
- better exceptions
- add docstring
- {} instead of dict()?
- some other jank
martinvonk committed May 25, 2023
1 parent 5d9c7e1 commit abaa491
Showing 1 changed file with 41 additions and 23 deletions.
64 changes: 41 additions & 23 deletions nlmod/read/knmi_data_platform.py
@@ -37,17 +37,17 @@ def get_anonymous_api_key() -> str:
logger.info(f"Retrieved anonymous API Key from {url}")
return api_key
except Exception as exc:
if Timestamp.today() < Timestamp("2023-07-01"):
if Timestamp.today() < Timestamp("2024-07-01"):
logger.info("Retrieved anonymous API Key from memory")
api_key = (
"eyJvcmciOiI1ZTU1NGUxOTI3NGE5NjAwMDEyYTNlYjEiLCJpZCI6IjI4ZWZl"
"OTZkNDk2ZjQ3ZmE5YjMzNWY5NDU3NWQyMzViIiwiaCI6Im11cm11cjEyOCJ9"
"eyJvcmciOiI1ZTU1NGUxOTI3NGE5NjAwMDEyYTNlYjEiLCJpZCI6ImE1OGI5"
"NGZmMDY5NDRhZDNhZjFkMDBmNDBmNTQyNjBkIiwiaCI6Im11cm11cjEyOCJ9"
)
return api_key
else:
logger.error(
f"Could not retrieve anonymous API Key from {url}, please"
" create your own at https://api.dataplatform.knmi.nl/"
" create your own at https://developer.dataplatform.knmi.nl/"
)
raise exc

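The fallback key above is used only when the live request fails, and only until 2024-07-01; after that users need their own key. A minimal usage sketch (the endpoint path is an assumed example, not taken from this commit):

api_key = get_anonymous_api_key()
# the key is passed as the Authorization header on every request
r = requests.get(f"{base_url}/datasets", headers={"Authorization": api_key}, timeout=120)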
@@ -58,18 +58,22 @@ def get_list_of_files(
api_key: Optional[str] = None,
max_keys: int = 500,
start_after_filename: Optional[str] = None,
timeout: int = 120,
) -> List[str]:
"""Download list of files from KNMI data platform"""
if api_key is None:
api_key = get_anonymous_api_key()
files = []
is_truncated = True
while is_truncated:
url = f"{base_url}/datasets/{dataset_name}/versions/{dataset_version}/files"
r = requests.get(url, headers={"Authorization": api_key})
r = requests.get(url, headers={"Authorization": api_key}, timeout=timeout)
params = {"maxKeys": f"{max_keys}"}
if start_after_filename is not None:
params["startAfterFilename"] = start_after_filename
r = requests.get(url, params=params, headers={"Authorization": api_key})
r = requests.get(
url, params=params, headers={"Authorization": api_key}, timeout=timeout
)
rjson = r.json()
files.extend([x["filename"] for x in rjson["files"]])
is_truncated = rjson["isTruncated"]
@@ -84,22 +88,24 @@ def download_file(
fname: str,
dirname: str = ".",
api_key: Optional[str] = None,
timeout: int = 120,
) -> None:
"""Download file from KNMI data platform"""
if api_key is None:
api_key = get_anonymous_api_key()
url = (
f"{base_url}/datasets/{dataset_name}/versions/"
f"{dataset_version}/files/{fname}/url"
)
r = requests.get(url, headers={"Authorization": api_key})
r = requests.get(url, headers={"Authorization": api_key}, timeout=timeout)
if not os.path.isdir(dirname):
os.makedirs(dirname)
logger.info(f"Download {fname} to {dirname}")
fname = os.path.join(dirname, fname)
data = r.json()
if "temporaryDownloadUrl" not in data:
raise (Exception(f"{fname} not found"))
with requests.get(data["temporaryDownloadUrl"], stream=True) as r:
raise FileNotFoundError(f"{fname} not found")
with requests.get(data["temporaryDownloadUrl"], stream=True, timeout=timeout) as r:
r.raise_for_status()
with open(fname, "wb") as f:
for chunk in r.iter_content(chunk_size=8192):
@@ -112,18 +118,22 @@ def download_files(
fnames: List[str],
dirname: str = ".",
api_key: Optional[str] = None,
timeout: int = 120,
) -> None:
"""Download multiple files from KNMI data platform"""
for fname in tqdm(fnames):
download_file(
dataset_name=dataset_name,
dataset_version=dataset_version,
fname=fname,
dirname=dirname,
api_key=api_key,
timeout=timeout,
)

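Together, the listing and download helpers form a small fetch pipeline; a hedged sketch (dataset name, version, and target directory are placeholder examples, not taken from this commit):

# list every file in a dataset version, then download them with the new timeout
fnames = get_list_of_files("EV24", "2")
download_files("EV24", "2", fnames, dirname="data", timeout=120)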

def read_nc(fo: Union[str, FileIO], **kwargs: dict) -> xr.Dataset:
"""Read netcdf (.nc) file to xarray Dataset"""
# providing the argument engine="h5netcdf" could help
return xr.open_dataset(fo, **kwargs)

@@ -146,10 +156,14 @@ def get_timestamp_from_fname(fname: str) -> Union[Timestamp, None]:
dtime = Timestamp(year=year, month=month, day=day, hour=hour, minute=minute)
return dtime
else:
raise Exception("Could not fine timestamp formatted as YYYYMMDDHHMM from fname")
raise FileNotFoundError(
"Could not find filename with timestamp formatted as YYYYMMDDHHMM"
)
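For example, a name carrying a YYYYMMDDHHMM stamp would parse as follows (the file name is a hypothetical example):

# 202305250000 -> year 2023, month 05, day 25, hour 00, minute 00
get_timestamp_from_fname("RAD_NL25_RAC_MFBS_EM_5min_202305250000.h5")
# Timestamp('2023-05-25 00:00:00')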


def add_h5_meta(meta: Dict[str, Any], h5obj: Any, orig_ky: str = "") -> Dict[str, Any]:
"""Read metadata from hdf5 (.h5) file and add to existing metadata dictionary"""

def cleanup(val: Any) -> Any:
if isinstance(val, (ndarray, list)):
if len(val) == 1:
@@ -163,29 +177,35 @@ def cleanup(val: Any) -> Any:
if hasattr(h5obj, "attrs"):
attrs = getattr(h5obj, "attrs")
submeta = {f"{orig_ky}/{ky}": cleanup(val) for ky, val in attrs.items()}
return meta | submeta
meta.update(submeta)
return meta
else:
return meta
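The helper flattens HDF5 attributes into "group/key"-style entries on one dict; a hedged sketch (the group and attribute names are assumed examples):

# given an open h5File handle h5fo whose "overview" group has an
# attribute "product_datetime_start", the returned dict gains the
# key "overview/product_datetime_start"
meta = add_h5_meta({}, h5fo["overview"], "overview")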


class MultipleDatasetsFound(Exception):
pass


def read_h5_contents(h5fo: h5File) -> Tuple[ndarray, Dict[str, Any]]:
"""Read contents from a hdf5 (.h5) file"""
data = None
meta = {}
for ky in h5fo.keys():
for ky in h5fo:
group = h5fo[ky]
meta = add_h5_meta(meta, group, f"{ky}")
for gky in group.keys():
for gky in group:
member = group[gky]
meta = add_h5_meta(meta, member, f"{ky}/{gky}")
if isinstance(member, h5Dataset):
if data is None:
data = member[:]
else:
raise Exception("h5 contains multiple Datasets")
raise MultipleDatasetsFound("h5 contains multiple datasets")
return data, meta


def read_h5(fo: Union[str, FileIO]) -> xr.Dataset:
"""Read hdf5 (.h5) file to xarray Dataset"""
with h5File(fo) as h5fo:
data, meta = read_h5_contents(h5fo)

@@ -198,26 +218,22 @@ def read_h5(fo: Union[str, FileIO]) -> xr.Dataset:
t = Timestamp(meta["overview/product_datetime_start"])

ds = xr.Dataset(
data_vars=dict(data=(["y", "x"], array(data, dtype=float))),
coords=dict(
x=x,
y=y,
time=t,
),
data_vars={"data": (["y", "x"], array(data, dtype=float))},
coords={"x": x, "y": y, "time": t},
attrs=meta,
)

return ds
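The resulting Dataset holds a single 2-D field with a scalar time coordinate; a hedged usage sketch (the file name is a placeholder):

ds = read_h5("RAD_NL25_RAC_MFBS_EM_5min_202305250000.h5")
ds = ds.expand_dims("time")  # promote the scalar coordinate if a time dimension is needed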


def read_grib(
fo: Union[str, FileIO], filter_by_keys=None, **kwargs: dict
) -> xr.Dataset:
"""Read GRIB file to xarray Dataset"""

if filter_by_keys is not None:
if "backend_kwargs" not in kwargs.keys():
if "backend_kwargs" not in kwargs:
kwargs["backend_kwargs"] = {}
kwargs["backend_kwargs"]["filter_by_keys"] = filter_by_keys
if "errors" not in kwargs["backend_kwargs"]:
@@ -229,6 +245,7 @@ def read_dataset_from_zip(
def read_dataset_from_zip(
fname: str, hour: Optional[int] = None, **kwargs: dict
) -> xr.Dataset:
"""Read KNMI data platfrom .zip file to xarray Dataset"""
if fname.endswith(".zip"):
with ZipFile(fname) as zipfo:
fnames = sorted([x for x in zipfo.namelist() if not x.endswith("/")])
@@ -256,6 +273,7 @@ def read_dataset(
hour: Optional[int] = None,
**kwargs: dict,
) -> xr.Dataset:
"""Read xarray dataset from different file types; .nc, .h5 or grib file"""
if hour is not None:
if hour == 24:
hour = 0
@@ -282,6 +300,6 @@
elif isinstance(zipfo, ZipFile):
data.append(read_grib(fo, **kwargs))
else:
raise Exception(f"Can't read file {file}")
raise ValueError(f"Can't read/handle file {file}")

return xr.concat(data, dim="time")
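End to end, a downloaded archive can thus become one time-stacked Dataset; a hedged sketch (the zip name and hour are placeholders, and keyword arguments are assumed to be forwarded to the per-format readers):

ds = read_dataset_from_zip("data/radar_forecast.zip", hour=12)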
