Skip to content

Commit

Permalink
Merge pull request #35 from ecmwf/feature/dataarray_xarray
Browse files Browse the repository at this point in the history
Feature/dataarray xarray
  • Loading branch information
mathleur authored Jun 28, 2023
2 parents 6b32085 + 977f1e8 commit a293ecb
Show file tree
Hide file tree
Showing 22 changed files with 76 additions and 77 deletions.
2 changes: 1 addition & 1 deletion examples/3D_shipping_route.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class Test:
def setup_method(self):
ds = data.from_source("file", "./examples/data/winds.grib")
array = ds.to_xarray()
array = array.isel(time=0).isel(surface=0).isel(number=0)
array = array.isel(time=0).isel(surface=0).isel(number=0).u10
self.array = array
self.slicer = HullSlicer()
self.API = Polytope(datacube=array, engine=self.slicer)
Expand Down
2 changes: 1 addition & 1 deletion examples/4D_flight_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class Test:
def setup_method(self):
ds = data.from_source("file", "./examples/data/temp_model_levels.grib")
array = ds.to_xarray()
array = array.isel(time=0)
array = array.isel(time=0).t
options = {"longitude": {"Cyclic": [0, 360.0]}}
self.xarraydatacube = XArrayDatacube(array)
for dim in array.dims:
Expand Down
2 changes: 1 addition & 1 deletion examples/country_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class Test:
def setup_method(self, method):
ds = data.from_source("file", ".examples/data/output8.grib")
array = ds.to_xarray()
array = array.isel(surface=0).isel(step=0).isel(number=0).isel(time=0)
array = array.isel(surface=0).isel(step=0).isel(number=0).isel(time=0).t2m
options = {"longitude": {"Cyclic": [0, 360.0]}}
self.xarraydatacube = XArrayDatacube(array)
self.slicer = HullSlicer()
Expand Down
2 changes: 1 addition & 1 deletion examples/cyclic_route_around_earth.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Test:
def setup_method(self, method):
ds = data.from_source("file", ".examples/data/output8.grib")
array = ds.to_xarray()
array = array.isel(surface=0).isel(step=0).isel(number=0).isel(time=0)
array = array.isel(surface=0).isel(step=0).isel(number=0).isel(time=0).t2m
options = {"longitude": {"Cyclic": [0, 360.0]}}
self.xarraydatacube = XArrayDatacube(array)
self.slicer = HullSlicer()
Expand Down
2 changes: 1 addition & 1 deletion examples/read_me_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

ds = data.from_source("file", "./examples/data/winds.grib")
array = ds.to_xarray()
array = array.isel(time=0).isel(surface=0).isel(number=0)
array = array.isel(time=0).isel(surface=0).isel(number=0).u10

options = {"longitude": {"Cyclic": [0, 360.0]}}

Expand Down
2 changes: 1 addition & 1 deletion examples/slicing_all_ecmwf_countries.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class Test:
def setup_method(self, method):
ds = data.from_source("file", "./examples/data/output8.grib")
array = ds.to_xarray()
array = array.isel(surface=0).isel(step=0).isel(number=0).isel(time=0)
array = array.isel(surface=0).isel(step=0).isel(number=0).isel(time=0).t2m
options = {"longitude": {"Cyclic": [0, 360.0]}}
self.xarraydatacube = XArrayDatacube(array)
self.slicer = HullSlicer()
Expand Down
2 changes: 1 addition & 1 deletion examples/timeseries_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ class Test:
def setup_method(self):
ds = data.from_source("file", "./examples/data/timeseries_t2m.grib")
array = ds.to_xarray()
array = array.isel(step=0).isel(surface=0).isel(number=0)
array = array.isel(step=0).isel(surface=0).isel(number=0).t2m
self.xarraydatacube = XArrayDatacube(array)
for dim in array.dims:
array = array.sortby(dim)
Expand Down
10 changes: 6 additions & 4 deletions examples/wind_farms.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class Test:
def setup_method(self):
ds = data.from_source("file", "./examples/data/winds.grib")
array = ds.to_xarray()
array = array.isel(time=0).isel(surface=0).isel(number=0)
array = array.isel(time=0).isel(surface=0).isel(number=0).u10
self.array = array
options = {"longitude": {"Cyclic": [0, 360.0]}}
self.xarraydatacube = XArrayDatacube(array)
Expand Down Expand Up @@ -71,10 +71,12 @@ def test_slice_wind_farms(self):
long = cubepath["longitude"]
lats.append(lat)
longs.append(long)
u10_idx = result.leaves[i].result["u10"]
# u10_idx = result.leaves[i].result["u10"]
u10_idx = result.leaves[i].result[1]
wind_u = u10_idx
v10_idx = result.leaves[i].result["v10"]
wind_v = v10_idx
# v10_idx = result.leaves[i].result["v10"]
# wind_v = v10_idx
wind_v = 0
winds_u.append(wind_u)
winds_v.append(wind_v)
parameter_values.append(math.sqrt(wind_u**2 + wind_v**2))
Expand Down
2 changes: 1 addition & 1 deletion performance/scalability_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

class Test:
def setup_method(self):
array = xr.open_dataset("../examples/data/temp_model_levels.grib", engine="cfgrib")
array = xr.open_dataset("../examples/data/temp_model_levels.grib", engine="cfgrib").t
options = {"longitude": {"Cyclic": [0, 360.0]}}
self.xarraydatacube = XArrayDatacube(array)
for dim in array.dims:
Expand Down
42 changes: 21 additions & 21 deletions performance/scalability_test_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

class Test:
def setup_method(self):
array = xr.open_dataset("../examples/data/temp_model_levels.grib", engine="cfgrib")
array = xr.open_dataset("./examples/data/temp_model_levels.grib", engine="cfgrib").t
options = {"longitude": {"Cyclic": [0, 360.0]}}
self.xarraydatacube = XArrayDatacube(array)
for dim in array.dims:
Expand Down Expand Up @@ -82,23 +82,23 @@ def test_scalability_2D_v3(self):
print(len(result.leaves))
print(time.time() - time_start)

def test_scalability_2D_v4(self):
union = Box(["latitude", "longitude"], [0 - 100, 0], [20 - 100, 36])
for i in range(9):
box = Box(["latitude", "longitude"], [20 * (i + 1) - 100, 0], [20 * (i + 2) - 100, 36])
union = Union(["latitude", "longitude"], union, box)
for j in range(9):
box = Box(["latitude", "longitude"], [0 - 100, 36 * (j + 1)], [20 - 100, 36 * (j + 2)])
union = Union(["latitude", "longitude"], union, box)
for i in range(9):
for j in range(9):
box = Box(
["latitude", "longitude"], [20 * (i + 1) - 100, 36 * (j + 1)], [20 * (i + 2) - 100, 36 * (j + 2)]
)
union = Union(["latitude", "longitude"], union, box)
time_start = time.time()
print(time_start)
request = Request(union, Select("step", [np.timedelta64(0, "ns")]), Select("hybrid", [1]))
result = self.API.retrieve(request)
print(len(result.leaves))
print(time.time() - time_start)
# def test_scalability_2D_v4(self):
# union = Box(["latitude", "longitude"], [0 - 100, 0], [20 - 100, 36])
# for i in range(9):
# box = Box(["latitude", "longitude"], [20 * (i + 1) - 100, 0], [20 * (i + 2) - 100, 36])
# union = Union(["latitude", "longitude"], union, box)
# for j in range(9):
# box = Box(["latitude", "longitude"], [0 - 100, 36 * (j + 1)], [20 - 100, 36 * (j + 2)])
# union = Union(["latitude", "longitude"], union, box)
# for i in range(9):
# for j in range(9):
# box = Box(
# ["latitude", "longitude"], [20 * (i + 1) - 100, 36 * (j + 1)], [20 * (i + 2) - 100, 36 * (j + 2)]
# )
# union = Union(["latitude", "longitude"], union, box)
# time_start = time.time()
# print(time_start)
# request = Request(union, Select("step", [np.timedelta64(0, "ns")]), Select("hybrid", [1]))
# result = self.API.retrieve(request)
# print(len(result.leaves))
# print(time.time() - time_start)
7 changes: 4 additions & 3 deletions polytope/datacube/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
np.float64: FloatAxis(),
np.str_: UnsliceableaAxis(),
str: UnsliceableaAxis(),
np.object_: UnsliceableaAxis(),
}


Expand Down Expand Up @@ -66,9 +67,9 @@ def get(self, requests: IndexTree):
path = self.remap_path(path)
if len(path.items()) == len(self.dataarray.coords):
subxarray = self.dataarray.sel(path, method="nearest")
data_variables = subxarray.data_vars
result_tuples = [(key, value) for key, value in data_variables.items()]
r.result = dict(result_tuples)
value = subxarray.item()
key = subxarray.name
r.result = (key, value)
else:
r.remove_branch()

Expand Down
6 changes: 3 additions & 3 deletions tests/test_cyclic_axis_over_negative_vals.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
class TestSlicing3DXarrayDatacube:
def setup_method(self, method):
# Create a dataarray with 4 labelled axes (date, step, level, long) using different index types
dims = np.random.randn(3, 6, 129, 11)
array = xr.Dataset(
data_vars=dict(param=(["date", "step", "level", "long"], dims)),
array = xr.DataArray(
np.random.randn(3, 6, 129, 11),
dims=("date", "step", "level", "long"),
coords={
"date": pd.date_range("2000-01-01", "2000-01-03", 3),
"step": [0, 3, 6, 9, 12, 15],
Expand Down
6 changes: 3 additions & 3 deletions tests/test_cyclic_axis_slicer_not_0.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
class TestSlicing3DXarrayDatacube:
def setup_method(self, method):
# Create a dataarray with 4 labelled axes (date, step, level, long) using different index types
dims = np.random.randn(3, 6, 129, 11)
array = xr.Dataset(
data_vars=dict(param=(["date", "step", "level", "long"], dims)),
array = xr.DataArray(
np.random.randn(3, 6, 129, 11),
dims=("date", "step", "level", "long"),
coords={
"date": pd.date_range("2000-01-01", "2000-01-03", 3),
"step": [0, 3, 6, 9, 12, 15],
Expand Down
10 changes: 6 additions & 4 deletions tests/test_cyclic_axis_slicing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
class TestSlicing3DXarrayDatacube:
def setup_method(self, method):
# Create a dataarray with 4 labelled axes (date, step, level, long) using different index types
dims = np.random.randn(3, 6, 129, 11)
array = xr.Dataset(
data_vars=dict(param=(["date", "step", "level", "long"], dims)),
array = xr.DataArray(
np.random.randn(3, 6, 129, 11),
dims=("date", "step", "level", "long"),
coords={
"date": pd.date_range("2000-01-01", "2000-01-03", 3),
"step": [0, 3, 6, 9, 12, 15],
Expand Down Expand Up @@ -169,7 +169,9 @@ def test_cyclic_float_axis_inside_cyclic_range(self):

def test_cyclic_float_axis_above_axis_range(self):
request = Request(
Box(["step", "long"], [0, 1.3], [3, 1.7]), Select("date", ["2000-01-01"]), Select("level", [128])
Box(["step", "long"], [0, 1.3], [3, 1.7]),
Select("date", ["2000-01-01"]),
Select("level", [128]),
)
result = self.API.retrieve(request)
# result.pprint()
Expand Down
11 changes: 6 additions & 5 deletions tests/test_datacube_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,22 @@ def setup_method(self, method):
def test_validate(self):
dims = np.random.randn(1, 1, 1)
array = xr.Dataset(data_vars=dict(param=(["x", "y", "z"], dims)), coords={"x": [1], "y": [1], "z": [1]})
array = array.to_array()

datacube = Datacube.create(array, options={})
datacube = Datacube.create(array, options={})

datacube.validate(["x", "y", "z"])
datacube.validate(["x", "z", "y"])
datacube.validate(["x", "y", "z", "variable"])
datacube.validate(["x", "z", "y", "variable"])

with pytest.raises(AxisNotFoundError):
datacube.validate(["x", "y", "z", "w"])
datacube.validate(["x", "y", "z", "w", "variable"])

with pytest.raises(AxisNotFoundError):
datacube.validate(["w", "x", "y", "z"])
datacube.validate(["w", "x", "y", "z", "variable"])

with pytest.raises(AxisOverdefinedError):
datacube.validate(["x", "x", "y", "z"])
datacube.validate(["x", "x", "y", "z", "variable"])

def test_create(self):
# Create a dataarray with 3 labelled axes using different index types
Expand Down
9 changes: 3 additions & 6 deletions tests/test_float_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,20 @@
class TestFloatType:
def setup_method(self, method):
# Create a dataarray with 3 labelled axes using float type
dims = np.random.randn(100, 101, 200)
array = xr.Dataset(
data_vars=dict(param=(["lat", "long", "alt"], dims)),
array = xr.DataArray(
np.random.randn(100, 101, 200),
dims=("lat", "long", "alt"),
coords={
"lat": np.arange(0.0, 10.0, 0.1),
"long": np.arange(4.09999, 4.1 + 0.0000001, 0.0000001),
"alt": np.arange(0.0, 20.0, 0.1),
},
)

self.xarraydatacube = XArrayDatacube(array)
self.slicer = HullSlicer()
self.API = Polytope(datacube=array, engine=self.slicer)

def test_slicing_span(self):
# TODO: some problems with floating point values and values inside the datacube being slightly off.
# This has been fixed by introducing tolerances, but could be better handled using exact arithmetic.
request = Request(Span("lat", 4.1, 4.3), Select("long", [4.1]), Select("alt", [4.1]))
result = self.API.retrieve(request)
result.pprint()
Expand Down
1 change: 0 additions & 1 deletion tests/test_hullslicer_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def setup_method(self, method):
"level": np.arange(0, 100, 1),
},
)

self.xarraydatacube = XArrayDatacube(array)
self.slicer = HullSlicer()
self.API = Polytope(datacube=array, engine=self.slicer)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_slicer_era5.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
class TestSlicingEra5Data:
def setup_method(self, method):
ds = data.from_source("file", "./tests/data/era5-levels-members.grib")
array = ds.to_xarray().isel(step=0)
array = ds.to_xarray().isel(step=0).t
self.xarraydatacube = XArrayDatacube(array)
self.slicer = HullSlicer()
self.API = Polytope(datacube=array, engine=self.slicer)
Expand Down
9 changes: 4 additions & 5 deletions tests/test_slicer_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,15 @@
class TestXarraySlicing:
def setup_method(self, method):
# Create a dataarray with 3 labelled axes using different index types
dims = np.random.randn(3, 6, 129)
array = xr.Dataset(
data_vars=dict(param=(["date", "step", "level"], dims)),
array = xr.DataArray(
np.random.randn(3, 6, 129),
dims=("date", "step", "level"),
coords={
"date": pd.date_range("2000-01-01", "2000-01-03", 3),
"step": [0, 3, 6, 9, 12, 15],
"level": range(1, 130),
},
)

self.slicer = HullSlicer()
self.API = Polytope(datacube=array, engine=self.slicer)

Expand All @@ -38,7 +37,7 @@ def test_2D_box_with_date_range(self):

def test_3D_box_with_date(self):
request = Request(
Box(["step", "level", "date"], [3, 10, pd.Timestamp("2000-01-01")], [6, 11, pd.Timestamp("2000-01-01")]),
Box(["step", "level", "date"], [3, 10, pd.Timestamp("2000-01-01")], [6, 11, pd.Timestamp("2000-01-01")])
)
result = self.API.retrieve(request)
result.pprint()
7 changes: 4 additions & 3 deletions tests/test_slicing_unsliceable_axis.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
class TestSlicing3DXarrayDatacube:
def setup_method(self, method):
# create a dataarray with 3 labelled axes using different index types
dims = np.random.randn(3, 1, 129)
array = xr.Dataset(
data_vars=dict(param=(["date", "variable", "level"], dims)),
array = xr.DataArray(
np.random.randn(3, 1, 129),
dims=("date", "variable", "level"),
coords={"date": pd.date_range("2000-01-01", "2000-01-03", 3), "variable": ["a"], "level": range(1, 130)},
)
self.xarraydatacube = XArrayDatacube(array)
Expand All @@ -36,6 +36,7 @@ def test_finding_nonexisting_variable(self):
result.pprint()

def test_unsliceable_axis_in_a_shape(self):
# Requesting a shape (e.g. a Box or Disk) that spans an unsliceable axis is expected to raise UnsliceableShapeError.
request = Request(Box(["level", "variable"], [10, "a"], [11, "a"]), Select("date", ["2000-01-01"]))
with pytest.raises(UnsliceableShapeError):
result = self.API.retrieve(request)
Expand Down
6 changes: 3 additions & 3 deletions tests/test_slicing_xarray_3D.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
class TestSlicing3DXarrayDatacube:
def setup_method(self, method):
# Create a dataarray with 3 labelled axes using different index types
dims = np.random.randn(3, 6, 129)
array = xr.Dataset(
data_vars=dict(param=(["date", "step", "level"], dims)),
array = xr.DataArray(
np.random.randn(3, 6, 129),
dims=("date", "step", "level"),
coords={
"date": pd.date_range("2000-01-01", "2000-01-03", 3),
"step": [0, 3, 6, 9, 12, 15],
Expand Down
11 changes: 4 additions & 7 deletions tests/test_slicing_xarray_4D.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,16 @@
class TestSlicing4DXarrayDatacube:
def setup_method(self, method):
# Create a dataarray with 4 labelled axes using different index types
dims = np.random.randn(3, 7, 129, 100)
array = xr.Dataset(
data_vars=dict(param=(["date", "step", "level", "lat"], dims)),
array = xr.DataArray(
np.random.randn(3, 7, 129, 100),
dims=("date", "step", "level", "lat"),
coords={
"date": pd.date_range("2000-01-01", "2000-01-03", 3),
"step": [0, 3, 6, 9, 12, 15, 18],
"level": range(1, 130),
"lat": np.around(np.arange(0.0, 10.0, 0.1), 15),
},
)

self.xarraydatacube = XArrayDatacube(array)
self.slicer = HullSlicer()
self.API = Polytope(datacube=array, engine=self.slicer)
Expand All @@ -47,9 +46,7 @@ def test_3D_box(self):
assert len(result.leaves) == 2 * 2 * 11

def test_4D_box(self):
request = Request(
Box(["step", "level", "lat", "date"], [3, 10, 5.0, "2000-01-01"], [6, 11, 6.0, "2000-01-02"]),
)
request = Request(Box(["step", "level", "lat", "date"], [3, 10, 5.0, "2000-01-01"], [6, 11, 6.0, "2000-01-02"]))
result = self.API.retrieve(request)
assert len(result.leaves) == 2 * 2 * 11 * 2

Expand Down

0 comments on commit a293ecb

Please sign in to comment.