From 4dc72de59f72c45637085733dad25132803e8189 Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Tue, 20 Jun 2023 10:59:53 +0530 Subject: [PATCH 01/35] xarray.dataset.tail --- xarray/core/dataset.py | 143 ++++++++++++++++++++++++++++++++++------- 1 file changed, 121 insertions(+), 22 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 433c724cc21..dba3f8c0c77 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2720,6 +2720,37 @@ def tail( The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. + Example + ------- + + # Sample dataset + >>> data = xr.DataArray([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dims=("x", "y")) + >>> dataset = xr.Dataset({"data": data}) + + # Print the original dataset + >>> "Original Dataset:" + >>> dataset + + # Get the last 2 elements using tail() + >>> tail_dataset = dataset.tail(2) + + # Print the tail dataset + >>> "Tail Dataset:" + >>> tail_dataset + 'Original Dataset:' + + Dimensions: (x: 3, y: 3) + Dimensions without coordinates: x, y + Data variables: + data (x, y) int64 1 2 3 4 5 6 7 8 9 + + 'Tail Dataset:' + + Dimensions: (x: 2, y: 2) + Dimensions without coordinates: x, y + Data variables: + data (x, y) int64 5 6 8 9 + See Also -------- Dataset.head @@ -8399,30 +8430,64 @@ def idxmax( def argmin(self: T_Dataset, dim: Hashable | None = None, **kwargs) -> T_Dataset: """Indices of the minima of the member variables. - If there are multiple minima, the indices of the first one found will be - returned. + If there are multiple minima, the indices of the first one found will be + returned. + + Parameters + ---------- + dim : Hashable, optional + The dimension over which to find the minimum. 
By default, finds minimum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will be an error, since DataArray.argmin will + return a dict with indices for all dimensions, which does not make sense for + a Dataset. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). + + Returns + ------- + result : Dataset + + Example + ------- - Parameters - ---------- - dim : Hashable, optional - The dimension over which to find the minimum. By default, finds minimum over - all dimensions - for now returning an int for backward compatibility, but - this is deprecated, in future will be an error, since DataArray.argmin will - return a dict with indices for all dimensions, which does not make sense for - a Dataset. - keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). + # Defined the dataset + >>> dataset = xr.Dataset( + ... { + ... "math_scores": ( + ... ["student", "test"], + ... [[90, 85, 79], [78, 80, 85], [95, 92, 98]], + ... ), + ... "english_scores": ( + ... ["student", "test"], + ... [[88, 80, 92], [75, 95, 79], [93, 96, 78]], + ... ), + ... }, + ... coords={ + ... 
"student": ["Alice", "Bob", "Charlie"], + ... "test": ["Test 1", "Test 2", "Test 3"], + ... }, + ... ) - Returns - ------- - result : Dataset + # Indices of the minimum values along the 'student' dimension are calculated + >>> argmin_indices = dataset.argmin(dim="student") + + # Print the indices of the minimum values + >>> argmin_indices + + Dimensions: (test: 3) + Coordinates: + * test (test) T_Dataset: ------- result : Dataset + Example + ------- + + #Defined the dataset + >>> dataset = xr.Dataset( + ... { + ... "math_scores": ( + ... ["student", "test"], + ... [[90, 85, 92], [78, 80, 85], [95, 92, 98]], + ... ), + ... "english_scores": ( + ... ["student", "test"], + ... [[88, 90, 92], [75, 82, 79], [93, 96, 91]], + ... ), + ... }, + ... coords={ + ... "student": ["Alice", "Bob", "Charlie"], + ... "test": ["Test 1", "Test 2", "Test 3"], + ... }, + ... ) + + # Indices of the minimum values along the 'student' dimension are calculated + >>> argmax_indices = dataset.argmax(dim="test") + + # Print the indices of the minimum values + >>> argmax_indices + + Dimensions: (student: 3) + Coordinates: + * student (student) Date: Tue, 20 Jun 2023 11:23:54 +0530 Subject: [PATCH 02/35] xarray.dataset.head --- xarray/core/dataset.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index dba3f8c0c77..1c58659febe 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2674,6 +2674,43 @@ def head( The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. + Example + ------- + + # Sample Dataset + >>> dataset = xr.Dataset( + ... { + ... "temperature": [25.1, 28.3, 30.5, 27.2, 26.8], + ... "humidity": [60.2, 55.6, 50.3, 58.8, 61.7], + ... }, + ... coords={"time": [1, 2, 3, 4, 5]}, + ... 
) + + # Originial Dataset + >>> dataset + + Dimensions: (temperature: 5, humidity: 5, time: 5) + Coordinates: + * temperature (temperature) float64 25.1 28.3 30.5 27.2 26.8 + * humidity (humidity) float64 60.2 55.6 50.3 58.8 61.7 + * time (time) int64 1 2 3 4 5 + Data variables: + *empty* + + # Use head() function to retrieve the first three elements + >>> head_dataset = dataset.head(2) + + # Print the head dataset + >>> head_dataset + + Dimensions: (temperature: 2, humidity: 2, time: 2) + Coordinates: + * temperature (temperature) float64 25.1 28.3 + * humidity (humidity) float64 60.2 55.6 + * time (time) int64 1 2 + Data variables: + *empty* + See Also -------- Dataset.tail From 052fc8fd56b424eeac6d2f94d72ebcaa726e4618 Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Tue, 20 Jun 2023 11:55:53 +0530 Subject: [PATCH 03/35] xarray.dataset.dropna --- xarray/core/dataset.py | 73 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 1c58659febe..8c99a7194fc 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5617,6 +5617,79 @@ def dropna( Which variables to check for missing values. By default, all variables in the dataset are checked. + Example + ------- + + # Sample dataset with missing values + >>> data = { + ... "time": [0, 1, 2, 3], + ... "temperature": [25.0, None, 27.5, 28.0], + ... "humidity": [60.0, 65.0, None, 70.0], + ... 
} + >>> dataset = xr.Dataset(data) + # Print the original dataset + >>> dataset + + Dimensions: (time: 4, temperature: 4, humidity: 4) + Coordinates: + * time (time) int64 0 1 2 3 + * temperature (temperature) object 25.0 None 27.5 28.0 + * humidity (humidity) object 60.0 65.0 None 70.0 + Data variables: + *empty* + + # Drop rows with any missing values + >>> dataset_dropped_any = dataset.dropna(dim="time", how="any") + # Print the dataset after dropping rows with any missing values + >>> dataset_dropped_any + + Dimensions: (time: 4, temperature: 4, humidity: 4) + Coordinates: + * time (time) int64 0 1 2 3 + * temperature (temperature) object 25.0 None 27.5 28.0 + * humidity (humidity) object 60.0 65.0 None 70.0 + Data variables: + *empty* + + # Drop rows with all missing values + >>> dataset_dropped_all = dataset.dropna(dim="time", how="all") + # Print the dataset after dropping rows with all missing values + >>> dataset_dropped_all + + Dimensions: (time: 0, temperature: 4, humidity: 4) + Coordinates: + * time (time) int64 + * temperature (temperature) object 25.0 None 27.5 28.0 + * humidity (humidity) object 60.0 65.0 None 70.0 + Data variables: + *empty* + + # Drop rows with a threshold of non-missing values + >>> dataset_dropped_thresh = dataset.dropna(dim="time", thresh=2) + # Print the dataset after dropping rows with a threshold of non-missing values + >>> dataset_dropped_thresh + + Dimensions: (time: 0, temperature: 4, humidity: 4) + Coordinates: + * time (time) int64 + * temperature (temperature) object 25.0 None 27.5 28.0 + * humidity (humidity) object 60.0 65.0 None 70.0 + Data variables: + *empty* + + # Drop rows for a subset of variables + >>> dataset_dropped_subset = dataset.dropna(dim="time", subset=["temperature"]) + # Print the dataset after dropping rows for a subset of variables + >>> dataset_dropped_subset + + Dimensions: (time: 4, temperature: 4, humidity: 4) + Coordinates: + * time (time) int64 0 1 2 3 + * temperature (temperature) object 25.0 
None 27.5 28.0 + * humidity (humidity) object 60.0 65.0 None 70.0 + Data variables: + *empty* + Returns ------- Dataset From 682086357761a727e5794ec3142a15eeabe8a2a0 Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Tue, 20 Jun 2023 12:26:49 +0530 Subject: [PATCH 04/35] xarray.dataset.ffill --- xarray/core/dataset.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 8c99a7194fc..90affdc049c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5959,6 +5959,34 @@ def ffill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset than 0 or None for no limit. Must be None or greater than or equal to axis length if filling along chunked axes (dimensions). + Example + ------- + # Create a sample dataset with missing values + >>> time = pd.date_range("2023-01-01", periods=10, freq="D") + >>> data = np.array([1, np.nan, 3, np.nan, 5, 6, np.nan, 8, np.nan, 10]) + >>> dataset = xr.Dataset({"data": (("time",), data)}, coords={"time": time}) + + # Perform forward fill (ffill) on the dataset + >>> filled_dataset = dataset.ffill(dim="time") + + # Print the original dataset + >>> dataset + + Dimensions: (time: 10) + Coordinates: + * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 + Data variables: + data (time) float64 1.0 nan 3.0 nan 5.0 6.0 nan 8.0 nan 10.0 + + # Print the filled dataset, fills NaN values by propagating values forward + >>> filled_dataset + + Dimensions: (time: 10) + Coordinates: + * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 
2023-01-10 + Data variables: + data (time) float64 1.0 1.0 3.0 3.0 5.0 6.0 6.0 8.0 8.0 10.0 + Returns ------- Dataset From fd7eed46b7f789edafbb239b11d22222653262bf Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Tue, 20 Jun 2023 15:15:54 +0530 Subject: [PATCH 05/35] xarray.dataset.bfill --- xarray/core/dataset.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 90affdc049c..b311b5079cf 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6013,6 +6013,36 @@ def bfill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset than 0 or None for no limit. Must be None or greater than or equal to axis length if filling along chunked axes (dimensions). + Example + ------- + # Create a sample dataset + >>> data = np.array([[1, 2, np.nan], [4, np.nan, 6], [np.nan, 8, 9]]) + >>> coords = {"x": [0, 1, 2], "y": [0, 1, 2]} + >>> dataset = xr.Dataset({"data": (["x", "y"], data)}, coords=coords) + + # Print the original dataset + >>> dataset + + Dimensions: (x: 3, y: 3) + Coordinates: + * x (x) int64 0 1 2 + * y (y) int64 0 1 2 + Data variables: + data (x, y) float64 1.0 2.0 nan 4.0 nan 6.0 nan 8.0 9.0 + + # Apply backward fill (bfill) along the 'y' dimension + >>> dataset_bfill = dataset.bfill(dim="y") + + # Print the dataset after backward fill + >>> dataset_bfill + + Dimensions: (x: 3, y: 3) + Coordinates: + * x (x) int64 0 1 2 + * y (y) int64 0 1 2 + Data variables: + data (x, y) float64 1.0 2.0 nan 4.0 6.0 6.0 8.0 8.0 9.0 + Returns ------- Dataset From f9ff19c926a9fdc6ca9749e2148a519724b9bc78 Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Tue, 20 Jun 2023 16:26:09 +0530 Subject: [PATCH 06/35] xarray.dataset.set_Coords --- xarray/core/dataset.py | 61 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index b311b5079cf..18442c77193 100644 --- 
a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1726,19 +1726,58 @@ def data_vars(self) -> DataVariables: def set_coords(self: T_Dataset, names: Hashable | Iterable[Hashable]) -> T_Dataset: """Given names of one or more variables, set them as coordinates - Parameters - ---------- - names : hashable or iterable of hashable - Name(s) of variables in this dataset to convert into coordinates. + Parameters + ---------- + names : hashable or iterable of hashable + Name(s) of variables in this dataset to convert into coordinates. - Returns - ------- - Dataset + Example + ------- - See Also - -------- - Dataset.swap_dims - Dataset.assign_coords + # Sample dataset + >>> data = xr.DataArray( + ... [[1, 2], [3, 4]], + ... dims=("x", "y"), + ... coords={"x": [0, 1], "y": [0, 1]}, + ... name="data", + ... ) + >>> dataset = xr.Dataset(data_vars={"data": data}) + + # Print the dataset before setting coordinates + >>> dataset + + Dimensions: (x: 2, y: 2) + Coordinates: + * x (x) int64 0 1 + * y (y) int64 0 1 + Data variables: + data (x, y) int64 1 2 3 4 + + + # Set "x" and "y" variables as coordinates + >>> dataset_coords = dataset.set_coords(["x", "y"]) + + # Print the dataset after setting coordinates + >>> dataset_coords + + Dimensions: (x: 2, y: 2) + Coordinates: + x (x) int64 0 1 + y (y) int64 0 1 + Data variables: + data (x, y) int64 1 2 3 4 + + In the initial dataset, the "x" and "y" variables are present as dimensions. After calling ``set_coords`` (["x", "y"]), these + variables are converted to coordinates, as shown in the final dataset. + + Returns + ------- + Dataset + + See Also + -------- + Dataset.swap_dims + Dataset.assign_coords """ # TODO: allow inserting new coordinates with this method, like # DataFrame.set_index? 
From 2562c5833b01aa71ea469486182ab64b3964a1d1 Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Tue, 20 Jun 2023 16:39:02 +0530 Subject: [PATCH 07/35] xarray.dataset.reset_coords --- xarray/core/dataset.py | 52 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 18442c77193..ab147a7438b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1808,6 +1808,58 @@ def reset_coords( If True, remove coordinates instead of converting them into variables. + Example + ------- + + # Sample dataset + >>> dataset = xr.Dataset( + ... { + ... "temperature": ( + ... ["time", "lat", "lon"], + ... [[[25, 26], [27, 28]], [[29, 30], [31, 32]]], + ... ), + ... "precipitation": ( + ... ["time", "lat", "lon"], + ... [[[0.5, 0.8], [0.2, 0.4]], [[0.3, 0.6], [0.7, 0.9]]], + ... ), + ... }, + ... coords={ + ... "time": pd.date_range(start="2023-01-01", periods=2), + ... "lat": [40, 41], + ... "lon": [-80, -79], + ... "altitude": 1000, + ... }, + ... 
) + + # Print the dataset before resetting coordinates + >>> dataset + + Dimensions: (time: 2, lat: 2, lon: 2) + Coordinates: + * time (time) datetime64[ns] 2023-01-01 2023-01-02 + * lat (lat) int64 40 41 + * lon (lon) int64 -80 -79 + altitude int64 1000 + Data variables: + temperature (time, lat, lon) int64 25 26 27 28 29 30 31 32 + precipitation (time, lat, lon) float64 0.5 0.8 0.2 0.4 0.3 0.6 0.7 0.9 + + # Reset the 'altitude' coordinate + >>> dataset_reset = dataset.reset_coords("altitude") + + # Print the dataset after resetting coordinates + >>> dataset_reset + + Dimensions: (time: 2, lat: 2, lon: 2) + Coordinates: + * time (time) datetime64[ns] 2023-01-01 2023-01-02 + * lat (lat) int64 40 41 + * lon (lon) int64 -80 -79 + Data variables: + temperature (time, lat, lon) int64 25 26 27 28 29 30 31 32 + precipitation (time, lat, lon) float64 0.5 0.8 0.2 0.4 0.3 0.6 0.7 0.9 + altitude int64 1000 + Returns ------- Dataset From 3e25744425d6513b6b20001751462e7a7b553005 Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Fri, 23 Jun 2023 15:18:32 +0530 Subject: [PATCH 08/35] indentation changes --- xarray/core/dataset.py | 103 ++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 54 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ab147a7438b..67caaa93e7a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1748,8 +1748,8 @@ def set_coords(self: T_Dataset, names: Hashable | Iterable[Hashable]) -> T_Datas Dimensions: (x: 2, y: 2) Coordinates: - * x (x) int64 0 1 - * y (y) int64 0 1 + * x (x) int64 0 1 + * y (y) int64 0 1 Data variables: data (x, y) int64 1 2 3 4 @@ -1836,9 +1836,9 @@ def reset_coords( Dimensions: (time: 2, lat: 2, lon: 2) Coordinates: - * time (time) datetime64[ns] 2023-01-01 2023-01-02 - * lat (lat) int64 40 41 - * lon (lon) int64 -80 -79 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 + * lat (lat) int64 40 41 + * lon (lon) int64 -80 -79 altitude int64 1000 Data variables: 
temperature (time, lat, lon) int64 25 26 27 28 29 30 31 32 @@ -1852,9 +1852,9 @@ def reset_coords( Dimensions: (time: 2, lat: 2, lon: 2) Coordinates: - * time (time) datetime64[ns] 2023-01-01 2023-01-02 - * lat (lat) int64 40 41 - * lon (lon) int64 -80 -79 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 + * lat (lat) int64 40 41 + * lon (lon) int64 -80 -79 Data variables: temperature (time, lat, lon) int64 25 26 27 28 29 30 31 32 precipitation (time, lat, lon) float64 0.5 0.8 0.2 0.4 0.3 0.6 0.7 0.9 @@ -2782,9 +2782,9 @@ def head( Dimensions: (temperature: 5, humidity: 5, time: 5) Coordinates: - * temperature (temperature) float64 25.1 28.3 30.5 27.2 26.8 - * humidity (humidity) float64 60.2 55.6 50.3 58.8 61.7 - * time (time) int64 1 2 3 4 5 + * temperature (temperature) float64 25.1 28.3 30.5 27.2 26.8 + * humidity (humidity) float64 60.2 55.6 50.3 58.8 61.7 + * time (time) int64 1 2 3 4 5 Data variables: *empty* @@ -2796,9 +2796,9 @@ def head( Dimensions: (temperature: 2, humidity: 2, time: 2) Coordinates: - * temperature (temperature) float64 25.1 28.3 - * humidity (humidity) float64 60.2 55.6 - * time (time) int64 1 2 + * temperature (temperature) float64 25.1 28.3 + * humidity (humidity) float64 60.2 55.6 + * time (time) int64 1 2 Data variables: *empty* @@ -2856,23 +2856,18 @@ def tail( >>> dataset = xr.Dataset({"data": data}) # Print the original dataset - >>> "Original Dataset:" >>> dataset - - # Get the last 2 elements using tail() - >>> tail_dataset = dataset.tail(2) - - # Print the tail dataset - >>> "Tail Dataset:" - >>> tail_dataset - 'Original Dataset:' Dimensions: (x: 3, y: 3) Dimensions without coordinates: x, y Data variables: data (x, y) int64 1 2 3 4 5 6 7 8 9 - 'Tail Dataset:' + # Get the last 2 elements using tail() + >>> tail_dataset = dataset.tail(2) + + # Print the tail dataset + >>> tail_dataset Dimensions: (x: 2, y: 2) Dimensions without coordinates: x, y @@ -6065,7 +6060,7 @@ def ffill(self: T_Dataset, dim: Hashable, limit: int | 
None = None) -> T_Dataset Dimensions: (time: 10) Coordinates: - * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 Data variables: data (time) float64 1.0 nan 3.0 nan 5.0 6.0 nan 8.0 nan 10.0 @@ -6074,7 +6069,7 @@ def ffill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset Dimensions: (time: 10) Coordinates: - * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 Data variables: data (time) float64 1.0 1.0 3.0 3.0 5.0 6.0 6.0 8.0 8.0 10.0 @@ -6116,8 +6111,8 @@ def bfill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset Dimensions: (x: 3, y: 3) Coordinates: - * x (x) int64 0 1 2 - * y (y) int64 0 1 2 + * x (x) int64 0 1 2 + * y (y) int64 0 1 2 Data variables: data (x, y) float64 1.0 2.0 nan 4.0 nan 6.0 nan 8.0 9.0 @@ -6129,8 +6124,8 @@ def bfill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset Dimensions: (x: 3, y: 3) Coordinates: - * x (x) int64 0 1 2 - * y (y) int64 0 1 2 + * x (x) int64 0 1 2 + * y (y) int64 0 1 2 Data variables: data (x, y) float64 1.0 2.0 nan 4.0 6.0 6.0 8.0 8.0 9.0 @@ -8689,30 +8684,30 @@ def idxmax( def argmin(self: T_Dataset, dim: Hashable | None = None, **kwargs) -> T_Dataset: """Indices of the minima of the member variables. - If there are multiple minima, the indices of the first one found will be - returned. + If there are multiple minima, the indices of the first one found will be + returned. - Parameters - ---------- - dim : Hashable, optional - The dimension over which to find the minimum. By default, finds minimum over - all dimensions - for now returning an int for backward compatibility, but - this is deprecated, in future will be an error, since DataArray.argmin will - return a dict with indices for all dimensions, which does not make sense for - a Dataset. 
- keep_attrs : bool, optional - If True, the attributes (`attrs`) will be copied from the original - object to the new one. If False (default), the new object will be - returned without attributes. - skipna : bool, optional - If True, skip missing values (as marked by NaN). By default, only - skips missing values for float dtypes; other dtypes either do not - have a sentinel missing value (int) or skipna=True has not been - implemented (object, datetime64 or timedelta64). + Parameters + ---------- + dim : Hashable, optional + The dimension over which to find the minimum. By default, finds minimum over + all dimensions - for now returning an int for backward compatibility, but + this is deprecated, in future will be an error, since DataArray.argmin will + return a dict with indices for all dimensions, which does not make sense for + a Dataset. + keep_attrs : bool, optional + If True, the attributes (`attrs`) will be copied from the original + object to the new one. If False (default), the new object will be + returned without attributes. + skipna : bool, optional + If True, skip missing values (as marked by NaN). By default, only + skips missing values for float dtypes; other dtypes either do not + have a sentinel missing value (int) or skipna=True has not been + implemented (object, datetime64 or timedelta64). 
- Returns - ------- - result : Dataset + Returns + ------- + result : Dataset Example ------- @@ -8743,7 +8738,7 @@ def argmin(self: T_Dataset, dim: Hashable | None = None, **kwargs) -> T_Dataset: Dimensions: (test: 3) Coordinates: - * test (test) T_Dataset: Dimensions: (student: 3) Coordinates: - * student (student) Date: Tue, 27 Jun 2023 11:42:29 +0530 Subject: [PATCH 09/35] indentation --- xarray/core/dataset.py | 97 +++++++++++++++++++++--------------------- 1 file changed, 48 insertions(+), 49 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 67caaa93e7a..572ece76d31 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1726,58 +1726,57 @@ def data_vars(self) -> DataVariables: def set_coords(self: T_Dataset, names: Hashable | Iterable[Hashable]) -> T_Dataset: """Given names of one or more variables, set them as coordinates - Parameters - ---------- - names : hashable or iterable of hashable - Name(s) of variables in this dataset to convert into coordinates. - - Example - ------- - - # Sample dataset - >>> data = xr.DataArray( - ... [[1, 2], [3, 4]], - ... dims=("x", "y"), - ... coords={"x": [0, 1], "y": [0, 1]}, - ... name="data", - ... ) - >>> dataset = xr.Dataset(data_vars={"data": data}) - - # Print the dataset before setting coordinates - >>> dataset - - Dimensions: (x: 2, y: 2) - Coordinates: - * x (x) int64 0 1 - * y (y) int64 0 1 - Data variables: - data (x, y) int64 1 2 3 4 + Parameters + ---------- + names : hashable or iterable of hashable + Name(s) of variables in this dataset to convert into coordinates. + Example + ------- - # Set "x" and "y" variables as coordinates - >>> dataset_coords = dataset.set_coords(["x", "y"]) + # Sample dataset + >>> data = xr.DataArray( + ... [[1, 2], [3, 4]], + ... dims=("x", "y"), + ... coords={"x": [0, 1], "y": [0, 1]}, + ... name="data", + ... 
) + >>> dataset = xr.Dataset(data_vars={"data": data}) - # Print the dataset after setting coordinates - >>> dataset_coords - - Dimensions: (x: 2, y: 2) - Coordinates: - x (x) int64 0 1 - y (y) int64 0 1 - Data variables: - data (x, y) int64 1 2 3 4 - - In the initial dataset, the "x" and "y" variables are present as dimensions. After calling ``set_coords`` (["x", "y"]), these - variables are converted to coordinates, as shown in the final dataset. - - Returns - ------- - Dataset - - See Also - -------- - Dataset.swap_dims - Dataset.assign_coords + # Print the dataset before setting coordinates + >>> dataset + + Dimensions: (x: 2, y: 2) + Coordinates: + * x (x) int64 0 1 + * y (y) int64 0 1 + Data variables: + data (x, y) int64 1 2 3 4 + + # Set "x" and "y" variables as coordinates + >>> dataset_coords = dataset.set_coords(["x", "y"]) + + # Print the dataset after setting coordinates + >>> dataset_coords + + Dimensions: (x: 2, y: 2) + Coordinates: + x (x) int64 0 1 + y (y) int64 0 1 + Data variables: + data (x, y) int64 1 2 3 4 + +In the initial dataset, the "x" and "y" variables are present as dimensions. After calling ``set_coords`` (["x", "y"]), these +variables are converted to coordinates, as shown in the final dataset. + + Returns + ------- + Dataset + + See Also + -------- + Dataset.swap_dims + Dataset.assign_coords """ # TODO: allow inserting new coordinates with this method, like # DataFrame.set_index? 
From 44b18b858cb86dad4f204be03623d32536824ef9 Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Tue, 27 Jun 2023 12:05:12 +0530 Subject: [PATCH 10/35] reset_coords example --- xarray/core/dataset.py | 62 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 572ece76d31..2f33d05a19f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1859,6 +1859,68 @@ def reset_coords( precipitation (time, lat, lon) float64 0.5 0.8 0.2 0.4 0.3 0.6 0.7 0.9 altitude int64 1000 + # Sample data + >>> cities = ["New York", "London", "Tokyo"] + >>> time = pd.date_range(start="2022-01-01", periods=12, freq="M") + + >>> temperature_data = [ + ... # Temperature values for New York + ... [32, 34, 36, 40, 45, 50, 55, 60, 55, 45, 38, 35], + ... # Temperature values for London + ... [40, 42, 44, 48, 53, 58, 62, 64, 59, 52, 46, 42], + ... # Temperature values for Tokyo + ... [45, 47, 50, 58, 65, 72, 79, 82, 77, 68, 58, 50] + ... ] + + >>> precipitation_data = [ + ... # Precipitation values for New York + ... [1.2, 1.5, 1.8, 2.5, 3.0, 2.8, 2.3, 2.0, 2.4, 2.8, 2.3, 1.8], + ... # Precipitation values for London + ... [2.0, 2.2, 2.5, 2.8, 3.0, 2.7, 2.3, 2.1, 2.3, 2.7, 2.5, 2.2], + ... # Precipitation values for Tokyo + ... [0.8, 0.9, 1.2, 1.5, 1.8, 2.0, 2.2, 2.1, 2.0, 1.7, 1.4, 1.2] + ... ] + + # Create the dataset + >>> dataset = xr.Dataset( + ... { + ... "temperature": (["city", "time"], temperature_data), + ... "precipitation": (["city", "time"], precipitation_data), + ... }, + ... coords={ + ... "city": cities, + ... "time": time, + ... "altitude": 1000, + ... } + ... 
) + + # Dataset before resetting coordinates + >>> dataset + + Dimensions: (city: 3, time: 12) + Coordinates: + * city (city) >> dataset_reset = dataset.reset_coords("altitude") + + # Dataset after resetting coordinates + >>> dataset_reset + + Dimensions: (city: 3, time: 12) + Coordinates: + * city (city) Date: Tue, 27 Jun 2023 12:22:31 +0530 Subject: [PATCH 11/35] tail_edited --- xarray/core/dataset.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2f33d05a19f..28bfd8e42e0 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2838,7 +2838,7 @@ def head( ... coords={"time": [1, 2, 3, 4, 5]}, ... ) - # Originial Dataset + # Original Dataset >>> dataset Dimensions: (temperature: 5, humidity: 5, time: 5) @@ -2925,10 +2925,7 @@ def tail( data (x, y) int64 1 2 3 4 5 6 7 8 9 # Get the last 2 elements using tail() - >>> tail_dataset = dataset.tail(2) - - # Print the tail dataset - >>> tail_dataset + dataset.tail(2) Dimensions: (x: 2, y: 2) Dimensions without coordinates: x, y From 1ee84e80206c1f721cc5fdc9a3436a2b415960b4 Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Tue, 27 Jun 2023 12:38:00 +0530 Subject: [PATCH 12/35] bfill change --- xarray/core/dataset.py | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 28bfd8e42e0..c2ab60601d3 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6159,33 +6159,35 @@ def bfill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset Example ------- - # Create a sample dataset - >>> data = np.array([[1, 2, np.nan], [4, np.nan, 6], [np.nan, 8, 9]]) - >>> coords = {"x": [0, 1, 2], "y": [0, 1, 2]} - >>> dataset = xr.Dataset({"data": (["x", "y"], data)}, coords=coords) + # Define the time range + >>> time = pd.date_range("2023-01-01", periods=10, freq="D") + + # Define the data array with missing values + 
>>> data = np.array([1, np.nan, 3, np.nan, 5, 6, np.nan, 8, np.nan, 10]) + + # Create the dataset with the data array + >>> dataset = xr.Dataset({"data": (("time",), data)}, coords={"time": time}) # Print the original dataset >>> dataset - Dimensions: (x: 3, y: 3) + Dimensions: (time: 10) Coordinates: - * x (x) int64 0 1 2 - * y (y) int64 0 1 2 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 Data variables: - data (x, y) float64 1.0 2.0 nan 4.0 nan 6.0 nan 8.0 9.0 + data (time) float64 1.0 nan 3.0 nan 5.0 6.0 nan 8.0 nan 10.0 - # Apply backward fill (bfill) along the 'y' dimension - >>> dataset_bfill = dataset.bfill(dim="y") + # Perform backward fill (bfill) on the dataset + >>> filled_dataset = dataset.bfill(dim="time") - # Print the dataset after backward fill - >>> dataset_bfill + # Print the filled dataset, fills NaN values by propagating values backward + >>> filled_dataset - Dimensions: (x: 3, y: 3) + Dimensions: (time: 10) Coordinates: - * x (x) int64 0 1 2 - * y (y) int64 0 1 2 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 Data variables: - data (x, y) float64 1.0 2.0 nan 4.0 6.0 6.0 8.0 8.0 9.0 + data (time) float64 1.0 3.0 3.0 5.0 5.0 6.0 8.0 8.0 10.0 10.0 Returns ------- From 4ba26a68953f463e470a35bedfedce562dbd3ba4 Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Tue, 27 Jun 2023 18:58:54 +0530 Subject: [PATCH 13/35] change --- xarray/core/dataset.py | 64 ++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 37 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index c2ab60601d3..9e63277b484 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1734,40 +1734,25 @@ def set_coords(self: T_Dataset, names: Hashable | Iterable[Hashable]) -> T_Datas Example ------- - # Sample dataset - >>> data = xr.DataArray( - ... [[1, 2], [3, 4]], - ... dims=("x", "y"), - ... coords={"x": [0, 1], "y": [0, 1]}, - ... name="data", - ... 
) - >>> dataset = xr.Dataset(data_vars={"data": data}) - - # Print the dataset before setting coordinates + >>> dataset = xr.Dataset({'temperature': ('time', [25, 30, 27]), 'time': pd.date_range('2023-01-01', periods=3)}) >>> dataset - Dimensions: (x: 2, y: 2) + Dimensions: (time: 3) Coordinates: - * x (x) int64 0 1 - * y (y) int64 0 1 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 2023-01-03 Data variables: - data (x, y) int64 1 2 3 4 - - # Set "x" and "y" variables as coordinates - >>> dataset_coords = dataset.set_coords(["x", "y"]) + temperature (time) int64 25 30 27 - # Print the dataset after setting coordinates - >>> dataset_coords + >>> ds.set_coords('temperature') - Dimensions: (x: 2, y: 2) + Dimensions: (time: 3) Coordinates: - x (x) int64 0 1 - y (y) int64 0 1 + temperature (time) int64 25 30 27 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 2023-01-03 Data variables: - data (x, y) int64 1 2 3 4 + *empty* -In the initial dataset, the "x" and "y" variables are present as dimensions. After calling ``set_coords`` (["x", "y"]), these -variables are converted to coordinates, as shown in the final dataset. +On calling ``set_coords`` , these variables are converted to coordinates, as shown in the final dataset. Returns ------- @@ -5767,70 +5752,75 @@ def dropna( # Sample dataset with missing values >>> data = { ... "time": [0, 1, 2, 3], - ... "temperature": [25.0, None, 27.5, 28.0], - ... "humidity": [60.0, 65.0, None, 70.0], + ... "temperature": [25.0, np.nan, 27.5, 28.0], + ... "humidity": [60.0, 65.0, np.nan, 70.0], ... 
} >>> dataset = xr.Dataset(data) + # Print the original dataset >>> dataset Dimensions: (time: 4, temperature: 4, humidity: 4) Coordinates: * time (time) int64 0 1 2 3 - * temperature (temperature) object 25.0 None 27.5 28.0 - * humidity (humidity) object 60.0 65.0 None 70.0 + * temperature (temperature) float64 25.0 nan 27.5 28.0 + * humidity (humidity) float64 60.0 65.0 nan 70.0 Data variables: *empty* # Drop rows with any missing values >>> dataset_dropped_any = dataset.dropna(dim="time", how="any") + # Print the dataset after dropping rows with any missing values >>> dataset_dropped_any Dimensions: (time: 4, temperature: 4, humidity: 4) Coordinates: * time (time) int64 0 1 2 3 - * temperature (temperature) object 25.0 None 27.5 28.0 - * humidity (humidity) object 60.0 65.0 None 70.0 + * temperature (temperature) float64 25.0 nan 27.5 28.0 + * humidity (humidity) float64 60.0 65.0 nan 70.0 Data variables: *empty* # Drop rows with all missing values >>> dataset_dropped_all = dataset.dropna(dim="time", how="all") + # Print the dataset after dropping rows with all missing values >>> dataset_dropped_all Dimensions: (time: 0, temperature: 4, humidity: 4) Coordinates: * time (time) int64 - * temperature (temperature) object 25.0 None 27.5 28.0 - * humidity (humidity) object 60.0 65.0 None 70.0 + * temperature (temperature) float64 25.0 nan 27.5 28.0 + * humidity (humidity) float64 60.0 65.0 nan 70.0 Data variables: *empty* # Drop rows with a threshold of non-missing values >>> dataset_dropped_thresh = dataset.dropna(dim="time", thresh=2) + # Print the dataset after dropping rows with a threshold of non-missing values >>> dataset_dropped_thresh Dimensions: (time: 0, temperature: 4, humidity: 4) Coordinates: * time (time) int64 - * temperature (temperature) object 25.0 None 27.5 28.0 - * humidity (humidity) object 60.0 65.0 None 70.0 + * temperature (temperature) float64 25.0 nan 27.5 28.0 + * humidity (humidity) float64 60.0 65.0 nan 70.0 Data variables: *empty* # Drop 
rows for a subset of variables >>> dataset_dropped_subset = dataset.dropna(dim="time", subset=["temperature"]) + # Print the dataset after dropping rows for a subset of variables >>> dataset_dropped_subset Dimensions: (time: 4, temperature: 4, humidity: 4) Coordinates: * time (time) int64 0 1 2 3 - * temperature (temperature) object 25.0 None 27.5 28.0 - * humidity (humidity) object 60.0 65.0 None 70.0 + * temperature (temperature) float64 25.0 nan 27.5 28.0 + * humidity (humidity) float64 60.0 65.0 nan 70.0 Data variables: *empty* From 7bb884fe0f360ced0d9b7ea87108eec65c91c325 Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Fri, 30 Jun 2023 18:39:44 +0530 Subject: [PATCH 14/35] changes --- xarray/core/dataset.py | 149 ++++++----------------------------------- 1 file changed, 20 insertions(+), 129 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d5f845a08a9..e6951e5d5fb 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1742,9 +1742,8 @@ def set_coords(self: T_Dataset, names: Hashable | Iterable[Hashable]) -> T_Datas names : hashable or iterable of hashable Name(s) of variables in this dataset to convert into coordinates. - Example - ------- - + Examples + -------- >>> dataset = xr.Dataset({'temperature': ('time', [25, 30, 27]), 'time': pd.date_range('2023-01-01', periods=3)}) >>> dataset @@ -1803,10 +1802,8 @@ def reset_coords( If True, remove coordinates instead of converting them into variables. - Example - ------- - - # Sample dataset + Examples + -------- >>> dataset = xr.Dataset( ... { ... "temperature": ( @@ -1825,8 +1822,7 @@ def reset_coords( ... "altitude": 1000, ... }, ... 
) - - # Print the dataset before resetting coordinates + # Dataset before resetting coordinates >>> dataset Dimensions: (time: 2, lat: 2, lon: 2) @@ -1838,11 +1834,9 @@ def reset_coords( Data variables: temperature (time, lat, lon) int64 25 26 27 28 29 30 31 32 precipitation (time, lat, lon) float64 0.5 0.8 0.2 0.4 0.3 0.6 0.7 0.9 - # Reset the 'altitude' coordinate >>> dataset_reset = dataset.reset_coords("altitude") - - # Print the dataset after resetting coordinates + # Dataset after resetting coordinates >>> dataset_reset Dimensions: (time: 2, lat: 2, lon: 2) @@ -1855,10 +1849,8 @@ def reset_coords( precipitation (time, lat, lon) float64 0.5 0.8 0.2 0.4 0.3 0.6 0.7 0.9 altitude int64 1000 - # Sample data >>> cities = ["New York", "London", "Tokyo"] >>> time = pd.date_range(start="2022-01-01", periods=12, freq="M") - >>> temperature_data = [ ... # Temperature values for New York ... [32, 34, 36, 40, 45, 50, 55, 60, 55, 45, 38, 35], @@ -1867,7 +1859,6 @@ def reset_coords( ... # Temperature values for Tokyo ... [45, 47, 50, 58, 65, 72, 79, 82, 77, 68, 58, 50] ... ] - >>> precipitation_data = [ ... # Precipitation values for New York ... [1.2, 1.5, 1.8, 2.5, 3.0, 2.8, 2.3, 2.0, 2.4, 2.8, 2.3, 1.8], @@ -1876,8 +1867,6 @@ def reset_coords( ... # Precipitation values for Tokyo ... [0.8, 0.9, 1.2, 1.5, 1.8, 2.0, 2.2, 2.1, 2.0, 1.7, 1.4, 1.2] ... ] - - # Create the dataset >>> dataset = xr.Dataset( ... { ... "temperature": (["city", "time"], temperature_data), @@ -1889,7 +1878,6 @@ def reset_coords( ... "altitude": 1000, ... } ... ) - # Dataset before resetting coordinates >>> dataset @@ -1901,10 +1889,8 @@ def reset_coords( Data variables: temperature (city, time) int64 32 34 36 40 45 50 55 ... 79 82 77 68 58 50 precipitation (city, time) float64 1.2 1.5 1.8 2.5 3.0 ... 
2.0 1.7 1.4 1.2 - # Reset the 'altitude' coordinate >>> dataset_reset = dataset.reset_coords("altitude") - # Dataset after resetting coordinates >>> dataset_reset @@ -2822,10 +2808,8 @@ def head( The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. - Example - ------- - - # Sample Dataset + Examples + -------- >>> dataset = xr.Dataset( ... { ... "temperature": [25.1, 28.3, 30.5, 27.2, 26.8], @@ -2833,8 +2817,6 @@ def head( ... }, ... coords={"time": [1, 2, 3, 4, 5]}, ... ) - - # Original Dataset >>> dataset Dimensions: (temperature: 5, humidity: 5, time: 5) @@ -2844,12 +2826,8 @@ def head( * time (time) int64 1 2 3 4 5 Data variables: *empty* - # Use head() function to retrieve the first three elements - >>> head_dataset = dataset.head(2) - - # Print the head dataset - >>> head_dataset + >>> dataset.head(2) Dimensions: (temperature: 2, humidity: 2, time: 2) Coordinates: @@ -2905,23 +2883,18 @@ def tail( The keyword arguments form of ``indexers``. One of indexers or indexers_kwargs must be provided. - Example - ------- - - # Sample dataset + Examples + -------- >>> data = xr.DataArray([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dims=("x", "y")) >>> dataset = xr.Dataset({"data": data}) - - # Print the original dataset >>> dataset Dimensions: (x: 3, y: 3) Dimensions without coordinates: x, y Data variables: data (x, y) int64 1 2 3 4 5 6 7 8 9 - # Get the last 2 elements using tail() - dataset.tail(2) + >>> dataset.tail(2) Dimensions: (x: 2, y: 2) Dimensions without coordinates: x, y @@ -5757,10 +5730,8 @@ def dropna( Which variables to check for missing values. By default, all variables in the dataset are checked. - Example - ------- - - # Sample dataset with missing values + Examples + -------- >>> data = { ... "time": [0, 1, 2, 3], ... "temperature": [25.0, np.nan, 27.5, 28.0], @@ -5768,7 +5739,6 @@ def dropna( ... 
} >>> dataset = xr.Dataset(data) - # Print the original dataset >>> dataset Dimensions: (time: 4, temperature: 4, humidity: 4) @@ -6104,17 +6074,14 @@ def ffill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset than 0 or None for no limit. Must be None or greater than or equal to axis length if filling along chunked axes (dimensions). - Example + Examples ------- - # Create a sample dataset with missing values + # Sample dataset with missing values >>> time = pd.date_range("2023-01-01", periods=10, freq="D") >>> data = np.array([1, np.nan, 3, np.nan, 5, 6, np.nan, 8, np.nan, 10]) >>> dataset = xr.Dataset({"data": (("time",), data)}, coords={"time": time}) - # Perform forward fill (ffill) on the dataset >>> filled_dataset = dataset.ffill(dim="time") - - # Print the original dataset >>> dataset Dimensions: (time: 10) @@ -6158,18 +6125,13 @@ def bfill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset than 0 or None for no limit. Must be None or greater than or equal to axis length if filling along chunked axes (dimensions). 
- Example - ------- + Examples + -------- # Define the time range >>> time = pd.date_range("2023-01-01", periods=10, freq="D") - # Define the data array with missing values >>> data = np.array([1, np.nan, 3, np.nan, 5, 6, np.nan, 8, np.nan, 10]) - - # Create the dataset with the data array >>> dataset = xr.Dataset({"data": (("time",), data)}, coords={"time": time}) - - # Print the original dataset >>> dataset Dimensions: (time: 10) @@ -6178,11 +6140,8 @@ def bfill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset Data variables: data (time) float64 1.0 nan 3.0 nan 5.0 6.0 nan 8.0 nan 10.0 - # Perform backward fill (bfill) on the dataset - >>> filled_dataset = dataset.bfill(dim="time") - - # Print the filled dataset, fills NaN values by propagating values backward - >>> filled_dataset + # filled dataset, fills NaN values by propagating values backward + >>> dataset.bfill(dim="time") Dimensions: (time: 10) Coordinates: @@ -8770,40 +8729,6 @@ def argmin(self: T_Dataset, dim: Hashable | None = None, **kwargs) -> T_Dataset: ------- result : Dataset - Example - ------- - - # Defined the dataset - >>> dataset = xr.Dataset( - ... { - ... "math_scores": ( - ... ["student", "test"], - ... [[90, 85, 79], [78, 80, 85], [95, 92, 98]], - ... ), - ... "english_scores": ( - ... ["student", "test"], - ... [[88, 80, 92], [75, 95, 79], [93, 96, 78]], - ... ), - ... }, - ... coords={ - ... "student": ["Alice", "Bob", "Charlie"], - ... "test": ["Test 1", "Test 2", "Test 3"], - ... }, - ... ) - - # Indices of the minimum values along the 'student' dimension are calculated - >>> argmin_indices = dataset.argmin(dim="student") - - # Print the indices of the minimum values - >>> argmin_indices - - Dimensions: (test: 3) - Coordinates: - * test (test) T_Dataset: ------- result : Dataset - Example - ------- - - #Defined the dataset - >>> dataset = xr.Dataset( - ... { - ... "math_scores": ( - ... ["student", "test"], - ... [[90, 85, 92], [78, 80, 85], [95, 92, 98]], - ... 
), - ... "english_scores": ( - ... ["student", "test"], - ... [[88, 90, 92], [75, 82, 79], [93, 96, 91]], - ... ), - ... }, - ... coords={ - ... "student": ["Alice", "Bob", "Charlie"], - ... "test": ["Test 1", "Test 2", "Test 3"], - ... }, - ... ) - - # Indices of the minimum values along the 'student' dimension are calculated - >>> argmax_indices = dataset.argmax(dim="test") - - # Print the indices of the minimum values - >>> argmax_indices - - Dimensions: (student: 3) - Coordinates: - * student (student) Date: Sun, 2 Jul 2023 23:23:18 +0530 Subject: [PATCH 15/35] indented --- xarray/core/dataset.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index e6951e5d5fb..a415b8eb404 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5747,11 +5747,9 @@ def dropna( * temperature (temperature) float64 25.0 nan 27.5 28.0 * humidity (humidity) float64 60.0 65.0 nan 70.0 Data variables: - *empty* - + *empty* # Drop rows with any missing values >>> dataset_dropped_any = dataset.dropna(dim="time", how="any") - # Print the dataset after dropping rows with any missing values >>> dataset_dropped_any @@ -5762,10 +5760,8 @@ def dropna( * humidity (humidity) float64 60.0 65.0 nan 70.0 Data variables: *empty* - # Drop rows with all missing values >>> dataset_dropped_all = dataset.dropna(dim="time", how="all") - # Print the dataset after dropping rows with all missing values >>> dataset_dropped_all @@ -5776,10 +5772,8 @@ def dropna( * humidity (humidity) float64 60.0 65.0 nan 70.0 Data variables: *empty* - # Drop rows with a threshold of non-missing values >>> dataset_dropped_thresh = dataset.dropna(dim="time", thresh=2) - # Print the dataset after dropping rows with a threshold of non-missing values >>> dataset_dropped_thresh @@ -5790,10 +5784,8 @@ def dropna( * humidity (humidity) float64 60.0 65.0 nan 70.0 Data variables: *empty* - # Drop rows for a subset of variables >>> 
dataset_dropped_subset = dataset.dropna(dim="time", subset=["temperature"]) - # Print the dataset after dropping rows for a subset of variables >>> dataset_dropped_subset @@ -6079,7 +6071,9 @@ def ffill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset # Sample dataset with missing values >>> time = pd.date_range("2023-01-01", periods=10, freq="D") >>> data = np.array([1, np.nan, 3, np.nan, 5, 6, np.nan, 8, np.nan, 10]) + >>> dataset = xr.Dataset({"data": (("time",), data)}, coords={"time": time}) + # Perform forward fill (ffill) on the dataset >>> filled_dataset = dataset.ffill(dim="time") >>> dataset @@ -6089,7 +6083,6 @@ def ffill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 Data variables: data (time) float64 1.0 nan 3.0 nan 5.0 6.0 nan 8.0 nan 10.0 - # Print the filled dataset, fills NaN values by propagating values forward >>> filled_dataset @@ -6128,8 +6121,10 @@ def bfill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset Examples -------- # Define the time range + >>> time = pd.date_range("2023-01-01", periods=10, freq="D") # Define the data array with missing values + >>> data = np.array([1, np.nan, 3, np.nan, 5, 6, np.nan, 8, np.nan, 10]) >>> dataset = xr.Dataset({"data": (("time",), data)}, coords={"time": time}) >>> dataset @@ -6141,6 +6136,7 @@ def bfill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset data (time) float64 1.0 nan 3.0 nan 5.0 6.0 nan 8.0 nan 10.0 # filled dataset, fills NaN values by propagating values backward + >>> dataset.bfill(dim="time") Dimensions: (time: 10) From 74a94f9ba77ba1d8189941d15c81b6eb60771f3f Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Mon, 3 Jul 2023 19:12:38 +0530 Subject: [PATCH 16/35] indented --- xarray/core/dataset.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 
a415b8eb404..4919b514039 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6057,8 +6057,7 @@ def ffill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset Parameters ---------- dim : Hashable - Specifies the dimension along which to propagate values when - filling. + Specifies the dimension along which to propagate values when filling. limit : int or None, optional The maximum number of consecutive NaN values to forward fill. In other words, if there is a gap with more than this number of @@ -6067,14 +6066,16 @@ def ffill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset to axis length if filling along chunked axes (dimensions). Examples - ------- + -------- # Sample dataset with missing values + >>> time = pd.date_range("2023-01-01", periods=10, freq="D") >>> data = np.array([1, np.nan, 3, np.nan, 5, 6, np.nan, 8, np.nan, 10]) >>> dataset = xr.Dataset({"data": (("time",), data)}, coords={"time": time}) # Perform forward fill (ffill) on the dataset + >>> filled_dataset = dataset.ffill(dim="time") >>> dataset From 1a728a2e9de47bab9f6ff5bda22b2aee4acdbf24 Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Mon, 3 Jul 2023 19:54:04 +0530 Subject: [PATCH 17/35] minute_changes --- xarray/core/dataset.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 4919b514039..8a04f7840c9 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1744,7 +1744,12 @@ def set_coords(self: T_Dataset, names: Hashable | Iterable[Hashable]) -> T_Datas Examples -------- - >>> dataset = xr.Dataset({'temperature': ('time', [25, 30, 27]), 'time': pd.date_range('2023-01-01', periods=3)}) + >>> dataset = xr.Dataset( + ... { + ... "temperature": ("time", [25, 30, 27]), + ... "time": pd.date_range("2023-01-01", periods=3), + ... } + ... 
) >>> dataset Dimensions: (time: 3) @@ -1753,7 +1758,7 @@ def set_coords(self: T_Dataset, names: Hashable | Iterable[Hashable]) -> T_Datas Data variables: temperature (time) int64 25 30 27 - >>> ds.set_coords('temperature') + >>> dataset.set_coords("temperature") Dimensions: (time: 3) Coordinates: @@ -1762,7 +1767,7 @@ def set_coords(self: T_Dataset, names: Hashable | Iterable[Hashable]) -> T_Datas Data variables: *empty* -On calling ``set_coords`` , these variables are converted to coordinates, as shown in the final dataset. + On calling ``set_coords`` , these variables are converted to coordinates, as shown in the final dataset. Returns ------- @@ -1857,7 +1862,7 @@ def reset_coords( ... # Temperature values for London ... [40, 42, 44, 48, 53, 58, 62, 64, 59, 52, 46, 42], ... # Temperature values for Tokyo - ... [45, 47, 50, 58, 65, 72, 79, 82, 77, 68, 58, 50] + ... [45, 47, 50, 58, 65, 72, 79, 82, 77, 68, 58, 50], ... ] >>> precipitation_data = [ ... # Precipitation values for New York @@ -1865,7 +1870,7 @@ def reset_coords( ... # Precipitation values for London ... [2.0, 2.2, 2.5, 2.8, 3.0, 2.7, 2.3, 2.1, 2.3, 2.7, 2.5, 2.2], ... # Precipitation values for Tokyo - ... [0.8, 0.9, 1.2, 1.5, 1.8, 2.0, 2.2, 2.1, 2.0, 1.7, 1.4, 1.2] + ... [0.8, 0.9, 1.2, 1.5, 1.8, 2.0, 2.2, 2.1, 2.0, 1.7, 1.4, 1.2], ... ] >>> dataset = xr.Dataset( ... { @@ -1876,7 +1881,7 @@ def reset_coords( ... "city": cities, ... "time": time, ... "altitude": 1000, - ... } + ... }, ... 
) # Dataset before resetting coordinates >>> dataset From a06a8c89c701aab479bc1137198b614d9f1bb99f Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Mon, 3 Jul 2023 21:30:04 +0530 Subject: [PATCH 18/35] doctest failure change --- xarray/core/dataset.py | 94 ++++++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 36 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 8a04f7840c9..3081fb09767 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1754,7 +1754,7 @@ def set_coords(self: T_Dataset, names: Hashable | Iterable[Hashable]) -> T_Datas Dimensions: (time: 3) Coordinates: - * time (time) datetime64[ns] 2023-01-01 2023-01-02 2023-01-03 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 2023-01-03 Data variables: temperature (time) int64 25 30 27 @@ -1827,28 +1827,34 @@ def reset_coords( ... "altitude": 1000, ... }, ... ) + # Dataset before resetting coordinates + >>> dataset Dimensions: (time: 2, lat: 2, lon: 2) Coordinates: - * time (time) datetime64[ns] 2023-01-01 2023-01-02 - * lat (lat) int64 40 41 - * lon (lon) int64 -80 -79 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 + * lat (lat) int64 40 41 + * lon (lon) int64 -80 -79 altitude int64 1000 Data variables: temperature (time, lat, lon) int64 25 26 27 28 29 30 31 32 precipitation (time, lat, lon) float64 0.5 0.8 0.2 0.4 0.3 0.6 0.7 0.9 + # Reset the 'altitude' coordinate + >>> dataset_reset = dataset.reset_coords("altitude") + # Dataset after resetting coordinates + >>> dataset_reset Dimensions: (time: 2, lat: 2, lon: 2) Coordinates: - * time (time) datetime64[ns] 2023-01-01 2023-01-02 - * lat (lat) int64 40 41 - * lon (lon) int64 -80 -79 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 + * lat (lat) int64 40 41 + * lon (lon) int64 -80 -79 Data variables: temperature (time, lat, lon) int64 25 26 27 28 29 30 31 32 precipitation (time, lat, lon) float64 0.5 0.8 0.2 0.4 0.3 0.6 0.7 0.9 @@ -1883,7 +1889,9 @@ def reset_coords( ... 
"altitude": 1000, ... }, ... ) + # Dataset before resetting coordinates + >>> dataset Dimensions: (city: 3, time: 12) @@ -1894,9 +1902,13 @@ def reset_coords( Data variables: temperature (city, time) int64 32 34 36 40 45 50 55 ... 79 82 77 68 58 50 precipitation (city, time) float64 1.2 1.5 1.8 2.5 3.0 ... 2.0 1.7 1.4 1.2 + # Reset the 'altitude' coordinate + >>> dataset_reset = dataset.reset_coords("altitude") + # Dataset after resetting coordinates + >>> dataset_reset Dimensions: (city: 3, time: 12) @@ -2826,19 +2838,21 @@ def head( Dimensions: (temperature: 5, humidity: 5, time: 5) Coordinates: - * temperature (temperature) float64 25.1 28.3 30.5 27.2 26.8 - * humidity (humidity) float64 60.2 55.6 50.3 58.8 61.7 - * time (time) int64 1 2 3 4 5 + * temperature (temperature) float64 25.1 28.3 30.5 27.2 26.8 + * humidity (humidity) float64 60.2 55.6 50.3 58.8 61.7 + * time (time) int64 1 2 3 4 5 Data variables: *empty* + # Use head() function to retrieve the first three elements + >>> dataset.head(2) Dimensions: (temperature: 2, humidity: 2, time: 2) Coordinates: - * temperature (temperature) float64 25.1 28.3 - * humidity (humidity) float64 60.2 55.6 - * time (time) int64 1 2 + * temperature (temperature) float64 25.1 28.3 + * humidity (humidity) float64 60.2 55.6 + * time (time) int64 1 2 Data variables: *empty* @@ -2898,7 +2912,9 @@ def tail( Dimensions without coordinates: x, y Data variables: data (x, y) int64 1 2 3 4 5 6 7 8 9 + # Get the last 2 elements using tail() + >>> dataset.tail(2) Dimensions: (x: 2, y: 2) @@ -5748,57 +5764,62 @@ def dropna( Dimensions: (time: 4, temperature: 4, humidity: 4) Coordinates: - * time (time) int64 0 1 2 3 - * temperature (temperature) float64 25.0 nan 27.5 28.0 - * humidity (humidity) float64 60.0 65.0 nan 70.0 + * time (time) int64 0 1 2 3 + * temperature (temperature) float64 25.0 nan 27.5 28.0 + * humidity (humidity) float64 60.0 65.0 nan 70.0 Data variables: - *empty* - # Drop rows with any missing values + *empty* + 
>>> dataset_dropped_any = dataset.dropna(dim="time", how="any") + # Print the dataset after dropping rows with any missing values + >>> dataset_dropped_any Dimensions: (time: 4, temperature: 4, humidity: 4) Coordinates: - * time (time) int64 0 1 2 3 - * temperature (temperature) float64 25.0 nan 27.5 28.0 - * humidity (humidity) float64 60.0 65.0 nan 70.0 + * time (time) int64 0 1 2 3 + * temperature (temperature) float64 25.0 nan 27.5 28.0 + * humidity (humidity) float64 60.0 65.0 nan 70.0 Data variables: *empty* + # Drop rows with all missing values + >>> dataset_dropped_all = dataset.dropna(dim="time", how="all") - # Print the dataset after dropping rows with all missing values >>> dataset_dropped_all Dimensions: (time: 0, temperature: 4, humidity: 4) Coordinates: - * time (time) int64 - * temperature (temperature) float64 25.0 nan 27.5 28.0 - * humidity (humidity) float64 60.0 65.0 nan 70.0 + * time (time) int64 + * temperature (temperature) float64 25.0 nan 27.5 28.0 + * humidity (humidity) float64 60.0 65.0 nan 70.0 Data variables: *empty* + # Drop rows with a threshold of non-missing values + >>> dataset_dropped_thresh = dataset.dropna(dim="time", thresh=2) - # Print the dataset after dropping rows with a threshold of non-missing values >>> dataset_dropped_thresh Dimensions: (time: 0, temperature: 4, humidity: 4) Coordinates: - * time (time) int64 - * temperature (temperature) float64 25.0 nan 27.5 28.0 - * humidity (humidity) float64 60.0 65.0 nan 70.0 + * time (time) int64 + * temperature (temperature) float64 25.0 nan 27.5 28.0 + * humidity (humidity) float64 60.0 65.0 nan 70.0 Data variables: *empty* + # Drop rows for a subset of variables + >>> dataset_dropped_subset = dataset.dropna(dim="time", subset=["temperature"]) - # Print the dataset after dropping rows for a subset of variables >>> dataset_dropped_subset Dimensions: (time: 4, temperature: 4, humidity: 4) Coordinates: - * time (time) int64 0 1 2 3 - * temperature (temperature) float64 25.0 nan 
27.5 28.0 - * humidity (humidity) float64 60.0 65.0 nan 70.0 + * time (time) int64 0 1 2 3 + * temperature (temperature) float64 25.0 nan 27.5 28.0 + * humidity (humidity) float64 60.0 65.0 nan 70.0 Data variables: *empty* @@ -6086,15 +6107,15 @@ def ffill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset Dimensions: (time: 10) Coordinates: - * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 Data variables: data (time) float64 1.0 nan 3.0 nan 5.0 6.0 nan 8.0 nan 10.0 - # Print the filled dataset, fills NaN values by propagating values forward + >>> filled_dataset Dimensions: (time: 10) Coordinates: - * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 Data variables: data (time) float64 1.0 1.0 3.0 3.0 5.0 6.0 6.0 8.0 8.0 10.0 @@ -6129,6 +6150,7 @@ def bfill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset # Define the time range >>> time = pd.date_range("2023-01-01", periods=10, freq="D") + # Define the data array with missing values >>> data = np.array([1, np.nan, 3, np.nan, 5, 6, np.nan, 8, np.nan, 10]) From 1aa8c92a297df0072e8d457d385d248c9041c06b Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Mon, 3 Jul 2023 22:36:55 +0530 Subject: [PATCH 19/35] changes_ --- xarray/core/dataset.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3081fb09767..e9185e45d8a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1763,7 +1763,7 @@ def set_coords(self: T_Dataset, names: Hashable | Iterable[Hashable]) -> T_Datas Dimensions: (time: 3) Coordinates: temperature (time) int64 25 30 27 - * time (time) datetime64[ns] 2023-01-01 2023-01-02 2023-01-03 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 2023-01-03 Data variables: *empty* @@ -1896,8 +1896,8 @@ def 
reset_coords( Dimensions: (city: 3, time: 12) Coordinates: - * city (city) Dimensions: (city: 3, time: 12) Coordinates: - * city (city) T_Dataset Dimensions: (time: 10) Coordinates: - * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 Data variables: data (time) float64 1.0 nan 3.0 nan 5.0 6.0 nan 8.0 nan 10.0 From 7b6a2f6b35a79225ecc74a1a6b0ccb61deefe159 Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Mon, 3 Jul 2023 23:03:30 +0530 Subject: [PATCH 20/35] change --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index e9185e45d8a..0c7fb28421f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -6169,7 +6169,7 @@ def bfill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset Dimensions: (time: 10) Coordinates: - * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 
2023-01-10 Data variables: data (time) float64 1.0 3.0 3.0 5.0 5.0 6.0 8.0 8.0 10.0 10.0 From f0a9b548d5ad17ae086430f97f2d42e64bb87e91 Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Thu, 6 Jul 2023 20:49:41 +0530 Subject: [PATCH 21/35] change --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 0c7fb28421f..46c9a22de5b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5784,7 +5784,7 @@ def dropna( Data variables: *empty* - # Drop rows with all missing values + # Drop rows with all the missing values >>> dataset_dropped_all = dataset.dropna(dim="time", how="all") >>> dataset_dropped_all From c8999da4f428804f7fcafc9cc5b6d16059a368a0 Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Fri, 7 Jul 2023 15:43:21 +0530 Subject: [PATCH 22/35] change --- xarray/core/dataset.py | 78 +++++++++++------------------------------- 1 file changed, 20 insertions(+), 58 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 46c9a22de5b..ee2d3edb727 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5753,75 +5753,37 @@ def dropna( Examples -------- - >>> data = { - ... "time": [0, 1, 2, 3], - ... "temperature": [25.0, np.nan, 27.5, 28.0], - ... "humidity": [60.0, 65.0, np.nan, 70.0], - ... } - >>> dataset = xr.Dataset(data) + >>> dataset = xr.Dataset( + ... { + ... "temperature": ( + ... ["time", "location"], + ... [[23.4, 24.1], [np.nan, 22.1], [21.8, 24.2]], + ... ) + ... }, + ... coords={"time": [1, 2, 3], "location": ["A", "B"]}, + ... 
) >>> dataset - Dimensions: (time: 4, temperature: 4, humidity: 4) - Coordinates: - * time (time) int64 0 1 2 3 - * temperature (temperature) float64 25.0 nan 27.5 28.0 - * humidity (humidity) float64 60.0 65.0 nan 70.0 - Data variables: - *empty* - - >>> dataset_dropped_any = dataset.dropna(dim="time", how="any") - - # Print the dataset after dropping rows with any missing values - - >>> dataset_dropped_any - - Dimensions: (time: 4, temperature: 4, humidity: 4) + Dimensions: (time: 3, location: 2) Coordinates: - * time (time) int64 0 1 2 3 - * temperature (temperature) float64 25.0 nan 27.5 28.0 - * humidity (humidity) float64 60.0 65.0 nan 70.0 + * time (time) int64 1 2 3 + * location (location) >> dataset_dropped_all = dataset.dropna(dim="time", how="all") - >>> dataset_dropped_all - - Dimensions: (time: 0, temperature: 4, humidity: 4) - Coordinates: - * time (time) int64 - * temperature (temperature) float64 25.0 nan 27.5 28.0 - * humidity (humidity) float64 60.0 65.0 nan 70.0 - Data variables: - *empty* - - # Drop rows with a threshold of non-missing values + dataset_dropped = data.dropna(dim='time') - >>> dataset_dropped_thresh = dataset.dropna(dim="time", thresh=2) - >>> dataset_dropped_thresh + >>> dataset_dropped - Dimensions: (time: 0, temperature: 4, humidity: 4) + Dimensions: (time: 2, location: 2) Coordinates: - * time (time) int64 - * temperature (temperature) float64 25.0 nan 27.5 28.0 - * humidity (humidity) float64 60.0 65.0 nan 70.0 + * time (time) int64 1 3 + * location (location) >> dataset_dropped_subset = dataset.dropna(dim="time", subset=["temperature"]) - >>> dataset_dropped_subset - - Dimensions: (time: 4, temperature: 4, humidity: 4) - Coordinates: - * time (time) int64 0 1 2 3 - * temperature (temperature) float64 25.0 nan 27.5 28.0 - * humidity (humidity) float64 60.0 65.0 nan 70.0 - Data variables: - *empty* + temperature (time, location) float64 23.4 24.1 21.8 24.2 Returns ------- From fd3f7ac5205924873bf4d652d787a2914cdfe971 Mon Sep 
17 00:00:00 2001 From: harshitha1201 Date: Fri, 7 Jul 2023 16:22:47 +0530 Subject: [PATCH 23/35] indented --- xarray/core/dataset.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ee2d3edb727..71c5c80b165 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5774,8 +5774,7 @@ def dropna( # Drop NaN values from the dataset - dataset_dropped = data.dropna(dim='time') - + >>> dataset_dropped = data.dropna(dim="time") >>> dataset_dropped Dimensions: (time: 2, location: 2) From 11edc306df7c2d5288b760e3672c1210ab6582d2 Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Fri, 7 Jul 2023 17:43:59 +0530 Subject: [PATCH 24/35] . --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d39a4463781..ffce82f71c2 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5831,7 +5831,7 @@ def dropna( # Drop NaN values from the dataset - >>> dataset_dropped = data.dropna(dim="time") + >>> dataset_dropped = dataset.dropna(dim="time") >>> dataset_dropped Dimensions: (time: 2, location: 2) From e85f43cfc760039d411ada1da2a98a7170d445ee Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Sun, 9 Jul 2023 12:17:32 +0530 Subject: [PATCH 25/35] what's new --- doc/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 87559ee9b02..59c7c048c3f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -41,6 +41,9 @@ Bug fixes Documentation ~~~~~~~~~~~~~ +- Added examples to docstrings of :py:meth:`Dataset.tail`, :py:meth:`Dataset.head`, :py:meth:`Dataset.dropna`, + :py:meth:`Dataset.ffill`, :py:meth:`Dataset.bfill`, :py:meth:`Dataset.set_coords`, :py:meth:`Dataset.reset_coords` + (:issue:`6793`, :pull:`7936`) By `Harshitha `_ . - Added page on wrapping chunked numpy-like arrays as alternatives to dask arrays. (:pull:`7951`) By `Tom Nicholas `_. 
- Expanded the page on wrapping numpy-like "duck" arrays. From d683a8206d1c390312ef19da1086a7208966af6d Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Sun, 9 Jul 2023 12:55:32 +0530 Subject: [PATCH 26/35] . --- xarray/core/dataset.py | 85 ++++++------------------------------------ 1 file changed, 12 insertions(+), 73 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ffce82f71c2..86e990e39aa 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1746,28 +1746,28 @@ def set_coords(self: T_Dataset, names: Hashable | Iterable[Hashable]) -> T_Datas -------- >>> dataset = xr.Dataset( ... { - ... "temperature": ("time", [25, 30, 27]), + ... "pressure": ("time", [1.013, 1.2, 3.5]), ... "time": pd.date_range("2023-01-01", periods=3), ... } ... ) >>> dataset - Dimensions: (time: 3) + Dimensions: (time: 3) Coordinates: - * time (time) datetime64[ns] 2023-01-01 2023-01-02 2023-01-03 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 2023-01-03 Data variables: - temperature (time) int64 25 30 27 + pressure (time) float64 1.013 1.2 3.5 - >>> dataset.set_coords("temperature") + >>> dataset.set_coords("pressure") - Dimensions: (time: 3) + Dimensions: (time: 3) Coordinates: - temperature (time) int64 25 30 27 - * time (time) datetime64[ns] 2023-01-01 2023-01-02 2023-01-03 + pressure (time) float64 1.013 1.2 3.5 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 2023-01-03 Data variables: *empty* - On calling ``set_coords`` , these variables are converted to coordinates, as shown in the final dataset. + On calling ``set_coords`` , these data variables are converted to coordinates, as shown in the final dataset. Returns ------- @@ -1860,66 +1860,6 @@ def reset_coords( precipitation (time, lat, lon) float64 0.5 0.8 0.2 0.4 0.3 0.6 0.7 0.9 altitude int64 1000 - >>> cities = ["New York", "London", "Tokyo"] - >>> time = pd.date_range(start="2022-01-01", periods=12, freq="M") - >>> temperature_data = [ - ... 
# Temperature values for New York - ... [32, 34, 36, 40, 45, 50, 55, 60, 55, 45, 38, 35], - ... # Temperature values for London - ... [40, 42, 44, 48, 53, 58, 62, 64, 59, 52, 46, 42], - ... # Temperature values for Tokyo - ... [45, 47, 50, 58, 65, 72, 79, 82, 77, 68, 58, 50], - ... ] - >>> precipitation_data = [ - ... # Precipitation values for New York - ... [1.2, 1.5, 1.8, 2.5, 3.0, 2.8, 2.3, 2.0, 2.4, 2.8, 2.3, 1.8], - ... # Precipitation values for London - ... [2.0, 2.2, 2.5, 2.8, 3.0, 2.7, 2.3, 2.1, 2.3, 2.7, 2.5, 2.2], - ... # Precipitation values for Tokyo - ... [0.8, 0.9, 1.2, 1.5, 1.8, 2.0, 2.2, 2.1, 2.0, 1.7, 1.4, 1.2], - ... ] - >>> dataset = xr.Dataset( - ... { - ... "temperature": (["city", "time"], temperature_data), - ... "precipitation": (["city", "time"], precipitation_data), - ... }, - ... coords={ - ... "city": cities, - ... "time": time, - ... "altitude": 1000, - ... }, - ... ) - - # Dataset before resetting coordinates - - >>> dataset - - Dimensions: (city: 3, time: 12) - Coordinates: - * city (city) >> dataset_reset = dataset.reset_coords("altitude") - - # Dataset after resetting coordinates - - >>> dataset_reset - - Dimensions: (city: 3, time: 12) - Coordinates: - * city (city) T_Dataset >>> data = np.array([1, np.nan, 3, np.nan, 5, 6, np.nan, 8, np.nan, 10]) >>> dataset = xr.Dataset({"data": (("time",), data)}, coords={"time": time}) - - # Perform forward fill (ffill) on the dataset - - >>> filled_dataset = dataset.ffill(dim="time") >>> dataset Dimensions: (time: 10) @@ -6129,6 +6065,9 @@ def ffill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset Data variables: data (time) float64 1.0 nan 3.0 nan 5.0 6.0 nan 8.0 nan 10.0 + # Perform forward fill (ffill) on the dataset + + >>> filled_dataset = dataset.ffill(dim="time") >>> filled_dataset Dimensions: (time: 10) From 04f167faa5a504a31630f403d9ca6757e0f87e2a Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Sun, 9 Jul 2023 14:13:01 +0530 Subject: [PATCH 27/35] head & 
tail --- xarray/core/dataset.py | 67 +++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 86e990e39aa..d71c4bc8517 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2824,34 +2824,28 @@ def head( Examples -------- + >>> dates = pd.date_range(start="2023-01-01", periods=5) + >>> pageviews = [1200, 1500, 900, 1800, 2000] + >>> visitors = [800, 1000, 600, 1200, 1500] >>> dataset = xr.Dataset( ... { - ... "temperature": [25.1, 28.3, 30.5, 27.2, 26.8], - ... "humidity": [60.2, 55.6, 50.3, 58.8, 61.7], + ... "pageviews": (("date"), pageviews), + ... "visitors": (("date"), visitors), ... }, - ... coords={"time": [1, 2, 3, 4, 5]}, + ... coords={"date": dates}, ... ) - >>> dataset - - Dimensions: (temperature: 5, humidity: 5, time: 5) - Coordinates: - * temperature (temperature) float64 25.1 28.3 30.5 27.2 26.8 - * humidity (humidity) float64 60.2 55.6 50.3 58.8 61.7 - * time (time) int64 1 2 3 4 5 - Data variables: - *empty* - # Use head() function to retrieve the first three elements + # Retrieve the 3 most busiest days in terms of pageviews and visitors - >>> dataset.head(2) + >>> busiest_days = dataset.sortby(["pageviews", "visitors"], ascending=False) + >>> busiest_days.head(3) - Dimensions: (temperature: 2, humidity: 2, time: 2) + Dimensions: (date: 3) Coordinates: - * temperature (temperature) float64 25.1 28.3 - * humidity (humidity) float64 60.2 55.6 - * time (time) int64 1 2 + * date (date) datetime64[ns] 2023-01-05 2023-01-04 2023-01-02 Data variables: - *empty* + pageviews (date) int64 2000 1800 1500 + visitors (date) int64 1500 1200 1000 See Also -------- @@ -2901,23 +2895,36 @@ def tail( Examples -------- - >>> data = xr.DataArray([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dims=("x", "y")) - >>> dataset = xr.Dataset({"data": data}) - >>> dataset + >>> activity_names = ["Walking", "Running", "Cycling", "Swimming", "Yoga"] + >>> durations = [30, 
45, 60, 45, 60] # in minutes + >>> energies = [150, 300, 250, 400, 100] # in calories + >>> dataset = xr.Dataset( + ... { + ... "duration": (["activity"], durations), + ... "energy_expenditure": (["activity"], energies), + ... }, + ... coords={"activity": activity_names}, + ... ) + >>> sorted_dataset = dataset.sortby("energy_expenditure", ascending=False) + >>> sorted_dataset - Dimensions: (x: 3, y: 3) - Dimensions without coordinates: x, y + Dimensions: (activity: 5) + Coordinates: + * activity (activity) >> dataset.tail(2) + >>> sorted_dataset.tail(3) - Dimensions: (x: 2, y: 2) - Dimensions without coordinates: x, y + Dimensions: (activity: 3) + Coordinates: + * activity (activity) Date: Sun, 9 Jul 2023 14:46:23 +0530 Subject: [PATCH 28/35] bfill & ffill --- xarray/core/dataset.py | 59 ++++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d71c4bc8517..d62d01fc99b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1863,6 +1863,10 @@ def reset_coords( Returns ------- Dataset + + See Also + -------- + Dataset.set_coords """ if names is None: names = self._coord_names - set(self._indexes) @@ -6058,11 +6062,10 @@ def ffill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset Examples -------- - # Sample dataset with missing values - >>> time = pd.date_range("2023-01-01", periods=10, freq="D") - >>> data = np.array([1, np.nan, 3, np.nan, 5, 6, np.nan, 8, np.nan, 10]) - + >>> data = np.array( + ... [1, np.nan, np.nan, np.nan, 5, np.nan, np.nan, 8, np.nan, 10] + ... ) >>> dataset = xr.Dataset({"data": (("time",), data)}, coords={"time": time}) >>> dataset @@ -6070,22 +6073,35 @@ def ffill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset Coordinates: * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 
2023-01-10 Data variables: - data (time) float64 1.0 nan 3.0 nan 5.0 6.0 nan 8.0 nan 10.0 + data (time) float64 1.0 nan nan nan 5.0 nan nan 8.0 nan 10.0 # Perform forward fill (ffill) on the dataset - >>> filled_dataset = dataset.ffill(dim="time") - >>> filled_dataset + >>> dataset.ffill(dim="time") Dimensions: (time: 10) Coordinates: * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 Data variables: - data (time) float64 1.0 1.0 3.0 3.0 5.0 6.0 6.0 8.0 8.0 10.0 + data (time) float64 1.0 1.0 1.0 1.0 5.0 5.0 5.0 8.0 8.0 10.0 + + # Limit the forward filling to a maximum of 2 consecutive NaN values + + >>> dataset.ffill(dim="time", limit=2) + + Dimensions: (time: 10) + Coordinates: + * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 + Data variables: + data (time) float64 1.0 1.0 1.0 nan 5.0 5.0 5.0 8.0 8.0 10.0 Returns ------- Dataset + + See Also + -------- + Dataset.bfill """ from xarray.core.missing import _apply_over_vars_with_dim, ffill @@ -6111,13 +6127,10 @@ def bfill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset Examples -------- - # Define the time range - >>> time = pd.date_range("2023-01-01", periods=10, freq="D") - - # Define the data array with missing values - - >>> data = np.array([1, np.nan, 3, np.nan, 5, 6, np.nan, 8, np.nan, 10]) + >>> data = np.array( + ... [1, np.nan, np.nan, np.nan, 5, np.nan, np.nan, 8, np.nan, 10] + ... ) >>> dataset = xr.Dataset({"data": (("time",), data)}, coords={"time": time}) >>> dataset @@ -6125,7 +6138,7 @@ def bfill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset Coordinates: * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 
2023-01-10 Data variables: - data (time) float64 1.0 nan 3.0 nan 5.0 6.0 nan 8.0 nan 10.0 + data (time) float64 1.0 nan nan nan 5.0 nan nan 8.0 nan 10.0 # filled dataset, fills NaN values by propagating values backward @@ -6135,11 +6148,25 @@ def bfill(self: T_Dataset, dim: Hashable, limit: int | None = None) -> T_Dataset Coordinates: * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 Data variables: - data (time) float64 1.0 3.0 3.0 5.0 5.0 6.0 8.0 8.0 10.0 10.0 + data (time) float64 1.0 5.0 5.0 5.0 5.0 8.0 8.0 8.0 10.0 10.0 + + # Limit the backward filling to a maximum of 2 consecutive NaN values + + >>> dataset.bfill(dim="time", limit=2) + + Dimensions: (time: 10) + Coordinates: + * time (time) datetime64[ns] 2023-01-01 2023-01-02 ... 2023-01-10 + Data variables: + data (time) float64 1.0 nan 5.0 5.0 5.0 8.0 8.0 8.0 10.0 10.0 Returns ------- Dataset + + See Also + -------- + Dataset.ffill """ from xarray.core.missing import _apply_over_vars_with_dim, bfill From 718438c3082ea8aa3f1a04a85dd39fe3a6ebc3b0 Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Sun, 9 Jul 2023 15:41:15 +0530 Subject: [PATCH 29/35] dropna --- xarray/core/dataset.py | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index d62d01fc99b..ba8e292b19f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5761,7 +5761,6 @@ def dropna( Examples -------- - >>> dataset = xr.Dataset( ... { ... 
"temperature": ( @@ -5782,8 +5781,7 @@ def dropna( # Drop NaN values from the dataset - >>> dataset_dropped = dataset.dropna(dim="time") - >>> dataset_dropped + >>> dataset.dropna(dim="time") Dimensions: (time: 2, location: 2) Coordinates: @@ -5792,6 +5790,39 @@ def dropna( Data variables: temperature (time, location) float64 23.4 24.1 21.8 24.2 + # Drop labels with any NAN values + + >>> dataset.dropna(dim="time", how="any") + + Dimensions: (time: 1, location: 2) + Coordinates: + * time (time) int64 1 + * location (location) >> dataset.dropna(dim="time", how="all") + + Dimensions: (time: 3, location: 2) + Coordinates: + * time (time) int64 1 2 3 + * location (location) >> dataset.dropna(dim="time", thresh=2) + + Dimensions: (time: 1, location: 2) + Coordinates: + * time (time) int64 1 + * location (location) Date: Sun, 9 Jul 2023 16:00:18 +0530 Subject: [PATCH 30/35] head & tail --- xarray/core/dataset.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index ba8e292b19f..7cfc8157f1f 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2851,6 +2851,17 @@ def head( pageviews (date) int64 2000 1800 1500 visitors (date) int64 1500 1200 1000 + # Using a dictionary to specify the number of elements for specific dimensions + + >>> busiest_days.head({"date": 3}) + + Dimensions: (date: 3) + Coordinates: + * date (date) datetime64[ns] 2023-01-05 2023-01-04 2023-01-02 + Data variables: + pageviews (date) int64 2000 1800 1500 + visitors (date) int64 1500 1200 1000 + See Also -------- Dataset.tail @@ -2930,6 +2941,15 @@ def tail( duration (activity) int64 60 30 60 energy_expenditure (activity) int64 250 150 100 + >>> sorted_dataset.tail({"activity": 3}) + + Dimensions: (activity: 3) + Coordinates: + * activity (activity) Date: Sun, 9 Jul 2023 17:00:37 +0530 Subject: [PATCH 31/35] doctest --- xarray/core/dataset.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/xarray/core/dataset.py b/xarray/core/dataset.py index 7cfc8157f1f..174170d896d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1754,7 +1754,7 @@ def set_coords(self: T_Dataset, names: Hashable | Iterable[Hashable]) -> T_Datas Dimensions: (time: 3) Coordinates: - * time (time) datetime64[ns] 2023-01-01 2023-01-02 2023-01-03 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 2023-01-03 Data variables: pressure (time) float64 1.013 1.2 3.5 @@ -5814,12 +5814,12 @@ def dropna( >>> dataset.dropna(dim="time", how="any") - Dimensions: (time: 1, location: 2) + Dimensions: (time: 2, location: 2) Coordinates: - * time (time) int64 1 + * time (time) int64 1 3 * location (location) Date: Sun, 9 Jul 2023 18:11:35 +0530 Subject: [PATCH 32/35] doctest error --- xarray/core/dataset.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 174170d896d..7f273ea9e69 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1763,7 +1763,7 @@ def set_coords(self: T_Dataset, names: Hashable | Iterable[Hashable]) -> T_Datas Dimensions: (time: 3) Coordinates: pressure (time) float64 1.013 1.2 3.5 - * time (time) datetime64[ns] 2023-01-01 2023-01-02 2023-01-03 + * time (time) datetime64[ns] 2023-01-01 2023-01-02 2023-01-03 Data variables: *empty* @@ -5785,63 +5785,63 @@ def dropna( ... { ... "temperature": ( ... ["time", "location"], - ... [[23.4, 24.1], [np.nan, 22.1], [21.8, 24.2]], + ... [[23.4, 24.1], [np.nan, 22.1], [21.8, 24.2], [20.5, 25.3]], ... ) ... }, - ... coords={"time": [1, 2, 3], "location": ["A", "B"]}, + ... coords={"time": [1, 2, 3, 4], "location": ["A", "B"]}, ... 
) >>> dataset - Dimensions: (time: 3, location: 2) + Dimensions: (time: 4, location: 2) Coordinates: - * time (time) int64 1 2 3 + * time (time) int64 1 2 3 4 * location (location) >> dataset.dropna(dim="time") - Dimensions: (time: 2, location: 2) + Dimensions: (time: 3, location: 2) Coordinates: - * time (time) int64 1 3 + * time (time) int64 1 3 4 * location (location) >> dataset.dropna(dim="time", how="any") - Dimensions: (time: 2, location: 2) + Dimensions: (time: 3, location: 2) Coordinates: - * time (time) int64 1 3 + * time (time) int64 1 3 4 * location (location) >> dataset.dropna(dim="time", how="all") - Dimensions: (time: 3, location: 2) + Dimensions: (time: 4, location: 2) Coordinates: - * time (time) int64 1 2 3 + * time (time) int64 1 2 3 4 * location (location) >> dataset.dropna(dim="time", thresh=2) - Dimensions: (time: 1, location: 2) + Dimensions: (time: 3, location: 2) Coordinates: - * time (time) int64 1 + * time (time) int64 1 3 4 * location (location) Date: Tue, 11 Jul 2023 08:01:00 +0530 Subject: [PATCH 33/35] Update xarray/core/dataset.py Co-authored-by: Tom Nicholas --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 7f273ea9e69..5565ecfeae3 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2841,7 +2841,7 @@ def head( # Retrieve the 3 most busiest days in terms of pageviews and visitors - >>> busiest_days = dataset.sortby(["pageviews", "visitors"], ascending=False) + >>> busiest_days = dataset.sortby("pageviews", ascending=False) >>> busiest_days.head(3) Dimensions: (date: 3) From 2028eac17a96b048199bdff0b95303fec370bcdf Mon Sep 17 00:00:00 2001 From: harshitha1201 Date: Tue, 11 Jul 2023 08:10:37 +0530 Subject: [PATCH 34/35] . 
--- xarray/core/dataset.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 5565ecfeae3..189fb08c95d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2838,10 +2838,18 @@ def head( ... }, ... coords={"date": dates}, ... ) + >>> busiest_days = dataset.sortby("pageviews", ascending=False) + >>> busiest_days.head() + + Dimensions: (date: 5) + Coordinates: + * date (date) datetime64[ns] 2023-01-05 2023-01-04 ... 2023-01-03 + Data variables: + pageviews (date) int64 2000 1800 1500 1200 900 + visitors (date) int64 1500 1200 1000 800 600 - # Retrieve the 3 most busiest days in terms of pageviews and visitors + # Retrieve the 3 busiest days in terms of pageviews - >>> busiest_days = dataset.sortby("pageviews", ascending=False) >>> busiest_days.head(3) Dimensions: (date: 3) From 4ecabe9da8073539c4365197b5d2108dbbc0ab4e Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Tue, 11 Jul 2023 00:05:32 -0400 Subject: [PATCH 35/35] Fix doctest --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 189fb08c95d..7bd92ea32a0 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -2843,7 +2843,7 @@ def head( Dimensions: (date: 5) Coordinates: - * date (date) datetime64[ns] 2023-01-05 2023-01-04 ... 2023-01-03 + * date (date) datetime64[ns] 2023-01-05 2023-01-04 ... 2023-01-03 Data variables: pageviews (date) int64 2000 1800 1500 1200 900 visitors (date) int64 1500 1200 1000 800 600