From 1f2ae026b6afd21579be14e89f9194f5c48d6599 Mon Sep 17 00:00:00 2001
From: Jabin Kong <1059978534@qq.com>
Date: Wed, 18 Aug 2021 15:50:38 +0800
Subject: [PATCH 01/11] Add iterrows() and itertuples() DataFrame API

---
 .../api/eland.DataFrame.iterrows.rst          |   6 ++
 .../api/eland.DataFrame.itertuples.rst        |   6 ++
 docs/sphinx/reference/dataframe.rst           |   2 +
 eland/dataframe.py                            | 100 +++++++++++++++++-
 eland/operations.py                           |  59 +++++++++++
 eland/query_compiler.py                       |  33 ++++++
 .../test_iterrows_itertuples_pytest.py        |  45 ++++++++
 7 files changed, 250 insertions(+), 1 deletion(-)
 create mode 100644 docs/sphinx/reference/api/eland.DataFrame.iterrows.rst
 create mode 100644 docs/sphinx/reference/api/eland.DataFrame.itertuples.rst
 create mode 100644 tests/dataframe/test_iterrows_itertuples_pytest.py
diff --git a/docs/sphinx/reference/api/eland.DataFrame.iterrows.rst b/docs/sphinx/reference/api/eland.DataFrame.iterrows.rst
new file mode 100644
index 00000000..2e3812ce
--- /dev/null
+++ b/docs/sphinx/reference/api/eland.DataFrame.iterrows.rst
@@ -0,0 +1,6 @@
+eland.DataFrame.iterrows
+========================
+
+.. currentmodule:: eland
+
+.. automethod:: DataFrame.iterrows
diff --git a/docs/sphinx/reference/api/eland.DataFrame.itertuples.rst b/docs/sphinx/reference/api/eland.DataFrame.itertuples.rst
new file mode 100644
index 00000000..3c3959df
--- /dev/null
+++ b/docs/sphinx/reference/api/eland.DataFrame.itertuples.rst
@@ -0,0 +1,6 @@
+eland.DataFrame.itertuples
+==========================
+
+.. currentmodule:: eland
+
+.. automethod:: DataFrame.itertuples
diff --git a/docs/sphinx/reference/dataframe.rst b/docs/sphinx/reference/dataframe.rst
index 3a5c24be..2faf9dd5 100644
--- a/docs/sphinx/reference/dataframe.rst
+++ b/docs/sphinx/reference/dataframe.rst
@@ -38,6 +38,8 @@ Indexing, Iteration
    DataFrame.get
    DataFrame.query
    DataFrame.sample
+   DataFrame.iterrows
+   DataFrame.itertuples
 
 Function Application, GroupBy & Window
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/eland/dataframe.py b/eland/dataframe.py
index 80352a6f..ce367102 100644
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@@ -19,7 +19,7 @@
 import sys
 import warnings
 from io import StringIO
-from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Tuple, Union
+from typing import TYPE_CHECKING, Any, Iterable, List, Optional, Sequence, Tuple, Union
 
 import numpy as np
 import pandas as pd  # type: ignore
@@ -1446,6 +1446,104 @@ def keys(self) -> pd.Index:
         """
         return self.columns
 
+    def iterrows(self) -> Iterable[Tuple[Union[str, Tuple[str, ...]], pd.Series]]:
+        """
+        Iterate over eland.DataFrame rows as (index, pandas.Series) pairs.
+
+        Yields:
+            index: index
+                The index of the row.
+            data: pandas Series
+                The data of the row as a pandas Series.
+
+        See Also
+        --------
+        eland.DataFrame.itertuples: Iterate over eland.DataFrame rows as namedtuples.
+
+        Examples
+        --------
+        >>> df = ed.DataFrame('localhost:9200', 'flights')
+        >>> df.head()
+            AvgTicketPrice  Cancelled  ... dayOfWeek           timestamp
+        0      841.265642      False  ...         0 2018-01-01 00:00:00
+        1      882.982662      False  ...         0 2018-01-01 18:27:00
+        2      190.636904      False  ...         0 2018-01-01 17:11:14
+        3      181.694216       True  ...         0 2018-01-01 10:33:28
+        4      730.041778      False  ...         0 2018-01-01 05:13:00
+        <BLANKLINE>
+        [5 rows x 27 columns]
+
+        >>> for index, row in df.iterrows()
+        ...     print(row)
+
+        AvgTicketPrice  841.265642
+        Cancelled       False
+        dayOfWeek       0
+        timestamp       2018-01-01 00:00:00
+        Name: 0, dtype: object
+
+        AvgTicketPrice  882.982662
+        Cancelled       False
+        dayOfWeek       0
+        timestamp       2018-01-01 18:27:00
+        Name: 1, dtype: object
+        """
+        return self._query_compiler.iterrows()
+
+    def itertuples(
+        self, index: bool = True, name: Union[str, None] = "Eland"
+    ) -> Iterable[Tuple[Any, ...]]:
+        """
+        Iterate over eland.DataFrame rows as namedtuples.
+
+        Args:
+            index: bool, default True
+                If True, return the index as the first element of the tuple.
+            name: str or None, default "Eland"
+                The name of the returned namedtuples or None to return regular tuples.
+
+        Returns:
+            iterator
+                An object to iterate over namedtuples for each row in the
+                DataFrame with the first field possibly being the index and
+                following fields being the column values.
+
+        See Also
+        --------
+        eland.DataFrame.iterrows: Iterate over eland.DataFrame rows as (index, pandas.Series) pairs.
+
+        Examples
+        --------
+        >>> df = ed.DataFrame('localhost:9200', 'flights')
+        >>> df.head()
+            AvgTicketPrice  Cancelled  ... dayOfWeek           timestamp
+        0      841.265642      False  ...         0 2018-01-01 00:00:00
+        1      882.982662      False  ...         0 2018-01-01 18:27:00
+        2      190.636904      False  ...         0 2018-01-01 17:11:14
+        3      181.694216       True  ...         0 2018-01-01 10:33:28
+        4      730.041778      False  ...         0 2018-01-01 05:13:00
+        <BLANKLINE>
+        [5 rows x 27 columns]
+
+        >>> for row in df.itertuples():
+        ...     print(row)
+        Eland(Index='0', AvgTicketPrice=841.265642, Cancelled=False, ..., dayOfWeek=0, timestamp='2018-01-01 00:00:00')
+        Eland(Index='1', AvgTicketPrice=882.982662, Cancelled=False, ..., dayOfWeek=0, timestamp='2018-01-01 18:27:00')
+
+        By setting the `index` parameter to False we can remove the index as the first element of the tuple:
+        >>> for row in df.itertuples(index=False):
+        ...     print(row)
+        Eland(AvgTicketPrice=841.265642, Cancelled=False, ..., dayOfWeek=0, timestamp='2018-01-01 00:00:00')
+        Eland(AvgTicketPrice=882.982662, Cancelled=False, ..., dayOfWeek=0, timestamp='2018-01-01 18:27:00')
+
+        With the `name` parameter set we set a custom name for the yielded namedtuples:
+        >>> for row in df.itertuples(name='Flight'):
+        ...     print(row)
+        Flight(Index='0', AvgTicketPrice=841.265642, Cancelled=False, ..., dayOfWeek=0, timestamp='2018-01-01 00:00:00')
+        Flight(Index='1', AvgTicketPrice=882.982662, Cancelled=False, ..., dayOfWeek=0, timestamp='2018-01-01 18:27:00')
+        """
+        return self._query_compiler.itertuples(index=index, name=name)
+
     def aggregate(
         self,
         func: Union[str, List[str]],
diff --git a/eland/operations.py b/eland/operations.py
index 48c33448..7fc494ca 100644
--- a/eland/operations.py
+++ b/eland/operations.py
@@ -23,6 +23,7 @@
     Any,
     Dict,
     Generator,
+    Iterable,
     List,
     Optional,
     Sequence,
@@ -1195,6 +1196,64 @@ def describe(self, query_compiler: "QueryCompiler") -> pd.DataFrame:
             ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]
         )
 
+    def iterrows(
+        self, query_compiler: "QueryCompiler"
+    ) -> Iterable[Tuple[Union[str, Tuple[str, ...]], pd.Series]]:
+        query_params, post_processing = self._resolve_tasks(query_compiler)
+        result_size, sort_params = Operations._query_params_to_size_and_sort(
+            query_params
+        )
+
+        script_fields = query_params.script_fields
+        query = Query(query_params.query)
+
+        body = query.to_search_body()
+        if script_fields is not None:
+            body["script_fields"] = script_fields
+
+        # Only return requested field_names and add them to body
+        _source = query_compiler.get_field_names(include_scripted_fields=False)
+        body["_source"] = _source if _source else False
+
+        if sort_params:
+            body["sort"] = [sort_params]
+
+        for hits in _search_yield_hits(
+            query_compiler=query_compiler, body=body, max_number_of_hits=result_size
+        ):
+            df = query_compiler._es_results_to_pandas(hits)
+            df = self._apply_df_post_processing(df, post_processing)
+            yield from df.iterrows()
+
+    def itertuples(
+        self, query_compiler: "QueryCompiler", index: bool, name: Union[str, None]
+    ) -> Iterable[Tuple[Any, ...]]:
+        query_params, post_processing = self._resolve_tasks(query_compiler)
+        result_size, sort_params = Operations._query_params_to_size_and_sort(
+            query_params
+        )
+
+        script_fields = query_params.script_fields
+        query = Query(query_params.query)
+
+        body = query.to_search_body()
+        if script_fields is not None:
+            body["script_fields"] = script_fields
+
+        # Only return requested field_names and add them to body
+        _source = query_compiler.get_field_names(include_scripted_fields=False)
+        body["_source"] = _source if _source else False
+
+        if sort_params:
+            body["sort"] = [sort_params]
+
+        for hits in _search_yield_hits(
+            query_compiler=query_compiler, body=body, max_number_of_hits=result_size
+        ):
+            df = query_compiler._es_results_to_pandas(hits)
+            df = self._apply_df_post_processing(df, post_processing)
+            yield from df.itertuples(index=index, name=name)
+
     def to_pandas(
         self, query_compiler: "QueryCompiler", show_progress: bool = False
     ) -> pd.DataFrame:
diff --git a/eland/query_compiler.py b/eland/query_compiler.py
index 207d1c15..e06db4e6 100644
--- a/eland/query_compiler.py
+++ b/eland/query_compiler.py
@@ -21,6 +21,7 @@
     TYPE_CHECKING,
     Any,
     Dict,
+    Iterable,
     List,
     Optional,
     Sequence,
@@ -527,6 +528,38 @@ def to_csv(self, **kwargs) -> Optional[str]:
         """
         return self._operations.to_csv(self, **kwargs)
 
+    def iterrows(self) -> Iterable[Tuple[Union[str, Tuple[str, ...]], pd.Series]]:
+        """
+        Iterate over ed.DataFrame rows as (index, pd.Series) pairs.
+
+        Yields:
+            index: index
+                The index of the row.
+            data: pandas Series
+                The data of the row as a pandas Series.
+        """
+        return self._operations.iterrows(self)
+
+    def itertuples(
+        self, index: bool, name: Union[str, None]
+    ) -> Iterable[Tuple[Any, ...]]:
+        """
+        Iterate over eland.DataFrame rows as namedtuples.
+
+        Args:
+            index : bool, default True
+                If True, return the index as the first element of the tuple.
+            name : str or None, default "Eland"
+                The name of the returned namedtuples or None to return regular tuples.
+
+        Returns:
+            iterator
+                An object to iterate over namedtuples for each row in the
+                DataFrame with the first field possibly being the index and
+                following fields being the column values.
+        """
+        return self._operations.itertuples(self, index, name)
+
     # __getitem__ methods
     def getitem_column_array(self, key, numeric=False):
         """Get column data for target labels.
diff --git a/tests/dataframe/test_iterrows_itertuples_pytest.py b/tests/dataframe/test_iterrows_itertuples_pytest.py
new file mode 100644
index 00000000..5a7ea689
--- /dev/null
+++ b/tests/dataframe/test_iterrows_itertuples_pytest.py
@@ -0,0 +1,45 @@
+#  Licensed to Elasticsearch B.V. under one or more contributor
+#  license agreements. See the NOTICE file distributed with
+#  this work for additional information regarding copyright
+#  ownership. Elasticsearch B.V. licenses this file to you under
+#  the Apache License, Version 2.0 (the "License"); you may
+#  not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+# 	http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing,
+#  software distributed under the License is distributed on an
+#  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#  KIND, either express or implied.  See the License for the
+#  specific language governing permissions and limitations
+#  under the License.
+
+# File called _pytest for PyCharm compatability
+
+from pandas.testing import assert_index_equal, assert_series_equal
+
+from tests.common import TestData
+
+
+class TestDataFrameIterrowsItertuples(TestData):
+    def test_iterrows(self):
+        ed_flights_iterrows = self.ed_flights().iterrows()
+        pd_flights_iterrows = self.pd_flights().iterrows()
+        assert len(ed_flights_iterrows) == len(pd_flights_iterrows)
+
+        for i in len(ed_flights_iterrows):
+            ed_index, ed_row = next(ed_flights_iterrows)
+            pd_index, pd_row = next(pd_flights_iterrows)
+            assert_index_equal(ed_index, pd_index)
+            assert_series_equal(ed_row, pd_row)
+
+    def test_itertuples(self):
+        ed_flights_itertuples = self.ed_flights().itertuples(name=None)
+        pd_flights_itertuples = self.pd_flights().itertuples(name=None)
+        assert len(ed_flights_itertuples) == len(pd_flights_itertuples)
+
+        for i in len(ed_flights_itertuples):
+            ed_row = next(ed_flights_itertuples)
+            pd_row = next(pd_flights_itertuples)
+            assert ed_row == pd_row

From 4d3bde29abad659fa62da3820070bbc9092a7401 Mon Sep 17 00:00:00 2001
From: Jabin Kong <1059978534@qq.com>
Date: Thu, 19 Aug 2021 14:29:24 +0800
Subject: [PATCH 02/11] Yielding from the `iterrows()` and `itertuples()` in
 dataframe.py

---
 eland/dataframe.py                            | 15 ++++---
 eland/operations.py                           | 30 +++++++++++++
 eland/query_compiler.py                       | 35 ++-------------
 .../test_iterrows_itertuples_pytest.py        | 43 ++++++++++++++-----
 4 files changed, 76 insertions(+), 47 deletions(-)

diff --git a/eland/dataframe.py b/eland/dataframe.py
index ce367102..5b4875c8 100644
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@@ -1450,7 +1450,8 @@ def iterrows(self) -> Iterable[Tuple[Union[str, Tuple[str, ...]], pd.Series]]:
         """
         Iterate over eland.DataFrame rows as (index, pandas.Series) pairs.
 
-        Yields:
+        Yields
+        ------
             index: index
                 The index of the row.
             data: pandas Series
@@ -1488,7 +1489,8 @@ def iterrows(self) -> Iterable[Tuple[Union[str, Tuple[str, ...]], pd.Series]]:
         timestamp       2018-01-01 18:27:00
         Name: 1, dtype: object
         """
-        return self._query_compiler.iterrows()
+        for df in self._query_compiler.yield_pandas_dataframe():
+            yield from df.iterrows()
 
     def itertuples(
         self, index: bool = True, name: Union[str, None] = "Eland"
@@ -1496,13 +1498,15 @@ def itertuples(
         """
         Iterate over eland.DataFrame rows as namedtuples.
 
-        Args:
+        Args
+        ----
             index: bool, default True
                 If True, return the index as the first element of the tuple.
             name: str or None, default "Eland"
                 The name of the returned namedtuples or None to return regular tuples.
 
-        Returns:
+        Returns
+        -------
             iterator
                 An object to iterate over namedtuples for each row in the
                 DataFrame with the first field possibly being the index and
@@ -1542,7 +1546,8 @@ def itertuples(
         Flight(Index='0', AvgTicketPrice=841.265642, Cancelled=False, ..., dayOfWeek=0, timestamp='2018-01-01 00:00:00')
         Flight(Index='1', AvgTicketPrice=882.982662, Cancelled=False, ..., dayOfWeek=0, timestamp='2018-01-01 18:27:00')
         """
-        return self._query_compiler.itertuples(index=index, name=name)
+        for df in self._query_compiler.yield_pandas_dataframe():
+            yield from df.itertuples(index=index, name=name)
 
     def aggregate(
         self,
diff --git a/eland/operations.py b/eland/operations.py
index 7fc494ca..784d31a4 100644
--- a/eland/operations.py
+++ b/eland/operations.py
@@ -1270,6 +1270,36 @@ def to_csv(
         df = self._es_results(query_compiler, show_progress)
         return df.to_csv(**kwargs)  # type: ignore[no-any-return]
 
+    def search_yield_pandas_dataframe(
+        self, query_compiler: "QueryCompiler"
+    ) -> Generator["pd.DataFrame", None, None]:
+        query_params, post_processing = self._resolve_tasks(query_compiler)
+
+        result_size, sort_params = Operations._query_params_to_size_and_sort(
+            query_params
+        )
+
+        script_fields = query_params.script_fields
+        query = Query(query_params.query)
+
+        body = query.to_search_body()
+        if script_fields is not None:
+            body["script_fields"] = script_fields
+
+        # Only return requested field_names and add them to body
+        _source = query_compiler.get_field_names(include_scripted_fields=False)
+        body["_source"] = _source if _source else False
+
+        if sort_params:
+            body["sort"] = [sort_params]
+
+        for hits in _search_yield_hits(
+            query_compiler=query_compiler, body=body, max_number_of_hits=result_size
+        ):
+            df = query_compiler._es_results_to_pandas(hits)
+            df = self._apply_df_post_processing(df, post_processing)
+            yield df
+
     def _es_results(
         self, query_compiler: "QueryCompiler", show_progress: bool = False
     ) -> pd.DataFrame:
diff --git a/eland/query_compiler.py b/eland/query_compiler.py
index e06db4e6..79f2abae 100644
--- a/eland/query_compiler.py
+++ b/eland/query_compiler.py
@@ -21,7 +21,7 @@
     TYPE_CHECKING,
     Any,
     Dict,
-    Iterable,
+    Generator,
     List,
     Optional,
     Sequence,
@@ -528,37 +528,8 @@ def to_csv(self, **kwargs) -> Optional[str]:
         """
         return self._operations.to_csv(self, **kwargs)
 
-    def iterrows(self) -> Iterable[Tuple[Union[str, Tuple[str, ...]], pd.Series]]:
-        """
-        Iterate over ed.DataFrame rows as (index, pd.Series) pairs.
-
-        Yields:
-            index: index
-                The index of the row.
-            data: pandas Series
-                The data of the row as a pandas Series.
-        """
-        return self._operations.iterrows(self)
-
-    def itertuples(
-        self, index: bool, name: Union[str, None]
-    ) -> Iterable[Tuple[Any, ...]]:
-        """
-        Iterate over eland.DataFrame rows as namedtuples.
-
-        Args:
-            index : bool, default True
-                If True, return the index as the first element of the tuple.
-            name : str or None, default "Eland"
-                The name of the returned namedtuples or None to return regular tuples.
-
-        Returns:
-            iterator
-                An object to iterate over namedtuples for each row in the
-                DataFrame with the first field possibly being the index and
-                following fields being the column values.
-        """
-        return self._operations.itertuples(self, index, name)
+    def yield_pandas_dataframe(self) -> Generator["pd.DataFrame", None, None]:
+        return self._operations.search_yield_pandas_dataframe(self)
 
     # __getitem__ methods
     def getitem_column_array(self, key, numeric=False):
diff --git a/tests/dataframe/test_iterrows_itertuples_pytest.py b/tests/dataframe/test_iterrows_itertuples_pytest.py
index 5a7ea689..2400417f 100644
--- a/tests/dataframe/test_iterrows_itertuples_pytest.py
+++ b/tests/dataframe/test_iterrows_itertuples_pytest.py
@@ -24,22 +24,45 @@
 
 class TestDataFrameIterrowsItertuples(TestData):
     def test_iterrows(self):
-        ed_flights_iterrows = self.ed_flights().iterrows()
-        pd_flights_iterrows = self.pd_flights().iterrows()
-        assert len(ed_flights_iterrows) == len(pd_flights_iterrows)
+        ed_flights = self.ed_flights()
+        pd_flights = self.pd_flights()
+
+        ed_flights_iterrows = ed_flights.iterrows()
+        pd_flights_iterrows = pd_flights.iterrows()
+
+        assert len(list(ed_flights_iterrows)) == len(list(pd_flights_iterrows))
+
+        for ed_index, ed_row in ed_flights_iterrows:
 
-        for i in len(ed_flights_iterrows):
-            ed_index, ed_row = next(ed_flights_iterrows)
             pd_index, pd_row = next(pd_flights_iterrows)
+
             assert_index_equal(ed_index, pd_index)
             assert_series_equal(ed_row, pd_row)
 
+        for pd_index, pd_row in pd_flights_iterrows:
+
+            ed_index, ed_row = next(ed_flights_iterrows)
+
+            assert_index_equal(pd_index, ed_index)
+            assert_series_equal(pd_row, ed_row)
+
     def test_itertuples(self):
-        ed_flights_itertuples = self.ed_flights().itertuples(name=None)
-        pd_flights_itertuples = self.pd_flights().itertuples(name=None)
-        assert len(ed_flights_itertuples) == len(pd_flights_itertuples)
+        ed_flights = self.ed_flights()
+        pd_flights = self.pd_flights()
+
+        ed_flights_itertuples = ed_flights.itertuples(name=None)
+        pd_flights_itertuples = pd_flights.itertuples(name=None)
+
+        assert len(list(ed_flights_itertuples)) == len(list(pd_flights_itertuples))
+
+        for ed_row in ed_flights_itertuples:
 
-        for i in len(ed_flights_itertuples):
-            ed_row = next(ed_flights_itertuples)
             pd_row = next(pd_flights_itertuples)
+
             assert ed_row == pd_row
+
+        for pd_row in pd_flights_itertuples:
+
+            ed_row = next(ed_flights_itertuples)
+
+            assert pd_row == ed_row

From 9a0a9da033e4bdb66ca58d2e0c37b705cd846b02 Mon Sep 17 00:00:00 2001
From: Jabin Kong <1059978534@qq.com>
Date: Thu, 19 Aug 2021 14:37:27 +0800
Subject: [PATCH 03/11] Remove Operations.iterrows and Operations.itertuples

---
 eland/operations.py | 58 ---------------------------------------------
 1 file changed, 58 deletions(-)

diff --git a/eland/operations.py b/eland/operations.py
index 784d31a4..8faddbe2 100644
--- a/eland/operations.py
+++ b/eland/operations.py
@@ -1196,64 +1196,6 @@ def describe(self, query_compiler: "QueryCompiler") -> pd.DataFrame:
             ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]
         )
 
-    def iterrows(
-        self, query_compiler: "QueryCompiler"
-    ) -> Iterable[Tuple[Union[str, Tuple[str, ...]], pd.Series]]:
-        query_params, post_processing = self._resolve_tasks(query_compiler)
-        result_size, sort_params = Operations._query_params_to_size_and_sort(
-            query_params
-        )
-
-        script_fields = query_params.script_fields
-        query = Query(query_params.query)
-
-        body = query.to_search_body()
-        if script_fields is not None:
-            body["script_fields"] = script_fields
-
-        # Only return requested field_names and add them to body
-        _source = query_compiler.get_field_names(include_scripted_fields=False)
-        body["_source"] = _source if _source else False
-
-        if sort_params:
-            body["sort"] = [sort_params]
-
-        for hits in _search_yield_hits(
-            query_compiler=query_compiler, body=body, max_number_of_hits=result_size
-        ):
-            df = query_compiler._es_results_to_pandas(hits)
-            df = self._apply_df_post_processing(df, post_processing)
-            yield from df.iterrows()
-
-    def itertuples(
-        self, query_compiler: "QueryCompiler", index: bool, name: Union[str, None]
-    ) -> Iterable[Tuple[Any, ...]]:
-        query_params, post_processing = self._resolve_tasks(query_compiler)
-        result_size, sort_params = Operations._query_params_to_size_and_sort(
-            query_params
-        )
-
-        script_fields = query_params.script_fields
-        query = Query(query_params.query)
-
-        body = query.to_search_body()
-        if script_fields is not None:
-            body["script_fields"] = script_fields
-
-        # Only return requested field_names and add them to body
-        _source = query_compiler.get_field_names(include_scripted_fields=False)
-        body["_source"] = _source if _source else False
-
-        if sort_params:
-            body["sort"] = [sort_params]
-
-        for hits in _search_yield_hits(
-            query_compiler=query_compiler, body=body, max_number_of_hits=result_size
-        ):
-            df = query_compiler._es_results_to_pandas(hits)
-            df = self._apply_df_post_processing(df, post_processing)
-            yield from df.itertuples(index=index, name=name)
-
     def to_pandas(
         self, query_compiler: "QueryCompiler", show_progress: bool = False
     ) -> pd.DataFrame:

From 37c3e31dc0e9fa6fb68cb561c6b03337d011a398 Mon Sep 17 00:00:00 2001
From: Jabin Kong <1059978534@qq.com>
Date: Thu, 19 Aug 2021 14:46:31 +0800
Subject: [PATCH 04/11] Remove unused import

---
 eland/operations.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/eland/operations.py b/eland/operations.py
index 8faddbe2..29d66bef 100644
--- a/eland/operations.py
+++ b/eland/operations.py
@@ -23,7 +23,6 @@
     Any,
     Dict,
     Generator,
-    Iterable,
     List,
     Optional,
     Sequence,

From 24618113a7645cd73e90c2ebda7d69723a502b31 Mon Sep 17 00:00:00 2001
From: Jabin Kong <1059978534@qq.com>
Date: Fri, 20 Aug 2021 11:48:02 +0800
Subject: [PATCH 05/11] Format docstrings

---
 eland/dataframe.py | 82 ++++++++++++++++++++++++++--------------------
 1 file changed, 47 insertions(+), 35 deletions(-)

diff --git a/eland/dataframe.py b/eland/dataframe.py
index 5b4875c8..32093991 100644
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@@ -1463,31 +1463,34 @@ def iterrows(self) -> Iterable[Tuple[Union[str, Tuple[str, ...]], pd.Series]]:
 
         Examples
         --------
-        >>> df = ed.DataFrame('localhost:9200', 'flights')
-        >>> df.head()
-            AvgTicketPrice  Cancelled  ... dayOfWeek           timestamp
-        0      841.265642      False  ...         0 2018-01-01 00:00:00
-        1      882.982662      False  ...         0 2018-01-01 18:27:00
-        2      190.636904      False  ...         0 2018-01-01 17:11:14
-        3      181.694216       True  ...         0 2018-01-01 10:33:28
-        4      730.041778      False  ...         0 2018-01-01 05:13:00
+        >>> df = ed.DataFrame('localhost:9200', 'flights', columns=['AvgTicketPrice', 'Cancelled']).head()
+        >>> df
+           AvgTicketPrice  Cancelled
+        0      841.265642      False
+        1      882.982662      False
+        2      190.636904      False
+        3      181.694216       True
+        4      730.041778      False
         <BLANKLINE>
-        [5 rows x 27 columns]
+        [5 rows x 2 columns]
 
-        >>> for index, row in df.iterrows()
+        >>> for index, row in df.iterrows():
         ...     print(row)
-
-        AvgTicketPrice  841.265642
-        Cancelled       False
-        dayOfWeek       0
-        timestamp       2018-01-01 00:00:00
+        AvgTicketPrice    841.265642
+        Cancelled              False
         Name: 0, dtype: object
-
-        AvgTicketPrice  882.982662
-        Cancelled       False
-        dayOfWeek       0
-        timestamp       2018-01-01 18:27:00
+        AvgTicketPrice    882.982662
+        Cancelled              False
         Name: 1, dtype: object
+        AvgTicketPrice    190.636904
+        Cancelled              False
+        Name: 2, dtype: object
+        AvgTicketPrice    181.694216
+        Cancelled               True
+        Name: 3, dtype: object
+        AvgTicketPrice    730.041778
+        Cancelled              False
+        Name: 4, dtype: object
         """
         for df in self._query_compiler.yield_pandas_dataframe():
             yield from df.iterrows()
@@ -1518,33 +1521,42 @@ def itertuples(
 
         Examples
         --------
-        >>> df = ed.DataFrame('localhost:9200', 'flights')
-        >>> df.head()
-            AvgTicketPrice  Cancelled  ... dayOfWeek           timestamp
-        0      841.265642      False  ...         0 2018-01-01 00:00:00
-        1      882.982662      False  ...         0 2018-01-01 18:27:00
-        2      190.636904      False  ...         0 2018-01-01 17:11:14
-        3      181.694216       True  ...         0 2018-01-01 10:33:28
-        4      730.041778      False  ...         0 2018-01-01 05:13:00
+        >>> df = ed.DataFrame('localhost:9200', 'flights', columns=['AvgTicketPrice', 'Cancelled']).head()
+        >>> df
+           AvgTicketPrice  Cancelled
+        0      841.265642      False
+        1      882.982662      False
+        2      190.636904      False
+        3      181.694216       True
+        4      730.041778      False
         <BLANKLINE>
-        [5 rows x 27 columns]
+        [5 rows x 2 columns]
 
         >>> for row in df.itertuples():
         ...     print(row)
-        Eland(Index='0', AvgTicketPrice=841.265642, Cancelled=False, ..., dayOfWeek=0, timestamp='2018-01-01 00:00:00')
-        Eland(Index='1', AvgTicketPrice=882.982662, Cancelled=False, ..., dayOfWeek=0, timestamp='2018-01-01 18:27:00')
+        Eland(Index='0', AvgTicketPrice=841.2656419677076, Cancelled=False)
+        Eland(Index='1', AvgTicketPrice=882.9826615595518, Cancelled=False)
+        Eland(Index='2', AvgTicketPrice=190.6369038508356, Cancelled=False)
+        Eland(Index='3', AvgTicketPrice=181.69421554118, Cancelled=True)
+        Eland(Index='4', AvgTicketPrice=730.041778346198, Cancelled=False)
 
         By setting the `index` parameter to False we can remove the index as the first element of the tuple:
         >>> for row in df.itertuples(index=False):
         ...     print(row)
-        Eland(AvgTicketPrice=841.265642, Cancelled=False, ..., dayOfWeek=0, timestamp='2018-01-01 00:00:00')
-        Eland(AvgTicketPrice=882.982662, Cancelled=False, ..., dayOfWeek=0, timestamp='2018-01-01 18:27:00')
+        Eland(AvgTicketPrice=841.2656419677076, Cancelled=False)
+        Eland(AvgTicketPrice=882.9826615595518, Cancelled=False)
+        Eland(AvgTicketPrice=190.6369038508356, Cancelled=False)
+        Eland(AvgTicketPrice=181.69421554118, Cancelled=True)
+        Eland(AvgTicketPrice=730.041778346198, Cancelled=False)
 
         With the `name` parameter set we set a custom name for the yielded namedtuples:
         >>> for row in df.itertuples(name='Flight'):
         ...     print(row)
-        Flight(Index='0', AvgTicketPrice=841.265642, Cancelled=False, ..., dayOfWeek=0, timestamp='2018-01-01 00:00:00')
-        Flight(Index='1', AvgTicketPrice=882.982662, Cancelled=False, ..., dayOfWeek=0, timestamp='2018-01-01 18:27:00')
+        Flight(Index='0', AvgTicketPrice=841.2656419677076, Cancelled=False)
+        Flight(Index='1', AvgTicketPrice=882.9826615595518, Cancelled=False)
+        Flight(Index='2', AvgTicketPrice=190.6369038508356, Cancelled=False)
+        Flight(Index='3', AvgTicketPrice=181.69421554118, Cancelled=True)
+        Flight(Index='4', AvgTicketPrice=730.041778346198, Cancelled=False)
         """
         for df in self._query_compiler.yield_pandas_dataframe():
             yield from df.itertuples(index=index, name=name)

From 408f01a9cc4c9c98f446327573cb3c6ac2b2911f Mon Sep 17 00:00:00 2001
From: Seth Michael Larson <seth.larson@elastic.co>
Date: Fri, 20 Aug 2021 07:38:32 -0500
Subject: [PATCH 06/11] Rename to search_yield_pandas_dataframes()

---
 eland/operations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/eland/operations.py b/eland/operations.py
index 29d66bef..0fad8ef2 100644
--- a/eland/operations.py
+++ b/eland/operations.py
@@ -1211,7 +1211,7 @@ def to_csv(
         df = self._es_results(query_compiler, show_progress)
         return df.to_csv(**kwargs)  # type: ignore[no-any-return]
 
-    def search_yield_pandas_dataframe(
+    def search_yield_pandas_dataframes(
         self, query_compiler: "QueryCompiler"
     ) -> Generator["pd.DataFrame", None, None]:
         query_params, post_processing = self._resolve_tasks(query_compiler)

From 8169879c8fa7163f1095d1982aae3f70a19d3bb2 Mon Sep 17 00:00:00 2001
From: Seth Michael Larson <seth.larson@elastic.co>
Date: Fri, 20 Aug 2021 07:39:19 -0500
Subject: [PATCH 07/11] Rename to search_yield_pandas_dataframes()

---
 eland/dataframe.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/eland/dataframe.py b/eland/dataframe.py
index 32093991..d579379c 100644
--- a/eland/dataframe.py
+++ b/eland/dataframe.py
@@ -1492,7 +1492,7 @@ def iterrows(self) -> Iterable[Tuple[Union[str, Tuple[str, ...]], pd.Series]]:
         Cancelled              False
         Name: 4, dtype: object
         """
-        for df in self._query_compiler.yield_pandas_dataframe():
+        for df in self._query_compiler.search_yield_pandas_dataframes():
             yield from df.iterrows()
 
     def itertuples(
@@ -1558,7 +1558,7 @@ def itertuples(
         Flight(Index='3', AvgTicketPrice=181.69421554118, Cancelled=True)
         Flight(Index='4', AvgTicketPrice=730.041778346198, Cancelled=False)
         """
-        for df in self._query_compiler.yield_pandas_dataframe():
+        for df in self._query_compiler.search_yield_pandas_dataframes():
             yield from df.itertuples(index=index, name=name)
 
     def aggregate(

From 3bee54c72da04ed0a1ee1b0afe0ab5942c081efa Mon Sep 17 00:00:00 2001
From: Seth Michael Larson <seth.larson@elastic.co>
Date: Fri, 20 Aug 2021 07:39:57 -0500
Subject: [PATCH 08/11] Rename to search_yield_pandas_dataframes()

---
 eland/query_compiler.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/eland/query_compiler.py b/eland/query_compiler.py
index 79f2abae..4a818049 100644
--- a/eland/query_compiler.py
+++ b/eland/query_compiler.py
@@ -528,8 +528,8 @@ def to_csv(self, **kwargs) -> Optional[str]:
         """
         return self._operations.to_csv(self, **kwargs)
 
-    def yield_pandas_dataframe(self) -> Generator["pd.DataFrame", None, None]:
-        return self._operations.search_yield_pandas_dataframe(self)
+    def search_yield_pandas_dataframes(self) -> Generator["pd.DataFrame", None, None]:
+        return self._operations.search_yield_pandas_dataframes(self)
 
     # __getitem__ methods
     def getitem_column_array(self, key, numeric=False):

From be0ce73671f0438f60d2084992a82cd036594218 Mon Sep 17 00:00:00 2001
From: Seth Michael Larson <seth.larson@elastic.co>
Date: Fri, 20 Aug 2021 07:46:41 -0500
Subject: [PATCH 09/11] Update test_iterrows_itertuples_pytest.py

---
 .../test_iterrows_itertuples_pytest.py        | 32 +++++--------------
 1 file changed, 8 insertions(+), 24 deletions(-)

diff --git a/tests/dataframe/test_iterrows_itertuples_pytest.py b/tests/dataframe/test_iterrows_itertuples_pytest.py
index 2400417f..90523f9a 100644
--- a/tests/dataframe/test_iterrows_itertuples_pytest.py
+++ b/tests/dataframe/test_iterrows_itertuples_pytest.py
@@ -30,39 +30,23 @@ def test_iterrows(self):
         ed_flights_iterrows = ed_flights.iterrows()
         pd_flights_iterrows = pd_flights.iterrows()
 
-        assert len(list(ed_flights_iterrows)) == len(list(pd_flights_iterrows))
-
         for ed_index, ed_row in ed_flights_iterrows:
-
             pd_index, pd_row = next(pd_flights_iterrows)
 
             assert_index_equal(ed_index, pd_index)
             assert_series_equal(ed_row, pd_row)
 
-        for pd_index, pd_row in pd_flights_iterrows:
-
-            ed_index, ed_row = next(ed_flights_iterrows)
-
-            assert_index_equal(pd_index, ed_index)
-            assert_series_equal(pd_row, ed_row)
+        # Assert that both are the same length and are exhausted.
+        with pytest.raises(StopIteration):
+            next(ed_flights_iterrows)
+        with pytest.raises(StopIteration):
+            next(pd_flights_iterrows)
 
     def test_itertuples(self):
         ed_flights = self.ed_flights()
         pd_flights = self.pd_flights()
 
-        ed_flights_itertuples = ed_flights.itertuples(name=None)
-        pd_flights_itertuples = pd_flights.itertuples(name=None)
-
-        assert len(list(ed_flights_itertuples)) == len(list(pd_flights_itertuples))
-
-        for ed_row in ed_flights_itertuples:
-
-            pd_row = next(pd_flights_itertuples)
-
-            assert ed_row == pd_row
-
-        for pd_row in pd_flights_itertuples:
-
-            ed_row = next(ed_flights_itertuples)
+        ed_flights_itertuples = list(ed_flights.itertuples(name=None))
+        pd_flights_itertuples = list(pd_flights.itertuples(name=None))
 
-            assert pd_row == ed_row
+        assert ed_flights_itertuples == pd_flights_itertuples

From 3445ca8733481df0d87c22ecc0b608ec1c13d151 Mon Sep 17 00:00:00 2001
From: Seth Michael Larson <seth.larson@elastic.co>
Date: Fri, 20 Aug 2021 07:49:22 -0500
Subject: [PATCH 10/11] Add pytest import

---
 tests/dataframe/test_iterrows_itertuples_pytest.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/dataframe/test_iterrows_itertuples_pytest.py b/tests/dataframe/test_iterrows_itertuples_pytest.py
index 90523f9a..5021dd0e 100644
--- a/tests/dataframe/test_iterrows_itertuples_pytest.py
+++ b/tests/dataframe/test_iterrows_itertuples_pytest.py
@@ -17,6 +17,7 @@
 
 # File called _pytest for PyCharm compatability
 
+import pytest
 from pandas.testing import assert_index_equal, assert_series_equal
 
 from tests.common import TestData

From 152e0b65415538293d6ef62eeb24d47eef07a27c Mon Sep 17 00:00:00 2001
From: Seth Michael Larson <seth.larson@elastic.co>
Date: Fri, 20 Aug 2021 08:20:25 -0500
Subject: [PATCH 11/11] Fix test cases to use proper assertions

---
 .../test_iterrows_itertuples_pytest.py          | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tests/dataframe/test_iterrows_itertuples_pytest.py b/tests/dataframe/test_iterrows_itertuples_pytest.py
index 5021dd0e..9dc495e9 100644
--- a/tests/dataframe/test_iterrows_itertuples_pytest.py
+++ b/tests/dataframe/test_iterrows_itertuples_pytest.py
@@ -18,7 +18,7 @@
 # File called _pytest for PyCharm compatability
 
 import pytest
-from pandas.testing import assert_index_equal, assert_series_equal
+from pandas.testing import assert_series_equal
 
 from tests.common import TestData
 
@@ -34,7 +34,7 @@ def test_iterrows(self):
         for ed_index, ed_row in ed_flights_iterrows:
             pd_index, pd_row = next(pd_flights_iterrows)
 
-            assert_index_equal(ed_index, pd_index)
+            assert ed_index == pd_index
             assert_series_equal(ed_row, pd_row)
 
         # Assert that both are the same length and are exhausted.
@@ -50,4 +50,15 @@ def test_itertuples(self):
         ed_flights_itertuples = list(ed_flights.itertuples(name=None))
         pd_flights_itertuples = list(pd_flights.itertuples(name=None))
 
-        assert ed_flights_itertuples == pd_flights_itertuples
+        def assert_tuples_almost_equal(left, right):
+            # Shim which uses pytest.approx() for floating point values inside tuples.
+            assert len(left) == len(right)
+            assert all(
+                (lt == rt)  # Not floats? Use ==
+                if not isinstance(lt, float) and not isinstance(rt, float)
+                else (lt == pytest.approx(rt))  # Any float? Use pytest.approx()
+                for lt, rt in zip(left, right)
+            )
+
+        for ed_tuple, pd_tuple in zip(ed_flights_itertuples, pd_flights_itertuples):
+            assert_tuples_almost_equal(ed_tuple, pd_tuple)