Merge remote-tracking branch 'upstream/master' into bug/categorical-i…

…ndexing-1row-df * upstream/master: (185 commits) ENH: add BooleanArray extension array (pandas-dev#29555) DOC: Add link to dev calendar and meeting notes (pandas-dev#29737) ENH: Add built-in function for Styler to format the text displayed for missing values (pandas-dev#29118) DEPR: remove statsmodels/seaborn compat shims (pandas-dev#29822) DEPR: remove Index.summary (pandas-dev#29807) DEPR: passing an int to read_excel use_cols (pandas-dev#29795) STY: fstrings in io.pytables (pandas-dev#29758) BUG: Fix melt with mixed int/str columns (pandas-dev#29792) TST: add test for ffill/bfill for non unique multilevel (pandas-dev#29763) Changed description of parse_dates in read_excel(). (pandas-dev#29796) BUG: pivot_table not returning correct type when margin=True and aggfunc='mean' (pandas-dev#28248) REF: Create _lib/window directory (pandas-dev#29817) Fixed small mistake (pandas-dev#29815) minor cleanups (pandas-dev#29798) DEPR: enforce deprecations in core.internals (pandas-dev#29723) add test for unused level raises KeyError (pandas-dev#29760) Add documentation linking to sqlalchemy (pandas-dev#29373) io/parsers: ensure decimal is str on PythonParser (pandas-dev#29743) Reenabled no-unused-function (pandas-dev#29767) CLN:F-string in pandas/_libs/tslibs/*.pyx (pandas-dev#29775) ... # Conflicts: # pandas/tests/frame/indexing/test_indexing.py
keechongtan · Nov 25, 2019 · ca60804 · ca60804
2 parents 3e847e9 + 7d7f885
commit ca60804
Show file tree

Hide file tree

Showing 522 changed files with 9,473 additions and 7,183 deletions.
diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml
@@ -1,2 +1,3 @@
 custom: https://pandas.pydata.org/donate.html
+github: [numfocus]
 tidelift: pypi/pandas
diff --git a/.github/workflows/assign.yml b/.github/workflows/assign.yml
@@ -0,0 +1,15 @@
+name: Assign
+on:
+  issue_comment:
+    types: created
+
+jobs:
+  one:
+    runs-on: ubuntu-latest
+    steps:
+      - name:
+        run: |
+            if [[ "${{ github.event.comment.body }}" == "take" ]]; then
+                echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}"
+                curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees
+            fi
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,103 @@
+name: CI
+
+on:
+  push:
+    branches: master
+  pull_request:
+    branches: master
+
+env:
+  ENV_FILE: environment.yml
+  # TODO: remove export PATH=... in each step once this works
+  # PATH: $HOME/miniconda3/bin:$PATH
+
+jobs:
+  checks:
+    name: Checks
+    runs-on: ubuntu-latest
+    steps:
+
+    - name: Checkout
+      uses: actions/checkout@v1
+
+    - name: Looking for unwanted patterns
+      run: ci/code_checks.sh patterns
+      if: true
+
+    - name: Setup environment and build pandas
+      run: |
+        export PATH=$HOME/miniconda3/bin:$PATH
+        ci/setup_env.sh
+      if: true
+
+    - name: Linting
+      run: |
+        export PATH=$HOME/miniconda3/bin:$PATH
+        source activate pandas-dev
+        ci/code_checks.sh lint
+      if: true
+
+    - name: Dependencies consistency
+      run: |
+        export PATH=$HOME/miniconda3/bin:$PATH
+        source activate pandas-dev
+        ci/code_checks.sh dependencies
+      if: true
+
+    - name: Checks on imported code
+      run: |
+        export PATH=$HOME/miniconda3/bin:$PATH
+        source activate pandas-dev
+        ci/code_checks.sh code
+      if: true
+
+    - name: Running doctests
+      run: |
+        export PATH=$HOME/miniconda3/bin:$PATH
+        source activate pandas-dev
+        ci/code_checks.sh doctests
+      if: true
+
+    - name: Docstring validation
+      run: |
+        export PATH=$HOME/miniconda3/bin:$PATH
+        source activate pandas-dev
+        ci/code_checks.sh docstrings
+      if: true
+
+    - name: Typing validation
+      run: |
+        export PATH=$HOME/miniconda3/bin:$PATH
+        source activate pandas-dev
+        ci/code_checks.sh typing
+      if: true
+
+    - name: Testing docstring validation script
+      run: |
+        export PATH=$HOME/miniconda3/bin:$PATH
+        source activate pandas-dev
+        pytest --capture=no --strict scripts
+      if: true
+
+    - name: Running benchmarks
+      run: |
+        export PATH=$HOME/miniconda3/bin:$PATH
+        source activate pandas-dev
+        cd asv_bench
+        asv check -E existing
+        git remote add upstream https://github.com/pandas-dev/pandas.git
+        git fetch upstream
+        if git diff upstream/master --name-only | grep -q "^asv_bench/"; then
+            asv machine --yes
+            ASV_OUTPUT="$(asv dev)"
+            if [[ $(echo "$ASV_OUTPUT" | grep "failed") ]]; then
+                echo "##vso[task.logissue type=error]Benchmarks run with errors"
+                echo "$ASV_OUTPUT"
+                exit 1
+            else
+                echo "Benchmarks run without errors"
+            fi
+        else
+            echo "Benchmarks did not run, no changes detected"
+        fi
+      if: true
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
 -   repo: https://github.com/python/black
-    rev: stable
+    rev: 19.10b0
     hooks:
     -   id: black
         language_version: python3.7
@@ -9,7 +9,7 @@ repos:
     hooks:
     -   id: flake8
         language: python_venv
-        additional_dependencies: [flake8-comprehensions]
+        additional_dependencies: [flake8-comprehensions>=3.1.0]
 -   repo: https://github.com/pre-commit/mirrors-isort
     rev: v4.3.20
     hooks:

diff --git a/.travis.yml b/.travis.yml
@@ -30,11 +30,9 @@ matrix:
       - python: 3.5
 
     include:
-    - dist: bionic
-      # 18.04
-      python: 3.8.0
+    - dist: trusty
       env:
-        - JOB="3.8-dev" PATTERN="(not slow and not network)"
+        - JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network)"
 
     - dist: trusty
       env:
@@ -85,19 +83,10 @@ install:
   - ci/submit_cython_cache.sh
   - echo "install done"
 
-
-before_script:
-  # display server (for clipboard functionality) needs to be started here,
-  # does not work if done in install:setup_env.sh (GH-26103)
-  - export DISPLAY=":99.0"
-  - echo "sh -e /etc/init.d/xvfb start"
-  - if [ "$JOB" != "3.8-dev" ]; then sh -e /etc/init.d/xvfb start; fi
-  - sleep 3
-
 script:
   - echo "script start"
   - echo "$JOB"
-  - if [ "$JOB" != "3.8-dev" ]; then source activate pandas-dev; fi
+  - source activate pandas-dev
   - ci/run_tests.sh
 
 after_script:

diff --git a/Makefile b/Makefile
@@ -15,7 +15,7 @@ lint-diff:
 	git diff upstream/master --name-only -- "*.py" | xargs flake8
 
 black:
-	black . --exclude '(asv_bench/env|\.egg|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist|setup.py)'
+	black .
 
 develop: build
 	python -m pip install --no-build-isolation -e .

diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py
@@ -84,7 +84,7 @@ class ValueCounts:
 
     def setup(self, dropna):
         n = 5 * 10 ** 5
-        arr = ["s{:04d}".format(i) for i in np.random.randint(0, n // 10, size=n)]
+        arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)]
         self.ts = pd.Series(arr).astype("category")
 
     def time_value_counts(self, dropna):
@@ -102,7 +102,7 @@ def time_rendering(self):
 class SetCategories:
     def setup(self):
         n = 5 * 10 ** 5
-        arr = ["s{:04d}".format(i) for i in np.random.randint(0, n // 10, size=n)]
+        arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)]
         self.ts = pd.Series(arr).astype("category")
 
     def time_set_categories(self):
@@ -112,7 +112,7 @@ def time_set_categories(self):
 class RemoveCategories:
     def setup(self):
         n = 5 * 10 ** 5
-        arr = ["s{:04d}".format(i) for i in np.random.randint(0, n // 10, size=n)]
+        arr = [f"s{i:04d}" for i in np.random.randint(0, n // 10, size=n)]
         self.ts = pd.Series(arr).astype("category")
 
     def time_remove_categories(self):
@@ -164,9 +164,9 @@ def setup(self, dtype):
         np.random.seed(1234)
         n = 5 * 10 ** 5
         sample_size = 100
-        arr = [i for i in np.random.randint(0, n // 10, size=n)]
+        arr = list(np.random.randint(0, n // 10, size=n))
         if dtype == "object":
-            arr = ["s{:04d}".format(i) for i in arr]
+            arr = [f"s{i:04d}" for i in arr]
         self.sample = np.random.choice(arr, sample_size)
         self.series = pd.Series(arr).astype("category")
 
@@ -225,7 +225,7 @@ def setup(self, index):
         elif index == "non_monotonic":
             self.data = pd.Categorical.from_codes([0, 1, 2] * N, categories=categories)
         else:
-            raise ValueError("Invalid index param: {}".format(index))
+            raise ValueError(f"Invalid index param: {index}")
 
         self.scalar = 10000
         self.list = list(range(10000))

diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py
@@ -99,7 +99,7 @@ class FromLists:
     def setup(self):
         N = 1000
         M = 100
-        self.data = [[j for j in range(M)] for i in range(N)]
+        self.data = [list(range(M)) for i in range(N)]
 
     def time_frame_from_lists(self):
         self.df = DataFrame(self.data)

diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py
@@ -37,7 +37,7 @@ def wrapper(fname):
         return wrapper
 
 
-from .pandas_vb_common import BaseIO  # noqa: E402 isort:skip
+from .pandas_vb_common import BaseIO  # isort:skip
 
 
 class ParallelGroupbyMethods:
@@ -250,13 +250,11 @@ def setup(self, dtype):
                 np.random.randn(rows, cols), index=date_range("1/1/2000", periods=rows)
             ),
             "object": DataFrame(
-                "foo",
-                index=range(rows),
-                columns=["object%03d".format(i) for i in range(5)],
+                "foo", index=range(rows), columns=["object%03d" for _ in range(5)]
             ),
         }
 
-        self.fname = "__test_{}__.csv".format(dtype)
+        self.fname = f"__test_{dtype}__.csv"
         df = data[dtype]
         df.to_csv(self.fname)
 

diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py
@@ -146,7 +146,7 @@ class Indexing:
 
     def setup(self, dtype):
         N = 10 ** 6
-        self.idx = getattr(tm, "make{}Index".format(dtype))(N)
+        self.idx = getattr(tm, f"make{dtype}Index")(N)
         self.array_mask = (np.arange(N) % 3) == 0
         self.series_mask = Series(self.array_mask)
         self.sorted = self.idx.sort_values()

diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py
@@ -132,7 +132,7 @@ class ReadCSVConcatDatetimeBadDateValue(StringIORewind):
     param_names = ["bad_date_value"]
 
     def setup(self, bad_date_value):
-        self.StringIO_input = StringIO(("%s,\n" % bad_date_value) * 50000)
+        self.StringIO_input = StringIO((f"{bad_date_value},\n") * 50000)
 
     def time_read_csv(self, bad_date_value):
         read_csv(
@@ -202,7 +202,7 @@ def setup(self, sep, thousands):
         data = np.random.randn(N, K) * np.random.randint(100, 10000, (N, K))
         df = DataFrame(data)
         if thousands is not None:
-            fmt = ":{}".format(thousands)
+            fmt = f":{thousands}"
             fmt = "{" + fmt + "}"
             df = df.applymap(lambda x: fmt.format(x))
         df.to_csv(self.fname, sep=sep)
@@ -231,7 +231,7 @@ def setup(self, sep, decimal, float_precision):
         floats = [
             "".join(random.choice(string.digits) for _ in range(28)) for _ in range(15)
         ]
-        rows = sep.join(["0{}".format(decimal) + "{}"] * 3) + "\n"
+        rows = sep.join([f"0{decimal}" + "{}"] * 3) + "\n"
         data = rows * 5
         data = data.format(*floats) * 200  # 1000 x 3 strings csv
         self.StringIO_input = StringIO(data)
@@ -309,9 +309,7 @@ class ReadCSVCachedParseDates(StringIORewind):
     param_names = ["do_cache"]
 
     def setup(self, do_cache):
-        data = (
-            "\n".join("10/{}".format(year) for year in range(2000, 2100)) + "\n"
-        ) * 10
+        data = ("\n".join(f"10/{year}" for year in range(2000, 2100)) + "\n") * 10
         self.StringIO_input = StringIO(data)
 
     def time_read_csv_cached(self, do_cache):
@@ -336,7 +334,7 @@ class ReadCSVMemoryGrowth(BaseIO):
     def setup(self):
         with open(self.fname, "w") as f:
             for i in range(self.num_rows):
-                f.write("{i}\n".format(i=i))
+                f.write(f"{i}\n")
 
     def mem_parser_chunks(self):
         # see gh-24805.

diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py
@@ -14,7 +14,7 @@ def _generate_dataframe():
     C = 5
     df = DataFrame(
         np.random.randn(N, C),
-        columns=["float{}".format(i) for i in range(C)],
+        columns=[f"float{i}" for i in range(C)],
         index=date_range("20000101", periods=N, freq="H"),
     )
     df["object"] = tm.makeStringIndex(N)

diff --git a/asv_bench/benchmarks/io/hdf.py b/asv_bench/benchmarks/io/hdf.py
@@ -115,7 +115,7 @@ def setup(self, format):
         C = 5
         self.df = DataFrame(
             np.random.randn(N, C),
-            columns=["float{}".format(i) for i in range(C)],
+            columns=[f"float{i}" for i in range(C)],
             index=date_range("20000101", periods=N, freq="H"),
         )
         self.df["object"] = tm.makeStringIndex(N)

diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py
@@ -20,7 +20,7 @@ def setup(self, orient, index):
         }
         df = DataFrame(
             np.random.randn(N, 5),
-            columns=["float_{}".format(i) for i in range(5)],
+            columns=[f"float_{i}" for i in range(5)],
             index=indexes[index],
         )
         df.to_json(self.fname, orient=orient)
@@ -43,7 +43,7 @@ def setup(self, index):
         }
         df = DataFrame(
             np.random.randn(N, 5),
-            columns=["float_{}".format(i) for i in range(5)],
+            columns=[f"float_{i}" for i in range(5)],
             index=indexes[index],
         )
         df.to_json(self.fname, orient="records", lines=True)

diff --git a/asv_bench/benchmarks/io/msgpack.py b/asv_bench/benchmarks/io/msgpack.py
@@ -15,7 +15,7 @@ def setup(self):
         C = 5
         self.df = DataFrame(
             np.random.randn(N, C),
-            columns=["float{}".format(i) for i in range(C)],
+            columns=[f"float{i}" for i in range(C)],
             index=date_range("20000101", periods=N, freq="H"),
         )
         self.df["object"] = tm.makeStringIndex(N)

diff --git a/asv_bench/benchmarks/io/pickle.py b/asv_bench/benchmarks/io/pickle.py
@@ -13,7 +13,7 @@ def setup(self):
         C = 5
         self.df = DataFrame(
             np.random.randn(N, C),
-            columns=["float{}".format(i) for i in range(C)],
+            columns=[f"float{i}" for i in range(C)],
             index=date_range("20000101", periods=N, freq="H"),
         )
         self.df["object"] = tm.makeStringIndex(N)

diff --git a/asv_bench/benchmarks/io/sql.py b/asv_bench/benchmarks/io/sql.py
@@ -19,7 +19,7 @@ def setup(self, connection):
             "sqlite": sqlite3.connect(":memory:"),
         }
         self.table_name = "test_type"
-        self.query_all = "SELECT * FROM {}".format(self.table_name)
+        self.query_all = f"SELECT * FROM {self.table_name}"
         self.con = con[connection]
         self.df = DataFrame(
             {
@@ -58,7 +58,7 @@ def setup(self, connection, dtype):
             "sqlite": sqlite3.connect(":memory:"),
         }
         self.table_name = "test_type"
-        self.query_col = "SELECT {} FROM {}".format(dtype, self.table_name)
+        self.query_col = f"SELECT {dtype} FROM {self.table_name}"
         self.con = con[connection]
         self.df = DataFrame(
             {