Skip to content

Commit

Permalink
Add support for pandas 2 (#461)
Browse files Browse the repository at this point in the history
  • Loading branch information
alifbe authored Dec 7, 2023
1 parent 8fa3b8f commit bf4029a
Show file tree
Hide file tree
Showing 9 changed files with 25 additions and 19 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/res2df.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ jobs:
strategy:
matrix:
python-version: ['3.8', '3.9', '3.10']
pandas-version: ['pandas<2', 'pandas>2']
include:
# For one of the Python versions we
# install the extra dependency ert
Expand All @@ -44,6 +45,7 @@ jobs:
run: |
pip install --upgrade pip
pip install .
pip install "${{matrix.pandas-version}}"
python -c "import res2df"
- name: Install ert
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ docs/res2df.rst
res2df/version.py
\#*
.\#*
venv*
build
2 changes: 1 addition & 1 deletion res2df/compdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -931,7 +931,7 @@ def applywelopen(
new_state["KEYWORD_IDX"] = row["KEYWORD_IDX"]
new_state["DATE"] = row["DATE"]

compdat_df = compdat_df.append(new_state)
compdat_df = pd.concat([compdat_df, new_state], ignore_index=True)

if not compdat_df.empty:
compdat_df = (
Expand Down
2 changes: 1 addition & 1 deletion res2df/gruptree.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ def prettyprint(dframe: pd.DataFrame) -> str:
output = ""
for date in dframe["DATE"].dropna().unique():
df_date = dframe[dframe.DATE == date]
output += "Date: " + str(date.astype("M8[D]")) + "\n"
output += "Date: " + pd.to_datetime(date).strftime("%Y-%m-%d") + "\n"

for treetype in ["GRUPTREE", "BRANPROP"]:
if treetype in df_date["KEYWORD"].unique():
Expand Down
10 changes: 5 additions & 5 deletions res2df/rft.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,15 +409,15 @@ def merge_icd_seg_conseg(

if logger.level <= logging.DEBUG:
logger.debug("Writing connection data to con.csv")
con_data[CON_TOPOLOGY_COLS.intersection(con_data.columns)].to_csv(
con_data[list(CON_TOPOLOGY_COLS.intersection(con_data.columns))].to_csv(
"con.csv", index=False
)
logger.debug("Writing segment data to seg.csv")
seg_data[SEG_TOPOLOGY_COLS.intersection(seg_data.columns)].to_csv(
seg_data[list(SEG_TOPOLOGY_COLS.intersection(seg_data.columns))].to_csv(
"seg.csv", index=False
)
logger.debug("Writing ICD data to icd.csv")
icd_data[ICD_TOPOLOGY_COLS.intersection(icd_data.columns)].to_csv(
icd_data[list(ICD_TOPOLOGY_COLS.intersection(icd_data.columns))].to_csv(
"icd.csv", index=False
)

Expand All @@ -444,7 +444,7 @@ def merge_icd_seg_conseg(
# Gather connections that are not associated to ICDs:
no_icd_con_segments = set(con_data["CONSEGNO"]) - set(icd_data["ICD_SEGIDX"])
con_data_no_icd = (
con_data.set_index("CONSEGNO").loc[no_icd_con_segments].reset_index()
con_data.set_index("CONSEGNO").loc[list(no_icd_con_segments)].reset_index()
)
else:
con_data_no_icd = con_data
Expand Down Expand Up @@ -615,7 +615,7 @@ def df(
"ICD_LONELYSEG",
}
)
con_icd_seg = con_icd_seg[set(con_icd_seg.columns) - delete_cols]
con_icd_seg = con_icd_seg[list(set(con_icd_seg.columns) - delete_cols)]

rftdata.append(con_icd_seg)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
"resdata>=4.0.0",
"numpy",
"opm>=2020.10.2,<=2022.4", # NB: Pypi versions.
"pandas<2.0",
"pandas",
"pyarrow",
"pyyaml>=5.1",
"treelib",
Expand Down
12 changes: 4 additions & 8 deletions tests/test_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -946,7 +946,10 @@ def test_smry_meta_synthetic():
def test_fix_dframe_for_resdata(dframe, expected_dframe):
"""Test the dataframe preprocessor/validator for df2ressum works"""
pd.testing.assert_frame_equal(
_fix_dframe_for_resdata(dframe), expected_dframe, check_index_type=False
_fix_dframe_for_resdata(dframe),
expected_dframe,
check_index_type=False,
check_column_type=False,
)


Expand Down Expand Up @@ -1119,13 +1122,6 @@ def test_df2pyarrow_500years():
dframe.index.name = "BOGUS"
pyat = _df2pyarrow(dframe)

# pylint: disable=c-extension-no-member
with pytest.raises(pyarrow.lib.ArrowInvalid):
# We cannot convert this back to Pandas, since it will bail on failing
# to use nanosecond timestamps in the dataframe object for these dates.
# This is maybe a PyArrow bug/limitation that we must be aware of.
pyat.to_pandas()

assert (
np.array(pyat.column(0))
== [
Expand Down
4 changes: 3 additions & 1 deletion tests/test_wellcompletiondata.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ def test_df2pyarrow():
resdatafiles, zonemap=EIGHTCELLS_ZONEMAP, use_wellconnstatus=False
)
df["KH"] = df["KH"].astype(np.int32)
pd.testing.assert_frame_equal(df, _df2pyarrow(df).to_pandas(), check_like=True)
pd.testing.assert_frame_equal(
df, _df2pyarrow(df).to_pandas(), check_like=True, check_dtype=False
)


def test_metadata():
Expand Down
8 changes: 6 additions & 2 deletions tests/test_wellconnstatus.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,13 @@ def test_extract_status_changes(smry, expected_wellconnstatus):
"""Testing that the extract_status_changes function is working
correctly with various summary input
"""
smry["DATE"] = pd.to_datetime(smry["DATE"])
time_format = None if int(pd.__version__.split(".")[0]) == 1 else "mixed"

smry["DATE"] = pd.to_datetime(smry["DATE"], format=time_format, dayfirst=True)
smry.set_index("DATE", inplace=True)
expected_wellconnstatus["DATE"] = pd.to_datetime(expected_wellconnstatus["DATE"])
expected_wellconnstatus["DATE"] = pd.to_datetime(
expected_wellconnstatus["DATE"], format=time_format, dayfirst=True
)

# pylint: disable=protected-access
pd.testing.assert_frame_equal(
Expand Down

0 comments on commit bf4029a

Please sign in to comment.